|
15 | 15 | # specific language governing permissions and limitations |
16 | 16 | # under the License. |
17 | 17 | from pathlib import PosixPath |
18 | | -from typing import Any |
19 | 18 |
|
20 | 19 | import pyarrow as pa |
21 | 20 | import pytest |
|
24 | 23 |
|
25 | 24 | from pyiceberg.catalog import Catalog |
26 | 25 | from pyiceberg.exceptions import NoSuchTableError |
27 | | -from pyiceberg.expressions import AlwaysTrue, And, BooleanExpression, EqualTo, In, IsNaN, IsNull, Or, Reference |
| 26 | +from pyiceberg.expressions import AlwaysTrue, And, EqualTo, In, IsNaN, IsNull, Or, Reference |
28 | 27 | from pyiceberg.expressions.literals import DoubleLiteral, LongLiteral |
29 | 28 | from pyiceberg.io.pyarrow import schema_to_pyarrow |
30 | 29 | from pyiceberg.schema import Schema |
@@ -444,73 +443,80 @@ def test_create_match_filter_single_condition() -> None: |
444 | 443 | ) |
445 | 444 |
|
446 | 445 |
|
447 | | -@pytest.mark.parametrize( |
448 | | - "data, expected", |
449 | | - [ |
450 | | - pytest.param( |
451 | | - [{"x": 1.0}, {"x": 2.0}, {"x": 3.0}], |
452 | | - In(Reference(name="x"), {DoubleLiteral(1.0), DoubleLiteral(2.0), DoubleLiteral(3.0)}), |
453 | | - id="single-column-without-null", |
| 446 | +def test_create_match_filter_single_column_without_null() -> None: |
| 447 | + data = [{"x": 1.0}, {"x": 2.0}, {"x": 3.0}] |
| 448 | + |
| 449 | + schema = pa.schema([pa.field("x", pa.float64())]) |
| 450 | + table = pa.Table.from_pylist(data, schema=schema) |
| 451 | + |
| 452 | + expr = create_match_filter(table, join_cols=["x"]) |
| 453 | + |
| 454 | + assert expr == In(Reference(name="x"), {DoubleLiteral(1.0), DoubleLiteral(2.0), DoubleLiteral(3.0)}) |
| 455 | + |
| 456 | + |
| 457 | +def test_create_match_filter_single_column_with_null() -> None: |
| 458 | + data = [ |
| 459 | + {"x": 1.0}, |
| 460 | + {"x": 2.0}, |
| 461 | + {"x": None}, |
| 462 | + {"x": 4.0}, |
| 463 | + {"x": float("nan")}, |
| 464 | + ] |
| 465 | + schema = pa.schema([pa.field("x", pa.float64())]) |
| 466 | + table = pa.Table.from_pylist(data, schema=schema) |
| 467 | + |
| 468 | + expr = create_match_filter(table, join_cols=["x"]) |
| 469 | + |
| 470 | + assert expr == Or( |
| 471 | + left=IsNull(term=Reference(name="x")), |
| 472 | + right=Or( |
| 473 | + left=IsNaN(term=Reference(name="x")), |
| 474 | + right=In(Reference(name="x"), {DoubleLiteral(1.0), DoubleLiteral(2.0), DoubleLiteral(4.0)}), |
454 | 475 | ), |
455 | | - pytest.param( |
456 | | - [{"x": 1.0}, {"x": 2.0}, {"x": None}, {"x": 4.0}, {"x": float("nan")}], |
457 | | - Or( |
458 | | - left=IsNull(term=Reference(name="x")), |
459 | | - right=Or( |
460 | | - left=IsNaN(term=Reference(name="x")), |
461 | | - right=In(Reference(name="x"), {DoubleLiteral(1.0), DoubleLiteral(2.0), DoubleLiteral(4.0)}), |
462 | | - ), |
| 476 | + ) |
| 477 | + |
| 478 | + |
| 479 | +def test_create_match_filter_multi_column_with_null() -> None: |
| 480 | + data = [ |
| 481 | + {"x": 1.0, "y": 9.0}, |
| 482 | + {"x": 2.0, "y": None}, |
| 483 | + {"x": None, "y": 7.0}, |
| 484 | + {"x": 4.0, "y": float("nan")}, |
| 485 | + {"x": float("nan"), "y": 0.0}, |
| 486 | + ] |
| 487 | + schema = pa.schema([pa.field("x", pa.float64()), pa.field("y", pa.float64())]) |
| 488 | + table = pa.Table.from_pylist(data, schema=schema) |
| 489 | + |
| 490 | + expr = create_match_filter(table, join_cols=["x", "y"]) |
| 491 | + |
| 492 | + assert expr == Or( |
| 493 | + left=Or( |
| 494 | + left=And( |
| 495 | + left=EqualTo(term=Reference(name="x"), literal=DoubleLiteral(1.0)), |
| 496 | + right=EqualTo(term=Reference(name="y"), literal=DoubleLiteral(9.0)), |
| 497 | + ), |
| 498 | + right=And( |
| 499 | + left=EqualTo(term=Reference(name="x"), literal=DoubleLiteral(2.0)), |
| 500 | + right=IsNull(term=Reference(name="y")), |
463 | 501 | ), |
464 | | - id="single-column-with-null", |
465 | 502 | ), |
466 | | - pytest.param( |
467 | | - [ |
468 | | - {"x": 1.0, "y": 9.0}, |
469 | | - {"x": 2.0, "y": None}, |
470 | | - {"x": None, "y": 7.0}, |
471 | | - {"x": 4.0, "y": float("nan")}, |
472 | | - {"x": float("nan"), "y": 0.0}, |
473 | | - ], |
474 | | - Or( |
475 | | - left=Or( |
476 | | - left=And( |
477 | | - left=EqualTo(term=Reference(name="x"), literal=DoubleLiteral(1.0)), |
478 | | - right=EqualTo(term=Reference(name="y"), literal=DoubleLiteral(9.0)), |
479 | | - ), |
480 | | - right=And( |
481 | | - left=EqualTo(term=Reference(name="x"), literal=DoubleLiteral(2.0)), |
482 | | - right=IsNull(term=Reference(name="y")), |
483 | | - ), |
| 503 | + right=Or( |
| 504 | + left=And( |
| 505 | + left=IsNull(term=Reference(name="x")), |
| 506 | + right=EqualTo(term=Reference(name="y"), literal=DoubleLiteral(7.0)), |
| 507 | + ), |
| 508 | + right=Or( |
| 509 | + left=And( |
| 510 | + left=EqualTo(term=Reference(name="x"), literal=DoubleLiteral(4.0)), |
| 511 | + right=IsNaN(term=Reference(name="y")), |
484 | 512 | ), |
485 | | - right=Or( |
486 | | - left=And( |
487 | | - left=IsNull(term=Reference(name="x")), |
488 | | - right=EqualTo(term=Reference(name="y"), literal=DoubleLiteral(7.0)), |
489 | | - ), |
490 | | - right=Or( |
491 | | - left=And( |
492 | | - left=EqualTo(term=Reference(name="x"), literal=DoubleLiteral(4.0)), |
493 | | - right=IsNaN(term=Reference(name="y")), |
494 | | - ), |
495 | | - right=And( |
496 | | - left=IsNaN(term=Reference(name="x")), |
497 | | - right=EqualTo(term=Reference(name="y"), literal=DoubleLiteral(0.0)), |
498 | | - ), |
499 | | - ), |
| 513 | + right=And( |
| 514 | + left=IsNaN(term=Reference(name="x")), |
| 515 | + right=EqualTo(term=Reference(name="y"), literal=DoubleLiteral(0.0)), |
500 | 516 | ), |
501 | 517 | ), |
502 | | - id="multi-column-with-null", |
503 | 518 | ), |
504 | | - ], |
505 | | -) |
506 | | -def test_create_match_filter(data: list[dict[str, Any]], expected: BooleanExpression) -> None: |
507 | | - schema = pa.schema([pa.field("x", pa.float64()), pa.field("y", pa.float64())]) |
508 | | - table = pa.Table.from_pylist(data, schema=schema) |
509 | | - join_cols = sorted({col for record in data for col in record}) |
510 | | - |
511 | | - expr = create_match_filter(table, join_cols) |
512 | | - |
513 | | - assert expr == expected |
| 519 | + ) |
514 | 520 |
|
515 | 521 |
|
516 | 522 | def test_upsert_with_duplicate_rows_in_table(catalog: Catalog) -> None: |
|
0 commit comments