From 05e2d6d4e8cb95cf75d1111f0ba3f61e838bbc7e Mon Sep 17 00:00:00 2001 From: Srinivas Lade Date: Sun, 23 Jun 2024 17:47:48 -0400 Subject: [PATCH 1/7] Fix PyIceberg Tests --- tests/pyiceberg/conftest.py | 27 ++++++++++++++++++--------- tests/pyiceberg/test_ns.py | 18 +++++++++++------- tests/pyiceberg/test_table.py | 16 +++++++++------- tests/requirements.txt | 1 + 4 files changed, 39 insertions(+), 23 deletions(-) diff --git a/tests/pyiceberg/conftest.py b/tests/pyiceberg/conftest.py index a1d9e05..9dd2989 100644 --- a/tests/pyiceberg/conftest.py +++ b/tests/pyiceberg/conftest.py @@ -1,22 +1,31 @@ import subprocess +import re import pytest from pyiceberg.catalog.rest import RestCatalog -from pyiceberg.exceptions import ForbiddenError -@pytest.fixture(scope="session") -def catalog(): +@pytest.fixture(scope="function") +def catalog(tmp_path): process = subprocess.Popen( ["./denali", "start"], env={ - "DENALI_API_PORT": "5151", - "DENALI_WAREHOUSE_PATH": "/tmp/iceberg", + "DENALI_API_PORT": "0", + "DENALI_WAREHOUSE_PATH": str(tmp_path), "DENALI_DATABASE_URL": ":memory:", - "DENALI_DATABASE_TYPE": "sqlite3", - } + "DENALI_DATABASE_DIALECT": "sqlite3", + }, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, ) - breakpoint() - yield RestCatalog("rest_catalog", uri="http://localhost:5151") + last_line = [] + while len(last_line) == 0 or "Started the Denali Catalog Server at" not in last_line[-1]: + if process.stdout is None: + continue + process.stdout.readlines + last_line.append(process.stdout.readline().decode("utf-8")) + + url = re.search(r"Started the Denali Catalog Server at `(?P[\[\]\:\.\d]+)`", last_line[-1]).group("url") + yield RestCatalog("rest_catalog", uri=f"http://{url}") process.kill() diff --git a/tests/pyiceberg/test_ns.py b/tests/pyiceberg/test_ns.py index 5d44047..a9d8363 100644 --- a/tests/pyiceberg/test_ns.py +++ b/tests/pyiceberg/test_ns.py @@ -6,29 +6,33 @@ def test_create_drop_namespace(catalog): assert catalog.list_namespaces() == [("default",)] catalog.create_namespace("test") assert catalog.list_namespaces() == [("default",), ("test",)] - assert catalog.load_namespace_properties("test") == {} + assert "created_at" in catalog.load_namespace_properties("test") catalog.drop_namespace("test") assert catalog.list_namespaces() == [("default",)] def test_create_drop_namespace_with_properties(catalog): assert catalog.list_namespaces() == [("default",)] - props = { "creator": "denali" } - catalog.create_namespace("test", props) + catalog.create_namespace("test", { "creator": "denali" }) + assert catalog.list_namespaces() == [("default",), ("test",)] - assert catalog.load_namespace_properties("test") == props + props = catalog.load_namespace_properties("test") + assert props.get("creator") == "denali" + assert props.get("created_at").isnumeric() # is numeric timestamp + catalog.drop_namespace("test") assert catalog.list_namespaces() == [("default",)] def test_create_sub_namespace(catalog): assert catalog.list_namespaces("default") == [] - props = { "owner": "pyiceberg" } - catalog.create_namespace("default.def_inner", props) + catalog.create_namespace("default.def_inner", { "owner": "pyiceberg" }) # Note: Bug in PyIceberg, does not follow the REST spec assert catalog.list_namespaces("default") == [("default", "default", "def_inner")] - assert catalog.load_namespace_properties("default.def_inner") == props + props = catalog.load_namespace_properties("default.def_inner") + assert props.get("owner") == "pyiceberg" + assert props.get("created_at").isnumeric() # is numeric timestamp # Attempt to delete `default` should fail because of sub-namespace # TODO: Change error thrown from NoSuchNamespaceError to another diff --git a/tests/pyiceberg/test_table.py b/tests/pyiceberg/test_table.py index 37d37e2..f90dfa8 100644 --- a/tests/pyiceberg/test_table.py +++ b/tests/pyiceberg/test_table.py @@ -3,26 +3,29 @@ def test_create_empty_table(catalog): - schema = pa.schema([("id", pa.int32(), False), ("name", pa.string(), True)]) + in_schema = pa.schema([("id", pa.int32(), False), ("name", pa.string(), True)]) - table = catalog.create_table( + created_table = catalog.create_table( "default.test_create_table", - schema=schema, + schema=in_schema, properties={"creator": "iceberg"} ) + table = catalog.load_table("default.test_create_table") + assert created_table == table + assert table.identifier == ("rest_catalog", "default", "test_create_table") schema = table.schema() assert schema.schema_id == 0 id_col = schema.columns[0] assert id_col.name == "id" - assert isinstance(id_col.type, IntegerType) - assert id_col.required is False + assert isinstance(id_col.field_type, IntegerType) + assert id_col.required is True name_col = schema.columns[1] assert name_col.name == "name" - assert isinstance(name_col.type, StringType) + assert isinstance(name_col.field_type, StringType) assert name_col.required is False assert table.properties == {"creator": "iceberg"} @@ -45,7 +48,6 @@ def test_append_table(catalog): table.append(df) read_df = table.scan().to_arrow() - breakpoint() assert read_df.equals(df) catalog.drop_table("default.test_append_table") diff --git a/tests/requirements.txt b/tests/requirements.txt index 60d7d61..d65daf2 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,3 +1,4 @@ pyiceberg==0.6.1 +pyarrow==16.1.0 pytest From dc586a515510ef39a024c5763d626c34ad116c04 Mon Sep 17 00:00:00 2001 From: Srinivas Lade Date: Sun, 23 Jun 2024 18:32:32 -0400 Subject: [PATCH 2/7] Setup CI --- .github/workflows/test_ci.yml | 32 ++++++++++++++++++++++++++++++++ tests/pyiceberg/conftest.py | 27 ++++++++++++++++++++++++--- 2 files changed, 56 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/test_ci.yml diff --git a/.github/workflows/test_ci.yml b/.github/workflows/test_ci.yml new file mode 100644 index 0000000..5342b50 --- /dev/null +++ b/.github/workflows/test_ci.yml @@ -0,0 +1,32 @@ +name: Test CI + +on: + push: + branches: + - main + pull_request: + branches: + - main + workflow_dispatch: + +jobs: + python-tests: + name: Run Python Tests + runs-on: ubuntu-latest + steps: + # Setup & Install Dependencies + - uses: actions/checkout@v4 + - uses: actions/setup-go@v4 + with: + go-version-file: denali/go.mod + cache-dependency-path: denali/go.sum + - uses: actions/setup-python@v5 + with: + python-version: 3.11 + cache: 'pip' + - run: pip install -r denali/tests/requirements.txt + name: Install Python Deps + + # Run Python Tests + - run: pytest denali/tests + name: Run Python Tests diff --git a/tests/pyiceberg/conftest.py b/tests/pyiceberg/conftest.py index 9dd2989..01225e7 100644 --- a/tests/pyiceberg/conftest.py +++ b/tests/pyiceberg/conftest.py @@ -1,22 +1,41 @@ import subprocess import re +import os +from pathlib import Path import pytest from pyiceberg.catalog.rest import RestCatalog +base_path = str(Path(os.path.realpath(__file__)).parent.parent.parent) + + +@pytest.fixture(scope="session") +def build_binary(): + subprocess.run( + ["go", "build", "."], + cwd=base_path, + check=True, + env={ + "PATH": os.environ.get("PATH", ""), + "HOME": os.environ.get("HOME", ""), + }, + ) + + @pytest.fixture(scope="function") -def catalog(tmp_path): +def catalog(tmp_path, build_binary): process = subprocess.Popen( ["./denali", "start"], + cwd=base_path, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, env={ "DENALI_API_PORT": "0", "DENALI_WAREHOUSE_PATH": str(tmp_path), "DENALI_DATABASE_URL": ":memory:", "DENALI_DATABASE_DIALECT": "sqlite3", }, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, ) last_line = [] @@ -25,6 +44,8 @@ def catalog(tmp_path): continue process.stdout.readlines last_line.append(process.stdout.readline().decode("utf-8")) + if len(last_line) > 15: + raise EnvironmentError("Failed to start Denali Catalog Server") url = re.search(r"Started the Denali Catalog Server at `(?P[\[\]\:\.\d]+)`", last_line[-1]).group("url") yield RestCatalog("rest_catalog", uri=f"http://{url}") From 863c5cec265c877e82278d451d13163acdfc060a Mon Sep 17 00:00:00 2001 From: Srinivas Lade Date: Wed, 26 Jun 2024 10:18:19 -0400 Subject: [PATCH 3/7] Debug --- .github/workflows/test_ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test_ci.yml b/.github/workflows/test_ci.yml index 5342b50..baa8a11 100644 --- a/.github/workflows/test_ci.yml +++ b/.github/workflows/test_ci.yml @@ -16,6 +16,7 @@ jobs: steps: # Setup & Install Dependencies - uses: actions/checkout@v4 + - run: pwd - uses: actions/setup-go@v4 with: go-version-file: denali/go.mod From fdd7312c9cdca5c807b6f47ca3edd18e03d62b55 Mon Sep 17 00:00:00 2001 From: Srinivas Lade Date: Wed, 26 Jun 2024 10:19:57 -0400 Subject: [PATCH 4/7] Fix based on PWD --- .github/workflows/test_ci.yml | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test_ci.yml b/.github/workflows/test_ci.yml index baa8a11..a1c5602 100644 --- a/.github/workflows/test_ci.yml +++ b/.github/workflows/test_ci.yml @@ -16,18 +16,17 @@ jobs: steps: # Setup & Install Dependencies - uses: actions/checkout@v4 - - run: pwd - uses: actions/setup-go@v4 with: - go-version-file: denali/go.mod - cache-dependency-path: denali/go.sum + go-version-file: go.mod + cache-dependency-path: go.sum - uses: actions/setup-python@v5 with: python-version: 3.11 cache: 'pip' - - run: pip install -r denali/tests/requirements.txt + - run: pip install -r tests/requirements.txt name: Install Python Deps # Run Python Tests - - run: pytest denali/tests - name: Run Python Tests + - run: pytest tests + name: Run Tests From 58422694669a9ed6f7b7d7a216ed26aca3c5fec3 Mon Sep 17 00:00:00 2001 From: Srinivas Lade Date: Wed, 26 Jun 2024 10:39:11 -0400 Subject: [PATCH 5/7] Status --- .github/workflows/test_ci.yml | 2 +- tests/pyiceberg/conftest.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test_ci.yml b/.github/workflows/test_ci.yml index a1c5602..a8113e0 100644 --- a/.github/workflows/test_ci.yml +++ b/.github/workflows/test_ci.yml @@ -28,5 +28,5 @@ jobs: name: Install Python Deps # Run Python Tests - - run: pytest tests + - run: pytest -v tests name: Run Tests diff --git a/tests/pyiceberg/conftest.py b/tests/pyiceberg/conftest.py index 01225e7..18328d7 100644 --- a/tests/pyiceberg/conftest.py +++ b/tests/pyiceberg/conftest.py @@ -44,6 +44,7 @@ def catalog(tmp_path, build_binary): continue process.stdout.readlines last_line.append(process.stdout.readline().decode("utf-8")) + print(last_line[-1]) if len(last_line) > 15: raise EnvironmentError("Failed to start Denali Catalog Server") From a2e1546ba5affa816fb5c014985422ef6b86204c Mon Sep 17 00:00:00 2001 From: Srinivas Lade Date: Wed, 26 Jun 2024 10:42:35 -0400 Subject: [PATCH 6/7] Remove print --- tests/pyiceberg/conftest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/pyiceberg/conftest.py b/tests/pyiceberg/conftest.py index 18328d7..01225e7 100644 --- a/tests/pyiceberg/conftest.py +++ b/tests/pyiceberg/conftest.py @@ -44,7 +44,6 @@ def catalog(tmp_path, build_binary): continue process.stdout.readlines last_line.append(process.stdout.readline().decode("utf-8")) - print(last_line[-1]) if len(last_line) > 15: raise EnvironmentError("Failed to start Denali Catalog Server") From b33986e0bed137aab2d90adcea13714dee540c20 Mon Sep 17 00:00:00 2001 From: Srinivas Lade Date: Wed, 26 Jun 2024 11:50:45 -0400 Subject: [PATCH 7/7] Debug --- tests/pyiceberg/conftest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/pyiceberg/conftest.py b/tests/pyiceberg/conftest.py index 01225e7..a6d7929 100644 --- a/tests/pyiceberg/conftest.py +++ b/tests/pyiceberg/conftest.py @@ -38,14 +38,14 @@ def catalog(tmp_path, build_binary): }, ) - last_line = [] + last_line: list[str] = [] while len(last_line) == 0 or "Started the Denali Catalog Server at" not in last_line[-1]: if process.stdout is None: continue process.stdout.readlines last_line.append(process.stdout.readline().decode("utf-8")) if len(last_line) > 15: - raise EnvironmentError("Failed to start Denali Catalog Server") + raise EnvironmentError("Failed to start Denali Catalog Server:\n\t" + "\n\t".join("`" + l + "`" for l in last_line)) url = re.search(r"Started the Denali Catalog Server at `(?P[\[\]\:\.\d]+)`", last_line[-1]).group("url") yield RestCatalog("rest_catalog", uri=f"http://{url}")