From e73fe05376556e11fac4e52fa1dd41957be774b5 Mon Sep 17 00:00:00 2001 From: codeforkjeff Date: Mon, 16 Jan 2023 18:12:13 -0800 Subject: [PATCH 1/5] default blank type affinity to less misleading 'UNKNOWN' vs 'TEXT' --- dbt/adapters/sqlite/impl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/adapters/sqlite/impl.py b/dbt/adapters/sqlite/impl.py index 3d2f193..f0d6b1d 100644 --- a/dbt/adapters/sqlite/impl.py +++ b/dbt/adapters/sqlite/impl.py @@ -102,7 +102,7 @@ def get_columns_in_relation(self, relation): for row in results: new_row = [ row[1], - row[2] or 'TEXT', + row[2] or 'UNKNOWN', None, None, None From 099cf8d9fcfbe403818a2450c475af6576c6dd5f Mon Sep 17 00:00:00 2001 From: codeforkjeff Date: Mon, 16 Jan 2023 19:23:27 -0800 Subject: [PATCH 2/5] make data type tests pass; add notes --- dbt/adapters/sqlite/impl.py | 14 +++--- .../adapter/utils/test_data_types.py | 50 +++++++++++++------ 2 files changed, 43 insertions(+), 21 deletions(-) diff --git a/dbt/adapters/sqlite/impl.py b/dbt/adapters/sqlite/impl.py index f0d6b1d..bcd1d4c 100644 --- a/dbt/adapters/sqlite/impl.py +++ b/dbt/adapters/sqlite/impl.py @@ -24,7 +24,9 @@ class SQLiteAdapter(SQLAdapter): def date_function(cls): return 'date()' - # sqlite reports the exact string (including case) used when declaring a column of a certain type + # sqlite reports the exact string (including case) used when declaring a column of a certain type. + # the types here should correspond to affinities recognized by SQLite. + # see https://www.sqlite.org/datatype3.html @classmethod def convert_text_type(cls, agate_table: agate.Table, col_idx: int) -> str: @@ -33,23 +35,23 @@ def convert_text_type(cls, agate_table: agate.Table, col_idx: int) -> str: @classmethod def convert_number_type(cls, agate_table: agate.Table, col_idx: int) -> str: decimals = agate_table.aggregate(agate.MaxPrecision(col_idx)) # type: ignore[attr-defined] - return "NUMERIC" if decimals else "INT" + return "REAL" if decimals else "INT" @classmethod def convert_boolean_type(cls, agate_table: agate.Table, col_idx: int) -> str: - return "BOOLEAN" + return "INT" @classmethod def convert_datetime_type(cls, agate_table: agate.Table, col_idx: int) -> str: - return "TIMESTAMP WITHOUT TIMEZONE" + return "TEXT" @classmethod def convert_date_type(cls, agate_table: agate.Table, col_idx: int) -> str: - return "DATE" + return "TEXT" @classmethod def convert_time_type(cls, agate_table: agate.Table, col_idx: int) -> str: - return "TIME" + return "TEXT" def get_live_relation_type(self, relation): """ diff --git a/tests/functional/adapter/utils/test_data_types.py b/tests/functional/adapter/utils/test_data_types.py index 7b817af..4e2a279 100644 --- a/tests/functional/adapter/utils/test_data_types.py +++ b/tests/functional/adapter/utils/test_data_types.py @@ -6,14 +6,24 @@ from dbt.tests.adapter.utils.data_types.test_type_string import BaseTypeString from dbt.tests.adapter.utils.data_types.test_type_timestamp import BaseTypeTimestamp -# sqlite's table_info() pragma returns an empty type for columns in views -# so we tweak the models in these tests to materialize as tables +# These tests compare the resulting column types of CASTs against the types +# inferred by agate when loading seeds. +# +# There's a Column class in dbt-core that's used by the default adapter implementation +# of methods like [adapter].type_timestamp() to get a type for CASTs. +# +# Some quirks of SQLite that make these tests challenging: +# +# - a CAST seems to always result in an empty type (i.e. no type affinity) in views, +# but not in a CREATE TABLE AS. So we tweak the tests to materialize models as tables. +# +# - CASTs to an unrecognized type will result in the type being 'NUM' which is a bit +# mysterious. class TestTypeBigInt(BaseTypeBigInt): pass -@pytest.mark.skip("TODO: fix this") class TestTypeFloat(BaseTypeFloat): models__actual_sql = """ @@ -40,29 +50,39 @@ def models(self): return {"actual.sql": self.interpolate_macro_namespace(self.models__actual_sql, "type_int")} -@pytest.mark.skip("TODO: fix this") class TestTypeNumeric(BaseTypeNumeric): + + models__actual_sql = """ +{{ config(materialized='table') }} + +select cast('1.2345' as {{ type_numeric() }}) as numeric_col +""" + + @pytest.fixture(scope="class") + def models(self): + return {"actual.sql": self.interpolate_macro_namespace(self.models__actual_sql, "type_numeric")} + def numeric_fixture_type(self): - return "numeric" + return "NUM" class TestTypeString(BaseTypeString): - pass - - -@pytest.mark.skip("TODO: fix this") -class TestTypeTimestamp(BaseTypeTimestamp): models__actual_sql = """ {{ config(materialized='table') }} -select cast('2021-01-01 01:01:01' as {{ type_timestamp() }}) as timestamp_col +select cast('Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.' +as {{ type_string() }}) as string_col """ @pytest.fixture(scope="class") def models(self): - return { - "actual.sql": self.interpolate_macro_namespace(self.models__actual_sql, "type_timestamp") - } + return {"actual.sql": self.interpolate_macro_namespace(self.models__actual_sql, "type_string")} + - \ No newline at end of file +# casting to TIMESTAMP results in an 'NUM' type which truncates the original value +# to only the year portion. It's up to the user to properly deal with timestamps +# values from source tables. +@pytest.mark.skip("timestamp not supported in SQLite") +class TestTypeTimestamp(BaseTypeTimestamp): + pass From dcc96556ebc28d2f783684ffda7f948f987b5142 Mon Sep 17 00:00:00 2001 From: codeforkjeff Date: Mon, 16 Jan 2023 20:41:11 -0800 Subject: [PATCH 3/5] get TestDocsGenerateSqlite to work --- tests/functional/adapter/test_basic.py | 107 +++---------------------- 1 file changed, 9 insertions(+), 98 deletions(-) diff --git a/tests/functional/adapter/test_basic.py b/tests/functional/adapter/test_basic.py index 406b594..74a57d2 100644 --- a/tests/functional/adapter/test_basic.py +++ b/tests/functional/adapter/test_basic.py @@ -15,7 +15,7 @@ from dbt.tests.adapter.basic.test_snapshot_check_cols import BaseSnapshotCheckCols from dbt.tests.adapter.basic.test_snapshot_timestamp import BaseSnapshotTimestamp from dbt.tests.adapter.basic.test_adapter_methods import BaseAdapterMethod -from dbt.tests.adapter.basic.test_docs_generate import BaseDocsGenerate, BaseDocsGenReferences +from dbt.tests.adapter.basic.test_docs_generate import BaseDocsGenerate, BaseDocsGenReferences, models__schema_yml, models__readme_md class TestSimpleMaterializationsSqlite(BaseSimpleMaterializations): @@ -58,101 +58,11 @@ class TestBaseAdapterMethodSqlite(BaseAdapterMethod): pass -@pytest.mark.skip("TODO: fix data type problems") class TestDocsGenerateSqlite(BaseDocsGenerate): """ Change underlying test to avoid having views referencing views in other schemas, which is a no-no in sqlite. """ - models__schema_yml = """ -version: 2 - -models: - - name: model - description: "The test model" - docs: - show: false - columns: - - name: id - description: The user ID number - tests: - - unique - - not_null - - name: first_name - description: The user's first name - - name: email - description: The user's email - - name: ip_address - description: The user's IP address - - name: updated_at - description: The last time this user's email was updated - tests: - - test.nothing - - - name: second_model - description: "The second test model" - docs: - show: false - columns: - - name: id - description: The user ID number - - name: first_name - description: The user's first name - - name: email - description: The user's email - - name: ip_address - description: The user's IP address - - name: updated_at - description: The last time this user's email was updated - - -sources: - - name: my_source - description: "My source" - loader: a_loader - schema: "{{ var('test_schema') }}" - tables: - - name: my_table - description: "My table" - identifier: seed - quoting: - identifier: True - columns: - - name: id - description: "An ID field" - - -exposures: - - name: simple_exposure - type: dashboard - depends_on: - - ref('model') - - source('my_source', 'my_table') - owner: - email: something@example.com - - name: notebook_exposure - type: notebook - depends_on: - - ref('model') - - ref('second_model') - owner: - email: something@example.com - name: Some name - description: > - A description of the complex exposure - maturity: medium - meta: - tool: 'my_tool' - languages: - - python - tags: ['my_department'] - url: http://example.com/notebook/1 -""" - - models__readme_md = """ -This is a readme.md file with {{ invalid-ish jinja }} in it -""" - models__model_sql = """ {{ config( @@ -178,9 +88,9 @@ class TestDocsGenerateSqlite(BaseDocsGenerate): def models(self): # replace models with return { - "schema.yml": self.models__schema_yml, + "schema.yml": models__schema_yml, "second_model.sql": self.models__second_model_sql, - "readme.md": self.models__readme_md, + "readme.md": models__readme_md, "model.sql": self.models__model_sql, } @@ -191,7 +101,7 @@ def expected_catalog(self, project): role=None, id_type="INT", text_type="TEXT", - time_type="DATETIME", + time_type="TEXT", view_type="view", table_type="table", model_stats=no_stats(), @@ -205,8 +115,9 @@ def expected_catalog(self, project): return expected_catalog -@pytest.mark.skip("TODO: fix data type problems") +@pytest.mark.skip("TODO: not sure why 'index' values are off by 1") class TestDocsGenReferencesSqlite(BaseDocsGenReferences): + @pytest.fixture(scope="class") def expected_catalog(self, project): return expected_references_catalog( @@ -214,11 +125,11 @@ def expected_catalog(self, project): role=None, id_type="INT", text_type="TEXT", - time_type="DATETIME", + time_type="TEXT", bigint_type="bigint", view_type="view", table_type="table", model_stats=no_stats(), - seed_stats=no_stats(), - view_summary_stats=no_stats(), + #seed_stats=no_stats(), + #view_summary_stats=no_stats(), ) From 0a2711c40cad9eb94c78e16f4a1463d8c82a90cb Mon Sep 17 00:00:00 2001 From: codeforkjeff Date: Mon, 16 Jan 2023 20:41:32 -0800 Subject: [PATCH 4/5] better caching --- Dockerfile | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index d428f02..1816ebc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,10 +16,15 @@ RUN wget -q https://github.com/nalgeon/sqlean/releases/download/0.15.2/text.so WORKDIR /opt/dbt-sqlite/src -COPY . . +COPY setup.py . +COPY dbt ./dbt RUN pip install . +COPY run_tests.sh . +COPY pytest.ini . +COPY tests ./tests + ENV TESTDATA=/opt/dbt-sqlite/testdata RUN mkdir $TESTDATA From 168836530284af3a8d13b8ffacff9e9c511e64b8 Mon Sep 17 00:00:00 2001 From: codeforkjeff Date: Mon, 16 Jan 2023 20:41:43 -0800 Subject: [PATCH 5/5] improve test scripts --- run_tests.sh | 4 ++-- run_tests_docker.sh | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/run_tests.sh b/run_tests.sh index 718bfef..550688d 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -24,8 +24,6 @@ git clone --depth 1 https://github.com/dbt-labs/jaffle_shop.git cd jaffle_shop -git pull - mkdir -p /tmp/jaffle_shop mkdir -p $HOME/.dbt @@ -57,4 +55,6 @@ jaffle_shop: EOF +dbt seed +dbt run dbt docs generate diff --git a/run_tests_docker.sh b/run_tests_docker.sh index bfd7e04..2cec6c1 100755 --- a/run_tests_docker.sh +++ b/run_tests_docker.sh @@ -1,5 +1,7 @@ #!/bin/bash +set -e + docker build . -t dbt-sqlite docker run \