diff --git a/.changes/unreleased/Fixes-20240610-104114.yaml b/.changes/unreleased/Fixes-20240610-104114.yaml new file mode 100644 index 000000000..d220288f8 --- /dev/null +++ b/.changes/unreleased/Fixes-20240610-104114.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: Fix config change detection not working for multiple sortkey in materialized views +time: 2024-06-10T10:41:14.721411-03:00 +custom: + Author: lvitti + Issue: "838" diff --git a/.changes/unreleased/Fixes-20240715-143507.yaml b/.changes/unreleased/Fixes-20240715-143507.yaml new file mode 100644 index 000000000..e3dd2f0bf --- /dev/null +++ b/.changes/unreleased/Fixes-20240715-143507.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: Fix materialized views comment syntax +time: 2024-07-15T14:35:07.412274+12:00 +custom: + Author: jeremyyeo + Issue: "837" diff --git a/dbt/adapters/redshift/relation_configs/materialized_view.py b/dbt/adapters/redshift/relation_configs/materialized_view.py index 48c04b554..a01185f22 100644 --- a/dbt/adapters/redshift/relation_configs/materialized_view.py +++ b/dbt/adapters/redshift/relation_configs/materialized_view.py @@ -169,6 +169,13 @@ def parse_relation_results(cls, relation_results: RelationResults) -> Dict: "query": agate.Table( agate.Row({"definition": "")} ), + "columns": agate.Table( + agate.Row({ + "column": "", + "sort_key_position": , + "is_dist_key: any(true, false), + }) + ), } Additional columns in either value is fine, as long as `sortkey` and `sortstyle` are available. @@ -199,11 +206,12 @@ def parse_relation_results(cls, relation_results: RelationResults) -> Dict: {"dist": RedshiftDistConfig.parse_relation_results(materialized_view)} ) - # TODO: this only shows the first column in the sort key - if materialized_view.get("sortkey1"): - config_dict.update( - {"sort": RedshiftSortConfig.parse_relation_results(materialized_view)} - ) + if columns := relation_results.get("columns"): + sort_columns = [row for row in columns.rows if row.get("sort_key_position", 0) > 0] + if sort_columns: + config_dict.update( + {"sort": RedshiftSortConfig.parse_relation_results(sort_columns)} + ) return config_dict diff --git a/dbt/adapters/redshift/relation_configs/sort.py b/dbt/adapters/redshift/relation_configs/sort.py index be5b3627d..f38d5a1e1 100644 --- a/dbt/adapters/redshift/relation_configs/sort.py +++ b/dbt/adapters/redshift/relation_configs/sort.py @@ -1,6 +1,6 @@ from dataclasses import dataclass from dbt.adapters.contracts.relation import RelationConfig -from typing import Optional, FrozenSet, Set, Dict, Any, TYPE_CHECKING +from typing import Optional, Set, Dict, Any, TYPE_CHECKING, Tuple from dbt.adapters.relation_configs import ( RelationConfigChange, @@ -46,7 +46,7 @@ class RedshiftSortConfig(RedshiftRelationConfigBase, RelationConfigValidationMix """ sortstyle: Optional[RedshiftSortStyle] = None - sortkey: Optional[FrozenSet[str]] = None + sortkey: Optional[Tuple[str]] = None def __post_init__(self): # maintains `frozen=True` while allowing for a variable default on `sort_type` @@ -103,7 +103,7 @@ def validation_rules(self) -> Set[RelationConfigValidationRule]: def from_dict(cls, config_dict) -> Self: kwargs_dict = { "sortstyle": config_dict.get("sortstyle"), - "sortkey": frozenset(column for column in config_dict.get("sortkey", {})), + "sortkey": tuple(column for column in config_dict.get("sortkey", {})), } sort: Self = super().from_dict(kwargs_dict) # type: ignore return sort # type: ignore @@ -133,12 +133,12 @@ def parse_relation_config(cls, relation_config: RelationConfig) -> Dict[str, Any if isinstance(sortkey, str): sortkey = [sortkey] - config_dict.update({"sortkey": set(sortkey)}) + config_dict.update({"sortkey": tuple(sortkey)}) return config_dict @classmethod - def parse_relation_results(cls, relation_results_entry: "agate.Row") -> dict: + def parse_relation_results(cls, relation_results_entry: "agate.MappedSequence") -> dict: """ Translate agate objects from the database into a standard dictionary. @@ -147,19 +147,26 @@ def parse_relation_results(cls, relation_results_entry: "agate.Row") -> dict: Processing of `sortstyle` has been omitted here, which means it's the default (compound). Args: - relation_results_entry: the description of the sortkey and sortstyle from the database in this format: - - agate.Row({ - ..., - "sortkey1": "", - ... - }) + relation_results_entry: The list of rows that contains the sortkey in this format: + [ + agate.Row({ + ..., + "column": "", + "sort_key_position": , + ... + }), + ] Returns: a standard dictionary describing this `RedshiftSortConfig` instance """ - if sortkey := relation_results_entry.get("sortkey1"): - return {"sortkey": {sortkey}} - return {} + sort_config = [] + + sorted_columns = sorted(relation_results_entry, key=lambda x: x["sort_key_position"]) + for column in sorted_columns: + if column.get("sort_key_position"): + sort_config.append(column.get("column")) + + return {"sortkey": sort_config} @dataclass(frozen=True, eq=True, unsafe_hash=True) diff --git a/dbt/include/redshift/macros/adapters.sql b/dbt/include/redshift/macros/adapters.sql index 9eb6e1cee..b38897161 100644 --- a/dbt/include/redshift/macros/adapters.sql +++ b/dbt/include/redshift/macros/adapters.sql @@ -280,9 +280,24 @@ {% endif %} {% endmacro %} +{# + Copied from the postgres-adapter. +#} +{% macro escape_comment(comment) -%} + {% if comment is not string %} + {% do exceptions.raise_compiler_error('cannot escape a non-string: ' ~ comment) %} + {% endif %} + {%- set magic = '$dbt_comment_literal_block$' -%} + {%- if magic in comment -%} + {%- do exceptions.raise_compiler_error('The string ' ~ magic ~ ' is not allowed in comments.') -%} + {%- endif -%} + {{ magic }}{{ comment }}{{ magic }} +{%- endmacro %} {% macro redshift__alter_relation_comment(relation, comment) %} - {% do return(postgres__alter_relation_comment(relation, comment)) %} + {%- set escaped_comment = escape_comment(comment) -%} + {%- set relation_type = 'view' if relation.type == 'materialized_view' else relation.type -%} + comment on {{ relation_type }} {{ relation }} is {{ escaped_comment }}; {% endmacro %} diff --git a/dbt/include/redshift/macros/relations/materialized_view/describe.sql b/dbt/include/redshift/macros/relations/materialized_view/describe.sql index a56d0756e..3f038b9ad 100644 --- a/dbt/include/redshift/macros/relations/materialized_view/describe.sql +++ b/dbt/include/redshift/macros/relations/materialized_view/describe.sql @@ -24,6 +24,20 @@ {%- endset %} {% set _materialized_view = run_query(_materialized_view_sql) %} + {%- set _column_descriptor_sql -%} + SELECT + a.attname as column, + a.attisdistkey as is_dist_key, + a.attsortkeyord as sort_key_position + FROM pg_class c + JOIN pg_namespace n ON n.oid = c.relnamespace + JOIN pg_attribute a ON a.attrelid = c.oid + WHERE + n.nspname ilike '{{ relation.schema }}' + AND c.relname LIKE 'mv_tbl__{{ relation.identifier }}__%' + {%- endset %} + {% set _column_descriptor = run_query(_column_descriptor_sql) %} + {%- set _query_sql -%} select vw.definition @@ -34,6 +48,10 @@ {%- endset %} {% set _query = run_query(_query_sql) %} - {% do return({'materialized_view': _materialized_view, 'query': _query}) %} + {% do return({ + 'materialized_view': _materialized_view, + 'query': _query, + 'columns': _column_descriptor, + })%} {% endmacro %} diff --git a/tests/functional/adapter/test_persist_docs.py b/tests/functional/adapter/test_persist_docs.py index 6eeaf881c..cb5e632b6 100644 --- a/tests/functional/adapter/test_persist_docs.py +++ b/tests/functional/adapter/test_persist_docs.py @@ -1,6 +1,7 @@ import json import pytest +from dbt.tests.adapter.materialized_view import files from dbt.tests.util import run_dbt from dbt.tests.adapter.persist_docs.test_persist_docs import ( @@ -10,6 +11,21 @@ BasePersistDocsCommentOnQuotedColumn, ) +_MATERIALIZED_VIEW_PROPERTIES__SCHEMA_YML = """ +version: 2 +models: + - name: my_materialized_view + description: | + Materialized view model description "with double quotes" + and with 'single quotes' as welll as other; + '''abc123''' + reserved -- characters + 80% of statistics are made up on the spot + -- + /* comment */ + Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting +""" + class TestPersistDocs(BasePersistDocs): @pytest.mark.flaky @@ -62,3 +78,31 @@ def test_comment_on_late_binding_view(self, project): no_docs_node = catalog_data["nodes"]["model.test.no_docs_model"] self._assert_has_view_comments(no_docs_node, False, False) + + +class TestPersistDocsWithMaterializedView(BasePersistDocs): + @pytest.fixture(scope="class", autouse=True) + def seeds(self): + return {"my_seed.csv": files.MY_SEED} + + @pytest.fixture(scope="class") + def models(self): + return { + "my_materialized_view.sql": files.MY_MATERIALIZED_VIEW, + } + + @pytest.fixture(scope="class") + def properties(self): + return { + "schema.yml": _MATERIALIZED_VIEW_PROPERTIES__SCHEMA_YML, + } + + @pytest.mark.flaky + def test_has_comments_pglike(self, project): + run_dbt(["docs", "generate"]) + with open("target/catalog.json") as fp: + catalog_data = json.load(fp) + assert "nodes" in catalog_data + assert len(catalog_data["nodes"]) == 2 + view_node = catalog_data["nodes"]["model.test.my_materialized_view"] + assert view_node["metadata"]["comment"].startswith("Materialized view model description") diff --git a/tests/unit/test_materialized_view.py b/tests/unit/test_materialized_view.py index 8e4f6ca3e..322b134f8 100644 --- a/tests/unit/test_materialized_view.py +++ b/tests/unit/test_materialized_view.py @@ -1,5 +1,6 @@ from unittest.mock import Mock +import agate import pytest from dbt.adapters.redshift.relation_configs import RedshiftMaterializedViewConfig @@ -53,3 +54,57 @@ def test_redshift_materialized_view_config_throws_expected_exception_with_invali ) with pytest.raises(ValueError): config.parse_relation_config(model_node) + + +def test_redshift_materialized_view_parse_relation_results_handles_multiples_sort_key(): + materialized_view = agate.Table.from_object( + [], + [ + "database", + "schema", + "table", + "diststyle", + "sortkey1", + "autorefresh", + ], + ) + + column_descriptor = agate.Table.from_object( + [ + { + "column": "my_column", + "is_dist_key": True, + "sort_key_position": 1, + }, + { + "column": "my_column2", + "is_dist_key": True, + "sort_key_position": 2, + }, + { + "column": "my_column5", + "is_dist_key": False, + "sort_key_position": 0, + }, + ], + ) + + query = agate.Table.from_object( + [ + { + "definition": "create materialized view my_view as (select 1 as my_column, 'value' as my_column2)" + } + ] + ) + + relation_results = { + "materialized_view": materialized_view, + "columns": column_descriptor, + "query": query, + } + + config_dict = RedshiftMaterializedViewConfig.parse_relation_results(relation_results) + + assert isinstance(config_dict["sort"], dict) + assert config_dict["sort"]["sortkey"][0] == "my_column" + assert config_dict["sort"]["sortkey"][1] == "my_column2" diff --git a/tests/unit/test_relation.py b/tests/unit/test_relation.py index 4817ab100..0985c62c8 100644 --- a/tests/unit/test_relation.py +++ b/tests/unit/test_relation.py @@ -1,5 +1,15 @@ +from unittest.mock import Mock + +import agate +import pytest + from dbt.adapters.redshift.relation import RedshiftRelation -from dbt.adapters.contracts.relation import RelationType +from dbt.adapters.contracts.relation import ( + RelationType, + RelationConfig, +) + +from dbt.adapters.redshift.relation_configs.sort import RedshiftSortStyle def test_renameable_relation(): @@ -15,3 +25,133 @@ def test_renameable_relation(): RelationType.Table, } ) + + +@pytest.fixture +def materialized_view_without_sort_key_from_db(): + materialized_view = agate.Table.from_object( + [ + { + "database": "my_db", + "schema": "my_schema", + "table": "my_table", + } + ], + ) + + column_descriptor = agate.Table.from_object([]) + + query = agate.Table.from_object( + [ + { + "definition": "create materialized view my_view as (select 1 as my_column, 'value' as my_column2)" + } + ] + ) + + relation_results = { + "materialized_view": materialized_view, + "columns": column_descriptor, + "query": query, + } + return relation_results + + +@pytest.fixture +def materialized_view_without_sort_key_config(): + relation_config = Mock(spec=RelationConfig) + + relation_config.database = "my_db" + relation_config.identifier = "my_table" + relation_config.schema = "my_schema" + relation_config.config = Mock() + relation_config.config.extra = {} + relation_config.config.sort = "" + relation_config.compiled_code = ( + "create materialized view my_view as (select 1 as my_column, 'value' as my_column2)" + ) + return relation_config + + +@pytest.fixture +def materialized_view_multiple_sort_key_from_db(materialized_view_without_sort_key_from_db): + materialized_view_without_sort_key_from_db["columns"] = agate.Table.from_object( + [ + { + "column": "my_column", + "is_dist_key": True, + "sort_key_position": 1, + }, + { + "column": "my_column2", + "is_dist_key": True, + "sort_key_position": 2, + }, + ], + ) + return materialized_view_without_sort_key_from_db + + +@pytest.fixture +def materialized_view_multiple_sort_key_config(materialized_view_without_sort_key_config): + materialized_view_without_sort_key_config.config.extra = { + "sort_type": RedshiftSortStyle.compound, + "sort": ["my_column", "my_column2"], + } + + return materialized_view_without_sort_key_config + + +def test_materialized_view_config_changeset_without_sort_key_empty_changes( + materialized_view_without_sort_key_from_db, + materialized_view_without_sort_key_config, +): + change_set = RedshiftRelation.materialized_view_config_changeset( + materialized_view_without_sort_key_from_db, + materialized_view_without_sort_key_config, + ) + + assert change_set is None + + +def test_materialized_view_config_changeset_multiple_sort_key_without_changes( + materialized_view_multiple_sort_key_from_db, + materialized_view_multiple_sort_key_config, +): + + change_set = RedshiftRelation.materialized_view_config_changeset( + materialized_view_multiple_sort_key_from_db, + materialized_view_multiple_sort_key_config, + ) + + assert change_set is None + + +def test_materialized_view_config_changeset_multiple_sort_key_with_changes( + materialized_view_multiple_sort_key_from_db, + materialized_view_multiple_sort_key_config, +): + materialized_view_multiple_sort_key_config.config.extra["sort"].append("my_column3") + + change_set = RedshiftRelation.materialized_view_config_changeset( + materialized_view_multiple_sort_key_from_db, + materialized_view_multiple_sort_key_config, + ) + + assert change_set is not None + assert change_set.sort.context.sortkey == ("my_column", "my_column2", "my_column3") + + +def test_materialized_view_config_changeset_multiple_sort_key_with_changes_in_order_column( + materialized_view_multiple_sort_key_from_db, + materialized_view_multiple_sort_key_config, +): + materialized_view_multiple_sort_key_config.config.extra["sort"] = ["my_column2", "my_column"] + + change_set = RedshiftRelation.materialized_view_config_changeset( + materialized_view_multiple_sort_key_from_db, + materialized_view_multiple_sort_key_config, + ) + + assert change_set is not None + assert change_set.sort.context.sortkey == ("my_column2", "my_column")