Skip to content

Commit

Permalink
Merge branch 'main' into fix/224/distkey-error-message
Browse files Browse the repository at this point in the history
  • Loading branch information
dbeatty10 authored Aug 30, 2024
2 parents 023c3ba + 8bbdbf2 commit 9ae7650
Show file tree
Hide file tree
Showing 9 changed files with 322 additions and 23 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20240610-104114.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: Fix config change detection not working for multiple sortkey in materialized views
time: 2024-06-10T10:41:14.721411-03:00
custom:
Author: lvitti
Issue: "838"
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20240715-143507.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: Fix materialized views comment syntax
time: 2024-07-15T14:35:07.412274+12:00
custom:
Author: jeremyyeo
Issue: "837"
18 changes: 13 additions & 5 deletions dbt/adapters/redshift/relation_configs/materialized_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,13 @@ def parse_relation_results(cls, relation_results: RelationResults) -> Dict:
"query": agate.Table(
agate.Row({"definition": "<query>")}
),
"columns": agate.Table(
agate.Row({
"column": "<column_name>",
"sort_key_position": <int>,
"is_dist_key: any(true, false),
})
),
}
Additional columns in either value is fine, as long as `sortkey` and `sortstyle` are available.
Expand Down Expand Up @@ -199,11 +206,12 @@ def parse_relation_results(cls, relation_results: RelationResults) -> Dict:
{"dist": RedshiftDistConfig.parse_relation_results(materialized_view)}
)

# TODO: this only shows the first column in the sort key
if materialized_view.get("sortkey1"):
config_dict.update(
{"sort": RedshiftSortConfig.parse_relation_results(materialized_view)}
)
if columns := relation_results.get("columns"):
sort_columns = [row for row in columns.rows if row.get("sort_key_position", 0) > 0]
if sort_columns:
config_dict.update(
{"sort": RedshiftSortConfig.parse_relation_results(sort_columns)}
)

return config_dict

Expand Down
37 changes: 22 additions & 15 deletions dbt/adapters/redshift/relation_configs/sort.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from dataclasses import dataclass
from dbt.adapters.contracts.relation import RelationConfig
from typing import Optional, FrozenSet, Set, Dict, Any, TYPE_CHECKING
from typing import Optional, Set, Dict, Any, TYPE_CHECKING, Tuple

from dbt.adapters.relation_configs import (
RelationConfigChange,
Expand Down Expand Up @@ -46,7 +46,7 @@ class RedshiftSortConfig(RedshiftRelationConfigBase, RelationConfigValidationMix
"""

sortstyle: Optional[RedshiftSortStyle] = None
sortkey: Optional[FrozenSet[str]] = None
sortkey: Optional[Tuple[str]] = None

def __post_init__(self):
# maintains `frozen=True` while allowing for a variable default on `sort_type`
Expand Down Expand Up @@ -103,7 +103,7 @@ def validation_rules(self) -> Set[RelationConfigValidationRule]:
def from_dict(cls, config_dict) -> Self:
kwargs_dict = {
"sortstyle": config_dict.get("sortstyle"),
"sortkey": frozenset(column for column in config_dict.get("sortkey", {})),
"sortkey": tuple(column for column in config_dict.get("sortkey", {})),
}
sort: Self = super().from_dict(kwargs_dict) # type: ignore
return sort # type: ignore
Expand Down Expand Up @@ -133,12 +133,12 @@ def parse_relation_config(cls, relation_config: RelationConfig) -> Dict[str, Any
if isinstance(sortkey, str):
sortkey = [sortkey]

config_dict.update({"sortkey": set(sortkey)})
config_dict.update({"sortkey": tuple(sortkey)})

return config_dict

@classmethod
def parse_relation_results(cls, relation_results_entry: "agate.Row") -> dict:
def parse_relation_results(cls, relation_results_entry: "agate.MappedSequence") -> dict:
"""
Translate agate objects from the database into a standard dictionary.
Expand All @@ -147,19 +147,26 @@ def parse_relation_results(cls, relation_results_entry: "agate.Row") -> dict:
Processing of `sortstyle` has been omitted here, which means it's the default (compound).
Args:
relation_results_entry: the description of the sortkey and sortstyle from the database in this format:
agate.Row({
...,
"sortkey1": "<column_name>",
...
})
relation_results_entry: The list of rows that contains the sortkey in this format:
[
agate.Row({
...,
"column": "<column_name>",
"sort_key_position": <int>,
...
}),
]
Returns: a standard dictionary describing this `RedshiftSortConfig` instance
"""
if sortkey := relation_results_entry.get("sortkey1"):
return {"sortkey": {sortkey}}
return {}
sort_config = []

sorted_columns = sorted(relation_results_entry, key=lambda x: x["sort_key_position"])
for column in sorted_columns:
if column.get("sort_key_position"):
sort_config.append(column.get("column"))

return {"sortkey": sort_config}


@dataclass(frozen=True, eq=True, unsafe_hash=True)
Expand Down
17 changes: 16 additions & 1 deletion dbt/include/redshift/macros/adapters.sql
Original file line number Diff line number Diff line change
Expand Up @@ -280,9 +280,24 @@
{% endif %}
{% endmacro %}

{#
Copied from the postgres-adapter.
#}
{% macro escape_comment(comment) -%}
{% if comment is not string %}
{% do exceptions.raise_compiler_error('cannot escape a non-string: ' ~ comment) %}
{% endif %}
{%- set magic = '$dbt_comment_literal_block$' -%}
{%- if magic in comment -%}
{%- do exceptions.raise_compiler_error('The string ' ~ magic ~ ' is not allowed in comments.') -%}
{%- endif -%}
{{ magic }}{{ comment }}{{ magic }}
{%- endmacro %}

{% macro redshift__alter_relation_comment(relation, comment) %}
{% do return(postgres__alter_relation_comment(relation, comment)) %}
{%- set escaped_comment = escape_comment(comment) -%}
{%- set relation_type = 'view' if relation.type == 'materialized_view' else relation.type -%}
comment on {{ relation_type }} {{ relation }} is {{ escaped_comment }};
{% endmacro %}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,20 @@
{%- endset %}
{% set _materialized_view = run_query(_materialized_view_sql) %}

{%- set _column_descriptor_sql -%}
SELECT
a.attname as column,
a.attisdistkey as is_dist_key,
a.attsortkeyord as sort_key_position
FROM pg_class c
JOIN pg_namespace n ON n.oid = c.relnamespace
JOIN pg_attribute a ON a.attrelid = c.oid
WHERE
n.nspname ilike '{{ relation.schema }}'
AND c.relname LIKE 'mv_tbl__{{ relation.identifier }}__%'
{%- endset %}
{% set _column_descriptor = run_query(_column_descriptor_sql) %}

{%- set _query_sql -%}
select
vw.definition
Expand All @@ -34,6 +48,10 @@
{%- endset %}
{% set _query = run_query(_query_sql) %}

{% do return({'materialized_view': _materialized_view, 'query': _query}) %}
{% do return({
'materialized_view': _materialized_view,
'query': _query,
'columns': _column_descriptor,
})%}

{% endmacro %}
44 changes: 44 additions & 0 deletions tests/functional/adapter/test_persist_docs.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
import pytest

from dbt.tests.adapter.materialized_view import files
from dbt.tests.util import run_dbt

from dbt.tests.adapter.persist_docs.test_persist_docs import (
Expand All @@ -10,6 +11,21 @@
BasePersistDocsCommentOnQuotedColumn,
)

_MATERIALIZED_VIEW_PROPERTIES__SCHEMA_YML = """
version: 2
models:
- name: my_materialized_view
description: |
Materialized view model description "with double quotes"
and with 'single quotes' as welll as other;
'''abc123'''
reserved -- characters
80% of statistics are made up on the spot
--
/* comment */
Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
"""


class TestPersistDocs(BasePersistDocs):
@pytest.mark.flaky
Expand Down Expand Up @@ -62,3 +78,31 @@ def test_comment_on_late_binding_view(self, project):

no_docs_node = catalog_data["nodes"]["model.test.no_docs_model"]
self._assert_has_view_comments(no_docs_node, False, False)


class TestPersistDocsWithMaterializedView(BasePersistDocs):
@pytest.fixture(scope="class", autouse=True)
def seeds(self):
return {"my_seed.csv": files.MY_SEED}

@pytest.fixture(scope="class")
def models(self):
return {
"my_materialized_view.sql": files.MY_MATERIALIZED_VIEW,
}

@pytest.fixture(scope="class")
def properties(self):
return {
"schema.yml": _MATERIALIZED_VIEW_PROPERTIES__SCHEMA_YML,
}

@pytest.mark.flaky
def test_has_comments_pglike(self, project):
run_dbt(["docs", "generate"])
with open("target/catalog.json") as fp:
catalog_data = json.load(fp)
assert "nodes" in catalog_data
assert len(catalog_data["nodes"]) == 2
view_node = catalog_data["nodes"]["model.test.my_materialized_view"]
assert view_node["metadata"]["comment"].startswith("Materialized view model description")
55 changes: 55 additions & 0 deletions tests/unit/test_materialized_view.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from unittest.mock import Mock

import agate
import pytest

from dbt.adapters.redshift.relation_configs import RedshiftMaterializedViewConfig
Expand Down Expand Up @@ -53,3 +54,57 @@ def test_redshift_materialized_view_config_throws_expected_exception_with_invali
)
with pytest.raises(ValueError):
config.parse_relation_config(model_node)


def test_redshift_materialized_view_parse_relation_results_handles_multiples_sort_key():
materialized_view = agate.Table.from_object(
[],
[
"database",
"schema",
"table",
"diststyle",
"sortkey1",
"autorefresh",
],
)

column_descriptor = agate.Table.from_object(
[
{
"column": "my_column",
"is_dist_key": True,
"sort_key_position": 1,
},
{
"column": "my_column2",
"is_dist_key": True,
"sort_key_position": 2,
},
{
"column": "my_column5",
"is_dist_key": False,
"sort_key_position": 0,
},
],
)

query = agate.Table.from_object(
[
{
"definition": "create materialized view my_view as (select 1 as my_column, 'value' as my_column2)"
}
]
)

relation_results = {
"materialized_view": materialized_view,
"columns": column_descriptor,
"query": query,
}

config_dict = RedshiftMaterializedViewConfig.parse_relation_results(relation_results)

assert isinstance(config_dict["sort"], dict)
assert config_dict["sort"]["sortkey"][0] == "my_column"
assert config_dict["sort"]["sortkey"][1] == "my_column2"
Loading

0 comments on commit 9ae7650

Please sign in to comment.