From 753c638ef04d263ad63638e533a2306193edaa62 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Thu, 7 Nov 2024 16:57:24 -0500 Subject: [PATCH 1/5] make list relations configurable --- dbt/include/snowflake/macros/adapters.sql | 5 +- .../list_relations_tests/test_pagination.py | 243 +++++------------- 2 files changed, 71 insertions(+), 177 deletions(-) diff --git a/dbt/include/snowflake/macros/adapters.sql b/dbt/include/snowflake/macros/adapters.sql index b60cea0b0..eadd03f46 100644 --- a/dbt/include/snowflake/macros/adapters.sql +++ b/dbt/include/snowflake/macros/adapters.sql @@ -134,7 +134,10 @@ {% endmacro %} -{% macro snowflake__list_relations_without_caching(schema_relation, max_iter=10, max_results_per_iter=10000) %} +{% macro snowflake__list_relations_without_caching(schema_relation) %} + + {%- set max_results_per_iter = adapter.config.flags.get('list_relations_per_iteration', 10000) -%} + {%- set max_iter = adapter.config.flags.get('list_relations_iteration_limit', 10) -%} {%- set max_total_results = max_results_per_iter * max_iter -%} {%- set sql -%} diff --git a/tests/functional/adapter/list_relations_tests/test_pagination.py b/tests/functional/adapter/list_relations_tests/test_pagination.py index 407f9c501..927405558 100644 --- a/tests/functional/adapter/list_relations_tests/test_pagination.py +++ b/tests/functional/adapter/list_relations_tests/test_pagination.py @@ -1,34 +1,31 @@ import os + import pytest -import json -from dbt.tests.util import run_dbt, run_dbt_and_capture -from dbt.adapters.snowflake import SnowflakeRelation # Ensure this is the correct import path - -# Testing rationale: -# - snowflake SHOW TERSE OBJECTS command returns at max 10K objects in a single call -# - when dbt attempts to write into a schema with more than 10K objects, compilation will fail -# unless we paginate the result -# - however, testing this process is difficult at a full scale of 10K actual objects populated -# into a fresh testing schema -# - accordingly, we create a smaller set of views and test the looping iteration logic in -# smaller chunks - -NUM_VIEWS = 90 -NUM_DYNAMIC_TABLES = 10 -# the total number should be between the numbers referenced in the "passing" and "failing" macros below -# - MACROS__VALIDATE__SNOWFLAKE__LIST_RELATIONS_WITHOUT_CACHING (11 iter * 10 results per iter -> 110 objects) -# - MACROS__VALIDATE__SNOWFLAKE__LIST_RELATIONS_WITHOUT_CACHING_RAISE_ERROR (33 iter * 3 results per iter -> 99 objects) -NUM_EXPECTED_RELATIONS = 1 + NUM_VIEWS + NUM_DYNAMIC_TABLES - -TABLE_BASE_SQL = """ -{{ config(materialized='table') }} +from dbt_common.exceptions import CompilationError +from dbt.tests.util import run_dbt + +""" +Testing rationale: +- snowflake SHOW TERSE OBJECTS command returns at max 10K objects in a single call +- when dbt attempts to write into a schema with more than 10K objects, compilation will fail + unless we paginate the result +- we default pagination to 10 pages, but users want to configure this + - we instead use that here to force failures by making it smaller +""" + + +TABLE = """ +{{ config(materialized='table') }} select 1 as id -""".lstrip() +""" + -VIEW_X_SQL = """ +VIEW = """ +{{ config(materialized='view') }} select id from {{ ref('my_model_base') }} -""".lstrip() +""" + DYNAMIC_TABLE = ( """ @@ -44,173 +41,67 @@ """ ) -MACROS__VALIDATE__SNOWFLAKE__LIST_RELATIONS_WITHOUT_CACHING = """ -{% macro validate_list_relations_without_caching(schema_relation) %} - {% set relation_list_result = snowflake__list_relations_without_caching(schema_relation, max_iter=11, max_results_per_iter=10) %} - {% set n_relations = relation_list_result | length %} - {{ log("n_relations: " ~ n_relations) }} -{% endmacro %} -""" - -MACROS__VALIDATE__SNOWFLAKE__LIST_RELATIONS_WITHOUT_CACHING_RAISE_ERROR = """ -{% macro validate_list_relations_without_caching_raise_error(schema_relation) %} - {{ snowflake__list_relations_without_caching(schema_relation, max_iter=33, max_results_per_iter=3) }} -{% endmacro %} -""" - - -def parse_json_logs(json_log_output): - parsed_logs = [] - for line in json_log_output.split("\n"): - try: - log = json.loads(line) - except ValueError: - continue - - parsed_logs.append(log) - - return parsed_logs +class BaseConfig: + VIEWS = 90 + DYNAMIC_TABLES = 10 -def find_result_in_parsed_logs(parsed_logs, result_name): - return next( - ( - item["data"]["msg"] - for item in parsed_logs - if result_name in item["data"].get("msg", "msg") - ), - False, - ) - - -def find_exc_info_in_parsed_logs(parsed_logs, exc_info_name): - return next( - ( - item["data"]["exc_info"] - for item in parsed_logs - if exc_info_name in item["data"].get("exc_info", "exc_info") - ), - False, - ) - - -class TestListRelationsWithoutCachingSingle: @pytest.fixture(scope="class") def models(self): - my_models = {"my_model_base.sql": TABLE_BASE_SQL} - for view in range(0, NUM_VIEWS): - my_models.update({f"my_model_{view}.sql": VIEW_X_SQL}) - for dynamic_table in range(0, NUM_DYNAMIC_TABLES): - my_models.update({f"my_dynamic_table_{dynamic_table}.sql": DYNAMIC_TABLE}) + my_models = {"my_model_base.sql": TABLE} + for view in range(0, self.VIEWS): + my_models[f"my_model_{view}.sql"] = VIEW + for dynamic_table in range(0, self.DYNAMIC_TABLES): + my_models[f"my_dynamic_table_{dynamic_table}.sql"] = DYNAMIC_TABLE return my_models - @pytest.fixture(scope="class") - def macros(self): - return { - "validate_list_relations_without_caching.sql": MACROS__VALIDATE__SNOWFLAKE__LIST_RELATIONS_WITHOUT_CACHING, - } + @pytest.fixture(scope="class", autouse=True) + def setup(self, project): + run_dbt(["run"]) - def test__snowflake__list_relations_without_caching_termination(self, project): - """ - validates that we do NOT trigger pagination logic snowflake__list_relations_without_caching - macro when there are fewer than max_results_per_iter relations in the target schema - """ - run_dbt(["run", "-s", "my_model_base"]) - - database = project.database - schemas = project.created_schemas - - for schema in schemas: - schema_relation = SnowflakeRelation.create(database=database, schema=schema) - kwargs = {"schema_relation": schema_relation.render()} - _, log_output = run_dbt_and_capture( - [ - "--debug", - "--log-format=json", - "run-operation", - "validate_list_relations_without_caching", - "--args", - str(kwargs), - ] + def test_list_relations(self, project): + kwargs = {"schema_relation": project.test_schema} + with project.adapter.connection_named("__test"): + relations = project.adapter.execute_macro( + "snowflake__list_relations_without_caching", kwargs=kwargs ) + assert len(relations) == self.VIEWS + self.DYNAMIC_TABLES + 1 - parsed_logs = parse_json_logs(log_output) - n_relations = find_result_in_parsed_logs(parsed_logs, "n_relations") - assert n_relations == "n_relations: 1" +class TestListRelationsWithoutCachingSmall(BaseConfig): + pass -class TestListRelationsWithoutCachingFull: - @pytest.fixture(scope="class") - def models(self): - my_models = {"my_model_base.sql": TABLE_BASE_SQL} - for view in range(0, NUM_VIEWS): - my_models.update({f"my_model_{view}.sql": VIEW_X_SQL}) - for dynamic_table in range(0, NUM_DYNAMIC_TABLES): - my_models.update({f"my_dynamic_table_{dynamic_table}.sql": DYNAMIC_TABLE}) - return my_models +class TestListRelationsWithoutCachingLarge(BaseConfig): @pytest.fixture(scope="class") - def macros(self): + def profiles_config_update(self): return { - "validate_list_relations_without_caching.sql": MACROS__VALIDATE__SNOWFLAKE__LIST_RELATIONS_WITHOUT_CACHING, - "validate_list_relations_without_caching_raise_error.sql": MACROS__VALIDATE__SNOWFLAKE__LIST_RELATIONS_WITHOUT_CACHING_RAISE_ERROR, + "flags": { + "list_relations_per_iteration": 10, + "list_relations_iteration_limit": 20, + } } - def test__snowflake__list_relations_without_caching(self, project): - """ - validates pagination logic in snowflake__list_relations_without_caching macro counts - the correct number of objects in the target schema when having to make multiple looped - calls of SHOW TERSE OBJECTS. - """ - # purpose of the first run is to create the replicated views in the target schema - run_dbt(["run"]) - database = project.database - schemas = project.created_schemas - - for schema in schemas: - schema_relation = SnowflakeRelation.create(database=database, schema=schema) - kwargs = {"schema_relation": schema_relation.render()} - _, log_output = run_dbt_and_capture( - [ - "--debug", - "--log-format=json", - "run-operation", - "validate_list_relations_without_caching", - "--args", - str(kwargs), - ] - ) - parsed_logs = parse_json_logs(log_output) - n_relations = find_result_in_parsed_logs(parsed_logs, "n_relations") +class TestListRelationsWithoutCachingTooLarge(BaseConfig): - assert n_relations == f"n_relations: {NUM_EXPECTED_RELATIONS}" - - def test__snowflake__list_relations_without_caching_raise_error(self, project): - """ - validates pagination logic terminates and raises a compilation error - when exceeding the limit of how many results to return. - """ - run_dbt(["run"]) + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "flags": { + "list_relations_per_iteration": 10, + "list_relations_iteration_limit": 5, + } + } - database = project.database - schemas = project.created_schemas - - for schema in schemas: - schema_relation = SnowflakeRelation.create(database=database, schema=schema) - - kwargs = {"schema_relation": schema_relation.render()} - _, log_output = run_dbt_and_capture( - [ - "--debug", - "--log-format=json", - "run-operation", - "validate_list_relations_without_caching_raise_error", - "--args", - str(kwargs), - ], - expect_pass=False, - ) - parsed_logs = parse_json_logs(log_output) - traceback = find_exc_info_in_parsed_logs(parsed_logs, "Traceback") - assert "dbt will list a maximum of 99 objects in schema " in traceback + def test_list_relations(self, project): + kwargs = {"schema_relation": project.test_schema} + with project.adapter.connection_named("__test"): + with pytest.raises(CompilationError): + project.adapter.execute_macro( + "snowflake__list_relations_without_caching", kwargs=kwargs + ) + + def test_on_run(self, project): + with pytest.raises(CompilationError): + run_dbt(["run"]) From 0ec9641e9b93986be519f5abe5ea3676d5811d9f Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Thu, 7 Nov 2024 17:04:36 -0500 Subject: [PATCH 2/5] changie --- .changes/unreleased/Features-20241107-170307.yaml | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 .changes/unreleased/Features-20241107-170307.yaml diff --git a/.changes/unreleased/Features-20241107-170307.yaml b/.changes/unreleased/Features-20241107-170307.yaml new file mode 100644 index 000000000..1479c5805 --- /dev/null +++ b/.changes/unreleased/Features-20241107-170307.yaml @@ -0,0 +1,7 @@ +kind: Features +body: 'Allow configurable pagination on list_relations_without_caching to support + users with a large number of objects per schema' +time: 2024-11-07T17:03:07.826352-05:00 +custom: + Author: mikealfare + Issue: "1234" From 4d297cde829c348fe59127098d5351befd4e65f4 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Fri, 8 Nov 2024 10:41:21 -0500 Subject: [PATCH 3/5] update iteration to page in the config settings --- dbt/include/snowflake/macros/adapters.sql | 4 ++-- .../adapter/list_relations_tests/test_pagination.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/dbt/include/snowflake/macros/adapters.sql b/dbt/include/snowflake/macros/adapters.sql index eadd03f46..108f2b394 100644 --- a/dbt/include/snowflake/macros/adapters.sql +++ b/dbt/include/snowflake/macros/adapters.sql @@ -136,8 +136,8 @@ {% macro snowflake__list_relations_without_caching(schema_relation) %} - {%- set max_results_per_iter = adapter.config.flags.get('list_relations_per_iteration', 10000) -%} - {%- set max_iter = adapter.config.flags.get('list_relations_iteration_limit', 10) -%} + {%- set max_results_per_iter = adapter.config.flags.get('list_relations_per_page', 10000) -%} + {%- set max_iter = adapter.config.flags.get('list_relations_page_limit', 10) -%} {%- set max_total_results = max_results_per_iter * max_iter -%} {%- set sql -%} diff --git a/tests/functional/adapter/list_relations_tests/test_pagination.py b/tests/functional/adapter/list_relations_tests/test_pagination.py index 927405558..da0e7920d 100644 --- a/tests/functional/adapter/list_relations_tests/test_pagination.py +++ b/tests/functional/adapter/list_relations_tests/test_pagination.py @@ -77,8 +77,8 @@ class TestListRelationsWithoutCachingLarge(BaseConfig): def profiles_config_update(self): return { "flags": { - "list_relations_per_iteration": 10, - "list_relations_iteration_limit": 20, + "list_relations_per_page": 10, + "list_relations_page_limit": 20, } } @@ -89,8 +89,8 @@ class TestListRelationsWithoutCachingTooLarge(BaseConfig): def project_config_update(self): return { "flags": { - "list_relations_per_iteration": 10, - "list_relations_iteration_limit": 5, + "list_relations_per_page": 10, + "list_relations_page_limit": 5, } } From 9eef24c75db75cac0bc5122a1beef15f25eb5a77 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Fri, 8 Nov 2024 10:46:42 -0500 Subject: [PATCH 4/5] update the warning to recommend how to account for breaching the limit on list_relations --- dbt/include/snowflake/macros/adapters.sql | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dbt/include/snowflake/macros/adapters.sql b/dbt/include/snowflake/macros/adapters.sql index 108f2b394..4f2006d28 100644 --- a/dbt/include/snowflake/macros/adapters.sql +++ b/dbt/include/snowflake/macros/adapters.sql @@ -111,9 +111,10 @@ {%- if loop.index == max_iter -%} {%- set msg -%} - dbt will list a maximum of {{ max_total_results }} objects in schema {{ schema_relation }}. - Your schema exceeds this limit. Please contact support@getdbt.com for troubleshooting tips, - or review and reduce the number of objects contained. + dbt is currently configured to list a maximum of {{ max_total_results }} objects per schema. + {{ schema_relation }} exceeds this limit. If this is expected, you may configure this limit + by setting list_relations_per_page and list_relations_page_limit in your project flags. + It is recommended to start by increasing list_relations_page_limit to something more than the default of 10. {%- endset -%} {% do exceptions.raise_compiler_error(msg) %} From e9e5fe423361bb502cceedbc55364fedfdd44ba5 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Fri, 8 Nov 2024 10:52:12 -0500 Subject: [PATCH 5/5] update the warning to recommend how to account for breaching the limit on list_relations --- .../adapter/list_relations_tests/test_pagination.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/functional/adapter/list_relations_tests/test_pagination.py b/tests/functional/adapter/list_relations_tests/test_pagination.py index da0e7920d..7dd382af5 100644 --- a/tests/functional/adapter/list_relations_tests/test_pagination.py +++ b/tests/functional/adapter/list_relations_tests/test_pagination.py @@ -97,11 +97,15 @@ def project_config_update(self): def test_list_relations(self, project): kwargs = {"schema_relation": project.test_schema} with project.adapter.connection_named("__test"): - with pytest.raises(CompilationError): + with pytest.raises(CompilationError) as error: project.adapter.execute_macro( "snowflake__list_relations_without_caching", kwargs=kwargs ) + assert "list_relations_per_page" in error.value.msg + assert "list_relations_page_limit" in error.value.msg def test_on_run(self, project): - with pytest.raises(CompilationError): + with pytest.raises(CompilationError) as error: run_dbt(["run"]) + assert "list_relations_per_page" in error.value.msg + assert "list_relations_page_limit" in error.value.msg