From 7c63a40c6948e0bd27d756e7adb9604b44347352 Mon Sep 17 00:00:00 2001 From: Mike Alfare <13974384+mikealfare@users.noreply.github.com> Date: Mon, 18 Nov 2024 15:51:51 -0500 Subject: [PATCH] Make `list_relations_without_caching` pagination configurable (#1235) * make list relations configurable * update iteration to page in the config settings * update the warning to recommend how to account for breaching the limit on list_relations --- .../unreleased/Features-20241107-170307.yaml | 7 + dbt/include/snowflake/macros/adapters.sql | 10 +- .../list_relations_tests/test_pagination.py | 247 +++++------------- 3 files changed, 85 insertions(+), 179 deletions(-) create mode 100644 .changes/unreleased/Features-20241107-170307.yaml diff --git a/.changes/unreleased/Features-20241107-170307.yaml b/.changes/unreleased/Features-20241107-170307.yaml new file mode 100644 index 000000000..1479c5805 --- /dev/null +++ b/.changes/unreleased/Features-20241107-170307.yaml @@ -0,0 +1,7 @@ +kind: Features +body: 'Allow configurable pagination on list_relations_without_caching to support + users with a large number of objects per schema' +time: 2024-11-07T17:03:07.826352-05:00 +custom: + Author: mikealfare + Issue: "1234" diff --git a/dbt/include/snowflake/macros/adapters.sql b/dbt/include/snowflake/macros/adapters.sql index 0ca756c6c..3c93d41ad 100644 --- a/dbt/include/snowflake/macros/adapters.sql +++ b/dbt/include/snowflake/macros/adapters.sql @@ -111,9 +111,10 @@ {%- if loop.index == max_iter -%} {%- set msg -%} - dbt will list a maximum of {{ max_total_results }} objects in schema {{ schema_relation }}. - Your schema exceeds this limit. Please contact support@getdbt.com for troubleshooting tips, - or review and reduce the number of objects contained. + dbt is currently configured to list a maximum of {{ max_total_results }} objects per schema. + {{ schema_relation }} exceeds this limit. 
If this is expected, you may configure this limit + by setting list_relations_per_page and list_relations_page_limit in your project flags. + It is recommended to start by increasing list_relations_page_limit to something more than the default of 10. {%- endset -%} {% do exceptions.raise_compiler_error(msg) %} @@ -135,6 +136,9 @@ {% endmacro %} {% macro snowflake__list_relations_without_caching(schema_relation, max_iter=10, max_results_per_iter=10000) %} + + {%- set max_results_per_iter = adapter.config.flags.get('list_relations_per_page', max_results_per_iter) -%} + {%- set max_iter = adapter.config.flags.get('list_relations_page_limit', max_iter) -%} {%- set max_total_results = max_results_per_iter * max_iter -%} {%- set sql -%} {% if schema_relation is string %} diff --git a/tests/functional/adapter/list_relations_tests/test_pagination.py b/tests/functional/adapter/list_relations_tests/test_pagination.py index 407f9c501..7dd382af5 100644 --- a/tests/functional/adapter/list_relations_tests/test_pagination.py +++ b/tests/functional/adapter/list_relations_tests/test_pagination.py @@ -1,34 +1,31 @@ import os + import pytest -import json -from dbt.tests.util import run_dbt, run_dbt_and_capture -from dbt.adapters.snowflake import SnowflakeRelation # Ensure this is the correct import path - -# Testing rationale: -# - snowflake SHOW TERSE OBJECTS command returns at max 10K objects in a single call -# - when dbt attempts to write into a schema with more than 10K objects, compilation will fail -# unless we paginate the result -# - however, testing this process is difficult at a full scale of 10K actual objects populated -# into a fresh testing schema -# - accordingly, we create a smaller set of views and test the looping iteration logic in -# smaller chunks - -NUM_VIEWS = 90 -NUM_DYNAMIC_TABLES = 10 -# the total number should be between the numbers referenced in the "passing" and "failing" macros below -# - MACROS__VALIDATE__SNOWFLAKE__LIST_RELATIONS_WITHOUT_CACHING (11 
iter * 10 results per iter -> 110 objects) -# - MACROS__VALIDATE__SNOWFLAKE__LIST_RELATIONS_WITHOUT_CACHING_RAISE_ERROR (33 iter * 3 results per iter -> 99 objects) -NUM_EXPECTED_RELATIONS = 1 + NUM_VIEWS + NUM_DYNAMIC_TABLES - -TABLE_BASE_SQL = """ -{{ config(materialized='table') }} +from dbt_common.exceptions import CompilationError +from dbt.tests.util import run_dbt + +""" +Testing rationale: +- snowflake SHOW TERSE OBJECTS command returns at max 10K objects in a single call +- when dbt attempts to write into a schema with more than 10K objects, compilation will fail + unless we paginate the result +- we default pagination to 10 pages, but users want to configure this + - we instead use that here to force failures by making it smaller +""" + + +TABLE = """ +{{ config(materialized='table') }} select 1 as id -""".lstrip() +""" + -VIEW_X_SQL = """ +VIEW = """ +{{ config(materialized='view') }} select id from {{ ref('my_model_base') }} -""".lstrip() +""" + DYNAMIC_TABLE = ( """ @@ -44,173 +41,71 @@ """ ) -MACROS__VALIDATE__SNOWFLAKE__LIST_RELATIONS_WITHOUT_CACHING = """ -{% macro validate_list_relations_without_caching(schema_relation) %} - {% set relation_list_result = snowflake__list_relations_without_caching(schema_relation, max_iter=11, max_results_per_iter=10) %} - {% set n_relations = relation_list_result | length %} - {{ log("n_relations: " ~ n_relations) }} -{% endmacro %} -""" - -MACROS__VALIDATE__SNOWFLAKE__LIST_RELATIONS_WITHOUT_CACHING_RAISE_ERROR = """ -{% macro validate_list_relations_without_caching_raise_error(schema_relation) %} - {{ snowflake__list_relations_without_caching(schema_relation, max_iter=33, max_results_per_iter=3) }} -{% endmacro %} -""" - - -def parse_json_logs(json_log_output): - parsed_logs = [] - for line in json_log_output.split("\n"): - try: - log = json.loads(line) - except ValueError: - continue - - parsed_logs.append(log) - - return parsed_logs +class BaseConfig: + VIEWS = 90 + DYNAMIC_TABLES = 10 -def 
find_result_in_parsed_logs(parsed_logs, result_name): - return next( - ( - item["data"]["msg"] - for item in parsed_logs - if result_name in item["data"].get("msg", "msg") - ), - False, - ) - - -def find_exc_info_in_parsed_logs(parsed_logs, exc_info_name): - return next( - ( - item["data"]["exc_info"] - for item in parsed_logs - if exc_info_name in item["data"].get("exc_info", "exc_info") - ), - False, - ) - - -class TestListRelationsWithoutCachingSingle: @pytest.fixture(scope="class") def models(self): - my_models = {"my_model_base.sql": TABLE_BASE_SQL} - for view in range(0, NUM_VIEWS): - my_models.update({f"my_model_{view}.sql": VIEW_X_SQL}) - for dynamic_table in range(0, NUM_DYNAMIC_TABLES): - my_models.update({f"my_dynamic_table_{dynamic_table}.sql": DYNAMIC_TABLE}) + my_models = {"my_model_base.sql": TABLE} + for view in range(0, self.VIEWS): + my_models[f"my_model_{view}.sql"] = VIEW + for dynamic_table in range(0, self.DYNAMIC_TABLES): + my_models[f"my_dynamic_table_{dynamic_table}.sql"] = DYNAMIC_TABLE return my_models - @pytest.fixture(scope="class") - def macros(self): - return { - "validate_list_relations_without_caching.sql": MACROS__VALIDATE__SNOWFLAKE__LIST_RELATIONS_WITHOUT_CACHING, - } + @pytest.fixture(scope="class", autouse=True) + def setup(self, project): + run_dbt(["run"]) - def test__snowflake__list_relations_without_caching_termination(self, project): - """ - validates that we do NOT trigger pagination logic snowflake__list_relations_without_caching - macro when there are fewer than max_results_per_iter relations in the target schema - """ - run_dbt(["run", "-s", "my_model_base"]) - - database = project.database - schemas = project.created_schemas - - for schema in schemas: - schema_relation = SnowflakeRelation.create(database=database, schema=schema) - kwargs = {"schema_relation": schema_relation.render()} - _, log_output = run_dbt_and_capture( - [ - "--debug", - "--log-format=json", - "run-operation", - 
"validate_list_relations_without_caching", - "--args", - str(kwargs), - ] + def test_list_relations(self, project): + kwargs = {"schema_relation": project.test_schema} + with project.adapter.connection_named("__test"): + relations = project.adapter.execute_macro( + "snowflake__list_relations_without_caching", kwargs=kwargs ) + assert len(relations) == self.VIEWS + self.DYNAMIC_TABLES + 1 - parsed_logs = parse_json_logs(log_output) - n_relations = find_result_in_parsed_logs(parsed_logs, "n_relations") - assert n_relations == "n_relations: 1" +class TestListRelationsWithoutCachingSmall(BaseConfig): + pass -class TestListRelationsWithoutCachingFull: - @pytest.fixture(scope="class") - def models(self): - my_models = {"my_model_base.sql": TABLE_BASE_SQL} - for view in range(0, NUM_VIEWS): - my_models.update({f"my_model_{view}.sql": VIEW_X_SQL}) - for dynamic_table in range(0, NUM_DYNAMIC_TABLES): - my_models.update({f"my_dynamic_table_{dynamic_table}.sql": DYNAMIC_TABLE}) - return my_models +class TestListRelationsWithoutCachingLarge(BaseConfig): @pytest.fixture(scope="class") - def macros(self): + def project_config_update(self): return { - "validate_list_relations_without_caching.sql": MACROS__VALIDATE__SNOWFLAKE__LIST_RELATIONS_WITHOUT_CACHING, - "validate_list_relations_without_caching_raise_error.sql": MACROS__VALIDATE__SNOWFLAKE__LIST_RELATIONS_WITHOUT_CACHING_RAISE_ERROR, + "flags": { + "list_relations_per_page": 10, + "list_relations_page_limit": 20, + } } - def test__snowflake__list_relations_without_caching(self, project): - """ - validates pagination logic in snowflake__list_relations_without_caching macro counts - the correct number of objects in the target schema when having to make multiple looped - calls of SHOW TERSE OBJECTS. 
- """ - # purpose of the first run is to create the replicated views in the target schema - run_dbt(["run"]) - database = project.database - schemas = project.created_schemas - - for schema in schemas: - schema_relation = SnowflakeRelation.create(database=database, schema=schema) - kwargs = {"schema_relation": schema_relation.render()} - _, log_output = run_dbt_and_capture( - [ - "--debug", - "--log-format=json", - "run-operation", - "validate_list_relations_without_caching", - "--args", - str(kwargs), - ] - ) - parsed_logs = parse_json_logs(log_output) - n_relations = find_result_in_parsed_logs(parsed_logs, "n_relations") +class TestListRelationsWithoutCachingTooLarge(BaseConfig): - assert n_relations == f"n_relations: {NUM_EXPECTED_RELATIONS}" - - def test__snowflake__list_relations_without_caching_raise_error(self, project): - """ - validates pagination logic terminates and raises a compilation error - when exceeding the limit of how many results to return. - """ - run_dbt(["run"]) + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "flags": { + "list_relations_per_page": 10, + "list_relations_page_limit": 5, + } + } - database = project.database - schemas = project.created_schemas - - for schema in schemas: - schema_relation = SnowflakeRelation.create(database=database, schema=schema) - - kwargs = {"schema_relation": schema_relation.render()} - _, log_output = run_dbt_and_capture( - [ - "--debug", - "--log-format=json", - "run-operation", - "validate_list_relations_without_caching_raise_error", - "--args", - str(kwargs), - ], - expect_pass=False, - ) - parsed_logs = parse_json_logs(log_output) - traceback = find_exc_info_in_parsed_logs(parsed_logs, "Traceback") - assert "dbt will list a maximum of 99 objects in schema " in traceback + def test_list_relations(self, project): + kwargs = {"schema_relation": project.test_schema} + with project.adapter.connection_named("__test"): + with pytest.raises(CompilationError) as error: + 
project.adapter.execute_macro( + "snowflake__list_relations_without_caching", kwargs=kwargs + ) + assert "list_relations_per_page" in error.value.msg + assert "list_relations_page_limit" in error.value.msg + + def test_on_run(self, project): + with pytest.raises(CompilationError) as error: + run_dbt(["run"]) + assert "list_relations_per_page" in error.value.msg + assert "list_relations_page_limit" in error.value.msg