From 3795d06f365213ca4930d2447bd1580cb7031557 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Mon, 14 Sep 2020 10:32:15 -0400 Subject: [PATCH] Upgrade for v0.18.0 (utils, dispatch) (#97) * Scope potential spark support * Use adapter.dispatch * Update is_adapter macro * Fixups, packages, readme * Rm wip spark vestiges * Rm whitespace additions * More ambitious lower utils bound --- README.md | 4 +++ dbt_project.yml | 4 +-- macros/adapters/_get_snowplow_namespaces.sql | 4 +++ macros/adapters/convert_timezone.sql | 3 +- macros/adapters/get_start_ts.sql | 2 +- macros/adapters/is_adapter.sql | 36 ++++++------------- macros/adapters/timestamp_ntz.sql | 2 +- macros/snowplow_macros.yml | 16 +++------ .../default/snowplow_page_views.sql | 36 +++++++++---------- .../snowplow_web_events_scroll_depth.sql | 8 ++--- packages.yml | 2 +- 11 files changed, 52 insertions(+), 65 deletions(-) create mode 100644 macros/adapters/_get_snowplow_namespaces.sql diff --git a/README.md b/README.md index 0cd16ee..3607f51 100644 --- a/README.md +++ b/README.md @@ -82,11 +82,15 @@ vars: ### Database support +Core: * Redshift * Snowflake * BigQuery * Postgres +Plugins: +* Spark (via [`spark_utils`](https://github.com/fishtown-analytics/spark-utils)) + ### Contributions ### Additional contributions to this package are very welcome! 
Please create issues diff --git a/dbt_project.yml b/dbt_project.yml index a47493c..bf4839b 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -1,7 +1,7 @@ #settings specifically for this models directory #config other dbt settings within ~/.dbt/profiles.yml name: 'snowplow' -version: '0.10.0' +version: '0.12.0' config-version: 2 source-paths: ["models"] @@ -12,7 +12,7 @@ analysis-paths: ["analysis"] data-paths: ["data"] macro-paths: ["macros"] -require-dbt-version: ">=0.17.0" +require-dbt-version: ">=0.18.0" vars: #'snowplow:events': TABLE OR {{ REF() }} diff --git a/macros/adapters/_get_snowplow_namespaces.sql b/macros/adapters/_get_snowplow_namespaces.sql new file mode 100644 index 0000000..ca7486d --- /dev/null +++ b/macros/adapters/_get_snowplow_namespaces.sql @@ -0,0 +1,4 @@ +{% macro _get_snowplow_namespaces() %} + {% set override_namespaces = var('snowplow_dispatch_list', []) %} + {% do return(override_namespaces + ['snowplow']) %} +{% endmacro %} diff --git a/macros/adapters/convert_timezone.sql b/macros/adapters/convert_timezone.sql index 051eb5d..3d9da85 100644 --- a/macros/adapters/convert_timezone.sql +++ b/macros/adapters/convert_timezone.sql @@ -1,5 +1,6 @@ {%- macro convert_timezone(in_tz, out_tz, in_timestamp) -%} - {{ adapter_macro('convert_timezone', in_tz, out_tz, in_timestamp) }} + {{ adapter.dispatch('convert_timezone', packages = snowplow._get_snowplow_namespaces()) + (in_tz, out_tz, in_timestamp) }} {%- endmacro -%} {% macro default__convert_timezone(in_tz, out_tz, in_timestamp) %} diff --git a/macros/adapters/get_start_ts.sql b/macros/adapters/get_start_ts.sql index b510d7d..887b2cf 100644 --- a/macros/adapters/get_start_ts.sql +++ b/macros/adapters/get_start_ts.sql @@ -29,7 +29,7 @@ {%- macro get_start_ts(relation, field = 'collector_tstamp') -%} - {{ adapter_macro('get_start_ts', relation, field) }} + {{ adapter.dispatch('get_start_ts', packages = snowplow._get_snowplow_namespaces())(relation, field) }} {%- endmacro -%} diff --git 
a/macros/adapters/is_adapter.sql b/macros/adapters/is_adapter.sql index 35ae903..ec7e100 100644 --- a/macros/adapters/is_adapter.sql +++ b/macros/adapters/is_adapter.sql @@ -1,35 +1,19 @@ -{% macro set_default_adapters() %} - - {% set default_adapters = ['postgres', 'redshift', 'snowflake'] %} +{% macro is_adapter(adapter_type='default') %} - {% do return(default_adapters) %} + {{ return(adapter.dispatch('is_adapter', packages = snowplow._get_snowplow_namespaces()) (adapter_type)) }} {% endmacro %} -{% macro is_adapter(adapter='default') %} +{% macro default__is_adapter(adapter_type='default') %} -{#- - This logic means that if you add your own macro named `set_default_adapters` - to your project, that will be used, giving you the flexibility of overriding - which target types use the default implementation of Snowplow models. --#} + {% set result = (adapter_type == 'default') %} + {{return(result)}} - {% if context.get(ref.config.project_name, {}).get('set_default_adapters') %} - {% set default_adapters=context[ref.config.project_name].set_default_adapters() %} - {% else %} - {% set default_adapters=snowplow.set_default_adapters() %} - {% endif %} +{% endmacro %} - {% if adapter == 'default' %} - {% set adapters = default_adapters %} - {% elif adapter is string %} - {% set adapters = [adapter] %} - {% else %} - {% set adapters = adapter %} - {% endif %} - - {% set result = (target.type in adapters) %} - - {{return(result)}} +{% macro bigquery__is_adapter(adapter_type='default') %} + {% set result = (adapter_type == 'bigquery') %} + {{return(result)}} + {% endmacro %} diff --git a/macros/adapters/timestamp_ntz.sql b/macros/adapters/timestamp_ntz.sql index c5d264f..381ea63 100644 --- a/macros/adapters/timestamp_ntz.sql +++ b/macros/adapters/timestamp_ntz.sql @@ -1,5 +1,5 @@ {% macro timestamp_ntz(field) %} - {{ adapter_macro('snowplow.timestamp_ntz', field) }} + {{ adapter.dispatch('timestamp_ntz', packages = snowplow._get_snowplow_namespaces()) (field) }} {% 
endmacro %} {% macro default__timestamp_ntz(field) %} diff --git a/macros/snowplow_macros.yml b/macros/snowplow_macros.yml index b94c3f0..2526f10 100644 --- a/macros/snowplow_macros.yml +++ b/macros/snowplow_macros.yml @@ -74,21 +74,15 @@ macros: Differs depending on the adapter: - name of the timestamp/date column to get max value, with default value of `collector_tstamp` - not needed on Bigquery, which always uses the model's configured partition field - - - name: set_default_adapters - description: > - Which adapters should use the `default` implementation of Snowplow package - models? By default, includes Postgres, Redshift, and Snowflake. - Override by creating a macro named `set_default_adapters` in your - own project. - name: is_adapter description: > - Determine whether a model should be enabled depending on the `target.type` - of the current run. Returns `true` or `false`. All `default` models run on Postgres, Redshift, - and Snowflake. All `bigquery` models run on BigQuery. + Determine whether a model should be enabled depending on the adapter being run. + Returns `true` or `false`. In practice, this macro selects the 'default' implementation + of Snowplow models unless an adapter-level override points to a different + model set (e.g. BigQuery). 
arguments: - - name: adapter + - name: adapter_type type: string description: "*default* or *bigquery*" diff --git a/models/page_views/default/snowplow_page_views.sql b/models/page_views/default/snowplow_page_views.sql index c36cbd9..f0e47a2 100644 --- a/models/page_views/default/snowplow_page_views.sql +++ b/models/page_views/default/snowplow_page_views.sql @@ -220,17 +220,17 @@ prep as ( d.os_patch as os_build_version, d.device_family as device, {% else %} - null::text as browser, + cast(null as {{ dbt_utils.type_string() }}) as browser, a.br_family as browser_name, a.br_name as browser_major_version, a.br_version as browser_minor_version, - null::text as browser_build_version, + cast(null as {{ dbt_utils.type_string() }}) as browser_build_version, a.os_family as os, a.os_name as os_name, - null::text as os_major_version, - null::text as os_minor_version, - null::text as os_build_version, - null::text as device, + cast(null as {{ dbt_utils.type_string() }}) as os_major_version, + cast(null as {{ dbt_utils.type_string() }}) as os_minor_version, + cast(null as {{ dbt_utils.type_string() }}) as os_build_version, + cast(null as {{ dbt_utils.type_string() }}) as device, {% endif %} c.br_viewwidth as browser_window_width, @@ -256,18 +256,18 @@ prep as ( e.onload_time_in_ms, e.total_time_in_ms, {% else %} - null::bigint as redirect_time_in_ms, - null::bigint as unload_time_in_ms, - null::bigint as app_cache_time_in_ms, - null::bigint as dns_time_in_ms, - null::bigint as tcp_time_in_ms, - null::bigint as request_time_in_ms, - null::bigint as response_time_in_ms, - null::bigint as processing_time_in_ms, - null::bigint as dom_loading_to_interactive_time_in_ms, - null::bigint as dom_interactive_to_complete_time_in_ms, - null::bigint as onload_time_in_ms, - null::bigint as total_time_in_ms, + cast(null as bigint) as redirect_time_in_ms, + cast(null as bigint) as unload_time_in_ms, + cast(null as bigint) as app_cache_time_in_ms, + cast(null as bigint) as dns_time_in_ms, + 
cast(null as bigint) as tcp_time_in_ms, + cast(null as bigint) as request_time_in_ms, + cast(null as bigint) as response_time_in_ms, + cast(null as bigint) as processing_time_in_ms, + cast(null as bigint) as dom_loading_to_interactive_time_in_ms, + cast(null as bigint) as dom_interactive_to_complete_time_in_ms, + cast(null as bigint) as onload_time_in_ms, + cast(null as bigint) as total_time_in_ms, {% endif %} -- device diff --git a/models/page_views/default/snowplow_web_events_scroll_depth.sql b/models/page_views/default/snowplow_web_events_scroll_depth.sql index 75dd825..7a57c50 100644 --- a/models/page_views/default/snowplow_web_events_scroll_depth.sql +++ b/models/page_views/default/snowplow_web_events_scroll_depth.sql @@ -78,10 +78,10 @@ relative as ( vmin, vmax, - round(100*(greatest(hmin, 0)/doc_width::float)) as relative_hmin, - round(100*(least(hmax + br_viewwidth, doc_width)/doc_width::float)) as relative_hmax, - round(100*(greatest(vmin, 0)/doc_height::float)) as relative_vmin, - round(100*(least(vmax + br_viewheight, doc_height)/doc_height::float)) as relative_vmax + round(100*(greatest(hmin, 0)/cast(doc_width as float))) as relative_hmin, + round(100*(least(hmax + br_viewwidth, doc_width)/cast(doc_width as float))) as relative_hmax, + round(100*(greatest(vmin, 0)/cast(doc_height as float))) as relative_vmin, + round(100*(least(vmax + br_viewheight, doc_height)/cast(doc_height as float))) as relative_vmax from prep diff --git a/packages.yml b/packages.yml index ecb7073..2246ac8 100644 --- a/packages.yml +++ b/packages.yml @@ -1,3 +1,3 @@ packages: - package: fishtown-analytics/dbt_utils - version: [">=0.4.0", "<0.6.0"] + version: [">=0.6.0", "<0.7.0"]