From 773e8d3b06b093883ed3175c4a62adb8eaa16ffd Mon Sep 17 00:00:00 2001 From: Andrew Taft <11670864+ataft@users.noreply.github.com> Date: Wed, 10 Apr 2024 11:06:32 -0700 Subject: [PATCH] Fix incremental delete+insert SQL resolves dbt-labs/dbt-adapters#150 Problem The delete query for the 'delete+insert' incremental_strategy with 2+ unique_key columns is VERY inefficient. In many cases, it will hang and never return for deleting small amounts of data (<100K rows). Solution Improve the query by switching to a much more efficient delete strategy: ``` delete from table1 where (col1, col2) in ( select distinct col1, col2 from table1_tmp ) ``` --- .../unreleased/Fixes-20240410-163601.yaml | 6 +++ .../models/incremental/merge.sql | 43 +++++++------------ 2 files changed, 22 insertions(+), 27 deletions(-) create mode 100644 .changes/unreleased/Fixes-20240410-163601.yaml diff --git a/.changes/unreleased/Fixes-20240410-163601.yaml b/.changes/unreleased/Fixes-20240410-163601.yaml new file mode 100644 index 00000000..f0642f47 --- /dev/null +++ b/.changes/unreleased/Fixes-20240410-163601.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: Fix incremental delete+insert SQL +time: 2024-04-10T16:36:43.253228-05:00 +custom: + Author: ataft + Issue: "150" diff --git a/dbt/include/global_project/macros/materializations/models/incremental/merge.sql b/dbt/include/global_project/macros/materializations/models/incremental/merge.sql index ca972c9f..ff84c594 100644 --- a/dbt/include/global_project/macros/materializations/models/incremental/merge.sql +++ b/dbt/include/global_project/macros/materializations/models/incremental/merge.sql @@ -61,34 +61,23 @@ {%- set dest_cols_csv = get_quoted_csv(dest_columns | map(attribute="name")) -%} {% if unique_key %} - {% if unique_key is sequence and unique_key is not string %} - delete from {{target }} - using {{ source }} - where ( - {% for key in unique_key %} - {{ source }}.{{ key }} = {{ target }}.{{ key }} - {{ "and " if not loop.last}} - {% endfor %} - {% if incremental_predicates %} - {% for predicate in incremental_predicates %} - and {{ predicate }} - {% endfor %} - {% endif %} - ); - {% else %} - delete from {{ target }} - where ( - {{ unique_key }}) in ( - select ({{ unique_key }}) - from {{ source }} - ) - {%- if incremental_predicates %} - {% for predicate in incremental_predicates %} - and {{ predicate }} - {% endfor %} - {%- endif -%}; - + {% if unique_key is string %} + {% set unique_key = [unique_key] %} {% endif %} + + {%- set unique_key_str = unique_key|join(', ') -%} + + delete from {{ target }} + where ({{ unique_key_str }}) in ( + select distinct {{ unique_key_str }} + from {{ source }} + ) + {%- if incremental_predicates %} + {% for predicate in incremental_predicates %} + and {{ predicate }} + {% endfor %} + {%- endif -%}; + {% endif %} insert into {{ target }} ({{ dest_cols_csv }})