Skip to content
Draft
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions docker/base/Dockerfile.duckdb-dbt
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@ FROM python:3.11-slim
RUN apt-get update && apt-get install -y \
tmux asciinema \
curl \
&& curl -sSL https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 \
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Technically this isn't necessary because the duckdb dockerfile isn't used for any of the migrations you're working on here, but it's probably fine to leave it in to keep it consistent with the others.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The solution includes modifying dbt_project.yml to disable the overridden models, which is why this is now needed

-o /usr/bin/yq \
&& chmod +x /usr/bin/yq \
&& curl -fsSL https://deb.nodesource.com/setup_22.x | bash - \
&& apt-get install -y nodejs \
&& rm -rf /var/lib/apt/lists/*

RUN pip install --no-cache-dir \
dbt-core==1.9.6 \
dbt-core==1.10.11 \
dbt-duckdb==1.9.3 \
duckdb==1.3.0 \
pyyaml>=6.0 \
Expand All @@ -24,4 +27,4 @@ RUN pip install --no-cache-dir \
RUN mkdir -p /installed-agent /scripts /sage/solutions /sage /app /app/setup /app/migrations /seeds /solutions /logs /tests
WORKDIR /app

CMD ["bash"]
CMD ["bash"]
10 changes: 10 additions & 0 deletions shared/migrations/quickbooks__duckdb_to_snowflake/migration.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash
# Migration step: repoint the dbt project at the Snowflake "public" schema and
# install the Snowflake-specific staging models that perform epoch-to-timestamp
# conversion.

# Fail fast: abort on any command failure, unset variable, or pipeline error,
# so a failed yq edit or cp does not go unnoticed.
set -euo pipefail

# Update primary schema in dbt_project.yml file
yq -i '.vars.quickbooks_schema = "public"' dbt_project.yml

# Resolve this script's own directory (following symlinks) so the copies below
# work regardless of the caller's working directory.
MIGRATION_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"

# Copy Snowflake-specific solution models that handle epoch-to-timestamp conversion.
# Quote the expansions so paths containing spaces are handled correctly.
cp "$MIGRATION_DIR/solutions/stg_quickbooks__refund_receipt.sql" solutions/
cp "$MIGRATION_DIR/solutions/stg_quickbooks__sales_receipt.sql" solutions/
cp "$MIGRATION_DIR/solutions/stg_quickbooks__estimate.sql" solutions/
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
--Snowflake-specific solution model: overrides the packaged staging model so
--that epoch-encoded date columns are converted via TO_TIMESTAMP_NTZ (see the
--`final` CTE below).
--To disable this model, set the using_estimate variable within your dbt_project.yml file to False.
{{ config(enabled=var('using_estimate', True)) }}

-- base: raw rows from the temporary staging model.
with base as (

select *
from {{ ref('stg_quickbooks__estimate_tmp') }}

),

-- fields: normalize the source columns to the expected staging schema and
-- attach source_relation metadata for unioned schemas/databases.
fields as (

select
/*
The below macro is used to generate the correct SQL for package staging models. It takes a list of columns
that are expected/needed (staging_columns from dbt_quickbooks_source/models/tmp/) and compares it with columns
in the source (source_columns from dbt_quickbooks_source/macros/).
For more information refer to our dbt_fivetran_utils documentation (https://github.com/fivetran/dbt_fivetran_utils.git).
*/

{{
fivetran_utils.fill_staging_columns(
source_columns=adapter.get_columns_in_relation(ref('stg_quickbooks__estimate_tmp')),
staging_columns=quickbooks_source.get_estimate_columns()
)
}}

{{
fivetran_utils.source_relation(
union_schema_variable='quickbooks_union_schemas',
union_database_variable='quickbooks_union_databases'
)
}}

from base
),

-- final: cast IDs to strings and shape the date columns.
final as (

select
cast(id as {{ dbt.type_string() }}) as estimate_id,
cast(class_id as {{ dbt.type_string() }}) as class_id,
created_at,
currency_id,
customer_id,
cast(department_id as {{ dbt.type_string() }}) as department_id,
-- Convert unix epoch to timestamp, then truncate to date
cast( {{ dbt.date_trunc('day', 'TO_TIMESTAMP_NTZ(due_date)') }} as date) as due_date,
exchange_rate,
total_amount,
-- NOTE(review): unlike due_date above (and transaction_date in the
-- refund/sales receipt solution models), this column is NOT wrapped in
-- TO_TIMESTAMP_NTZ — confirm transaction_date is not epoch-encoded here.
cast( {{ dbt.date_trunc('day', 'transaction_date') }} as date) as transaction_date,
transaction_status,
_fivetran_deleted,
source_relation
from fields
)

-- Exclude rows Fivetran has marked as deleted.
select *
from final
where not coalesce(_fivetran_deleted, false)

Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
--Snowflake-specific solution model: overrides the packaged staging model so
--that the epoch-encoded transaction_date is converted via TO_TIMESTAMP_NTZ
--(see the `final` CTE below).
--To disable this model, set the using_refund_receipt variable within your dbt_project.yml file to False.
{{ config(enabled=var('using_refund_receipt', True)) }}

-- base: raw rows from the temporary staging model.
with base as (

select *
from {{ ref('stg_quickbooks__refund_receipt_tmp') }}

),

-- fields: normalize the source columns to the expected staging schema and
-- attach source_relation metadata for unioned schemas/databases.
fields as (

select
/*
The below macro is used to generate the correct SQL for package staging models. It takes a list of columns
that are expected/needed (staging_columns from dbt_quickbooks_source/models/tmp/) and compares it with columns
in the source (source_columns from dbt_quickbooks_source/macros/).
For more information refer to our dbt_fivetran_utils documentation (https://github.com/fivetran/dbt_fivetran_utils.git).
*/

{{
fivetran_utils.fill_staging_columns(
source_columns=adapter.get_columns_in_relation(ref('stg_quickbooks__refund_receipt_tmp')),
staging_columns=quickbooks_source.get_refund_receipt_columns()
)
}}

{{
fivetran_utils.source_relation(
union_schema_variable='quickbooks_union_schemas',
union_database_variable='quickbooks_union_databases'
)
}}

from base
),

-- final: cast IDs to strings and shape the date columns.
final as (

select
cast(id as {{ dbt.type_string() }}) as refund_id,
balance,
cast(doc_number as {{ dbt.type_string() }}) as doc_number,
total_amount,
cast(class_id as {{ dbt.type_string() }}) as class_id,
cast(deposit_to_account_id as {{ dbt.type_string() }}) as deposit_to_account_id,
created_at,
cast(department_id as {{ dbt.type_string() }}) as department_id,
cast(customer_id as {{ dbt.type_string() }}) as customer_id,
currency_id,
exchange_rate,
-- Convert unix epoch to timestamp, then truncate to date
cast( {{ dbt.date_trunc('day', 'TO_TIMESTAMP_NTZ(transaction_date)') }} as date) as transaction_date,
_fivetran_deleted,
source_relation
from fields
)

-- Exclude rows Fivetran has marked as deleted.
select *
from final
where not coalesce(_fivetran_deleted, false)

Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
--Snowflake-specific solution model: overrides the packaged staging model so
--that the epoch-encoded transaction_date is converted via TO_TIMESTAMP_NTZ
--(see the `final` CTE below).
--To disable this model, set the using_sales_receipt variable within your dbt_project.yml file to False.
{{ config(enabled=var('using_sales_receipt', True)) }}

-- base: raw rows from the temporary staging model.
with base as (

select *
from {{ ref('stg_quickbooks__sales_receipt_tmp') }}

),

-- fields: normalize the source columns to the expected staging schema and
-- attach source_relation metadata for unioned schemas/databases.
fields as (

select
/*
The below macro is used to generate the correct SQL for package staging models. It takes a list of columns
that are expected/needed (staging_columns from dbt_quickbooks_source/models/tmp/) and compares it with columns
in the source (source_columns from dbt_quickbooks_source/macros/).
For more information refer to our dbt_fivetran_utils documentation (https://github.com/fivetran/dbt_fivetran_utils.git).
*/

{{
fivetran_utils.fill_staging_columns(
source_columns=adapter.get_columns_in_relation(ref('stg_quickbooks__sales_receipt_tmp')),
staging_columns=quickbooks_source.get_sales_receipt_columns()
)
}}

{{
fivetran_utils.source_relation(
union_schema_variable='quickbooks_union_schemas',
union_database_variable='quickbooks_union_databases'
)
}}
from base
),

-- final: cast IDs to strings and shape the date columns.
final as (

select
cast(id as {{ dbt.type_string() }}) as sales_receipt_id,
balance,
cast(doc_number as {{ dbt.type_string() }}) as doc_number,
total_amount,
cast(deposit_to_account_id as {{ dbt.type_string() }}) as deposit_to_account_id,
created_at,
cast(customer_id as {{ dbt.type_string() }}) as customer_id,
cast(department_id as {{ dbt.type_string() }}) as department_id,
cast(class_id as {{ dbt.type_string() }}) as class_id,
currency_id,
exchange_rate,
-- Convert unix epoch to timestamp, then truncate to date
cast( {{ dbt.date_trunc('day', 'TO_TIMESTAMP_NTZ(transaction_date)') }} as date) as transaction_date,
_fivetran_deleted,
source_relation
from fields
)

-- Exclude rows Fivetran has marked as deleted.
select *
from final
where not coalesce(_fivetran_deleted, false)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a broader note on these migrations, if the change that's needed is fairly small (eg, replace a function with another one), I try to do it inline in the script, because it's easier to see what's changing. Full replacing a file can be hard to follow, since you don't know what's different between things. That's a judgement call though. How much are we needing to update this?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These files aren't in the original project, because they're initially installed from the package hub. I guess they could be copied from inside of the Docker container's project's packages directory and then have the one-line change applied, but that feels kinda janky too.

Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash
# Migration step: repoint the dbt project at the Snowflake "public" schema and
# install the Snowflake-specific staging models that perform epoch-to-timestamp
# conversion.

# Fail fast: abort on any command failure, unset variable, or pipeline error,
# so a failed yq edit or cp does not go unnoticed.
set -euo pipefail

# Update primary schema in dbt_project.yml file
yq -i '.vars.quickbooks_schema = "public"' dbt_project.yml

# Resolve this script's own directory (following symlinks) so the copies below
# work regardless of the caller's working directory.
MIGRATION_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"

# Copy Snowflake-specific solution models that handle epoch-to-timestamp conversion.
# Quote the expansions so paths containing spaces are handled correctly.
cp "$MIGRATION_DIR/solutions/stg_quickbooks__refund_receipt.sql" solutions/
cp "$MIGRATION_DIR/solutions/stg_quickbooks__sales_receipt.sql" solutions/
cp "$MIGRATION_DIR/solutions/stg_quickbooks__estimate.sql" solutions/
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
--Snowflake-specific solution model: overrides the packaged staging model so
--that epoch-encoded date columns are converted via TO_TIMESTAMP_NTZ (see the
--`final` CTE below).
--To disable this model, set the using_estimate variable within your dbt_project.yml file to False.
{{ config(enabled=var('using_estimate', True)) }}

-- base: raw rows from the temporary staging model.
with base as (

select *
from {{ ref('stg_quickbooks__estimate_tmp') }}

),

-- fields: normalize the source columns to the expected staging schema and
-- attach source_relation metadata for unioned schemas/databases.
fields as (

select
/*
The below macro is used to generate the correct SQL for package staging models. It takes a list of columns
that are expected/needed (staging_columns from dbt_quickbooks_source/models/tmp/) and compares it with columns
in the source (source_columns from dbt_quickbooks_source/macros/).
For more information refer to our dbt_fivetran_utils documentation (https://github.com/fivetran/dbt_fivetran_utils.git).
*/

{{
fivetran_utils.fill_staging_columns(
source_columns=adapter.get_columns_in_relation(ref('stg_quickbooks__estimate_tmp')),
staging_columns=quickbooks_source.get_estimate_columns()
)
}}

{{
fivetran_utils.source_relation(
union_schema_variable='quickbooks_union_schemas',
union_database_variable='quickbooks_union_databases'
)
}}

from base
),

-- final: cast IDs to strings and shape the date columns.
final as (

select
cast(id as {{ dbt.type_string() }}) as estimate_id,
cast(class_id as {{ dbt.type_string() }}) as class_id,
created_at,
currency_id,
customer_id,
cast(department_id as {{ dbt.type_string() }}) as department_id,
-- Convert unix epoch to timestamp, then truncate to date
cast( {{ dbt.date_trunc('day', 'TO_TIMESTAMP_NTZ(due_date)') }} as date) as due_date,
exchange_rate,
total_amount,
-- NOTE(review): unlike due_date above (and transaction_date in the
-- refund/sales receipt solution models), this column is NOT wrapped in
-- TO_TIMESTAMP_NTZ — confirm transaction_date is not epoch-encoded here.
cast( {{ dbt.date_trunc('day', 'transaction_date') }} as date) as transaction_date,
transaction_status,
_fivetran_deleted,
source_relation
from fields
)

-- Exclude rows Fivetran has marked as deleted.
select *
from final
where not coalesce(_fivetran_deleted, false)

Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
--Snowflake-specific solution model: overrides the packaged staging model so
--that the epoch-encoded transaction_date is converted via TO_TIMESTAMP_NTZ
--(see the `final` CTE below).
--To disable this model, set the using_refund_receipt variable within your dbt_project.yml file to False.
{{ config(enabled=var('using_refund_receipt', True)) }}

-- base: raw rows from the temporary staging model.
with base as (

select *
from {{ ref('stg_quickbooks__refund_receipt_tmp') }}

),

-- fields: normalize the source columns to the expected staging schema and
-- attach source_relation metadata for unioned schemas/databases.
fields as (

select
/*
The below macro is used to generate the correct SQL for package staging models. It takes a list of columns
that are expected/needed (staging_columns from dbt_quickbooks_source/models/tmp/) and compares it with columns
in the source (source_columns from dbt_quickbooks_source/macros/).
For more information refer to our dbt_fivetran_utils documentation (https://github.com/fivetran/dbt_fivetran_utils.git).
*/

{{
fivetran_utils.fill_staging_columns(
source_columns=adapter.get_columns_in_relation(ref('stg_quickbooks__refund_receipt_tmp')),
staging_columns=quickbooks_source.get_refund_receipt_columns()
)
}}

{{
fivetran_utils.source_relation(
union_schema_variable='quickbooks_union_schemas',
union_database_variable='quickbooks_union_databases'
)
}}

from base
),

-- final: cast IDs to strings and shape the date columns.
final as (

select
cast(id as {{ dbt.type_string() }}) as refund_id,
balance,
cast(doc_number as {{ dbt.type_string() }}) as doc_number,
total_amount,
cast(class_id as {{ dbt.type_string() }}) as class_id,
cast(deposit_to_account_id as {{ dbt.type_string() }}) as deposit_to_account_id,
created_at,
cast(department_id as {{ dbt.type_string() }}) as department_id,
cast(customer_id as {{ dbt.type_string() }}) as customer_id,
currency_id,
exchange_rate,
-- Convert unix epoch to timestamp, then truncate to date
cast( {{ dbt.date_trunc('day', 'TO_TIMESTAMP_NTZ(transaction_date)') }} as date) as transaction_date,
_fivetran_deleted,
source_relation
from fields
)

-- Exclude rows Fivetran has marked as deleted.
select *
from final
where not coalesce(_fivetran_deleted, false)

Loading