diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 8aaf0375007..1983b0201d9 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -4,14 +4,14 @@ * @dbt-labs/product-docs # Adapter & Package Development Docs -/website/docs/docs/supported-data-platforms.md @dbt-labs/product-docs @dataders -/website/docs/reference/warehouse-setups @dbt-labs/product-docs @dataders +/website/docs/docs/supported-data-platforms.md @dbt-labs/product-docs @amychen1776 +/website/docs/reference/warehouse-setups @dbt-labs/product-docs @amychen1776 # `resource-configs` contains more than just warehouse setups -/website/docs/reference/resource-configs/*-configs.md @dbt-labs/product-docs @dataders -/website/docs/guides/advanced/adapter-development @dbt-labs/product-docs @dataders @dbeatty10 +/website/docs/reference/resource-configs/*-configs.md @dbt-labs/product-docs @amychen1776 +/website/docs/guides/advanced/adapter-development @dbt-labs/product-docs @amychen1776 -/website/docs/guides/building-packages @dbt-labs/product-docs @amychen1776 @dataders @dbeatty10 -/website/docs/guides/creating-new-materializations @dbt-labs/product-docs @dataders @dbeatty10 +/website/docs/guides/building-packages @dbt-labs/product-docs @amychen1776 +/website/docs/guides/creating-new-materializations @dbt-labs/product-docs # Require approval from the Multicell team when making # changes to the public facing migration documentation. diff --git a/.github/ISSUE_TEMPLATE/internal-orch-team.yml b/.github/ISSUE_TEMPLATE/internal-orch-team.yml deleted file mode 100644 index 8c4d61df10c..00000000000 --- a/.github/ISSUE_TEMPLATE/internal-orch-team.yml +++ /dev/null @@ -1,49 +0,0 @@ -name: Orchestration team - Request changes to docs -description: File a docs update request that is not already tracked in Orch team's Release Plans (Notion database). -labels: ["content","internal-orch-team"] -body: - - type: markdown - attributes: - value: | - * You can ask questions or submit ideas for the dbt docs in [Issues](https://github.com/dbt-labs/docs-internal/issues/new/choose) - * Before you file an issue read the [Contributing guide](https://github.com/dbt-labs/docs-internal#contributing). - * Check to make sure someone hasn't already opened a similar [issue](https://github.com/dbt-labs/docs-internal/issues). - - - type: checkboxes - id: contributions - attributes: - label: Contributions - description: Please read the contribution docs before opening an issue or pull request. - options: - - label: I have read the contribution docs, and understand what's expected of me. - - - type: textarea - attributes: - label: Link to the page on docs.getdbt.com requiring updates - description: Please link to the page or pages you'd like to see improved. - validations: - required: true - - - type: textarea - attributes: - label: What part(s) of the page would you like to see updated? - description: | - - Give as much detail as you can to help us understand the change you want to see. - - Why should the docs be changed? What use cases does it support? - - What is the expected outcome? - validations: - required: true - - - type: textarea - attributes: - label: Reviewers/Stakeholders/SMEs - description: List the reviewers, stakeholders, and subject matter experts (SMEs) to collaborate with for the docs update. - validations: - required: true - - - type: textarea - attributes: - label: Related Jira tickets - description: Add any other context or screenshots about the feature request here. 
- validations: - required: false diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 870dadcd183..d2bb72552bd 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -9,6 +9,7 @@ To learn more about the writing conventions used in the dbt Labs docs, see the [ - [ ] I have reviewed the [Content style guide](https://github.com/dbt-labs/docs.getdbt.com/blob/current/contributing/content-style-guide.md) so my content adheres to these guidelines. - [ ] The topic I'm writing about is for specific dbt version(s) and I have versioned it according to the [version a whole page](https://github.com/dbt-labs/docs.getdbt.com/blob/current/contributing/single-sourcing-content.md#adding-a-new-version) and/or [version a block of content](https://github.com/dbt-labs/docs.getdbt.com/blob/current/contributing/single-sourcing-content.md#versioning-blocks-of-content) guidelines. - [ ] I have added checklist item(s) to this list for anything anything that needs to happen before this PR is merged, such as "needs technical review" or "change base branch." +- [ ] The content in this PR requires a dbt release note, so I added one to the [release notes page](https://docs.getdbt.com/docs/dbt-versions/dbt-cloud-release-notes). For customers, this means less maintenance overhead, faster access to bug fixes and features, and more time to focus on what matters most: building trusted data products. This will be our stable foundation for improvement and innovation in dbt Cloud. -But we wanted to go a step beyond just making this option available to you. In this blog post, we aim to shed a little light on the extensive work we've done to ensure that using "Versionless" is a stable, reliable experience for the thousands of customers who rely daily on dbt Cloud. +But we wanted to go a step beyond just making this option available to you. In this blog post, we aim to shed a little light on the extensive work we've done to ensure that using the "Latest" release track is a stable and reliable experience for the thousands of customers who rely daily on dbt Cloud. ## How we safely deploy dbt upgrades to Cloud We've put in place a rigorous, best-in-class suite of tests and control mechanisms to ensure that all changes to dbt under the hood are fully vetted before they're deployed to customers of dbt Cloud. -This pipeline has in fact been in place since January! It's how we've already been shipping continuous changes to the hundreds of customers who've selected "Versionless" while it's been in Beta and Preview. In that time, this process has enabled us to prevent multiple regressions before they were rolled out to any customers. +This pipeline has in fact been in place since January! It's how we've already been shipping continuous changes to the hundreds of customers who've selected the "Latest" release track while it's been in Beta and Preview. In that time, this process has enabled us to prevent multiple regressions before they were rolled out to any customers. We're very confident in the robustness of this process**. We also know that we'll need to continue building trust with time.** We're sharing details about this work in the spirit of transparency and to build that trust. 
@@ -82,9 +86,9 @@ All incidents are retrospected to make sure we not only identify and fix the roo ::: -The outcome of this process is that, when you select "Versionless" in dbt Cloud, the time between an improvement being made to dbt Core and you *safely* getting access to it in your projects is a matter of days — rather than months of waiting for the next dbt Core release, on top of any additional time it may have taken to actually carry out the upgrade. +The outcome of this process is that, when you select the "Latest" release track in dbt Cloud, the time between an improvement being made to dbt Core and you *safely* getting access to it in your projects is a matter of days — rather than months of waiting for the next dbt Core release, on top of any additional time it may have taken to actually carry out the upgrade. -We’re pleased to say that since the beta launch of “Versionless” in dbt Cloud in March, **we have not had any functional regressions reach customers**, while we’ve also been shipping multiple improvements to dbt functionality every day. This is a foundation that we aim to build on for the foreseeable future. +We’re pleased to say that, at the time of writing (May 2, 2024), since the beta launch of the "Latest" release track in dbt Cloud in March, **we have not had any functional regressions reach customers**, while we’ve also been shipping multiple improvements to dbt functionality every day. This is a foundation that we aim to build on for the foreseeable future. ## Stability as a feature @@ -98,7 +102,7 @@ The adapter interface — i.e. how dbt Core actually connects to a third-party d To solve that, we've released a new set of interfaces that are entirely independent of the `dbt-core` library: [`dbt-adapters==1.0.0`](https://github.com/dbt-labs/dbt-adapters). From now on, any changes to `dbt-adapters` will be backward and forward-compatible. This also decouples adapter maintenance from the regular release cadence of dbt Core — meaning maintainers get full control over when they ship implementations of new adapter-powered features. -Note that adapters running in dbt Cloud **must** be [migrated to the new decoupled architecture](https://github.com/dbt-labs/dbt-adapters/discussions/87) as a baseline in order to support the new "Versionless" option. +Note that adapters running in dbt Cloud **must** be [migrated to the new decoupled architecture](https://github.com/dbt-labs/dbt-adapters/discussions/87) as a baseline in order to support the new "Latest" release track. ### Managing behavior changes: stability as a feature @@ -118,7 +122,7 @@ We’ve now [formalized our development best practices](https://github.com/dbt-l In conclusion, we’re putting a lot of new muscle behind our commitments to dbt Cloud customers, the dbt Community, and the broader ecosystem: -- **Continuous updates**: "Versionless" dbt Cloud simplifies the update process, ensuring you always have the latest features and bug fixes without the maintenance overhead. +- **Continuous updates**: The "Latest" release track in dbt Cloud simplifies the update process, ensuring you always have the latest features and bug fixes without the maintenance overhead. - **A rigorous new testing and deployment process**: Our new testing pipeline ensures that every update is carefully vetted against documented interfaces, Cloud-supported adapters, and popular packages before it reaches you. 
This process minimizes the risk of regressions — and has now been successful at entirely preventing them for hundreds of customers over multiple months. - **A commitment to stability**: We’ve reworked our approaches to adapter interfaces, behaviour change management, and metadata artifacts to give you more stability and control. diff --git a/website/blog/2024-06-12-putting-your-dag-on-the-internet.md b/website/blog/2024-06-12-putting-your-dag-on-the-internet.md index 535cfc34d6e..54864916d0e 100644 --- a/website/blog/2024-06-12-putting-your-dag-on-the-internet.md +++ b/website/blog/2024-06-12-putting-your-dag-on-the-internet.md @@ -12,7 +12,7 @@ date: 2024-06-14 is_featured: true --- -**New in dbt: allow Snowflake Python models to access the internet** +## New in dbt: allow Snowflake Python models to access the internet With dbt 1.8, dbt released support for Snowflake’s [external access integrations](https://docs.snowflake.com/en/developer-guide/external-network-access/external-network-access-overview) further enabling the use of dbt + AI to enrich your data. This allows querying of external APIs within dbt Python models, a functionality that was required for dbt Cloud customer, [EQT AB](https://eqtgroup.com/). Learn about why they needed it and how they helped build the feature and get it shipped! @@ -45,7 +45,7 @@ This API is open and if it requires an API key, handle it similarly to managing For simplicity’s sake, we will show how to create them using [pre-hooks](/reference/resource-configs/pre-hook-post-hook) in a model configuration yml file: -``` +```yml models: - name: external_access_sample config: @@ -57,7 +57,7 @@ models: Then we can simply use the new external_access_integrations configuration parameter to use our network rule within a Python model (called external_access_sample.py): -``` +```python import snowflake.snowpark as snowpark def model(dbt, session: snowpark.Session): dbt.config( @@ -75,7 +75,7 @@ def model(dbt, session: snowpark.Session): The result is a model with some json I can parse, for example, in a SQL model to extract some information: -``` +```sql {{ config( materialized='incremental', @@ -108,12 +108,12 @@ The result is a model that will keep track of dbt invocations, and the current U This is a very new area to Snowflake and dbt -- something special about SQL and dbt is that it’s very resistant to external entropy. The second we rely on API calls, Python packages and other external dependencies, we open up to a lot more external entropy. APIs will change, break, and your models could fail. -Traditionally dbt is the T in ELT (dbt overview [here](https://docs.getdbt.com/terms/elt)), and this functionality unlocks brand new EL capabilities for which best practices do not yet exist. What’s clear is that EL workloads should be separated from T workloads, perhaps in a different modeling layer. Note also that unless using incremental models, your historical data can easily be deleted. dbt has seen a lot of use cases for this, including this AI example as outlined in this external [engineering blog post](https://klimmy.hashnode.dev/enhancing-your-dbt-project-with-large-language-models). +Traditionally dbt is the T in ELT (dbt overview [here](https://docs.getdbt.com/terms/elt)), and this functionality unlocks brand new EL capabilities for which best practices do not yet exist. What’s clear is that EL workloads should be separated from T workloads, perhaps in a different modeling layer. 
Note also that unless using incremental models, your historical data can easily be deleted. dbt has seen a lot of use cases for this, including this AI example as outlined in this external [engineering blog post](https://klimmy.hashnode.dev/enhancing-your-dbt-project-with-large-language-models). -**A few words about the power of Commercial Open Source Software** +## A few words about the power of Commercial Open Source Software In order to get this functionality shipped quickly, EQT opened a pull request, Snowflake helped with some problems we had with CI and a member of dbt Labs helped write the tests and merge the code in! -dbt now features this functionality in dbt 1.8+ or the “Versionless” option of dbt Cloud (dbt overview [here](/docs/dbt-versions/upgrade-dbt-version-in-cloud#versionless)). +dbt now features this functionality in dbt 1.8+ and all [Release tracks](/docs/dbt-versions/cloud-release-tracks) in dbt Cloud. dbt Labs staff and community members would love to chat more about it in the [#db-snowflake](https://getdbt.slack.com/archives/CJN7XRF1B) slack channel. diff --git a/website/blog/2024-10-04-hybrid-mesh.md b/website/blog/2024-10-04-hybrid-mesh.md index 34b2a67d1cb..05a45599318 100644 --- a/website/blog/2024-10-04-hybrid-mesh.md +++ b/website/blog/2024-10-04-hybrid-mesh.md @@ -59,7 +59,7 @@ This allows dbt Cloud to know about the contents and metadata of your project, w - Note: If you have [environment variables](/docs/build/environment-variables) in your project, dbt Cloud environment variables must be prefixed with `DBT_ `(including `DBT_ENV_CUSTOM_ENV_` or `DBT_ENV_SECRET`). Follow the instructions in [this guide](https://docs.getdbt.com/guides/core-to-cloud-1?step=8#environment-variables) to convert them for dbt Cloud. - Each upstream Core project has to have a production [environment](/docs/dbt-cloud-environments) in dbt Cloud. You need to configure credentials and environment variables in dbt Cloud just so that it will resolve relation names to the same places where your dbt Core workflows are deploying those models. - Set up a [merge job](/docs/deploy/merge-jobs) in a production environment to run `dbt parse`. This will enable connecting downstream projects in dbt Mesh by producing the necessary [artifacts](/reference/artifacts/dbt-artifacts) for cross-project referencing. - - Note: Set up a regular job to run `dbt build` instead of using a merge job for `dbt parse`, and centralize your dbt orchestration by moving production runs to dbt Cloud. Check out [this guide](/guides/core-to-cloud-1?step=9) for more details on converting your production runs to dbt Cloud. + - Optional: Set up a regular job to run `dbt build` instead of using a merge job for `dbt parse`, and centralize your dbt orchestration by moving production runs to dbt Cloud. Check out [this guide](/guides/core-to-cloud-1?step=9) for more details on converting your production runs to dbt Cloud. - Optional: Set up a regular job (for example, daily) to run `source freshness` and `docs generate`. This will hydrate dbt Cloud with additional metadata and enable features in [dbt Explorer](/docs/collaborate/explore-projects) that will benefit both teams, including [Column-level lineage](/docs/collaborate/column-level-lineage). 
### Step 3: Create and connect your downstream projects to your Core project using dbt Mesh diff --git a/website/blog/2024-10-04-iceberg-is-an-implementation-detail.md b/website/blog/2024-10-04-iceberg-is-an-implementation-detail.md index eca0a411dad..dc9b78bba8d 100644 --- a/website/blog/2024-10-04-iceberg-is-an-implementation-detail.md +++ b/website/blog/2024-10-04-iceberg-is-an-implementation-detail.md @@ -16,6 +16,8 @@ If you haven’t paid attention to the data industry news cycle, you might have But I have to be honest: **I don’t care**. But not for the reasons you think. + + ## What is Iceberg? To have this conversation, we need to start with the same foundational understanding of Iceberg. Apache Iceberg is a high-performance open table format developed for modern data lakes. It was designed for large-scale datasets, and within the project, there are many ways to interact with it. When people talk about Iceberg, it often means multiple components including but not limited to: diff --git a/website/blog/2024-10-05-snowflake-feature-store.md b/website/blog/2024-10-05-snowflake-feature-store.md index fb62955d4a4..cf5c55be1b5 100644 --- a/website/blog/2024-10-05-snowflake-feature-store.md +++ b/website/blog/2024-10-05-snowflake-feature-store.md @@ -13,6 +13,8 @@ Flying home into Detroit this past week working on this blog post on a plane and Think of the manufacturing materials needed as our data and the building of the bridge as the building of our ML models. There are thousands of engineers and construction workers taking materials from all over the world, pulling only the specific pieces needed for each part of the project. However, to make this project truly work at this scale, we need the warehousing and logistics to ensure that each load of concrete rebar and steel meets the standards for quality and safety needed and is available to the right people at the right time — as even a single fault can have catastrophic consequences or cause serious delays in project success. This warehouse and the associated logistics play the role of the feature store, ensuring that data is delivered consistently where and when it is needed to train and run ML models. + + ## What is a feature? A feature is a transformed or enriched data that serves as an input into a machine learning model to make predictions. In machine learning, a data scientist derives features from various data sources to build a model that makes predictions based on historical data. To capture the value from this model, the enterprise must operationalize the data pipeline, ensuring that the features being used in production at inference time match those being used in training and development. diff --git a/website/blog/2024-11-04-test-smarter-not-harder.md b/website/blog/2024-11-04-test-smarter-not-harder.md new file mode 100644 index 00000000000..58adfb38cb9 --- /dev/null +++ b/website/blog/2024-11-04-test-smarter-not-harder.md @@ -0,0 +1,163 @@ +--- +title: "Test smarter not harder: add the right tests to your dbt project" +description: "Testing your data should drive action, not accumulate alerts. We synthesized countless customer experiences to build a repeatable testing framework." 
+slug: test-smarter-not-harder + +authors: [faith_mckenna, jerrie_kumalah_kenney] + +tags: [analytics craft] +hide_table_of_contents: false + +date: 2024-11-11 +is_featured: true +--- + + + +The [Analytics Development Lifecycle (ADLC)](https://www.getdbt.com/resources/guides/the-analytics-development-lifecycle) is a workflow for improving data maturity and velocity. Testing is a key phase here. Many dbt developers tend to focus on [primary keys and source freshness.](https://www.getdbt.com/blog/building-a-data-quality-framework-with-dbt-and-dbt-cloud) We think there is a more holistic and in-depth path to tread. Testing is a key piece of the ADLC, and it should drive data quality. + +In this blog, we’ll walk through a plan to define data quality. This will look like: + +- identifying *data hygiene* issues +- identifying *business-focused anomaly* issues +- identifying *stats-focused anomaly* issues + +Once we have *defined* data quality, we’ll move on to *prioritize* those concerns. We will: + +- think through each concern in terms of the breadth of impact +- decide if each concern should be at error or warning severity + + + +### Who are we? + +Let’s start with introductions - we’re Faith and Jerrie, and we work on dbt Labs’s training and services teams, respectively. By working closely with countless companies using dbt, we’ve gained unique perspectives of the landscape. + +The training team collates problems organizations think about today and gauge how our solutions fit. These are shorter engagements, which means we see the data world shift and change in real time. Resident Architects spend much more time with teams to craft much more in-depth solutions, figure out where those solutions are helping, and where problems still need to be addressed. Trainers help identify patterns in the problems data teams face, and Resident Architects dive deep on solutions. + +Today, we’ll guide you through a particularly thorny problem: testing. + +## Why testing? + +Mariah Rogers broke early ground on data quality and testing in her [Coalesce 2022 talk](https://www.youtube.com/watch?v=hxvVhmhWRJA). We’ve seen similar talks again at Coalesce 2024, like [this one](https://www.youtube.com/watch?v=iCG-5vqMRAo) from the data team at Aiven and [this one](https://www.youtube.com/watch?v=5bRG3y9IM4Q&list=PL0QYlrC86xQnWJ72sJlzDqPS0peE7j9Ed&index=71) from the co-founder at Omni Analytics. These talks share a common theme: testing your dbt project too much can get out of control quickly, leading to alert fatigue. + +In our customer engagements, we see *wildly different approaches* to testing data. We’ve definitely seen what Mariah, the Aiven team, and the Omni team have described, which is so many tests that errors and alerts just become noise. We’ve also seen the opposite end of the spectrum—only primary keys being tested. From our field experiences, we believe there’s room for a middle path. +A desire for a better approach to data quality and testing isn’t just anecdotal to Coalesce, or to dbt’s training and services. The dbt community has long called for a more intentional approach to data quality and testing - data quality is on the industry’s mind! In fact, [57% of respondents](https://www.getdbt.com/resources/reports/state-of-analytics-engineering-2024) to dbt’s 2024 State of Analytics Engineering survey said that data quality is a predominant issue facing their day-to-day work. + +### What does d@tA qUaL1Ty even mean?! 
+ +High-quality data is *trusted* and *used frequently.* It doesn’t get argued over or endlessly scrutinized for matching to other data. Data *testing* should lead to higher data *quality* and insights, period. + +Best practices in data quality are still nascent. That said, a lot of important baseline work has been done here. There are [case](https://medium.com/@AtheonAnalytics/mastering-data-testing-with-dbt-part-1-689b2a025675) [studies](https://medium.com/@AtheonAnalytics/mastering-data-testing-with-dbt-part-2-c4031af3df18) on implementing dbt testing well. dbt Labs also has an [Advanced Testing](https://learn.getdbt.com/courses/advanced-testing) course, emphasizing that testing should spur action and be focused and informative enough to help address failures. You can even enforce testing best practices and dbt Labs’s own best practices using the [dbt_meta_testing](https://hub.getdbt.com/tnightengale/dbt_meta_testing/latest/) or [dbt_project_evaluator](https://github.com/dbt-labs/dbt-project-evaluator) packages and dbt Explorer’s [Recommendations](https://docs.getdbt.com/docs/collaborate/project-recommendations) page. + +The missing piece is still cohesion and guidance for everyday practitioners to help develop their testing framework. + +To recap, we’re going to start with: + +- identifying *data hygiene* issues +- identifying *business-focused anomaly* issues +- identifying *stats-focused anomaly* issues + +Next, we’ll prioritize. We will: + +- think through each concern in terms of the breadth of impact +- decide if each concern should be at error or warning severity + +Get a pen and paper (or a google doc) and join us in constructing your own testing framework. + +## Identifying data quality issues in your pipeline + +Let’s start our framework by *identifying* types of data quality issues. + +In our daily work with customers, we find that data quality issues tend to fall into one of three broad buckets: *data hygiene, business-focused anomalies,* and *stats-focused anomalies.* Read the bucket descriptions below, and list 2-3 data quality concerns in your own business context that fall into each bucket. + +### Bucket 1: Data hygiene + +*Data hygiene* issues are concerns you address in your [staging layer.](https://docs.getdbt.com/best-practices/how-we-structure/2-staging) Hygienic data meets your expectations around formatting, completeness, and granularity requirements. Here are a few examples. + +- *Granularity:* primary keys are unique and not null. Duplicates throw off calculations. +- *Completeness:* columns that should always contain text, *do.* Incomplete data often has to get excluded, reducing your overall analytical power. +- *Formatting:* email addresses always have a valid domain. Incorrect emails may affect things like marketing outreach. + +### Bucket 2: Business-focused anomalies + +*Business-focused anomalies* catch unexpected behavior. You can flag unexpected behavior by clearly defining *expected* behavior. *Business-focused anomalies* are when aspects of the data differ from what you know to be typical in your business. You’ll know what’s typical either through your own analyses, your colleagues’ analyses, or things your stakeholder homies point out to you. + +Since business-focused anomaly testing is set by a human, it will be fluid and need to be adjusted periodically. Here’s an example. + +Imagine you’re a sales analyst. Generally, you know that if your daily sales amount goes up or down by more than 20% daily, that’s bad. 
Specifically, it’s usually a warning sign for fraud or the order management system (OMS) dropping orders. You set a test in dbt to fail if any given day’s sales amount is a delta of 20% from the previous day. This works for a while. + +Then, you have a stretch of 3 months where your test fails 5 times a week! Every time you investigate, it turns out to be valid consumer behavior. You’re suddenly in hypergrowth, and sales are legitimately increasing that much. + +Your 20%-change fraud and OMS failure detector is no longer valid. You need to investigate anew which sales spikes or drops indicate fraud or OMS problems. Once you figure out a new threshold, you’ll go back and adjust your testing criteria. + +Although your data’s expected behavior will shift over time, you should still commit to defining business-focused anomalies to grow your understanding of what is normal for your data. + +Here’s how to identify potential anomalies. + +Start at your business intelligence (BI) layer. Pick 1-3 dashboards or tables that you *know* are used frequently. List these 1-3 dashboards or tables. For each dashboard or table you have, identify 1-3 “expected” behaviors that your end-users rely on. Here are a few examples to get you thinking: + +- Revenue numbers should not change by more than X% in Y amount of time. This could indicate fraud or OMS problems. +- Monthly active users should not decline more than X% after the initial onboarding period. This might indicate user dissatisfaction, usability issues, or that users are not finding a feature valuable. +- Exam passing rates should stay above Y%. A decline below that threshold may indicate recent content changes or technical issues are affecting understanding or accessibility. + +You should also consider what data issues you have had in the past! Look through recent data incidents and pick out 3 or 4 to guard against next time. These might be in a #data-questions channel or perhaps a DM from a stakeholder. + +### Bucket 3: Stats-focused anomalies + +*Stats-focused anomalies* are fluctuations that go against your expected volumes or metrics. Some examples include: + +- Volume anomalies. This could be site traffic amounts that may indicate illicit behavior, or perhaps site traffic dropping one day then doubling the next, indicating that a chunk of data was not loaded properly. +- Dimensional anomalies, like too many product types underneath a particular product line that may indicate incorrect barcodes. +- Column anomalies, like sale values more than a certain number of standard deviations from a mean, that may indicate improper discounting. + +Overall, stats-focused anomalies can indicate system flaws, illicit site behavior, or fraud, depending on your industry. They also tend to require more advanced testing practices than we are covering in this blog. We feel stats-focused anomalies are worth exploring once you have a good handle on your data hygiene and business-focused anomalies. We won’t give recommendations on stats-focused anomalies in this post. + +## How to prioritize data quality concerns in your pipeline + +Now, you have a written and categorized list of data hygiene concerns and business-focused anomalies to guard against. It’s time to *prioritize* which quality issues deserve to fail your pipelines. + +To prioritize your data quality concerns, think about real-life impact. A couple of guiding questions to consider are: + +- Are your numbers *customer-facing?* For example, maybe you work with temperature-tracking devices.
Your customers rely on these devices to show them average temperatures on perishable goods like strawberries in-transit. What happens if the temperature of the strawberries reads as 300C when they know their refrigerated truck was working just fine? How is your brand perception impacted when the numbers are wrong? +- Are your numbers *used to make financial decisions?* For example, is the marketing team relying on your numbers to choose how to spend campaign funds? +- Are your numbers *executive-facing?* Will executives use these numbers to reallocate funds or shift priorities? + +We think these 3 categories above constitute high-impact, pipeline-failing events, and should be your top priorities. Of course, adjust priority order if your business context calls for it. + +Consult your list of data quality issues in the categories we mention above. Decide and mark if any are customer facing, used for financial decisions, or are executive-facing. Mark any data quality issues in those categories as “error”. These are your pipeline-failing events. + +If any data quality concerns fall outside of these 3 categories, we classify them as **nice-to-knows**. **Nice-to-know** data quality testing *can* be helpful. But if you don’t have a *specific action you can immediately take* when a nice-to-know quality test fails, the test *should be a warning, not an error.* + +You could also remove nice-to-know tests altogether. Data testing should drive action. The more alerts you have in your pipeline, the less action you will take. Configure alerts with care! + +However, we do think nice-to-know tests are worth keeping *if and only if* you are gathering evidence for action you plan to take within the next 6 months, like product feature research. In a scenario like that, those tests should still be set to warning. + +### Start your action plan + +Now, your data quality concerns are listed and prioritized. Next, add 1 or 2 initial debugging steps you will take if/when the issues surface. These steps should get added to your framework document. Additionally, consider adding them to a [test’s description.](https://discourse.getdbt.com/t/is-it-possible-to-add-a-description-to-singular-tests/5472/4) + +This step is *important.* Data quality testing should spur action, not accumulate alerts. Listing initial debugging steps for each concern will refine your list to the most critical elements. + +If you can't identify an action step for any quality issue, *remove it*. Put it on a backlog and research what you can do when it surfaces later. + +Here’s a few examples from our list of unexpected behaviors above. + +- For calculated field X, a value above Y or below Z is not possible. + - *Debugging initial steps* + - Use dbt test SQL or recent test results in dbt Explorer to find problematic rows + - Check these rows in staging and first transformed model + - Pinpoint where unusual values first appear +- Revenue shouldn’t change by more than X% in Y amount of time. + - *Debugging initial steps:* + - Check recent revenue values in staging model + - Identify transactions near min/max values + - Discuss outliers with sales ops team + +You now have written out a prioritized list of data quality concerns, as well as action steps to take when each concern surfaces. Next, consult [hub.getdbt.com](http://hub.getdbt.com) and find tests that address each of your highest priority concerns. 
[dbt-expectations](https://hub.getdbt.com/calogica/dbt_expectations/latest/) and [dbt_utils](https://hub.getdbt.com/dbt-labs/dbt_utils/latest/) are great places to start. + +The data tests you’ve marked as “errors” above should get error-level severity. Any concerns falling into that nice-to-know category should either *not get tested* or have their tests *set to warning.* + +Your data quality priorities list is a living reference document. We recommend linking it in your project’s README so that you can go back and edit it as your testing needs evolve. Additionally, developers in your project should have easy access to this document. Maintaining good data quality is everyone’s responsibility! + +As you try these ideas out, come to the dbt Community Slack and let us know what works and what doesn’t. Data is a community of practice, and we are eager to hear what comes out of yours. diff --git a/website/blog/2024-11-27-test-smarter-part-2.md b/website/blog/2024-11-27-test-smarter-part-2.md new file mode 100644 index 00000000000..4fabe066011 --- /dev/null +++ b/website/blog/2024-11-27-test-smarter-part-2.md @@ -0,0 +1,125 @@ +--- +title: "Test smarter not harder: Where should tests go in your pipeline?" +description: "Testing your data should drive action, not accumulate alerts. We take our testing framework developed in our last post and make recommendations for where tests ought to go at each transformation stage." +slug: test-smarter-where-tests-should-go + +authors: [faith_mckenna, jerrie_kumalah_kenney] + +tags: [analytics craft] +hide_table_of_contents: false + +date: 2024-12-09 +is_featured: true +--- + +👋 Greetings, dbt’ers! It’s Faith & Jerrie, back again to offer tactical advice on *where* to put tests in your pipeline. + +In [our first post](/blog/test-smarter-not-harder) on refining testing best practices, we developed a prioritized list of data quality concerns. We also documented first steps for debugging each concern. This post will guide you on where specific tests should go in your data pipeline. + +*Note that we are constructing this guidance based on how we [structure data at dbt Labs.](/best-practices/how-we-structure/1-guide-overview#guide-structure-overview)* You may use a different modeling approach—that’s okay! Translate our guidance to your data’s shape, and let us know in the comments section what modifications you made. + +First, here’s our opinions on where specific tests should go: + +- Source tests should be fixable data quality concerns. See the [callout box below](#sources) for what we mean by “fixable”. +- Staging tests should be business-focused anomalies specific to individual tables, such as accepted ranges or ensuring sequential values. In addition to these tests, your staging layer should clean up any nulls, duplicates, or outliers that you can’t fix in your source system. You generally don’t need to test your cleanup efforts. +- Intermediate and marts layer tests should be business-focused anomalies resulting specifically from joins or calculations. You also may consider adding additional primary key and not null tests on columns where it’s especially important to protect the grain. + + + +## Where should tests go in your pipeline? + +![A horizontal, multicolored diagram that shows examples of where tests ought to be placed in a data pipeline.](/img/blog/2024-11-27-test-smarter-part-2/testing_pipeline.png) + +This diagram above outlines where you might put specific data tests in your pipeline. 
Let’s expand on it and discuss where each type of data quality issue should be tested. + +### Sources + +Tests applied to your sources should indicate *fixable-at-the-source-system* issues. If your source tests flag source system issues that aren’t fixable, remove the test and mitigate the problem in your staging layer instead. + +:::tip[What does fixable mean?] +We consider a "fixable-at-the-source-system" issue to be something that: + +- You yourself can fix in the source system. +- You know the right person to fix it and have a good enough relationship with them that you know you can *get it fixed.* + +You may have issues that can *technically* get fixed at the source, but it won't happen till the next planning cycle, or you need to develop better relationships to get the issue fixed, or something similar. This demands a more nuanced approach than we'll cover in this post. If you have thoughts on this type of situation, let us know! + +::: + +Here’s our recommendation for what tests belong on your sources. + +- Source freshness: testing data freshness for sources that are critical to your pipelines. + - If any sources feed into any of the “top 3” [priority categories](https://docs.getdbt.com/blog/test-smarter-not-harder#how-to-prioritize-data-quality-concerns-in-your-pipeline) in our last post, use [`dbt source freshness`](https://docs.getdbt.com/docs/deploy/source-freshness) in your job execution commands and set the severity to `error`. That way, if source freshness fails, so does your job. + - If none of your sources feed into high priority categories, set your source freshness severity to `warn` and add source freshness to your job execution commands. That way, you still get source freshness information but stale data won't fail your pipeline. +- Data hygiene: tests that are *fixable* in the source system (see our note above on “fixability”). + - Examples: + - Duplicate customer records that can be deleted in the source system + - Null records, such as a customer name or email address, that can be entered into the source system + - Primary key testing where duplicates are removable in the source system + +### Staging + +In the staging layer, your models should be cleaning up or mitigating data issues that can't be fixed at the source. Your tests should be focused on business anomaly detection. + +- Data cleanup and issue mitigation: Use our [best practices around staging layers](https://docs.getdbt.com/best-practices/how-we-structure/2-staging) to clean things up. Don’t add tests to your cleanup efforts. If you’re filtering out nulls in a column, adding a not_null test is repetitive! 🌶️ +- Business-focused anomaly examples: these are data quality issues you *should* test for in your staging layer, because they fall outside of your business’s defined norms. These might be: + - Values inside a single column that fall outside of an acceptable range. For example, a store selling a greater quantity of limited-edition items than they received in their stock delivery. + - Values that should always be positive, are positive. This might look like a negative transaction amount that isn’t classified as a return. This failing test would then spur further investigation into the offending transaction. + - An unexpected uptick in volume of a quantity column beyond a pre-defined percentage. This might look like a store’s customer volume spiking unexpectedly and outside of expected seasonal norms. This is an anomaly that could indicate a bug or modeling issue. 
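+To make these business-focused anomaly tests concrete, here is one minimal sketch of what a couple of them could look like in a staging model’s properties file. The model name, column names, thresholds, and `where` filter are illustrative assumptions for a hypothetical retail pipeline, and the sketch assumes the [dbt_utils](https://hub.getdbt.com/dbt-labs/dbt_utils/latest/) package is installed (swap `data_tests` for `tests` on dbt versions earlier than 1.8):
+
+```yml
+models:
+  - name: stg_payments__transactions  # hypothetical staging model
+    columns:
+      - name: transaction_amount
+        data_tests:
+          # amounts should always be positive unless the row is a return
+          - dbt_utils.accepted_range:
+              min_value: 0
+              config:
+                where: "transaction_type != 'return'"  # assumed column and value
+                severity: error
+      - name: quantity_sold
+        data_tests:
+          # quantity sold should not exceed what we expect to have in stock
+          - dbt_utils.accepted_range:
+              min_value: 0
+              max_value: 500  # assumed threshold; tune to your business norms
+              config:
+                severity: warn
+```
+
+Each test maps directly to an expected behavior you defined, so a failure points to a specific business question rather than adding generic noise.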
+ +### Intermediate (if applicable) + +In your intermediate layer, focus on data hygiene and anomaly tests for new columns. Don’t re-test passthrough columns from sources or staging. Here are some examples of tests you might put in your intermediate layer based on the use cases of intermediate models we [outline in this guide](/best-practices/how-we-structure/3-intermediate#intermediate-models). + +- Intermediate models often re-grain models to prepare them for marts. + - Add a primary key test to any re-grained models. + - Additionally, consider adding a primary key test to models where the grain *has remained the same* but has been *enriched.* This helps future-proof your enriched models against future developers who may not be able to glean your intention from SQL alone. +- Intermediate models may perform a first set of joins or aggregations to reduce complexity in a final mart. + - Add simple anomaly tests to verify the behavior of your sets of joins and aggregations. This may look like: + - An [accepted_values](/reference/resource-properties/data-tests#accepted_values) test on a newly calculated categorical column. + - A [mutually_exclusive_ranges](https://github.com/dbt-labs/dbt-utils#mutually_exclusive_ranges-source) test on two columns whose values behave in relation to one another (ex: asserting age ranges do not overlap). + - A [not_constant](https://github.com/dbt-labs/dbt-utils#not_constant-source) test on a column whose value should be continually changing (ex: page view counts on website analytics). +- Intermediate models may isolate complex operations. + - The anomaly tests we list above may suffice here. + - You might also consider [unit testing](/docs/build/unit-tests) any particularly complex pieces of SQL logic. + +### Marts + +Marts layer testing will follow the same hygiene-or-anomaly pattern as staging and intermediate. Similar to your intermediate layer, you should focus your testing on net-new columns in your marts layer. This might look like: + +- Unit tests: validate especially complex transformation logic. For example: + - Calculating dates in a way that feeds into forecasting. + - Customer segmentation logic, especially logic that has a lot of CASE-WHEN statements. +- Primary key tests: focus on where your mart's granularity has changed from its staging/intermediate inputs. + - Similar to the intermediate models above, you may also want to add primary key tests to models whose grain hasn’t changed, but have been enriched with other data. Primary key tests here communicate your intent. +- Business-focused anomaly tests: focus on *new* calculated fields, such as: + - Singular tests on high-priority, high-impact tables where you have a specific problem you want forewarning about. + - This might be something like fuzzy matching logic to detect when the same person is creating multiple email addresses to extend a free trial beyond its acceptable end date. + - A test for calculated numerical fields that shouldn’t vary by more than a certain percentage in a week. + - A calculated ledger table that follows certain business rules, for example, today’s running total of spend must always be greater than yesterday’s. + +### CI/CD + +All of the testing you’ve applied in your different layers is the manual work of constructing your framework. CI/CD is where it gets automated. + +You should run a [slim CI](/best-practices/best-practice-workflows#run-only-modified-models-to-test-changes-slim-ci) to optimize your resource consumption.
+ +With CI/CD and your regular production runs, your testing framework can be on autopilot. 😎 + +If and when you encounter failures, consult your trusty testing framework doc you built in our [earlier post](/blog/test-smarter-not-harder). + +### Advanced CI + +In the early stages of your smarter testing journey, start with dbt Cloud’s built-in flags for [advanced CI](/docs/deploy/advanced-ci). In PRs with advanced CI enabled, dbt Cloud will flag what has been modified, added, or removed in the “compare changes” section. These three flags offer confidence and evidence that your changes are what you expect. Then, hand them off for peer review. Advanced CI helps jump start your colleague’s review of your work by bringing all of the implications of the change into one place. + +We consider usage of Advanced CI beyond the modified, added, or changed gut checks to be an advanced (heh) testing strategy, and look forward to hearing how you use it. + +## Wrapping it all up + +Judicious data testing is like training for a marathon. It’s not productive to go run 20 miles a day and hope that you’ll be marathon-ready and uninjured. Similarly, throwing data tests randomly at your data pipeline without careful thought is not going to tell you much about your data quality. + +Runners go into marathons with training plans. Analytics engineers who care about data quality approach the issue with a plan, too. + +As you try out some of the guidance above here, remember that your testing needs are going to evolve over time. Don’t be afraid to revise your original testing strategy. + +Let us know your thoughts on these strategies in the comments section. Try them out, and share your thoughts to help us refine them. diff --git a/website/blog/authors.yml b/website/blog/authors.yml index 271130a477d..3070ec806b5 100644 --- a/website/blog/authors.yml +++ b/website/blog/authors.yml @@ -214,6 +214,14 @@ euan_johnston: - icon: fa-github url: https://github.com/euanjohnston-dev name: Euan Johnston +faith_mckenna: + image_url: /img/blog/authors/faith_pic.png + job_title: Senior Technical Instructor + links: + - icon: fa-linkedin + url: https://www.linkedin.com/in/faithlierheimer/ + name: Faith McKenna + organization: dbt Labs filip_byrén: image_url: /img/blog/authors/filip-eqt.png job_title: VP and Software Architect @@ -275,6 +283,14 @@ jeremy_cohen: job_title: Product Manager name: Jeremy Cohen organization: dbt Labs +jerrie_kumalah_kenney: + image_url: /img/blog/authors/jerrie.jpg + job_title: Resident Architect + links: + - icon: fa-linkedin + url: https://www.linkedin.com/in/jerriekumalah/ + name: Jerrie Kumalah Kenney + organization: dbt Labs jess_williams: image_url: /img/blog/authors/jess.png job_title: Head of Professional Services @@ -606,4 +622,4 @@ yu_ishikawa: - icon: fa-linkedin url: https://www.linkedin.com/in/yuishikawa0301 name: Yu Ishikawa - organization: Ubie \ No newline at end of file + organization: Ubie diff --git a/website/blog/ctas.yml b/website/blog/ctas.yml index ac56d4cc749..1f9b13afa7b 100644 --- a/website/blog/ctas.yml +++ b/website/blog/ctas.yml @@ -25,3 +25,8 @@ subheader: Coalesce is the premiere analytics engineering conference! Sign up now for innovation, collaboration, and inspiration. Don't miss out! button_text: Register now url: https://coalesce.getdbt.com/register +- name: coalesce_2024_catchup + header: Missed Coalesce 2024? + subheader: Catch up on Coalesce 2024 and register to access a select number of on-demand sessions. 
+ button_text: Register and watch + url: https://coalesce.getdbt.com/register/online diff --git a/website/blog/metadata.yml b/website/blog/metadata.yml index d0009fd62c4..8b53a7a2a04 100644 --- a/website/blog/metadata.yml +++ b/website/blog/metadata.yml @@ -2,7 +2,7 @@ featured_image: "" # This CTA lives in right sidebar on blog index -featured_cta: "coalesce_2024_signup" +featured_cta: "coalesce_2024_catchup" # Show or hide hero title, description, cta from blog index show_title: true diff --git a/website/dbt-versions.js b/website/dbt-versions.js index 9007d719bc0..3e59b926b80 100644 --- a/website/dbt-versions.js +++ b/website/dbt-versions.js @@ -15,12 +15,12 @@ */ exports.versions = [ { - version: "1.9.1", - customDisplay: "Cloud (Versionless)", + version: "1.10", + customDisplay: "Cloud (Latest)", }, { version: "1.9", - isPrerelease: true, + EOLDate: "2025-12-08", }, { version: "1.8", @@ -28,11 +28,7 @@ exports.versions = [ }, { version: "1.7", - EOLDate: "2024-10-30", - }, - { - version: "1.6", - EOLDate: "2024-07-31", + EOLDate: "2024-11-01", }, ]; @@ -66,134 +62,6 @@ exports.versionedPages = [ page: "reference/global-configs/indirect-selection", firstVersion: "1.8", }, - { - page: "reference/resource-configs/store_failures_as", - firstVersion: "1.7", - }, - { - page: "docs/build/build-metrics-intro", - firstVersion: "1.6", - }, - { - page: "docs/build/sl-getting-started", - firstVersion: "1.6", - }, - { - page: "docs/build/about-metricflow", - firstVersion: "1.6", - }, - { - page: "docs/build/join-logic", - firstVersion: "1.6", - }, - { - page: "docs/build/validation", - firstVersion: "1.6", - }, - { - page: "docs/build/semantic-models", - firstVersion: "1.6", - }, - { - page: "docs/build/group-by", - firstVersion: "1.6", - }, - { - page: "docs/build/entities", - firstVersion: "1.6", - }, - { - page: "docs/build/metrics-overview", - firstVersion: "1.6", - }, - { - page: "docs/build/cumulative", - firstVersion: "1.6", - }, - { - page: "docs/build/derived", - firstVersion: "1.6", - }, - { - page: "docs/build/measure-proxy", - firstVersion: "1.6", - }, - { - page: "docs/build/ratio", - firstVersion: "1.6", - }, - { - page: "reference/commands/clone", - firstVersion: "1.6", - }, - { - page: "docs/collaborate/govern/project-dependencies", - firstVersion: "1.6", - }, - { - page: "reference/dbt-jinja-functions/thread_id", - firstVersion: "1.6", - }, - { - page: "reference/resource-properties/deprecation_date", - firstVersion: "1.6", - }, - { - page: "reference/commands/retry", - firstVersion: "1.6", - }, - { - page: "docs/build/groups", - firstVersion: "1.5", - }, - { - page: "docs/collaborate/govern/model-contracts", - firstVersion: "1.5", - }, - { - page: "reference/commands/show", - firstVersion: "1.5", - }, - { - page: "docs/collaborate/govern/model-access", - firstVersion: "1.5", - }, - { - page: "docs/collaborate/govern/model-versions", - firstVersion: "1.5", - }, - { - page: "reference/programmatic-invocations", - firstVersion: "1.5", - }, - { - page: "reference/resource-configs/contract", - firstVersion: "1.5", - }, - { - page: "reference/resource-configs/group", - firstVersion: "1.5", - }, - { - page: "reference/resource-properties/access", - firstVersion: "1.5", - }, - { - page: "reference/resource-properties/constraints", - firstVersion: "1.5", - }, - { - page: "reference/resource-properties/latest_version", - firstVersion: "1.5", - }, - { - page: "reference/resource-properties/versions", - firstVersion: "1.5", - }, - { - page: 
"reference/resource-configs/on_configuration_change", - firstVersion: "1.6", - }, ]; /** @@ -206,12 +74,5 @@ exports.versionedPages = [ * @property {string} firstVersion The first version the category is visible in the sidebar */ exports.versionedCategories = [ - { - category: "Model governance", - firstVersion: "1.5", - }, - { - category: "Build your metrics", - firstVersion: "1.6", - }, + ]; diff --git a/website/docs/best-practices/how-we-build-our-metrics/semantic-layer-3-build-semantic-models.md b/website/docs/best-practices/how-we-build-our-metrics/semantic-layer-3-build-semantic-models.md index 7990cf6752f..da882dba6c5 100644 --- a/website/docs/best-practices/how-we-build-our-metrics/semantic-layer-3-build-semantic-models.md +++ b/website/docs/best-practices/how-we-build-our-metrics/semantic-layer-3-build-semantic-models.md @@ -241,7 +241,9 @@ measures: ## Reviewing our work -Our completed code will look like this, our first semantic model! +Our completed code will look like this, our first semantic model! Here are two examples showing different organizational approaches: + + @@ -288,6 +290,68 @@ semantic_models: description: The total tax paid on each order. agg: sum ``` + + + + + + +```yml +semantic_models: + - name: orders + defaults: + agg_time_dimension: ordered_at + description: | + Order fact table. This table is at the order grain with one row per order. + + model: ref('stg_orders') + + entities: + - name: order_id + type: primary + - name: location + type: foreign + expr: location_id + - name: customer + type: foreign + expr: customer_id + + dimensions: + - name: ordered_at + expr: date_trunc('day', ordered_at) + # use date_trunc(ordered_at, DAY) if using BigQuery + type: time + type_params: + time_granularity: day + - name: is_large_order + type: categorical + expr: case when order_total > 50 then true else false end + + measures: + - name: order_total + description: The total revenue for each order. + agg: sum + - name: order_count + description: The count of individual orders. + expr: 1 + agg: sum + - name: tax_paid + description: The total tax paid on each order. + agg: sum +``` + + +As you can see, the content of the semantic model is identical in both approaches. The key differences are: + +1. **File location** + - Co-located approach: `models/marts/orders.yml` + - Parallel sub-folder approach: `models/semantic_models/sem_orders.yml` + +2. **File naming** + - Co-located approach: Uses the same name as the corresponding mart (`orders.yml`) + - Parallel sub-folder approach: Prefixes the file with `sem_` (`sem_orders.yml`) + +Choose the approach that best fits your project structure and team preferences. The co-located approach is often simpler for new projects, while the parallel sub-folder approach can be clearer for migrating large existing projects to the Semantic Layer. ## Next steps diff --git a/website/docs/best-practices/how-we-mesh/mesh-2-who-is-dbt-mesh-for.md b/website/docs/best-practices/how-we-mesh/mesh-2-who-is-dbt-mesh-for.md index b6fadc2d7a6..4c8adfa86a1 100644 --- a/website/docs/best-practices/how-we-mesh/mesh-2-who-is-dbt-mesh-for.md +++ b/website/docs/best-practices/how-we-mesh/mesh-2-who-is-dbt-mesh-for.md @@ -23,9 +23,6 @@ Is dbt Mesh a good fit in this scenario? Absolutely! There is no other way to sh - Onboarding hundreds of people and dozens of projects is full of friction! The challenges of a scaled, global organization are not to be underestimated. To start the migration, prioritize teams that have strong dbt familiarity and fundamentals. 
dbt Mesh is an advancement of core dbt deployments, so these teams are likely to have a smoother transition. Additionally, prioritize teams that manage strategic data assets that need to be shared widely. This ensures that dbt Mesh will help your teams deliver concrete value quickly. -- Bi-directional project dependencies -- currently, projects in dbt Mesh are treated like dbt resources in that they cannot depend on each other. However, many teams may want to be able to share data assets back and forth between teams. - - We've added support for [enabling bidirectional dependencies](/best-practices/how-we-mesh/mesh-3-structures#cycle-detection) across projects. If this sounds like your organization, dbt Mesh is the architecture you should pursue. ✅ diff --git a/website/docs/best-practices/how-we-mesh/mesh-3-structures.md b/website/docs/best-practices/how-we-mesh/mesh-3-structures.md index c75c566610b..38066811d8a 100644 --- a/website/docs/best-practices/how-we-mesh/mesh-3-structures.md +++ b/website/docs/best-practices/how-we-mesh/mesh-3-structures.md @@ -66,7 +66,7 @@ Since the launch of dbt Mesh, the most common pattern we've seen is one where pr Users may need to contribute models across multiple projects and this is fine. There will be some friction doing this, versus a single repo, but this is _useful_ friction, especially if upstreaming a change from a “spoke” to a “hub.” This should be treated like making an API change, one that the other team will be living with for some time to come. You should be concerned if your teammates find they need to make a coordinated change across multiple projects very frequently (every week), or as a key prerequisite for ~20%+ of their work. -### Cycle detection +### Cycle detection import CycleDetection from '/snippets/_mesh-cycle-detection.md'; diff --git a/website/docs/best-practices/how-we-mesh/mesh-5-faqs.md b/website/docs/best-practices/how-we-mesh/mesh-5-faqs.md index 1ae49928ae5..9f12f7d2c20 100644 --- a/website/docs/best-practices/how-we-mesh/mesh-5-faqs.md +++ b/website/docs/best-practices/how-we-mesh/mesh-5-faqs.md @@ -215,7 +215,7 @@ There’s model-level access within dbt, role-based access for users and groups First things first: access to underlying data is always defined and enforced by the underlying data platform (for example, BigQuery, Databricks, Redshift, Snowflake, Starburst, etc.) This access is managed by executing “DCL statements” (namely `grant`). dbt makes it easy to [configure `grants` on models](/reference/resource-configs/grants), which provision data access for other roles/users/groups in the data warehouse. However, dbt does _not_ automatically define or coordinate those grants unless they are configured explicitly. Refer to your organization's system for managing data warehouse permissions. -[dbt Cloud Enterprise plans](https://www.getdbt.com/pricing) support [role-based access control (RBAC)](/docs/cloud/manage-access/enterprise-permissions#how-to-set-up-rbac-groups-in-dbt-cloud) that manages granular permissions for users and user groups. You can control which users can see or edit all aspects of a dbt Cloud project. A user’s access to dbt Cloud projects also determines whether they can “explore” that project in detail. Roles, users, and groups are defined within the dbt Cloud application via the UI or by integrating with an identity provider. 
+[dbt Cloud Enterprise plans](https://www.getdbt.com/pricing) support [role-based access control (RBAC)](/docs/cloud/manage-access/about-user-access#role-based-access-control-) that manages granular permissions for users and user groups. You can control which users can see or edit all aspects of a dbt Cloud project. A user’s access to dbt Cloud projects also determines whether they can “explore” that project in detail. Roles, users, and groups are defined within the dbt Cloud application via the UI or by integrating with an identity provider. [Model access](/docs/collaborate/govern/model-access) defines where models can be referenced. It also informs the discoverability of those projects within dbt Explorer. Model `access` is defined in code, just like any other model configuration (`materialized`, `tags`, etc). diff --git a/website/docs/best-practices/how-we-structure/2-staging.md b/website/docs/best-practices/how-we-structure/2-staging.md index 8eb91ff5b7b..1f52a4a9a00 100644 --- a/website/docs/best-practices/how-we-structure/2-staging.md +++ b/website/docs/best-practices/how-we-structure/2-staging.md @@ -223,4 +223,4 @@ This is a welcome change for many of us who have become used to applying the sam :::info Development flow versus DAG order. This guide follows the order of the DAG, so we can get a holistic picture of how these three primary layers build on each other towards fueling impactful data products. It’s important to note though that developing models does not typically move linearly through the DAG. Most commonly, we should start by mocking out a design in a spreadsheet so we know we’re aligned with our stakeholders on output goals. Then, we’ll want to write the SQL to generate that output, and identify what tables are involved. Once we have our logic and dependencies, we’ll make sure we’ve staged all the necessary atomic pieces into the project, then bring them together based on the logic we wrote to generate our mart. Finally, with a functioning model flowing in dbt, we can start refactoring and optimizing that mart. By splitting the logic up and moving parts back upstream into intermediate models, we ensure all of our models are clean and readable, the story of our DAG is clear, and we have more surface area to apply thorough testing. -:::info +::: diff --git a/website/docs/best-practices/how-we-structure/4-marts.md b/website/docs/best-practices/how-we-structure/4-marts.md index 21de31a9e0d..995dea7e96f 100644 --- a/website/docs/best-practices/how-we-structure/4-marts.md +++ b/website/docs/best-practices/how-we-structure/4-marts.md @@ -26,7 +26,8 @@ models/marts ✅ **Group by department or area of concern.** If you have fewer than 10 or so marts you may not have much need for subfolders, so as with the intermediate layer, don’t over-optimize too early. If you do find yourself needing to insert more structure and grouping though, use useful business concepts here. In our marts layer, we’re no longer worried about source-conformed data, so grouping by departments (marketing, finance, etc.) is the most common structure at this stage. -✅ **Name by entity.** Use plain English to name the file based on the concept that forms the grain of the mart `customers`, `orders`. Note that for pure marts, there should not be a time dimension (`orders_per_day`) here, that is typically best captured via metrics. +✅ **Name by entity.** Use plain English to name the file based on the concept that forms the grain of the mart: `customers`, `orders`.
Marts that don't include any time-based rollups (pure marts) should not have a time dimension (`orders_per_day`); that granularity is typically best captured via metrics. + ❌ **Build the same concept differently for different teams.** `finance_orders` and `marketing_orders` is typically considered an anti-pattern. There are, as always, exceptions — a common pattern we see is that, finance may have specific needs, for example reporting revenue to the government in a way that diverges from how the company as a whole measures revenue day-to-day. Just make sure that these are clearly designed and understandable as _separate_ concepts, not departmental views on the same concept: `tax_revenue` and `revenue` not `finance_revenue` and `marketing_revenue`. diff --git a/website/docs/best-practices/how-we-style/2-how-we-style-our-sql.md b/website/docs/best-practices/how-we-style/2-how-we-style-our-sql.md index 8c61e63b888..35e025faf3f 100644 --- a/website/docs/best-practices/how-we-style/2-how-we-style-our-sql.md +++ b/website/docs/best-practices/how-we-style/2-how-we-style-our-sql.md @@ -8,8 +8,8 @@ id: 2-how-we-style-our-sql - ☁️ Use [SQLFluff](https://sqlfluff.com/) to maintain these style rules automatically. - Customize `.sqlfluff` configuration files to your needs. - Refer to our [SQLFluff config file](https://github.com/dbt-labs/jaffle-shop-template/blob/main/.sqlfluff) for the rules we use in our own projects. - - - Exclude files and directories by using a standard `.sqlfluffignore` file. Learn more about the syntax in the [.sqlfluffignore syntax docs](https://docs.sqlfluff.com/en/stable/configuration.html#id2). + - Exclude files and directories by using a standard `.sqlfluffignore` file. Learn more about the syntax in the [.sqlfluffignore syntax docs](https://docs.sqlfluff.com/en/stable/configuration/index.html). + - Excluding unnecessary folders and files (such as `target/`, `dbt_packages/`, and `macros/`) can speed up linting, improve run times, and help you avoid irrelevant logs. - 👻 Use Jinja comments (`{# #}`) for comments that should not be included in the compiled SQL. - ⏭️ Use trailing commas. - 4️⃣ Indents should be four spaces. diff --git a/website/docs/best-practices/how-we-style/5-how-we-style-our-yaml.md b/website/docs/best-practices/how-we-style/5-how-we-style-our-yaml.md index 8f817356334..e3b539e8b12 100644 --- a/website/docs/best-practices/how-we-style/5-how-we-style-our-yaml.md +++ b/website/docs/best-practices/how-we-style/5-how-we-style-our-yaml.md @@ -7,6 +7,7 @@ id: 5-how-we-style-our-yaml - 2️⃣ Indents should be two spaces - ➡️ List items should be indented +- 🔠 List items with a single entry can be provided as a string (for example, `'select': 'other_user'`), but it's best practice to provide the argument as an explicit list (for example, `'select': ['other_user']`). - 🆕 Use a new line to separate list items that are dictionaries where appropriate - 📏 Lines of YAML should be no longer than 80 characters. - 🛠️ Use the [dbt JSON schema](https://github.com/dbt-labs/dbt-jsonschema) with any compatible IDE and a YAML formatter (we recommend [Prettier](https://prettier.io/)) to validate your YAML files and format them automatically.
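To illustrate the single-entry rule above, here is a minimal sketch of a model properties file, assuming a hypothetical `orders` model with a `grants` config; both forms parse, but the explicit list is the preferred style:

```yml
models:
  - name: orders
    config:
      grants:
        # Preferred: an explicit list, even when there is only one grantee
        select: ['other_user']
        # Also valid: a single grantee written as a plain string
        # select: 'other_user'
```

Keeping even single entries as lists means adding a second grantee later is a one-line change rather than a change of type.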
diff --git a/website/docs/community/resources/oss-expectations.md b/website/docs/community/resources/oss-expectations.md index e6e5d959c96..7b518424e92 100644 --- a/website/docs/community/resources/oss-expectations.md +++ b/website/docs/community/resources/oss-expectations.md @@ -2,112 +2,122 @@ title: "Expectations for OSS contributors" --- -Whether it's a dbt package, a plugin, `dbt-core`, or this very documentation site, contributing to the open source code that supports the dbt ecosystem is a great way to level yourself up as a developer, and to give back to the community. The goal of this page is to help you understand what to expect when contributing to dbt open source software (OSS). While we can only speak for our own experience as open source maintainers, many of these guidelines apply when contributing to other open source projects, too. +Whether it's `dbt-core`, adapters, packages, or this very documentation site, contributing to the open source code that supports the dbt ecosystem is a great way to share your knowledge, level yourself up as a developer, and to give back to the community. The goal of this page is to help you understand what to expect when contributing to dbt open source software (OSS). -Have you seen things in other OSS projects that you quite like, and think we could learn from? [Open a discussion on the dbt Community Forum](https://discourse.getdbt.com), or start a conversation in the dbt Community Slack (for example: `#community-strategy`, `#dbt-core-development`, `#package-ecosystem`, `#adapter-ecosystem`). We always appreciate hearing from you! +Have you seen things in other OSS projects that you quite like, and think we could learn from? [Open a discussion on the dbt Community Forum](https://discourse.getdbt.com), or start a conversation in the [dbt Community Slack](https://www.getdbt.com/community/join-the-community) (for example: `#community-strategy`, `#dbt-core-development`, `#package-ecosystem`, `#adapter-ecosystem`). We always appreciate hearing from you! ## Principles ### Open source is participatory -Why take time out of your day to write code you don’t _have_ to? We all build dbt together. By using dbt, you’re invested in the future of the tool, and an agent in pushing forward the practice of analytics engineering. You’ve already benefited from using code contributed by community members, and documentation written by community members. Contributing to dbt OSS is your way to pay it forward, as an active participant in the thing we’re all creating together. +We all build dbt together -- whether you write code or contribute your ideas. By using dbt, you're invested in the future of the tool, and have an active role in pushing forward the standard of analytics engineering. You already benefit from using code and documentation contributed by community members. Contributing to the dbt community is your way to be an active participant in the thing we're all creating together. -There’s a very practical reason, too: OSS prioritizes our collective knowledge and experience over any one person’s. We don’t have experience using every database, operating system, security environment, ... We rely on the community of OSS users to hone our product capabilities and documentation to the wide variety of contexts in which it operates. In this way, dbt gets to be the handiwork of thousands, rather than a few dozen. +There's a very practical reason, too: OSS prioritizes our collective knowledge and experience over any one person's. 
We don't have experience using every database, operating system, security environment, ... We rely on the community of OSS users to hone our product capabilities and documentation to the wide variety of contexts in which it operates. In this way, dbt gets to be the handiwork of thousands, rather than a few dozen. -### We take seriously our role as maintainers +### We take seriously our role as maintainers of a standard -In that capacity, we cannot and will not fix every bug ourselves, or code up every feature worth doing. Instead, we’ll do our best to respond to new issues with context (including links to related issues), feedback, alternatives/workarounds, and (whenever possible) pointers to code that would aid a community contributor. If a change is so tricky or involved that the initiative rests solely with us, we’ll do our best to explain the complexity, and when / why we could foresee prioritizing it. Our role also includes maintenance of the backlog of issues, such as closing duplicates, proposals we don’t intend to support, or stale issues (no activity for 180 days). +As a standard, dbt must be reliable and consistent. Our first priority is ensuring the continued high quality of existing dbt capabilities before we introduce net-new capabilities. -### Initiative is everything +We also believe dbt as a framework should be extensible enough to ["make the easy things easy, and the hard things possible"](https://en.wikipedia.org/wiki/Perl#Philosophy). To that end, we _don't_ believe it's appropriate for dbt to have an out-of-the-box solution for every niche problem. Users have the flexibility to achieve many custom behaviors by defining their own macros, materializations, hooks, and more. We view it as our responsibility as maintainers to decide when something should be "possible" — via macros, packages, etc. — and when something should be "easy" — built into the dbt Core standard. -Given that we, as maintainers, will not be able to resolve every bug or flesh out every feature request, we empower you, as a community member, to initiate a change. +So when will we say "yes" to new capabilities for dbt Core? The signals we look for include: +- Upvotes on issues in our GitHub repos +- Open source dbt packages trying to close a gap +- Technical advancements in the ecosystem -- If you open the bug report, it’s more likely to be identified. -- If you open the feature request, it’s more likely to be discussed. -- If you comment on the issue, engaging with ideas and relating it to your own experience, it’s more likely to be prioritized. -- If you open a PR to fix an identified bug, it’s more likely to be fixed. -- If you contribute the code for a well-understood feature, that feature is more likely to be in the next version. -- If you review an existing PR, to confirm it solves a concrete problem for you, it’s more likely to be merged. +In the meantime — we'll do our best to respond to new issues with: +- Clarity about whether the proposed feature falls into the intended scope of dbt Core +- Context (including links to related issues) +- Alternatives and workarounds +- When possible, pointers to code that would aid a community contributor -Sometimes, this can feel like shouting into the void, especially if you aren’t met with an immediate response. We promise that there are dozens (if not hundreds) of folks who will read your comment, maintainers included. It all adds up to a real difference. 
+### Initiative is everything -# Practicalities +Given that we, as maintainers, will not be able to resolve every bug or flesh out every feature request, we empower you, as a community member, to initiate a change. -As dbt OSS is growing in popularity, and dbt Labs has been growing in size, we’re working to involve new people in the responsibilities of OSS maintenance. We really appreciate your patience as our newest maintainers are learning and developing habits. +- If you open the bug report, it's more likely to be identified. +- If you open the feature request, it's more likely to be discussed. +- If you comment on the issue, engaging with ideas and relating it to your own experience, it's more likely to be prioritized. +- If you open a PR to fix an identified bug, it's more likely to be fixed. +- If you comment on an existing PR, to confirm it solves the concrete problem for your team in practice, it's more likely to be merged. -## Discussions +Sometimes, this can feel like shouting into the void, especially if you aren't met with an immediate response. We promise that there are dozens (if not hundreds) of folks who will read your comment, including us as maintainers. It all adds up to a real difference. -Discussions are a relatively new GitHub feature, and we really like them! +## Practicalities -A discussion is best suited to propose a Big Idea, such as brand-new capability in dbt Core, or a new section of the product docs. Anyone can open a discussion, add a comment to an existing one, or reply in a thread. +### Discussions -What can you expect from a new Discussion? Hopefully, comments from other members of the community, who like your idea or have their own ideas for how it could be improved. The most helpful comments are ones that describe the kinds of experiences users and readers should have. Unlike an **issue**, there is no specific code change that would “resolve” a Discussion. +A discussion is best suited to propose a Big Idea, such as brand-new capability in dbt Core or an adapter. Anyone can open a discussion, comment on an existing one, or reply in a thread. -If, over the course of a discussion, we do manage to reach consensus on a way forward, we’ll open a new issue that references the discussion for context. That issue will connect desired outcomes to specific implementation details, as well as perceived limitations and open questions. It will serve as a formal proposal and request for comment. +When you open a new discussion, you might be looking for validation from other members of the community — folks who identify with your problem statement, who like your proposed idea, and who may have their own ideas for how it could be improved. The most helpful comments propose nuances or desirable user experiences to be considered in design and refinement. Unlike an **issue**, there is no specific code change that would “resolve” a discussion. -## Issues +If, over the course of a discussion, we reach a consensus on specific elements of a proposed design, we can open new implementation issues that reference the discussion for context. Those issues will connect desired user outcomes to specific implementation details, acceptance testing, and remaining questions that need answering. -An issue could be a bug you’ve identified while using the product or reading the documentation. It could also be a specific idea you’ve had for how it could be better. 
+### Issues -### Best practices for issues +An issue could be a bug you've identified while using the product or reading the documentation. It could also be a specific idea you've had for a narrow extension of existing functionality. + +#### Best practices for issues - Issues are **not** for support / troubleshooting / debugging help. Please see [dbt support](/docs/dbt-support) for more details and suggestions on how to get help. - Always search existing issues first, to see if someone else had the same idea / found the same bug you did. -- Many repositories offer templates for creating issues, such as when reporting a bug or requesting a new feature. If available, please select the relevant template and fill it out to the best of your ability. This will help other people understand your issue and respond. +- Many dbt repositories offer templates for creating issues, such as reporting a bug or requesting a new feature. If available, please select the relevant template and fill it out to the best of your ability. This information helps us (and others) understand your issue. -### You’ve found an existing issue that interests you. What should you do? +##### You've found an existing issue that interests you. What should you do? -Comment on it! Explain that you’ve run into the same bug, or had a similar idea for a new feature. If the issue includes a detailed proposal for a change, say which parts of the proposal you find most compelling, and which parts give you pause. +Comment on it! Explain that you've run into the same bug, or had a similar idea for a new feature. If the issue includes a detailed proposal for a change, say which parts of the proposal you find most compelling, and which parts give you pause. -### You’ve opened a new issue. What can you expect to happen? +##### You've opened a new issue. What can you expect to happen? -In our most critical repositories (such as `dbt-core`), **our goal is to respond to new issues within 2 standard work days.** While this initial response might be quite lengthy (context, feedback, and pointers that we can offer as maintainers), more often it will be a short acknowledgement that the maintainers are aware of it and don't believe it's in urgent need of resolution. Depending on the nature of your issue, it might be well suited to an external contribution, from you or another community member. +In our most critical repositories (such as `dbt-core`), our goal is to respond to new issues as soon as possible. This initial response will often be a short acknowledgement that the maintainers are aware of the issue, signalling our perception of its urgency. Depending on the nature of your issue, it might be well suited to an external contribution, from you or another community member. -**What does “triage” mean?** In some repositories, we use a `triage` label to keep track of issues that need an initial response from a maintainer. +**What if you're opening an issue in a different repository?** We have engineering teams dedicated to active maintenance of [`dbt-core`](https://github.com/dbt-labs/dbt-core) and its component libraries ([`dbt-common`](https://github.com/dbt-labs/dbt-common) + [`dbt-adapters`](https://github.com/dbt-labs/dbt-adapters)), as well as several platform-specific adapters ([`dbt-snowflake`](https://github.com/dbt-labs/dbt-snowflake), [`dbt-bigquery`](https://github.com/dbt-labs/dbt-bigquery), [`dbt-redshift`](https://github.com/dbt-labs/dbt-redshift), [`dbt-postgres`](https://github.com/dbt-labs/dbt-postgres)). 
We've open-sourced a number of other software projects over the years, and the majority of them do not have the same activity or maintenance guarantees. Check to see if other recent issues have responses, or when the last commit was added to the `main` branch. -**What if I’m opening an issue in a different repository?** We have engineering teams dedicated to active maintainence of [`dbt-core`](https://github.com/dbt-labs/dbt-core) and its component libraries ([`dbt-common`](https://github.com/dbt-labs/dbt-common) + [`dbt-adapters`](https://github.com/dbt-labs/dbt-adapters)), as well as several platform-specific adapters ([`dbt-snowflake`](https://github.com/dbt-labs/dbt-snowflake), [`dbt-bigquery`](https://github.com/dbt-labs/dbt-bigquery), [`dbt-redshift`](https://github.com/dbt-labs/dbt-redshift), [`dbt-postgres`](https://github.com/dbt-labs/dbt-postgres)). We’ve open sourced a number of other software projects over the years, and the majority of them do not have the same activity or maintenance guarantees. Check to see if other recent issues have responses, or when the last commit was added to the `main` branch. +**You're not sure about the status of your issue.** If your issue is in an actively maintained repo and has a `triage` label attached, we're aware it's something that needs a response. If the issue has been triaged, but not prioritized, this could mean: +- The intended scope or user experience of a proposed feature requires further refinement from a maintainer +- We believe the required code change is too tricky for an external contributor -**If my issue is lingering...** Sorry for the delay! If your issue is in an actively maintained repo and has a `triage` label attached, we’re aware it's something that needs a response. +We'll do our best to explain the open questions or complexity, and when / why we could foresee prioritizing it. -**Automation that can help us:** In many repositories, we use a bot that marks issues as stale if they haven’t had any activity for 180 days. This helps us keep our backlog organized and up-to-date. We encourage you to comment on older open issues that you’re interested in, to keep them from being marked stale. You’re also always welcome to comment on closed issues to say that you’re still interested in the proposal. +**Automation that can help us:** In many repositories, we use a bot that marks issues as stale if they haven't had any activity for 180 days. This helps us keep our backlog organized and up-to-date. We encourage you to comment on older open issues that you're interested in, to keep them from being marked stale. You're also always welcome to comment on closed issues to say that you're still interested in the proposal. -### Issue labels +#### Issue labels In all likelihood, the maintainer who responds will also add a number of labels. Not all of these labels are used in every repository. -In some cases, the right resolution to an open issue might be tangential to the codebase.
The right path forward might be in another codebase (we'll transfer it), a documentation update, or a change that you can make yourself in user-space code. In other cases, the issue might describe functionality that the maintainers are unwilling or unable to incorporate into the main codebase. In these cases, a maintainer will close the issue (perhaps using a `wontfix` label) and explain why. + +Some of the most common labels are explained below: | tag | description | | ------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `triage` | This is a new issue which has not yet been reviewed by a maintainer. This label is removed when a maintainer reviews and responds to the issue. | -| `bug` | This issue represents a defect or regression from the behavior that's documented, or that you reasonably expect | -| `enhancement` | This issue represents net-new functionality, including an extension of an existing capability | -| `good_first_issue` | This issue does not require deep knowledge of the codebase to implement. This issue is appropriate for a first-time contributor. | +| `bug` | This issue represents a defect or regression from the behavior that's documented | +| `enhancement` | This issue represents a narrow extension of an existing capability | +| `good_first_issue` | This issue does not require deep knowledge of the codebase to implement, and it is appropriate for a first-time contributor. | | `help_wanted` | This issue is trickier than a "good first issue." The required changes are scattered across the codebase, or more difficult to test. The maintainers are happy to help an experienced community contributor; they aren't planning to prioritize this issue themselves. | | `duplicate` | This issue is functionally identical to another open issue. The maintainers will close this issue and encourage community members to focus conversation on the other one. | | `stale` | This is an old issue which has not recently been updated. In repositories with a lot of activity, stale issues will periodically be closed. | | `wontfix` | This issue does not require a code change in the repository, or the maintainers are unwilling to merge a change which implements the proposed behavior. | -## Pull requests - -PRs are your surest way to make the change you want to see in dbt / packages / docs, especially when the change is straightforward. +### Pull requests -**Every PR should be associated with an issue.** Why? Before you spend a lot of time working on a contribution, we want to make sure that your proposal will be accepted. You should open an issue first, describing your desired outcome and outlining your planned change. If you've found an older issue that's already open, comment on it with an outline for your planned implementation. Exception to this rule: If you're just opening a PR for a cosmetic fix, such as a typo in documentation, an issue isn't needed. +**Every PR should be associated with an issue.** Why? Before you spend a lot of time working on a contribution, we want to make sure that your proposal will be accepted. You should open an issue first, describing your desired outcome and outlining your planned change. 
If you've found an older issue that's already open, comment on it with an outline for your planned implementation _before_ putting in the work to open a pull request. -**PRs must include robust testing.** Comprehensive testing within pull requests is crucial for the stability of our project. By prioritizing robust testing, we ensure the reliability of our codebase, minimize unforeseen issues, and safeguard against potential regressions. We cannot merge changes that risk the backward incompatibility of existing documented behaviors. We understand that creating thorough tests often requires significant effort, and your dedication to this process greatly contributes to the project's overall reliability. Thank you for your commitment to maintaining the integrity of our codebase and the experience of everyone using dbt! +**PRs must include robust testing.** Comprehensive testing within pull requests is crucial for the stability of dbt. By prioritizing robust testing, we ensure the reliability of our codebase, minimize unforeseen issues, and safeguard against potential regressions. **We cannot merge changes that risk the backward incompatibility of existing documented behaviors.** We understand that creating thorough tests often requires significant effort, and your dedication to this process greatly contributes to the project's overall reliability. Thank you for your commitment to maintaining the integrity of our codebase and the experience of everyone using dbt! -**PRs go through two review steps.** First, we aim to respond with feedback on whether we think the implementation is appropriate from a product & usability standpoint. At this point, we will close PRs that we believe fall outside the scope of dbt Core, or which might lead to an inconsistent user experience. This is an important part of our role as maintainers; we're always open to hearing disagreement. If a PR passes this first review, we will queue it up for code review, at which point we aim to test it ourselves and provide thorough feedback within the next month. +**PRs go through two review steps.** First, we aim to respond with feedback on whether we think the implementation is appropriate from a product & usability standpoint. At this point, we will close PRs that we believe fall outside the scope of dbt Core, or which might lead to an inconsistent user experience. This is an important part of our role as maintainers; we're always open to hearing disagreement. If a PR passes this first review, we will queue it up for code review, at which point we aim to test it ourselves and provide thorough feedback. -**We receive more PRs than we can thoroughly review, test, and merge.** Our teams have finite capacity, and our top priority is maintaining a well-scoped, high-quality framework for the tens of thousands of people who use it every week. To that end, we must prioritize overall stability and planned improvements over a long tail of niche potential features. For best results, say what in particular you’d like feedback on, and explain what would it mean to you, your team, and other community members to have the proposed change merged. Smaller PRs tackling well-scoped issues tend to be easier and faster for review. Two recent examples of community-contributed PRs: +**We receive more PRs than we can thoroughly review, test, and merge.** Our teams have finite capacity, and our top priority is maintaining a well-scoped, high-quality framework for the tens of thousands of people who use it every week. 
To that end, we must prioritize overall stability and planned improvements over a long tail of niche potential features. For best results, say what in particular you'd like feedback on, and explain what it would mean to you, your team, and other community members to have the proposed change merged. Smaller PRs tackling well-scoped issues tend to be easier and faster for review. Two examples of community-contributed PRs: - [(dbt-core#9347) Fix configuration of turning test warnings into failures](https://github.com/dbt-labs/dbt-core/pull/9347) - [(dbt-core#9863) Better error message when trying to select a disabled model](https://github.com/dbt-labs/dbt-core/pull/9863) -**Automation that can help us:** Many repositories have a template for pull request descriptions, which will include a checklist that must be completed before the PR can be merged. You don’t have to do all of these things to get an initial PR, but they definitely help. Those many include things like: +**Automation that can help us:** Many repositories have a template for pull request descriptions, which will include a checklist that must be completed before the PR can be merged. You don't have to do all of these things to open an initial PR, but skipping them will delay our review process. Those include: -- **Tests!** When you open a PR, some tests and code checks will run. (For security reasons, some may need to be approved by a maintainer.) We will not merge any PRs with failing tests. If you’re not sure why a test is failing, please say so, and we’ll do our best to get to the bottom of it together. +- **Tests, tests, tests.** When you open a PR, some tests and code checks will run. (For security reasons, some may need to be approved by a maintainer.) We will not merge any PRs with failing tests. If you're not sure why a test is failing, please say so, and we'll do our best to get to the bottom of it together. - **Contributor License Agreement** (CLA): This ensures that we can merge your code, without worrying about unexpected implications for the copyright or license of open source dbt software. For more details, read: ["Contributor License Agreements"](../resources/contributor-license-agreements.md) - **Changelog:** In projects that include a number of changes in each release, we need a reliable way to signal what's been included. The mechanism for this will vary by repository, so keep an eye out for notes about how to update the changelog. -### Inclusion in release versions +#### Inclusion in release versions -Both bug fixes and backwards-compatible new features will be included in the [next minor release](/docs/dbt-versions/core#how-dbt-core-uses-semantic-versioning). Fixes for regressions and net-new bugs that were present in the minor version's original release will be backported to versions with [active support](/docs/dbt-versions/core). Other bug fixes may be backported when we have high confidence that they're narrowly scoped and won't cause unintended side effects. +Both bug fixes and backwards-compatible new features will be included in the [next minor release of dbt Core](/docs/dbt-versions/core#how-dbt-core-uses-semantic-versioning). Fixes for regressions and net-new bugs that were present in the minor version's original release will be backported to versions with [active support](/docs/dbt-versions/core). Other bug fixes may be backported when we have high confidence that they're narrowly scoped and won't cause unintended side effects.
diff --git a/website/docs/community/spotlight/bruno-de-lima.md b/website/docs/community/spotlight/bruno-de-lima.md index f5ffaa6a970..3c373db06e8 100644 --- a/website/docs/community/spotlight/bruno-de-lima.md +++ b/website/docs/community/spotlight/bruno-de-lima.md @@ -2,42 +2,39 @@ id: bruno-de-lima title: Bruno de Lima description: | - Hi all! I'm a Data Engineer, deeply fascinated by the awesomeness dbt. I love talking about dbt, creating content from daily tips to blogposts and engaging with this vibrant community! - - Started my career at the beginning of 2022 at Indicium as an Analytics Engineer, working with dbt from day 1. By 2023, my path took a global trajectory as I joined phData as a Data Engineer, expanding my experiences and forging connections beyond Brazil. While dbt is at the heart of my expertise, I've also delved into data warehouses such as Snowflake, Databricks, and BigQuery; visualization tools like Power BI and Tableau; and several minor modern data stack tools. - - I actively participate in the dbt community, having attended two dbt Meetups in Brazil organized by Indicium; writing about dbt-related topics in my Medium and LinkedIn profiles; contributing to the code; and frequently checking dbt Slack and Discourse, helping (and being helped by) other dbt practitioners. If you are a community member, you may have seen me around! -image: /img/community/spotlight/bruno-de-lima.jpg + Hey all! I was born and raised in Florianopolis, Brazil, and I'm a Senior Data Engineer at phData. I live with my fiancée and I enjoy music, photography, and powerlifting. + + I started my career in early 2022 at Indicium as an Analytics Engineer, working with dbt from day 1. By 2023, my path took a global trajectory as I joined phData as a Data Engineer, expanding my experiences and creating connections beyond Brazil. While dbt is my main expertise, because of my work in consultancy I have experience with a large range of tools, especially the ones related to Snowflake, Databricks, AWS and GCP; but I have already tried several other modern data stack tools too. + + I actively participate in the dbt community, having organized dbt Meetups in Brazil (in Floripa and São Paulo); writing about dbt-related topics in my Medium and LinkedIn profiles; contributing to the dbt Core code and to the docs; and frequently checking dbt Slack and Discourse, helping (and being helped by) other dbt practitioners. If you are a community member, you may have seen me around! +image: /img/community/spotlight/bruno-souza-de-lima-newimage.jpg pronouns: he/him location: Florianópolis, Brazil -jobTitle: Data Engineer +jobTitle: Senior Data Engineer companyName: phData -organization: "" socialLinks: - name: LinkedIn link: https://www.linkedin.com/in/brunoszdl/ - name: Medium link: https://medium.com/@bruno.szdl -dateCreated: 2023-11-05 +dateCreated: 2024-11-03 hide_table_of_contents: true communityAward: true -communityAwardYear: 2023 +communityAwardYear: 2024 --- ## When did you join the dbt community and in what way has it impacted your career? -I was not truly happy with my academic life. My career took a new turn when I enrolled in the Analytics Engineer course by Indicium. That was my first contact with dbt, and I didn't realize how much it would transform my career. After that, I was hired at the company as an Analytics Engineer and worked extensively with dbt from day one. +I was not truly happy with my academic life. My career took a new turn when I enrolled in the Analytics Engineer course by Indicium.
That was my first contact with dbt, and I didn't realize how much it would transform my career. After that, I was hired at the company as an Analytics Engineer and worked extensively with dbt from day one. It took me some time to become an active member of the dbt community. I started working with dbt at the beginning of 2022 and became more involved towards the end of that year, encouraged by Daniel Avancini. I regret not doing this earlier, because being an active community member has been a game-changer for me, as my knowledge of dbt has grown exponentially just by participating in daily discussions on Slack. I have found #advice-dbt-help and #advice-dbt-for-power-users channels particularly useful, as well as the various database-specific channels. Additionally, the #i-made-this and #i-read-this channels have allowed me to learn about the innovative things that community members are doing. Inspired by other members, especially Josh Devlin and Owen Prough, I began answering questions on Slack and Discourse. For questions I couldn't answer, I would try engaging in discussions about possible solutions or provide useful links. I also started posting dbt tips on LinkedIn to help practitioners learn about new features or to refresh their memories about existing ones. -By being more involved in the community, I felt more connected and supported. I received help from other members, and now, I could help others, too. I was happy with this arrangement, but more unexpected surprises came my way. My active participation in Slack, Discourse, and LinkedIn opened doors to new connections and career opportunities. I had the pleasure of meeting a lot of incredible people and receiving exciting job offers, including the one for working at phData. +By being more involved in the community, I felt more connected and supported. I received help from other members, and now, I could help others, too. I was happy with this arrangement, but more unexpected surprises came my way. My active participation in Slack, Discourse, and LinkedIn opened doors to new connections and career opportunities. I had the pleasure of meeting a lot of incredible people and receiving exciting job offers, including the ones for working at phData and teaching at Zach Wilson's data engineering bootcamp. Thanks to the dbt community, I went from feeling uncertain about my career prospects to having a solid career and being surrounded by incredible people. -I would like to thank the Indicium folks for opening the first door for me for this career in data, and not just for me but for lots of people in Brazil trying to migrate from different fields who would not have this opportunity otherwise. - ## What dbt community leader do you identify with? How are you looking to grow your leadership in the dbt community? I identify with Gwen Windflower and Joel Labes, or at least they are the kind of leader I admire. Their strong presence and continuous interaction with all types of dbt enthusiasts make everyone feel welcomed in the community. They uplift those who contribute to the community, whether it's through a LinkedIn post or answering a question, and provide constructive feedback to help them improve. And of course they show a very strong knowledge about dbt and data in general, which is reflected in their contributions. 
diff --git a/website/docs/community/spotlight/christophe-oudar.md b/website/docs/community/spotlight/christophe-oudar.md new file mode 100644 index 00000000000..2381d88a381 --- /dev/null +++ b/website/docs/community/spotlight/christophe-oudar.md @@ -0,0 +1,35 @@ +--- +id: christophe-oudar +title: Christophe Oudar +description: | + I joined the dbt Community in November 2021 after discussing some issues on GitHub. I currently work as a staff engineer at a scaleup in the ad tech industry called Teads, which I joined 11 years ago as a new grad. I've been using dbt Core on BigQuery since then. I write about data engineering both on Medium and Substack. I contribute to dbt-bigquery. I wrote an article that was then featured on the Developer Blog called BigQuery ingestion-time partitioning and partition copy with dbt. +image: /img/community/spotlight/christophe-oudar.jpg +pronouns: he/him +location: Montpellier, France +jobTitle: Staff Engineer +companyName: Teads +socialLinks: + - name: X + link: https://x.com/Kayrnt + - name: LinkedIn + link: https://www.linkedin.com/in/christopheoudar/ + - name: Substack + link: https://smallbigdata.substack.com/ +dateCreated: 2024-11-08 +hide_table_of_contents: true +communityAward: true +communityAwardYear: 2024 +--- + +## When did you join the dbt community and in what way has it impacted your career? + +I joined the community in November 2021 as a way to explore how to move our in-house data modeling layer to dbt. The transition took over a year while we ensured we could cover all our bases and add missing features to dbt-bigquery. That project was one of the stepping stones that helped me to move from senior to staff level at my current job. + +## What dbt community leader do you identify with? How are you looking to grow your leadership in the dbt community? + +I identify with leaders that have strong convictions about how data engineering should move forward but remain open to innovation and ideas from everyone to bring the best to the field and make it as inclusive as possible to all cultures and profiles. I think that could mean people like Jordan Tigani or Mark Raasveldt. In the dbt community, my leadership has looked like helping people struggling and offering better ways to simplify one's day to day work when possible. + +## What have you learned from community members? What do you hope others can learn from you? + +I read a lot of articles about dbt, especially when I got started with it. It helped me a lot to build a proper Slim CI that could fit my company's ways of working. I also got to see how data pipelines were done in other companies and the pros and cons of my approaches. I hope I can share more of that knowledge for people to pick what's best for their needs. + diff --git a/website/docs/community/spotlight/fabiyi-opeyemi.md b/website/docs/community/spotlight/fabiyi-opeyemi.md index 18a311fa437..b5b4bf8c9e0 100644 --- a/website/docs/community/spotlight/fabiyi-opeyemi.md +++ b/website/docs/community/spotlight/fabiyi-opeyemi.md @@ -2,13 +2,11 @@ id: fabiyi-opeyemi title: Opeyemi Fabiyi description: | - I'm an Analytics Engineer with Data Culture, a Data Consulting firm where I use dbt regularly to help clients build quality-tested data assets. I've also got a background in financial services and supply chain.
I'm passionate about helping organizations to become data-driven and I majorly use dbt for data modeling, while the other aspect of the stack is largely dependent on the client infrastructure I'm working for, so I often say I'm tool-agnostic. 😀 - - I'm the founder of Nigeria's Young Data Professional Community. I'm also the organizer of the Lagos dbt Meetup which I started, and one of the organizers of the DataFest Africa Conference. I became an active member of the dbt Community in 2021 & spoke at Coalesce 2022. + I’m an Analytics Engineer with Data Culture, a Data Consulting firm where I use dbt regularly to help clients build quality-tested data assets. Before Data Culture, I worked at Cowrywise, one of the leading Fintech companies in Nigeria, where I was a solo data team member, and that was my first introduction to dbt and Analytics Engineering. Before that, I was doing Data Science and Analytics at Deloitte Nigeria. It’s been an exciting journey since I started using dbt and joining the community. Outside of work, I’m very passionate about Community building and Data Advocacy. I founded one of Nigeria’s most vibrant Data communities, “The Young Data Professional Community.” I’m also the Founder of the Lagos dbt Meetup and one of the organizers of the Largest Data Conference in Africa, DataFest Africa Conference. I became an active member of the dbt community in 2021 & spoke at Coalesce 2022. So when I’m not actively working I’m involved in one community activity or the other. image: /img/community/spotlight/fabiyi-opeyemi.jpg pronouns: he/him location: Lagos, Nigeria -jobTitle: Senior Analytics Engineer +jobTitle: Analytics Manager companyName: Data Culture organization: Young Data Professionals (YDP) socialLinks: @@ -16,10 +14,10 @@ socialLinks: link: https://twitter.com/Opiano_1 - name: LinkedIn link: https://www.linkedin.com/in/opeyemifabiyi/ -dateCreated: 2023-11-06 +dateCreated: 2024-11-02 hide_table_of_contents: true communityAward: true -communityAwardYear: 2023 +communityAwardYear: 2024 --- ## When did you join the dbt community and in what way has it impacted your career? @@ -40,4 +38,4 @@ I've learned how to show empathy as a data professional and be a great engineer ## Anything else interesting you want to tell us? -Maybe, I will consider DevRel as a career sometime because of my innate passion and love for community and people. Several folks tell me I'm a strong DevRel talent and a valuable asset for any product-led company. If you need someone to bounce ideas off of or discuss😃 your community engagement efforts, please feel free to reach out. +Maybe I will consider DevRel as a career sometime because of my innate passion and love for community and people. Several folks tell me I’m a strong DevRel talent and a valuable asset for any product-led company. If you need someone to bounce ideas off of or discuss your community engagement efforts, please feel free to reach out. On a side note, it was really exciting for me to attend Coalesce 2024 in Vegas in person, which allowed me not only to learn but, most importantly, to meet amazing people I’ve only interacted with online, like Bruno, Kuberjain, Dakota and many more; shout-out to Zenlytic and Lightdash for making that possible and, most importantly, a huge shout-out to the dbt Labs community team: Amada, Natasha and everyone on the community team for their constant support in helping to make the dbt Lagos (Nigeria) meetup a success.
diff --git a/website/docs/community/spotlight/jenna-jordan.md b/website/docs/community/spotlight/jenna-jordan.md new file mode 100644 index 00000000000..86f19f125f8 --- /dev/null +++ b/website/docs/community/spotlight/jenna-jordan.md @@ -0,0 +1,36 @@ +--- +id: jenna-jordan +title: Jenna Jordan +description: | + I am a Senior Data Management Consultant with Analytics8, where I advise clients on dbt best practices (especially regarding dbt Mesh and the various shifts in governance and strategy that come with it). My experiences working within a dbt Mesh architecture and all of the difficulties organizations could run into with such a major paradigm shift inspired my peer exchange (role-playing/simulation game) at Coalesce 2024: "Governance co-lab: We the people, in order to govern data, do establish processes." I also experimented with bringing role-playing scenarios to data problems at the September 2024 Chicago dbt Meetup, hosted by Analytics8. I occasionally write long blog posts on my website, if you're up for the read. +image: /img/community/spotlight/jenna-jordan.jpg +pronouns: she/her +location: Asheville, USA +jobTitle: Senior Data Management Consultant +companyName: Analytics8 +socialLinks: + - name: LinkedIn + link: https://www.linkedin.com/in/jennajordan1/ + - name: Personal website + link: https://jennajordan.me/ +dateCreated: 2024-11-01 +hide_table_of_contents: true +communityAward: true +communityAwardYear: 2024 +--- + +## When did you join the dbt community and in what way has it impacted your career? + +My dbt learning journey kicked off with the CoRise (now Uplimit) course Analytics Engineering with dbt, with Emily Hawkins and Jake Hannan, in February 2022 – less than a month after starting as a data engineer with the City of Boston Analytics Team. About a year later, I spearheaded the adoption of dbt at the City and got to build the project and associated architecture from scratch – which is probably the best learning experience you could ask for! I saw the value dbt could bring to improving data management processes at the City, and I knew there were other cities and local governments that could benefit from dbt as well, which motivated me to find my fellow co-speakers Ian Rose and Laurie Merrell to give a talk at Coalesce 2023 called "From Coast to Coast: Implementing dbt in the public sector." As a part of our goal to identify and cultivate a community of dbt practitioners in the public (and adjacent) sectors, we also started the dbt Community Slack channel #industry-public-sector. That experience allowed me to continue to grow my career and find my current role - as well as connect with so many amazing data folks! + +## What dbt community leader do you identify with? How are you looking to grow your leadership in the dbt community? + +There are many leaders in the dbt community that I admire and identify with – I won’t list them all out because I will invariably miss someone (but… you probably know who you are). Technical prowess is always enviable, but I most admire those who bring the human element to data work: those who aren’t afraid to be their authentic selves, cultivate a practice of empathy and compassion, and are driven by curiosity and a desire to help others. I’ve never set out to be a leader, and I still don’t really consider myself to be a leader – I’m much more comfortable in the role of a librarian. I just want to help people by connecting them to the information and resources that they may need. + +## What have you learned from community members? 
What do you hope others can learn from you? + +Pretty much everything I’ve learned about dbt and working in a mature analytics ecosystem I’ve learned from dbt community members. The dbt Community Slack is full of useful information and advice, and has also helped me identify experts about certain topics that I can chat with to learn even more. When I find someone sharing useful information, I usually try to find and follow them on social media so I can see more of their content. If there is one piece of advice I want to share, it is this: don’t be afraid to engage. Ask for help when you need it, but also offer help freely. Engage with the community with the same respect and grace you would offer your friends and coworkers. + +## Anything else interesting you want to tell us? + +Library Science is so much more than the Dewey Decimal System (seriously, ask a librarian about Dewey for a juicy rant). RDF triples (for knowledge graphs) are queried using SPARQL (pronounced “sparkle”). An antelope can be a document. The correct way to write a date/time is ISO-8601. The oldest known table (of the spreadsheet variety) is from 5,000 years ago – record-keeping predates literature by a significant margin. Zip codes aren’t polygons – they don’t contain an area or have boundaries. Computers don’t always return 0.3 when asked to add 0.1 + 0.2. SQL was the sequel to SQUARE. Before computers, people programmed looms (weaving is binary). What? You asked!! On a more serious note – data teams: start hiring librarians. No, seriously. No degree could have prepared me better for what I do in the data field than my M.S. in Library & Information Science. I promise, you want the skillset & mindset that a librarian will bring to your team. diff --git a/website/docs/community/spotlight/meagan-palmer.md b/website/docs/community/spotlight/meagan-palmer.md index ff45a3d6b7d..fffc2a6e0d6 100644 --- a/website/docs/community/spotlight/meagan-palmer.md +++ b/website/docs/community/spotlight/meagan-palmer.md @@ -3,8 +3,11 @@ id: meagan-palmer title: Meagan Palmer description: | I first started using dbt in 2016 or 2017 (I can't remember exactly). Since then, I have moved into data and analytics consulting and have dipped in and out of the dbt Community. + Late last year, I started leading dbt Cloud training courses and spending more time in the dbt Slack. + In consulting, I get to use a range of stacks. I've used dbt with Redshift, Snowflake, and Databricks in production settings with a range of loaders & reporting tools, and I've been enjoying using DuckDB for some home experimentation. + To share some of the experiences, I regularly post to LinkedIn and have recently started Analytics Engineering Today, a twice monthly newsletter about dbt in practice. image: /img/community/spotlight/Meagan-Palmer.png pronouns: she/her @@ -14,9 +17,10 @@ companyName: Altis Consulting socialLinks: - name: LinkedIn link: https://www.linkedin.com/in/meaganpalmer/ -dateCreated: 2024-07-29 +dateCreated: 2024-11-04 hide_table_of_contents: true -communityAward: false +communityAward: true +communityAwardYear: 2024 --- ## When did you join the dbt community and in what way has it impacted your career? @@ -27,9 +31,9 @@ I was fortunate that Jon Bradley at Nearmap had the vision to engage the then Fi Being in Australia, I often see replies from Jeremy Yeo to people in the dbt Slack. His clarity of communication is impressive. -For growth, I'm hoping that others can benefit from the wide range of experience I have. 
My newsletter, Analytics Engineering Today on LinkedIn aims to upskill the dbt Community and shed some light on some useful features that might not be well known. +For growth, I'm hoping that others can benefit from the wide range of experience I have. My LinkedIn newsletter, Analytics Engineering Today, aims to upskill the dbt Community and shed some light on some useful features that might not be well known. -I'll be at Coalesce and am doing some webinars/events later in the year. Come say hi, I love talking dbt and analytics engineering with people. +I was at Coalesce Online and am doing some webinars/events later in the year. Come say hi, I love talking dbt and analytics engineering with people. ## What have you learned from community members? What do you hope others can learn from you? diff --git a/website/docs/community/spotlight/mike-stanley.md b/website/docs/community/spotlight/mike-stanley.md new file mode 100644 index 00000000000..853b0e2f704 --- /dev/null +++ b/website/docs/community/spotlight/mike-stanley.md @@ -0,0 +1,30 @@ +--- +id: mike-stanley +title: Mike Stanley +description: | + I've split my time between financial services and the video games industry. Back when I wrote code every day, I worked in marketing analytics and marketing technology. I've been in the dbt community for about two years. I haven't authored any extensions to dbt's adapters yet but I've given feedback on proposed changes! +image: /img/community/spotlight/mike-stanley.jpg +pronouns: he/him +location: London, United Kingdom +jobTitle: Manager, Data +companyName: Freetrade +socialLinks: + - name: LinkedIn + link: https://www.linkedin.com/in/mike-stanley-31616994/ +dateCreated: 2024-11-05 +hide_table_of_contents: true +communityAward: true +communityAwardYear: 2024 +--- + +## When did you join the dbt community and in what way has it impacted your career? + +I've led data teams for almost ten years now and it can be a challenge to stay current on new technology when you're spending a lot of time on leadership and management. I joined the dbt Community to learn how to get more from it, how to solve problems and use more advanced features, and to learn best practices. I find that answering questions is the way I learn best, so I started helping people! + +## Which dbt Community leader do you identify with? How are you looking to grow your leadership in the dbt community? + +I hope that we can all continue to level up our dbt skills and leave the data environments that we work in better than we found them. + +## What have you learned from community members? What do you hope others can learn from you? + +Everything! People share so much about their best practices and when and how to deviate from them, interesting extensions to dbt that they've worked on, common bugs and problems, and how to think in a "dbtish" way. I couldn't have learned any of that without the community!
diff --git a/website/docs/community/spotlight/original-dbt-athena-maintainers.md b/website/docs/community/spotlight/original-dbt-athena-maintainers.md new file mode 100644 index 00000000000..b3728a71d63 --- /dev/null +++ b/website/docs/community/spotlight/original-dbt-athena-maintainers.md @@ -0,0 +1,44 @@ +--- +id: original-dbt-athena-maintainers +title: The Original dbt-athena Maintainers +description: | + The original dbt-athena Maintainers is a group of 5 people—Jérémy Guiselin, Mattia, Jesse Dobbelaere, Serhii Dimchenko, and Nicola Corda—who met via dbt Slack in the #db-athena channel, with the aim to make make dbt-athena a production-ready adapter. + + In the first periods, Winter 2022 and Spring 2023, we focused on contributing directly to the adapter, adding relevant features like Iceberg and Lake Formation support, and stabilizing some internal behaviour. + + On a second iteration our role was triaging, providing community support and bug fixing. We encouraged community members to make their first contributions, and helped them to merge their PRs. +image: /img/community/spotlight/dbt-athena-groupheadshot.jpg +location: Europe +jobTitle: A group of data-engineers +companyName: Mix of companies +organization: dbt-athena (since November 2022) +socialLinks: + - name: Jérémy's LinkedIn + link: https://www.linkedin.com/in/jrmyy/ + - name: Mattia's LinkedIn + link: https://www.linkedin.com/in/mattia-sappa/ + - name: Jesse's LinkedIn + link: https://www.linkedin.com/in/dobbelaerejesse/ + - name: Serhii's LinkedIn + link: https://www.linkedin.com/in/serhii-dimchenko-075b3061/ + - name: Nicola's LinkedIn + link: https://www.linkedin.com/in/nicolacorda/ +dateCreated: 2024-11-06 +hide_table_of_contents: true +communityAward: true +communityAwardYear: 2024 +--- + +## When did you join the dbt community and in what way has it impacted your career? + +The dbt community allowed the dbt-athena maintainers to meet each other, and share the common goal of making the dbt-athena adapter production-ready. + +## Which dbt Community leader do you identify with? How are you looking to grow your leadership in the dbt community? + +As we grow, we are looking to embody democratic leadership. + +## What have you learned from community members? What do you hope others can learn from you? + +We learned that the power of the community was endless. People started to share best practises, and some of the best practises were incorporated directly in dbt-athena, allowing people to run the adapter smoothly in their production environment. +We reached a point where people started to ask advice for their AWS architecture, which we found pretty awesome. + diff --git a/website/docs/community/spotlight/ruth-onyekwe.md b/website/docs/community/spotlight/ruth-onyekwe.md new file mode 100644 index 00000000000..cf07e98a4f7 --- /dev/null +++ b/website/docs/community/spotlight/ruth-onyekwe.md @@ -0,0 +1,31 @@ +--- +id: ruth-onyekwe +title: Ruth Onyekwe +description: | + I've been working in the world of Data Analytics for over 5 years and have been part of the dbt community for the last 4. With a background in International Business and Digital Marketing, I experienced first hand the need for reliable data to fuel business decisions. This inspired a career move into the technology space to be able to work with the tools and the people that were facilitating this process. Today I am leading teams to deliver data modernization projects, as well as helping grow the analytics arm of my company on a day to day basis. 
I also have the privilege of organising the dbt Meetups in Barcelona, Spain - and am excited to continue to grow the community across Europe. +image: /img/community/spotlight/ruth-onyekwe.jpeg +pronouns: she/her +location: Madrid, Spain +jobTitle: Data Analytics Manager +companyName: Spaulding Ridge +socialLinks: + - name: LinkedIn + link: https://www.linkedin.com/in/ruth-onyekwe/ +dateCreated: 2024-11-07 +hide_table_of_contents: true +communityAward: true +communityAwardYear: 2024 +--- + +## When did you join the dbt community and in what way has it impacted your career? + +I joined the dbt community in 2021, after meeting dbt Labs reps at a conference. Through partnering with dbt Labs and learning the technology, we (Spaulding Ridge) were able to open a whole new offering in our service catalogue, and meet the growing needs of our customers. + +## Which dbt Community leader do you identify with? How are you looking to grow your leadership in the dbt community? + +I identify with the transparent leaders - those willing to share their learnings, knowledge, and experiences. I want to encourage other dbt enthusiasts to stretch themselves professionally and actively participate in the analytics community. + +## What have you learned from community members? What do you hope others can learn from you? + +I've learnt that most of us working in data have experienced the same struggles, be it searching for the best testing frameworks, or deciding how to build optimised and scalable models, or searching for the answers to non-technical questions like how to best organise teams or how to communicate with business stakeholders and translate their needs - we're all faced with the same dilemmas. And the great thing I've learned being in the dbt community, is that if you're brave enough to share your stories, you'll connect with someone who has already gone through those experiences, and can help you reach a solution a lot faster than if you tried to start from scratch. + diff --git a/website/docs/docs/build/conversion-metrics.md b/website/docs/docs/build/conversion-metrics.md index 2ef2c3910b9..2d227f4a703 100644 --- a/website/docs/docs/build/conversion-metrics.md +++ b/website/docs/docs/build/conversion-metrics.md @@ -20,28 +20,29 @@ The specification for conversion metrics is as follows: Note that we use the double colon (::) to indicate whether a parameter is nested within another parameter. So for example, `query_params::metrics` means the `metrics` parameter is nested under `query_params`. ::: -| Parameter | Description | Type | -| --- | --- | --- | -| `name` | The name of the metric. | Required | -| `description` | The description of the metric. | Optional | -| `type` | The type of metric (such as derived, ratio, and so on.). In this case, set as 'conversion' | Required | -| `label` | Required string that defines the display value in downstream tools. Accepts plain text, spaces, and quotes (such as `orders_total` or `"orders_total"`). | Required | -| `type_params` | Specific configurations for each metric type. | Required | -| `conversion_type_params` | Additional configuration specific to conversion metrics. | Required | -| `entity` | The entity for each conversion event. | Required | -| `calculation` | Method of calculation. Either `conversion_rate` or `conversions`. Defaults to `conversion_rate`. | Optional | -| `base_measure` | A list of base measure inputs | Required | -| `base_measure:name` | The base conversion event measure. 
| Required | -| `base_measure:fill_nulls_with` | Set the value in your metric definition instead of null (such as zero). | Optional | -| `base_measure:join_to_timespine` | Boolean that indicates if the aggregated measure should be joined to the time spine table to fill in missing dates. Default `false`. | Optional | -| `conversion_measure` | A list of conversion measure inputs. | Required | -| `conversion_measure:name` | The base conversion event measure.| Required | -| `conversion_measure:fill_nulls_with` | Set the value in your metric definition instead of null (such as zero). | Optional | -| `conversion_measure:join_to_timespine` | Boolean that indicates if the aggregated measure should be joined to the time spine table to fill in missing dates. Default `false`. | Optional | -| `window` | The time window for the conversion event, such as 7 days, 1 week, 3 months. Defaults to infinity. | Optional | -| `constant_properties` | List of constant properties. | Optional | -| `base_property` | The property from the base semantic model that you want to hold constant. | Optional | -| `conversion_property` | The property from the conversion semantic model that you want to hold constant. | Optional | +| Parameter | Description | Required | Type | +| --- | --- | --- | --- | +| `name` | The name of the metric. | Required | String | +| `description` | The description of the metric. | Optional | String | +| `type` | The type of metric (such as derived, ratio, and so on.). In this case, set as 'conversion'. | Required | String | +| `label` | Required string that defines the display value in downstream tools. Accepts plain text, spaces, and quotes (such as `orders_total` or `"orders_total"`). | Required | String | +| `type_params` | Specific configurations for each metric type. | Required | Dict | +| `conversion_type_params` | Additional configuration specific to conversion metrics. | Required | Dict | +| `entity` | The entity for each conversion event. | Required | String | +| `calculation` | Method of calculation. Either `conversion_rate` or `conversions`. Defaults to `conversion_rate`. | Optional | String | +| `base_measure` | A list of base measure inputs. | Required | Dict | +| `base_measure:name` | The base conversion event measure. | Required | String | +| `base_measure:fill_nulls_with` | Set the value in your metric definition instead of null (such as zero). | Optional | String | +| `base_measure:join_to_timespine` | Boolean that indicates if the aggregated measure should be joined to the time spine table to fill in missing dates. Default `false`. | Optional | Boolean | +| `base_measure:filter` | Optional `filter` used to apply to the base measure. | Optional | String | +| `conversion_measure` | A list of conversion measure inputs. | Required | Dict | +| `conversion_measure:name` | The base conversion event measure.| Required | String | +| `conversion_measure:fill_nulls_with` | Set the value in your metric definition instead of null (such as zero). | Optional | String | +| `conversion_measure:join_to_timespine` | Boolean that indicates if the aggregated measure should be joined to the time spine table to fill in missing dates. Default `false`. | Optional | Boolean | +| `window` | The time window for the conversion event, such as 7 days, 1 week, 3 months. Defaults to infinity. | Optional | String | +| `constant_properties` | List of constant properties. | Optional | List | +| `base_property` | The property from the base semantic model that you want to hold constant. 
| Optional | String | +| `conversion_property` | The property from the conversion semantic model that you want to hold constant. | Optional | String | Refer to [additional settings](#additional-settings) to learn how to customize conversion metrics with settings for null values, calculation type, and constant properties. @@ -61,6 +62,7 @@ metrics: name: The name of the measure # Required fill_nulls_with: Set the value in your metric definition instead of null (such as zero) # Optional join_to_timespine: true/false # Boolean that indicates if the aggregated measure should be joined to the time spine table to fill in missing dates. Default `false`. # Optional + filter: The filter used to apply to the base measure. # Optional conversion_measure: name: The name of the measure # Required fill_nulls_with: Set the value in your metric definition instead of null (such as zero) # Optional @@ -105,13 +107,14 @@ Next, define a conversion metric as follows: - name: visit_to_buy_conversion_rate_7d description: "Conversion rate from visiting to transaction in 7 days" type: conversion - label: Visit to Buy Conversion Rate (7-day window) + label: Visit to buy conversion rate (7-day window) type_params: conversion_type_params: base_measure: name: visits fill_nulls_with: 0 - conversion_measure: sellers + filter: {{ Dimension('visits__referrer_id') }} = 'facebook' + conversion_measure: name: sellers entity: user window: 7 days diff --git a/website/docs/docs/build/cumulative-metrics.md b/website/docs/docs/build/cumulative-metrics.md index 056ff79c6eb..24596be8b3d 100644 --- a/website/docs/docs/build/cumulative-metrics.md +++ b/website/docs/docs/build/cumulative-metrics.md @@ -18,21 +18,21 @@ Note that we use the double colon (::) to indicate whether a parameter is nested -| Parameter |
Description
| Type | -| --------- | ----------- | ---- | -| `name` | The name of the metric. | Required | -| `description` | The description of the metric. | Optional | -| `type` | The type of the metric (cumulative, derived, ratio, or simple). | Required | -| `label` | Required string that defines the display value in downstream tools. Accepts plain text, spaces, and quotes (such as `orders_total` or `"orders_total"`). | Required | -| `type_params` | The type parameters of the metric. Supports nested parameters indicated by the double colon, such as `type_params::measure`. | Required | -| `type_params::cumulative_type_params` | Allows you to add a `window`, `period_agg`, and `grain_to_date` configuration. Nested under `type_params`. | Optional | -| `cumulative_type_params::window` | The accumulation window, such as 1 month, 7 days, 1 year. This can't be used with `grain_to_date`. | Optional | -| `cumulative_type_params::grain_to_date` | Sets the accumulation grain, such as `month`, which will accumulate data for one month and then restart at the beginning of the next. This can't be used with `window`. | Optional | -| `cumulative_type_params::period_agg` | Specifies how to aggregate the cumulative metric when summarizing data to a different granularity. Can be used with grain_to_date. Options are
- `first` (Takes the first value within the period)
- `last` (Takes the last value within the period)
- `average` (Calculates the average value within the period).

Defaults to `first` if no `window` is specified. | Optional | -| `type_params::measure` | A dictionary describing the measure you will use. | Required | -| `measure::name` | The measure you are referencing. | Optional | -| `measure::fill_nulls_with` | Set the value in your metric definition instead of null (such as zero). | Optional | -| `measure::join_to_timespine` | Boolean that indicates if the aggregated measure should be joined to the time spine table to fill in missing dates. Default `false`. | Optional | +| Parameter |
Description
| Required | Type | +|-------------|---------------------------------------------------|----------|-----------| +| `name` | The name of the metric. | Required | String | +| `description` | The description of the metric. | Optional | String | +| `type` | The type of the metric (cumulative, derived, ratio, or simple). | Required | String | +| `label` | Required string that defines the display value in downstream tools. Accepts plain text, spaces, and quotes (such as `orders_total` or `"orders_total"`). | Required | String | +| `type_params` | The type parameters of the metric. Supports nested parameters indicated by the double colon, such as `type_params::measure`. | Required | Dict | +| `type_params::measure` | The measure associated with the metric. Supports both shorthand (string) and object syntax. The shorthand is used if only the name is needed, while the object syntax allows specifying additional attributes. | Required | Dict | +| `measure::name` | The name of the measure being referenced. Required if using object syntax for `type_params::measure`. | Optional | String | +| `measure::fill_nulls_with` | Sets a value (for example, 0) to replace nulls in the metric definition. | Optional | Integer or string | +| `measure::join_to_timespine` | Boolean indicating if the aggregated measure should be joined to the time spine table to fill in missing dates. Default is `false`. | Optional | Boolean | +| `type_params::cumulative_type_params` | Configures the attributes like `window`, `period_agg`, and `grain_to_date` for cumulative metrics. | Optional | Dict | +| `cumulative_type_params::window` | Specifies the accumulation window, such as `1 month`, `7 days`, or `1 year`. Cannot be used with `grain_to_date`. | Optional | String | +| `cumulative_type_params::grain_to_date` | Sets the accumulation grain, such as `month`, restarting accumulation at the beginning of each specified grain period. Cannot be used with `window`. | Optional | String | +| `cumulative_type_params::period_agg` | Defines how to aggregate the cumulative metric when summarizing data to a different granularity: `first`, `last`, or `average`. Defaults to `first` if `window` is not specified. | Optional | String |
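To tie the parameters in the new table together, here is a minimal sketch of a cumulative metric definition. The metric and measure names (`cumulative_revenue_mtd`, `order_total`) are illustrative and not taken from this PR:

```yaml
metrics:
  - name: cumulative_revenue_mtd
    description: "Revenue accumulated since the start of each month."
    type: cumulative
    label: Cumulative revenue (month to date)
    type_params:
      measure:
        # Object syntax, so extra attributes can sit alongside the name.
        name: order_total
        fill_nulls_with: 0
        join_to_timespine: true
      cumulative_type_params:
        grain_to_date: month   # restart accumulation at the start of each month
        period_agg: last       # take the last value when rolling up to a coarser grain
```

Because `window` and `grain_to_date` can't be combined, only `grain_to_date` appears in this sketch.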
@@ -45,15 +45,34 @@ Note that we use the double colon (::) to indicate whether a parameter is nested | `type` | The type of the metric (cumulative, derived, ratio, or simple). | Required | | `label` | Required string that defines the display value in downstream tools. Accepts plain text, spaces, and quotes (such as `orders_total` or `"orders_total"`). | Required | | `type_params` | The type parameters of the metric. Supports nested parameters indicated by the double colon, such as `type_params::measure`. | Required | -| `window` | The accumulation window, such as 1 month, 7 days, 1 year. This can't be used with `grain_to_date`. | Optional | +| `window` | The accumulation window, such as `1 month`, `7 days`, or `1 year`. This can't be used with `grain_to_date`. | Optional | | `grain_to_date` | Sets the accumulation grain, such as `month`, which will accumulate data for one month and then restart at the beginning of the next. This can't be used with `window`. | Optional | | `type_params::measure` | A list of measure inputs | Required | -| `measure:name` | The measure you are referencing. | Optional | +| `measure:name` | The name of the measure being referenced. Required if using object syntax for `type_params::measure`. | Optional | | `measure:fill_nulls_with` | Set the value in your metric definition instead of null (such as zero).| Optional | | `measure:join_to_timespine` | Boolean that indicates if the aggregated measure should be joined to the time spine table to fill in missing dates. Default `false`. | Optional | + + +The`type_params::measure` configuration can be written in different ways: +- Shorthand syntax — To only specify the name of the measure, use a simple string value. This is a shorthand approach when no other attributes are required. + ```yaml + type_params: + measure: revenue + ``` +- Object syntax — To add more details or attributes to the measure (such as adding a filter, handling `null` values, or specifying whether to join to a time spine), you need to use the object syntax. This allows for additional configuration beyond just the measure's name. + + ```yaml + type_params: + measure: + name: order_total + fill_nulls_with: 0 + join_to_timespine: true + ``` + + ### Complete specification The following displays the complete specification for cumulative metrics, along with an example: diff --git a/website/docs/docs/build/custom-target-names.md b/website/docs/docs/build/custom-target-names.md index ac7036de572..218fec4283d 100644 --- a/website/docs/docs/build/custom-target-names.md +++ b/website/docs/docs/build/custom-target-names.md @@ -24,6 +24,6 @@ To set a custom target name for a job in dbt Cloud, configure the **Target Name* ## dbt Cloud IDE -When developing in dbt Cloud, you can set a custom target name in your development credentials. Go to your account (from the gear menu in the top right hand corner), select the project under **Credentials**, and update the target name. +When developing in dbt Cloud, you can set a custom target name in your development credentials. Click your account name above the profile icon in the left panel, select **Account settings**, then go to **Credentials**. Choose the project to update the target name. 
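Once a custom target name is set, you can reference it in Jinja to vary behavior per environment. The following is a hedged sketch: the `prod` target name, the `stg_orders` model, and the Snowflake-style `dateadd` function are assumptions for illustration only.

```sql
select *
from {{ ref('stg_orders') }}

-- Keep development and CI runs small; only the production target builds full history.
{% if target.name != 'prod' %}
where ordered_at >= dateadd('day', -3, current_date)
{% endif %}
```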
diff --git a/website/docs/docs/build/data-tests.md b/website/docs/docs/build/data-tests.md index ae3ac9225db..af48e0af267 100644 --- a/website/docs/docs/build/data-tests.md +++ b/website/docs/docs/build/data-tests.md @@ -66,9 +66,27 @@ having total_amount < 0 -The name of this test is the name of the file: `assert_total_payment_amount_is_positive`. Simple enough. +The name of this test is the name of the file: `assert_total_payment_amount_is_positive`. -Singular data tests are easy to write—so easy that you may find yourself writing the same basic structure over and over, only changing the name of a column or model. By that point, the test isn't so singular! In that case, we recommend... +Note, you won't need to include semicolons (;) at the end of the SQL statement in your singular test files as it can cause your test to fail. + +To add a description to a singular test in your project, add a `.yml` file to your `tests` directory, for example, `tests/schema.yml` with the following content: + + + +```yaml +version: 2 +data_tests: + - name: assert_total_payment_amount_is_positive + description: > + Refunds have a negative amount, so the total amount should always be >= 0. + Therefore return records where total amount < 0 to make the test fail. + +``` + + + +Singular data tests are so easy that you may find yourself writing the same basic structure repeatedly, only changing the name of a column or model. By that point, the test isn't so singular! In that case, we recommend generic data tests. ## Generic data tests Certain data tests are generic: they can be reused over and over again. A generic data test is defined in a `test` block, which contains a parametrized query and accepts arguments. It might look like: diff --git a/website/docs/docs/build/derived-metrics.md b/website/docs/docs/build/derived-metrics.md index d5f2221907e..b6184aaeebf 100644 --- a/website/docs/docs/build/derived-metrics.md +++ b/website/docs/docs/build/derived-metrics.md @@ -10,18 +10,18 @@ In MetricFlow, derived metrics are metrics created by defining an expression usi The parameters, description, and type for derived metrics are: -| Parameter | Description | Type | -| --------- | ----------- | ---- | -| `name` | The name of the metric. | Required | -| `description` | The description of the metric. | Optional | -| `type` | The type of the metric (cumulative, derived, ratio, or simple). | Required | -| `label` | Required string that defines the display value in downstream tools. Accepts plain text, spaces, and quotes (such as `orders_total` or `"orders_total"`). | Required | -| `type_params` | The type parameters of the metric. | Required | -| `expr` | The derived expression. You see validation warnings when the derived metric is missing an `expr` or the `expr` does not use all the input metrics. | Required | -| `metrics` | The list of metrics used in the derived metrics. | Required | -| `alias` | Optional alias for the metric that you can use in the expr. | Optional | -| `filter` | Optional filter to apply to the metric. | Optional | -| `offset_window` | Set the period for the offset window, such as 1 month. This will return the value of the metric one month from the metric time. | Optional | +| Parameter | Description | Required | Type | +| --------- | ----------- | ---- | ---- | +| `name` | The name of the metric. | Required | String | +| `description` | The description of the metric. | Optional | String | +| `type` | The type of the metric (cumulative, derived, ratio, or simple). 
| Required | String | +| `label` | Defines the display value in downstream tools. Accepts plain text, spaces, and quotes (such as `orders_total` or `"orders_total"`). | Required | String | +| `type_params` | The type parameters of the metric. | Required | Dict | +| `expr` | The derived expression. You'll see validation warnings when the derived metric is missing an `expr` or the `expr` does not use all the input metrics. | Required | String | +| `metrics` | The list of metrics used in the derived metrics. Each entry can include optional fields like `alias`, `filter`, or `offset_window`. | Required | List | +| `alias` | Optional alias for the metric that you can use in the `expr`. | Optional | String | +| `filter` | Optional filter to apply to the metric. | Optional | String | +| `offset_window` | Set the period for the offset window, such as 1 month. This will return the value of the metric one month from the metric time. | Optional | String | The following displays the complete specification for derived metrics, along with an example. diff --git a/website/docs/docs/build/dimensions.md b/website/docs/docs/build/dimensions.md index 170626ee7cc..975ae4d3160 100644 --- a/website/docs/docs/build/dimensions.md +++ b/website/docs/docs/build/dimensions.md @@ -14,14 +14,14 @@ Groups are defined within semantic models, alongside entities and measures, and All dimensions require a `name`, `type`, and can optionally include an `expr` parameter. The `name` for your Dimension must be unique within the same semantic model. -| Parameter | Description | Type | -| --------- | ----------- | ---- | -| `name` | Refers to the name of the group that will be visible to the user in downstream tools. It can also serve as an alias if the column name or SQL query reference is different and provided in the `expr` parameter.

Dimension names should be unique within a semantic model, but they can be non-unique across different models as MetricFlow uses [joins](/docs/build/join-logic) to identify the right dimension. | Required | -| `type` | Specifies the type of group created in the semantic model. There are two types:

- **Categorical**: Describe attributes or features like geography or sales region.
- **Time**: Time-based dimensions like timestamps or dates. | Required | -| `type_params` | Specific type params such as if the time is primary or used as a partition | Required | -| `description` | A clear description of the dimension | Optional | -| `expr` | Defines the underlying column or SQL query for a dimension. If no `expr` is specified, MetricFlow will use the column with the same name as the group. You can use the column name itself to input a SQL expression. | Optional | -| `label` | A recommended string that defines the display value in downstream tools. Accepts plain text, spaces, and quotes (such as `orders_total` or `"orders_total"`). | Optional | +| Parameter | Description | Required | Type | +| --------- | ----------- | ---- | ---- | +| `name` | Refers to the name of the group that will be visible to the user in downstream tools. It can also serve as an alias if the column name or SQL query reference is different and provided in the `expr` parameter.

Dimension names should be unique within a semantic model, but they can be non-unique across different models as MetricFlow uses [joins](/docs/build/join-logic) to identify the right dimension. | Required | String | +| `type` | Specifies the type of group created in the semantic model. There are two types:

- **Categorical**: Describe attributes or features like geography or sales region.
- **Time**: Time-based dimensions like timestamps or dates. | Required | String | +| `type_params` | Specific type params such as if the time is primary or used as a partition. | Required | Dict | +| `description` | A clear description of the dimension. | Optional | String | +| `expr` | Defines the underlying column or SQL query for a dimension. If no `expr` is specified, MetricFlow will use the column with the same name as the group. You can use the column name itself to input a SQL expression. | Optional | String | +| `label` | Defines the display value in downstream tools. Accepts plain text, spaces, and quotes (such as `orders_total` or `"orders_total"`). | Optional | String | Refer to the following for the complete specification for dimensions: @@ -67,7 +67,7 @@ semantic_models: type: categorical ``` -Dimensions are bound to the primary entity of the semantic model they are defined in. For example the dimensoin `type` is defined in a model that has `transaction` as a primary entity. `type` is scoped to the `transaction` entity, and to reference this dimension you would use the fully qualified dimension name i.e `transaction__type`. +Dimensions are bound to the primary entity of the semantic model they are defined in. For example the dimension `type` is defined in a model that has `transaction` as a primary entity. `type` is scoped to the `transaction` entity, and to reference this dimension you would use the fully qualified dimension name i.e `transaction__type`. MetricFlow requires that all semantic models have a primary entity. This is to guarantee unique dimension names. If your data source doesn't have a primary entity, you need to assign the entity a name using the `primary_entity` key. It doesn't necessarily have to map to a column in that table and assigning the name doesn't affect query generation. We recommend making these "virtual primary entities" unique across your semantic model. An example of defining a primary entity for a data source that doesn't have a primary entity column is below: diff --git a/website/docs/docs/build/environment-variables.md b/website/docs/docs/build/environment-variables.md index c26425401a7..95242069ed9 100644 --- a/website/docs/docs/build/environment-variables.md +++ b/website/docs/docs/build/environment-variables.md @@ -32,7 +32,7 @@ There are four levels of environment variables: To set environment variables at the project and environment level, click **Deploy** in the top left, then select **Environments**. Click **Environments Variables** to add and update your environment variables. - + @@ -62,7 +62,10 @@ Every job runs in a specific, deployment environment, and by default, a job will **Overriding environment variables at the personal level** -You can also set a personal value override for an environment variable when you develop in the dbt-integrated developer environment (IDE). By default, dbt Cloud uses environment variable values set in the project's development environment. To see and override these values, click the gear icon in the top right. Under "Your Profile," click **Credentials** and select your project. Click **Edit** and make any changes in "Environment Variables." +You can also set a personal value override for an environment variable when you develop in the dbt-integrated developer environment (IDE). By default, dbt Cloud uses environment variable values set in the project's development environment. 
To see and override these values, from dbt Cloud: +- Click on your account name in the left side menu and select **Account settings**. +- Under the **Your profile** section, click **Credentials** and then select your project. +- Scroll to the **Environment variables** section and click **Edit** to make the necessary changes. @@ -80,7 +83,7 @@ If you change the value of an environment variable mid-session while using the I To refresh the IDE mid-development, click on either the green 'ready' signal or the red 'compilation error' message at the bottom right corner of the IDE. A new modal will pop up, and you should select the Refresh IDE button. This will load your environment variables values into your development environment. - + There are some known issues with partial parsing of a project and changing environment variables mid-session in the IDE. If you find that your dbt project is not compiling to the values you've set, try deleting the `target/partial_parse.msgpack` file in your dbt project which will force dbt to re-compile your whole project. @@ -102,7 +105,7 @@ dbt Cloud has a number of pre-defined variables built in. Variables are set auto The following environment variable is set automatically for the dbt Cloud IDE: - `DBT_CLOUD_GIT_BRANCH` — Provides the development Git branch name in the [dbt Cloud IDE](/docs/cloud/dbt-cloud-ide/develop-in-the-cloud). - - Available in dbt v 1.6 and later. + - Available in dbt v1.6 and later. - The variable changes when the branch is changed. - Doesn't require restarting the IDE after a branch change. - Currently not available in the [dbt Cloud CLI](/docs/cloud/cloud-cli-installation). @@ -115,7 +118,7 @@ The following environment variables are set automatically: - `DBT_ENV` — This key is reserved for the dbt Cloud application and will always resolve to 'prod'. For deployment runs only. - `DBT_CLOUD_ENVIRONMENT_NAME` — The name of the dbt Cloud environment in which `dbt` is running. -- `DBT_CLOUD_ENVIRONMENT_TYPE` — The type of dbt Cloud environment in which `dbt` is running. The valid values are `development` or `deployment`. +- `DBT_CLOUD_ENVIRONMENT_TYPE` — The type of dbt Cloud environment in which `dbt` is running. The valid values are `dev`, `staging`, or `prod`. It can be unset, so use a default like `{{env_var('DBT_CLOUD_ENVIRONMENT_TYPE', '')}}`. #### Run details diff --git a/website/docs/docs/build/exposures.md b/website/docs/docs/build/exposures.md index 1a85d5fb415..16dfd0e5f73 100644 --- a/website/docs/docs/build/exposures.md +++ b/website/docs/docs/build/exposures.md @@ -69,7 +69,7 @@ dbt test -s +exposure:weekly_jaffle_report ``` -When we generate the dbt Explorer site, you'll see the exposure appear: +When we generate the [dbt Explorer site](/docs/collaborate/explore-projects), you'll see the exposure appear: diff --git a/website/docs/docs/build/hooks-operations.md b/website/docs/docs/build/hooks-operations.md index 6cec2a673c0..842d3fb99a3 100644 --- a/website/docs/docs/build/hooks-operations.md +++ b/website/docs/docs/build/hooks-operations.md @@ -40,8 +40,6 @@ Hooks are snippets of SQL that are executed at different times: Hooks are a more-advanced capability that enable you to run custom SQL, and leverage database-specific actions, beyond what dbt makes available out-of-the-box with standard materializations and configurations. 
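As a quick illustration, a `post-hook` can be configured in `dbt_project.yml` to run a database-specific statement after each model builds. This is a minimal sketch: the project and folder names (`my_project`, `marts`) and the Postgres/Redshift-style `analyze` statement are assumptions, not part of this PR.

```yaml
models:
  my_project:
    marts:
      # Runs after each model in models/marts builds; {{ this }} resolves to that model's relation.
      +post-hook:
        - "analyze {{ this }}"
```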
- - If (and only if) you can't leverage the [`grants` resource-config](/reference/resource-configs/grants), you can use `post-hook` to perform more advanced workflows: * Need to apply `grants` in a more complex way, which the dbt Core `grants` config doesn't (yet) support. diff --git a/website/docs/docs/build/incremental-microbatch.md b/website/docs/docs/build/incremental-microbatch.md index 2cc39e9e3b9..4aff8b5839c 100644 --- a/website/docs/docs/build/incremental-microbatch.md +++ b/website/docs/docs/build/incremental-microbatch.md @@ -8,27 +8,49 @@ id: "incremental-microbatch" :::info Microbatch -The `microbatch` strategy is available in beta for [dbt Cloud Versionless](/docs/dbt-versions/upgrade-dbt-version-in-cloud#versionless) and dbt Core v1.9. We have been developing it behind a flag to prevent unintended interactions with existing custom incremental strategies. To enable this feature, set the environment variable `DBT_EXPERIMENTAL_MICROBATCH` to `True` in your dbt Cloud environments or wherever you're running dbt Core. +The new `microbatch` strategy is available in beta for [dbt Cloud "Latest"](/docs/dbt-versions/cloud-release-tracks) and dbt Core v1.9. + +If you use a custom microbatch macro, set a [distinct behavior flag](/reference/global-configs/behavior-changes#custom-microbatch-strategy) in your `dbt_project.yml` to enable batched execution. If you don't have a custom microbatch macro, you don't need to set this flag as dbt will handle microbatching automatically for any model using the [microbatch strategy](#how-microbatch-compares-to-other-incremental-strategies). Read and participate in the discussion: [dbt-core#10672](https://github.com/dbt-labs/dbt-core/discussions/10672) +Refer to [Supported incremental strategies by adapter](/docs/build/incremental-strategy#supported-incremental-strategies-by-adapter) for a list of supported adapters. + ::: ## What is "microbatch" in dbt? Incremental models in dbt are a [materialization](/docs/build/materializations) designed to efficiently update your data warehouse tables by only transforming and loading _new or changed data_ since the last run. Instead of reprocessing an entire dataset every time, incremental models process a smaller number of rows, and then append, update, or replace those rows in the existing table. This can significantly reduce the time and resources required for your data transformations. -Microbatch incremental models make it possible to process transformations on very large time-series datasets with efficiency and resiliency. When dbt runs a microbatch model — whether for the first time, during incremental runs, or in specified backfills — it will split the processing into multiple queries (or "batches"), based on the `event_time` and `batch_size` you configure. +Microbatch is an incremental strategy designed for large time-series datasets: +- It relies solely on a time column ([`event_time`](/reference/resource-configs/event-time)) to define time-based ranges for filtering. Set the `event_time` column for your microbatch model and its direct parents (upstream models). Note, this is different to `partition_by`, which groups rows into partitions. +- It complements, rather than replaces, existing incremental strategies by focusing on efficiency and simplicity in batch processing. 
+- Unlike traditional incremental strategies, microbatch enables you to [reprocess failed batches](/docs/build/incremental-microbatch#retry), auto-detect [parallel batch execution](#parallel-batch-execution), and eliminate the need to implement complex conditional logic for [backfilling](#backfills). + +- Note, microbatch might not be the best strategy for all use cases. Consider other strategies for use cases such as not having a reliable `event_time` column or if you want more control over the incremental logic. Read more in [How `microbatch` compares to other incremental strategies](#how-microbatch-compares-to-other-incremental-strategies). + +### How microbatch works + +When dbt runs a microbatch model — whether for the first time, during incremental runs, or in specified backfills — it will split the processing into multiple queries (or "batches"), based on the `event_time` and `batch_size` you configure. + +Each "batch" corresponds to a single bounded time period (by default, a single day of data). Where other incremental strategies operate only on "old" and "new" data, microbatch models treat every batch as an atomic unit that can be built or replaced on its own. Each batch is independent and . + +This is a powerful abstraction that makes it possible for dbt to run batches [separately](#backfills), concurrently, and [retry](#retry) them independently. -Each "batch" corresponds to a single bounded time period (by default, a single day of data). Where other incremental strategies operate only on "old" and "new" data, microbatch models treat every batch as an atomic unit that can be built or replaced on its own. Each batch is independent and . This is a powerful abstraction that makes it possible for dbt to run batches separately — in the future, concurrently — and to retry them independently. +## Example -### Example +A `sessions` model aggregates and enriches data that comes from two other models: +- `page_views` is a large, time-series table. It contains many rows, new records almost always arrive after existing ones, and existing records rarely update. It uses the `page_view_start` column as its `event_time`. +- `customers` is a relatively small dimensional table. Customer attributes update often, and not in a time-based manner — that is, older customers are just as likely to change column values as newer customers. The customers model doesn't configure an `event_time` column. -A `sessions` model aggregates and enriches data that comes from two other models. -- `page_views` is a large, time-series table. It contains many rows, new records almost always arrive after existing ones, and existing records rarely update. -- `customers` is a relatively small dimensional table. Customer attributes update often, and not in a time-based manner — that is, older customers are just as likely to change column values as newer customers. +As a result: -The `page_view_start` column in `page_views` is configured as that model's `event_time`. The `customers` model does not configure an `event_time`. Therefore, each batch of `sessions` will filter `page_views` to the equivalent time-bounded batch, and it will not filter `customers` (a full scan for every batch). +- Each batch of `sessions` will filter `page_views` to the equivalent time-bounded batch. +- The `customers` table isn't filtered, resulting in a full scan for every batch. 
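To make that concrete, here is a rough sketch of how the `event_time` settings for this example might be declared in a properties file. The exact configuration block on the full page may differ, and the `begin` date and microbatch settings shown for `sessions` are illustrative:

```yaml
models:
  - name: page_views
    config:
      event_time: page_view_start   # lets dbt filter this input to each batch's time window
  - name: customers
    # No event_time configured, so every batch reads the whole table.
  - name: sessions
    config:
      materialized: incremental
      incremental_strategy: microbatch
      event_time: session_start
      begin: "2024-01-01"
      batch_size: day
```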
+ +:::tip +In addition to configuring `event_time` for the target table, you should also specify it for any upstream models that you want to filter, even if they have different time columns. +::: @@ -40,13 +62,13 @@ models: ``` -We run the `sessions` model on October 1, 2024, and then again on October 2. It produces the following queries: +We run the `sessions` model for October 1, 2024, and then again for October 2. It produces the following queries: -The `event_time` for the `sessions` model is set to `session_start`, which marks the beginning of a user’s session on the website. This setting allows dbt to combine multiple page views (each tracked by their own `page_view_start` timestamps) into a single session. This way, `session_start` differentiates the timing of individual page views from the broader timeframe of the entire user session. +The [`event_time`](/reference/resource-configs/event-time) for the `sessions` model is set to `session_start`, which marks the beginning of a user’s session on the website. This setting allows dbt to combine multiple page views (each tracked by their own `page_view_start` timestamps) into a single session. This way, `session_start` differentiates the timing of individual page views from the broader timeframe of the entire user session. @@ -154,22 +176,65 @@ It does not matter whether the table already contains data for that day. Given t -### Relevant configs +## Relevant configs Several configurations are relevant to microbatch models, and some are required: -| Config | Type | Description | Default | -|----------|------|---------------|---------| -| `event_time` | Column (required) | The column indicating "at what time did the row occur." Required for your microbatch model and any direct parents that should be filtered. | N/A | -| `begin` | Date (required) | The "beginning of time" for the microbatch model. This is the starting point for any initial or full-refresh builds. For example, a daily-grain microbatch model run on `2024-10-01` with `begin = '2023-10-01` will process 366 batches (it's a leap year!) plus the batch for "today." | N/A | -| `batch_size` | String (required) | The granularity of your batches. The default is `day` (and currently this is the only granularity supported). | `day` | -| `lookback` | Integer (optional) | Process X batches prior to the latest bookmark to capture late-arriving records. | `0` | + +| Config | Description | Default | Type | Required | +|----------|---------------|---------|------|---------| +| [`event_time`](/reference/resource-configs/event-time) | The column indicating "at what time did the row occur." Required for your microbatch model and any direct parents that should be filtered. | N/A | Column | Required | +| [`begin`](/reference/resource-configs/begin) | The "beginning of time" for the microbatch model. This is the starting point for any initial or full-refresh builds. For example, a daily-grain microbatch model run on `2024-10-01` with `begin = '2023-10-01` will process 366 batches (it's a leap year!) plus the batch for "today." | N/A | Date | Required | +| [`batch_size`](/reference/resource-configs/batch-size) | The granularity of your batches. Supported values are `hour`, `day`, `month`, and `year` | N/A | String | Required | +| [`lookback`](/reference/resource-configs/lookback) | Process X batches prior to the latest bookmark to capture late-arriving records. 
| `1` | Integer | Optional | +| [`concurrent_batches`](/reference/resource-properties/concurrent_batches) | Overrides dbt's auto-detection of whether to run batches concurrently (at the same time). Read more about [configuring concurrent batches](/docs/build/incremental-microbatch#configure-concurrent_batches). Setting to
* `true` runs batches concurrently (in parallel).
* `false` runs batches sequentially (one after the other). | `None` | Boolean | Optional | +### Required configs for specific adapters +Some adapters require additional configurations for the microbatch strategy. This is because each adapter implements the microbatch strategy differently. + +The following table lists the required configurations for the specific adapters, in addition to the standard microbatch configs: + +| Adapter | `unique_key` config | `partition_by` config | +|----------|------------------|--------------------| +| [`dbt-postgres`](/reference/resource-configs/postgres-configs#incremental-materialization-strategies) | ✅ Required | N/A | +| [`dbt-spark`](/reference/resource-configs/spark-configs#incremental-models) | N/A | ✅ Required | +| [`dbt-bigquery`](/reference/resource-configs/bigquery-configs#merge-behavior-incremental-models) | N/A | ✅ Required | + +For example, if you're using `dbt-postgres`, configure `unique_key` as follows: + + + +```sql +{{ config( + materialized='incremental', + incremental_strategy='microbatch', + unique_key='sales_id', ## required for dbt-postgres + event_time='transaction_date', + begin='2023-01-01', + batch_size='day' +) }} + +select + sales_id, + transaction_date, + customer_id, + product_id, + total_amount +from {{ source('sales', 'transactions') }} + +``` + + In this example, `unique_key` is required because `dbt-postgres` microbatch uses the `merge` strategy, which needs a `unique_key` to identify which rows in the data warehouse need to get merged. Without a `unique_key`, dbt won't be able to match rows between the incoming batch and the existing table. + + + +### Full refresh + As a best practice, we recommend configuring `full_refresh: False` on microbatch models so that they ignore invocations with the `--full-refresh` flag. If you need to reprocess historical data, do so with a targeted backfill that specifies explicit start and end dates. -### Usage +## Usage **You must write your model query to process (read and return) exactly one "batch" of data**. This is a simplifying assumption and a powerful one: - You don’t need to think about `is_incremental` filtering @@ -186,25 +251,28 @@ During standard incremental runs, dbt will process batches according to the curr **Note:** If there’s an upstream model that configures `event_time`, but you *don’t* want the reference to it to be filtered, you can specify `ref('upstream_model').render()` to opt-out of auto-filtering. This isn't generally recommended — most models that configure `event_time` are fairly large, and if the reference is not filtered, each batch will perform a full scan of this input table. -### Backfills +## Backfills Whether to fix erroneous source data or retroactively apply a change in business logic, you may need to reprocess a large amount of historical data. -Backfilling a microbatch model is as simple as selecting it to run or build, and specifying a "start" and "end" for `event_time`. As always, dbt will process the batches between the start and end as independent queries. +Backfilling a microbatch model is as simple as selecting it to run or build, and specifying a "start" and "end" for `event_time`. Note that `--event-time-start` and `--event-time-end` are mutually necessary, meaning that if you specify one, you must specify the other. + +As always, dbt will process the batches between the start and end as independent queries. 
```bash dbt run --event-time-start "2024-09-01" --event-time-end "2024-09-04" ``` + -### Retry +## Retry If one or more of your batches fail, you can use `dbt retry` to reprocess _only_ the failed batches. ![Partial retry](https://github.com/user-attachments/assets/f94c4797-dcc7-4875-9623-639f70c97b8f) -### Timezones +## Timezones For now, dbt assumes that all values supplied are in UTC: @@ -215,7 +283,127 @@ For now, dbt assumes that all values supplied are in UTC: While we may consider adding support for custom time zones in the future, we also believe that defining these values in UTC makes everyone's lives easier. -## How `microbatch` compares to other incremental strategies? +## Parallel batch execution + +The microbatch strategy offers the benefit of updating a model in smaller, more manageable batches. Depending on your use case, configuring your microbatch models to run in parallel offers faster processing, in comparison to running batches sequentially. + +Parallel batch execution means that multiple batches are processed at the same time, instead of one after the other (sequentially) for faster processing of your microbatch models. + +dbt automatically detects whether a batch can be run in parallel in most cases, which means you don’t need to configure this setting. However, the [`concurrent_batches` config](/reference/resource-properties/concurrent_batches) is available as an override (not a gate), allowing you to specify whether batches should or shouldn’t be run in parallel in specific cases. + +For example, if you have a microbatch model with 12 batches, you can execute those batches to run in parallel. Specifically they'll run in parallel limited by the number of [available threads](/docs/running-a-dbt-project/using-threads). + +### Prerequisites + +To enable parallel execution, you must: + +- Use a supported adapter: + - Snowflake + - Databricks + - More adapters coming soon! + - We'll be continuing to test and add concurrency support for adapters. This means that some adapters might get concurrency support _after_ the 1.9 initial release. + +- Meet [additional conditions](#how-parallel-batch-execution-works) described in the following section. + +### How parallel batch execution works + +A batch can only run in parallel if all of these conditions are met: + +| Condition | Parallel execution | Sequential execution| +| ---------------| :------------------: | :----------: | +| **Not** the first batch | ✅ | - | +| **Not** the last batch | ✅ | - | +| [Adapter supports](#prerequisites) parallel batches | ✅ | - | + + +After checking for the conditions in the previous table — and if `concurrent_batches` value isn't set, dbt will intelligently auto-detect if the model invokes the [`{{ this }}`](/reference/dbt-jinja-functions/this) Jinja function. If it references `{{ this }}`, the batches will run sequentially since `{{ this }}` represents the database of the current model and referencing the same relation causes conflict. + +Otherwise, if `{{ this }}` isn't detected (and other conditions are met), the batches will run in parallel, which can be overriden when you [set a value for `concurrent_batches`](/reference/resource-properties/concurrent_batches). + +### Parallel or sequential execution + +Choosing between parallel batch execution and sequential processing depends on the specific requirements of your use case. + +- Parallel batch execution is faster but requires logic independent of batch execution order. 
For example, if you're developing a data pipeline for a system that processes user transactions in batches, each batch is executed in parallel for better performance. However, the logic used to process each transaction shouldn't depend on the order of how batches are executed or completed. +- Sequential processing is slower but essential for calculations like [cumulative metrics](/docs/build/cumulative) in microbatch models. It processes data in the correct order, allowing each step to build on the previous one. + + + +### Configure `concurrent_batches` + +By default, dbt auto-detects whether batches can run in parallel for microbatch models, and this works correctly in most cases. However, you can override dbt's detection by setting the [`concurrent_batches` config](/reference/resource-properties/concurrent_batches) in your `dbt_project.yml` or model `.sql` file to specify parallel or sequential execution, given you meet all the [conditions](#prerequisites): + + + + + + +```yaml +models: + +concurrent_batches: true # value set to true to run batches in parallel +``` + + + + + + + + +```sql +{{ + config( + materialized='incremental', + incremental_strategy='microbatch', + event_time='session_start', + begin='2020-01-01', + batch_size='day + concurrent_batches=true, # value set to true to run batches in parallel + ... + ) +}} + +select ... +``` + + + + +## How microbatch compares to other incremental strategies + +As data warehouses roll out new operations for concurrently replacing/upserting data partitions, we may find that the new operation for the data warehouse is more efficient than what the adapter uses for microbatch. In such instances, we reserve the right the update the default operation for microbatch, so long as it works as intended/documented for models that fit the microbatch paradigm. Most incremental models rely on the end user (you) to explicitly tell dbt what "new" means, in the context of each model, by writing a filter in an `{% if is_incremental() %}` conditional block. You are responsible for crafting this SQL in a way that queries [`{{ this }}`](/reference/dbt-jinja-functions/this) to check when the most recent record was last loaded, with an optional look-back window for late-arriving records. diff --git a/website/docs/docs/build/incremental-models.md b/website/docs/docs/build/incremental-models.md index c48030cc32d..0560797c9bc 100644 --- a/website/docs/docs/build/incremental-models.md +++ b/website/docs/docs/build/incremental-models.md @@ -94,7 +94,7 @@ Not specifying a `unique_key` will result in append-only behavior, which means d The optional `unique_key` parameter specifies a field (or combination of fields) that defines the grain of your model. That is, the field(s) identify a single unique row. You can define `unique_key` in a configuration block at the top of your model, and it can be a single column name or a list of column names. -The `unique_key` should be supplied in your model definition as a string representing a single column or a list of single-quoted column names that can be used together, for example, `['col1', 'col2', …])`. Columns used in this way should not contain any nulls, or the incremental model run may fail. Either ensure that each column has no nulls (for example with `coalesce(COLUMN_NAME, 'VALUE_IF_NULL')`), or define a single-column [surrogate key](https://www.getdbt.com/blog/guide-to-surrogate-key) (for example with [`dbt_utils.generate_surrogate_key`](https://github.com/dbt-labs/dbt-utils#generate_surrogate_key-source)). 
+The `unique_key` should be supplied in your model definition as a string representing a single column or a list of single-quoted column names that can be used together, for example, `['col1', 'col2', …])`. Columns used in this way should not contain any nulls, or the incremental model may fail to match rows and generate duplicate rows. Either ensure that each column has no nulls (for example with `coalesce(COLUMN_NAME, 'VALUE_IF_NULL')`) or define a single-column [surrogate key](https://www.getdbt.com/blog/guide-to-surrogate-key) (for example with [`dbt_utils.generate_surrogate_key`](https://github.com/dbt-labs/dbt-utils#generate_surrogate_key-source)). :::tip In cases where you need multiple columns in combination to uniquely identify each row, we recommend you pass these columns as a list (`unique_key = ['user_id', 'session_number']`), rather than a string expression (`unique_key = 'concat(user_id, session_number)'`). @@ -114,7 +114,7 @@ When you define a `unique_key`, you'll see this behavior for each row of "new" d Please note that if there's a unique_key with more than one row in either the existing target table or the new incremental rows, the incremental model may fail depending on your database and [incremental strategy](/docs/build/incremental-strategy). If you're having issues running an incremental model, it's a good idea to double check that the unique key is truly unique in both your existing database table and your new incremental rows. You can [learn more about surrogate keys here](https://www.getdbt.com/blog/guide-to-surrogate-key). :::info -While common incremental strategies, such as`delete+insert` + `merge`, might use `unique_key`, others don't. For example, the `insert_overwrite` strategy does not use `unique_key`, because it operates on partitions of data rather than individual rows. For more information, see [About incremental_strategy](/docs/build/incremental-strategy). +While common incremental strategies, such as `delete+insert` + `merge`, might use `unique_key`, others don't. For example, the `insert_overwrite` strategy does not use `unique_key`, because it operates on partitions of data rather than individual rows. For more information, see [About incremental_strategy](/docs/build/incremental-strategy). ::: #### `unique_key` example @@ -156,15 +156,17 @@ Building this model incrementally without the `unique_key` parameter would resul ## How do I rebuild an incremental model? If your incremental model logic has changed, the transformations on your new rows of data may diverge from the historical transformations, which are stored in your target table. In this case, you should rebuild your incremental model. -To force dbt to rebuild the entire incremental model from scratch, use the `--full-refresh` flag on the command line. This flag will cause dbt to drop the existing target table in the database before rebuilding it for all-time. +To force dbt to rebuild the entire incremental model from scratch, use the `--full-refresh` flag on the command line. This flag will cause dbt to drop the existing target table in the database before rebuilding it for all-time. ```bash $ dbt run --full-refresh --select my_incremental_model+ ``` + It's also advisable to rebuild any downstream models, as indicated by the trailing `+`. -For detailed usage instructions, check out the [dbt run](/reference/commands/run) documentation. 
+You can optionally use the [`full_refresh config`](/reference/resource-configs/full_refresh) to set a resource to always or never full-refresh at the project or resource level. If specified as true or false, the `full_refresh` config will take precedence over the presence or absence of the `--full-refresh` flag. +For detailed usage instructions, check out the [dbt run](/reference/commands/run) documentation. ## What if the columns of my incremental model change? @@ -212,11 +214,11 @@ Currently, `on_schema_change` only tracks top-level column changes. It does not ### Default behavior -This is the behavior if `on_schema_change: ignore`, which is set by default, and on older versions of dbt. +This is the behavior of `on_schema_change: ignore`, which is set by default. If you add a column to your incremental model, and execute a `dbt run`, this column will _not_ appear in your target table. -Similarly, if you remove a column from your incremental model, and execute a `dbt run`, this column will _not_ be removed from your target table. +If you remove a column from your incremental model and execute a `dbt run`, `dbt run` will fail. Instead, whenever the logic of your incremental changes, execute a full-refresh run of both your incremental model and any downstream models. diff --git a/website/docs/docs/build/incremental-strategy.md b/website/docs/docs/build/incremental-strategy.md index 30de135b09b..9176e962a3a 100644 --- a/website/docs/docs/build/incremental-strategy.md +++ b/website/docs/docs/build/incremental-strategy.md @@ -27,13 +27,13 @@ Click the name of the adapter in the below table for more information about supp | Data platform adapter | `append` | `merge` | `delete+insert` | `insert_overwrite` | `microbatch` | |-----------------------|:--------:|:-------:|:---------------:|:------------------:|:-------------------:| | [dbt-postgres](/reference/resource-configs/postgres-configs#incremental-materialization-strategies) | ✅ | ✅ | ✅ | | ✅ | -| [dbt-redshift](/reference/resource-configs/redshift-configs#incremental-materialization-strategies) | ✅ | ✅ | ✅ | | | +| [dbt-redshift](/reference/resource-configs/redshift-configs#incremental-materialization-strategies) | ✅ | ✅ | ✅ | | ✅ | | [dbt-bigquery](/reference/resource-configs/bigquery-configs#merge-behavior-incremental-models) | | ✅ | | ✅ | ✅ | | [dbt-spark](/reference/resource-configs/spark-configs#incremental-models) | ✅ | ✅ | | ✅ | ✅ | -| [dbt-databricks](/reference/resource-configs/databricks-configs#incremental-models) | ✅ | ✅ | | ✅ | | +| [dbt-databricks](/reference/resource-configs/databricks-configs#incremental-models) | ✅ | ✅ | | ✅ | ✅ | | [dbt-snowflake](/reference/resource-configs/snowflake-configs#merge-behavior-incremental-models) | ✅ | ✅ | ✅ | | ✅ | | [dbt-trino](/reference/resource-configs/trino-configs#incremental) | ✅ | ✅ | ✅ | | | -| [dbt-fabric](/reference/resource-configs/fabric-configs#incremental) | ✅ | ✅ | ✅ | | | +| [dbt-fabric](/reference/resource-configs/fabric-configs#incremental) | ✅ | | ✅ | | | | [dbt-athena](/reference/resource-configs/athena-configs#incremental-models) | ✅ | ✅ | | ✅ | | ### Configuring incremental strategy @@ -241,7 +241,13 @@ select * from {{ ref("some_model") }} ### Custom strategies -Starting from dbt version 1.2 and onwards, users have an easier alternative to [creating an entirely new materialization](/guides/create-new-materializations). 
They define and use their own "custom" incremental strategies by: +:::note limited support + +Custom strategies are not currently supported on the BigQuery and Spark adapters. + +::: + +From dbt v1.2 and onwards, users have an easier alternative to [creating an entirely new materialization](/guides/create-new-materializations). They define and use their own "custom" incremental strategies by: 1. Defining a macro named `get_incremental_STRATEGY_sql`. Note that `STRATEGY` is a placeholder and you should replace it with the name of your custom incremental strategy. 2. Configuring `incremental_strategy: STRATEGY` within an incremental model. @@ -289,6 +295,8 @@ For example, a user-defined strategy named `insert_only` can be defined and used
+If you use a custom microbatch macro, set a [`require_batched_execution_for_custom_microbatch_strategy` behavior flag](/reference/global-configs/behavior-changes#custom-microbatch-strategy) in your `dbt_project.yml` to enable batched execution of your custom strategy. + ### Custom strategies from a package To use the `merge_null_safe` custom incremental strategy from the `example` package: diff --git a/website/docs/docs/build/materializations.md b/website/docs/docs/build/materializations.md index 5deb1e7ce92..723acf87414 100644 --- a/website/docs/docs/build/materializations.md +++ b/website/docs/docs/build/materializations.md @@ -111,7 +111,7 @@ When using the `table` materialization, your model is rebuilt as a You can install [MetricFlow](https://github.com/dbt-labs/metricflow#getting-started) from [PyPI](https://pypi.org/project/dbt-metricflow/). You need to use `pip` to install MetricFlow on Windows or Linux operating systems: + + 1. Create or activate your virtual environment `python -m venv venv` 2. Run `pip install dbt-metricflow` * You can install MetricFlow using PyPI as an extension of your dbt adapter in the command line. To install the adapter, run `python -m pip install "dbt-metricflow[your_adapter_name]"` and add the adapter name at the end of the command. For example, for a Snowflake adapter run `python -m pip install "dbt-metricflow[snowflake]"` + + + + +1. Create or activate your virtual environment `python -m venv venv` +2. Run `pip install dbt-metricflow` + * You can install MetricFlow using PyPI as an extension of your dbt adapter in the command line. To install the adapter, run `python -m pip install "dbt-metricflow[adapter_package_name]"` and add the adapter name at the end of the command. For example, for a Snowflake adapter run `python -m pip install "dbt-metricflow[dbt-snowflake]"` + + + **Note**, you'll need to manage versioning between dbt Core, your adapter, and MetricFlow. Something to note, MetricFlow `mf` commands return an error if you have a Metafont latex package installed. To run `mf` commands, uninstall the package. @@ -249,7 +259,7 @@ Create a new query with MetricFlow and execute it against your data platform. Th ```bash dbt sl query --metrics --group-by # In dbt Cloud -dbt sl query --saved-query # In dbt Cloud CLI +dbt sl query --saved-query # In dbt Cloud mf query --metrics --group-by # In dbt Core diff --git a/website/docs/docs/build/metricflow-time-spine.md b/website/docs/docs/build/metricflow-time-spine.md index e932fb36f53..5499c61a8e4 100644 --- a/website/docs/docs/build/metricflow-time-spine.md +++ b/website/docs/docs/build/metricflow-time-spine.md @@ -7,7 +7,7 @@ tags: [Metrics, Semantic Layer] --- - + It's common in analytics engineering to have a date dimension or "time spine" table as a base table for different types of time-based joins and aggregations. The structure of this table is typically a base column of daily or hourly dates, with additional columns for other time grains, like fiscal quarters, defined based on the base column. You can join other tables to the time spine on the base column to calculate metrics like revenue at a point in time, or to aggregate to a specific time grain. @@ -108,7 +108,7 @@ models: - It needs to reference a column defined under the `columns` key, in this case, `date_hour` and `date_day`, respectively. - It sets the granularity at the column-level using the `granularity` key, in this case, `hour` and `day`, respectively. 
- MetricFlow will use the `standard_granularity_column` as the join key when joining the time spine table to another source table. -- [The `custom_granularities` field](#custom-calendar), (available in Versionless and dbt v1.9 and higher) lets you specify non-standard time periods like `fiscal_year` or `retail_month` that your organization may use. +- [The `custom_granularities` field](#custom-calendar), (available in dbt Cloud Latest and dbt Core v1.9 and higher) lets you specify non-standard time periods like `fiscal_year` or `retail_month` that your organization may use. For an example project, refer to our [Jaffle shop](https://github.com/dbt-labs/jaffle-sl-template/blob/main/models/marts/_models.yml) example. @@ -124,42 +124,6 @@ For an example project, refer to our [Jaffle shop](https://github.com/dbt-labs/j - - -```sql -{{ - config( - materialized = 'table', - ) -}} - -with days as ( - - {{ - dbt_utils.date_spine( - 'day', - "to_date('01/01/2000','mm/dd/yyyy')", - "to_date('01/01/2025','mm/dd/yyyy')" - ) - }} - -), - -final as ( - select cast(date_day as date) as date_day - from days -) - -select * from final --- filter the time spine to a specific range -where date_day > dateadd(year, -4, current_timestamp()) -and date_hour < dateadd(day, 30, current_timestamp()) -``` - - - - - ```sql {{ config( @@ -186,45 +150,12 @@ final as ( select * from final where date_day > dateadd(year, -4, current_timestamp()) -and date_hour < dateadd(day, 30, current_timestamp()) +and date_day < dateadd(day, 30, current_timestamp()) ``` - - ### Daily (BigQuery) Use this model if you're using BigQuery. BigQuery supports `DATE()` instead of `TO_DATE()`: - - - - -```sql -{{config(materialized='table')}} -with days as ( - {{dbt_utils.date_spine( - 'day', - "DATE(2000,01,01)", - "DATE(2025,01,01)" - ) - }} -), - -final as ( - select cast(date_day as date) as date_day - from days -) - -select * -from final --- filter the time spine to a specific range -where date_day > dateadd(year, -4, current_timestamp()) -and date_hour < dateadd(day, 30, current_timestamp()) -``` - - - - - @@ -248,12 +179,11 @@ final as ( select * from final -- filter the time spine to a specific range -where date_day > dateadd(year, -4, current_timestamp()) -and date_hour < dateadd(day, 30, current_timestamp()) +where date_day > date_add(DATE(current_timestamp()), INTERVAL -4 YEAR) +and date_day < date_add(DATE(current_timestamp()), INTERVAL 30 DAY) ``` - @@ -306,42 +236,6 @@ To create this table, you need to create a model in your dbt project called `met ### Daily - - - -```sql -{{ - config( - materialized = 'table', - ) -}} - -with days as ( - - {{ - dbt_utils.date_spine( - 'day', - "to_date('01/01/2000','mm/dd/yyyy')", - "to_date('01/01/2025','mm/dd/yyyy')" - ) - }} - -), - -final as ( - select cast(date_day as date) as date_day - from days -) - -select * from final --- filter the time spine to a specific range -where date_day > dateadd(year, -4, current_timestamp()) -and date_hour < dateadd(day, 30, current_timestamp()) -``` - - - - @@ -371,47 +265,15 @@ final as ( select * from final where date_day > dateadd(year, -4, current_timestamp()) -and date_hour < dateadd(day, 30, current_timestamp()) +and date_day < dateadd(day, 30, current_timestamp()) ``` - ### Daily (BigQuery) Use this model if you're using BigQuery. 
BigQuery supports `DATE()` instead of `TO_DATE()`: - - - - -```sql -{{config(materialized='table')}} -with days as ( - {{dbt_utils.date_spine( - 'day', - "DATE(2000,01,01)", - "DATE(2025,01,01)" - ) - }} -), - -final as ( - select cast(date_day as date) as date_day - from days -) - -select * -from final --- filter the time spine to a specific range -where date_day > dateadd(year, -4, current_timestamp()) -and date_hour < dateadd(day, 30, current_timestamp()) -``` - - - - - ```sql @@ -434,11 +296,10 @@ select * from final -- filter the time spine to a specific range where date_day > dateadd(year, -4, current_timestamp()) -and date_hour < dateadd(day, 30, current_timestamp()) +and date_day < dateadd(day, 30, current_timestamp()) ``` - You only need to include the `date_day` column in the table. MetricFlow can handle broader levels of detail, but finer grains are only supported in versions 1.9 and higher. @@ -449,9 +310,7 @@ You only need to include the `date_day` column in the table. MetricFlow can hand -The ability to configure custom calendars, such as a fiscal calendar, is available in [dbt Cloud Versionless](/docs/dbt-versions/versionless-cloud) or dbt Core [v1.9 and higher](/docs/dbt-versions/core). - -To access this feature, [upgrade to Versionless](/docs/dbt-versions/upgrade-dbt-version-in-cloud#versionless) or your dbt Core version to v1.9 or higher. +The ability to configure custom calendars, such as a fiscal calendar, is available now in [the "Latest" release track in dbt Cloud](/docs/dbt-versions/cloud-release-tracks), and it will be available in [dbt Core v1.9+](/docs/dbt-versions/core-upgrade/upgrading-to-v1.9). @@ -463,9 +322,22 @@ For example, if you use a custom calendar in your organization, such as a fiscal - This is useful for calculating metrics based on a custom calendar, such as fiscal quarters or weeks. - Use the `custom_granularities` key to define a non-standard time period for querying data, such as a `retail_month` or `fiscal_week`, instead of standard options like `day`, `month`, or `year`. -- Ensure the the `standard_granularity_column` is a date time type. - This feature provides more control over how time-based metrics are calculated. + + +When working with custom calendars in MetricFlow, it's important to ensure: + +- Consistent data types — Both your dimension column and the time spine column should use the same data type to allow accurate comparisons. Functions like `DATE_TRUNC` don't change the data type of the input in some databases (like Snowflake). Using different data types can lead to mismatches and inaccurate results. + + We recommend using `DATETIME` or `TIMESTAMP` data types for your time dimensions and time spine, as they support all granularities. The `DATE` data type may not support smaller granularities like hours or minutes. + +- Time zones — MetricFlow currently doesn't perform any timezone manipulation. When working with timezone-aware data, inconsistent time zones may lead to unexpected results during aggregations and comparisons. + +For example, if your time spine column is `TIMESTAMP` type and your dimension column is `DATE` type, comparisons between these columns might not work as intended. To fix this, convert your `DATE` column to `TIMESTAMP`, or make sure both columns are the same data type. + + + ### Add custom granularities To add custom granularities, the Semantic Layer supports custom calendar configurations that allow users to query data using non-standard time periods like `fiscal_year` or `retail_month`. 
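For illustration, a minimal sketch of a time spine model configured with a custom granularity might look like the following (the model name and the `fiscal_year_column` column are hypothetical; confirm the exact keys against the time spine reference for your version):

```yaml
models:
  - name: all_days  # hypothetical time spine model
    time_spine:
      standard_granularity_column: date_day
      custom_granularities:
        - name: fiscal_year
          column_name: fiscal_year_column  # hypothetical column in the time spine table
    columns:
      - name: date_day
        granularity: day
```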
You can define these custom granularities (all lowercased) by modifying your model's YAML configuration like this: diff --git a/website/docs/docs/build/metrics-overview.md b/website/docs/docs/build/metrics-overview.md index 7021a6d7330..57cdd929acb 100644 --- a/website/docs/docs/build/metrics-overview.md +++ b/website/docs/docs/build/metrics-overview.md @@ -15,15 +15,15 @@ This article explains the different supported metric types you can add to your d -| Parameter | Description | Type | -| --------- | ----------- | ---- | -| `name` | Provide the reference name for the metric. This name must be a unique metric name and can consist of lowercase letters, numbers, and underscores. | Required | -| `description` | Describe your metric. | Optional | -| `type` | Define the type of metric, which can be `conversion`, `cumulative`, `derived`, `ratio`, or `simple`. | Required | -| `type_params` | Additional parameters used to configure metrics. `type_params` are different for each metric type. | Required | -| `label` | Required string that defines the display value in downstream tools. Accepts plain text, spaces, and quotes (such as `orders_total` or `"orders_total"`). | Required | -| `config` | Use the [`config`](/reference/resource-properties/config) property to specify configurations for your metric. Supports [`meta`](/reference/resource-configs/meta), [`group`](/reference/resource-configs/group), and [`enabled`](/reference/resource-configs/enabled) configurations. | Optional | -| `filter` | You can optionally add a [filter](#filters) string to any metric type, applying filters to dimensions, entities, time dimensions, or other metrics during metric computation. Consider it as your WHERE clause. | Optional | +| Parameter | Description | Required | Type | +| --------- | ----------- | ---- | ---- | +| `name` | Provide the reference name for the metric. This name must be a unique metric name and can consist of lowercase letters, numbers, and underscores. | Required | String | +| `description` | Describe your metric. | Optional | String | +| `type` | Define the type of metric, which can be `conversion`, `cumulative`, `derived`, `ratio`, or `simple`. | Required | String | +| `type_params` | Additional parameters used to configure metrics. `type_params` are different for each metric type. | Required | Dict | +| `label` | Required string that defines the display value in downstream tools. Accepts plain text, spaces, and quotes (such as `orders_total` or `"orders_total"`). | Required | String | +| `config` | Use the [`config`](/reference/resource-properties/config) property to specify configurations for your metric. Supports [`meta`](/reference/resource-configs/meta), [`group`](/reference/resource-configs/group), and [`enabled`](/reference/resource-configs/enabled) configurations. | Optional | Dict | +| `filter` | You can optionally add a [filter](#filters) string to any metric type, applying filters to dimensions, entities, time dimensions, or other metrics during metric computation. Consider it as your WHERE clause. | Optional | String | Here's a complete example of the metrics spec configuration: @@ -52,16 +52,16 @@ metrics: -| Parameter | Description | Type | -| --------- | ----------- | ---- | -| `name` | Provide the reference name for the metric. This name must be unique amongst all metrics. | Required | -| `description` | Describe your metric. | Optional | -| `type` | Define the type of metric, which can be `simple`, `ratio`, `cumulative`, or `derived`. 
| Required | -| `type_params` | Additional parameters used to configure metrics. `type_params` are different for each metric type. | Required | -| `config` | Provide the specific configurations for your metric. | Optional | -| `meta` | Use the [`meta` config](/reference/resource-configs/meta) to set metadata for a resource. | Optional | -| `label` | Required string that defines the display value in downstream tools. Accepts plain text, spaces, and quotes (such as `orders_total` or `"orders_total"`). | Required | -| `filter` | You can optionally add a filter string to any metric type, applying filters to dimensions, entities, or time dimensions during metric computation. Consider it as your WHERE clause. | Optional | +| Parameter | Description | Required | Type | +| --------- | ----------- | ---- | ---- | +| `name` | Provide the reference name for the metric. This name must be unique amongst all metrics. | Required | String | +| `description` | Describe your metric. | Optional | String | +| `type` | Define the type of metric, which can be `simple`, `ratio`, `cumulative`, or `derived`. | Required | String | +| `type_params` | Additional parameters used to configure metrics. `type_params` are different for each metric type. | Required | Dict | +| `config` | Provide the specific configurations for your metric. | Optional | Dict | +| `meta` | Use the [`meta` config](/reference/resource-configs/meta) to set metadata for a resource. | Optional | String | +| `label` | Required string that defines the display value in downstream tools. Accepts plain text, spaces, and quotes (such as `orders_total` or `"orders_total"`). | Required | String | +| `filter` | You can optionally add a filter string to any metric type, applying filters to dimensions, entities, or time dimensions during metric computation. Consider it as your WHERE clause. | Optional | String | Here's a complete example of the metrics spec configuration: @@ -95,7 +95,8 @@ import SLCourses from '/snippets/_sl-course.md'; Default time granularity for metrics is useful if your time dimension has a very fine grain, like second or hour, but you typically query metrics rolled up at a coarser grain. -To set the default time granularity for metrics, you need to be on dbt Cloud Versionless or dbt v1.9 and higher. +Default time granularity for metrics is available now in [the "Latest" release track in dbt Cloud](/docs/dbt-versions/cloud-release-tracks), and it will be available in [dbt Core v1.9+](/docs/dbt-versions/core-upgrade/upgrading-to-v1.9). + diff --git a/website/docs/docs/build/packages.md b/website/docs/docs/build/packages.md index 0b69d10cee6..9ba4ceeaff5 100644 --- a/website/docs/docs/build/packages.md +++ b/website/docs/docs/build/packages.md @@ -20,9 +20,10 @@ In dbt, libraries like these are called _packages_. dbt's packages are so powerf * Models to understand [Redshift](https://hub.getdbt.com/dbt-labs/redshift/latest/) privileges. * Macros to work with data loaded by [Stitch](https://hub.getdbt.com/dbt-labs/stitch_utils/latest/). -dbt _packages_ are in fact standalone dbt projects, with models and macros that tackle a specific problem area. As a dbt user, by adding a package to your project, the package's models and macros will become part of your own project. This means: +dbt _packages_ are in fact standalone dbt projects, with models, macros, and other resources that tackle a specific problem area. As a dbt user, by adding a package to your project, all of the package's resources will become part of your own project. 
This means: * Models in the package will be materialized when you `dbt run`. * You can use `ref` in your own models to refer to models from the package. +* You can use `source` to refer to sources in the package. * You can use macros in the package in your own project. * It's important to note that defining and installing dbt packages is different from [defining and installing Python packages](/docs/build/python-models#using-pypi-packages) @@ -82,11 +83,7 @@ packages: version: [">=0.7.0", "<0.8.0"] ``` - - -Beginning in v1.7, `dbt deps` "pins" each package by default. See ["Pinning packages"](#pinning-packages) for details. - - +`dbt deps` "pins" each package by default. See ["Pinning packages"](#pinning-packages) for details. Where possible, we recommend installing packages via dbt Hub, since this allows dbt to handle duplicate dependencies. This is helpful in situations such as: * Your project uses both the dbt-utils and Snowplow packages, and the Snowplow package _also_ uses the dbt-utils package. @@ -145,18 +142,8 @@ packages: revision: 4e28d6da126e2940d17f697de783a717f2503188 ``` - - -We **strongly recommend** ["pinning" your packages](#pinning-packages) to a specific release by specifying a release name. - - - - - By default, `dbt deps` "pins" each package. See ["Pinning packages"](#pinning-packages) for details. - - ### Internally hosted tarball URL Some organizations have security requirements to pull resources only from internal services. To address the need to install packages from hosted environments such as Artifactory or cloud storage buckets, dbt Core enables you to install packages from internally-hosted tarball URLs. @@ -175,7 +162,7 @@ Where `name: 'dbt_utils'` specifies the subfolder of `dbt_packages` that's creat #### SSH Key Method (Command Line only) If you're using the Command Line, private packages can be cloned via SSH and an SSH key. -When you use SSH keys to authenticate to your git remote server, you don’t need to supply your username and password each time. Read more about SSH keys, how to generate them, and how to add them to your git provider here: [Github](https://docs.github.com/en/github/authenticating-to-github/connecting-to-github-with-ssh) and [GitLab](https://docs.gitlab.com/ee/ssh/). +When you use SSH keys to authenticate to your git remote server, you don’t need to supply your username and password each time. Read more about SSH keys, how to generate them, and how to add them to your git provider here: [Github](https://docs.github.com/en/github/authenticating-to-github/connecting-to-github-with-ssh) and [GitLab](https://docs.gitlab.com/ee/user/ssh.html). @@ -318,18 +305,6 @@ When you remove a package from your `packages.yml` file, it isn't automatically ### Pinning packages - - -We **strongly recommend** "pinning" your package to a specific release by specifying a tagged release name or a specific commit hash. - -If you do not provide a revision, or if you use the main branch, then any updates to the package will be incorporated into your project the next time you run `dbt deps`. While we generally try to avoid making breaking changes to these packages, they are sometimes unavoidable. Pinning a package revision helps prevent your code from changing without your explicit approval. - -To find the latest release for a package, navigate to the `Releases` tab in the relevant GitHub repository. For example, you can find all of the releases for the dbt-utils package [here](https://github.com/dbt-labs/dbt-utils/releases). 
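Whether or not you rely on the automatic `package-lock.yml` pinning described next, you can still pin a git-installed package explicitly. A minimal sketch (the URL and tag here are illustrative):

```yaml
packages:
  - git: "https://github.com/dbt-labs/dbt-utils.git"  # illustrative git URL
    revision: 1.3.0  # pin to a tagged release or a full commit hash
```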
- - - - - Beginning with v1.7, running [`dbt deps`](/reference/commands/deps) "pins" each package by creating or updating the `package-lock.yml` file in the _project_root_ where `packages.yml` is recorded. - The `package-lock.yml` file contains a record of all packages installed. @@ -337,8 +312,6 @@ Beginning with v1.7, running [`dbt deps`](/reference/commands/deps) "pins" each For example, if you use a branch name, the `package-lock.yml` file pins to the head commit. If you use a version range, it pins to the latest release. In either case, subsequent commits or versions will **not** be installed. To get new commits or versions, run `dbt deps --upgrade` or add `package-lock.yml` to your .gitignore file. - - As of v0.14.0, dbt will warn you if you install a package using the `git` syntax without specifying a revision (see below). ### Configuring packages diff --git a/website/docs/docs/build/python-models.md b/website/docs/docs/build/python-models.md index 811379a0d2c..eac477b03fd 100644 --- a/website/docs/docs/build/python-models.md +++ b/website/docs/docs/build/python-models.md @@ -598,6 +598,34 @@ Python models have capabilities that SQL models do not. They also have some draw - **These capabilities are very new.** As data warehouses develop new features, we expect them to offer cheaper, faster, and more intuitive mechanisms for deploying Python transformations. **We reserve the right to change the underlying implementation for executing Python models in future releases.** Our commitment to you is around the code in your model `.py` files, following the documented capabilities and guidance we're providing here. - **Lack of `print()` support.** The data platform runs and compiles your Python model without dbt's oversight. This means it doesn't display the output of commands such as Python's built-in [`print()`](https://docs.python.org/3/library/functions.html#print) function in dbt's logs. +- + + The following explains other methods you can use for debugging, such as writing messages to a dataframe column: + + - Using platform logs: Use your data platform's logs to debug your Python models. + - Return logs as a dataframe: Create a dataframe containing your logs and build it into the warehouse. + - Develop locally with DuckDB: Test and debug your models locally using DuckDB before deploying them. + + Here's an example of debugging in a Python model: + + ```python + def model(dbt, session): + dbt.config( + materialized = "table" + ) + + df = dbt.ref("my_source_table").df() + + # One option for debugging: write messages to temporary table column + # Pros: visibility + # Cons: won't work if table isn't building for some reason + msg = "something" + df["debugging"] = f"My debug message here: {msg}" + + return df + ``` + + As a general rule, if there's a transformation you could write equally well in SQL or Python, we believe that well-written SQL is preferable: it's more accessible to a greater number of colleagues, and it's easier to write code that's performant at scale. If there's a transformation you _can't_ write in SQL, or where ten lines of elegant and well-annotated Python could save you 1000 lines of hard-to-read Jinja-SQL, Python is the way to go. ## Specific data platforms {#specific-data-platforms} @@ -613,7 +641,8 @@ In their initial launch, Python models are supported on three of the most popula **Installing packages:** Snowpark supports several popular packages via Anaconda. Refer to the [complete list](https://repo.anaconda.com/pkgs/snowflake/) for more details. 
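As a quick, hedged illustration of declaring Anaconda packages from within a model (the package names and the upstream model are placeholders):

```python
def model(dbt, session):
    # Declare the Anaconda packages this model needs; Snowpark resolves them at run time.
    dbt.config(
        materialized="table",
        packages=["numpy", "scikit-learn"],  # placeholder package names from the Anaconda channel
    )

    # `stg_payments` is a placeholder upstream model.
    df = dbt.ref("stg_payments")

    return df
```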
Packages are installed when your model is run. Different models can have different package dependencies. If you use third-party packages, Snowflake recommends using a dedicated virtual warehouse for best performance rather than one with many concurrent users. **Python version:** To specify a different python version, use the following configuration: -``` + +```python def model(dbt, session): dbt.config( materialized = "table", @@ -625,7 +654,7 @@ def model(dbt, session): **External access integrations and secrets**: To query external APIs within dbt Python models, use Snowflake’s [external access](https://docs.snowflake.com/en/developer-guide/external-network-access/external-network-access-overview) together with [secrets](https://docs.snowflake.com/en/developer-guide/external-network-access/secret-api-reference). Here are some additional configurations you can use: -``` +```python import pandas import snowflake.snowpark as snowpark @@ -645,20 +674,43 @@ def model(dbt, session: snowpark.Session): -**About "sprocs":** dbt submits Python models to run as _stored procedures_, which some people call _sprocs_ for short. By default, dbt will create a named sproc containing your model's compiled Python code, and then _call_ it to execute. Snowpark has an Open Preview feature for _temporary_ or _anonymous_ stored procedures ([docs](https://docs.snowflake.com/en/sql-reference/sql/call-with.html)), which are faster and leave a cleaner query history. You can switch this feature on for your models by configuring `use_anonymous_sproc: True`. We plan to switch this on for all dbt + Snowpark Python models starting with the release of dbt Core version 1.4. +**About "sprocs":** dbt submits Python models to run as _stored procedures_, which some people call _sprocs_ for short. By default, dbt will use Snowpark's _temporary_ or _anonymous_ stored procedures ([docs](https://docs.snowflake.com/en/sql-reference/sql/call-with.html)), which are faster and keep query history cleaner than named sprocs containing your model's compiled Python code. To disable this feature, set `use_anonymous_sproc: False` in your model configuration. - +**Docs:** ["Developer Guide: Snowpark Python"](https://docs.snowflake.com/en/developer-guide/snowpark/python/index.html) + +#### Third-party Snowflake packages + +To use a third-party Snowflake package that isn't available in Snowflake Anaconda, upload your package by following [this example](https://docs.snowflake.com/en/developer-guide/udf/python/udf-python-packages#importing-packages-through-a-snowflake-stage), and then configure the `imports` setting in the dbt Python model to reference to the zip file in your Snowflake staging. + +Here’s a complete example configuration using a zip file, including using `imports` in a Python model: + +```python + +def model(dbt, session): + # Configure the model + dbt.config( + materialized="table", + imports=["@mystage/mycustompackage.zip"], # Specify the external package location + ) + + # Example data transformation using the imported package + # (Assuming `some_external_package` has a function we can call) + data = { + "name": ["Alice", "Bob", "Charlie"], + "score": [85, 90, 88] + } + df = pd.DataFrame(data) + + # Process data with the external package + df["adjusted_score"] = df["score"].apply(lambda x: some_external_package.adjust_score(x)) + + # Return the DataFrame as the model output + return df -```yml -# I asked Snowflake Support to enable this Private Preview feature, -# and now my dbt-py models run even faster! 
-models: - use_anonymous_sproc: True ``` - +For more information on using this configuration, refer to [Snowflake's documentation](https://community.snowflake.com/s/article/how-to-use-other-python-packages-in-snowpark) on uploading and using other python packages in Snowpark not published on Snowflake's Anaconda channel. -**Docs:** ["Developer Guide: Snowpark Python"](https://docs.snowflake.com/en/developer-guide/snowpark/python/index.html) diff --git a/website/docs/docs/build/ratio-metrics.md b/website/docs/docs/build/ratio-metrics.md index fdaeb878450..a34dec29d71 100644 --- a/website/docs/docs/build/ratio-metrics.md +++ b/website/docs/docs/build/ratio-metrics.md @@ -10,17 +10,17 @@ Ratio allows you to create a ratio between two metrics. You simply specify a num The parameters, description, and type for ratio metrics are: -| Parameter | Description | Type | -| --------- | ----------- | ---- | -| `name` | The name of the metric. | Required | -| `description` | The description of the metric. | Optional | -| `type` | The type of the metric (cumulative, derived, ratio, or simple). | Required | -| `label` | Required string that defines the display value in downstream tools. Accepts plain text, spaces, and quotes (such as `orders_total` or `"orders_total"`). | Required | -| `type_params` | The type parameters of the metric. | Required | -| `numerator` | The name of the metric used for the numerator, or structure of properties. | Required | -| `denominator` | The name of the metric used for the denominator, or structure of properties. | Required | -| `filter` | Optional filter for the numerator or denominator. | Optional | -| `alias` | Optional alias for the numerator or denominator. | Optional | +| Parameter | Description | Required | Type | +| --------- | ----------- | ---- | ---- | +| `name` | The name of the metric. | Required | String | +| `description` | The description of the metric. | Optional | String | +| `type` | The type of the metric (cumulative, derived, ratio, or simple). | Required | String | +| `label` | Defines the display value in downstream tools. Accepts plain text, spaces, and quotes (such as `orders_total` or `"orders_total"`). | Required | String | +| `type_params` | The type parameters of the metric. | Required | Dict | +| `numerator` | The name of the metric used for the numerator, or structure of properties. | Required | String or dict | +| `denominator` | The name of the metric used for the denominator, or structure of properties. | Required | String or dict | +| `filter` | Optional filter for the numerator or denominator. | Optional | String | +| `alias` | Optional alias for the numerator or denominator. | Optional | String | The following displays the complete specification for ratio metrics, along with an example. diff --git a/website/docs/docs/build/saved-queries.md b/website/docs/docs/build/saved-queries.md index 649885f9506..ed56d13dcc9 100644 --- a/website/docs/docs/build/saved-queries.md +++ b/website/docs/docs/build/saved-queries.md @@ -154,8 +154,6 @@ saved_queries: - - #### Project-level saved queries To enable saved queries at the project level, you can set the `saved-queries` configuration in the [`dbt_project.yml` file](/reference/dbt_project.yml). This saves you time in configuring saved queries in each file: @@ -171,7 +169,6 @@ saved-queries: For more information on `dbt_project.yml` and config naming conventions, see the [dbt_project.yml reference page](/reference/dbt_project.yml#naming-convention). 
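For reference, a project-level saved-query config might look roughly like this sketch (the saved-query name is a placeholder, and depending on your project you may also nest the config under your project name, as with other resource types):

```yaml
saved-queries:
  my_saved_query:
    +cache:
      enabled: true
```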
- To build `saved_queries`, use the [`--resource-type` flag](/reference/global-configs/resource-type) and run the command `dbt build --resource-type saved_query`. diff --git a/website/docs/docs/build/semantic-models.md b/website/docs/docs/build/semantic-models.md index d683d7cd020..5ff363dd44c 100644 --- a/website/docs/docs/build/semantic-models.md +++ b/website/docs/docs/build/semantic-models.md @@ -26,18 +26,18 @@ import SLCourses from '/snippets/\_sl-course.md'; Here we describe the Semantic model components with examples: -| Component | Description | Type | -| --------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -------- | -| [Name](#name) | Choose a unique name for the semantic model. Avoid using double underscores (\_\_) in the name as they're not supported. | Required | -| [Description](#description) | Includes important details in the description | Optional | -| [Model](#model) | Specifies the dbt model for the semantic model using the `ref` function | Required | -| [Defaults](#defaults) | The defaults for the model, currently only `agg_time_dimension` is supported. | Required | -| [Entities](#entities) | Uses the columns from entities as join keys and indicate their type as primary, foreign, or unique keys with the `type` parameter | Required | -| [Primary Entity](#primary-entity) | If a primary entity exists, this component is Optional. If the semantic model has no primary entity, then this property is required. | Optional | -| [Dimensions](#dimensions) | Different ways to group or slice data for a metric, they can be `time` or `categorical` | Required | -| [Measures](#measures) | Aggregations applied to columns in your data model. They can be the final metric or used as building blocks for more complex metrics | Optional | -| Label | The display name for your semantic model `node`, `dimension`, `entity`, and/or `measures` | Optional | -| `config` | Use the [`config`](/reference/resource-properties/config) property to specify configurations for your metric. Supports [`meta`](/reference/resource-configs/meta), [`group`](/reference/resource-configs/group), and [`enabled`](/reference/resource-configs/enabled) configs. | Optional | +| Component | Description | Required | Type | +| ------------ | ---------------- | -------- | -------- | +| [Name](#name) | Choose a unique name for the semantic model. Avoid using double underscores (\_\_) in the name as they're not supported. | Required | String | +| [Description](#description) | Includes important details in the description. | Optional | String | +| [Model](#model) | Specifies the dbt model for the semantic model using the `ref` function. | Required | String | +| [Defaults](#defaults) | The defaults for the model, currently only `agg_time_dimension` is supported. | Required | Dict | +| [Entities](#entities) | Uses the columns from entities as join keys and indicate their type as primary, foreign, or unique keys with the `type` parameter. | Required | List | +| [Primary Entity](#primary-entity) | If a primary entity exists, this component is Optional. If the semantic model has no primary entity, then this property is required. | Optional | String | +| [Dimensions](#dimensions) | Different ways to group or slice data for a metric, they can be `time` or `categorical`. 
| Required | List | +| [Measures](#measures) | Aggregations applied to columns in your data model. They can be the final metric or used as building blocks for more complex metrics. | Optional | List | +| [Label](#label) | The display name for your semantic model `node`, `dimension`, `entity`, and/or `measures`. | Optional | String | +| `config` | Use the [`config`](/reference/resource-properties/config) property to specify configurations for your metric. Supports [`meta`](/reference/resource-configs/meta), [`group`](/reference/resource-configs/group), and [`enabled`](/reference/resource-configs/enabled) configs. | Optional | Dict | ## Semantic models components @@ -119,8 +119,6 @@ semantic_models: type: categorical ``` - - Semantic models support [`meta`](/reference/resource-configs/meta), [`group`](/reference/resource-configs/group), and [`enabled`](/reference/resource-configs/enabled) [`config`](/reference/resource-properties/config) property in either the schema file or at the project level: - Semantic model config in `models/semantic.yml`: @@ -148,8 +146,6 @@ Semantic models support [`meta`](/reference/resource-configs/meta), [`group`](/r For more information on `dbt_project.yml` and config naming conventions, see the [dbt_project.yml reference page](/reference/dbt_project.yml#naming-convention). - - ### Name Define the name of the semantic model. You must define a unique name for the semantic model. The semantic graph will use this name to identify the model, and you can update it at any time. Avoid using double underscores (\_\_) in the name as they're not supported. diff --git a/website/docs/docs/build/simple.md b/website/docs/docs/build/simple.md index f57d498d290..2deb718d780 100644 --- a/website/docs/docs/build/simple.md +++ b/website/docs/docs/build/simple.md @@ -15,17 +15,19 @@ Simple metrics are metrics that directly reference a single measure, without any Note that we use the double colon (::) to indicate whether a parameter is nested within another parameter. So for example, `query_params::metrics` means the `metrics` parameter is nested under `query_params`. ::: -| Parameter | Description | Type | -| --------- | ----------- | ---- | -| `name` | The name of the metric. | Required | -| `description` | The description of the metric. | Optional | -| `type` | The type of the metric (cumulative, derived, ratio, or simple). | Required | -| `label` | Required string that defines the display value in downstream tools. Accepts plain text, spaces, and quotes (such as `orders_total` or `"orders_total"`). | Required | -| `type_params` | The type parameters of the metric. | Required | -| `measure` | A list of measure inputs | Required | -| `measure:name` | The measure you're referencing. | Required | -| `measure:fill_nulls_with` | Set the value in your metric definition instead of null (such as zero). | Optional | -| `measure:join_to_timespine` | Boolean that indicates if the aggregated measure should be joined to the time spine table to fill in missing dates. Default `false`. | Optional | +| Parameter | Description | Required | Type | +| --------- | ----------- | ---- | ---- | +| `name` | The name of the metric. | Required | String | +| `description` | The description of the metric. | Optional | String | +| `type` | The type of the metric (cumulative, derived, ratio, or simple). | Required | String | +| `label` | Defines the display value in downstream tools. Accepts plain text, spaces, and quotes (such as `orders_total` or `"orders_total"`). 
| Required | String | +| `type_params` | The type parameters of the metric. | Required | Dict | +| `measure` | A list of measure inputs. | Required | List | +| `measure:name` | The measure you're referencing. | Required | String | +| `measure:alias` | Optional [`alias`](/reference/resource-configs/alias) to rename the measure. | Optional | String | +| `measure:filter` | Optional `filter` applied to the measure. | Optional | String | +| `measure:fill_nulls_with` | Set the value in your metric definition instead of null (such as zero). | Optional | String | +| `measure:join_to_timespine` | Indicates if the aggregated measure should be joined to the time spine table to fill in missing dates. Default `false`. | Optional | Boolean | The following displays the complete specification for simple metrics, along with an example. @@ -38,6 +40,8 @@ metrics: type_params: # Required measure: name: The name of your measure # Required + alias: The alias applied to the measure. # Optional + filter: The filter applied to the measure. # Optional fill_nulls_with: Set value instead of null (such as zero) # Optional join_to_timespine: true/false # Boolean that indicates if the aggregated measure should be joined to the time spine table to fill in missing dates. # Optional @@ -65,9 +69,11 @@ If you've already defined the measure using the `create_metric: true` parameter, name: customers # The measure you are creating a proxy of. fill_nulls_with: 0 join_to_timespine: true + alias: customer_count + filter: {{ Dimension('customer__customer_total') }} >= 20 - name: large_orders description: "Order with order values over 20." - type: SIMPLE + type: simple label: Large orders type_params: measure: diff --git a/website/docs/docs/build/snapshots.md b/website/docs/docs/build/snapshots.md index f5321aa626a..f72f1eb75de 100644 --- a/website/docs/docs/build/snapshots.md +++ b/website/docs/docs/build/snapshots.md @@ -10,8 +10,7 @@ id: "snapshots" * [Snapshot properties](/reference/snapshot-properties) * [`snapshot` command](/reference/commands/snapshot) - -### What are snapshots? +## What are snapshots? Analysts often need to "look back in time" at previous data states in their mutable tables. While some source data systems are built in a way that makes accessing historical data possible, this is not always the case. dbt provides a mechanism, **snapshots**, which records changes to a mutable over time. Snapshots implement [type-2 Slowly Changing Dimensions](https://en.wikipedia.org/wiki/Slowly_changing_dimension#Type_2:_add_new_row) over mutable source tables. These Slowly Changing Dimensions (or SCDs) identify how a row in a table changes over time. Imagine you have an `orders` table where the `status` field can be overwritten as the order is processed. @@ -36,16 +35,11 @@ This order is now in the "shipped" state, but we've lost the information about w ## Configuring snapshots -:::info Previewing or compiling snapshots in IDE not supported - -It is not possible to "preview data" or "compile sql" for snapshots in dbt Cloud. Instead, [run the `dbt snapshot` command](#how-snapshots-work) in the IDE. - -::: - - To configure snapshots in versions 1.8 and earlier, refer to [Configure snapshots in versions 1.8 and earlier](#configure-snapshots-in-versions-18-and-earlier). These versions use an older syntax where snapshots are defined within a snapshot block in a `.sql` file, typically located in your `snapshots` directory. 
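For orientation, that older block syntax looks roughly like this (the source and column names are illustrative):

```sql
{% snapshot orders_snapshot %}

{{
    config(
      target_schema='snapshots',
      unique_key='id',
      strategy='timestamp',
      updated_at='updated_at'
    )
}}

select * from {{ source('jaffle_shop', 'orders') }}

{% endsnapshot %}
```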
-- Note that defining multiple resources in a single file can significantly slow down parsing and compilation. For faster and more efficient management, consider the updated snapshot YAML syntax, [available in Versionless](/docs/dbt-versions/versionless-cloud) or [dbt Core v1.9 and later](/docs/dbt-versions/core). +- Note that defining multiple resources in a single file can significantly slow down parsing and compilation. For faster and more efficient management, consider the updated snapshot YAML syntax, [available now in the "Latest" release track in dbt Cloud](/docs/dbt-versions/cloud-release-tracks) or [dbt Core v1.9 and later](/docs/dbt-versions/core). + - For more information on how to migrate from the legacy snapshot configurations to the updated snapshot YAML syntax, refer to [Snapshot configuration migration](/reference/snapshot-configs#snapshot-configuration-migration). @@ -60,6 +54,7 @@ Configure your snapshots in YAML files to tell dbt how to detect record changes. snapshots: - name: string relation: relation # source('my_source', 'my_table') or ref('my_model') + [description](/reference/resource-properties/description): markdown_string config: [database](/reference/resource-configs/database): string [schema](/reference/resource-configs/schema): string @@ -68,9 +63,9 @@ snapshots: [unique_key](/reference/resource-configs/unique_key): column_name_or_expression [check_cols](/reference/resource-configs/check_cols): [column_name] | all [updated_at](/reference/resource-configs/updated_at): column_name - [invalidate_hard_deletes](/reference/resource-configs/invalidate_hard_deletes): true | false [snapshot_meta_column_names](/reference/resource-configs/snapshot_meta_column_names): dictionary - + [dbt_valid_to_current](/reference/resource-configs/dbt_valid_to_current): string + [hard_deletes](/reference/resource-configs/hard-deletes): ignore | invalidate | new_record ``` @@ -83,17 +78,18 @@ The following table outlines the configurations available for snapshots: | [schema](/reference/resource-configs/schema) | Specify a custom schema for the snapshot | No | snapshots | | [alias](/reference/resource-configs/alias) | Specify an alias for the snapshot | No | your_custom_snapshot | | [strategy](/reference/resource-configs/strategy) | The snapshot strategy to use. Valid values: `timestamp` or `check` | Yes | timestamp | -| [unique_key](/reference/resource-configs/unique_key) | A column or expression for the record | Yes | id | +| [unique_key](/reference/resource-configs/unique_key) | A column(s) (string or array) or expression for the record | Yes | `id` or `[order_id, product_id]` | | [check_cols](/reference/resource-configs/check_cols) | If using the `check` strategy, then the columns to check | Only if using the `check` strategy | ["status"] | | [updated_at](/reference/resource-configs/updated_at) | If using the `timestamp` strategy, the timestamp column to compare | Only if using the `timestamp` strategy | updated_at | -| [invalidate_hard_deletes](/reference/resource-configs/invalidate_hard_deletes) | Find hard deleted records in source and set `dbt_valid_to` to current time if the record no longer exists | No | True | +| [dbt_valid_to_current](/reference/resource-configs/dbt_valid_to_current) | Set a custom indicator for the value of `dbt_valid_to` in current snapshot records (like a future date). By default, this value is `NULL`. 
When configured, dbt will use the specified value instead of `NULL` for `dbt_valid_to` for current records in the snapshot table.| No | string | | [snapshot_meta_column_names](/reference/resource-configs/snapshot_meta_column_names) | Customize the names of the snapshot meta fields | No | dictionary | +| [hard_deletes](/reference/resource-configs/hard-deletes) | Specify how to handle deleted rows from the source. Supported options are `ignore` (default), `invalidate` (replaces the legacy `invalidate_hard_deletes=true`), and `new_record`.| No | string | + - In versions prior to v1.9, the `target_schema` (required) and `target_database` (optional) configurations defined a single schema or database to build a snapshot across users and environment. This created problems when testing or developing a snapshot, as there was no clear separation between development and production environments. In v1.9, `target_schema` became optional, allowing snapshots to be environment-aware. By default, without `target_schema` or `target_database` defined, snapshots now use the `generate_schema_name` or `generate_database_name` macros to determine where to build. Developers can still set a custom location with [`schema`](/reference/resource-configs/schema) and [`database`](/reference/resource-configs/database) configs, consistent with other resource types. - A number of other configurations are also supported (for example, `tags` and `post-hook`). For the complete list, refer to [Snapshot configurations](/reference/snapshot-configs). - You can configure snapshots from both the `dbt_project.yml` file and a `config` block. For more information, refer to the [configuration docs](/reference/snapshot-configs). - ### Add a snapshot to your project To add a snapshot to your project follow these steps. For users on versions 1.8 and earlier, refer to [Configure snapshots in versions 1.8 and earlier](#configure-snapshots-in-versions-18-and-earlier). @@ -112,6 +108,7 @@ To add a snapshot to your project follow these steps. For users on versions 1.8 unique_key: id strategy: timestamp updated_at: updated_at + dbt_valid_to_current: "to_date('9999-12-31')" # Specifies that current records should have `dbt_valid_to` set to `'9999-12-31'` instead of `NULL`. ``` @@ -172,6 +169,15 @@ This strategy handles column additions and deletions better than the `check` str + + + +By default, `dbt_valid_to` is `NULL` for current records. However, if you set the [`dbt_valid_to_current` configuration](/reference/resource-configs/dbt_valid_to_current) (available in dbt Core v1.9+), `dbt_valid_to` will be set to your specified value (such as `9999-12-31`) for current records. + +This allows for straightforward date range filtering. + + + The unique key is used by dbt to match rows up, so it's extremely important to make sure this key is actually unique! If you're snapshotting a source, I'd recommend adding a uniqueness test to your source ([example](https://github.com/dbt-labs/jaffle_shop/blob/8e7c853c858018180bef1756ec93e193d9958c5b/models/staging/schema.yml#L26)). @@ -204,12 +210,18 @@ Snapshots can't be rebuilt. Because of this, it's a good idea to put snapshots i ### How snapshots work When you run the [`dbt snapshot` command](/reference/commands/snapshot): -* **On the first run:** dbt will create the initial snapshot table — this will be the result set of your `select` statement, with additional columns including `dbt_valid_from` and `dbt_valid_to`. All records will have a `dbt_valid_to = null`. 
+* **On the first run:** dbt will create the initial snapshot table — this will be the result set of your `select` statement, with additional columns including `dbt_valid_from` and `dbt_valid_to`. All records will have a `dbt_valid_to = null` or the value specified in [`dbt_valid_to_current`](/reference/resource-configs/dbt_valid_to_current) (available in dbt Core 1.9+) if configured. * **On subsequent runs:** dbt will check which records have changed or if any new records have been created: - - The `dbt_valid_to` column will be updated for any existing records that have changed - - The updated record and any new records will be inserted into the snapshot table. These records will now have `dbt_valid_to = null` + - The `dbt_valid_to` column will be updated for any existing records that have changed. + - The updated record and any new records will be inserted into the snapshot table. These records will now have `dbt_valid_to = null` or the value configured in `dbt_valid_to_current` (available in dbt Core v1.9+). + + -Note, these column names can be customized to your team or organizational conventions using the [snapshot_meta_column_names](#snapshot-meta-fields) config. +#### Note +- These column names can be customized to your team or organizational conventions using the [snapshot_meta_column_names](#snapshot-meta-fields) config. +- Use the `dbt_valid_to_current` config to set a custom indicator for the value of `dbt_valid_to` in current snapshot records (like a future date such as `9999-12-31`). By default, this value is `NULL`. When set, dbt will use this specified value instead of `NULL` for `dbt_valid_to` for current records in the snapshot table. +- Use the [`hard_deletes`](/reference/resource-configs/hard-deletes) config to track hard deletes by adding a new record when row become "deleted" in source. Supported options are `ignore`, `invalidate`, and `new_record`. + Snapshots can be referenced in downstream models the same way as referencing models — by using the [ref](/reference/dbt-jinja-functions/ref) function. @@ -286,7 +298,7 @@ The `check` snapshot strategy can be configured to track changes to _all_ column ::: -**Example Usage** +**Example usage** @@ -336,15 +348,64 @@ snapshots: ### Hard deletes (opt-in) + + +In dbt v1.9 and higher, the [`hard_deletes`](/reference/resource-configs/hard-deletes) config replaces the `invalidate_hard_deletes` config to give you more control on how to handle deleted rows from the source. The `hard_deletes` config is not a separate strategy but an additional opt-in feature that can be used with any snapshot strategy. + +The `hard_deletes` config has three options/fields: +| Field | Description | +| --------- | ----------- | +| `ignore` (default) | No action for deleted records. | +| `invalidate` | Behaves the same as the existing `invalidate_hard_deletes=true`, where deleted records are invalidated by setting `dbt_valid_to`. 
| +| `new_record` | Tracks deleted records as new rows using the `dbt_is_deleted` [meta field](#snapshot-meta-fields) when records are deleted.| + +import HardDeletes from '/snippets/_hard-deletes.md'; + + + +#### Example usage + + + +```yaml +snapshots: + - name: orders_snapshot_hard_delete + relation: source('jaffle_shop', 'orders') + config: + schema: snapshots + unique_key: id + strategy: timestamp + updated_at: updated_at + hard_deletes: new_record # options are: 'ignore', 'invalidate', or 'new_record' +``` + + + +In this example, the `hard_deletes: new_record` config will add a new row for deleted records with the `dbt_is_deleted` column set to `True`. +Any restored records are added as new rows with the `dbt_is_deleted` field set to `False`. + +The resulting table will look like this: + +| id | status | updated_at | dbt_valid_from | dbt_valid_to | dbt_is_deleted | +| -- | ------ | ---------- | -------------- | ------------ | -------------- | +| 1 | pending | 2024-01-01 10:47 | 2024-01-01 10:47 | 2024-01-01 11:05 | False | +| 1 | shipped | 2024-01-01 11:05 | 2024-01-01 11:05 | 2024-01-01 11:20 | False | +| 1 | deleted | 2024-01-01 11:20 | 2024-01-01 11:20 | 2024-01-01 12:00 | True | +| 1 | restored | 2024-01-01 12:00 | 2024-01-01 12:00 | | False | + + + + + Rows that are deleted from the source query are not invalidated by default. With the config option `invalidate_hard_deletes`, dbt can track rows that no longer exist. This is done by left joining the snapshot table with the source table, and filtering the rows that are still valid at that point, but no longer can be found in the source table. `dbt_valid_to` will be set to the current snapshot time. This configuration is not a different strategy as described above, but is an additional opt-in feature. It is not enabled by default since it alters the previous behavior. For this configuration to work with the `timestamp` strategy, the configured `updated_at` column must be of timestamp type. Otherwise, queries will fail due to mixing data types. -**Example Usage** +Note, in v1.9 and higher, the [`hard_deletes`](/reference/resource-configs/hard-deletes) config replaces the `invalidate_hard_deletes` config for better control over how to handle deleted rows from the source. - +#### Example usage @@ -370,61 +431,24 @@ For this configuration to work with the `timestamp` strategy, the configured `up - - - - -```yaml -snapshots: - - name: orders_snapshot_hard_delete - relation: source('jaffle_shop', 'orders') - config: - schema: snapshots - unique_key: id - strategy: timestamp - updated_at: updated_at - invalidate_hard_deletes: true -``` - - - - - -## Snapshot query best practices - -This section outlines some best practices for writing snapshot queries: - -- #### Snapshot source data - Your models should then select from these snapshots, treating them like regular data sources. As much as possible, snapshot your source data in its raw form and use downstream models to clean up the data - -- #### Use the `source` function in your query - This helps when understanding data lineage in your project. - -- #### Include as many columns as possible - In fact, go for `select *` if performance permits! Even if a column doesn't feel useful at the moment, it might be better to snapshot it in case it becomes useful – after all, you won't be able to recreate the column later. - -- #### Avoid joins in your snapshot query - Joins can make it difficult to build a reliable `updated_at` timestamp. 
Instead, snapshot the two tables separately, and join them in downstream models. - -- #### Limit the amount of transformation in your query - If you apply business logic in a snapshot query, and this logic changes in the future, it can be impossible (or, at least, very difficult) to apply the change in logic to your snapshots. - -Basically – keep your query as simple as possible! Some reasonable exceptions to these recommendations include: -* Selecting specific columns if the table is wide. -* Doing light transformation to get data into a reasonable shape, for example, unpacking a blob to flatten your source data into columns. - ## Snapshot meta-fields Snapshot tables will be created as a clone of your source dataset, plus some additional meta-fields*. -Starting in 1.9 or with [dbt Cloud Versionless](/docs/dbt-versions/upgrade-dbt-version-in-cloud#versionless), these column names can be customized to your team or organizational conventions via the [`snapshot_meta_column_names`](/reference/resource-configs/snapshot_meta_column_names) config. +In dbt Core v1.9+ (or available sooner in [the "Latest" release track in dbt Cloud](/docs/dbt-versions/cloud-release-tracks)): +- These column names can be customized to your team or organizational conventions using the [`snapshot_meta_column_names`](/reference/resource-configs/snapshot_meta_column_names) config. +- Use the [`dbt_valid_to_current` config](/reference/resource-configs/dbt_valid_to_current) to set a custom indicator for the value of `dbt_valid_to` in current snapshot records (like a future date such as `9999-12-31`). By default, this value is `NULL`. When set, dbt will use this specified value instead of `NULL` for `dbt_valid_to` for current records in the snapshot table. +- Use the [`hard_deletes`](/reference/resource-configs/hard-deletes) config to track deleted records as new rows with the `dbt_is_deleted` meta field when `hard_deletes='new_record'` is configured. + | Field | Meaning | Usage | | -------------- | ------- | ----- | | dbt_valid_from | The timestamp when this snapshot row was first inserted | This column can be used to order the different "versions" of a record. | -| dbt_valid_to | The timestamp when this row became invalidated. | The most recent snapshot record will have `dbt_valid_to` set to `null`. | +| dbt_valid_to | The timestamp when this row became invalidated.
For current records, this is `NULL` by default or the value specified in `dbt_valid_to_current`. | The most recent snapshot record will have `dbt_valid_to` set to `NULL` or the specified value. | | dbt_scd_id | A unique key generated for each snapshotted record. | This is used internally by dbt | | dbt_updated_at | The updated_at timestamp of the source record when this snapshot row was inserted. | This is used internally by dbt | +| dbt_is_deleted | A boolean value indicating if the record has been deleted. `True` if deleted, `False` otherwise. | Added when `hard_deletes='new_record'` is configured. This is used internally by dbt | *The timestamps used for each column are subtly different depending on the strategy you use: @@ -458,6 +482,15 @@ Snapshot results (note that `11:30` is not used anywhere): | 1 | pending | 2024-01-01 10:47 | 2024-01-01 10:47 | 2024-01-01 11:05 | 2024-01-01 10:47 | | 1 | shipped | 2024-01-01 11:05 | 2024-01-01 11:05 | | 2024-01-01 11:05 | +Snapshot results with `hard_deletes='new_record'`: + +| id | status | updated_at | dbt_valid_from | dbt_valid_to | dbt_updated_at | dbt_is_deleted | +|----|---------|------------------|------------------|------------------|------------------|----------------| +| 1 | pending | 2024-01-01 10:47 | 2024-01-01 10:47 | 2024-01-01 11:05 | 2024-01-01 10:47 | False | +| 1 | shipped | 2024-01-01 11:05 | 2024-01-01 11:05 | 2024-01-01 11:20 | 2024-01-01 11:05 | False | +| 1 | deleted | 2024-01-01 11:20 | 2024-01-01 11:20 | | 2024-01-01 11:20 | True | + +
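Putting the v1.9+ options together, the following is a minimal sketch of a snapshot YAML that combines a custom `dbt_valid_to_current` indicator, hard-delete tracking, and renamed meta columns. The source, snapshot, and column names here are illustrative rather than part of the example tables above, and the available `snapshot_meta_column_names` keys should be verified against its reference page:

```yaml
snapshots:
  - name: orders_snapshot_customized
    relation: source('jaffle_shop', 'orders')
    config:
      schema: snapshots
      unique_key: id
      strategy: timestamp
      updated_at: updated_at
      dbt_valid_to_current: "to_date('9999-12-31')"   # current rows get this value instead of NULL
      hard_deletes: new_record                         # deleted source rows produce a new row flagged as deleted
      snapshot_meta_column_names:                      # rename meta fields to match team conventions
        dbt_valid_from: valid_from
        dbt_valid_to: valid_to
        dbt_scd_id: scd_id
        dbt_updated_at: updated_at_timestamp
        dbt_is_deleted: is_deleted
```

Configured this way, current rows carry a far-future `valid_to` instead of `NULL`, which can simplify validity-range joins in downstream models.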
@@ -492,13 +525,23 @@ Snapshot results: | 1 | pending | 2024-01-01 11:00 | 2024-01-01 11:30 | 2024-01-01 11:00 | | 1 | shipped | 2024-01-01 11:30 | | 2024-01-01 11:30 | +Snapshot results with `hard_deletes='new_record'`: + +| id | status | dbt_valid_from | dbt_valid_to | dbt_updated_at | dbt_is_deleted | +|----|---------|------------------|------------------|------------------|----------------| +| 1 | pending | 2024-01-01 11:00 | 2024-01-01 11:30 | 2024-01-01 11:00 | False | +| 1 | shipped | 2024-01-01 11:30 | 2024-01-01 11:40 | 2024-01-01 11:30 | False | +| 1 | deleted | 2024-01-01 11:40 | | 2024-01-01 11:40 | True | + ## Configure snapshots in versions 1.8 and earlier -This section is for users on dbt versions 1.8 and earlier. To configure snapshots in versions 1.9 and later, refer to [Configuring snapshots](#configuring-snapshots). The latest versions use an updated snapshot configuration syntax that optimizes performance. +For information about configuring snapshots in dbt versions 1.8 and earlier, select **1.8** from the documentation version picker, and it will appear in this section. + +To configure snapshots in versions 1.9 and later, refer to [Configuring snapshots](#configuring-snapshots). The latest versions use an updated snapshot configuration syntax that optimizes performance. @@ -506,7 +549,8 @@ This section is for users on dbt versions 1.8 and earlier. To configure snapshot - In dbt versions 1.8 and earlier, snapshots are `select` statements, defined within a snapshot block in a `.sql` file (typically in your `snapshots` directory). You'll also need to configure your snapshot to tell dbt how to detect record changes. - The earlier dbt versions use an older syntax that allows for defining multiple resources in a single file. This syntax can significantly slow down parsing and compilation. -- For faster and more efficient management, consider[ upgrading to Versionless](/docs/dbt-versions/versionless-cloud) or the [latest version of dbt Core](/docs/dbt-versions/core), which introduces an updated snapshot configuration syntax that optimizes performance. +- For faster and more efficient management, consider [choosing the "Latest" release track in dbt Cloud](/docs/dbt-versions/cloud-release-tracks) or the [latest version of dbt Core](/docs/dbt-versions/core), which introduces an updated snapshot configuration syntax that optimizes performance. + - For more information on how to migrate from the legacy snapshot configurations to the updated snapshot YAML syntax, refer to [Snapshot configuration migration](/reference/snapshot-configs#snapshot-configuration-migration). The following example shows how to configure a snapshot: diff --git a/website/docs/docs/build/unit-tests.md b/website/docs/docs/build/unit-tests.md index 1d7143d7476..fc4cf02b34f 100644 --- a/website/docs/docs/build/unit-tests.md +++ b/website/docs/docs/build/unit-tests.md @@ -10,13 +10,13 @@ keywords: :::note -This functionality is only supported in dbt Core v1.8+ or accounts that have opted for a ["Versionless"](/docs/dbt-versions/upgrade-dbt-version-in-cloud#versionless) dbt Cloud experience. +Unit testing functionality is available in [dbt Cloud Release Tracks](/docs/dbt-versions/cloud-release-tracks) or dbt Core v1.8+ ::: Historically, dbt's test coverage was confined to [“data” tests](/docs/build/data-tests), assessing the quality of input data or resulting datasets' structure. However, these tests could only be executed _after_ building a model. 
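The unit testing discussion below describes where these tests live and how they are formatted; for orientation, a unit test is defined in YAML alongside your models. A minimal sketch (the model, input, and column names are illustrative):

```yaml
unit_tests:
  - name: test_is_valid_email_flag        # illustrative test name
    model: dim_customers                   # hypothetical model under test
    given:                                 # mock every ref()/source() the model uses as an input
      - input: ref('stg_customers')
        rows:
          - {customer_id: 1, email: "cool@example.com"}
          - {customer_id: 2, email: "not-an-email"}
    expect:                                # the rows the model should produce from the inputs above
      rows:
        - {customer_id: 1, is_valid_email: true}
        - {customer_id: 2, is_valid_email: false}
```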
-With dbt Core v1.8 and dbt Cloud environments that have gone versionless by selecting the **Versionless** option, we have introduced an additional type of test to dbt - unit tests. In software programming, unit tests validate small portions of your functional code, and they work much the same way here. Unit tests allow you to validate your SQL modeling logic on a small set of static inputs _before_ you materialize your full model in production. Unit tests enable test-driven development, benefiting developer efficiency and code reliability. +Starting in dbt Core v1.8, we have introduced an additional type of test to dbt - unit tests. In software programming, unit tests validate small portions of your functional code, and they work much the same way here. Unit tests allow you to validate your SQL modeling logic on a small set of static inputs _before_ you materialize your full model in production. Unit tests enable test-driven development, benefiting developer efficiency and code reliability. ## Before you begin @@ -24,11 +24,15 @@ With dbt Core v1.8 and dbt Cloud environments that have gone versionless by sele - We currently only support adding unit tests to models in your _current_ project. - We currently _don't_ support unit testing models that use the [`materialized view`](/docs/build/materializations#materialized-view) materialization. - We currently _don't_ support unit testing models that use recursive SQL. -- You must specify all fields in a BigQuery STRUCT in a unit test. You cannot use only a subset of fields in a STRUCT. +- We currently _don't_ support unit testing models that use introspective queries. - If your model has multiple versions, by default the unit test will run on *all* versions of your model. Read [unit testing versioned models](/reference/resource-properties/unit-testing-versions) for more information. -- Unit tests must be defined in a YML file in your `models/` directory. -- Table names must be [aliased](/docs/build/custom-aliases) in order to unit test `join` logic. -- Redshift customers need to be aware of a [limitation when building unit tests](/reference/resource-configs/redshift-configs#unit-test-limitations) that requires a workaround. +- Unit tests must be defined in a YML file in your [`models/` directory](/reference/project-configs/model-paths). +- Table names must be aliased in order to unit test `join` logic. +- Include all [`ref`](/reference/dbt-jinja-functions/ref) or [`source`](/reference/dbt-jinja-functions/source) model references in the unit test configuration as `input`s to avoid "node not found" errors during compilation. + +#### Adapter-specific caveats +- You must specify all fields in a BigQuery `STRUCT` in a unit test. You cannot use only a subset of fields in a `STRUCT`. +- Redshift customers need to be aware of a [limitation when building unit tests](/reference/resource-configs/redshift-configs#unit-test-limitations) that requires a workaround. Read the [reference doc](/reference/resource-properties/unit-tests) for more details about formatting your unit tests. diff --git a/website/docs/docs/cloud-integrations/configure-auto-exposures.md b/website/docs/docs/cloud-integrations/configure-auto-exposures.md index 4574d69c164..2bb09573221 100644 --- a/website/docs/docs/cloud-integrations/configure-auto-exposures.md +++ b/website/docs/docs/cloud-integrations/configure-auto-exposures.md @@ -20,23 +20,24 @@ Auto-exposures help data teams optimize their efficiency and ensure data quality To access the features, you should meet the following: -1. 
Your environment and jobs are on [Versionless](/docs/dbt-versions/versionless-cloud) dbt. +1. Your environment and jobs are on a supported dbt [release track](/docs/dbt-versions/cloud-release-tracks). 2. You have a dbt Cloud account on the [Enterprise plan](https://www.getdbt.com/pricing/). 3. You have set up a [production](/docs/deploy/deploy-environments#set-as-production-environment) deployment environment for each project you want to explore, with at least one successful job run. 4. You have [admin permissions](/docs/cloud/manage-access/enterprise-permissions) in dbt Cloud to edit project settings or production environment settings. 5. Use Tableau as your BI tool and enable metadata permissions or work with an admin to do so. Compatible with Tableau Cloud or Tableau Server with the Metadata API enabled. -6. Run a production job _after_ saving the Tableau collections. + - If you're using Tableau Server, you need to [allowlist dbt Cloud's IP addresses](/docs/cloud/about-cloud/access-regions-ip-addresses) for your dbt Cloud region. + - Currently, you can only connect to a single Tableau site on the same server. ## Set up in Tableau This section of the document explains the steps you need to set up the auto-exposures integration with Tableau. Once you've set this up in Tableau and dbt Cloud, you can view the [auto-exposures](/docs/collaborate/auto-exposures#view-auto-exposures-in-dbt-explorer) in dbt Explorer. -To set up [personal access tokens (PATs)](/docs/dbt-cloud-apis/user-tokens#using-the-new-personal-access-tokens) needed for auto exposures, ask a site admin to configure it for the account. +To set up [personal access tokens (PATs)](https://help.tableau.com/current/server/en-us/security_personal_access_tokens.htm) needed for auto exposures, ask a site admin to configure it for the account. 1. Ensure you or a site admin enables PATs for the account in Tableau. -2. Create a PAT that you can add to dbt Cloud to pull in Tableau metadata for auto exposures. +2. Create a PAT that you can add to dbt Cloud to pull in Tableau metadata for auto exposures. Ensure the user creating the PAT has access to collections/folders, as the PAT only grants access matching the creator's existing privileges. 3. Copy the **Secret** and the **Token name** and enter them in dbt Cloud. The secret is only displayed once, so store it in a safe location (like a password manager). @@ -60,10 +61,15 @@ To set up [personal access tokens (PATs)](/docs/dbt-cloud-apis/user-tokens#using 4. Select the collections you want to include for auto exposures. - dbt Cloud automatically imports and syncs any workbook within the selected collections. New additions to the collections will be added to the lineage in dbt Cloud during the next automatic sync (usually once per day). + + :::info + dbt Cloud automatically imports and syncs any workbook within the selected collections. New additions to the collections will be added to the lineage in dbt Cloud during the next sync (automatically once per day). + + dbt Cloud immediately starts a sync when you update the selected collections list, capturing new workbooks and removing irrelevant ones. + ::: + 5. Click **Save**. -6. Run a production job _after_ saving the Tableau collections. dbt Cloud imports everything in the collection(s) and you can continue to view them in Explorer. For more information on how to view and use auto-exposures, refer to the [View auto-exposures from dbt Explorer](/docs/collaborate/auto-exposures) page. 

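Auto-exposures surface Tableau workbooks in lineage much like exposures you define by hand in YAML. For a sense of what an exposure captures, a manually defined one looks roughly like this sketch (the names and URL are illustrative):

```yaml
exposures:
  - name: weekly_revenue_dashboard
    label: Weekly revenue dashboard
    type: dashboard
    url: https://tableau.example.com/views/weekly-revenue   # hypothetical workbook URL
    owner:
      name: Analytics team
      email: analytics@example.com
    depends_on:
      - ref('fct_orders')
      - ref('dim_customers')
```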
@@ -72,5 +78,5 @@ dbt Cloud imports everything in the collection(s) and you can continue to view t ## Refresh auto-exposures in jobs :::info Coming soon -Soon, you’ll also be able to use auto-exposures trigger refresh of the data used in your Tableau dashboards from within dbt Cloud. Stay tuned for more on this soon! +Soon, you’ll also be able to use auto-exposures to trigger the refresh of the data used in your Tableau dashboards from within dbt Cloud. Stay tuned for more on this soon! ::: diff --git a/website/docs/docs/cloud-integrations/overview.md b/website/docs/docs/cloud-integrations/overview.md index d925e3e52a7..8334632a7f8 100644 --- a/website/docs/docs/cloud-integrations/overview.md +++ b/website/docs/docs/cloud-integrations/overview.md @@ -13,7 +13,7 @@ Many data applications integrate with dbt Cloud, enabling you to leverage the po
diff --git a/website/docs/docs/cloud-integrations/semantic-layer/tableau.md b/website/docs/docs/cloud-integrations/semantic-layer/tableau.md index 15a0a92cf39..1f6755c38fa 100644 --- a/website/docs/docs/cloud-integrations/semantic-layer/tableau.md +++ b/website/docs/docs/cloud-integrations/semantic-layer/tableau.md @@ -46,8 +46,8 @@ Alternatively, you can follow these steps to install the Connector: ## Using the integration 1. **Authentication** — Once you authenticate, the system will direct you to the data source page. -2. **Access all Semantic Layer Objects** — Use the "ALL" data source to access all the metrics, dimensions, and entities configured in your dbt Semantic Layer. Note that the "METRICS_AND_DIMENSIONS" data source has been deprecated and replaced by "ALL". -3. **Access saved queries** — You can optionally access individual [saved queries](/docs/build/saved-queries) that you've defined. These will also show up as unique data sources when you log in. +2. **Access all Semantic Layer Objects** — Use the "ALL" data source to access all the metrics, dimensions, and entities configured in your dbt Semantic Layer. Note that the "METRICS_AND_DIMENSIONS" data source has been deprecated and replaced by "ALL". Be sure to use a live connection since extracts are not supported at this time. +3. **Access saved queries** — You can optionally access individual [saved queries](/docs/build/saved-queries) that you've defined. These will also show up as unique data sources when you log in. 4. **Access worksheet** — From your data source selection, go directly to a worksheet in the bottom left-hand corner. 5. **Query metrics and dimensions** — Then, you'll find all the metrics, dimensions, and entities that are available to query on the left side of your window based on your selection. diff --git a/website/docs/docs/cloud-integrations/set-up-snowflake-native-app.md b/website/docs/docs/cloud-integrations/set-up-snowflake-native-app.md index 49e6f90e41f..ff151d4636e 100644 --- a/website/docs/docs/cloud-integrations/set-up-snowflake-native-app.md +++ b/website/docs/docs/cloud-integrations/set-up-snowflake-native-app.md @@ -45,7 +45,10 @@ The following are the prerequisites for dbt Cloud and Snowflake. Configure dbt Cloud and Snowflake Cortex to power the **Ask dbt** chatbot. 1. In dbt Cloud, browse to your Semantic Layer configurations. - 1. From the gear menu, select **Account settings**. In the left sidebar, select **Projects** and choose your dbt project from the project list. + + 1. Navigate to the left hand side panel and click your account name. From there, select **Account settings**. + 1. In the left sidebar, select **Projects** and choose your dbt project from the project list. + 1. In the **Project details** panel, click the **Edit Semantic Layer Configuration** link (which is below the **GraphQL URL** option). 1. In the **Semantic Layer Configuration Details** panel, identify the Snowflake credentials (which you'll use to access Snowflake Cortex) and the environment against which the Semantic Layer is run. Save the username, role, and the environment in a temporary location to use later on. @@ -67,7 +70,7 @@ Configure dbt Cloud and Snowflake Cortex to power the **Ask dbt** chatbot. ## Configure dbt Cloud Collect the following pieces of information from dbt Cloud to set up the application. -1. From the gear menu in dbt Cloud, select **Account settings**. In the left sidebar, select **API tokens > Service tokens**. 
Create a service token with access to all the projects you want to access in the dbt Snowflake Native App. Grant these permission sets: +1. Navigate to the left-hand side panel and click your account name. From there, select **Account settings**. Then click **API tokens > Service tokens**. Create a service token with access to all the projects you want to access in the dbt Snowflake Native App. Grant these permission sets: - **Manage marketplace apps** - **Job Admin** - **Metadata Only** diff --git a/website/docs/docs/cloud/about-cloud-develop-defer.md b/website/docs/docs/cloud/about-cloud-develop-defer.md index 3ee5ac71666..ea059ed3e27 100644 --- a/website/docs/docs/cloud/about-cloud-develop-defer.md +++ b/website/docs/docs/cloud/about-cloud-develop-defer.md @@ -13,11 +13,13 @@ Both the dbt Cloud IDE and the dbt Cloud CLI enable users to natively defer to p -By default, dbt follows these rules: +When using `--defer`, dbt Cloud will follow this order of execution for resolving the `{{ ref() }}` functions: -- dbt uses the production locations of parent models to resolve `{{ ref() }}` functions, based on metadata from the production environment. -- If a development version of a deferred model exists, dbt preferentially uses the development database location when resolving the reference. -- Passing the [`--favor-state`](/reference/node-selection/defer#favor-state) flag overrides the default behavior and _always_ resolve refs using production metadata, regardless of the presence of a development relation. +1. If a development version of a deferred relation exists, dbt preferentially uses the development database location when resolving the reference. +2. If a development version doesn't exist, dbt uses the staging locations of parent relations based on metadata from the staging environment. +3. If neither a development nor a staging version exists, dbt uses the production locations of parent relations based on metadata from the production environment. + +**Note:** Passing the `--favor-state` flag will always resolve refs using production metadata, regardless of the presence of a development relation, skipping step #1. For a clean slate, it's a good practice to drop the development schema at the start and end of your development cycle. @@ -40,9 +42,6 @@ To enable defer in the dbt Cloud IDE, toggle the **Defer to production** button For example, if you were to start developing on a new branch with [nothing in your development schema](/reference/node-selection/defer#usage), edit a single model, and run `dbt build -s state:modified` — only the edited model would run. Any `{{ ref() }}` functions will point to the production location of the referenced models. - -Note: The **Defer to staging/production** toggle button doesn't apply when running [dbt Semantic Layer commands](/docs/build/metricflow-commands) in the dbt Cloud IDE. To use defer for Semantic layer commands in the IDE, toggle the button on and manually add the `--defer` flag to the command. This is a temporary workaround and will be available soon. - ### Defer in dbt Cloud CLI diff --git a/website/docs/docs/cloud/about-cloud/about-dbt-cloud.md b/website/docs/docs/cloud/about-cloud/about-dbt-cloud.md index d7afd424fc4..1a7e59dd5c2 100644 --- a/website/docs/docs/cloud/about-cloud/about-dbt-cloud.md +++ b/website/docs/docs/cloud/about-cloud/about-dbt-cloud.md @@ -24,10 +24,16 @@ dbt Cloud's [flexible plans](https://www.getdbt.com/pricing/) and features make + + **Cell based:** ACCOUNT_PREFIX.us1.dbt.com | 52.45.144.63

54.81.134.249
52.22.161.231
52.3.77.232
3.214.191.130
34.233.79.135 | ✅ | ✅ | ✅ | +| North America [^1] | Azure
East US 2 (Virginia) | **Cell based:** ACCOUNT_PREFIX.us2.dbt.com | 20.10.67.192/26 | ❌ | ❌ | ✅ | | EMEA [^1] | AWS eu-central-1 (Frankfurt) | emea.dbt.com | 3.123.45.39
3.126.140.248
3.72.153.148 | ❌ | ❌ | ✅ | | EMEA [^1] | Azure
North Europe (Ireland) | **Cell based:** ACCOUNT_PREFIX.eu2.dbt.com | 20.13.190.192/26 | ❌ | ❌ | ✅ | | APAC [^1] | AWS ap-southeast-2 (Sydney)| au.dbt.com | 52.65.89.235
3.106.40.33
13.239.155.206
| ❌ | ❌ | ✅ | @@ -45,7 +46,7 @@ dbt Cloud, like many cloud services, relies on underlying AWS cloud infrastructu * Dynamic IP addresses — dbt Cloud infrastructure uses Amazon Web Services (AWS). dbt Cloud offers static URLs for streamlined access, but the dynamic nature of cloud services means the underlying IP addresses change occasionally. AWS manages the IP ranges and may change them according to their operational and security needs. -* Using hostnames for consistent access — To ensure uninterrupted access, we recommend that you dbt Cloud services using hostnames. Hostnames provide a consistent reference point, regardless of any changes in underlying IP addresses. We are aligning with an industry-standard practice employed by organizations such as Snowflake. +* Using hostnames for consistent access — To ensure uninterrupted access, we recommend that you use dbt Cloud services using hostnames. Hostnames provide a consistent reference point, regardless of any changes in underlying IP addresses. We are aligning with an industry-standard practice employed by organizations such as Snowflake. * Optimizing VPN connections — You should integrate a proxy alongside VPN for users who leverage VPN connections. This strategy enables steady IP addresses for your connections, facilitating smooth traffic flow through the VPN and onward to dbt Cloud. By employing a proxy and a VPN, you can direct traffic through the VPN and then to dbt Cloud. It's crucial to set up the proxy if you need to integrate with additional services. diff --git a/website/docs/docs/cloud/about-develop-dbt.md b/website/docs/docs/cloud/about-develop-dbt.md index 9568d70bb27..33d12b89e0f 100644 --- a/website/docs/docs/cloud/about-develop-dbt.md +++ b/website/docs/docs/cloud/about-develop-dbt.md @@ -9,9 +9,9 @@ hide_table_of_contents: true Develop dbt projects using dbt Cloud, which offers a fast and reliable way to work on your dbt project. It runs dbt Core in a hosted (single or multi-tenant) environment. -You can develop in your browser using an integrated development environment (IDE) or in a dbt Cloud-powered command line interface (CLI). +You can develop in your browser using an integrated development environment (IDE), a dbt Cloud-powered command line interface (CLI), or visual editor. -
+
+ +


 To get started, you'll need a [dbt Cloud](https://www.getdbt.com/signup) account and a developer seat. For a more comprehensive guide about developing in dbt, refer to the [quickstart guides](/docs/get-started-dbt). diff --git a/website/docs/docs/cloud/account-integrations.md b/website/docs/docs/cloud/account-integrations.md new file mode 100644 index 00000000000..e5ff42cb900 --- /dev/null +++ b/website/docs/docs/cloud/account-integrations.md @@ -0,0 +1,103 @@ +--- +title: "Account integrations in dbt Cloud" +sidebar_label: "Account integrations" +description: "Learn how to configure account integrations for your dbt Cloud account." +--- + +The following sections describe the different **Account integrations** available from your dbt Cloud account under the account **Settings** section. + + + +## Git integrations + +Connect your dbt Cloud account to your Git provider to enable dbt Cloud users to authenticate with their personal accounts. dbt Cloud will perform Git actions on your behalf, against repositories to which you have access according to your Git provider permissions. + +To configure a Git account integration: +1. Navigate to **Account settings** in the side menu. +2. Under the **Settings** section, click on **Integrations**. +3. Click on the Git provider from the list and select the **Pencil** icon to the right of the provider. +4. dbt Cloud [natively connects](/docs/cloud/git/git-configuration-in-dbt-cloud) to the following Git providers: + + - [GitHub](/docs/cloud/git/connect-github) + - [GitLab](/docs/cloud/git/connect-gitlab) + - [Azure DevOps](/docs/cloud/git/connect-azure-devops) + +You can connect your dbt Cloud account to additional Git providers by importing a git repository from any valid git URL. Refer to [Import a git repository](/docs/cloud/git/import-a-project-by-git-url) for more information. + + + +## OAuth integrations + +Connect your dbt Cloud account to OAuth providers that are integrated with dbt Cloud. + +To configure an OAuth account integration: +1. Navigate to **Account settings** in the side menu. +2. Under the **Settings** section, click on **Integrations**. +3. Under **OAuth**, click on **Link** to connect your Slack account. +4. For custom OAuth providers, under **Custom OAuth integrations**, click on **Add integration** and select the OAuth provider from the list. Fill in the required fields and click **Save**. + + + +## AI integrations + +Once AI features have been [enabled](/docs/cloud/enable-dbt-copilot#enable-dbt-copilot), you can use dbt Labs' AI integration or bring your own provider to support AI-powered dbt Cloud features like [dbt Copilot](/docs/cloud/dbt-copilot) and [Ask dbt](/docs/cloud-integrations/snowflake-native-app) (both available on [dbt Cloud Enterprise plans](https://www.getdbt.com/pricing)). + +dbt Cloud supports AI integrations for dbt Labs-managed OpenAI keys, self-managed OpenAI keys, or self-managed Azure OpenAI keys. + +Note, if you bring your own provider, you will incur API calls and associated charges for features used in dbt Cloud. + +:::info +dbt Cloud's AI is optimized for OpenAI's GPT-4o. Using other models can affect performance and accuracy, and functionality with other models isn't guaranteed. +::: + +To configure the AI integration in your dbt Cloud account, a dbt Cloud admin can perform the following steps: +1. Navigate to **Account settings** in the side menu. +2. Select **Integrations** and scroll to the **AI** section. +3. 

Click on the **Pencil** icon to the right of **OpenAI** to configure the AI integration. + +4. Configure the AI integration for either **dbt Labs OpenAI**, **OpenAI**, or **Azure OpenAI**. + + + + + 1. Select the toggle for **dbt Labs** to use dbt Labs' managed OpenAI key. + 2. Click **Save**. + + + + + + + 1. Select the toggle for **OpenAI** to use your own OpenAI key. + 2. Enter the API key. + 3. Click **Save**. + + + + + + To learn about deploying your own OpenAI model on Azure, refer to [Deploy models on Azure OpenAI](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-openai). Configure credentials for your Azure OpenAI deployment in dbt Cloud in the following two ways: + - [From a Target URI](#from-a-target-uri) + - [Manually providing the credentials](#manually-providing-the-credentials) + + #### From a Target URI + + 1. Locate your Azure OpenAI deployment URI in your Azure Deployment details page. + 2. In the dbt Cloud **Azure OpenAI** section, select the tab **From Target URI**. + 3. Paste the URI into the **Target URI** field. + 4. Enter your Azure OpenAI API key. + 5. Verify the **Endpoint**, **API Version**, and **Deployment Name** are correct. + 6. Click **Save**. + + + #### Manually providing the credentials + + 1. Locate your Azure OpenAI configuration in your Azure Deployment details page. + 2. In the dbt Cloud **Azure OpenAI** section, select the tab **Manual Input**. + 3. Enter your Azure OpenAI API key. + 4. Enter the **Endpoint**, **API Version**, and **Deployment Name**. + 5. Click **Save**. + + + + diff --git a/website/docs/docs/cloud/account-settings.md b/website/docs/docs/cloud/account-settings.md index 3b2632c8747..aaad9b28e5c 100644 --- a/website/docs/docs/cloud/account-settings.md +++ b/website/docs/docs/cloud/account-settings.md @@ -45,6 +45,6 @@ To use, select the **Enable partial parsing between deployment runs** option fro To use Advanced CI features, your dbt Cloud account must have access to them. Ask your dbt Cloud administrator to enable Advanced CI features on your account, which they can do by choosing the **Enable account access to Advanced CI** option from the account settings. -Once enabled, the **Run compare changes** option becomes available in the CI job settings for you to select. +Once enabled, the **dbt compare** option becomes available in the CI job settings for you to select. - \ No newline at end of file + diff --git a/website/docs/docs/cloud/billing.md b/website/docs/docs/cloud/billing.md index ad0834c6c98..2c80648d1f9 100644 --- a/website/docs/docs/cloud/billing.md +++ b/website/docs/docs/cloud/billing.md @@ -149,7 +149,7 @@ dbt Labs may institute use limits if reasonable use is exceeded. Additional feat ## Managing usage -From anywhere in the dbt Cloud account, click the **gear icon** and click **Account settings**. The **Billing** option will be on the left side menu under the **Account Settings** heading. Here, you can view individual available plans and the features provided for each. +From dbt Cloud, click on your account name in the left side menu and select **Account settings**. The **Billing** option will be on the left side menu under the **Settings** heading. Here, you can view individual available plans and the features provided for each. 

### Usage notifications diff --git a/website/docs/docs/cloud/cloud-cli-installation.md b/website/docs/docs/cloud/cloud-cli-installation.md index 8a058cbb90f..a80f1a587e0 100644 --- a/website/docs/docs/cloud/cloud-cli-installation.md +++ b/website/docs/docs/cloud/cloud-cli-installation.md @@ -21,8 +21,6 @@ dbt commands are run against dbt Cloud's infrastructure and benefit from: ## Prerequisites The dbt Cloud CLI is available in all [deployment regions](/docs/cloud/about-cloud/access-regions-ip-addresses) and for both multi-tenant and single-tenant accounts. -- You are on dbt version 1.5 or higher. Alternatively, set it to [**Versionless**](/docs/dbt-versions/upgrade-dbt-version-in-cloud#versionless) to automatically stay up to date. - ## Install dbt Cloud CLI You can install the dbt Cloud CLI on the command line by using one of these methods. @@ -321,3 +319,10 @@ This alias will allow you to use the dbt-cloud command to invoke th If you've ran a dbt command and receive a Session occupied error, you can reattach to your existing session with dbt reattach and then press Control-C and choose to cancel the invocation. + + + + +The Cloud CLI allows only one command that writes to the data warehouse at a time. If you attempt to run multiple write commands simultaneously (for example, `dbt run` and `dbt build`), you will encounter a `stuck session` error. To resolve this, cancel the specific invocation by passing its ID to the cancel command. For more information, refer to [parallel execution](/reference/dbt-commands#parallel-execution). + + \ No newline at end of file diff --git a/website/docs/docs/cloud/configure-cloud-cli.md b/website/docs/docs/cloud/configure-cloud-cli.md index 2e0fc174517..5e0a285c5c5 100644 --- a/website/docs/docs/cloud/configure-cloud-cli.md +++ b/website/docs/docs/cloud/configure-cloud-cli.md @@ -104,9 +104,9 @@ With your repo recloned, you can add, edit, and sync files with your repo. To set environment variables in the dbt Cloud CLI for your dbt project: -1. Select the gear icon on the upper right of the page. -2. Then select **Profile Settings**, then **Credentials**. -3. Click on your project and scroll to the **Environment Variables** section. +1. From dbt Cloud, click on your account name in the left side menu and select **Account settings**. +2. Under the **Your profile** section, select **Credentials**. +3. Click on your project and scroll to the **Environment variables** section. 4. Click **Edit** on the lower right and then set the user-level environment variables. ## Use the dbt Cloud CLI diff --git a/website/docs/docs/cloud/connect-data-platform/about-connections.md b/website/docs/docs/cloud/connect-data-platform/about-connections.md index 6f2f140b724..6497e86de89 100644 --- a/website/docs/docs/cloud/connect-data-platform/about-connections.md +++ b/website/docs/docs/cloud/connect-data-platform/about-connections.md @@ -20,9 +20,12 @@ dbt Cloud can connect with a variety of data platform providers including: - [Starburst or Trino](/docs/cloud/connect-data-platform/connect-starburst-trino) - [Teradata](/docs/cloud/connect-data-platform/connect-teradata) -You can connect to your database in dbt Cloud by clicking the gear in the top right and selecting **Account Settings**. From the Account Settings page, click **+ New Project**. +To connect to your database in dbt Cloud: - +1. Click your account name at the bottom of the left-side menu and click **Account settings** +2. 
Select **Projects** from the top left, and from there click **New Project** + + These connection instructions provide the basic fields required for configuring a data platform connection in dbt Cloud. For more detailed guides, which include demo project data, read our [Quickstart guides](https://docs.getdbt.com/guides) @@ -41,7 +44,7 @@ Connections created with APIs before this change cannot be accessed with the [la Warehouse connections are an account-level resource. As such you can find them under **Accounts Settings** > **Connections**: - + Warehouse connections can be re-used across projects. If multiple projects all connect to the same warehouse, you should re-use the same connection to streamline your management operations. Connections are assigned to a project via an [environment](/docs/dbt-cloud-environments). @@ -85,7 +88,7 @@ Please consider the following actions, as the steps you take will depend on the - Normalization - - Undertsand how new connections should be created to avoid local overrides. If you currently use extended attributes to override the warehouse instance in your production environment - you should instead create a new connection for that instance, and wire your production environment to it, removing the need for the local overrides + - Understand how new connections should be created to avoid local overrides. If you currently use extended attributes to override the warehouse instance in your production environment - you should instead create a new connection for that instance, and wire your production environment to it, removing the need for the local overrides - Create new connections, update relevant environments to target these connections, removing now unecessary local overrides (which may not be all of them!) - Test the new wiring by triggering jobs or starting IDE sessions diff --git a/website/docs/docs/cloud/connect-data-platform/connect-amazon-athena.md b/website/docs/docs/cloud/connect-data-platform/connect-amazon-athena.md index 0b2f844ccac..e3645500b9e 100644 --- a/website/docs/docs/cloud/connect-data-platform/connect-amazon-athena.md +++ b/website/docs/docs/cloud/connect-data-platform/connect-amazon-athena.md @@ -5,9 +5,9 @@ description: "Configure the Amazon Athena data platform connection in dbt Cloud. sidebar_label: "Connect Amazon Athena" --- -# Connect Amazon Athena +# Connect Amazon Athena -Your environment(s) must be on ["Versionless"](/docs/dbt-versions/versionless-cloud) to use the Amazon Athena connection. +Your environment(s) must be on a supported [release track](/docs/dbt-versions/cloud-release-tracks) to use the Amazon Athena connection. Connect dbt Cloud to Amazon's Athena interactive query service to build your dbt project. The following are the required and optional fields for configuring the Athena connection: diff --git a/website/docs/docs/cloud/connect-data-platform/connect-redshift-postgresql-alloydb.md b/website/docs/docs/cloud/connect-data-platform/connect-redshift-postgresql-alloydb.md index 4719095b87f..5be802cae77 100644 --- a/website/docs/docs/cloud/connect-data-platform/connect-redshift-postgresql-alloydb.md +++ b/website/docs/docs/cloud/connect-data-platform/connect-redshift-postgresql-alloydb.md @@ -118,7 +118,7 @@ Once the connection is saved, a public key will be generated and displayed for t To configure the SSH tunnel in dbt Cloud, you'll need to provide the hostname/IP of your bastion server, username, and port, of your choosing, that dbt Cloud will connect to. 
Review the following steps: - Verify the bastion server has its network security rules set up to accept connections from the [dbt Cloud IP addresses](/docs/cloud/about-cloud/access-regions-ip-addresses) on whatever port you configured. -- Set up the user account by using the bastion servers instance's CLI, The following example uses the username `dbtcloud:` +- Set up the user account by using the bastion servers instance's CLI, The following example uses the username `dbtcloud`: ```shell sudo groupadd dbtcloud diff --git a/website/docs/docs/cloud/connect-data-platform/connect-snowflake.md b/website/docs/docs/cloud/connect-data-platform/connect-snowflake.md index d8dd8dfec11..6b749ced186 100644 --- a/website/docs/docs/cloud/connect-data-platform/connect-snowflake.md +++ b/website/docs/docs/cloud/connect-data-platform/connect-snowflake.md @@ -5,6 +5,14 @@ description: "Configure Snowflake connection." sidebar_label: "Connect Snowflake" --- +:::note + +dbt Cloud connections and credentials inherit the permissions of the accounts configured. You can customize roles and associated permissions in Snowflake to fit your company's requirements and fine-tune access to database objects in your account. See [Snowflake permissions](/reference/database-permissions/snowflake-permissions) for more information about customizing roles in Snowflake. + +Refer to [Snowflake permissions](/reference/database-permissions/snowflake-permissions) for more information about customizing roles in Snowflake. + +::: + The following fields are required when creating a Snowflake connection | Field | Description | Examples | @@ -14,12 +22,9 @@ The following fields are required when creating a Snowflake connection | Database | The logical database to connect to and run queries against. | `analytics` | | Warehouse | The virtual warehouse to use for running queries. | `transforming` | - -**Note:** A crucial part of working with dbt atop Snowflake is ensuring that users (in development environments) and/or service accounts (in deployment to production environments) have the correct permissions to take actions on Snowflake! Here is documentation of some [example permissions to configure Snowflake access](/reference/database-permissions/snowflake-permissions). - ## Authentication methods -This section describes the different authentication methods available for connecting dbt Cloud to Snowflake. +This section describes the different authentication methods for connecting dbt Cloud to Snowflake. Configure Deployment environment (Production, Staging, General) credentials globally in the [**Connections**](/docs/deploy/deploy-environments#deployment-connection) area of **Account settings**. Individual users configure their development credentials in the [**Credentials**](/docs/cloud/dbt-cloud-ide/develop-in-the-cloud#get-started-with-the-cloud-ide) area of their user profile. ### Username / Password diff --git a/website/docs/docs/cloud/connect-data-platform/connect-starburst-trino.md b/website/docs/docs/cloud/connect-data-platform/connect-starburst-trino.md index db0d3f61728..4c460f0d705 100644 --- a/website/docs/docs/cloud/connect-data-platform/connect-starburst-trino.md +++ b/website/docs/docs/cloud/connect-data-platform/connect-starburst-trino.md @@ -11,7 +11,7 @@ The following are the required fields for setting up a connection with a [Starbu | **Host** | The hostname of your cluster. Don't include the HTTP protocol prefix. | `mycluster.mydomain.com` | | **Port** | The port to connect to your cluster. 
By default, it's 443 for TLS enabled clusters. | `443` | | **User** | The username (of the account) to log in to your cluster. When connecting to Starburst Galaxy clusters, you must include the role of the user as a suffix to the username.

| Format for Starburst Enterprise or Trino depends on your configured authentication method.
Format for Starburst Galaxy:
  • `user.name@mydomain.com/role`
| -| **Password** | The user's password. | | +| **Password** | The user's password. | - | | **Database** | The name of a catalog in your cluster. | `example_catalog` | | **Schema** | The name of a schema that exists within the specified catalog.  | `example_schema` | diff --git a/website/docs/docs/cloud/connect-data-platform/connect-teradata.md b/website/docs/docs/cloud/connect-data-platform/connect-teradata.md index cf41814078b..8663a181645 100644 --- a/website/docs/docs/cloud/connect-data-platform/connect-teradata.md +++ b/website/docs/docs/cloud/connect-data-platform/connect-teradata.md @@ -7,7 +7,7 @@ sidebar_label: "Connect Teradata" # Connect Teradata -Your environment(s) must be on ["Versionless"](/docs/dbt-versions/versionless-cloud) to use the Teradata connection. +Your environment(s) must be on a supported [release track](/docs/dbt-versions/cloud-release-tracks) to use the Teradata connection. | Field | Description | Type | Required? | Example | | ----------------------------- | --------------------------------------------------------------------------------------------- | -------------- | --------- | ------- | diff --git a/website/docs/docs/cloud/connect-data-platform/connnect-bigquery.md b/website/docs/docs/cloud/connect-data-platform/connnect-bigquery.md index 0243bc619b1..ffe7e468bd2 100644 --- a/website/docs/docs/cloud/connect-data-platform/connnect-bigquery.md +++ b/website/docs/docs/cloud/connect-data-platform/connnect-bigquery.md @@ -11,7 +11,12 @@ sidebar_label: "Connect BigQuery" :::info Uploading a service account JSON keyfile -While the fields in a BigQuery connection can be specified manually, we recommend uploading a service account keyfile to quickly and accurately configure a connection to BigQuery. +While the fields in a BigQuery connection can be specified manually, we recommend uploading a service account keyfile to quickly and accurately configure a connection to BigQuery. + +You can provide the JSON keyfile in one of two formats: + +- JSON keyfile upload — Upload the keyfile directly in its normal JSON format. +- Base64-encoded string — Provide the keyfile as a base64-encoded string. When you provide a base64-encoded string, dbt decodes it automatically and populates the necessary fields. ::: @@ -52,6 +57,123 @@ As an end user, if your organization has set up BigQuery OAuth, you can link a p To learn how to optimize performance with data platform-specific configurations in dbt Cloud, refer to [BigQuery-specific configuration](/reference/resource-configs/bigquery-configs). +### Optional configurations + +In BigQuery, optional configurations let you tailor settings for tasks such as query priority, dataset location, job timeout, and more. These options give you greater control over how BigQuery functions behind the scenes to meet your requirements. + +To customize your optional configurations in dbt Cloud: + +1. Click your name at the bottom of the left-hand side menu in dbt Cloud +2. Select **Your profile** from the menu +3. From there, click **Projects** and select your BigQuery project +4. Go to **Development Connection** and select BigQuery +5. Click **Edit** and then scroll down to **Optional settings** + + + +The following are the optional configurations you can set in dbt Cloud: | Configuration |
Information
| Type |
Example
| +|---------------------------|-----------------------------------------|---------|--------------------| +| [Priority](#priority) | Sets the priority for BigQuery jobs (either `interactive` or queued for `batch` processing) | String | `batch` or `interactive` | +| [Retries](#retries) | Specifies the number of retries for failed jobs due to temporary issues | Integer | `3` | +| [Location](#location) | Location for creating new datasets | String | `US`, `EU`, `us-west2` | +| [Maximum bytes billed](#maximum-bytes-billed) | Limits the maximum number of bytes that can be billed for a query | Integer | `1000000000` | +| [Execution project](#execution-project) | Specifies the project ID to bill for query execution | String | `my-project-id` | +| [Impersonate service account](#impersonate-service-account) | Allows users authenticated locally to access BigQuery resources under a specified service account | String | `service-account@project.iam.gserviceaccount.com` | +| [Job retry deadline seconds](#job-retry-deadline-seconds) | Sets the total number of seconds BigQuery will attempt to retry a job if it fails | Integer | `600` | +| [Job creation timeout seconds](#job-creation-timeout-seconds) | Specifies the maximum timeout for the job creation step | Integer | `120` | +| [Google cloud storage-bucket](#google-cloud-storage-bucket) | Location for storing objects in Google Cloud Storage | String | `my-bucket` | +| [Dataproc region](#dataproc-region) | Specifies the cloud region for running data processing jobs | String | `US`, `EU`, `asia-northeast1` | +| [Dataproc cluster name](#dataproc-cluster-name) | Assigns a unique identifier to a group of virtual machines in Dataproc | String | `my-cluster` | + + + + +The `priority` for the BigQuery jobs that dbt executes can be configured with the `priority` configuration in your BigQuery profile. The priority field can be set to one of `batch` or `interactive`. For more information on query priority, consult the [BigQuery documentation](https://cloud.google.com/bigquery/docs/running-queries). + + + + + +Retries in BigQuery help to ensure that jobs complete successfully by trying again after temporary failures, making your operations more robust and reliable. + + + + + +The `location` of BigQuery datasets can be set using the `location` setting in a BigQuery profile. As per the [BigQuery documentation](https://cloud.google.com/bigquery/docs/locations), `location` may be either a multi-regional location (for example, `EU`, `US`), or a regional location (like `us-west2`). + + + + + +When a `maximum_bytes_billed` value is configured for a BigQuery profile, that allows you to limit how much data your query can process. It’s a safeguard to prevent your query from accidentally processing more data than you expect, which could lead to higher costs. Queries executed by dbt will fail if they exceed the configured maximum bytes threshhold. This configuration should be supplied as an integer number of bytes. + +If your `maximum_bytes_billed` is 1000000000, you would enter that value in the `maximum_bytes_billed` field in dbt cloud. + + + + + + +By default, dbt will use the specified `project`/`database` as both: + +1. The location to materialize resources (models, seeds, snapshots, and so on), unless they specify a custom project/database config +2. The GCP project that receives the bill for query costs or slot usage + +Optionally, you may specify an execution project to bill for query execution, instead of the project/database where you materialize most resources. 
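For reference, if you also manage an equivalent connection in dbt Core, several of the options above correspond to fields on the BigQuery profile target. The following is a minimal sketch, assuming the dbt-bigquery adapter's documented field names (project and dataset names are hypothetical; dbt Cloud itself sets these values through the UI fields described above):

```yaml
# profiles.yml (dbt Core) — illustrative only
my_bigquery_profile:
  target: dev
  outputs:
    dev:
      type: bigquery
      method: oauth
      project: analytics-storage-project        # where resources are materialized
      dataset: dbt_dev
      threads: 4
      priority: interactive                      # or batch
      location: US
      maximum_bytes_billed: 1000000000           # fail queries that would bill more than this
      execution_project: analytics-billing-project   # bill query execution to a different project
```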
+ + + + + +This feature allows users authenticating using local OAuth to access BigQuery resources based on the permissions of a service account. + +For a general overview of this process, see the official docs for [Creating Short-lived Service Account Credentials](https://cloud.google.com/iam/docs/create-short-lived-credentials-direct). + + + + + +Job retry deadline seconds is the maximum amount of time BigQuery will spend retrying a job before it gives up. + + + + + +Job creation timeout seconds is the maximum time BigQuery will wait to start the job. If the job doesn’t start within that time, it times out. + + + +#### Run dbt python models on Google Cloud Platform + +import BigQueryDataproc from '/snippets/_bigquery-dataproc.md'; + + + + + +Everything you store in Cloud Storage must be placed inside a [bucket](https://cloud.google.com/storage/docs/buckets). Buckets help you organize your data and manage access to it. + + + + + +A designated location in the cloud where you can run your data processing jobs efficiently. This region must match the location of your BigQuery dataset if you want to use Dataproc with BigQuery to ensure data doesn't move across regions, which can be inefficient and costly. + +For more information on [Dataproc regions](https://cloud.google.com/bigquery/docs/locations), refer to the BigQuery documentation. + + + + + +A unique label you give to your group of virtual machines to help you identify and manage your data processing tasks in the cloud. When you integrate Dataproc with BigQuery, you need to provide the cluster name so BigQuery knows which specific set of resources (the cluster) to use for running the data jobs. + +Have a look at [Dataproc's document on Create a cluster](https://cloud.google.com/dataproc/docs/guides/create-cluster) for an overview on how clusters work. + + + ### Account level connections and credential management You can re-use connections across multiple projects with [global connections](/docs/cloud/connect-data-platform/about-connections#migration-from-project-level-connections-to-account-level-connections). Connections are attached at the environment level (formerly project level), so you can utilize multiple connections inside of a single project (to handle dev, staging, production, etc.). @@ -147,3 +269,7 @@ For a project, you will first create an environment variable to store the secret "extended_attributes_id": FFFFF }' ``` + + + + diff --git a/website/docs/docs/cloud/dbt-cloud-ide/develop-in-the-cloud.md b/website/docs/docs/cloud/dbt-cloud-ide/develop-in-the-cloud.md index 398b0cff2a1..de44de67b33 100644 --- a/website/docs/docs/cloud/dbt-cloud-ide/develop-in-the-cloud.md +++ b/website/docs/docs/cloud/dbt-cloud-ide/develop-in-the-cloud.md @@ -13,7 +13,7 @@ The dbt Cloud integrated development environment (IDE) is a single web-based int The dbt Cloud IDE offers several [keyboard shortcuts](/docs/cloud/dbt-cloud-ide/keyboard-shortcuts) and [editing features](/docs/cloud/dbt-cloud-ide/ide-user-interface#editing-features) for faster and efficient development and governance: - Syntax highlighting for SQL — Makes it easy to distinguish different parts of your code, reducing syntax errors and enhancing readability. -- AI copilot — Use [dbt Copilot](/docs/cloud/dbt-copilot), a powerful AI engine that can generate documentation, tests, and semantic models for your dbt SQL models. 
+- AI copilot — Use [dbt Copilot](/docs/cloud/dbt-copilot), a powerful AI engine that can [generate code](/docs/cloud/use-dbt-copilot#generate-and-edit-code) using natural language, and [generate documentation](/docs/build/documentation), [tests](/docs/build/data-tests), and [semantic models](/docs/build/semantic-models) for you with the click of a button. - Auto-completion — Suggests table names, arguments, and column names as you type, saving time and reducing typos. - Code [formatting and linting](/docs/cloud/dbt-cloud-ide/lint-format) — Helps standardize and fix your SQL code effortlessly. - Navigation tools — Easily move around your code, jump to specific lines, find and replace text, and navigate between project files. @@ -53,7 +53,7 @@ To understand how to navigate the IDE and its user interface elements, refer to | Feature | Description | |---|---| | [**Keyboard shortcuts**](/docs/cloud/dbt-cloud-ide/keyboard-shortcuts) | You can access a variety of [commands and actions](/docs/cloud/dbt-cloud-ide/keyboard-shortcuts) in the IDE by choosing the appropriate keyboard shortcut. Use the shortcuts for common tasks like building modified models or resuming builds from the last failure. | -| **IDE version control** | The IDE version control section and git button allow you to apply the concept of [version control](/docs/collaborate/git/version-control-basics) to your project directly into the IDE.

- Create or change branches, execute git commands using the git button.
- Commit or revert individual files by right-clicking the edited file
- [Resolve merge conflicts](/docs/collaborate/git/merge-conflicts)
- Link to the repo directly by clicking the branch name
- Edit, format, or lint files and execute dbt commands in your primary protected branch, and commit to a new branch.
- Use Git diff view to view what has been changed in a file before you make a pull request.
- From dbt version 1.6 and higher, use the **Prune branches** [button](/docs/cloud/dbt-cloud-ide/ide-user-interface#prune-branches-modal) to delete local branches that have been deleted from the remote repository, keeping your branch management tidy. | +| **IDE version control** | The IDE version control section and git button allow you to apply the concept of [version control](/docs/collaborate/git/version-control-basics) to your project directly into the IDE.

- Create or change branches, execute git commands using the git button.
- Commit or revert individual files by right-clicking the edited file
- [Resolve merge conflicts](/docs/collaborate/git/merge-conflicts)
- Link to the repo directly by clicking the branch name
- Edit, format, or lint files and execute dbt commands in your primary protected branch, and commit to a new branch.
- Use Git diff view to view what has been changed in a file before you make a pull request.
- Use the **Prune branches** [button](/docs/cloud/dbt-cloud-ide/ide-user-interface#prune-branches-modal) (dbt v1.6 and higher) to delete local branches that have been deleted from the remote repository, keeping your branch management tidy.
- Sign your [git commits](/docs/cloud/dbt-cloud-ide/git-commit-signing) to mark them as 'Verified'. | | **Preview and Compile button** | You can [compile or preview](/docs/cloud/dbt-cloud-ide/ide-user-interface#console-section) code, a snippet of dbt code, or one of your dbt models after editing and saving. | | [**dbt Copilot**](/docs/cloud/dbt-copilot) | A powerful AI engine that can generate documentation, tests, and semantic models for your dbt SQL models. Available for dbt Cloud Enterprise plans. | | **Build, test, and run button** | Build, test, and run your project with a button click or by using the Cloud IDE command bar. diff --git a/website/docs/docs/cloud/dbt-cloud-ide/git-commit-signing.md b/website/docs/docs/cloud/dbt-cloud-ide/git-commit-signing.md new file mode 100644 index 00000000000..afaa0751669 --- /dev/null +++ b/website/docs/docs/cloud/dbt-cloud-ide/git-commit-signing.md @@ -0,0 +1,80 @@ +--- +title: "Git commit signing" +description: "Learn how to sign your Git commits when using the IDE for development." +sidebar_label: Git commit signing +--- + +# Git commit signing + +To prevent impersonation and enhance security, you can sign your Git commits before pushing them to your repository. Using your signature, a Git provider can cryptographically verify a commit and mark it as "verified", providing increased confidence about its origin. + +You can configure dbt Cloud to sign your Git commits when using the IDE for development. To set up, enable the feature in dbt Cloud, follow the flow to generate a keypair, and upload the public key to your Git provider to use for signature verification. + + +## Prerequisites + +- GitHub or GitLab is your Git provider. Currently, Azure DevOps is not supported. +- You have a dbt Cloud account on the [Enterprise plan](https://www.getdbt.com/pricing/). + +## Generate GPG keypair in dbt Cloud + +To generate a GPG keypair in dbt Cloud, follow these steps: +1. Go to your **Personal profile** page in dbt Cloud. +2. Navigate to **Signed Commits** section. +3. Enable the **Sign commits originating from this user** toggle. +4. This will generate a GPG keypair. The private key will be used to sign all future Git commits. The public key will be displayed, allowing you to upload it to your Git provider. + + + +## Upload public key to Git provider + +To upload the public key to your Git provider, follow the detailed documentation provided by the supported Git provider: + +- [GitHub instructions](https://docs.github.com/en/authentication/managing-commit-signature-verification/adding-a-gpg-key-to-your-github-account) +- [GitLab instructions](https://docs.gitlab.com/ee/user/project/repository/signed_commits/gpg.html) + +Once you have uploaded the public key to your Git provider, your Git commits will be marked as "Verified" after you push the changes to the repository. + + + +## Considerations + +- The GPG keypair is tied to the user, not a specific account. There is a 1:1 relationship between the user and keypair. The same key will be used for signing commits on any accounts the user is a member of. +- The GPG keypair generated in dbt Cloud is linked to the email address associated with your account at the time of keypair creation. This email identifies the author of signed commits. +- For your Git commits to be marked as "verified", your dbt Cloud email address must be a verified email address with your Git provider. The Git provider (such as, GitHub, GitLab) checks that the commit's signed email matches a verified email in your Git provider account. 
If they don’t match, the commit won't be marked as "verified." +- Keep your dbt Cloud email and Git provider's verified email in sync to avoid verification issues. If you change your dbt Cloud email address: + - Generate a new GPG keypair with the updated email, following the [steps mentioned earlier](/docs/cloud/dbt-cloud-ide/git-commit-signing#generate-gpg-keypair-in-dbt-cloud). + - Add and verify the new email in your Git provider. + + + +## FAQs + + + + + +If you delete your GPG keypair in dbt Cloud, your Git commits will no longer be signed. You can generate a new GPG keypair by following the [steps mentioned earlier](/docs/cloud/dbt-cloud-ide/git-commit-signing#generate-gpg-keypair-in-dbt-cloud). + + + + +GitHub and GitLab support commit signing, while Azure DevOps does not. Commit signing is a [git feature](https://git-scm.com/book/ms/v2/Git-Tools-Signing-Your-Work), and is independent of any specific provider. However, not all providers support the upload of public keys, or the display of verification badges on commits. + + + + + +If your Git Provider does not explicitly support the uploading of public GPG keys, then +commits will still be signed using the private key, but no verification information will +be displayed by the provider. + + + + + +If your Git provider is configured to enforce commit verification, then unsigned commits +will be rejected. To avoid this, ensure that you have followed all previous steps to generate +a keypair, and uploaded the public key to the provider. + + diff --git a/website/docs/docs/cloud/dbt-cloud-ide/ide-user-interface.md b/website/docs/docs/cloud/dbt-cloud-ide/ide-user-interface.md index 8d80483485c..36c6cc898dc 100644 --- a/website/docs/docs/cloud/dbt-cloud-ide/ide-user-interface.md +++ b/website/docs/docs/cloud/dbt-cloud-ide/ide-user-interface.md @@ -35,7 +35,7 @@ The IDE streamlines your workflow, and features a popular user interface layout * Added (A) — The IDE detects added files * Deleted (D) — The IDE detects deleted files. - + 5. **Command bar —** The Command bar, located in the lower left of the IDE, is used to invoke [dbt commands](/reference/dbt-commands). When a command is invoked, the associated logs are shown in the Invocation History Drawer. @@ -107,15 +107,19 @@ Starting from dbt v1.6 or higher, when you save changes to a model, you can comp 3. **Build button —** The build button allows users to quickly access dbt commands related to the active model in the File Editor. The available commands include dbt build, dbt test, and dbt run, with options to include only the current resource, the resource and its upstream dependencies, the resource, and its downstream dependencies, or the resource with all dependencies. This menu is available for all executable nodes. -4. **Format button —** The editor has a **Format** button that can reformat the contents of your files. For SQL files, it uses either `sqlfmt` or `sqlfluff`, and for Python files, it uses `black`. +4. **Lint button** — The **Lint** button runs the [linter](/docs/cloud/dbt-cloud-ide/lint-format) on the active file in the File Editor. The linter checks for syntax errors and style issues in your code and displays the results in the **Code quality** tab. -5. **Results tab —** The Results console tab displays the most recent Preview results in tabular format. +5. **dbt Copilot** — [dbt Copilot](/docs/cloud/dbt-copilot) is a powerful artificial intelligence engine that can generate documentation, tests, and semantic models for you. 
dbt Copilot is available in the IDE for Enterprise plans. + +6. **Results tab —** The Results console tab displays the most recent Preview results in tabular format. -6. **Compiled Code tab —** The Compile button triggers a compile invocation that generates compiled code, which is displayed in the Compiled Code tab. +7. **Code quality tab** — The Code Quality tab displays the results of the linter on the active file in the File Editor. It allows you to view code errors, provides code quality visibility and management, and displays the SQLFluff version used. + +8. **Compiled Code tab —** Selecting the **Compile** button generates the compiled code, and the Compiled Code tab displays the compiled SQL for the active file in the File Editor. -7. **Lineage tab —** The Lineage tab in the File Editor displays the active model's lineage or . By default, it shows two degrees of lineage in both directions (`2+model_name+2`), however, you can change it to +model+ (full DAG). +9. **Lineage tab —** The Lineage tab in the File Editor displays the active model's lineage or DAG. By default, it shows two degrees of lineage in both directions (`2+model_name+2`), however, you can change it to +model+ (full DAG). To use the lineage: - Double-click a node in the DAG to open that file in a new tab - Expand or shrink the DAG using node selection syntax. - Note, the `--exclude` flag isn't supported. @@ -158,11 +162,11 @@ Use menus and modals to interact with IDE and access useful options to help your - #### File Search You can easily search for and navigate between files using the File Navigation menu, which can be accessed by pressing Command-O or Control-O or clicking on the 🔍 icon in the File Explorer. - + - #### Global Command Palette The Global Command Palette provides helpful shortcuts to interact with the IDE, such as git actions, specialized dbt commands, and compile, and preview actions, among others. To open the menu, use Command-P or Control-P. - + - #### IDE Status modal The IDE Status modal shows the current error message and debug logs for the server. This also contains an option to restart the IDE. Open this by clicking on the IDE Status button. @@ -193,7 +197,7 @@ Use menus and modals to interact with IDE and access useful options to help your * Toggling between dark or light mode for a better viewing experience * Restarting the IDE - * Fully recloning your repository to refresh your git state and view status details + * Roll back your repo to remote to refresh your git state and view status details * Viewing status details, including the IDE Status modal. - + diff --git a/website/docs/docs/cloud/dbt-cloud-ide/lint-format.md b/website/docs/docs/cloud/dbt-cloud-ide/lint-format.md index d14435a97e0..abd3c86d4a8 100644 --- a/website/docs/docs/cloud/dbt-cloud-ide/lint-format.md +++ b/website/docs/docs/cloud/dbt-cloud-ide/lint-format.md @@ -81,7 +81,7 @@ To configure your own linting rules: :::tip Configure dbtonic linting rules -Refer to the [SQLFluff config file](https://github.com/dbt-labs/jaffle-shop-template/blob/main/.sqlfluff) to add the dbt code (or dbtonic) rules we use for our own projects: +Refer to the [Jaffle shop SQLFluff config file](https://github.com/dbt-labs/jaffle-shop-template/blob/main/.sqlfluff) for dbt-specific (or dbtonic) linting rules we use for our own projects:
dbtonic config code example provided by dbt Labs @@ -231,3 +231,4 @@ To avoid this, break up your model into smaller models (files) so that they are - [User interface](/docs/cloud/dbt-cloud-ide/ide-user-interface) - [Keyboard shortcuts](/docs/cloud/dbt-cloud-ide/keyboard-shortcuts) +- [SQL linting in CI jobs](/docs/deploy/continuous-integration#sql-linting) diff --git a/website/docs/docs/cloud/dbt-copilot.md b/website/docs/docs/cloud/dbt-copilot.md index 42a05dd91ba..bd2573e0ff8 100644 --- a/website/docs/docs/cloud/dbt-copilot.md +++ b/website/docs/docs/cloud/dbt-copilot.md @@ -8,12 +8,14 @@ pagination_prev: null # About dbt Copilot -dbt Copilot is a powerful artificial intelligence (AI) engine that's fully integrated into your dbt Cloud experience and designed to accelerate your analytics workflows. dbt Copilot embeds AI-driven assistance across every stage of the analytics development life cycle (ADLC), empowering data practitioners to deliver data products faster, improve data quality, and enhance data accessibility. With automatic code generation, you can let the AI engine generate the [documentation](/docs/build/documentation), [tests](/docs/build/data-tests), and [semantic models](/docs/build/semantic-models) for you. +dbt Copilot is a powerful artificial intelligence (AI) engine that's fully integrated into your dbt Cloud experience and designed to accelerate your analytics workflows. dbt Copilot embeds AI-driven assistance across every stage of the analytics development life cycle (ADLC), empowering data practitioners to deliver data products faster, improve data quality, and enhance data accessibility. + +With automatic code generation, let dbt Copilot [generate code](/docs/cloud/use-dbt-copilot#generate-and-edit-code) using natural language, and [generate documentation](/docs/build/documentation), [tests](/docs/build/data-tests), and [semantic models](/docs/build/semantic-models) for you with the click of a button. :::tip Beta feature -dbt Copilot is designed to _help_ developers generate documentation, tests, and semantic models in dbt Cloud. It's available in beta, in the dbt Cloud IDE only. +dbt Copilot is designed to _help_ developers generate documentation, tests, and semantic models, as well as [code](/docs/cloud/use-dbt-copilot#generate-and-edit-code) using natural language, in dbt Cloud. It's available in beta, in the dbt Cloud IDE only. -To use dbt Copilot, you must have an active [dbt Cloud Enterprise account](https://www.getdbt.com/pricing) and agree to use dbt Labs' OpenAI key. [Register your interest](https://docs.google.com/forms/d/e/1FAIpQLScPjRGyrtgfmdY919Pf3kgqI5E95xxPXz-8JoVruw-L9jVtxg/viewform) to join the private beta or reach out to your Account team to begin this process. +To use dbt Copilot, you must have an active [dbt Cloud Enterprise account](https://www.getdbt.com/pricing) and either agree to use dbt Labs' OpenAI key or provide your own Open AI API key. [Register here](https://docs.google.com/forms/d/e/1FAIpQLScPjRGyrtgfmdY919Pf3kgqI5E95xxPXz-8JoVruw-L9jVtxg/viewform) or reach out to the Account Team if you're interested in joining the private beta. ::: diff --git a/website/docs/docs/cloud/enable-dbt-copilot.md b/website/docs/docs/cloud/enable-dbt-copilot.md index 23c253ecf7a..2b954d1db5d 100644 --- a/website/docs/docs/cloud/enable-dbt-copilot.md +++ b/website/docs/docs/cloud/enable-dbt-copilot.md @@ -12,13 +12,13 @@ This page explains how to enable the dbt Copilot engine in dbt Cloud, leveraging - Available in the dbt Cloud IDE only. 
- Must have an active [dbt Cloud Enterprise account](https://www.getdbt.com/pricing). -- Development environment has been upgraded to ["Versionless"](/docs/dbt-versions/upgrade-dbt-version-in-cloud#versionless). -- Current dbt Copilot deployments use a central OpenAI API key managed by dbt Labs. In the future, you may provide your own key for Azure OpenAI or OpenAI. +- Development environment is on a supported [release track](/docs/dbt-versions/cloud-release-tracks) to receive ongoing updates. +- By default, dbt Copilot deployments use a central OpenAI API key managed by dbt Labs. Alternatively, you can [provide your own OpenAI API key](#bringing-your-own-openai-api-key-byok). - Accept and sign legal agreements. Reach out to your Account team to begin this process. ## Enable dbt Copilot -dbt Copilot is only available at an account level after your organization has signed the legal requirements. It's disabled by default. A dbt Cloud admin(s) can enable it by following these steps: +dbt Copilot is only available to your account after your organization has signed the required legal documents. It's disabled by default. A dbt Cloud admin can enable it by following these steps: 1. Navigate to **Account settings** in the navigation menu. @@ -32,4 +32,15 @@ dbt Copilot is only available at an account level after your organization has si Note: To disable (only after enabled), repeat steps 1 to 3, toggle off in step 4, and repeat step 5. - \ No newline at end of file + + +## Bringing your own OpenAI API key (BYOK) + +Once AI features have been enabled, you can provide your organization's OpenAI API key. dbt Cloud will then leverage your OpenAI account and terms to power dbt Copilot. This will incur billing charges to your organization from OpenAI for requests made by dbt Copilot. + +Configure AI keys using: +- [dbt Labs-managed OpenAI API key](/docs/cloud/account-integrations?ai-integration=dbtlabs#ai-integrations) +- Your own [OpenAI API key](/docs/cloud/account-integrations?ai-integration=openai#ai-integrations) +- [Azure OpenAI](/docs/cloud/account-integrations?ai-integration=azure#ai-integrations) + +For configuration details, see [Account integrations](/docs/cloud/account-integrations#ai-integrations). diff --git a/website/docs/docs/cloud/git/authenticate-azure.md b/website/docs/docs/cloud/git/authenticate-azure.md index 42028bf993b..5278c134f72 100644 --- a/website/docs/docs/cloud/git/authenticate-azure.md +++ b/website/docs/docs/cloud/git/authenticate-azure.md @@ -13,9 +13,9 @@ If you use the dbt Cloud IDE or dbt Cloud CLI to collaborate on your team's Azur Connect your dbt Cloud profile to Azure DevOps using OAuth: -1. Click the gear icon at the top right and select **Profile settings**. -2. Click **Linked Accounts**. -3. Next to Azure DevOps, click **Link**. +1. Click your account name at the bottom of the left-side menu and click **Account settings** +2. Scroll down to **Your profile** and select **Personal profile**. +3. Go to the **Linked accounts** section in the middle of the page. 4. Once you're redirected to Azure DevOps, sign into your account. 
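Related to the bring-your-own-key (BYOK) option added in `enable-dbt-copilot.md` above: before pasting an OpenAI API key into dbt Cloud, it can save a round trip to confirm the key is valid and active. The following is an optional, hedged check against OpenAI's public `v1/models` endpoint (not part of the dbt Cloud setup itself); it assumes the key is exported as `OPENAI_API_KEY`.

```bash
# A valid, active key returns HTTP 200 with a JSON list of models;
# a revoked or mistyped key returns 401.
curl -s -o /dev/null -w "%{http_code}\n" \
  -H "Authorization: Bearer $OPENAI_API_KEY" \
  https://api.openai.com/v1/models
```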
diff --git a/website/docs/docs/cloud/git/connect-azure-devops.md b/website/docs/docs/cloud/git/connect-azure-devops.md index f6c0ee634fc..f3bb07a12d0 100644 --- a/website/docs/docs/cloud/git/connect-azure-devops.md +++ b/website/docs/docs/cloud/git/connect-azure-devops.md @@ -4,6 +4,8 @@ id: "connect-azure-devops" pagination_next: "docs/cloud/git/setup-azure" --- +# Connect to Azure DevOps + diff --git a/website/docs/docs/cloud/git/connect-github.md b/website/docs/docs/cloud/git/connect-github.md index e2bf459275e..df5c6cb0728 100644 --- a/website/docs/docs/cloud/git/connect-github.md +++ b/website/docs/docs/cloud/git/connect-github.md @@ -25,19 +25,21 @@ Connecting your GitHub account to dbt Cloud provides convenience and another lay You can connect your dbt Cloud account to GitHub by installing the dbt Cloud application in your GitHub organization and providing access to the appropriate repositories. To connect your dbt Cloud account to your GitHub account: -1. Navigate to **Your Profile** settings by clicking the gear icon in the top right. +1. From dbt Cloud, click on your account name in the left side menu and select **Account settings**. -2. Select **Linked Accounts** from the left menu. +2. Select **Personal profile** under the **Your profile** section. - +3. Scroll down to **Linked accounts**. -3. In the **Linked Accounts** section, set up your GitHub account connection to dbt Cloud by clicking **Link** to the right of GitHub. This redirects you to your account on GitHub where you will be asked to install and configure the dbt Cloud application. + -4. Select the GitHub organization and repositories dbt Cloud should access. +4. In the **Linked accounts** section, set up your GitHub account connection to dbt Cloud by clicking **Link** to the right of GitHub. This redirects you to your account on GitHub where you will be asked to install and configure the dbt Cloud application. + +5. Select the GitHub organization and repositories dbt Cloud should access. -5. Assign the dbt Cloud GitHub App the following permissions: +6. Assign the dbt Cloud GitHub App the following permissions: - Read access to metadata - Read and write access to Checks - Read and write access to Commit statuses @@ -46,8 +48,8 @@ To connect your dbt Cloud account to your GitHub account: - Read and write access to Webhooks - Read and write access to Workflows -6. Once you grant access to the app, you will be redirected back to dbt Cloud and shown a linked account success state. You are now personally authenticated. -7. Ask your team members to individually authenticate by connecting their [personal GitHub profiles](#authenticate-your-personal-github-account). +7. Once you grant access to the app, you will be redirected back to dbt Cloud and shown a linked account success state. You are now personally authenticated. +8. Ask your team members to individually authenticate by connecting their [personal GitHub profiles](#authenticate-your-personal-github-account). ## Limiting repository access in GitHub If you are your GitHub organization owner, you can also configure the dbt Cloud GitHub application to have access to only select repositories. This configuration must be done in GitHub, but we provide an easy link in dbt Cloud to start this process. @@ -67,14 +69,16 @@ After the dbt Cloud administrator [sets up a connection](/docs/cloud/git/connect To connect a personal GitHub account: -1. Navigate to **Your Profile** settings by clicking the gear icon in the top right. +1. 
From dbt Cloud, click on your account name in the left side menu and select **Account settings**. + +2. Select **Personal profile** under the **Your profile** section. -2. Select **Linked Accounts** in the left menu. If your GitHub account is not connected, you’ll see "No connected account". +3. Scroll down to **Linked accounts**. If your GitHub account is not connected, you’ll see "No connected account". -3. Select **Link** to begin the setup process. You’ll be redirected to GitHub, and asked to authorize dbt Cloud in a grant screen. +4. Select **Link** to begin the setup process. You’ll be redirected to GitHub, and asked to authorize dbt Cloud in a grant screen. -4. Once you approve authorization, you will be redirected to dbt Cloud, and you should now see your connected account. +5. Once you approve authorization, you will be redirected to dbt Cloud, and you should now see your connected account. You can now use the dbt Cloud IDE or dbt Cloud CLI. diff --git a/website/docs/docs/cloud/git/connect-gitlab.md b/website/docs/docs/cloud/git/connect-gitlab.md index f68f09ae73d..d16cdb15b8e 100644 --- a/website/docs/docs/cloud/git/connect-gitlab.md +++ b/website/docs/docs/cloud/git/connect-gitlab.md @@ -10,6 +10,7 @@ Connecting your GitLab account to dbt Cloud provides convenience and another lay - Clone repos using HTTPS rather than SSH. - Carry GitLab user permissions through to dbt Cloud or dbt Cloud CLI's git actions. - Trigger [Continuous integration](/docs/deploy/continuous-integration) builds when merge requests are opened in GitLab. + - GitLab automatically registers a webhook in your GitLab repository to enable seamless integration with dbt Cloud. The steps to integrate GitLab in dbt Cloud depend on your plan. If you are on: - the Developer or Team plan, read these [instructions](#for-dbt-cloud-developer-and-team-tiers). @@ -18,11 +19,12 @@ The steps to integrate GitLab in dbt Cloud depend on your plan. If you are on: ## For dbt Cloud Developer and Team tiers To connect your GitLab account: -1. Navigate to Your Profile settings by clicking the gear icon in the top right. -2. Select **Linked Accounts** in the left menu. -3. Click **Link** to the right of your GitLab account. +1. From dbt Cloud, click on your account name in the left side menu and select **Account settings**. +2. Select **Personal profile** under the **Your profile** section. +3. Scroll down to **Linked accounts**. +4. Click **Link** to the right of your GitLab account. - + When you click **Link**, you will be redirected to GitLab and prompted to sign into your account. GitLab will then ask for your explicit authorization: @@ -60,8 +62,8 @@ In GitLab, when creating your Group Application, input the following: | ------ | ----- | | **Name** | dbt Cloud | | **Redirect URI** | `https://YOUR_ACCESS_URL/complete/gitlab` | -| **Confidential** | ✔️ | -| **Scopes** | ✔️ api | +| **Confidential** | ✅ | +| **Scopes** | ✅ api | Replace `YOUR_ACCESS_URL` with the [appropriate Access URL](/docs/cloud/about-cloud/access-regions-ip-addresses) for your region and plan. @@ -99,7 +101,13 @@ Once you've accepted, you should be redirected back to dbt Cloud, and your integ ### Personally authenticating with GitLab dbt Cloud developers on the Enterprise plan must each connect their GitLab profiles to dbt Cloud, as every developer's read / write access for the dbt repo is checked in the dbt Cloud IDE or dbt Cloud CLI. 
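Once you've completed the linking steps below, you can optionally confirm from a terminal that the integration looks right: the GitLab REST API shows both the webhook that dbt Cloud registers automatically and your own access level on the repository. This is a minimal sketch rather than part of the official setup; it assumes a GitLab personal access token with the `read_api` scope and the project's numeric ID.

```bash
# List the project's webhooks; the dbt Cloud integration registers one automatically.
curl -s -H "PRIVATE-TOKEN: $GITLAB_TOKEN" \
  "https://gitlab.com/api/v4/projects/$PROJECT_ID/hooks"

# Check your own membership (including inherited membership) and access level.
curl -s -H "PRIVATE-TOKEN: $GITLAB_TOKEN" \
  "https://gitlab.com/api/v4/projects/$PROJECT_ID/members/all?query=$GITLAB_USERNAME"
```

An access level of 40 (Maintainer) or higher is what's needed to set up the repository connection itself; 30 (Developer) is typically enough for day-to-day work in the IDE or dbt Cloud CLI.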
-To connect a personal GitLab account, dbt Cloud developers should navigate to Your Profile settings by clicking the gear icon in the top right, then select **Linked Accounts** in the left menu. +To connect a personal GitLab account: + +1. From dbt Cloud, click on your account name in the left side menu and select **Account settings**. + +2. Select **Personal profile** under the **Your profile** section. + +3. Scroll down to **Linked accounts**. If your GitLab account is not connected, you’ll see "No connected account". Select **Link** to begin the setup process. You’ll be redirected to GitLab, and asked to authorize dbt Cloud in a grant screen. @@ -107,20 +115,10 @@ If your GitLab account is not connected, you’ll see "No connected account". Se Once you approve authorization, you will be redirected to dbt Cloud, and you should see your connected account. You're now ready to start developing in the dbt Cloud IDE or dbt Cloud CLI. - ## Troubleshooting -### Errors when importing a repository on dbt Cloud project set up -If you do not see your repository listed, double-check that: -- Your repository is in a Gitlab group you have access to. dbt Cloud will not read repos associated with a user. - -If you do see your repository listed, but are unable to import the repository successfully, double-check that: -- You are a maintainer of that repository. Only users with maintainer permissions can set up repository connections. - -If you imported a repository using the dbt Cloud native integration with GitLab, you should be able to see the clone strategy is using a `deploy_token`. If it's relying on an SSH key, this means the repository was not set up using the native GitLab integration, but rather using the generic git clone option. The repository must be reconnected in order to get the benefits described above. - -## FAQs - + + diff --git a/website/docs/docs/cloud/git/import-a-project-by-git-url.md b/website/docs/docs/cloud/git/import-a-project-by-git-url.md index 90c54dbb1b1..2b499b39cb7 100644 --- a/website/docs/docs/cloud/git/import-a-project-by-git-url.md +++ b/website/docs/docs/cloud/git/import-a-project-by-git-url.md @@ -14,8 +14,8 @@ You must use the `git@...` or `ssh:..`. version of your git URL, not the `https: After importing a project by Git URL, dbt Cloud will generate a Deploy Key for your repository. To find the deploy key in dbt Cloud: -1. Click the gear icon in the upper right-hand corner. -2. Click **Account Settings** --> **Projects** and select a project. +1. From dbt Cloud, click on your account name in the left side menu and select **Account settings**. +2. Go to **Projects** and select a project. 3. Click the **Repository** link to the repository details page. 4. Copy the key under the **Deploy Key** section. @@ -49,7 +49,7 @@ If you use GitLab, you can import your repo directly using [dbt Cloud's GitLab A - To add a deploy key to a GitLab account, navigate to the [SSH keys](https://gitlab.com/profile/keys) tab in the User Settings page of your GitLab account. - Next, paste in the deploy key generated by dbt Cloud for your repository. - After saving this SSH key, dbt Cloud will be able to read and write files in your GitLab repository. 
-- Refer to [Adding a read only deploy key in GitLab](https://docs.gitlab.com/ee/ssh/#per-repository-deploy-keys) +- Refer to [Adding a read only deploy key in GitLab](https://docs.gitlab.com/ee/user/project/deploy_keys/) diff --git a/website/docs/docs/cloud/manage-access/about-access.md b/website/docs/docs/cloud/manage-access/about-access.md index 6b02d9eb17b..b9d23b28add 100644 --- a/website/docs/docs/cloud/manage-access/about-access.md +++ b/website/docs/docs/cloud/manage-access/about-access.md @@ -79,7 +79,7 @@ Refer to [role-based access control](#role-based-access-control) for more inform ## Grant access -dbt Cloud users have both a license (individually or by group) and permissions (by group only) that determine what actions they can take. Licenses are account-wide, and permissions provide more granular access or restrictions to specific features. +dbt Cloud users have both a license (assigned to an individual user or by group membership) and permissions (by group membership only) that determine what actions they can take. Licenses are account-wide, and permissions provide more granular access or restrictions to specific features. ### Licenses @@ -89,7 +89,7 @@ There are three license types in dbt Cloud: - **Developer** — User can be granted _any_ permissions. - **Read-Only** — User has read-only permissions applied to all dbt Cloud resources regardless of the role-based permissions that the user is assigned. -- **IT** — User has [Security Admin](/docs/cloud/manage-access/enterprise-permissions#security-admin) and [Billing Admin](/docs/cloud/manage-access/enterprise-permissions#billing-admin) permissions applied, regardless of the group permissions assigned. +- **IT** — User has Security Admin and Billing Admin [permissions](/docs/cloud/manage-access/enterprise-permissions) applied, regardless of the group permissions assigned. Developer licenses will make up a majority of the users in your environment and have the highest impact on billing, so it's important to monitor how many you have at any given time. diff --git a/website/docs/docs/cloud/manage-access/audit-log.md b/website/docs/docs/cloud/manage-access/audit-log.md index 9c80adaf2f8..de52434be06 100644 --- a/website/docs/docs/cloud/manage-access/audit-log.md +++ b/website/docs/docs/cloud/manage-access/audit-log.md @@ -18,7 +18,7 @@ The dbt Cloud audit log stores all the events that occurred in your organization ## Accessing the audit log -To access the audit log, click the gear icon in the top right, then click **Audit Log**. +To access the audit log, click on your account name in the left side menu and select **Account settings**. @@ -32,7 +32,7 @@ On the audit log page, you will see a list of various events and their associate ### Event details -Click the event card to see the details about the activity that triggered the event. This view provides important details, including when it happened and what type of event was triggered. For example, if someone changes the settings for a job, you can use the event details to see which job was changed (type of event: `job_definition.Changed`), by whom (person who triggered the event: `actor`), and when (time it was triggered: `created_at_utc`). For types of events and their descriptions, see [Events in audit log](#events-in-audit-log). +Click the event card to see the details about the activity that triggered the event. This view provides important details, including when it happened and what type of event was triggered. 
For example, if someone changes the settings for a job, you can use the event details to see which job was changed (type of event: `job_definition.Changed`), by whom (person who triggered the event: `actor`), and when (time it was triggered: `created_at_utc`). For types of events and their descriptions, see [Events in audit log](#audit-log-events). The event details provide the key factors of an event: @@ -62,7 +62,7 @@ The audit log supports various events for different objects in dbt Cloud. You wi | Auth Provider Changed | auth_provider.Changed | Authentication provider settings changed | | Credential Login Succeeded | auth.CredentialsLoginSucceeded | User successfully logged in with username and password | | SSO Login Failed | auth.SsoLoginFailed | User login via SSO failed | -| SSO Login Succeeded | auth.SsoLoginSucceeded | User successfully logged in via SSO +| SSO Login Succeeded | auth.SsoLoginSucceeded | User successfully logged in via SSO | ### Environment @@ -93,7 +93,7 @@ The audit log supports various events for different objects in dbt Cloud. You wi | ------------- | ----------------------------- | ------------------------------ | | Group Added | user_group.Added | New Group successfully created | | Group Changed | user_group.Changed | Group settings changed | -| Group Removed | user_group.Changed | Group successfully removed | +| Group Removed | user_group.Removed | Group successfully removed | ### User @@ -149,12 +149,65 @@ The audit log supports various events for different objects in dbt Cloud. You wi ### Credentials -| Event Name | Event Type | Description | -| -------------------------------- | ----------------------------- | -------------------------------- | +| Event Name | Event Type | Description | +| -------------------------------- | ----------------------------- | -----------------------| | Credentials Added to Project | credentials.Added | Project credentials added | | Credentials Changed in Project | credentials.Changed | Credentials changed in project | | Credentials Removed from Project | credentials.Removed | Credentials removed from project | + +### Git integration + +| Event Name | Event Type | Description | +| -------------------------------- | ----------------------------- | -----------------------| +| GitLab Application Changed | gitlab_application.changed | GitLab configuration in dbt Cloud changed | + +### Webhooks + +| Event Name | Event Type | Description | +| -------------------------------- | ----------------------------- | -----------------------| +| Webhook Subscriptions Added | webhook_subscription.added | New webhook configured in settings | +| Webhook Subscriptions Changed | webhook_subscription.changed | Existing webhook configuration altered | +| Webhook Subscriptions Removed | webhook_subscription.removed | Existing webhook deleted | + + +### Semantic Layer + +| Event Name | Event Type | Description | +| -------------------------------- | ----------------------------- | -----------------------| +| Semantic Layer Config Added | semantic_layer_config.added | Semantic Layer config added | +| Semantic Layer Config Changed | semantic_layer_config.changed | Semantic Layer config (not related to credentials) changed | +| Semantic Layer Config Removed | semantic_layer_config.removed | Semantic Layer config removed | +| Semantic Layer Credentials Added | semantic_layer_credentials.added | Semantic Layer credentials added | +| Semantic Layer Credentials Changed| semantic_layer_credentials.changed | Semantic Layer credentials changed. 
Does not trigger semantic_layer_config.changed| +| Semantic Layer Credentials Removed| semantic_layer_credentials.removed | Semantic Layer credentials removed | + +### Extended attributes + +| Event Name | Event Type | Description | +| -------------------------------- | ----------------------------- | -----------------------| +| Extended Attribute Added | extended_attributes.added | Extended attribute added to a project | +| Extended Attribute Changed | extended_attributes.changed | Extended attribute changed or removed | + + +### Account-scoped personal access token + +| Event Name | Event Type | Description | +| -------------------------------- | ----------------------------- | -----------------------| +| Account Scoped Personal Access Token Created | account_scoped_pat.created | An account-scoped PAT was created | +| Account Scoped Personal Access Token Deleted | account_scoped_pat.deleted | An account-scoped PAT was deleted | + +### IP restrictions + +| Event Name | Event Type | Description | +| -------------------------------- | ----------------------------- | -----------------------| +| IP Restrictions Toggled | ip_restrictions.toggled | IP restrictions feature enabled or disabled | +| IP Restrictions Rule Added | ip_restrictions.rule.added | IP restriction rule created | +| IP Restrictions Rule Changed | ip_restrictions.rule.changed | IP restriction rule edited | +| IP Restrictions Rule Removed | ip_restrictions.rule.removed | IP restriction rule deleted | + + + ## Searching the audit log You can search the audit log to find a specific event or actor, which is limited to the ones listed in [Events in audit log](#events-in-audit-log). The audit log successfully lists historical events spanning the last 90 days. You can search for an actor or event using the search bar, and then narrow your results using the time window. diff --git a/website/docs/docs/cloud/manage-access/auth0-migration.md b/website/docs/docs/cloud/manage-access/auth0-migration.md index b7bab836810..2f45ad7dcc8 100644 --- a/website/docs/docs/cloud/manage-access/auth0-migration.md +++ b/website/docs/docs/cloud/manage-access/auth0-migration.md @@ -5,22 +5,10 @@ sidebar: "SSO Auth0 Migration" description: "Required actions for migrating to Auth0 for SSO services on dbt Cloud." --- -:::note - -This migration is a feature of the dbt Cloud Enterprise plan. To learn more about an Enterprise plan, contact us at [sales@getdbt.com](mailto::sales@getdbt.com). - -For single-tenant Virtual Private Cloud, you should [email dbt Cloud Support](mailto::support@getdbt.com) to set up or update your SSO configuration. - -::: - dbt Labs is partnering with Auth0 to bring enhanced features to dbt Cloud's single sign-on (SSO) capabilities. Auth0 is an identity and access management (IAM) platform with advanced security features, and it will be leveraged by dbt Cloud. These changes will require some action from customers with SSO configured in dbt Cloud today, and this guide will outline the necessary changes for each environment. If you have not yet configured SSO in dbt Cloud, refer instead to our setup guides for [SAML](/docs/cloud/manage-access/set-up-sso-saml-2.0), [Okta](/docs/cloud/manage-access/set-up-sso-okta), [Google Workspace](/docs/cloud/manage-access/set-up-sso-google-workspace), or [Microsoft Entra ID (formerly Azure AD)](/docs/cloud/manage-access/set-up-sso-microsoft-entra-id) single sign-on services. 
-## Auth0 Multi-tenant URIs - - - ## Start the migration The Auth0 migration feature is being rolled out incrementally to customers who have SSO features already enabled. When the migration option has been enabled on your account, you will see **SSO Updates Available** on the right side of the menu bar, near the settings icon. diff --git a/website/docs/docs/cloud/manage-access/cloud-seats-and-users.md b/website/docs/docs/cloud/manage-access/cloud-seats-and-users.md index da19f30ab4c..5628314c922 100644 --- a/website/docs/docs/cloud/manage-access/cloud-seats-and-users.md +++ b/website/docs/docs/cloud/manage-access/cloud-seats-and-users.md @@ -49,13 +49,13 @@ The following tabs detail steps on how to modify your user license count: If you're on an Enterprise plan and have the correct [permissions](/docs/cloud/manage-access/enterprise-permissions), you can add or remove licenses by adjusting your user seat count. Note, an IT license does not count toward seat usage. -- To remove a user, go to **Account Settings** and select **Users**. +- To remove a user, click on your account name in the left side menu, click **Account settings** and select **Users**. - Select the user you want to remove, click **Edit**, and then **Delete**. - This action cannot be undone. However, you can re-invite the user with the same info if you deleted the user in error.
- To add a user, go to **Account Settings** and select **Users**. - Click the [**Invite Users**](/docs/cloud/manage-access/invite-users) button. - - For fine-grained permission configuration, refer to [Role based access control](/docs/cloud/manage-access/enterprise-permissions). + - For fine-grained permission configuration, refer to [Role based access control](/docs/cloud/manage-access/about-user-access#role-based-access-control-). @@ -64,7 +64,7 @@ If you're on an Enterprise plan and have the correct [permissions](/docs/cloud/m If you're on a Team plan and have the correct [permissions](/docs/cloud/manage-access/self-service-permissions), you can add or remove developers. You'll need to make two changes: -- Adjust your developer user seat count, which manages the users invited to your dbt Cloud project. AND +- Adjust your developer user seat count, which manages the users invited to your dbt Cloud project. - Adjust your developer billing seat count, which manages the number of billable seats. @@ -75,7 +75,7 @@ You can add or remove developers by increasing or decreasing the number of users To add a user in dbt Cloud, you must be an account owner or have admin privileges. -1. From dbt Cloud, click the gear icon at the top right and select **Account Settings**. +1. From dbt Cloud, click on your account name in the left side menu and select **Account settings**. @@ -95,11 +95,11 @@ Great work! After completing those these steps, your dbt Cloud user count and bi To delete a user in dbt Cloud, you must be an account owner or have admin privileges. If the user has a `developer` license type, this will open up their seat for another user or allow the admins to lower the total number of seats. -1. From dbt Cloud, click the gear icon at the top right and select **Account Settings**. +1. From dbt Cloud, click on your account name in the left side menu and select **Account settings**. -2. In **Account Settings** and select **Users**. +2. In **Account Settings**, select **Users**. 3. Select the user you want to delete, then click **Edit**. 4. Click **Delete** in the bottom left. Click **Confirm Delete** to immediately delete the user without additional password prompts. This action cannot be undone. However, you can re-invite the user with the same information if the deletion was made in error. @@ -124,9 +124,7 @@ Great work! After completing these steps, your dbt Cloud user count and billing ## Managing license types -Licenses can be assigned manually, or automatically based on IdP configuration -(enterprise only). By default, new users in an account will be assigned a -Developer license. +Licenses can be assigned to users individually or through group membership. To assign a license via group membership, you can manually add a user to a group during the invitation process or assign them to a group after they’ve enrolled in dbt Cloud. Alternatively, with [SSO configuration](/docs/cloud/manage-access/sso-overview) and [role-based access control](/docs/cloud/manage-access/about-user-access#role-based-access-control-) (Enterprise only), users can be automatically assigned to groups. By default, new users in an account are assigned a Developer license. ### Manual configuration @@ -142,16 +140,9 @@ change. -### Mapped configuration +### Mapped configuration -**Note:** This feature is only available on the Enterprise plan. 
- -If your account is connected to an Identity Provider (IdP) for [Single Sign -On](/docs/cloud/manage-access/sso-overview), you can automatically map IdP user -groups to specific license types in dbt Cloud. To configure license mappings, -navigate to the Account Settings > Team > License Mappings page. From -here, you can create or edit SSO mappings for both Read-Only and Developer -license types. +If your account is connected to an Identity Provider (IdP) for [Single Sign On](/docs/cloud/manage-access/sso-overview), you can automatically map IdP user groups to specific groups in dbt Cloud and assign license types to those groups. To configure license mappings, navigate to the **Account Settings** > **Groups & Licenses** > **License Mappings** page. From here, you can create or edit SSO mappings for both Read-Only and Developer license types. By default, all new members of a dbt Cloud account will be assigned a Developer license. To assign Read-Only licenses to certain groups of users, create a new diff --git a/website/docs/docs/cloud/manage-access/enterprise-permissions.md b/website/docs/docs/cloud/manage-access/enterprise-permissions.md index a1f6d795c23..5a56900d529 100644 --- a/website/docs/docs/cloud/manage-access/enterprise-permissions.md +++ b/website/docs/docs/cloud/manage-access/enterprise-permissions.md @@ -22,22 +22,14 @@ The following roles and permission sets are available for assignment in dbt Clou :::tip Licenses or Permission sets -The user's [license](/docs/cloud/manage-access/seats-and-users) type always overrides their assigned permission set. This means that even if a user belongs to a dbt Cloud group with 'Account Admin' permissions, having a 'Read-Only' license would still prevent them from performing administrative actions on the account. +The user's [license](/docs/cloud/manage-access/about-user-access) type always overrides their assigned permission set. This means that even if a user belongs to a dbt Cloud group with 'Account Admin' permissions, having a 'Read-Only' license would still prevent them from performing administrative actions on the account. ::: -## How to set up RBAC Groups in dbt Cloud +## Additional resources -Role-Based Access Control (RBAC) is helpful for automatically assigning permissions to dbt admins based on their SSO provider group associations. RBAC does not apply to [model groups](/docs/collaborate/govern/model-access#groups). +- [Grant users access](/docs/cloud/manage-access/about-user-access#grant-access) +- [Role-based access control](/docs/cloud/manage-access/about-user-access#role-based-access-control-) +- [Environment-level permissions](/docs/cloud/manage-access/environment-permissions) -1. Click the gear icon to the top right and select **Account Settings**. Click **Groups & Licenses** - - - -2. Select an existing group or create a new group to add RBAC. Name the group (this can be any name you like, but it's recommended to keep it consistent with the SSO groups). If you have configured SSO with SAML 2.0, you may have to use the GroupID instead of the name of the group. -3. Configure the SSO provider groups you want to add RBAC by clicking **Add** in the **SSO** section. These fields are case-sensitive and must match the source group formatting. -4. Configure the permissions for users within those groups by clicking **Add** in the **Access** section of the window. - - -5. When you've completed your configurations, click **Save**. 
Users will begin to populate the group automatically once they have signed in to dbt Cloud with their SSO credentials. diff --git a/website/docs/docs/cloud/manage-access/environment-permissions-setup.md b/website/docs/docs/cloud/manage-access/environment-permissions-setup.md index 1a3f2724819..5b41477e456 100644 --- a/website/docs/docs/cloud/manage-access/environment-permissions-setup.md +++ b/website/docs/docs/cloud/manage-access/environment-permissions-setup.md @@ -15,7 +15,7 @@ Environment-level permissions are not the same as account-level [role-based acce In your dbt Cloud account: -1. Open the **gear menu** and select **Account settings**. From the left-side menu, select **Groups & Licenses**. While you can edit existing groups, we recommend not altering the default `Everyone`, `Member`, and `Owner` groups. +1. Click your account name, above your profile icon on the left side panel, then select **Account settings**. From there, select **Groups & Licenses**. While you can edit existing groups, we recommend not altering the default `Everyone`, `Member`, and `Owner` groups. diff --git a/website/docs/docs/cloud/manage-access/environment-permissions.md b/website/docs/docs/cloud/manage-access/environment-permissions.md index 44cf2dc9a64..20acfae51f7 100644 --- a/website/docs/docs/cloud/manage-access/environment-permissions.md +++ b/website/docs/docs/cloud/manage-access/environment-permissions.md @@ -17,8 +17,8 @@ Environment-level permissions give dbt Cloud admins more flexibility to protect - Environment-level permissions do not allow you to create custom roles and permissions for each resource type in dbt Cloud. - You can only select environment types, and can’t specify a particular environment within a project. -- You can't select specific resources within environments. dbt Cloud jobs, runs, and environment variables are all environment resources. - - For example, you can't specify that a user only has access to jobs but not environment variables. Access to a given environment gives the user access to everything within that environment. +- You can't select specific resources within environments. dbt Cloud jobs and runs are environment resources. + - For example, you can't specify that a user only has access to jobs but not runs. Access to a given environment gives the user access to everything within that environment. ## Environments and roles @@ -77,4 +77,4 @@ If the user has the same roles across projects, you can apply environment access ## Related docs --[Environment-level permissions setup](/docs/cloud/manage-access/environment-permissions-setup) +- [Environment-level permissions setup](/docs/cloud/manage-access/environment-permissions-setup) diff --git a/website/docs/docs/cloud/manage-access/external-oauth.md b/website/docs/docs/cloud/manage-access/external-oauth.md index deb23f36f09..380d0a3d1cc 100644 --- a/website/docs/docs/cloud/manage-access/external-oauth.md +++ b/website/docs/docs/cloud/manage-access/external-oauth.md @@ -267,3 +267,7 @@ app in Entra ID, click **Endpoints** and open the **Federation metadata document 6. `Application ID URI`: Copy the `Application ID URI` field from the resource server’s Overview screen. 
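To cross-check the endpoint values before entering them in dbt Cloud, you can also pull them from Entra ID's standard OpenID Connect discovery document; the values should line up with what the **Endpoints** pane and federation metadata document show. This is an optional sketch that assumes `curl` and `jq` are installed and that `TENANT_ID` is replaced with your directory (tenant) ID.

```bash
# Print the issuer, authorize URL, token URL, and JWKS URI for your tenant.
TENANT_ID="00000000-0000-0000-0000-000000000000"  # placeholder: use your tenant ID

curl -s "https://login.microsoftonline.com/$TENANT_ID/v2.0/.well-known/openid-configuration" \
  | jq '{issuer, authorization_endpoint, token_endpoint, jwks_uri}'
```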
+ +## FAQs + + diff --git a/website/docs/docs/cloud/manage-access/invite-users.md b/website/docs/docs/cloud/manage-access/invite-users.md index c82e15fd48f..0922b4dc991 100644 --- a/website/docs/docs/cloud/manage-access/invite-users.md +++ b/website/docs/docs/cloud/manage-access/invite-users.md @@ -17,19 +17,16 @@ You must have proper permissions to invite new users: ## Invite new users -1. In your dbt Cloud account, select the gear menu in the upper right corner and then select **Account Settings**. -2. From the left sidebar, select **Users**. - - - -3. Click on **Invite Users**. +1. In your dbt Cloud account, select your account name in the bottom left corner. Then select **Account settings**. +2. Under **Settings**, select **Users**. +3. Click on **Invite users**. -4. In the **Email Addresses** field, enter the email addresses of the users you would like to invite separated by comma, semicolon, or a new line. +4. In the **Email Addresses** field, enter the email addresses of the users you want to invite separated by a comma, semicolon, or a new line. 5. Select the license type for the batch of users from the **License** dropdown. -6. Select the group(s) you would like the invitees to belong to. -7. Click **Send Invitations**. +6. Select the group(s) you want the invitees to belong to. +7. Click **Send invitations**. - If the list of invitees exceeds the number of licenses your account has available, you will receive a warning when you click **Send Invitations** and the invitations will not be sent. diff --git a/website/docs/docs/cloud/manage-access/licenses-and-groups.md b/website/docs/docs/cloud/manage-access/licenses-and-groups.md deleted file mode 100644 index b91af80f9b3..00000000000 --- a/website/docs/docs/cloud/manage-access/licenses-and-groups.md +++ /dev/null @@ -1,145 +0,0 @@ ---- -title: "Licenses and groups" -id: "licenses-and-groups" ---- - -## Overview - -dbt Cloud administrators can use dbt Cloud's permissioning model to control -user-level access in a dbt Cloud account. This access control comes in two flavors: -License-based and Role-based. - -- **License-based Access Controls:** User are configured with account-wide - license types. These licenses control the specific parts of the dbt Cloud application - that a given user can access. -- **Role-based Access Control (RBAC):** Users are assigned to _groups_ that have - specific permissions on specific projects or the entire account. A user may be - a member of multiple groups, and those groups may have permissions on multiple - projects. - -## License-based access control - -Each user on an account is assigned a license type when the user is first -invited to a given account. This license type may change over time, but a -user can only have one type of license at any given time. - -A user's license type controls the features in dbt Cloud that the user is able -to access. dbt Cloud's three license types are: - - **Read-Only** - - **Developer** - - **IT** - -For more information on these license types, see [Seats & Users](/docs/cloud/manage-access/seats-and-users). -At a high-level, Developers may be granted _any_ permissions, whereas Read-Only -users will have read-only permissions applied to all dbt Cloud resources -regardless of the role-based permissions that the user is assigned. IT users will have Security Admin and Billing Admin permissions applied regardless of the role-based permissions that the user is assigned. 
- -## Role-based access control - -:::info dbt Cloud Enterprise - -Role-based access control is a feature of the dbt Cloud Enterprise plan - -::: - -Role-based access control allows for fine-grained permissioning in the dbt Cloud -application. With role-based access control, users can be assigned varying -permissions to different projects within a dbt Cloud account. For teams on the -Enterprise tier, role-based permissions can be generated dynamically from -configurations in an [Identity Provider](sso-overview). - -Role-based permissions are applied to _groups_ and pertain to _projects_. The -assignable permissions themselves are granted via _permission sets_. - - -### Groups - -A group is a collection of users. Users may belong to multiple groups. Members -of a group inherit any permissions applied to the group itself. - -Users can be added to a dbt Cloud group based on their group memberships in the -configured [Identity Provider](sso-overview) for the account. In this way, dbt -Cloud administrators can manage access to dbt Cloud resources via identity -management software like Microsoft Entra ID (formerly Azure AD), Okta, or GSuite. See _SSO Mappings_ below for -more information. - -You can view the groups in your account or create new groups from the **Team > Groups** -page in your Account Settings. - - - - -### SSO Mappings - -SSO Mappings connect Identity Provider (IdP) group membership to dbt Cloud group -membership. When a user logs into dbt Cloud via a supported identity provider, -their IdP group memberships are synced with dbt Cloud. Upon logging in -successfully, the user's group memberships (and therefore, permissions) are -adjusted accordingly within dbt Cloud automatically. - -:::tip Creating SSO Mappings - -While dbt Cloud supports mapping multiple IdP groups to a single dbt Cloud -group, we recommend using a 1:1 mapping to make administration as simple as -possible. Consider using the same name for your dbt Cloud groups and your IdP -groups. - -::: - - -### Permission Sets - -Permission sets are predefined collections of granular permissions. Permission -sets combine low-level permission grants into high-level roles that can be -assigned to groups. Some examples of existing permission sets are: - - Account Admin - - Git Admin - - Job Admin - - Job Viewer - - ...and more - -For a full list of enterprise permission sets, see [Enterprise Permissions](/docs/cloud/manage-access/enterprise-permissions). -These permission sets are available for assignment to groups and control the ability -for users in these groups to take specific actions in the dbt Cloud application. - -In the following example, the _dbt Cloud Owners_ group is configured with the -**Account Admin** permission set on _All Projects_ and the **Job Admin** permission -set on the _Internal Analytics_ project. - - - - -### Manual assignment - -dbt Cloud administrators can manually assign users to groups independently of -IdP attributes. If a dbt Cloud group is configured _without_ any -SSO Mappings, then the group will be _unmanaged_ and dbt Cloud will not adjust -group membership automatically when users log into dbt Cloud via an identity -provider. This behavior may be desirable for teams that have connected an identity -provider, but have not yet configured SSO Mappings between dbt Cloud and the -IdP. 
- -If an SSO Mapping is added to an _unmanaged_ group, then it will become -_managed_, and dbt Cloud may add or remove users to the group automatically at -sign-in time based on the user's IdP-provided group membership information. - - -## FAQs -- **When are IdP group memberships updated for SSO Mapped groups?** Group memberships - are updated every time a user logs into dbt Cloud via a supported SSO provider. If - you've changed group memberships in your identity provider or dbt Cloud, ask your - users to log back into dbt Cloud for these group memberships to be synchronized. - -- **Can I set up SSO without RBAC?** Yes, see the documentation on - [Manual Assignment](#manual-assignment) above for more information on using - SSO without RBAC. - -- **Can I configure a user's License Type based on IdP Attributes?** Yes, see - the docs on [managing license types](/docs/cloud/manage-access/seats-and-users#managing-license-types) - for more information. diff --git a/website/docs/docs/cloud/manage-access/mfa.md b/website/docs/docs/cloud/manage-access/mfa.md index a06251e6468..bcddc04f072 100644 --- a/website/docs/docs/cloud/manage-access/mfa.md +++ b/website/docs/docs/cloud/manage-access/mfa.md @@ -7,6 +7,13 @@ sidebar: null # Multi-factor authentication +:::important + + +dbt Cloud enforces multi-factor authentication (MFA) for all users with username and password credentials. If MFA is not set up, you will see a notification bar prompting you to configure one of the supported methods when you log in. If you do not, you will have to configure MFA upon subsequent logins, or you will be unable to access dbt Cloud. + +::: + dbt Cloud provides multiple options for multi-factor authentication (MFA). MFA provides an additional layer of security to username and password logins for Developer and Team plan accounts. The available MFA methods are: - SMS verification code (US-based phone numbers only) diff --git a/website/docs/docs/cloud/manage-access/self-service-permissions.md b/website/docs/docs/cloud/manage-access/self-service-permissions.md index a5bdba825c2..6b326645d44 100644 --- a/website/docs/docs/cloud/manage-access/self-service-permissions.md +++ b/website/docs/docs/cloud/manage-access/self-service-permissions.md @@ -52,33 +52,33 @@ The following tables outline the access that users have if they are assigned a D | Account-level permission| Owner | Member | Read-only license| IT license | |:------------------------|:-----:|:------:|:----------------:|:------------:| -| Account settings | W | W | | W | -| Billing | W | | | W | -| Invitations | W | W | | W | -| Licenses | W | R | | W | -| Users | W | R | | W | -| Project (create) | W | W | | W | -| Connections | W | W | | W | -| Service tokens | W | | | W | -| Webhooks | W | W | | | +| Account settings | W | W | - | W | +| Billing | W | - | - | W | +| Invitations | W | W | - | W | +| Licenses | W | R | - | W | +| Users | W | R | - | W | +| Project (create) | W | W | - | W | +| Connections | W | W | - | W | +| Service tokens | W | - | - | W | +| Webhooks | W | W | - | - | #### Project permissions for account roles |Project-level permission | Owner | Member | Read-only | IT license | |:------------------------|:-----:|:-------:|:---------:|:----------:| -| Adapters | W | W | R | | -| Connections | W | W | R | | -| Credentials | W | W | R | | -| Custom env. 
variables | W | W | R | | -| Develop (IDE or dbt Cloud CLI)| W | W | | | -| Environments | W | W | R | | -| Jobs | W | W | R | | -| dbt Explorer | W | W | R | | -| Permissions | W | R | | | -| Profile | W | W | R | | -| Projects | W | W | R | | -| Repositories | W | W | R | | -| Runs | W | W | R | | -| Semantic Layer Config | W | W | R | | +| Adapters | W | W | R | - | +| Connections | W | W | R | - | +| Credentials | W | W | R | - | +| Custom env. variables | W | W | R | - | +| Develop (IDE or dbt Cloud CLI)| W | W | - | - | +| Environments | W | W | R | - | +| Jobs | W | W | R | - | +| dbt Explorer | W | W | R | - | +| Permissions | W | R | - | - | +| Profile | W | W | R | - | +| Projects | W | W | R | - | +| Repositories | W | W | R | - | +| Runs | W | W | R | - | +| Semantic Layer Config | W | W | R | - | diff --git a/website/docs/docs/cloud/manage-access/set-up-bigquery-oauth.md b/website/docs/docs/cloud/manage-access/set-up-bigquery-oauth.md index 9a356814111..e528e2ebc1f 100644 --- a/website/docs/docs/cloud/manage-access/set-up-bigquery-oauth.md +++ b/website/docs/docs/cloud/manage-access/set-up-bigquery-oauth.md @@ -25,13 +25,14 @@ To use BigQuery in the dbt Cloud IDE, all developers must: ### Locate the redirect URI value To get started, locate the connection's redirect URI for configuring BigQuery OAuth. To do so: - - Select the gear menu in the upper left corner and choose **Account settings** + - Navigate to your account name, above your profile icon on the left side panel + - Select **Account settings** from the menu - From the left sidebar, select **Projects** - Choose the project from the list - Select **Connection** to edit the connection details - Locate the **Redirect URI** field under the **OAuth 2.0 Settings** section. Copy this value to your clipboard to use later on. - + ### Creating a BigQuery OAuth 2.0 client ID and secret To get started, you need to create a client ID and secret for [authentication](https://cloud.google.com/bigquery/docs/authentication) with BigQuery. This client ID and secret will be stored in dbt Cloud to manage the OAuth connection between dbt Cloud users and BigQuery. @@ -64,10 +65,12 @@ Now that you have an OAuth app set up in BigQuery, you'll need to add the client ### Authenticating to BigQuery Once the BigQuery OAuth app is set up for a dbt Cloud project, each dbt Cloud user will need to authenticate with BigQuery in order to use the IDE. To do so: -- Select the gear menu in the upper left corner and choose **Profile settings** +- Navigate to your account name, above your profile icon on the left side panel +- Select **Account settings** from the menu - From the left sidebar, select **Credentials** - Choose the project from the list - Select **Authenticate BigQuery Account** + You will then be redirected to BigQuery and asked to approve the drive, cloud platform, and BigQuery scopes, unless the connection is less privileged. diff --git a/website/docs/docs/cloud/manage-access/set-up-databricks-oauth.md b/website/docs/docs/cloud/manage-access/set-up-databricks-oauth.md index e5c42c3fa59..067d51513b7 100644 --- a/website/docs/docs/cloud/manage-access/set-up-databricks-oauth.md +++ b/website/docs/docs/cloud/manage-access/set-up-databricks-oauth.md @@ -45,11 +45,11 @@ You can use the following table to set up the redirect URLs for your application ### Configure the Connection in dbt Cloud (dbt Cloud project admin) Now that you have an OAuth app set up in Databricks, you'll need to add the client ID and secret to dbt Cloud. 
To do so: - - go to Settings by clicking the gear in the top right. - - on the left, select **Projects** under **Account Settings** - - choose your project from the list - - select **Connection** to edit the connection details - - add the `OAuth Client ID` and `OAuth Client Secret` from the Databricks OAuth app under the **Optional Settings** section + - From dbt Cloud, click on your account name in the left side menu and select **Account settings** + - Select **Projects** from the menu + - Choose your project from the list + - Select **Connection** to edit the connection details + - Add the `OAuth Client ID` and `OAuth Client Secret` from the Databricks OAuth app under the **Optional Settings** section @@ -57,7 +57,8 @@ Now that you have an OAuth app set up in Databricks, you'll need to add the clie Once the Databricks connection via OAuth is set up for a dbt Cloud project, each dbt Cloud user will need to authenticate with Databricks in order to use the IDE. To do so: -- Click the gear icon at the top right and select **Profile settings**. +- From dbt Cloud, click on your account name in the left side menu and select **Account settings** +- Select **Profile settings**. - Select **Credentials**. - Choose your project from the list - Select `OAuth` as the authentication method, and click **Save** diff --git a/website/docs/docs/cloud/manage-access/set-up-sso-google-workspace.md b/website/docs/docs/cloud/manage-access/set-up-sso-google-workspace.md index e4ff998015c..2b2575efc57 100644 --- a/website/docs/docs/cloud/manage-access/set-up-sso-google-workspace.md +++ b/website/docs/docs/cloud/manage-access/set-up-sso-google-workspace.md @@ -117,7 +117,7 @@ If the verification information looks appropriate, then you have completed the c ## Setting up RBAC Now you have completed setting up SSO with GSuite, the next steps will be to set up -[RBAC groups](/docs/cloud/manage-access/enterprise-permissions) to complete your access control configuration. +[RBAC groups](/docs/cloud/manage-access/about-user-access#role-based-access-control-) to complete your access control configuration. ## Troubleshooting diff --git a/website/docs/docs/cloud/manage-access/set-up-sso-microsoft-entra-id.md b/website/docs/docs/cloud/manage-access/set-up-sso-microsoft-entra-id.md index 4658141034c..81463cf9ee5 100644 --- a/website/docs/docs/cloud/manage-access/set-up-sso-microsoft-entra-id.md +++ b/website/docs/docs/cloud/manage-access/set-up-sso-microsoft-entra-id.md @@ -61,6 +61,13 @@ Depending on your Microsoft Entra ID settings, your App Registration page might ### Azure <-> dbt Cloud User and Group mapping +:::important + +There is a [limitation](https://learn.microsoft.com/en-us/entra/identity/hybrid/connect/how-to-connect-fed-group-claims#important-caveats-for-this-functionality) on the number of groups Azure will emit (capped at 150) via the SSO token, meaning if a user belongs to more than 150 groups, it will appear as though they belong to none. To prevent this, configure [group assignments](https://learn.microsoft.com/en-us/entra/identity/enterprise-apps/assign-user-or-group-access-portal?pivots=portal) with the dbt Cloud app in Azure and set a [group claim](https://learn.microsoft.com/en-us/entra/identity/hybrid/connect/how-to-connect-fed-group-claims#add-group-claims-to-tokens-for-saml-applications-using-sso-configuration) so Azure emits only the relevant groups. + +::: + + The Azure users and groups you will create in the following steps are mapped to groups created in dbt Cloud based on the group name. 
Reference the docs on [enterprise permissions](enterprise-permissions) for additional information on how users, groups, and permission sets are configured in dbt Cloud. ### Adding users to an Enterprise application @@ -120,8 +127,9 @@ To complete setup, follow the steps below in the dbt Cloud application. ### Supplying credentials -25. Click the gear icon at the top right and select **Profile settings**. To the left, select **Single Sign On** under **Account Settings**. -26. Click the **Edit** button and supply the following SSO details: +25. From dbt Cloud, click on your account name in the left side menu and select **Account settings**. +26. Click **Single sign-on** from the menu. +27. Click the **Edit** button and supply the following SSO details: | Field | Value | | ----- | ----- | diff --git a/website/docs/docs/cloud/manage-access/set-up-sso-okta.md b/website/docs/docs/cloud/manage-access/set-up-sso-okta.md index 53986513ce2..fda32f118ef 100644 --- a/website/docs/docs/cloud/manage-access/set-up-sso-okta.md +++ b/website/docs/docs/cloud/manage-access/set-up-sso-okta.md @@ -190,4 +190,4 @@ configured in the steps above. ## Setting up RBAC Now you have completed setting up SSO with Okta, the next steps will be to set up -[RBAC groups](/docs/cloud/manage-access/enterprise-permissions) to complete your access control configuration. +[RBAC groups](/docs/cloud/manage-access/about-user-access#role-based-access-control-) to complete your access control configuration. diff --git a/website/docs/docs/cloud/manage-access/set-up-sso-saml-2.0.md b/website/docs/docs/cloud/manage-access/set-up-sso-saml-2.0.md index 7083e7ac5f8..34c1a91fbee 100644 --- a/website/docs/docs/cloud/manage-access/set-up-sso-saml-2.0.md +++ b/website/docs/docs/cloud/manage-access/set-up-sso-saml-2.0.md @@ -16,7 +16,7 @@ Currently supported features include: This document details the steps to integrate dbt Cloud with an identity provider in order to configure Single Sign On and [role-based access control](/docs/cloud/manage-access/about-user-access#role-based-access-control). -## Auth0 Multi-tenant URIs +## Auth0 URIs diff --git a/website/docs/docs/cloud/manage-access/sso-overview.md b/website/docs/docs/cloud/manage-access/sso-overview.md index 6b6527df753..e922a073fc8 100644 --- a/website/docs/docs/cloud/manage-access/sso-overview.md +++ b/website/docs/docs/cloud/manage-access/sso-overview.md @@ -12,7 +12,7 @@ dbt Cloud supports JIT (Just-in-Time) provisioning and IdP-initiated login. You - You have a dbt Cloud account enrolled in the Enterprise plan. [Contact us](mailto:sales@getdbt.com) to learn more and enroll. -## Auth0 Multi-tenant URIs +## Auth0 URIs diff --git a/website/docs/docs/cloud/migration.md b/website/docs/docs/cloud/migration.md index 76d881e7389..2665b8f6a97 100644 --- a/website/docs/docs/cloud/migration.md +++ b/website/docs/docs/cloud/migration.md @@ -15,6 +15,13 @@ Your account will be automatically migrated on or after its scheduled date. Howe ## Recommended actions +:::info Rescheduling your migration + +If you're on the dbt Cloud Enterprise tier, you can postpone your account migration by up to 45 days. To reschedule your migration, navigate to **Account Settings** → **Migration guide**. + +For help, contact the dbt Support Team at [support@getdbt.com](mailto:support@getdbt.com). +::: + We highly recommended you take these actions: - Ensure pending user invitations are accepted or note outstanding invitations. Pending user invitations might be voided during the migration. 
You can resend user invitations after the migration is complete. diff --git a/website/docs/docs/cloud/secure/about-privatelink.md b/website/docs/docs/cloud/secure/about-privatelink.md index 731cef3f019..f19790fd708 100644 --- a/website/docs/docs/cloud/secure/about-privatelink.md +++ b/website/docs/docs/cloud/secure/about-privatelink.md @@ -7,10 +7,13 @@ sidebar_label: "About PrivateLink" import SetUpPages from '/snippets/_available-tiers-privatelink.md'; import PrivateLinkHostnameWarning from '/snippets/_privatelink-hostname-restriction.md'; +import CloudProviders from '/snippets/_privatelink-across-providers.md'; -PrivateLink enables a private connection from any dbt Cloud Multi-Tenant environment to your data platform hosted on AWS using [AWS PrivateLink](https://aws.amazon.com/privatelink/) technology. PrivateLink allows dbt Cloud customers to meet security and compliance controls as it allows connectivity between dbt Cloud and your data platform without traversing the public internet. This feature is supported in most regions across NA, Europe, and Asia, but [contact us](https://www.getdbt.com/contact/) if you have questions about availability. +PrivateLink enables a private connection from any dbt Cloud Multi-Tenant environment to your data platform hosted on a cloud provider, such as [AWS](https://aws.amazon.com/privatelink/) or [Azure](https://azure.microsoft.com/en-us/products/private-link), using that provider’s PrivateLink technology. PrivateLink allows dbt Cloud customers to meet security and compliance controls as it allows connectivity between dbt Cloud and your data platform without traversing the public internet. This feature is supported in most regions across NA, Europe, and Asia, but [contact us](https://www.getdbt.com/contact/) if you have questions about availability. + + ### Cross-region PrivateLink diff --git a/website/docs/docs/cloud/secure/databricks-privatelink.md b/website/docs/docs/cloud/secure/databricks-privatelink.md index a02683e1269..aaa6e0c6eb7 100644 --- a/website/docs/docs/cloud/secure/databricks-privatelink.md +++ b/website/docs/docs/cloud/secure/databricks-privatelink.md @@ -8,11 +8,14 @@ pagination_next: null import SetUpPages from '/snippets/_available-tiers-privatelink.md'; import PrivateLinkSLA from '/snippets/_PrivateLink-SLA.md'; +import CloudProviders from '/snippets/_privatelink-across-providers.md'; The following steps will walk you through the setup of a Databricks AWS PrivateLink or Azure Private Link endpoint in the dbt Cloud multi-tenant environment. + + ## Configure AWS PrivateLink 1. Locate your [Databricks instance name](https://docs.databricks.com/en/workspace/workspace-details.html#workspace-instance-names-urls-and-ids) @@ -31,7 +34,7 @@ The following steps will walk you through the setup of a Databricks AWS PrivateL 1. Once dbt Cloud support has notified you that setup is complete, [register the VPC endpoint in Databricks](https://docs.databricks.com/administration-guide/cloud-configurations/aws/privatelink.html#step-3-register-privatelink-objects-and-attach-them-to-a-workspace) and attach it to the workspace: - [Register your VPC endpoint](https://docs.databricks.com/en/security/network/classic/vpc-endpoints.html) — Register the VPC endpoint using the VPC endpoint ID provided by dbt Support. 
- [Create a Private Access Settings object](https://docs.databricks.com/en/security/network/classic/private-access-settings.html) — Create a Private Access Settings (PAS) object with your desired public access settings, and setting Private Access Level to **Endpoint**. Choose the registered endpoint created in the previous step. - - [Create or update your workspace](https://docs.databricks.com/en/security/network/classic/privatelink.html#step-3d-create-or-update-the-workspace-front-end-back-end-or-both) — Create a workspace, or update your an existing workspace. Under **Advanced configurations → Private Link** choose the private access settings object created in the previous step. + - [Create or update your workspace](https://docs.databricks.com/en/security/network/classic/privatelink.html#step-3d-create-or-update-the-workspace-front-end-back-end-or-both) — Create a workspace, or update an existing workspace. Under **Advanced configurations → Private Link** choose the private access settings object created in the previous step. :::warning If using an existing Databricks workspace, all workloads running in the workspace need to be stopped to enable Private Link. Workloads also can't be started for another 20 minutes after making changes. From the [Databricks documentation](https://docs.databricks.com/en/security/network/classic/privatelink.html#step-3d-create-or-update-the-workspace-front-end-back-end-or-both): diff --git a/website/docs/docs/cloud/secure/postgres-privatelink.md b/website/docs/docs/cloud/secure/postgres-privatelink.md index 76b7774fcec..4d670354686 100644 --- a/website/docs/docs/cloud/secure/postgres-privatelink.md +++ b/website/docs/docs/cloud/secure/postgres-privatelink.md @@ -7,11 +7,14 @@ sidebar_label: "PrivateLink for Postgres" import SetUpPages from '/snippets/_available-tiers-privatelink.md'; import PrivateLinkTroubleshooting from '/snippets/_privatelink-troubleshooting.md'; import PrivateLinkCrossZone from '/snippets/_privatelink-cross-zone-load-balancing.md'; +import CloudProviders from '/snippets/_privatelink-across-providers.md'; A Postgres database, hosted either in AWS or in a properly connected on-prem data center, can be accessed through a private network connection using AWS Interface-type PrivateLink. The type of Target Group connected to the Network Load Balancer (NLB) may vary based on the location and type of Postgres instance being connected, as explained in the following steps. + + ## Configuring Postgres interface-type PrivateLink ### 1. Provision AWS resources @@ -96,4 +99,4 @@ Once dbt Cloud support completes the configuration, you can start creating new c 4. Configure the remaining data platform details. 5. Test your connection and save it. 
- \ No newline at end of file + diff --git a/website/docs/docs/cloud/secure/redshift-privatelink.md b/website/docs/docs/cloud/secure/redshift-privatelink.md index 16d14badc05..75924cf76a9 100644 --- a/website/docs/docs/cloud/secure/redshift-privatelink.md +++ b/website/docs/docs/cloud/secure/redshift-privatelink.md @@ -8,6 +8,7 @@ sidebar_label: "PrivateLink for Redshift" import SetUpPages from '/snippets/_available-tiers-privatelink.md'; import PrivateLinkTroubleshooting from '/snippets/_privatelink-troubleshooting.md'; import PrivateLinkCrossZone from '/snippets/_privatelink-cross-zone-load-balancing.md'; +import CloudProviders from '/snippets/_privatelink-across-providers.md'; @@ -17,6 +18,8 @@ AWS provides two different ways to create a PrivateLink VPC endpoint for a Redsh dbt Cloud supports both types of endpoints, but there are a number of [considerations](https://docs.aws.amazon.com/redshift/latest/mgmt/managing-cluster-cross-vpc.html#managing-cluster-cross-vpc-considerations) to take into account when deciding which endpoint type to use. Redshift-managed provides a far simpler setup with no additional cost, which might make it the preferred option for many, but may not be an option in all environments. Based on these criteria, you will need to determine which is the right type for your system. Follow the instructions from the section below that corresponds to your chosen endpoint type. + + :::note Redshift Serverless While Redshift Serverless does support Redshift-managed type VPC endpoints, this functionality is not currently available across AWS accounts. Due to this limitation, an Interface-type VPC endpoint service must be used for Redshift Serverless cluster PrivateLink connectivity from dbt Cloud. ::: @@ -125,4 +128,4 @@ Once dbt Cloud support completes the configuration, you can start creating new c 4. Configure the remaining data platform details. 5. Test your connection and save it. - \ No newline at end of file + diff --git a/website/docs/docs/cloud/secure/snowflake-privatelink.md b/website/docs/docs/cloud/secure/snowflake-privatelink.md index c6775be2444..dc0cb64ba31 100644 --- a/website/docs/docs/cloud/secure/snowflake-privatelink.md +++ b/website/docs/docs/cloud/secure/snowflake-privatelink.md @@ -6,11 +6,14 @@ sidebar_label: "PrivateLink for Snowflake" --- import SetUpPages from '/snippets/_available-tiers-privatelink.md'; +import CloudProviders from '/snippets/_privatelink-across-providers.md'; The following steps walk you through the setup of a Snowflake AWS PrivateLink or Azure Private Link endpoint in a dbt Cloud multi-tenant environment. + + :::note Snowflake SSO with PrivateLink Users connecting to Snowflake using SSO over a PrivateLink connection from dbt Cloud will also require access to a PrivateLink endpoint from their local workstation. @@ -94,12 +97,18 @@ Once dbt Cloud support completes the configuration, you can start creating new c 4. Configure the remaining data platform details. 5. Test your connection and save it. -## Enable the connection in Snowflake +### Enable the connection in Snowflake hosted on Azure + +:::note + +AWS private internal stages are not currently supported. + +::: To complete the setup, follow the remaining steps from the Snowflake setup guides. 
The instructions vary based on the platform: -- [Snowflake AWS PrivateLink](https://docs.snowflake.com/en/user-guide/admin-security-privatelink) - [Snowflake Azure Private Link](https://docs.snowflake.com/en/user-guide/privatelink-azure) +- [Azure private endpoints for internal stages](https://docs.snowflake.com/en/user-guide/private-internal-stages-azure) There are some nuances for each connection and you will need a Snowflake administrator. As the Snowflake administrator, call the `SYSTEM$AUTHORIZE_STAGE_PRIVATELINK_ACCESS` function using the privateEndpointResourceID value as the function argument. This authorizes access to the Snowflake internal stage through the private endpoint. @@ -107,14 +116,12 @@ There are some nuances for each connection and you will need a Snowflake adminis USE ROLE ACCOUNTADMIN; --- AWS PrivateLink -SELECT SYSTEMS$AUTHORIZE_STATE_PRIVATELINK_ACCESS ( `AWS VPC ID` ); - -- Azure Private Link -SELECT SYSTEMS$AUTHORIZE_STATE_PRIVATELINK_ACCESS ( `AZURE PRIVATE ENDPOINT RESOURCE ID` ); +SELECT SYSTEM$AUTHORIZE_STAGE_PRIVATELINK_ACCESS ( `AZURE PRIVATE ENDPOINT RESOURCE ID` ); ``` + ## Configuring Network Policies If your organization uses [Snowflake Network Policies](https://docs.snowflake.com/en/user-guide/network-policies) to restrict access to your Snowflake account, you will need to add a network rule for dbt Cloud. diff --git a/website/docs/docs/cloud/use-dbt-copilot.md b/website/docs/docs/cloud/use-dbt-copilot.md index 30def967f96..48e5ffa6fa7 100644 --- a/website/docs/docs/cloud/use-dbt-copilot.md +++ b/website/docs/docs/cloud/use-dbt-copilot.md @@ -1,22 +1,73 @@ --- title: "Use dbt Copilot" sidebar_label: "Use dbt Copilot" -description: "Use the dbt Copilot AI engine to generate documentation, tests, and semantic models from scratch, giving you the flexibility to modify or fix generated code." +description: "Use dbt Copilot to generate documentation, tests, semantic models, and SQL code from scratch, giving you the flexibility to modify or fix generated code." --- # Use dbt Copilot -Use dbt Copilot to generate documentation, tests, and semantic models from scratch, giving you the flexibility to modify or fix generated code. To access and use this AI engine: +Use dbt Copilot to generate documentation, tests, semantic models, and code from scratch, giving you the flexibility to modify or fix generated code. -1. Navigate to the dbt Cloud IDE and select a SQL model file under the **File Explorer**. +This page explains how to use dbt Copilot to: -2. In the **Console** section (under the **File Editor**), click **dbt Copilot** to view the available AI options. +- [Generate resources](#generate-resources) — Save time by using dbt Copilot’s generation button to generate documentation, tests, and semantic model files during your development. +- [Generate and edit code](#generate-and-edit-code) — Use natural language prompts to generate SQL code from scratch or to edit an existing SQL file by using keyboard shortcuts or highlighting code. + +## Generate resources +Generate documentation, tests, and semantic model resources with the click of a button using dbt Copilot, saving you time. To access and use this AI feature: + +1. Navigate to the dbt Cloud IDE and select a SQL model file under the **File Explorer**. +2. In the **Console** section (under the **File Editor**), click **dbt Copilot** to view the available AI options. 3. Select the available options to generate the YAML config: **Generate Documentation**, **Generate Tests**, or **Generate Semantic Model**.
- To generate multiple YAML configs for the same model, click each option separately. dbt Copilot intelligently saves the YAML config in the same file. - 4. Verify the AI-generated code. You can update or fix the code as needed. - 5. Click **Save As**. You should see the file changes under the **Version control** section. + +## Generate and edit code + +dbt Copilot also allows you to generate SQL code directly within the SQL file in the dbt Cloud IDE, using natural language prompts. This means you can rewrite or add specific portions of the SQL file without needing to edit the entire file. + +This intelligent AI tool streamlines SQL development by reducing errors, scaling effortlessly with complexity, and saving valuable time. dbt Copilot's [prompt window](#use-the-prompt-window), accessible by keyboard shortcut, handles repetitive or complex SQL generation effortlessly so you can focus on high-level tasks. + +Use Copilot's prompt window for use cases like: + +- Writing advanced transformations +- Performing bulk edits efficiently +- Crafting complex patterns like regex + +### Use the prompt window + +Access dbt Copilot's AI prompt window using the keyboard shortcut Cmd+B (Mac) or Ctrl+B (Windows) to: + +#### 1. Generate SQL from scratch +- Use the keyboard shortcuts Cmd+B (Mac) or Ctrl+B (Windows) to generate SQL from scratch. +- Enter your instructions to generate SQL code tailored to your needs using natural language. +- Ask dbt Copilot to fix the code or add a specific portion of the SQL file. + + + +#### 2. Edit existing SQL code +- Highlight a section of SQL code and press Cmd+B (Mac) or Ctrl+B (Windows) to open the prompt window for editing. +- Use this to refine or modify specific code snippets based on your needs. +- Ask dbt Copilot to fix the code or add a specific portion of the SQL file. + +#### 3. Review changes with the diff view to quickly assess the impact before you apply the changes +- When a suggestion is generated, Copilot displays a visual "diff" view to help you compare the proposed changes with your existing code: - **Green**: Indicates new code that will be added if you accept the suggestion. - **Red**: Highlights existing code that will be removed or replaced by the suggested changes. + +#### 4. Accept or reject suggestions +- **Accept**: If the generated SQL meets your requirements, click the **Accept** button to apply the changes to your `.sql` file directly in the IDE. +- **Reject**: If the suggestion doesn't align with your request or prompt, click **Reject** to discard the generated SQL without making changes and start again. + +#### 5. Regenerate code +- To regenerate, press the **Escape** button on your keyboard (or click the Reject button in the popup). This removes the generated code and puts your cursor back into the prompt text area. +- Update your prompt and press **Enter** to try another generation. Press **Escape** again to close the popover entirely. + +Once you've accepted a suggestion, you can continue to use the prompt window to generate additional SQL code and commit your changes to the branch.
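To make the prompt-window flow concrete, here's a rough sketch of the kind of suggestion dbt Copilot might return. The prompt wording, the `stg_orders` model, and the column names are hypothetical, and the SQL you actually get will depend on your project and your prompt. A prompt like "sum order amounts by customer and month for completed orders" could yield something along these lines, which you would then review in the diff view and accept or reject:

```sql
-- Hypothetical dbt Copilot suggestion (illustrative only)
-- Prompt: "sum order amounts by customer and month for completed orders"
select
    customer_id,
    date_trunc('month', ordered_at) as order_month,
    sum(order_amount) as total_completed_revenue
from {{ ref('stg_orders') }}  -- assumes a staging model named stg_orders exists
where status = 'completed'
group by customer_id, date_trunc('month', ordered_at)
```

As with the resource generation options, verify any generated SQL before accepting it — Copilot's output is a starting point, not a guaranteed-correct transformation.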
+ + + diff --git a/website/docs/docs/cloud/use-visual-editor.md b/website/docs/docs/cloud/use-visual-editor.md new file mode 100644 index 00000000000..2ab6a5b82d1 --- /dev/null +++ b/website/docs/docs/cloud/use-visual-editor.md @@ -0,0 +1,83 @@ +--- +title: "Edit and create dbt models" +id: use-visual-editor +sidebar_label: "Edit and create dbt models" +description: "Access and use the visual editor to create or edit dbt models through a visual, drag-and-drop experience inside of dbt Cloud." +pagination_prev: "docs/cloud/visual-editor-interface" +--- + +# Edit and create dbt models + +

+Access and use the dbt Cloud visual editor to create or edit dbt models through a visual, drag-and-drop experience. Use the built-in AI for custom code generation in your development experience. +

+ +:::tip Beta feature +The visual editor provides users with a seamless drag-and-drop experience inside of dbt Cloud. It's available in private beta for [dbt Cloud Enterprise accounts](https://www.getdbt.com/pricing). + +To join the private beta, [register your interest](https://docs.google.com/forms/d/e/1FAIpQLScPjRGyrtgfmdY919Pf3kgqI5E95xxPXz-8JoVruw-L9jVtxg/viewform) or reach out to your account team to begin this process. +::: + +## Prerequisites +- You have a [dbt Cloud Enterprise](https://www.getdbt.com/pricing) account +- You have a [developer license](/docs/cloud/manage-access/seats-and-users) with developer credentials set up +- You have an existing dbt Cloud project +- Your Development environment is on a supported [release track](/docs/dbt-versions/cloud-release-tracks) to receive ongoing updates. +- You have the AI-powered features toggle enabled + +## Access visual editor + +Before accessing the editor, you should have a dbt Cloud project already set up. This includes a Git repository, data platform connection, environments, and developer credentials. If you don't have this set up, please contact your dbt Cloud Admin. + +To access the visual editor: +- Type in the following URL, replacing the ACCOUNT_ID and ENVIRONMENT_ID with your own account and environment ID: `https://ACCESS_URL/visual-editor/ACCOUNT_ID/env/ENVIRONMENT_ID/` + - The environment ID must have had runs that generated catalogs in it. + +- For example, if your region is North America multi-tenant, your account ID is 10, and your environment ID with a generated catalog run is 100, the URL would be: + + - `https://cloud.getdbt.com/visual-editor/10/env/100/` + + + +## Create a model +To create a dbt SQL model, click on **Create a new model** and perform the following steps. Note that you can't create source models in the visual editor. This is because you need to have a production run with sources already created. + +1. Drag an operator from the operator toolbar and drop it onto the canvas. +2. Click on the operator to open its configuration panel: + - **Model**: Select the model and columns you want to use. + - **Join**: Define the join conditions and choose columns from both tables. + - **Select**: Pick the columns you need from the model. + - **Aggregate**: Specify the aggregation functions and the columns they apply to. + - **Formula**: Add the formula to create a new column. Use the built-in AI code generator to help generate SQL code by clicking on the question mark (?) icon. Enter your prompt and wait to see the results. + - **Filter**: Set the conditions to filter data. + - **Order**: Select the columns to sort by and the sort order. + - **Limit**: Set the maximum number of rows you want to return. +3. View the **Output** and **SQL Code** tabs. + - Each operator has an Output tab that allows you to preview the data from that configured node. + - The Code tab displays the SQL code generated by the node's configuration. Use this to see the SQL for your visual model config (a sketch of this compiled SQL follows these steps). +4. Connect the operators by dragging your cursor from the operator's "+" start point and linking it to the other operators you want to connect to. This should create a connector line. + - Doing this allows the data to flow from the source table through various transformations you configured, to the final output. +5. Keep building your dbt model and ensure you confirm the output through the **Output** tab.
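As a point of reference for step 3 — this is only a sketch with made-up model and column names, not output copied from the visual editor — the **SQL Code** tab for a simple Model → Join → Aggregate chain might show compiled SQL shaped roughly like this:

```sql
-- Illustrative only: the kind of SQL a Model -> Join -> Aggregate chain could compile to
-- (stg_customers, stg_orders, and the column names are hypothetical)
select
    customers.customer_id,
    count(orders.order_id) as order_count
from {{ ref('stg_customers') }} as customers
left join {{ ref('stg_orders') }} as orders
    on customers.customer_id = orders.customer_id
group by customers.customer_id
```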
+ + +## Edit an existing model +To edit an existing model, navigate to the Visual Editor, click on the **Get Started** button on the upper right, and click **Edit existing model**. This will allow you to select the model you'd like to edit. + + + +## Version control + +Testing and documenting your models is an important part of the development process. + +Stay tuned! Coming very soon, you'll be able to version control your dbt models in the visual editor. This ensures you can track changes and revert to previous versions if needed. + + diff --git a/website/docs/docs/cloud/visual-editor-interface.md b/website/docs/docs/cloud/visual-editor-interface.md new file mode 100644 index 00000000000..16e5a038d0e --- /dev/null +++ b/website/docs/docs/cloud/visual-editor-interface.md @@ -0,0 +1,84 @@ +--- +title: "Navigate the interface" +id: visual-editor-interface +sidebar_label: "Navigate the interface" +description: "The visual editor interface contains an operator toolbar, operators, and a canvas to help you create dbt models through a seamless drag-and-drop experience in dbt Cloud." +pagination_next: "docs/cloud/use-visual-editor" +pagination_prev: "docs/cloud/visual-editor" + +--- + +# Navigate the interface +

+The visual editor interface contains an operator toolbar, operators, canvas, built-in AI, and more to help you create dbt models through a seamless drag-and-drop experience in dbt Cloud. +

+ +:::tip Beta feature +The visual editor provides users with a seamless and visual, drag-and-drop experience inside dbt Cloud. It's available in private beta for [dbt Cloud Enterprise accounts](https://www.getdbt.com/pricing). + +To join the private beta, [register your interest](https://docs.google.com/forms/d/e/1FAIpQLScPjRGyrtgfmdY919Pf3kgqI5E95xxPXz-8JoVruw-L9jVtxg/viewform) or reach out to your account team to begin this process. +::: + +This page offers comprehensive definitions and terminology of user interface elements, allowing you to navigate the dbt Cloud visual editor landscape with ease. + +The visual editor interface is composed of: + +- **Operator toolbar** — Located at the top of the interface, the toolbar displays all the nodes available. Use the toggle on the left of the toolbar to display or hide it. +- **Operators** — perform specific transformations or configurations (such as model, join, aggregate, filter, and so on). Use connectors to link the operators and build a complete data transformation pipeline. +- **Canvas** — The main whiteboard space below the node toolbar. The canvas allows you to create or modify models through a sleek drag-and-drop experience. +- **Configuration panel** — Each operator has a configuration panel that opens when you click on it. The configuration panel allows you to configure the operator, review the current model, preview changes to the table, view the SQL code for the node, and delete the operator. + +## Operators + +The operator toolbar above the canvas contains the different transformation operators available to use. Use each operator to configure or perform specific tasks, like adding filters or joining models by dragging an operator onto the canvas. You can connect operators using the connector line, which allows you to form a complete dbt model for your data transformation. + + + +Here the following operators are available: +- **Model**: This represents a data model. Use this to select the source table and the columns you want to include. There are no limits to the number of models you can have in a session. +- **Join**: Join two models and configure the join conditions by selecting which columns to include from each table. Requires two inputs. For example, you might want to join both tables using the 'ID' column found in both tables. +- **Select**: Use this to 'select' specific columns from a table. +- **Aggregate**: Allows you to perform aggregations like GROUP, SUM, AVG, COUNT, and so on. +- **Formula**: Create new columns using custom SQL formulas. Use a built-in AI code generator to generate SQL by clicking the ? icon. For example, you can use the formula node to only extract the email domain and ask the AI code generator to help you write the SQL for that code extraction. +- **Filter**: Filter data based on conditions you set. +- **Order**: Sort data by specific columns. +- **Limit**: Limits the number of rows returned back. + +When you click on each operator, it opens a configuration panel. The configuration panel allows you to configure the operator, review the current model, preview changes to the model, view the SQL code for the node, and delete the operator. + + + +If you have any feedback on additional operators that you might need, we'd love to hear it! Please contact your dbt Labs account team and share your thoughts. + +## Canvas + +The visual editor has a sleek drag-and-drop canvas interface that allows you to create or modify dbt SQL models. 
It's like a digital whiteboard space that allows analysts to deliver trustworthy data. Use the canvas to: + +- Drag-and-drop operators to create and configure your model(s) +- Generate SQL code using the built-in AI generator +- Zoom in or out for better visualization +- Version-control your dbt models +- [Coming soon] Test and document your created models + + + +### Connector + +Connectors allow you to connect your operators to create dbt models. Once you've added operators to the canvas: +- Hover over the "+" sign next to the operator and click. +- Drag your cursor between the operator's "+" start point to the other node you want to connect to. This should create a connector line. +- As an example, to create a join, connect one operator to the "L" (Left) and the other to the "R" (Right). The endpoints are located to the left of the operator so you can easily drag the connectors to the endpoint. + + + +## Configuration panel +Each operator has a configuration side panel that opens when you click on it. The configuration panel allows you to configure the operator, review the current model, preview changes, view the SQL code for the operator, and delete the operator. + +The configuration side panel has the following: +- Configure tab — This section allows you to configure the operator to your specified requirements, such as using the built-in AI code generator to generate SQL. +- Input tab — This section allows you to view the data for the current source table. Not available for model operators. +- Output tab — This section allows you to preview the data for the modified source model. +- Code — This section allows you to view the underlying SQL code for the data transformation. + + diff --git a/website/docs/docs/cloud/visual-editor.md b/website/docs/docs/cloud/visual-editor.md new file mode 100644 index 00000000000..8dc9dfa2863 --- /dev/null +++ b/website/docs/docs/cloud/visual-editor.md @@ -0,0 +1,37 @@ +--- +title: "About the visual editor" +id: visual-editor +sidebar_label: "About the visual editor" +description: "The visual editor enables analysts to quickly create and visualize dbt models through a visual, drag-and-drop experience inside of dbt Cloud." +pagination_next: "docs/cloud/visual-editor-interface" +pagination_prev: null +--- + +# About the visual editor + +

+The dbt Cloud visual editor helps analysts quickly create, edit, and visualize dbt models through a visual, drag-and-drop experience and with a built-in AI for custom code generation. +

+ +:::tip Beta feature +The visual editor in dbt Cloud provides users with a seamless and visual, drag-and-drop experience inside dbt Cloud. It's available in private beta for [dbt Cloud Enterprise accounts](https://www.getdbt.com/pricing). + +To join the private beta, [register your interest](https://docs.google.com/forms/d/e/1FAIpQLScPjRGyrtgfmdY919Pf3kgqI5E95xxPXz-8JoVruw-L9jVtxg/viewform) or reach out to your account team to begin this process. +::: + +The visual editor allows organizations to enjoy the many benefits of code-driven development—such as increased precision, ease of debugging, and ease of validation — while retaining the flexibility to have different contributors develop wherever they are most comfortable. Users can also take advantage of built-in AI for custom code generation, making it an end-to-end frictionless experience. + +These models compile directly to SQL and are indistinguishable from other dbt models in your projects: +- Visual models are version-controlled in your backing Git provider. +- All models are accessible across projects in [dbt Mesh](/best-practices/how-we-mesh/mesh-1-intro). +- Models can be materialized into production through [dbt Cloud orchestration](/docs/deploy/deployments), or be built directly into a user's development schema. +- Integrate with [dbt Explorer](/docs/collaborate/explore-projects) and the [dbt Cloud IDE](/docs/cloud/dbt-cloud-ide/develop-in-the-cloud). + + + +## Feedback + +Please note, always review AI-generated code and content as it may produce incorrect results. The visual editor features and/or functionality may be added or eliminated as part of the beta trial. + +To give feedback, please reach out to your dbt Labs account team. We appreciate your feedback and suggestions as we improve the visual editor. + diff --git a/website/docs/docs/collaborate/auto-exposures.md b/website/docs/docs/collaborate/auto-exposures.md index 9b25a2fb305..495906cee75 100644 --- a/website/docs/docs/collaborate/auto-exposures.md +++ b/website/docs/docs/collaborate/auto-exposures.md @@ -9,11 +9,16 @@ image: /img/docs/cloud-integrations/auto-exposures/explorer-lineage.jpg # Auto-exposures -As a data team, it’s critical that you have context into the downstream use cases and users of your data products. Auto-exposures integrates natively with Tableau (Power BI coming soon) and auto-generates downstream lineage in dbt Explorer for a richer experience. +As a data team, it’s critical that you have context into the downstream use cases and users of your data products. Auto-exposures integrate natively with Tableau (Power BI coming soon) and auto-generate downstream lineage in dbt Explorer for a richer experience. -Auto-exposures helps users understand how their models are used in downstream analytics tools to inform investments and reduce incidents — ultimately building trust and confidence in data products. It imports and auto-generates exposures based on Tableau dashboards, with user-defined curation. +Auto-exposures help users understand how their models are used in downstream analytics tools to inform investments and reduce incidents — ultimately building trust and confidence in data products. It imports and auto-generates exposures based on Tableau dashboards, with user-defined curation. -Auto-exposures is available on [Versionless](/docs/dbt-versions/versionless-cloud) and on [dbt Cloud Enterprise](https://www.getdbt.com/pricing/) plans. 
+## Supported plans +Auto-exposures is available on the [dbt Cloud Enterprise](https://www.getdbt.com/pricing/) plan. Currently, you can only connect to a single Tableau site on the same server. + +:::info Tableau Server +If you're using Tableau Server, you need to [allowlist dbt Cloud's IP addresses](/docs/cloud/about-cloud/access-regions-ip-addresses) for your dbt Cloud region. +::: For more information on how to set up auto-exposures, prerequisites, and more — refer to [configure auto-exposures in Tableau and dbt Cloud](/docs/cloud-integrations/configure-auto-exposures). diff --git a/website/docs/docs/collaborate/build-and-view-your-docs.md b/website/docs/docs/collaborate/build-and-view-your-docs.md index 06716a67674..1a16f034eff 100644 --- a/website/docs/docs/collaborate/build-and-view-your-docs.md +++ b/website/docs/docs/collaborate/build-and-view-your-docs.md @@ -24,7 +24,7 @@ To set up a job to generate docs: 1. In the top left, click **Deploy** and select **Jobs**. 2. Create a new job or select an existing job and click **Settings**. 3. Under **Execution Settings**, select **Generate docs on run** and click **Save**. - + *Note, for dbt Docs users you need to configure the job to generate docs when it runs, then manually link that job to your project. Proceed to [configure project documentation](#configure-project-documentation) so your project generates the documentation when this job runs.* @@ -51,12 +51,11 @@ dbt Docs, available on developer plans or dbt Core users, generates a website fr You configure project documentation to generate documentation when the job you set up in the previous section runs. In the project settings, specify the job that generates documentation artifacts for that project. Once you configure this setting, subsequent runs of the job will automatically include a step to generate documentation. -1. Click the gear icon in the top right. -2. Select **Account Settings**. -3. Navigate to **Projects** and select the project that needs documentation. -4. Click **Edit**. -5. Under **Artifacts**, select the job that should generate docs when it runs and click **Save**. - +1. From dbt Cloud, click on your account name in the left side menu and select **Account settings**. +2. Navigate to **Projects** and select the project that needs documentation. +3. Click **Edit**. +4. Under **Artifacts**, select the job that should generate docs when it runs and click **Save**. + :::tip Use dbt Explorer for a richer documentation experience For a richer and more interactive experience, try out [dbt Explorer](/docs/collaborate/explore-projects), available on [Team or Enterprise plans](https://www.getdbt.com/pricing/). It includes map layers of your DAG, keyword search, interacts with the IDE, model performance, project recommendations, and more. diff --git a/website/docs/docs/collaborate/data-tile.md b/website/docs/docs/collaborate/data-tile.md index 70318922d68..1d5b26e26b7 100644 --- a/website/docs/docs/collaborate/data-tile.md +++ b/website/docs/docs/collaborate/data-tile.md @@ -92,7 +92,7 @@ Follow these steps to embed the data health tile in PowerBI: ```html Website = - "" + "" ``` @@ -120,12 +120,34 @@ Follow these steps to embed the data health tile in Tableau: 3. Insert a **Web Page** object. 4. Insert the URL and click **Ok**. 
- `https://metadata.cloud.getdbt.com/exposure-tile?uniqueId=exposure.snowflake_tpcds_sales_spoke.customer360_test&environmentType=production&environmentId=220370&token=` - + ```html + https://metadata.ACCESS_URL/exposure-tile?uniqueId=exposure.EXPOSURE_NAME&environmentType=production&environmentId=220370&token= + ``` + *Note, replace the placeholders with your actual values.* 5. You should now see the data health tile embedded in your Tableau dashboard. + + + +Follow these steps to embed the data health tile in Sigma: + + + +1. Create a dashboard in Sigma and connect to your database to pull in the data. +2. Ensure you've copied the URL or iFrame snippet available in dbt Explorer's **Data health** section, under the **Embed data health into your dashboard** toggle. +3. Add a new embedded UI element in your Sigma Workbook in the following format: + + ```html + https://metadata.ACCESS_URL/exposure-tile?uniqueId=exposure.EXPOSURE_NAME&environmentType=production&environmentId=ENV_ID_NUMBER&token= + ``` + + *Note, replace the placeholders with your actual values.* +4. You should now see the data health tile embedded in your Sigma dashboard. + + + ## Job-based data health diff --git a/website/docs/docs/collaborate/explore-projects.md b/website/docs/docs/collaborate/explore-projects.md index a4388a8696e..3780d100932 100644 --- a/website/docs/docs/collaborate/explore-projects.md +++ b/website/docs/docs/collaborate/explore-projects.md @@ -164,12 +164,12 @@ Under the the **Models** option, you can filter on model properties (access or m -Trust signal icons offer a quick, at-a-glance view of data health when browsing your models in dbt Explorer. These icons keep you informed on the status of your model's health using the indicators **Healthy**, **Caution**, **Degraded**, and **Unknown**. For accurate health data, ensure the resource is up-to-date and has had a recent job run. +Trust signal icons offer a quick, at-a-glance view of data health when browsing your resources in dbt Explorer. These icons keep you informed on the status of your resource's health using the indicators **Healthy**, **Caution**, **Degraded**, and **Unknown**. For accurate health data, ensure the resource is up-to-date and has had a recent job run. Supported resources are models, sources, and exposures. Each trust signal icon reflects key data health components, such as test success status, missing resource descriptions, absence of builds in 30-day windows, and more. To access trust signals: -- Use the search function or click on **Models** or **Sources** under the **Resource** tab. +- Use the search function or click on **Models**, **Sources** or **Exposures** under the **Resource** tab. - View the icons under the **Health** column. - Hover over or click the trust signal to see detailed information. - For sources, the trust signal also indicates the source freshness status. diff --git a/website/docs/docs/collaborate/govern/model-contracts.md b/website/docs/docs/collaborate/govern/model-contracts.md index d30024157c8..9b75e518719 100644 --- a/website/docs/docs/collaborate/govern/model-contracts.md +++ b/website/docs/docs/collaborate/govern/model-contracts.md @@ -205,13 +205,11 @@ At the same time, for models with many columns, we understand that this can mean When comparing to a previous project state, dbt will look for breaking changes that could impact downstream consumers. If breaking changes are detected, dbt will present a contract error. -Breaking changes include: -- Removing an existing column. 
-- Changing the `data_type` of an existing column. -- Removing or modifying one of the `constraints` on an existing column (dbt v1.6 or higher). -- Removing a contracted model by deleting, renaming, or disabling it (dbt v1.9 or higher). - - versioned models will raise an error. - - unversioned models will raise a warning. +import BreakingChanges from '/snippets/_versions-contracts.md'; -More details are available in the [contract reference](/reference/resource-configs/contract#detecting-breaking-changes). + +More details are available in the [contract reference](/reference/resource-configs/contract#detecting-breaking-changes). diff --git a/website/docs/docs/collaborate/govern/project-dependencies.md b/website/docs/docs/collaborate/govern/project-dependencies.md index 2e73eee028b..bbda99960cd 100644 --- a/website/docs/docs/collaborate/govern/project-dependencies.md +++ b/website/docs/docs/collaborate/govern/project-dependencies.md @@ -18,9 +18,9 @@ This year, dbt Labs is introducing an expanded notion of `dependencies` across m ## Prerequisites - Available in [dbt Cloud Enterprise](https://www.getdbt.com/pricing). If you have an Enterprise account, you can unlock these features by designating a [public model](/docs/collaborate/govern/model-access) and adding a [cross-project ref](#how-to-write-cross-project-ref). -- Use a supported version of dbt (v1.6, v1.7, or go versionless with "[Versionless](/docs/dbt-versions/upgrade-dbt-version-in-cloud#versionless)") for both the upstream ("producer") project and the downstream ("consumer") project. - Define models in an upstream ("producer") project that are configured with [`access: public`](/reference/resource-configs/access). You need at least one successful job run after defining their `access`. - Define a deployment environment in the upstream ("producer") project [that is set to be your Production environment](/docs/deploy/deploy-environments#set-as-production-environment), and ensure it has at least one successful job run in that environment. +- If the upstream project has a Staging environment, run a job in that Staging environment to ensure the downstream cross-project ref resolves. - Each project `name` must be unique in your dbt Cloud account. For example, if you have a dbt project (codebase) for the `jaffle_marketing` team, you should not create separate projects for `Jaffle Marketing - Dev` and `Jaffle Marketing - Prod`. That isolation should instead be handled at the environment level. - We are adding support for environment-level permissions and data warehouse connections; please contact your dbt Labs account team for beta access. - The `dbt_project.yml` file is case-sensitive, which means the project name must exactly match the name in your `dependencies.yml`. For example, if your project name is `jaffle_marketing`, you should use `jaffle_marketing` (not `JAFFLE_MARKETING`) in all related files. @@ -110,7 +110,10 @@ Read [Why use a staging environment](/docs/deploy/deploy-environments#why-use-a- #### Staging with downstream dependencies -dbt Cloud begins using the Staging environment to resolve cross-project references from downstream projects as soon as it exists in a project without "fail-over" to Production. To avoid causing downtime for downstream developers, you should define and trigger a job before marking the environment as Staging: +dbt Cloud begins using the Staging environment to resolve cross-project references from downstream projects as soon as it exists in a project without "fail-over" to Production. 
This means that dbt Cloud will consistently use metadata from the Staging environment to resolve references in downstream projects, even if there haven't been any successful runs in the configured Staging environment. + +To avoid causing downtime for downstream developers, you should define and trigger a job before marking the environment as Staging: + 1. Create a new environment, but do NOT mark it as **Staging**. 2. Define a job in that environment. 3. Trigger the job to run, and ensure it completes successfully. diff --git a/website/docs/docs/collaborate/model-query-history.md b/website/docs/docs/collaborate/model-query-history.md index 0f43c9b163f..872a5a295da 100644 --- a/website/docs/docs/collaborate/model-query-history.md +++ b/website/docs/docs/collaborate/model-query-history.md @@ -13,22 +13,30 @@ Model query history allows you to: - Provides data teams insight, so they can focus their time and infrastructure spend on the worthwhile used data products. - Enable analysts to find the most popular models used by other people. -Model query history is powered by a single consumption query of the query log table in your data warehouse aggregated on a daily basis. It currently supports Snowflake and BigQuery only, with additional platforms coming soon. +Model query history is powered by a single consumption query of the query log table in your data warehouse aggregated on a daily basis. + + -:::info What is a consumption query? Consumption query is a metric of queries in your dbt project that has used the model in a given time. It filters down to `select` statements only to gauge model consumption and excludes dbt model build and test executions. So for example, if `model_super_santi` was queried 10 times in the past week, it would count as having 10 consumption queries for that particular time period. + + + +:::info Support for Snowflake (Enterprise tier or higher) and BigQuery + +Model query history for Snowflake users is **only available for Enterprise tier or higher**. The feature also supports BigQuery. Additional platforms coming soon. ::: ## Prerequisites To access the features, you should meet the following: -1. You have a dbt Cloud account on the [Enterprise plan](https://www.getdbt.com/pricing/). +1. You have a dbt Cloud account on the [Enterprise plan](https://www.getdbt.com/pricing/). Single-tenant accounts should contact their account representative for setup. 2. You have set up a [production](https://docs.getdbt.com/docs/deploy/deploy-environments#set-as-production-environment) deployment environment for each project you want to explore, with at least one successful job run. 3. You have [admin permissions](/docs/cloud/manage-access/enterprise-permissions) in dbt Cloud to edit project settings or production environment settings. 4. Use Snowflake or BigQuery as your data warehouse and can enable query history permissions or work with an admin to do so. Support for additional data platforms coming soon. + - For Snowflake users: You **must** have a Snowflake Enterprise tier or higher subscription. 
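For intuition about what a consumption query measures — this is not the query dbt Cloud actually runs, and the table, filters, and model name below are simplified assumptions — a daily rollup of `select` statements against a model in Snowflake's query history looks conceptually like this:

```sql
-- Conceptual sketch only; dbt Cloud's actual consumption query differs
select
    date_trunc('day', start_time) as query_date,
    count(*) as consumption_queries
from snowflake.account_usage.query_history
where query_type = 'SELECT'                    -- consumption only; excludes dbt builds and tests
  and query_text ilike '%model_super_santi%'   -- hypothetical model name from the example above
group by 1
order by 1
```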
## Enable query history in dbt Cloud diff --git a/website/docs/docs/collaborate/project-recommendations.md b/website/docs/docs/collaborate/project-recommendations.md index 12007c6b88b..c9499579e54 100644 --- a/website/docs/docs/collaborate/project-recommendations.md +++ b/website/docs/docs/collaborate/project-recommendations.md @@ -20,7 +20,7 @@ The Recommendations overview page includes two top-level metrics measuring the t - **Model test coverage** — The percent of models in your project (models not from a package or imported via dbt Mesh) with at least one dbt test configured on them. - **Model documentation coverage** — The percent of models in your project (models not from a package or imported via dbt Mesh) with a description. - + ## List of rules The following table lists the rules currently defined in the `dbt_project_evaluator` [package](https://hub.getdbt.com/dbt-labs/dbt_project_evaluator/latest/). diff --git a/website/docs/docs/community-adapters.md b/website/docs/docs/community-adapters.md index 3af4e15b32b..895e47a8fa3 100644 --- a/website/docs/docs/community-adapters.md +++ b/website/docs/docs/community-adapters.md @@ -7,7 +7,8 @@ Community adapters are adapter plugins contributed and maintained by members of | Data platforms (click to view setup guide) ||| | ------------------------------------------ | -------------------------------- | ------------------------------------- | -| [Clickhouse](/docs/core/connect-data-platform/clickhouse-setup) | [Databend Cloud](/docs/core/connect-data-platform/databend-setup) | [Doris & SelectDB](/docs/core/connect-data-platform/doris-setup) | +| [Clickhouse](/docs/core/connect-data-platform/clickhouse-setup) | [CrateDB](/docs/core/connect-data-platform/cratedb-setup) +| [Databend Cloud](/docs/core/connect-data-platform/databend-setup) | [Doris & SelectDB](/docs/core/connect-data-platform/doris-setup) | | [DuckDB](/docs/core/connect-data-platform/duckdb-setup) | [Exasol Analytics](/docs/core/connect-data-platform/exasol-setup) | [Extrica](/docs/core/connect-data-platform/extrica-setup) | | [Hive](/docs/core/connect-data-platform/hive-setup) | [IBM DB2](/docs/core/connect-data-platform/ibmdb2-setup) | [Impala](/docs/core/connect-data-platform/impala-setup) | | [Infer](/docs/core/connect-data-platform/infer-setup) | [iomete](/docs/core/connect-data-platform/iomete-setup) | [MindsDB](/docs/core/connect-data-platform/mindsdb-setup) | diff --git a/website/docs/docs/core/connect-data-platform/about-core-connections.md b/website/docs/docs/core/connect-data-platform/about-core-connections.md index 461aeea2e87..221f495d054 100644 --- a/website/docs/docs/core/connect-data-platform/about-core-connections.md +++ b/website/docs/docs/core/connect-data-platform/about-core-connections.md @@ -32,8 +32,6 @@ If you're using dbt from the command line (CLI), you'll need a profiles.yml file For detailed info, you can refer to the [Connection profiles](/docs/core/connect-data-platform/connection-profiles). - - ## Adapter features The following table lists the features available for adapters: @@ -55,5 +53,3 @@ For adapters that support it, you can partially build the catalog. This allows t ### Source freshness You can measure source freshness using the warehouse metadata tables on supported adapters. This allows for calculating source freshness without using the [`loaded_at_field`](/reference/resource-properties/freshness#loaded_at_field) and without querying the table directly. 
This is faster and more flexible (though it might sometimes be inaccurate, depending on how the warehouse tracks altered tables). You can override this with the `loaded_at_field` in the [source config](/reference/source-configs). If the adapter doesn't support this, you can still use the `loaded_at_field`. - - diff --git a/website/docs/docs/core/connect-data-platform/bigquery-setup.md b/website/docs/docs/core/connect-data-platform/bigquery-setup.md index eedc3646f89..8b1867ef620 100644 --- a/website/docs/docs/core/connect-data-platform/bigquery-setup.md +++ b/website/docs/docs/core/connect-data-platform/bigquery-setup.md @@ -390,9 +390,9 @@ my-profile: ### Running Python models on Dataproc -To run dbt Python models on GCP, dbt uses companion services, Dataproc and Cloud Storage, that offer tight integrations with BigQuery. You may use an existing Dataproc cluster and Cloud Storage bucket, or create new ones: -- https://cloud.google.com/dataproc/docs/guides/create-cluster -- https://cloud.google.com/storage/docs/creating-buckets +import BigQueryDataproc from '/snippets/_bigquery-dataproc.md'; + + Then, add the bucket name, cluster name, and cluster region to your connection profile: diff --git a/website/docs/docs/core/connect-data-platform/cratedb-setup.md b/website/docs/docs/core/connect-data-platform/cratedb-setup.md new file mode 100644 index 00000000000..fa1b9833e59 --- /dev/null +++ b/website/docs/docs/core/connect-data-platform/cratedb-setup.md @@ -0,0 +1,62 @@ +--- +title: "CrateDB setup" +description: "Read this guide to learn about the CrateDB data platform setup in dbt." +id: "cratedb-setup" +meta: + maintained_by: Crate.io, Inc. + authors: 'CrateDB maintainers' + github_repo: 'crate/dbt-cratedb2' + pypi_package: 'dbt-cratedb2' + min_core_version: 'v1.0.0' + cloud_support: Not Supported + min_supported_version: 'n/a' + slack_channel_name: 'Community Forum' + slack_channel_link: 'https://community.cratedb.com/' + platform_name: 'CrateDB' + config_page: '/reference/resource-configs/no-configs' +--- + +import SetUpPages from '/snippets/_setup-pages-intro.md'; + + + + +[CrateDB] is compatible with PostgreSQL, so its dbt adapter strongly depends on +dbt-postgres, documented at [PostgreSQL profile setup]. + +CrateDB targets are configured exactly the same way, see also [PostgreSQL +configuration], with just a few things to consider which are special to +CrateDB. Relevant details are outlined at [using dbt with CrateDB], +which also includes up-to-date information. + + +## Profile configuration + +CrateDB targets should be set up using a configuration like this minimal sample +of settings in your [`profiles.yml`] file. + + + +```yaml +cratedb_analytics: + target: dev + outputs: + dev: + type: cratedb + host: [clustername].aks1.westeurope.azure.cratedb.net + port: 5432 + user: [username] + pass: [password] + dbname: crate # Do not change this value. CrateDB's only catalog is `crate`. + schema: doc # Define the schema name. CrateDB's default schema is `doc`. 
+``` + + + + + +[CrateDB]: https://cratedb.com/database +[PostgreSQL configuration]: https://docs.getdbt.com/reference/resource-configs/postgres-configs +[PostgreSQL profile setup]: https://docs.getdbt.com/docs/core/connect-data-platform/postgres-setup +[`profiles.yml`]: https://docs.getdbt.com/docs/core/connect-data-platform/profiles.yml +[using dbt with CrateDB]: https://cratedb.com/docs/guide/integrate/dbt/ diff --git a/website/docs/docs/core/connect-data-platform/dremio-setup.md b/website/docs/docs/core/connect-data-platform/dremio-setup.md index 21d0ee2956b..69f2b14fc4f 100644 --- a/website/docs/docs/core/connect-data-platform/dremio-setup.md +++ b/website/docs/docs/core/connect-data-platform/dremio-setup.md @@ -60,10 +60,6 @@ Next, configure the profile for your project. When you initialize a project, you create one of these three profiles. You must configure it before trying to connect to Dremio Cloud or Dremio Software. -## Profiles - -When you initialize a project, you create one of these three profiles. You must configure it before trying to connect to Dremio Cloud or Dremio Software. - * Profile for Dremio Cloud * Profile for Dremio Software with Username/Password Authentication * Profile for Dremio Software with Authentication Through a Personal Access Token @@ -149,9 +145,7 @@ For descriptions of the configurations in these profiles, see [Configurations](# -## Configurations - -### Configurations Common to Profiles for Dremio Cloud and Dremio Software +## Configurations Common to Profiles for Dremio Cloud and Dremio Software | Configuration | Required? | Default Value | Description | diff --git a/website/docs/docs/core/connect-data-platform/glue-setup.md b/website/docs/docs/core/connect-data-platform/glue-setup.md index f2cf717147a..a074038a87f 100644 --- a/website/docs/docs/core/connect-data-platform/glue-setup.md +++ b/website/docs/docs/core/connect-data-platform/glue-setup.md @@ -175,7 +175,7 @@ Please to update variables between **`<>`**, here are explanations of these argu ### Configuration of the local environment -Because **`dbt`** and **`dbt-glue`** adapters are compatible with Python versions 3.7, 3.8, and 3.9, check the version of Python: +Because **`dbt`** and **`dbt-glue`** adapters are compatible with Python versions 3.9 or higher, check the version of Python: ```bash $ python3 --version diff --git a/website/docs/docs/core/connect-data-platform/mssql-setup.md b/website/docs/docs/core/connect-data-platform/mssql-setup.md index f2b17278df3..31fa93874cf 100644 --- a/website/docs/docs/core/connect-data-platform/mssql-setup.md +++ b/website/docs/docs/core/connect-data-platform/mssql-setup.md @@ -4,7 +4,7 @@ description: "Read this guide to learn about the Microsoft SQL Server warehouse id: "mssql-setup" meta: maintained_by: Community - authors: 'dbt-msft community (https://github.com/dbt-msft)' + authors: 'Mikael Ene & dbt-msft community (https://github.com/dbt-msft)' github_repo: 'dbt-msft/dbt-sqlserver' pypi_package: 'dbt-sqlserver' min_core_version: 'v0.14.0' diff --git a/website/docs/docs/core/connect-data-platform/redshift-setup.md b/website/docs/docs/core/connect-data-platform/redshift-setup.md index ce3e8658045..4c00558d782 100644 --- a/website/docs/docs/core/connect-data-platform/redshift-setup.md +++ b/website/docs/docs/core/connect-data-platform/redshift-setup.md @@ -31,7 +31,7 @@ import SetUpPages from '/snippets/_setup-pages-intro.md'; | `port` | 5439 | | | `dbname` | my_db | Database name| | `schema` | my_schema | Schema name| -| `connect_timeout` | `None` 
or 30 | Number of seconds before connection times out| +| `connect_timeout` | 30 | Number of seconds before connection times out. Default is `None`| | `sslmode` | prefer | optional, set the sslmode to connect to the database. Default prefer, which will use 'verify-ca' to connect. For more information on `sslmode`, see Redshift note below| | `role` | None | Optional, user identifier of the current session| | `autocreate` | false | Optional, default false. Creates user if they do not exist | diff --git a/website/docs/docs/core/connect-data-platform/risingwave-setup.md b/website/docs/docs/core/connect-data-platform/risingwave-setup.md index 29ebc378c4e..320ba2a506d 100644 --- a/website/docs/docs/core/connect-data-platform/risingwave-setup.md +++ b/website/docs/docs/core/connect-data-platform/risingwave-setup.md @@ -28,7 +28,7 @@ import SetUpPages from '/snippets/_setup-pages-intro.md'; ## Connecting to RisingWave with dbt-risingwave -Before connecting to RisingWave, ensure that RisingWave is installed and running. For more information about how to get RisingWave up and running, see the [RisingWave quick start guide](https://docs.risingwave.com/docs/dev/get-started/). +Before connecting to RisingWave, ensure that RisingWave is installed and running. For more information about how to get RisingWave up and running, see the [RisingWave quick start guide](https://docs.risingwave.com/get-started/quickstart). To connect to RisingWave with dbt, you need to add a RisingWave profile to your dbt profile file (`~/.dbt/profiles.yml`). Below is an example RisingWave profile. Revise the field values when necessary. @@ -71,17 +71,17 @@ The dbt models for managing data transformations in RisingWave are similar to ty |Materializations| Supported|Notes| |----|----|----| -|`table` |Yes |Creates a [table](https://docs.risingwave.com/docs/dev/sql-create-table/). To use this materialization, add `{{ config(materialized='table') }}` to your model SQL files. | -|`view`|Yes | Creates a [view](https://docs.risingwave.com/docs/dev/sql-create-view/). To use this materialization, add `{{ config(materialized='view') }}` to your model SQL files. | -|`ephemeral`|Yes| This materialization uses [common table expressions](https://docs.risingwave.com/docs/dev/query-syntax-with-clause/) in RisingWave under the hood. To use this materialization, add `{{ config(materialized='ephemeral') }}` to your model SQL files.| +|`table` |Yes |Creates a [table](https://docs.risingwave.com/sql/commands/sql-create-table). To use this materialization, add `{{ config(materialized='table') }}` to your model SQL files. | +|`view`|Yes | Creates a [view](https://docs.risingwave.com/sql/commands/sql-create-view). To use this materialization, add `{{ config(materialized='view') }}` to your model SQL files. | +|`ephemeral`|Yes| This materialization uses [common table expressions](https://docs.risingwave.com/sql/query-syntax/with-clause) in RisingWave under the hood. To use this materialization, add `{{ config(materialized='ephemeral') }}` to your model SQL files.| |`materializedview`| To be deprecated. |It is available only for backward compatibility purposes (for v1.5.1 of the dbt-risingwave adapter plugin). If you are using v1.6.0 and later versions of the dbt-risingwave adapter plugin, use `materialized_view` instead.| -|`materialized_view`| Yes| Creates a [materialized view](https://docs.risingwave.com/docs/dev/sql-create-mv/). This materialization corresponds the `incremental` one in dbt. 
To use this materialization, add `{{ config(materialized='materialized_view') }}` to your model SQL files.| +|`materialized_view`| Yes| Creates a [materialized view](https://docs.risingwave.com/sql/commands/sql-create-mv). This materialization corresponds the `incremental` one in dbt. To use this materialization, add `{{ config(materialized='materialized_view') }}` to your model SQL files.| | `incremental`|No|Please use `materialized_view` instead. Since RisingWave is designed to use materialized view to manage data transformation in an incremental way, you can just use the `materialized_view` materialization.| -|`source`| Yes| Creates a [source](https://docs.risingwave.com/docs/dev/sql-create-source/). To use this materialization, add \{\{ config(materialized='source') \}\} to your model SQL files. You need to provide your create source statement as a whole in this model. See [Example model files](https://docs.risingwave.com/docs/dev/use-dbt/#example-model-files) for details.| -|`table_with_connector`| Yes| Creates a table with connector settings. In RisingWave, a table with connector settings is similar to a source. The difference is that a table object with connector settings persists raw streaming data in the source, while a source object does not. To use this materialization, add `{{ config(materialized='table_with_connector') }}` to your model SQL files. You need to provide your create table with connector statement as a whole in this model (see [Example model files](https://docs.risingwave.com/docs/dev/use-dbt/#example-model-files) for details). Because dbt tables have their own semantics, RisingWave use `table_with_connector` to distinguish itself from a dbt table.| -|`sink`| Yes| Creates a [sink](https://docs.risingwave.com/docs/dev/sql-create-sink/). To use this materialization, add `{{ config(materialized='sink') }}` to your SQL files. You need to provide your create sink statement as a whole in this model. See [Example model files](https://docs.risingwave.com/docs/dev/use-dbt/#example-model-files) for details.| +|`source`| Yes| Creates a [source](https://docs.risingwave.com/sql/commands/sql-create-source). To use this materialization, add \{\{ config(materialized='source') \}\} to your model SQL files. You need to provide your create source statement as a whole in this model. See [Example model files](https://docs.risingwave.com/integrations/other/dbt#example-model-files) for details.| +|`table_with_connector`| Yes| Creates a table with connector settings. In RisingWave, a table with connector settings is similar to a source. The difference is that a table object with connector settings persists raw streaming data in the source, while a source object does not. To use this materialization, add `{{ config(materialized='table_with_connector') }}` to your model SQL files. You need to provide your create table with connector statement as a whole in this model (see [Example model files](https://docs.risingwave.com/integrations/other/dbt#example-model-files) for details). Because dbt tables have their own semantics, RisingWave use `table_with_connector` to distinguish itself from a dbt table.| +|`sink`| Yes| Creates a [sink](https://docs.risingwave.com/sql/commands/sql-create-sink). To use this materialization, add `{{ config(materialized='sink') }}` to your SQL files. You need to provide your create sink statement as a whole in this model. 
See [Example model files](https://docs.risingwave.com/integrations/other/dbt#example-model-files) for details.| ## Resources -- [RisingWave's guide about using dbt for data transformations](https://docs.risingwave.com/docs/dev/use-dbt/) -- [A demo project using dbt to manage Nexmark benchmark queries in RisingWave](https://docs.risingwave.com/docs/dev/use-dbt/) +- [RisingWave's guide about using dbt for data transformations](https://docs.risingwave.com/integrations/other/dbt) +- [A demo project using dbt to manage Nexmark benchmark queries in RisingWave](https://github.com/risingwavelabs/dbt_rw_nexmark) diff --git a/website/docs/docs/core/connect-data-platform/snowflake-setup.md b/website/docs/docs/core/connect-data-platform/snowflake-setup.md index 266840cafae..b692ba5c0d6 100644 --- a/website/docs/docs/core/connect-data-platform/snowflake-setup.md +++ b/website/docs/docs/core/connect-data-platform/snowflake-setup.md @@ -211,7 +211,7 @@ my-snowflake-db: -### SSO Authentication +### SSO authentication To use SSO authentication for Snowflake, omit a `password` and instead supply an `authenticator` config to your target. `authenticator` can be one of 'externalbrowser' or a valid Okta URL. @@ -332,7 +332,7 @@ my-snowflake-db: -### SSO Authentication +### SSO authentication To use SSO authentication for Snowflake, omit a `password` and instead supply an `authenticator` config to your target. `authenticator` can be one of 'externalbrowser' or a valid Okta URL. @@ -421,6 +421,30 @@ my-snowflake-db: Refer to the [Snowflake docs](https://docs.snowflake.com/en/sql-reference/parameters.html#label-allow-id-token) for info on how to enable this feature in your account. +### OAuth authorization + +To learn how to configure OAuth in Snowflake, refer to their [documentation](https://docs.snowflake.com/en/user-guide/oauth-snowflake-overview). Your Snowflake admin needs to generate an [OAuth token](https://community.snowflake.com/s/article/HOW-TO-OAUTH-TOKEN-GENERATION-USING-SNOWFLAKE-CUSTOM-OAUTH) for your configuration to work. + +Provide the OAUTH_REDIRECT_URI in Snowflake:`http://localhost:PORT_NUMBER`. For example, `http://localhost:8080`. + +Once your Snowflake admin has configured OAuth, add the following to your `profiles.yml` file: + +```yaml + +my-snowflake-db: + target: dev + outputs: + dev: + type: snowflake + account: [account id] + + # The following fields are retrieved from the Snowflake configuration + authenticator: oauth + oauth_client_id: [OAuth client id] + oauth_client_secret: [OAuth client secret] + token: [OAuth refresh token] +``` + ## Configurations The "base" configs for Snowflake targets are shown below. Note that you should also specify auth-related configs specific to the authentication method you are using as described above. diff --git a/website/docs/docs/core/connect-data-platform/spark-setup.md b/website/docs/docs/core/connect-data-platform/spark-setup.md index 01318211c8f..611642e91b7 100644 --- a/website/docs/docs/core/connect-data-platform/spark-setup.md +++ b/website/docs/docs/core/connect-data-platform/spark-setup.md @@ -197,14 +197,9 @@ connect_retries: 3 - - - - ### Server side configuration Spark can be customized using [Application Properties](https://spark.apache.org/docs/latest/configuration.html). Using these properties the execution can be customized, for example, to allocate more memory to the driver process. Also, the Spark SQL runtime can be set through these properties. 
For example, this allows the user to [set a Spark catalogs](https://spark.apache.org/docs/latest/configuration.html#spark-sql). - ## Caveats diff --git a/website/docs/docs/core/connect-data-platform/teradata-setup.md b/website/docs/docs/core/connect-data-platform/teradata-setup.md index df32b07bd0e..f4ffbe37f35 100644 --- a/website/docs/docs/core/connect-data-platform/teradata-setup.md +++ b/website/docs/docs/core/connect-data-platform/teradata-setup.md @@ -8,7 +8,7 @@ meta: github_repo: 'Teradata/dbt-teradata' pypi_package: 'dbt-teradata' min_core_version: 'v0.21.0' - cloud_support: Not Supported + cloud_support: Supported min_supported_version: 'n/a' slack_channel_name: '#db-teradata' slack_channel_link: 'https://getdbt.slack.com/archives/C027B6BHMT3' @@ -18,6 +18,7 @@ meta: Some core functionality may be limited. If you're interested in contributing, check out the source code in the repository listed in the next section. + import SetUpPages from '/snippets/_setup-pages-intro.md'; @@ -26,20 +27,17 @@ import SetUpPages from '/snippets/_setup-pages-intro.md'; ## Python compatibility -| Plugin version | Python 3.6 | Python 3.7 | Python 3.8 | Python 3.9 | Python 3.10 | Python 3.11 | -| -------------- | ----------- | ----------- | ----------- | ----------- | ----------- | ------------ | -| 0.19.0.x | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ -| 0.20.0.x | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ -| 0.21.1.x | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ -| 1.0.0.x | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ -|1.1.x.x | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ -|1.2.x.x | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ -|1.3.x.x | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ -|1.4.x.x | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ -|1.5.x | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ -|1.6.x | ❌ | ❌ | ✅ | ✅ | ✅ | ✅ -|1.7.x | ❌ | ❌ | ✅ | ✅ | ✅ | ✅ -|1.8.x | ❌ | ❌ | ✅ | ✅ | ✅ | ✅ +| Plugin version | Python 3.9 | Python 3.10 | Python 3.11 | Python 3.12 | +|----------------|------------|-------------|-------------|-------------| +| 1.0.0.x | ✅ | ❌ | ❌ | ❌ | +| 1.1.x.x | ✅ | ✅ | ❌ | ❌ | +| 1.2.x.x | ✅ | ✅ | ❌ | ❌ | +| 1.3.x.x | ✅ | ✅ | ❌ | ❌ | +| 1.4.x.x | ✅ | ✅ | ✅ | ❌ | +| 1.5.x | ✅ | ✅ | ✅ | ❌ | +| 1.6.x | ✅ | ✅ | ✅ | ❌ | +| 1.7.x | ✅ | ✅ | ✅ | ❌ | +| 1.8.x | ✅ | ✅ | ✅ | ✅ | ## dbt dependent packages version compatibility @@ -49,6 +47,8 @@ import SetUpPages from '/snippets/_setup-pages-intro.md'; | 1.6.7 | 1.6.7 | 1.1.1 | 1.1.1 | | 1.7.x | 1.7.x | 1.1.1 | 1.1.1 | | 1.8.x | 1.8.x | 1.1.1 | 1.1.1 | +| 1.8.x | 1.8.x | 1.2.0 | 1.2.0 | +| 1.8.x | 1.8.x | 1.3.0 | 1.3.0 | ### Connecting to Teradata diff --git a/website/docs/docs/core/connect-data-platform/trino-setup.md b/website/docs/docs/core/connect-data-platform/trino-setup.md index 4caa56dcb00..06c94d7e7ff 100644 --- a/website/docs/docs/core/connect-data-platform/trino-setup.md +++ b/website/docs/docs/core/connect-data-platform/trino-setup.md @@ -34,7 +34,7 @@ The following profile fields are always required except for `user`, which is als | Field | Example | Description | | --------- | ------- | ----------- | -| `host` | `mycluster.mydomain.com` | The hostname of your cluster.

Don't include the `http://` or `https://` prefix. | +| `host` | `mycluster.mydomain.com`

Format for Starburst Galaxy:
  • `mygalaxyaccountname-myclustername.trino.galaxy.starburst.io`
| The hostname of your cluster.

Don't include the `http://` or `https://` prefix. | | `database` | `my_postgres_catalog` | The name of a catalog in your cluster. | | `schema` | `my_schema` | The name of a schema within your cluster's catalog.

It's _not recommended_ to use schema names that have upper case or mixed case letters. | | `port` | `443` | The port to connect to your cluster. By default, it's 443 for TLS enabled clusters. | diff --git a/website/docs/docs/dbt-cloud-apis/apis-overview.md b/website/docs/docs/dbt-cloud-apis/apis-overview.md index 055edea72b6..05964ace871 100644 --- a/website/docs/docs/dbt-cloud-apis/apis-overview.md +++ b/website/docs/docs/dbt-cloud-apis/apis-overview.md @@ -20,4 +20,4 @@ If you want to learn more about webhooks, refer to [Webhooks for your jobs](/doc ## How to Access the APIs -dbt Cloud supports two types of API Tokens: [user tokens](/docs/dbt-cloud-apis/user-tokens) and [service account tokens](/docs/dbt-cloud-apis/service-tokens). Requests to the dbt Cloud APIs can be authorized using these tokens. +dbt Cloud supports two types of API Tokens: [personal access tokens](/docs/dbt-cloud-apis/user-tokens) and [service account tokens](/docs/dbt-cloud-apis/service-tokens). Requests to the dbt Cloud APIs can be authorized using these tokens. diff --git a/website/docs/docs/dbt-cloud-apis/authentication.md b/website/docs/docs/dbt-cloud-apis/authentication.md index 8729cc0641d..43a08d84fd7 100644 --- a/website/docs/docs/dbt-cloud-apis/authentication.md +++ b/website/docs/docs/dbt-cloud-apis/authentication.md @@ -8,7 +8,7 @@ pagination_prev: null
@@ -23,9 +23,7 @@ pagination_prev: null ## Types of API access tokens -**User API keys (Legacy):** User API keys were historically the only method available to access dbt Cloud APIs on the user’s behalf. They are scoped to the user and not the account. User API Keys will eventually be deprecated for the more secure personal access tokens. - -**Personal access tokens (New):** Personal access tokens (PATs) are the new, preferred, and secure way of accessing dbt Cloud APIs on behalf of a user. They are more secure than user API Keys. PATs are scoped to an account and can be enhanced with more granularity and control. +**Personal access tokens:** Preferred and secure way of accessing dbt Cloud APIs on behalf of a user. PATs are scoped to an account and can be enhanced with more granularity and control. **Service tokens:** Service tokens are similar to service accounts and are the preferred method to enable access on behalf of the dbt Cloud account. @@ -33,7 +31,7 @@ pagination_prev: null You should use service tokens broadly for any production workflow where you need a service account. You should use PATs only for developmental workflows _or_ dbt Cloud client workflows that require user context. The following examples show you when to use a personal access token (PAT) or a service token: -* **Connecting a partner integration to dbt Cloud** — Some examples include the [dbt Semantic Layer Google Sheets integration](/docs/cloud-integrations/avail-sl-integrations), Hightouch, Datafold, a custom app you’ve created, etc. These types of integrations should use a service token instead of a PAT because service tokens give you visibility, and you can scope them to only what the integration needs and ensure the least privilege. We highly recommend switching to a service token if you’re using a user API key for these integrations today. +* **Connecting a partner integration to dbt Cloud** — Some examples include the [dbt Semantic Layer Google Sheets integration](/docs/cloud-integrations/avail-sl-integrations), Hightouch, Datafold, a custom app you’ve created, etc. These types of integrations should use a service token instead of a PAT because service tokens give you visibility, and you can scope them to only what the integration needs and ensure the least privilege. We highly recommend switching to a service token if you’re using a personal access token for these integrations today. * **Production Terraform** — Use a service token since this is a production workflow and is acting as a service account and not a user account. * **Cloud CLI** — Use a PAT since the dbt Cloud CLI works within the context of a user (the user is making the requests and has to operate within the context of their user account). * **Testing a custom script and staging Terraform or Postman** — We recommend using a PAT as this is a developmental workflow and is scoped to the user making the changes. When you push this script or Terraform into production, use a service token instead.
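To make the mechanics concrete, here is a rough sketch of how either token type is presented to the dbt Cloud Admin API. The jobs endpoint, the environment variable name, and the placeholders are illustrative assumptions; substitute your own access URL and account ID.

```bash
# Authorize an Admin API request; the header is the same whether the value is a
# personal access token or a service token. Only the token's scope and ownership differ.
curl --request GET \
  --url "https://YOUR_ACCESS_URL/api/v2/accounts/YOUR_ACCOUNT_ID/jobs/" \
  --header "Authorization: Token $DBT_CLOUD_API_TOKEN"
```

The HTTP request looks identical for both token types; the guidance above is about which kind of token should own the credential, not about how the call is made.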
diff --git a/website/docs/docs/dbt-cloud-apis/discovery-api.md b/website/docs/docs/dbt-cloud-apis/discovery-api.md index ca84347ffad..db6819a5e09 100644 --- a/website/docs/docs/dbt-cloud-apis/discovery-api.md +++ b/website/docs/docs/dbt-cloud-apis/discovery-api.md @@ -32,7 +32,7 @@ Use the API to look at historical information like model build time to determine You can use, for example, the [model timing](/docs/deploy/run-visibility#model-timing) tab to help identify and optimize bottlenecks in model builds: - + @@ -50,7 +50,7 @@ Use the API to find and understand dbt assets in integrated tools using informat Data producers must manage and organize data for stakeholders, while data consumers need to quickly and confidently analyze data on a large scale to make informed decisions that improve business outcomes and reduce organizational overhead. The API is useful for discovery data experiences in catalogs, analytics, apps, and machine learning (ML) tools. It can help you understand the origin and meaning of datasets for your analysis. - + @@ -65,7 +65,6 @@ Use the API to review who developed the models and who uses them to help establi Use the API to review dataset changes and uses by examining exposures, lineage, and dependencies. From the investigation, you can learn how to define and build more effective dbt projects. For more details, refer to [Development](/docs/dbt-cloud-apis/discovery-use-cases-and-examples#development). - diff --git a/website/docs/docs/dbt-cloud-apis/discovery-use-cases-and-examples.md b/website/docs/docs/dbt-cloud-apis/discovery-use-cases-and-examples.md index b99853cd547..e095374343f 100644 --- a/website/docs/docs/dbt-cloud-apis/discovery-use-cases-and-examples.md +++ b/website/docs/docs/dbt-cloud-apis/discovery-use-cases-and-examples.md @@ -25,7 +25,7 @@ For performance use cases, people typically query the historical or latest appli It’s helpful to understand how long it takes to build models (tables) and tests to execute during a dbt run. Longer model build times result in higher infrastructure costs and fresh data arriving later to stakeholders. Analyses like these can be in observability tools or ad-hoc queries, like in a notebook. - +
Example query with code diff --git a/website/docs/docs/dbt-cloud-apis/service-tokens.md b/website/docs/docs/dbt-cloud-apis/service-tokens.md index fe8ace5fa34..d9ae52dbc2d 100644 --- a/website/docs/docs/dbt-cloud-apis/service-tokens.md +++ b/website/docs/docs/dbt-cloud-apis/service-tokens.md @@ -12,7 +12,7 @@ If you have service tokens created on or before July 18, 2023, please read [this ::: -Service account tokens enable you to securely authenticate with the dbt Cloud API by assigning each token a narrow set of permissions that more precisely manages access to the API. While similar to [User API tokens](user-tokens), service account tokens belong to an account rather than a user. +Service account tokens enable you to securely authenticate with the dbt Cloud API by assigning each token a narrow set of permissions that more precisely manages access to the API. While similar to [personal access tokens](user-tokens), service account tokens belong to an account rather than a user. You can use service account tokens for system-level integrations that do not run on behalf of any one user. Assign any permission sets available in dbt Cloud to your service account token, which can vary slightly depending on your plan: @@ -25,7 +25,7 @@ You can assign as many permission sets as needed to one token. For more on permi You can generate service tokens if you have a Developer [license](/docs/cloud/manage-access/seats-and-users) and account admin [permissions](/docs/cloud/manage-access/about-user-access#permission-sets). To create a service token in dbt Cloud, follow these steps: -1. Open the **Account Settings** page by clicking the gear icon on the right-hand side. +1. From dbt Cloud, click on your account name in the left side menu and select **Account settings**. 2. On the left sidebar, click on **Service Tokens**. 3. Click the **+ New Token** button to generate a new token. 4. Once the token is generated, you won't be able to view this token again so make sure to save it somewhere safe. @@ -36,80 +36,37 @@ You can assign service account tokens to any permission set available in dbt Clo ### Team plans using service account tokens -The following permissions can be assigned to a service account token on a Team plan. +The following permissions can be assigned to a service account token on a Team plan. Refer to [Enterprise permissions](/docs/cloud/manage-access/enterprise-permissions) for more information about these roles. -**Account Admin**
-Account Admin service tokens have full `read + write` access to an account, so please use them with caution. A Team plan refers to this permission set as an "Owner role." For more on these permissions, see [Account Admin](/docs/cloud/manage-access/enterprise-permissions#account-admin). - -**Metadata Only**
-Metadata-only service tokens authorize requests to the Discovery API. - -**Semantic Layer Only**
-Semantic Layer-only service tokens authorize requests to the Semantic Layer APIs. - -**Job Admin**
-Job admin service tokens can authorize requests for viewing, editing, and creating environments, triggering runs, and viewing historical runs. - -**Job Runner**
-Job runner service tokens can authorize requests for triggering runs and viewing historical runs. - -**Member**
-Member service tokens can authorize requests for viewing and editing resources, triggering runs, and inviting members to the account. Tokens assigned the Member permission set will have the same permissions as a Member user. For more information about Member users, see "[Self-service Team plan permissions](/docs/cloud/manage-access/self-service-permissions)". - -**Read-only**
-Read-only service tokens can authorize requests for viewing a read-only dashboard, viewing generated documentation, and viewing source freshness reports. This token can access and retrieve account-level information endpoints on the [Admin API](/docs/dbt-cloud-apis/admin-cloud-api) and authorize requests to the [Discovery API](/docs/dbt-cloud-apis/discovery-api). +- Account Admin — Account Admin service tokens have full `read + write` access to an account, so please use them with caution. A Team plan refers to this permission set as an "Owner role." +- Billing Admin +- Job Admin +- Metadata Only +- Member +- Read-only +- Semantic Layer Only ### Enterprise plans using service account tokens -The following permissions can be assigned to a service account token on an Enterprise plan. For more details about these permissions, see "[Enterprise permissions](/docs/cloud/manage-access/enterprise-permissions)." - -**Account Admin**
-Account Admin service tokens have full `read + write` access to an account, so please use them with caution. For more on these permissions, see [Account Admin](/docs/cloud/manage-access/enterprise-permissions#account-admin). - -**Security Admin**
-Security Admin service tokens have certain account-level permissions. For more on these permissions, see [Security Admin](/docs/cloud/manage-access/enterprise-permissions#security-admin). - -**Billing Admin**
-Billing Admin service tokens have certain account-level permissions. For more on these permissions, see [Billing Admin](/docs/cloud/manage-access/enterprise-permissions#billing-admin). - -**Manage marketplace apps**
-Used only for service tokens assigned to marketplace apps (for example, the [Snowflake Native app](/docs/cloud-integrations/snowflake-native-app)). - -**Metadata Only**
-Metadata-only service tokens authorize requests to the Discovery API. - -**Semantic Layer Only**
-Semantic Layer-only service tokens authorize requests to the Semantic Layer APIs. - -**Job Admin**
-Job Admin service tokens can authorize requests for viewing, editing, and creating environments, triggering runs, and viewing historical runs. For more on these permissions, see [Job Admin](/docs/cloud/manage-access/enterprise-permissions#job-admin). - -**Account Viewer**
-Account Viewer service tokens have read-only access to dbt Cloud accounts. For more on these permissions, see [Account Viewer](/docs/cloud/manage-access/enterprise-permissions#account-viewer) on the Enterprise Permissions page. - -**Admin**
-Admin service tokens have unrestricted access to projects in dbt Cloud accounts. You have the option to grant that permission all projects in the account or grant the permission only on specific projects. For more on these permissions, see [Admin Service](/docs/cloud/manage-access/enterprise-permissions#admin-service) on the Enterprise Permissions page. - -**Git Admin**
-Git admin service tokens have all the permissions listed in [Git admin](/docs/cloud/manage-access/enterprise-permissions#git-admin) on the Enterprise Permissions page. - -**Database Admin**
-Database admin service tokens have all the permissions listed in [Database admin](/docs/cloud/manage-access/enterprise-permissions#database-admin) on the Enterprise Permissions page. - -**Team Admin**
-Team admin service tokens have all the permissions listed in [Team admin](/docs/cloud/manage-access/enterprise-permissions#team-admin) on the Enterprise Permissions page. - -**Job Viewer**
-Job viewer admin service tokens have all the permissions listed in [Job viewer](/docs/cloud/manage-access/enterprise-permissions#job-viewer) on the Enterprise Permissions page. - -**Developer**
-Developer service tokens have all the permissions listed in [Developer](/docs/cloud/manage-access/enterprise-permissions#developer) on the Enterprise Permissions page. - -**Analyst**
-Analyst admin service tokens have all the permissions listed in [Analyst](/docs/cloud/manage-access/enterprise-permissions#analyst) on the Enterprise Permissions page. - -**Stakeholder**
-Stakeholder service tokens have all the permissions listed in [Stakeholder](/docs/cloud/manage-access/enterprise-permissions#stakeholder) on the Enterprise Permissions page. +Refer to [Enterprise permissions](/docs/cloud/manage-access/enterprise-permissions) for more information about these roles. + +- Account Admin — Account Admin service tokens have full `read + write` access to an account, so please use them with caution. +- Account Viewer +- Admin +- Analyst +- Billing Admin +- Database Admin +- Developer +- Git Admin +- Job Admin +- Job Runner +- Job Viewer +- Manage marketplace apps +- Metadata Only +- Semantic Layer Only +- Security Admin +- Stakeholder +- Team Admin ## Service token update diff --git a/website/docs/docs/dbt-cloud-apis/sl-jdbc.md b/website/docs/docs/dbt-cloud-apis/sl-jdbc.md index 3a9832dd706..d9ce3bf4fd1 100644 --- a/website/docs/docs/dbt-cloud-apis/sl-jdbc.md +++ b/website/docs/docs/dbt-cloud-apis/sl-jdbc.md @@ -56,7 +56,7 @@ The Semantic Layer JDBC API has built-in metadata calls which can provide a user Expand the following toggles for examples and metadata commands: - + You can use this query to fetch all defined metrics in your dbt project: @@ -65,9 +65,9 @@ select * from {{ semantic_layer.metrics() }} ``` - + - + You can use this query to fetch all dimensions for a metric. @@ -77,9 +77,9 @@ Note, metrics is a required argument that lists one or multiple metrics in it. select * from {{ semantic_layer.dimensions(metrics=['food_order_amount'])}} ``` - + - + You can use this query to fetch dimension values for one or multiple metrics and a single dimension. @@ -89,9 +89,9 @@ Note, metrics is a required argument that lists one or multiple metrics, and a s select * from {{ semantic_layer.dimension_values(metrics=['food_order_amount'], group_by=['customer__customer_name'])}} ``` - + - + You can use this query to fetch queryable granularities for a list of metrics. @@ -103,9 +103,9 @@ select * from {{ semantic_layer.queryable_granularities(metrics=['food_order_amount', 'order_gross_profit'])}} ``` - + - + You can use this query to fetch available metrics given dimensions. This command is essentially the opposite of getting dimensions given a list of metrics. @@ -117,9 +117,9 @@ select * from {{ }} ``` - + - + You can use this example query to fetch available granularities for all time dimensions (the similar queryable granularities API call only returns granularities for the primary time dimensions for metrics). @@ -133,9 +133,9 @@ select NAME, QUERYABLE_GRANULARITIES from {{ }} ``` - + - + It may be useful in your application to expose the names of the time dimensions that represent metric_time or the common thread across all metrics. @@ -147,9 +147,44 @@ select * from {{ }} ``` - + + + + +You can filter your metrics to include only those that contain a specific substring (sequence of characters contained within a larger string (text)). Use the `search` argument to specify the substring you want to match. + +```sql +select * from {{ semantic_layer.metrics(search='order') }} +``` + +If no substring is provided, the query returns all metrics. - + + + + +In the case when you don't want to return the full result set from a metadata call, you can paginate the results for both `semantic_layer.metrics()` and `semantic_layer.dimensions()` calls using the `page_size` and `page_number` parameters. + +- `page_size`: This is an optional variable which sets the number of records per page. If left as None, there is no page limit. 
+- `page_number`: This is an optional variable which specifies the page number to retrieve. Defaults to `1` (first page) if not specified. + +Examples: + +```sql +-- Retrieves the 5th page with a page size of 10 metrics +select * from {{ semantic_layer.metrics(page_size=10, page_number=5) }} + +-- Retrieves the 1st page with a page size of 10 metrics +select * from {{ semantic_layer.metrics(page_size=10) }} + +-- Retrieves all metrics without pagination +select * from {{ semantic_layer.metrics() }} +``` + +You can use the same pagination parameters for `semantic_layer.dimensions(...)`. + + + You can use this example query to list all available saved queries in your dbt project. @@ -165,7 +200,7 @@ select * from semantic_layer.saved_queries() | NAME | DESCRIPTION | LABEL | METRICS | GROUP_BY | WHERE_FILTER | ``` - + + Expand your dbt knowledge and expertise with these additional resources: - [Join the bi-weekly demos](https://www.getdbt.com/resources/webinars/dbt-cloud-demos-with-experts) to see dbt Cloud in action and ask questions. diff --git a/website/docs/docs/use-dbt-semantic-layer/dbt-sl.md b/website/docs/docs/use-dbt-semantic-layer/dbt-sl.md index e09a68b97c4..71e9d52c888 100644 --- a/website/docs/docs/use-dbt-semantic-layer/dbt-sl.md +++ b/website/docs/docs/use-dbt-semantic-layer/dbt-sl.md @@ -13,6 +13,8 @@ The dbt Semantic Layer, powered by [MetricFlow](/docs/build/about-metricflow), s Moving metric definitions out of the BI layer and into the modeling layer allows data teams to feel confident that different business units are working from the same metric definitions, regardless of their tool of choice. If a metric definition changes in dbt, it’s refreshed everywhere it’s invoked and creates consistency across all applications. To ensure secure access control, the dbt Semantic Layer implements robust [access permissions](/docs/use-dbt-semantic-layer/setup-sl#set-up-dbt-semantic-layer) mechanisms. + + Refer to the [dbt Semantic Layer FAQs](/docs/use-dbt-semantic-layer/sl-faqs) or [Why we need a universal semantic layer](https://www.getdbt.com/blog/universal-semantic-layer/) blog post to learn more. ## Get started with the dbt Semantic Layer diff --git a/website/docs/docs/use-dbt-semantic-layer/exports.md b/website/docs/docs/use-dbt-semantic-layer/exports.md index 5d6e4c0d996..1883212fb66 100644 --- a/website/docs/docs/use-dbt-semantic-layer/exports.md +++ b/website/docs/docs/use-dbt-semantic-layer/exports.md @@ -176,7 +176,7 @@ If exports aren't needed, you can set the value(s) to `FALSE` (`DBT_INCLUDE_SAVE - + 1. Click **Deploy** in the top navigation bar and choose **Environments**. diff --git a/website/docs/docs/use-dbt-semantic-layer/sl-cache.md b/website/docs/docs/use-dbt-semantic-layer/sl-cache.md index 0c6387959a3..27ffe97a951 100644 --- a/website/docs/docs/use-dbt-semantic-layer/sl-cache.md +++ b/website/docs/docs/use-dbt-semantic-layer/sl-cache.md @@ -22,7 +22,7 @@ While you can use caching to speed up your queries and reduce compute time, know ## Prerequisites - dbt Cloud [Team or Enterprise](https://www.getdbt.com/) plan. -- dbt Cloud environments that are ["Versionless"](/docs/dbt-versions/upgrade-dbt-version-in-cloud#versionless). +- dbt Cloud environments must be on [release tracks](/docs/dbt-versions/cloud-release-tracks) and not legacy dbt Core versions. - A successful job run and [production environment](/docs/deploy/deploy-environments#set-as-production-environment). 
- For declarative caching, you need to have [exports](/docs/use-dbt-semantic-layer/exports) defined in your [saved queries](/docs/build/saved-queries) YAML configuration file. diff --git a/website/docs/docs/use-dbt-semantic-layer/sl-faqs.md b/website/docs/docs/use-dbt-semantic-layer/sl-faqs.md index 40b84ada40a..d206e4f1488 100644 --- a/website/docs/docs/use-dbt-semantic-layer/sl-faqs.md +++ b/website/docs/docs/use-dbt-semantic-layer/sl-faqs.md @@ -28,6 +28,8 @@ The primary value of the dbt Semantic Layer is to centralize and bring consisten - **Simplify your code** by not duplicating metric logic and allowing MetricFlow to perform complex calculations for you. - **Empower stakeholders** with rich context and flexible, yet governed experiences. + + @@ -110,6 +112,9 @@ You can use tables and dbt models to calculate metrics as an option, but it's a If you create a table with a metric, you’ll need to create numerous other tables derived from that table to show the desired metric cut by the desired dimension or time grain. Mature data models have thousands of dimensions, so you can see how this will quickly result in unnecessary duplication, maintenance, and costs. It's also incredibly hard to predict all the slices of data that a user is going to need ahead of time. With the dbt Semantic Layer, you don’t need to pre-join or build any tables; rather, you can simply add a few lines of code to your semantic model, and that data will only be computed upon request. + + + diff --git a/website/docs/faqs/API/rotate-token.md b/website/docs/faqs/API/rotate-token.md deleted file mode 100644 index 8dea2d0b875..00000000000 --- a/website/docs/faqs/API/rotate-token.md +++ /dev/null @@ -1,60 +0,0 @@ ---- -title: How can I rotate my user API token? -description: "Instructions on how to rotate API token" -sidebar_label: 'Rotate your user API token' -id: rotate-token ---- - -For security reasons and best practices, you should aim to rotate API keys every so often. You can rotate your API key automatically with the push of a button in your dbt Cloud environment or manually using the command line. - - - - - -To automatically rotate your API key: - -1. Navigate to the Account settings by clicking the **gear icon** in the top right of your dbt Cloud account. -2. Select **API Access** from the lefthand side. -3. In the **API** pane, click `Rotate`. - - - - - - - -1. Rotate your [User API token](/docs/dbt-cloud-apis/user-tokens) by replacing `YOUR_USER_ID`, `YOUR_CURRENT_PAT_TOKEN`, and `YOUR_ACCESS_URL` with your information in the following request. - -``` -curl --location --request POST 'https://cloud.getdbt.com/api/v3/accounts/YOUR_ACCOUNT_ID/users/YOUR_USER_ID/apikey/' \ ---header 'Authorization: Token YOUR_CURRENT_PAT_TOKEN' -``` - -* Find your `YOUR_USER_ID` by reading [How to find your user ID](/faqs/Accounts/find-user-id). -* Find your `YOUR_CURRENT_TOKEN` by going to **Profile Settings** -> **API Access** and copying the API key. -* Find [`YOUR_ACCESS_URL`](/docs/cloud/about-cloud/access-regions-ip-addresses) for your region and plan. - -If `YOUR_USER_ID` = `123`, `YOUR_CURRENT_TOKEN` = `abcf9g`, then your curl request will be: - -``` -curl --location --request POST 'https://YOUR_ACCESS_URL/api/v2/users/123/apikey/' \ - ---header 'Authorization: Token abcf9g' -``` - -2. Find the new key in the API response or in dbt Cloud. - -3. To find the new key in dbt Cloud, go to **Profile Settings** -> **API Access**. 
- -### dbt Cloud deployments - -If your [dbt Cloud deployment](/docs/cloud/about-cloud/access-regions-ip-addresses) uses a different access URL, replace `YOUR_ACCESS_URL` with the URL of your instance. - -For example, if your deployment is Virtual Private dbt: - -✅ `http://cloud.customizedurl.getdbt.com/`
-❌ `http://cloud.getdbt.com/`
- -
- -
diff --git a/website/docs/faqs/Accounts/change-billing.md b/website/docs/faqs/Accounts/change-billing.md index 11290728c98..2b2aa607c16 100644 --- a/website/docs/faqs/Accounts/change-billing.md +++ b/website/docs/faqs/Accounts/change-billing.md @@ -6,6 +6,6 @@ id: change-billing --- -If you want to change your account's credit card details, select the gear menu in the upper right corner of dbt Cloud. Go to Account Settings → Billing → Payment Information. Enter the new credit card details on the respective fields then click on **Update payment information**. Only the _account owner_ can make this change. +If you want to change your account's credit card details, go to the left side panel, click **Account settings** → **Billing** → scroll to **Payment information**. Enter the new credit card details on the respective fields then click on **Update payment information**. Only the _account owner_ can make this change. To change your billing name or location address, send our Support team a message at support@getdbt.com with the newly updated information, and we can make that change for you! diff --git a/website/docs/faqs/Accounts/change-users-license.md b/website/docs/faqs/Accounts/change-users-license.md index 8755b946126..ae44414e5f9 100644 --- a/website/docs/faqs/Accounts/change-users-license.md +++ b/website/docs/faqs/Accounts/change-users-license.md @@ -8,12 +8,12 @@ id: change-user-license To change the license type for a user from `developer` to `read-only` or `IT` in dbt Cloud, you must be an account owner or have admin privileges. You might make this change to free up a billable seat but retain the user’s access to view the information in the dbt Cloud account. -1. From dbt Cloud, click the gear icon at the top right and select **Account Settings**. +1. From dbt Cloud, click on your account name in the left side menu and, select **Account settings**. - + 2. In **Account Settings**, select **Users** under **Teams**. -3. Select the user you want to remove, and click **Edit** in the bottom of their profile. +3. Select the user you want to remove and click **Edit** in the bottom of their profile. 4. For the **License** option, choose **Read-only** or **IT** (from **Developer**), and click **Save**. - + diff --git a/website/docs/faqs/Accounts/delete-users.md b/website/docs/faqs/Accounts/delete-users.md index a7e422fd82c..1efbb018242 100644 --- a/website/docs/faqs/Accounts/delete-users.md +++ b/website/docs/faqs/Accounts/delete-users.md @@ -8,15 +8,15 @@ id: delete-users To delete a user in dbt Cloud, you must be an account owner or have admin privileges. If the user has a `developer` license type, this will open up their seat for another user or allow the admins to lower the total number of seats. -1. From dbt Cloud, click the gear icon at the top right and select **Account Settings**. +1. From dbt Cloud, click on your account name in the left side menu and, select **Account settings**. - + 2. In **Account Settings**, select **Users** under **Teams**. 3. Select the user you want to delete, then click **Edit**. 4. Click **Delete** in the bottom left. Click **Confirm Delete** to immediately delete the user without additional password prompts. This action cannot be undone. However, you can re-invite the user with the same information if the deletion was made in error. - + If you are on a **Teams** plan and you are deleting users to reduce the number of billable seats, you also need to take these steps to lower the license count: 1. In **Account Settings**, select **Billing**. 
diff --git a/website/docs/faqs/Accounts/find-user-id.md b/website/docs/faqs/Accounts/find-user-id.md index 09e3ed35a0b..c7c810d9b3c 100644 --- a/website/docs/faqs/Accounts/find-user-id.md +++ b/website/docs/faqs/Accounts/find-user-id.md @@ -5,7 +5,7 @@ sidebar_label: 'Where can I find my user ID' id: find-user-id --- -Knowing your dbt Cloud user ID can help with actions related to [rotating your API token](/faqs/API/rotate-token), interacting with support, and more. +Knowing your dbt Cloud user ID can help with interacting with support. To find your user ID in dbt Cloud, read the following steps: diff --git a/website/docs/faqs/Accounts/transfer-account.md b/website/docs/faqs/Accounts/transfer-account.md index 693061c55c6..e694636cf68 100644 --- a/website/docs/faqs/Accounts/transfer-account.md +++ b/website/docs/faqs/Accounts/transfer-account.md @@ -10,7 +10,7 @@ You can transfer your dbt Cloud [access control](/docs/cloud/manage-access/about | Account plan| Steps | | ------ | ---------- | -| **Developer** | You can transfer ownership by changing the email directly on your dbt Cloud profile page, which you can access using this URL when you replace `YOUR_ACCESS_URL` with the [appropriate Access URL](/docs/cloud/about-cloud/access-regions-ip-addresses) for your region and plan: `https://YOUR_ACCESS_URL/settings/profile`. Before doing this, please ensure that you unlink your GitHub profile. | +| **Developer** | You can transfer ownership by changing the email directly on your dbt Cloud profile page, which you can access using this URL when you replace `YOUR_ACCESS_URL` with the [appropriate Access URL](/docs/cloud/about-cloud/access-regions-ip-addresses) for your region and plan: `https://YOUR_ACCESS_URL/settings/profile`. Before doing this, please ensure that you unlink your GitHub profile. The email address of the new account owner cannot be associated with another dbt Cloud account.| | **Team** | Existing account admins with account access can add users to, or remove users from the owner group. | | **Enterprise** | Account admins can add users to, or remove users from a group with Account Admin permissions. | | **If all account owners left the company** | If the account owner has left your organization, you will need to work with _your_ IT department to have incoming emails forwarded to the new account owner. Once your IT department has redirected the emails, you can request to reset the user password. Once you log in, you can change the email on the Profile page when you replace `YOUR_ACCESS_URL` with the [appropriate Access URL](/docs/cloud/about-cloud/access-regions-ip-addresses) for your region and plan: `https://YOUR_ACCESS_URL/settings/profile`. | diff --git a/website/docs/faqs/Core/install-pip-os-prereqs.md b/website/docs/faqs/Core/install-pip-os-prereqs.md index c8435b44f33..e25c15ee570 100644 --- a/website/docs/faqs/Core/install-pip-os-prereqs.md +++ b/website/docs/faqs/Core/install-pip-os-prereqs.md @@ -33,7 +33,7 @@ python --version ``` -If you need a compatible version, you can download and install [Python version 3.8 or higher for MacOS](https://www.python.org/downloads/macos). +If you need a compatible version, you can download and install [Python version 3.9 or higher for MacOS](https://www.python.org/downloads/macos). If your machine runs on an Apple M1 architecture, we recommend that you install dbt via [Rosetta](https://support.apple.com/en-us/HT211861). This is necessary for certain dependencies that are only supported on Intel processors. 
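Before moving on, a quick sanity check along these lines tends to help on macOS. The virtual environment name and the `dbt-postgres` adapter below are examples only; install the adapter package that matches your warehouse.

```bash
# Confirm the interpreter is Python 3.9 or higher
python3 --version

# Install dbt Core and an adapter (Postgres as an example) inside a virtual environment
python3 -m venv dbt-env
source dbt-env/bin/activate
python -m pip install dbt-core dbt-postgres
```

Keeping dbt in its own virtual environment also avoids version clashes with other Python tooling on the same machine.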
### Ubuntu/Debian @@ -55,6 +55,6 @@ pip install cryptography~=3.4 Windows requires Python and git to successfully install and run dbt Core. -Install [Git for Windows](https://git-scm.com/downloads) and [Python version 3.8 or higher for Windows](https://www.python.org/downloads/windows/). +Install [Git for Windows](https://git-scm.com/downloads) and [Python version 3.9 or higher for Windows](https://www.python.org/downloads/windows/). For further questions, please see the [Python compatibility FAQ](/faqs/Core/install-python-compatibility) diff --git a/website/docs/faqs/Core/install-python-compatibility.md b/website/docs/faqs/Core/install-python-compatibility.md index aee2d16318e..92b4ae8698b 100644 --- a/website/docs/faqs/Core/install-python-compatibility.md +++ b/website/docs/faqs/Core/install-python-compatibility.md @@ -1,6 +1,6 @@ --- title: What version of Python can I use? -description: "Python versions 3.8 and newer can be used with dbt Core" +description: "Python versions supported with dbt Core" sidebar_label: 'Python version' id: install-python-compatibility --- diff --git a/website/docs/faqs/Docs/long-descriptions.md b/website/docs/faqs/Docs/long-descriptions.md index ef410df0517..e984a6e78c8 100644 --- a/website/docs/faqs/Docs/long-descriptions.md +++ b/website/docs/faqs/Docs/long-descriptions.md @@ -32,4 +32,3 @@ If you need more than a sentence to explain a model, you can: ``` 3. Use a [docs block](/docs/build/documentation#using-docs-blocks) to write the description in a separate Markdown file. -b diff --git a/website/docs/faqs/Environments/custom-branch-settings.md b/website/docs/faqs/Environments/custom-branch-settings.md index 70052488ac6..6e998b267d8 100644 --- a/website/docs/faqs/Environments/custom-branch-settings.md +++ b/website/docs/faqs/Environments/custom-branch-settings.md @@ -27,7 +27,7 @@ For example, if you want to use the `develop` branch of a connected repository: - Enter **develop** as the name of your custom branch - Click **Save** - + ## Deployment diff --git a/website/docs/faqs/Environments/delete-environment-job.md b/website/docs/faqs/Environments/delete-environment-job.md index eb9ac511a7c..5b167b6df13 100644 --- a/website/docs/faqs/Environments/delete-environment-job.md +++ b/website/docs/faqs/Environments/delete-environment-job.md @@ -18,7 +18,7 @@ To delete a job or multiple jobs in dbt Cloud: 4. Scroll to the bottom of the page and click **Delete** to delete the job.
- +
Delete a job
@@ -35,10 +35,7 @@ Deleting an environment automatically deletes its associated job(s). If you want 3. Click **Settings** on the top right of the page and then click **Edit**. 4. Scroll to the bottom of the page and click **Delete** to delete the environment.
-
- -
Delete an environment
-
+ 5. Confirm your action in the **Confirm Delete** pop-up by clicking **Confirm Delete** in the bottom right to delete the environment immediately. This action cannot be undone. However, you can create a new environment with the same information if the deletion was made in error.

diff --git a/website/docs/faqs/Git/git-migration.md b/website/docs/faqs/Git/git-migration.md index 156227d59ae..7d7a503c16a 100644 --- a/website/docs/faqs/Git/git-migration.md +++ b/website/docs/faqs/Git/git-migration.md @@ -16,7 +16,7 @@ To migrate from one git provider to another, refer to the following steps to avo 2. Go back to dbt Cloud and set up your [integration for the new git provider](/docs/cloud/git/connect-github), if needed. 3. Disconnect the old repository in dbt Cloud by going to **Account Settings** and then **Projects**. Click on the **Repository** link, then click **Edit** and **Disconnect**. - + 4. On the same page, connect to the new git provider repository by clicking **Configure Repository** - If you're using the native integration, you may need to OAuth to it. diff --git a/website/docs/faqs/Git/github-permissions.md b/website/docs/faqs/Git/github-permissions.md index 075343e0c5e..c244b6742b9 100644 --- a/website/docs/faqs/Git/github-permissions.md +++ b/website/docs/faqs/Git/github-permissions.md @@ -40,7 +40,7 @@ Disconnect the GitHub and dbt Cloud integration in dbt Cloud. 6. Return to your **Project details** page and reconnect your repository by clicking the **Configure Repository** link. 7. Configure your repository and click **Save** - + ## Support If you've tried these workarounds and are still experiencing this behavior — reach out to the [dbt Support](mailto:support@getdbt.com) team and we'll be happy to help! diff --git a/website/docs/faqs/Git/gitignore.md b/website/docs/faqs/Git/gitignore.md index 16575861289..f5892b30b83 100644 --- a/website/docs/faqs/Git/gitignore.md +++ b/website/docs/faqs/Git/gitignore.md @@ -47,9 +47,9 @@ For more info on `gitignore` syntax, refer to the [Git docs](https://git-scm.com 11. Return to the dbt Cloud IDE and use the **Change Branch** button, to switch to the main branch of the project. 12. Once the branch has changed, click the **Pull from remote** button to pull in all the changes. -13. Verify the changes by making sure the files/folders in the `.gitignore `file are in italics. +13. Verify the changes by making sure the files/folders in the `.gitignore` file are in italics. - + ### Fix in the git provider @@ -80,9 +80,9 @@ dbt_modules/ * `target`, `dbt_modules`, `dbt_packages`, `logs` 7. Commit (save) the deletions to the main branch. 8. Switch to the dbt Cloud IDE, and open the project that you're fixing. -9. Reclone your repo in the IDE by clicking on the three dots next to the **IDE Status** button on the lower right corner of the IDE screen, then select **Reclone Repo**. - * **Note** — Any saved but uncommitted changes will be lost, so make sure you copy any modified code that you want to keep in a temporary location outside of dbt Cloud. -10. Once you reclone the repo, open the `.gitignore` file in the branch you're working in. If the new changes aren't included, you'll need to merge the latest commits from the main branch into your working branch. +9. [Rollback your repo to remote](/docs/collaborate/git/version-control-basics#the-git-button-in-the-cloud-ide) in the IDE by clicking on the three dots next to the **IDE Status** button on the lower right corner of the IDE screen, then select **Rollback to remote**. + * **Note** — Rollback to remote resets your repo back to an earlier clone from your remote. Any saved but uncommitted changes will be lost, so make sure you copy any modified code that you want to keep in a temporary location outside of dbt Cloud. +10. 
Once you rollback to remote, open the `.gitignore` file in the branch you're working in. If the new changes aren't included, you'll need to merge the latest commits from the main branch into your working branch. 11. Go to the **File Explorer** to verify the `.gitignore` file contains the correct entries and make sure the untracked files/folders in the .gitignore file are in *italics*. 12. Great job 🎉! You've configured the `.gitignore` correctly and can continue with your development! @@ -111,9 +111,9 @@ dbt_modules/ 8. Open a merge request using the git provider web interface. The merge request should attempt to merge the changes into the 'main' branch that all development branches are created from. 9. Follow the necessary procedures to get the branch approved and merged into the 'main' branch. You can delete the branch after the merge is complete. 10. Once the merge is complete, go back to the dbt Cloud IDE, and open the project that you're fixing. -11. Reclone your repo in the IDE by clicking on the three dots next to the **IDE Status** button on the lower right corner of the IDE screen, then select **Reclone Repo**. - * **Note** — Any saved but uncommitted changes will be lost, so make sure you copy any modified code that you want to keep in a temporary location outside of dbt Cloud. -12. Once you reclone the repo, open the `.gitignore` file in the branch you're working in. If the new changes aren't included, you'll need to merge the latest commits from the main branch into your working branch. +11. [Rollback your repo to remote](/docs/collaborate/git/version-control-basics#the-git-button-in-the-cloud-ide) in the IDE by clicking on the three dots next to the **IDE Status** button on the lower right corner of the IDE screen, then select **Rollback to remote**. + * **Note** — Rollback to remote resets your repo back to an earlier clone from your remote. Any saved but uncommitted changes will be lost, so make sure you copy any modified code that you want to keep in a temporary location outside of dbt Cloud. +12. Once you rollback to remote, open the `.gitignore` file in the branch you're working in. If the new changes aren't included, you'll need to merge the latest commits from the main branch into your working branch. 13. Go to the **File Explorer** to verify the `.gitignore` file contains the correct entries and make sure the untracked files/folders in the .gitignore file are in *italics*. 14. Great job 🎉! You've configured the `.gitignore` correctly and can continue with your development! diff --git a/website/docs/faqs/Git/managed-repo.md b/website/docs/faqs/Git/managed-repo.md index 17b75256fb6..c357fce112c 100644 --- a/website/docs/faqs/Git/managed-repo.md +++ b/website/docs/faqs/Git/managed-repo.md @@ -7,4 +7,8 @@ id: managed-repo dbt Labs can send your managed repository through a ZIP file in its current state for you to push up to a git provider. After that, you'd just need to switch over to the [repo in your project](/docs/cloud/git/import-a-project-by-git-url) to point to the new repository. -When you're ready to do this, [contact the dbt Labs Support team](mailto:support@getdbt.com) with your request and your managed repo URL, which you can find by navigating to your project setting. To find project settings, click the gear icon in the upper right, select **Account settings**, click **Projects**, and then select your project. Under **Repository** in the project details page, you can find your managed repo URL. 
+When you're ready to do this, [contact the dbt Labs Support team](mailto:support@getdbt.com) with your request and your managed repo URL, which you can find in your project settings. To find your project settings:
+
+1. From dbt Cloud, click on your account name in the left side menu and select **Account settings**.
+2. Click **Projects**, and then select your project.
+3. Under **Repository** in the project details page, you can find your managed repo URL.
diff --git a/website/docs/faqs/Project/dbt-source-freshness.md b/website/docs/faqs/Project/dbt-source-freshness.md
index e2554579ffc..61bd5d035ba 100644
--- a/website/docs/faqs/Project/dbt-source-freshness.md
+++ b/website/docs/faqs/Project/dbt-source-freshness.md
@@ -11,4 +11,4 @@ The `dbt source freshness` command will output a pass/warning/error status for e
Additionally, dbt will write the freshness results to a file in the `target/` directory called `sources.json` by default. You can also override this destination, use the `-o` flag to the `dbt source freshness` command.
-After enabling source freshness within a job, configure [Artifacts](/docs/deploy/artifacts) in your **Project Details** page, which you can find by clicking the gear icon and then selecting **Account settings**. You can see the current status for source freshness by clicking **View Sources** in the job page.
+After enabling source freshness within a job, configure [Artifacts](/docs/deploy/artifacts) in your **Project Details** page, which you can find by selecting your account name on the left side menu in dbt Cloud and clicking **Account settings**. You can see the current status for source freshness by clicking **View Sources** on the job page.
diff --git a/website/docs/faqs/Project/delete-a-project.md b/website/docs/faqs/Project/delete-a-project.md
index 5fde3fee9cd..36c6bf4f160 100644
--- a/website/docs/faqs/Project/delete-a-project.md
+++ b/website/docs/faqs/Project/delete-a-project.md
@@ -7,12 +7,12 @@ id: delete-a-project
---
To delete a project in dbt Cloud, you must be the account owner or have admin privileges.
-1. From dbt Cloud, click the gear icon at the top right corner and select **Account Settings**.
+1. From dbt Cloud, click on your account name in the left side menu and select **Account settings**.
-
+
2. In **Account Settings**, select **Projects**. Click the project you want to delete from the **Projects** page.
3. Click the edit icon in the lower right-hand corner of the **Project Details**. A **Delete** option will appear on the left side of the same details view.
4. Select **Delete**. Confirm the action to immediately delete the user without additional password prompts. There will be no account password prompt, and the project is deleted immediately after confirmation. Once a project is deleted, this action cannot be undone.
-
+
diff --git a/website/docs/faqs/Troubleshooting/error-importing-repo.md b/website/docs/faqs/Troubleshooting/error-importing-repo.md
new file mode 100644
index 00000000000..85c9ffb0745
--- /dev/null
+++ b/website/docs/faqs/Troubleshooting/error-importing-repo.md
@@ -0,0 +1,14 @@
+---
+title: Errors importing a repository on dbt Cloud project setup
+description: "Errors importing a repository on dbt Cloud project setup"
+sidebar_label: 'Errors importing a repository on dbt Cloud project setup'
+id: error-importing-repo
+---
+
+If you don't see your repository listed, double-check that:
+- Your repository is in a GitLab group you have access to. dbt Cloud will not read repos associated with a user.
+
+If you do see your repository listed, but are unable to import the repository successfully, double-check that:
+- You are a maintainer of that repository. Only users with maintainer permissions can set up repository connections.
+
+If you imported a repository using the dbt Cloud native integration with GitLab, you should be able to see if the clone strategy is using a `deploy_token`. If it's relying on an SSH key, this means the repository was not set up using the native GitLab integration, but rather using the generic git clone option. The repository must be reconnected to use the native GitLab integration.
diff --git a/website/docs/faqs/Troubleshooting/failed-snowflake-oauth-connection.md b/website/docs/faqs/Troubleshooting/failed-snowflake-oauth-connection.md
new file mode 100644
index 00000000000..84ef49d212c
--- /dev/null
+++ b/website/docs/faqs/Troubleshooting/failed-snowflake-oauth-connection.md
@@ -0,0 +1,31 @@
+---
+title: Receiving a `Failed to connect to DB` error when connecting to Snowflake
+description: "Edit your OAuth security integration when you see this error"
+sidebar_label: 'Receiving `Failed to connect to database` error'
+---
+
+1. If you see the following error:
+
+   ```text
+   Failed to connect to DB: xxxxxxx.snowflakecomputing.com:443. The role requested in the connection, or the default role if none was requested in the connection ('xxxxx'), is not listed in the Access Token or was filtered.
+   Please specify another role, or contact your OAuth Authorization server administrator.
+   ```
+
+2. Edit your OAuth security integration and explicitly specify this scope mapping attribute (replace `<integration_name>` with the name of your integration):
+
+   ```sql
+   ALTER INTEGRATION <integration_name> SET EXTERNAL_OAUTH_SCOPE_MAPPING_ATTRIBUTE = 'scp';
+   ```
+
+You can read more about this error in [Snowflake's documentation](https://community.snowflake.com/s/article/external-custom-oauth-error-the-role-requested-in-the-connection-is-not-listed-in-the-access-token).
+
+----
+
+1. If you see the following error, review the items below:
+
+   ```text
+   Failed to connect to DB: xxxxxxx.snowflakecomputing.com:443. Incorrect username or password was specified.
+   ```
+
+   * **Unique email addresses** — Each user in Snowflake must have a unique email address. You can't have multiple users (for example, a human user and a service account) using the same email, such as `alice@acme.com`, to authenticate to Snowflake.
+   * **Match email addresses with identity provider** — The email address of your Snowflake user must exactly match the email address you use to authenticate with your Identity Provider (IdP). For example, if your Snowflake user's email is `alice@acme.com` but you log in to Entra or Okta with `alice_adm@acme.com`, this mismatch can cause an error.
diff --git a/website/docs/faqs/Troubleshooting/gitlab-webhook.md b/website/docs/faqs/Troubleshooting/gitlab-webhook.md
new file mode 100644
index 00000000000..450796db83e
--- /dev/null
+++ b/website/docs/faqs/Troubleshooting/gitlab-webhook.md
@@ -0,0 +1,19 @@
+---
+title: Unable to trigger a CI job with GitLab
+description: "Unable to trigger a CI job"
+sidebar_label: 'Unable to trigger a CI job'
+id: gitlab-webhook
+---
+
+When you connect dbt Cloud to a GitLab repository, GitLab automatically registers a webhook in the background, viewable under the repository settings. This webhook is also used to trigger [CI jobs](/docs/deploy/ci-jobs) when you push to the repository.
+
+If you're unable to trigger a CI job, this usually indicates that the webhook registration is missing or incorrect.
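+
+If you prefer to check from the command line, you can also list the repository's registered webhooks with the GitLab API. This is a minimal sketch that assumes a gitlab.com project and a token with API access; swap in your own project ID, token, and host:
+
+```shell
+# Lists the webhooks registered for the project; the dbt Cloud webhook should appear here
+curl --header "PRIVATE-TOKEN: <your_gitlab_token>" \
+  "https://gitlab.com/api/v4/projects/<your_project_id>/hooks"
+```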
+
+To resolve this issue, view the webhook registrations for your repository by navigating to **Settings** --> **Webhooks** in GitLab.
+
+Some things to check:
+
+- The webhook registration is enabled in GitLab.
+- The webhook registration is configured with the correct URL and secret.
+
+If you're still experiencing this issue, reach out to the Support team at support@getdbt.com and we'll be happy to help!
diff --git a/website/docs/guides/adapter-creation.md b/website/docs/guides/adapter-creation.md
index 278e2a9fe14..37ef5ec0412 100644
--- a/website/docs/guides/adapter-creation.md
+++ b/website/docs/guides/adapter-creation.md
@@ -666,7 +666,7 @@ In order to enable the [`dbt init` command](/reference/commands/init) to prompt
See examples:
-- [dbt-postgres](https://github.com/dbt-labs/dbt-core/blob/main/plugins/postgres/dbt/include/postgres/profile_template.yml)
+- [dbt-postgres](https://github.com/dbt-labs/dbt-postgres/blob/main/dbt/include/postgres/profile_template.yml)
- [dbt-redshift](https://github.com/dbt-labs/dbt-redshift/blob/main/dbt/include/redshift/profile_template.yml)
- [dbt-snowflake](https://github.com/dbt-labs/dbt-snowflake/blob/main/dbt/include/snowflake/profile_template.yml)
- [dbt-bigquery](https://github.com/dbt-labs/dbt-bigquery/blob/main/dbt/include/bigquery/profile_template.yml)
@@ -1345,8 +1345,6 @@ Breaking this down:
- Implementation instructions:
-- Future plans
-
- Contributor recognition (if applicable)
diff --git a/website/docs/guides/athena-qs.md b/website/docs/guides/athena-qs.md
new file mode 100644
index 00000000000..b1933bdd076
--- /dev/null
+++ b/website/docs/guides/athena-qs.md
@@ -0,0 +1,334 @@
+---
+title: "Quickstart for dbt Cloud and Amazon Athena"
+id: "athena"
+# time_to_complete: '30 minutes' commenting out until we test
+level: 'Beginner'
+icon: 'athena'
+hide_table_of_contents: true
+tags: ['Amazon','Athena', 'dbt Cloud','Quickstart']
+recently_updated: true
+---
+
+## Introduction
+
+In this quickstart guide, you'll learn how to use dbt Cloud with Amazon Athena. It will show you how to:
+
+- Create an S3 bucket for Athena query results.
+- Create an Athena database.
+- Access sample data in a public dataset.
+- Connect dbt Cloud to Amazon Athena.
+- Take a sample query and turn it into a model in your dbt project. A model in dbt is a select statement.
+- Add tests to your models.
+- Document your models.
+- Schedule a job to run.
+
+:::tip Videos for you
+You can check out [dbt Fundamentals](https://learn.getdbt.com/courses/dbt-fundamentals) for free if you're interested in course learning with videos.
+:::
+
+### Prerequisites
+
+- You have a [dbt Cloud account](https://www.getdbt.com/signup/).
+- You have an [AWS account](https://aws.amazon.com/).
+- You have set up [Amazon Athena](https://docs.aws.amazon.com/athena/latest/ug/getting-started.html).
+
+### Related content
+
+- Learn more with [dbt Learn courses](https://learn.getdbt.com)
+- [CI jobs](/docs/deploy/continuous-integration)
+- [Deploy jobs](/docs/deploy/deploy-jobs)
+- [Job notifications](/docs/deploy/job-notifications)
+- [Source freshness](/docs/deploy/source-freshness)
+
+## Getting started
+
+For the following guide, you can use an existing S3 bucket or [create a new one](https://docs.aws.amazon.com/AmazonS3/latest/userguide/create-bucket-overview.html).
+
+Download the following CSV files (the Jaffle Shop sample data) and upload them to your S3 bucket:
+- [jaffle_shop_customers.csv](https://dbt-tutorial-public.s3-us-west-2.amazonaws.com/jaffle_shop_customers.csv)
+- [jaffle_shop_orders.csv](https://dbt-tutorial-public.s3-us-west-2.amazonaws.com/jaffle_shop_orders.csv)
+- [stripe_payments.csv](https://dbt-tutorial-public.s3-us-west-2.amazonaws.com/stripe_payments.csv)
+
+
+## Configure Amazon Athena
+
+1. Log into your AWS account and navigate to the **Athena console**.
+    - If this is your first time in the Athena console (in your current AWS Region), click **Explore the query editor** to open the query editor. Otherwise, Athena opens automatically in the query editor.
+1. Open **Settings** and find the **Location of query result** box.
+    1. Enter the path of the S3 bucket (prefix it with `s3://`).
+    2. Navigate to **Browse S3**, select the S3 bucket you created, and click **Choose**.
+1. **Save** these settings.
+1. In the **query editor**, create a database by running `create database YOUR_DATABASE_NAME`.
+1. To make the database you created the one you `write` into, select it from the **Database** list on the left side menu.
+1. Access the Jaffle Shop data in the S3 bucket using one of these options:
+    1. Manually create the tables (a sample `CREATE EXTERNAL TABLE` statement is sketched at the end of this guide).
+    2. Create an AWS Glue crawler to recreate the data as external tables (recommended).
+1. Once the tables have been created, you will be able to `SELECT` from them.
+
+## Set up security access to Athena
+
+To set up security access for Athena, determine which access method you want to use:
+* Obtain `aws_access_key_id` and `aws_secret_access_key` (recommended)
+* Obtain an **AWS credentials** file.
+
+### AWS access key (recommended)
+
+To obtain your `aws_access_key_id` and `aws_secret_access_key`:
+
+1. Open the **AWS Console**.
+1. Click on your **username** near the top right and click **Security Credentials**.
+1. Click on **Users** in the sidebar.
+1. Click on your **username** (or the name of the user for whom to create the key).
+1. Click on the **Security Credentials** tab.
+1. Click **Create Access Key**.
+1. Click **Show User Security Credentials**.
+
+Save the `aws_access_key_id` and `aws_secret_access_key` for a future step.
+
+### AWS credentials file
+
+To obtain your AWS credentials file:
+1. Follow the instructions for [configuring the credentials file](https://docs.aws.amazon.com/cli/v1/userguide/cli-configure-files.html) using the AWS CLI.
+1. Locate the `~/.aws/credentials` file on your computer:
+    1. Windows: `%USERPROFILE%\.aws\credentials`
+    2. Mac/Linux: `~/.aws/credentials`
+
+Retrieve the `aws_access_key_id` and `aws_secret_access_key` from the `~/.aws/credentials` file for a future step.
+
+## Configure the connection in dbt Cloud
+
+To configure the Athena connection in dbt Cloud:
+1. Click your **account name** on the left-side menu and click **Account settings**.
+1. Click **Connections** and click **New connection**.
+1. Click **Athena** and fill out the required fields (and any optional fields).
+    1. **AWS region name** — The AWS region of your environment.
+    1. **Database (catalog)** — Enter the database name created in earlier steps (lowercase only).
+    1. **AWS S3 staging directory** — Enter the S3 bucket created in earlier steps.
+1. Click **Save**.
+
+### Configure your environment
+
+To configure the Athena credentials in your environment:
+1. Click **Deploy** on the left-side menu and click **Environments**.
+1. Click **Create environment** and fill out the **General settings**.
+    - Your **dbt version** must be set to `Versionless` to use the Athena connection.
+1. Select the Athena connection from the **Connection** dropdown.
+1. Fill out the `aws_access_key_id` and `aws_secret_access_key` recorded in previous steps, as well as the `Schema` to write to.
+1. Click **Test connection** and once it succeeds, **Save** the environment.
+
+Repeat the process to create a [development environment](https://docs.getdbt.com/docs/dbt-cloud-environments#types-of-environments).
+
+## Set up a dbt Cloud managed repository
+
+
+## Initialize your dbt project and start developing
+
+Now that you have a repository configured, you can initialize your project and start development in dbt Cloud:
+
+1. Click **Start developing in the IDE**. It might take a few minutes for your project to spin up for the first time as it establishes your git connection, clones your repo, and tests the connection to the warehouse.
+2. Above the file tree to the left, click **Initialize dbt project**. This builds out your folder structure with example models.
+3. Make your initial commit by clicking **Commit and sync**. Use the commit message `initial commit` and click **Commit**. This creates the first commit to your managed repo and allows you to open a branch where you can add new dbt code.
+4. You can now directly query data from your warehouse and execute `dbt run`. You can try this out now:
+    - Click **+ Create new file**, add this query to the new file, and click **Save as** to save the new file:
+      ```sql
+      select * from jaffle_shop.customers
+      ```
+    - In the command line bar at the bottom, enter `dbt run` and click **Enter**. You should see a `dbt run succeeded` message.
+
+## Build your first model
+
+You have two options for working with files in the dbt Cloud IDE:
+
+- Create a new branch (recommended) — Create a new branch to edit and commit your changes. Navigate to **Version Control** on the left sidebar and click **Create branch**.
+- Edit in the protected primary branch — If you prefer to edit, format, or lint files and execute dbt commands directly in your primary git branch. 
The dbt Cloud IDE prevents commits to the protected branch, so you will be prompted to commit your changes to a new branch. + +Name the new branch `add-customers-model`. + +1. Click the **...** next to the `models` directory, then select **Create file**. +2. Name the file `customers.sql`, then click **Create**. +3. Copy the following query into the file and click **Save**. + +```sql +with customers as ( + + select + id as customer_id, + first_name, + last_name + + from jaffle_shop.customers + +), + +orders as ( + + select + id as order_id, + user_id as customer_id, + order_date, + status + + from jaffle_shop.orders + +), + +customer_orders as ( + + select + customer_id, + + min(order_date) as first_order_date, + max(order_date) as most_recent_order_date, + count(order_id) as number_of_orders + + from orders + + group by 1 + +), + +final as ( + + select + customers.customer_id, + customers.first_name, + customers.last_name, + customer_orders.first_order_date, + customer_orders.most_recent_order_date, + coalesce(customer_orders.number_of_orders, 0) as number_of_orders + + from customers + + left join customer_orders using (customer_id) + +) + +select * from final +``` + +4. Enter `dbt run` in the command prompt at the bottom of the screen. You should get a successful run and see the three models. + +Later, you can connect your business intelligence (BI) tools to these views and tables so they only read cleaned up data rather than raw data in your BI tool. + +#### FAQs + + + + + + + +## Change the way your model is materialized + + + +## Delete the example models + + + +## Build models on top of other models + + + +1. Create a new SQL file, `models/stg_customers.sql`, with the SQL from the `customers` CTE in our original query. +2. Create a second new SQL file, `models/stg_orders.sql`, with the SQL from the `orders` CTE in our original query. + + + + ```sql + select + id as customer_id, + first_name, + last_name + + from jaffle_shop.customers + ``` + + + + + + ```sql + select + id as order_id, + user_id as customer_id, + order_date, + status + + from jaffle_shop.orders + ``` + + + +3. Edit the SQL in your `models/customers.sql` file as follows: + + + + ```sql + with customers as ( + + select * from {{ ref('stg_customers') }} + + ), + + orders as ( + + select * from {{ ref('stg_orders') }} + + ), + + customer_orders as ( + + select + customer_id, + + min(order_date) as first_order_date, + max(order_date) as most_recent_order_date, + count(order_id) as number_of_orders + + from orders + + group by 1 + + ), + + final as ( + + select + customers.customer_id, + customers.first_name, + customers.last_name, + customer_orders.first_order_date, + customer_orders.most_recent_order_date, + coalesce(customer_orders.number_of_orders, 0) as number_of_orders + + from customers + + left join customer_orders using (customer_id) + + ) + + select * from final + + ``` + + + +4. Execute `dbt run`. + + This time, when you performed a `dbt run`, separate views/tables were created for `stg_customers`, `stg_orders` and `customers`. dbt inferred the order to run these models. Because `customers` depends on `stg_customers` and `stg_orders`, dbt builds `customers` last. You do not need to explicitly define these dependencies. + + +#### FAQs {#faq-2} + + + + + +
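+
+If you chose to create the source tables manually in the earlier Athena setup step (rather than using an AWS Glue crawler), the table definitions can follow this pattern. This is only a sketch: it assumes you named your Athena database `jaffle_shop`, that each CSV sits in its own S3 prefix, and that the files keep their header row, so adjust the columns and locations to match your bucket.
+
+```sql
+-- Example external table over the Jaffle Shop customers CSV
+CREATE EXTERNAL TABLE jaffle_shop.customers (
+    id INT,
+    first_name STRING,
+    last_name STRING
+)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY ','
+LOCATION 's3://your-bucket/jaffle_shop_customers/'   -- LOCATION must point to a folder (prefix), not a single file
+TBLPROPERTIES ('skip.header.line.count' = '1');
+```
+
+Repeat the same pattern for the orders and payments files, matching the columns in each CSV.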
+ + + + diff --git a/website/docs/guides/azure-synapse-analytics-qs.md b/website/docs/guides/azure-synapse-analytics-qs.md index 4f0285e6623..94beddfec80 100644 --- a/website/docs/guides/azure-synapse-analytics-qs.md +++ b/website/docs/guides/azure-synapse-analytics-qs.md @@ -92,7 +92,7 @@ In this quickstart guide, you'll learn how to use dbt Cloud with [Azure Synapse ## Connect dbt Cloud to Azure Synapse Analytics -1. Create a new project in dbt Cloud. Open the gear menu in the top right corner, select **Account settings** and click **+ New Project**. +1. Create a new project in dbt Cloud. Click on your account name in the left side menu, select **Account settings**, and click **+ New Project**. 2. Enter a project name and click **Continue**. 3. Choose **Synapse** as your connection and click **Next**. 4. In the **Configure your environment** section, enter the **Settings** for your new project: diff --git a/website/docs/guides/bigquery-qs.md b/website/docs/guides/bigquery-qs.md index 19a4ff8fbb0..194b73f25bf 100644 --- a/website/docs/guides/bigquery-qs.md +++ b/website/docs/guides/bigquery-qs.md @@ -85,13 +85,14 @@ In order to let dbt connect to your warehouse, you'll need to generate a keyfile 3. Create a service account key for your new project from the [Service accounts page](https://console.cloud.google.com/iam-admin/serviceaccounts?walkthrough_id=iam--create-service-account-keys&start_index=1#step_index=1). For more information, refer to [Create a service account key](https://cloud.google.com/iam/docs/creating-managing-service-account-keys#creating) in the Google Cloud docs. When downloading the JSON file, make sure to use a filename you can easily remember. For example, `dbt-user-creds.json`. For security reasons, dbt Labs recommends that you protect this JSON file like you would your identity credentials; for example, don't check the JSON file into your version control software. ## Connect dbt Cloud to BigQuery​ -1. Create a new project in [dbt Cloud](/docs/cloud/about-cloud/access-regions-ip-addresses). From **Account settings** (using the gear menu in the top right corner), click **+ New Project**. +1. Create a new project in [dbt Cloud](/docs/cloud/about-cloud/access-regions-ip-addresses). Navigate to **Account settings** (by clicking on your account name in the left side menu), and click **+ New project**. 2. Enter a project name and click **Continue**. 3. For the warehouse, click **BigQuery** then **Next** to set up your connection. 4. Click **Upload a Service Account JSON File** in settings. 5. Select the JSON file you downloaded in [Generate BigQuery credentials](#generate-bigquery-credentials) and dbt Cloud will fill in all the necessary fields. -6. Click **Test Connection**. This verifies that dbt Cloud can access your BigQuery account. -7. Click **Next** if the test succeeded. If it failed, you might need to go back and regenerate your BigQuery credentials. +6. Optional — dbt Cloud Enterprise plans can configure developer OAuth with BigQuery, providing an additional layer of security. For more information, refer to [Set up BigQuery OAuth](/docs/cloud/manage-access/set-up-bigquery-oauth). +7. Click **Test Connection**. This verifies that dbt Cloud can access your BigQuery account. +8. Click **Next** if the test succeeded. If it failed, you might need to go back and regenerate your BigQuery credentials. 
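+
+If you need to regenerate the keyfile, you can also create one from the command line with the gcloud CLI. This is a sketch only; it assumes a service account (here called `dbt-user`) already exists in your Google Cloud project:
+
+```shell
+# Creates a new JSON key for the service account and writes it to dbt-user-creds.json
+gcloud iam service-accounts keys create dbt-user-creds.json \
+  --iam-account="dbt-user@your-gcp-project.iam.gserviceaccount.com"
+```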
## Set up a dbt Cloud managed repository diff --git a/website/docs/guides/core-cloud-2.md b/website/docs/guides/core-cloud-2.md index cee1e8029c2..ddc0e883d84 100644 --- a/website/docs/guides/core-cloud-2.md +++ b/website/docs/guides/core-cloud-2.md @@ -155,7 +155,7 @@ After [setting the foundations of dbt Cloud](https://docs.getdbt.com/guides/core Once you’ve confirmed that dbt Cloud orchestration and CI/CD are working as expected, you should pause your current orchestration tool and stop or update your current CI/CD process. This is not relevant if you’re still using an external orchestrator (such as Airflow), and you’ve swapped out `dbt-core` execution for dbt Cloud execution (through the [API](/docs/dbt-cloud-apis/overview)). Familiarize your team with dbt Cloud's [features](/docs/cloud/about-cloud/dbt-cloud-features) and optimize development and deployment processes. Some key features to consider include: -- **Version management:** Manage [dbt versions](/docs/dbt-versions/upgrade-dbt-version-in-cloud) and ensure team collaboration with dbt Cloud's one-click feature, removing the hassle of manual updates and version discrepancies. You can go [**Versionless**](/docs/dbt-versions/upgrade-dbt-version-in-cloud#versionless) to always get the latest features and early access to new functionality for your dbt project. +- **Release tracks:** Choose a [release track](/docs/dbt-versions/cloud-release-tracks) for automatic dbt version upgrades, at the cadence appropriate for your team — removing the hassle of manual updates and the risk of version discrepancies. You can also get early access to new functionality, ahead of dbt Core. - **Development tools**: Use the [dbt Cloud CLI](/docs/cloud/cloud-cli-installation) or [dbt Cloud IDE](/docs/cloud/dbt-cloud-ide/develop-in-the-cloud) to build, test, run, and version control your dbt projects. - **Documentation and Source freshness:** Automate storage of [documentation](/docs/build/documentation) and track [source freshness](/docs/deploy/source-freshness) in dbt Cloud, which streamlines project maintenance. - **Notifications and logs:** Receive immediate [notifications](/docs/deploy/monitor-jobs) for job failures, with direct links to the job details. Access comprehensive logs for all job runs to help with troubleshooting. diff --git a/website/docs/guides/core-to-cloud-1.md b/website/docs/guides/core-to-cloud-1.md index efed66c862a..3d6b119c178 100644 --- a/website/docs/guides/core-to-cloud-1.md +++ b/website/docs/guides/core-to-cloud-1.md @@ -58,8 +58,7 @@ This guide outlines the steps you need to take to move from dbt Core to dbt Clou ## Prerequisites -- You have an existing dbt Core project connected to a Git repository and data platform supported in [dbt Cloud](/docs/cloud/connect-data-platform/about-connections). -- A [supported version](/docs/dbt-versions/core) of dbt or select [**Versionless**](/docs/dbt-versions/upgrade-dbt-version-in-cloud#versionless) of dbt. +- You have an existing dbt Core project connected to a Git repository and data platform supported in [dbt Cloud](/docs/cloud/connect-data-platform/about-connections). - You have a dbt Cloud account. **[Don't have one? Start your free trial today](https://www.getdbt.com/signup)**! ## Account setup @@ -147,8 +146,8 @@ The most common data environments are production, staging, and development. The ### Initial setup steps 1. 
**Set up development environment** — Set up your [development](/docs/dbt-cloud-environments#create-a-development-environment) environment and [development credentials](/docs/cloud/dbt-cloud-ide/develop-in-the-cloud#access-the-cloud-ide). You’ll need this to access your dbt project and start developing.
-2. **dbt Core version** — In your dbt Cloud environment and credentials, use the same dbt Core version you use locally. You can run `dbt --version` in the command line to find out which version of dbt Core you’re using.
-   - When using dbt Core, you need to think about which version you’re using and manage your own upgrades. When using dbt Cloud, leverage ["Versionless"](/docs/dbt-versions/upgrade-dbt-version-in-cloud#versionless) so you don’t have to.
+2. **dbt Core version** — In your dbt Cloud environment, select a [release track](/docs/dbt-versions/cloud-release-tracks) for ongoing dbt version upgrades. If your team plans to use both dbt Core and dbt Cloud for developing or deploying your dbt project, you can run `dbt --version` in the command line to find out which version of dbt Core you’re using.
+   - When using dbt Core, you need to think about which version you’re using and manage your own upgrades. When using dbt Cloud, leverage [release tracks](/docs/dbt-versions/cloud-release-tracks) so you don’t have to.
3. **Connect to your data platform** — When using dbt Cloud, you can [connect to your data platform](/docs/cloud/connect-data-platform/about-connections) directly in the UI.
   - Each environment is roughly equivalent to an entry in your `profiles.yml` file. This means you don't need a `profiles.yml` file in your project.
@@ -210,7 +209,7 @@ To use the [dbt Cloud's job scheduler](/docs/deploy/job-scheduler), set up one e
### Initial setup steps
1. **dbt Core version** — In your environment settings, configure dbt Cloud with the same dbt Core version.
-   - Once your full migration is complete, we recommend upgrading your environments to ["Versionless"](/docs/dbt-versions/upgrade-dbt-version-in-cloud#versionless) to always get the latest features and more. You only need to do this once.
+   - Once your full migration is complete, we recommend switching your environments to [release tracks](/docs/dbt-versions/cloud-release-tracks) to always get the latest features and more. You only need to do this once.
2. **Configure your jobs** — [Create jobs](/docs/deploy/deploy-jobs#create-and-schedule-jobs) for scheduled or event-driven dbt jobs. You can use cron execution, manual, pull requests, or trigger on the completion of another job.
   - Note that alongside [jobs in dbt Cloud](/docs/deploy/jobs), discover other ways to schedule and run your dbt jobs with the help of other tools. Refer to [Integrate with other tools](/docs/deploy/deployment-tools) for more information.
diff --git a/website/docs/guides/core-to-cloud-3.md b/website/docs/guides/core-to-cloud-3.md
index 7d482d54471..81222471345 100644
--- a/website/docs/guides/core-to-cloud-3.md
+++ b/website/docs/guides/core-to-cloud-3.md
@@ -36,7 +36,7 @@ You may have already started your move to dbt Cloud and are looking for tips to
In dbt Cloud, you can natively connect to your data platform and test its [connection](/docs/connect-adapters) with a click of a button. This is especially useful for users who are new to dbt Cloud or are looking to streamline their connection setup. 
Here are some tips and caveats to consider: ### Tips -- Manage [dbt versions](/docs/dbt-versions/upgrade-dbt-version-in-cloud) and ensure team collaboration with dbt Cloud's one-click feature, eliminating the need for manual updates and version discrepancies. You can go [**Versionless**](/docs/dbt-versions/upgrade-dbt-version-in-cloud#versionless) to always get the latest features and early access to new functionality for your dbt project. +- Manage [dbt versions](/docs/dbt-versions/upgrade-dbt-version-in-cloud) and ensure team collaboration with dbt Cloud's one-click feature, eliminating the need for manual updates and version discrepancies. Select a [release track](/docs/dbt-versions/cloud-release-tracks) for ongoing updates, to always stay up to date with fixes and (optionally) get early access to new functionality for your dbt project. - dbt Cloud supports a whole host of [cloud providers](/docs/cloud/connect-data-platform/about-connections), including Snowflake, Databricks, BigQuery, Fabric, and Redshift (to name a few). - Use [Extended Attributes](/docs/deploy/deploy-environments#extended-attributes) to set a flexible [profiles.yml](/docs/core/connect-data-platform/profiles.yml) snippet in your dbt Cloud environment settings. It gives you more control over environments (both deployment and development) and extends how dbt Cloud connects to the data platform within a given environment. - For example, if you have a field in your `profiles.yml` that you’d like to add to the dbt Cloud adapter user interface, you can use Extended Attributes to set it. diff --git a/website/docs/guides/custom-cicd-pipelines.md b/website/docs/guides/custom-cicd-pipelines.md index be23524d096..668d3f6f1dd 100644 --- a/website/docs/guides/custom-cicd-pipelines.md +++ b/website/docs/guides/custom-cicd-pipelines.md @@ -506,7 +506,7 @@ Additionally, you’ll see the job in the run history of dbt Cloud. It should be - + diff --git a/website/docs/guides/dbt-python-snowpark.md b/website/docs/guides/dbt-python-snowpark.md index 2e74c9722d8..091f1006992 100644 --- a/website/docs/guides/dbt-python-snowpark.md +++ b/website/docs/guides/dbt-python-snowpark.md @@ -286,7 +286,7 @@ We need to obtain our data source by copying our Formula 1 data into Snowflake t ## Change development schema name navigate the IDE -1. First we are going to change the name of our default schema to where our dbt models will build. By default, the name is `dbt_`. We will change this to `dbt_` to create your own personal development schema. To do this, select **Profile Settings** from the gear icon in the upper right. +1. First we are going to change the name of our default schema to where our dbt models will build. By default, the name is `dbt_`. We will change this to `dbt_` to create your own personal development schema. To do this, click on your account name in the left side menu and select **Account settings**. diff --git a/website/docs/guides/how-to-use-databricks-workflows-to-run-dbt-cloud-jobs.md b/website/docs/guides/how-to-use-databricks-workflows-to-run-dbt-cloud-jobs.md index f420b7845a2..60d67218642 100644 --- a/website/docs/guides/how-to-use-databricks-workflows-to-run-dbt-cloud-jobs.md +++ b/website/docs/guides/how-to-use-databricks-workflows-to-run-dbt-cloud-jobs.md @@ -33,7 +33,7 @@ Using Databricks workflows to call the dbt Cloud job API can be useful for sever ## Set up a Databricks secret scope -1. 
Retrieve **[User API Token](https://docs.getdbt.com/docs/dbt-cloud-apis/user-tokens#user-api-tokens) **or **[Service Account Token](https://docs.getdbt.com/docs/dbt-cloud-apis/service-tokens#generating-service-account-tokens) **from dbt Cloud
+1. Retrieve a **[personal access token](https://docs.getdbt.com/docs/dbt-cloud-apis/user-tokens)** or **[service account token](https://docs.getdbt.com/docs/dbt-cloud-apis/service-tokens#generating-service-account-tokens)** from dbt Cloud.
2. Set up a **Databricks secret scope**, which is used to securely store your dbt Cloud API key.
3. Enter the **following commands** in your terminal:
diff --git a/website/docs/guides/manual-install-qs.md b/website/docs/guides/manual-install-qs.md
index 2e10cdac07c..816a9bd07ee 100644
--- a/website/docs/guides/manual-install-qs.md
+++ b/website/docs/guides/manual-install-qs.md
@@ -36,7 +36,7 @@ The following steps use [GitHub](https://github.com/) as the Git provider for th
2. Select **Public** so the repository can be shared with others. You can always make it private later.
3. Leave the default values for all other settings.
4. Click **Create repository**.
-5. Save the commands from "…or create a new repository on the command line" to use later in [Commit your changes](#commit-your-changes).
+5. Save the commands from "…or create a new repository on the command line" to use later in [Commit your changes](https://docs.getdbt.com/guides/manual-install?step=6).
## Create a project
@@ -162,7 +162,7 @@ You should have an output that looks like this:
Commit your changes so that the repository contains the latest code.
-1. Link the GitHub repository you created to your dbt project by running the following commands in Terminal. Make sure you use the correct git URL for your repository, which you should have saved from step 5 in [Create a repository](#create-a-repository).
+1. Link the GitHub repository you created to your dbt project by running the following commands in Terminal. Make sure you use the correct git URL for your repository, which you should have saved from step 5 in [Create a repository](https://docs.getdbt.com/guides/manual-install?step=2).
```shell
git init
diff --git a/website/docs/guides/mesh-qs.md b/website/docs/guides/mesh-qs.md
index 0d13d043059..d81951c9669 100644
--- a/website/docs/guides/mesh-qs.md
+++ b/website/docs/guides/mesh-qs.md
@@ -40,7 +40,6 @@ To leverage dbt Mesh, you need the following:
- You must have a [dbt Cloud Enterprise account](https://www.getdbt.com/get-started/enterprise-contact-pricing)
- You have access to a cloud data platform, permissions to load the sample data tables, and dbt Cloud permissions to create new projects.
-- Set your development and deployment [environments](/docs/dbt-cloud-environments) to use dbt [version](/docs/dbt-versions/core) 1.6 or later. You can also opt to go ["Versionless"](/docs/dbt-versions/upgrade-dbt-version-in-cloud#versionless) to always get the most recent features and functionality.
- This guide uses the Jaffle Shop sample data, including `customers`, `orders`, and `payments` tables. Follow the provided instructions to load this data into your respective data platform:
  - [Snowflake](https://docs.getdbt.com/guides/snowflake?step=3)
  - [Databricks](https://docs.getdbt.com/guides/databricks?step=3)
@@ -95,7 +94,7 @@ To set a production environment:
6. Click **Test Connection** to confirm the deployment connection.
6. Click **Save** to create a production environment. 
- + ## Set up a foundational project @@ -300,6 +299,8 @@ To run your first deployment dbt Cloud job, you will need to create a new dbt Cl 5. After the run is complete, click **Explore** from the upper menu bar. You should now see your lineage, tests, and documentation coming through successfully. +For details on how dbt Cloud uses metadata from the Staging environment to resolve references in downstream projects, check out the section on [Staging with downstream dependencies](/docs/collaborate/govern/project-dependencies#staging-with-downstream-dependencies). + ## Reference a public model in your downstream project In this section, you will set up the downstream project, "Jaffle | Finance", and [cross-project reference](/docs/collaborate/govern/project-dependencies) the `fct_orders` model from the foundational project. Navigate to the **Develop** page to set up our project: diff --git a/website/docs/guides/microsoft-fabric-qs.md b/website/docs/guides/microsoft-fabric-qs.md index 157ab2e6b89..6bacf4177df 100644 --- a/website/docs/guides/microsoft-fabric-qs.md +++ b/website/docs/guides/microsoft-fabric-qs.md @@ -101,7 +101,7 @@ In this quickstart guide, you'll learn how to use dbt Cloud with [Microsoft Fabr ## Connect dbt Cloud to Microsoft Fabric -1. Create a new project in dbt Cloud. From **Account settings** (using the gear menu in the top right corner), click **+ New Project**. +1. Create a new project in dbt Cloud. Navigate to **Account settings** (by clicking on your account name in the left side menu), and click **+ New Project**. 2. Enter a project name and click **Continue**. 3. Choose **Fabric** as your connection and click **Next**. 4. In the **Configure your environment** section, enter the **Settings** for your new project: diff --git a/website/docs/guides/redshift-qs.md b/website/docs/guides/redshift-qs.md index 8b950472506..83fafad1d12 100644 --- a/website/docs/guides/redshift-qs.md +++ b/website/docs/guides/redshift-qs.md @@ -170,7 +170,7 @@ Now we are going to load our sample data into the S3 bucket that our Cloudformat ``` ## Connect dbt Cloud to Redshift -1. Create a new project in [dbt Cloud](/docs/cloud/about-cloud/access-regions-ip-addresses). From **Account settings** (using the gear menu in the top right corner), click **+ New Project**. +1. Create a new project in [dbt Cloud](/docs/cloud/about-cloud/access-regions-ip-addresses). Navigate to **Account settings** (by clicking on your account name in the left side menu), and click **+ New Project**. 2. Enter a project name and click **Continue**. 3. For the warehouse, click **Redshift** then **Next** to set up your connection. 4. Enter your Redshift settings. Reference your credentials you saved from the CloudFormation template. diff --git a/website/docs/guides/serverless-datadog.md b/website/docs/guides/serverless-datadog.md index 10444ccae9a..dcb4a851663 100644 --- a/website/docs/guides/serverless-datadog.md +++ b/website/docs/guides/serverless-datadog.md @@ -108,7 +108,7 @@ Wrote config file fly.toml
## Store secrets
The application requires four secrets to be set, using these names:
-- `DBT_CLOUD_SERVICE_TOKEN`: a dbt Cloud [user token](https://docs.getdbt.com/docs/dbt-cloud-apis/user-tokens) or [service account token](https://docs.getdbt.com/docs/dbt-cloud-apis/service-tokens) with at least the `Metdata Only` permission.
+- `DBT_CLOUD_SERVICE_TOKEN`: a dbt Cloud [personal access token](https://docs.getdbt.com/docs/dbt-cloud-apis/user-tokens) or [service account token](https://docs.getdbt.com/docs/dbt-cloud-apis/service-tokens) with at least the `Metadata Only` permission.
- `DBT_CLOUD_AUTH_TOKEN`: the Secret Key for the dbt Cloud webhook you created earlier.
- `DD_API_KEY`: the API key you created earlier.
- `DD_SITE`: The Datadog site for your organisation, e.g. `datadoghq.com`.
diff --git a/website/docs/guides/serverless-pagerduty.md b/website/docs/guides/serverless-pagerduty.md
index ffd25f8989c..a4df65e0304 100644
--- a/website/docs/guides/serverless-pagerduty.md
+++ b/website/docs/guides/serverless-pagerduty.md
@@ -113,7 +113,7 @@ Make note of the Webhook Secret Key for later.
## Store secrets
The application requires three secrets to be set, using these names:
-- `DBT_CLOUD_SERVICE_TOKEN`: a dbt Cloud [user token](https://docs.getdbt.com/docs/dbt-cloud-apis/user-tokens) or [service account token](https://docs.getdbt.com/docs/dbt-cloud-apis/service-tokens) with at least the `Metdata Only` permission.
+- `DBT_CLOUD_SERVICE_TOKEN`: a dbt Cloud [personal access token](https://docs.getdbt.com/docs/dbt-cloud-apis/user-tokens) or [service account token](https://docs.getdbt.com/docs/dbt-cloud-apis/service-tokens) with at least the `Metadata Only` permission.
- `DBT_CLOUD_AUTH_TOKEN`: the Secret Key for the dbt Cloud webhook you created earlier.
- `PD_ROUTING_KEY`: the integration key for the PagerDuty integration you created earlier.
diff --git a/website/docs/guides/sl-snowflake-qs.md b/website/docs/guides/sl-snowflake-qs.md
index b5a0e559c5b..79038cd1dfc 100644
--- a/website/docs/guides/sl-snowflake-qs.md
+++ b/website/docs/guides/sl-snowflake-qs.md
@@ -106,7 +106,6 @@ Open a new tab and follow these quick steps for account setup and data loading i
-- Production and development environments must be on [dbt version 1.6 or higher](/docs/dbt-versions/upgrade-dbt-version-in-cloud). Alternatively, set your environment to [**Versionless**](/docs/dbt-versions/upgrade-dbt-version-in-cloud#versionless) to always get the latest updates.
- Create a [trial Snowflake account](https://signup.snowflake.com/):
  - Select the Enterprise Snowflake edition with ACCOUNTADMIN access. Consider organizational questions when choosing a cloud provider, refer to Snowflake's [Introduction to Cloud Platforms](https://docs.snowflake.com/en/user-guide/intro-cloud-platforms).
  - Select a cloud provider and region. All cloud providers and regions will work so choose whichever you prefer.
@@ -291,7 +290,7 @@ Using Partner Connect allows you to create a complete dbt account with your [Sno
5. After you have filled out the form and clicked **Complete Registration**, you will be logged into dbt Cloud automatically.
-6. 
Click your account name in the left side menu and select **Account settings**, choose the "Partner Connect Trial" project, and select **snowflake** in the overview table. Select **Edit** and update the **Database** field to `analytics` and the **Warehouse** field to `transforming`. @@ -301,7 +300,7 @@ Using Partner Connect allows you to create a complete dbt account with your [Sno -1. Create a new project in dbt Cloud. From **Account settings** (using the gear menu in the top right corner), click **+ New Project**. +1. Create a new project in dbt Cloud. Navigate to **Account settings** (by clicking on your account name in the left side menu), and click **+ New Project**. 2. Enter a project name and click **Continue**. 3. For the warehouse, click **Snowflake** then **Next** to set up your connection. diff --git a/website/docs/guides/snowflake-qs.md b/website/docs/guides/snowflake-qs.md index bc27d1e1a4f..f1edd5ffc00 100644 --- a/website/docs/guides/snowflake-qs.md +++ b/website/docs/guides/snowflake-qs.md @@ -170,7 +170,7 @@ Using Partner Connect allows you to create a complete dbt account with your [Sno 5. After you have filled out the form and clicked **Complete Registration**, you will be logged into dbt Cloud automatically. -6. From your **Account Settings** in dbt Cloud (using the gear menu in the upper right corner), choose the "Partner Connect Trial" project and select **snowflake** in the overview table. Select edit and update the fields **Database** and **Warehouse** to be `analytics` and `transforming`, respectively. +6. Go to the left side menu and click your account name, then select **Account settings**, choose the "Partner Connect Trial" project, and select **snowflake** in the overview table. Select edit and update the fields **Database** and **Warehouse** to be `analytics` and `transforming`, respectively. @@ -180,7 +180,7 @@ Using Partner Connect allows you to create a complete dbt account with your [Sno -1. Create a new project in dbt Cloud. From **Account settings** (using the gear menu in the top right corner), click **+ New Project**. +1. Create a new project in dbt Cloud. Navigate to **Account settings** (by clicking on your account name in the left side menu), and click **+ New Project**. 2. Enter a project name and click **Continue**. 3. For the warehouse, click **Snowflake** then **Next** to set up your connection. @@ -230,6 +230,26 @@ Now that you have a repository configured, you can initialize your project and s ``` - In the command line bar at the bottom, enter `dbt run` and click **Enter**. You should see a `dbt run succeeded` message. +:::info +If you receive an insufficient privileges error on Snowflake at this point, it may be because your Snowflake role doesn't have permission to access the raw source data, to build target tables and views, or both. + +To troubleshoot, use a role with sufficient privileges (like `ACCOUNTADMIN`) and run the following commands in Snowflake. + +**Note**: Replace `snowflake_role_name` with the role you intend to use. If you launched dbt Cloud with Snowflake Partner Connect, use `pc_dbt_role` as the role. 
+ +``` +grant all on database raw to role snowflake_role_name; +grant all on database analytics to role snowflake_role_name; + +grant all on schema raw.jaffle_shop to role snowflake_role_name; +grant all on schema raw.stripe to role snowflake_role_name; + +grant all on all tables in database raw to role snowflake_role_name; +grant all on future tables in database raw to role snowflake_role_name; +``` + +::: + ## Build your first model You have two options for working with files in the dbt Cloud IDE: diff --git a/website/docs/guides/starburst-galaxy-qs.md b/website/docs/guides/starburst-galaxy-qs.md index 316e392483d..76e4abadd64 100644 --- a/website/docs/guides/starburst-galaxy-qs.md +++ b/website/docs/guides/starburst-galaxy-qs.md @@ -203,7 +203,7 @@ To query the Jaffle Shop data with Starburst Galaxy, you need to create tables u 3. Click **Clusters** on the left sidebar. 4. Find your cluster in the **View clusters** table and click **Connection info**. Choose **dbt** from the **Select client** dropdown. Keep the **Connection information** modal open. You will use details from that modal in dbt Cloud. 5. In another browser tab, log in to [dbt Cloud](/docs/cloud/about-cloud/access-regions-ip-addresses). -6. Create a new project in dbt Cloud. From Account settings (using the gear menu in the top right corner), click **+ New Project**. +6. Create a new project in dbt Cloud. Click on your account name in the left side menu, select **Account settings**, and click **+ New Project**. 7. Enter a project name and click **Continue**. 8. Choose **Starburst** as your connection and click **Next**. 9. Enter the **Settings** for your new project: diff --git a/website/docs/guides/teradata-qs.md b/website/docs/guides/teradata-qs.md index da951620515..338997f274a 100644 --- a/website/docs/guides/teradata-qs.md +++ b/website/docs/guides/teradata-qs.md @@ -59,54 +59,52 @@ If you created your Teradata Vantage database instance at https://clearscape.ter ::: -1. Use your preferred SQL IDE editor to create two databases: `jaffle_shop` and `stripe`: +1. Use your preferred SQL IDE editor to create the database, `jaffle_shop`: ```sql CREATE DATABASE jaffle_shop AS PERM = 1e9; - CREATE DATABASE stripe AS PERM = 1e9; ``` -2. In the databases `jaffle_shop` and `stripe`, create three foreign tables and reference the respective csv files located in object storage: - - ```sql - CREATE FOREIGN TABLE jaffle_shop.customers ( - id integer, - first_name varchar (100), - last_name varchar (100) - ) - USING ( - LOCATION ('/s3/dbt-tutorial-public.s3.amazonaws.com/jaffle_shop_customers.csv') - ) - NO PRIMARY INDEX; - - CREATE FOREIGN TABLE jaffle_shop.orders ( - id integer, - user_id integer, - order_date date, - status varchar(100) - ) - USING ( - LOCATION ('/s3/dbt-tutorial-public.s3.amazonaws.com/jaffle_shop_orders.csv') - ) - NO PRIMARY INDEX; - - CREATE FOREIGN TABLE stripe.payment ( - id integer, - orderid integer, - paymentmethod varchar (100), - status varchar (100), - amount integer, - created date - ) - USING ( - LOCATION ('/s3/dbt-tutorial-public.s3.amazonaws.com/stripe_payments.csv') - ) - NO PRIMARY INDEX; - ``` - -## Connect dbt cloud to Teradata - -1. Create a new project in dbt Cloud. From **Account settings** (using the gear menu in the top right corner), click **New Project**. +2. 
In `jaffle_shop` database, create three foreign tables and reference the respective csv files located in object storage: + + ```sql + CREATE FOREIGN TABLE jaffle_shop.customers ( + id integer, + first_name varchar (100), + last_name varchar (100), + email varchar (100) + ) + USING ( + LOCATION ('/gs/storage.googleapis.com/clearscape_analytics_demo_data/dbt/raw_customers.csv') + ) + NO PRIMARY INDEX; + + CREATE FOREIGN TABLE jaffle_shop.orders ( + id integer, + user_id integer, + order_date date, + status varchar(100) + ) + USING ( + LOCATION ('/gs/storage.googleapis.com/clearscape_analytics_demo_data/dbt/raw_orders.csv') + ) + NO PRIMARY INDEX; + + CREATE FOREIGN TABLE jaffle_shop.payments ( + id integer, + orderid integer, + paymentmethod varchar (100), + amount integer + ) + USING ( + LOCATION ('/gs/storage.googleapis.com/clearscape_analytics_demo_data/dbt/raw_payments.csv') + ) + NO PRIMARY INDEX; + ``` + +## Connect dbt Cloud to Teradata + +1. Create a new project in dbt Cloud. Click on your account name in the left side menu, select **Account settings**, and click **+ New Project**. 2. Enter a project name and click **Continue**. 3. In **Configure your development environment**, click **Add new connection**. 4. Select **Teradata**, fill in all the required details in the **Settings** section, and test the connection. @@ -136,12 +134,46 @@ Now that you have a repository configured, you can initialize your project and s 1. Click **Start developing in the IDE**. It might take a few minutes for your project to spin up for the first time as it establishes your git connection, clones your repo, and tests the connection to the warehouse. 2. Above the file tree to the left, click **Initialize your project** to build out your folder structure with example models. 3. Make your initial commit by clicking **Commit and sync**. Use the commit message `initial commit` to create the first commit to your managed repo. Once you’ve created the commit, you can open a branch to add new dbt code. -4. You can now directly query data from your warehouse and execute `dbt run`. You can try this out now: - - Click **Create new file**, add this query to the new file, and click **Save as** to save the new file: - ```sql - select * from jaffle_shop.customers - ``` - - In the command line bar at the bottom, enter `dbt run` and click **Enter**. You should see a `dbt run succeeded` message. + +## Delete the example models + +You can now delete the files that dbt created when you initialized the project: + +1. Delete the `models/example/` directory. +2. Delete the `example:` key from your `dbt_project.yml` file, and any configurations that are listed under it. + + + + ```yaml + # before + models: + my_new_project: + +materialized: table + example: + +materialized: view + ``` + + + + + + ```yaml + # after + models: + my_new_project: + +materialized: table + ``` + + + +3. Save your changes. +4. Commit your changes and merge to the main branch. + +#### FAQs + + + + ## Build your first model @@ -153,7 +185,7 @@ You have two options for working with files in the dbt Cloud IDE: Name the new branch `add-customers-model`. 1. Click the **...** next to the `models` directory, then select **Create file**. -2. Name the file `customers.sql`, then click **Create**. +2. Name the file `bi_customers.sql`, then click **Create**. 3. Copy the following query into the file and click **Save**. 
```sql @@ -208,7 +240,7 @@ final as ( from customers - left join customer_orders using (customer_id) + left join customer_orders on customers.customer_id = customer_orders.customer_id ) @@ -222,19 +254,73 @@ You can connect your business intelligence (BI) tools to these views and tables ## Change the way your model is materialized - +One of the most powerful features of dbt is that you can change the way a model is materialized in your warehouse, simply by changing a configuration value. You can change things between tables and views by changing a keyword rather than writing the data definition language (DDL) to do this behind the scenes. -## Delete the example models +By default, everything gets created as a view. You can override that at the directory level so everything in that directory will materialize to a different materialization. + +1. Edit your `dbt_project.yml` file. + - Update your project `name` to: + + + ```yaml + name: 'jaffle_shop' + ``` + + + - Configure `jaffle_shop` so everything in it will be materialized as a table; and configure `example` so everything in it will be materialized as a view. Update your `models` config block to: + + + + ```yaml + models: + jaffle_shop: + +materialized: table + ``` + + + - Click **Save**. + +2. Enter the `dbt run` command. Your `bi_customers` model should now be built as a table! + :::info + To do this, dbt had to first run a `drop view` statement (or API call on BigQuery), then a `create table as` statement. + ::: + +3. Edit `models/bi_customers.sql` to override the `dbt_project.yml` for the `customers` model only by adding the following snippet to the top, and click **Save**: - + + + ```sql + {{ + config( + materialized='view' + ) + }} + + with customers as ( + + select + id as customer_id + ... + + ) + + ``` + + + +4. Enter the `dbt run` command. Your model, `bi_customers`, should now build as a view. + +### FAQs + + + + ## Build models on top of other models 1. Create a new SQL file, `models/stg_customers.sql`, with the SQL from the `customers` CTE in your original query. -2. Create a second new SQL file, `models/stg_orders.sql`, with the SQL from the `orders` CTE in your original query. - ```sql @@ -248,6 +334,7 @@ You can connect your business intelligence (BI) tools to these views and tables +2. Create a second new SQL file, `models/stg_orders.sql`, with the SQL from the `orders` CTE in your original query. ```sql @@ -262,9 +349,9 @@ You can connect your business intelligence (BI) tools to these views and tables -3. Edit the SQL in your `models/customers.sql` file as follows: +3. Edit the SQL in your `models/bi_customers.sql` file as follows: - + ```sql with customers as ( @@ -306,7 +393,7 @@ You can connect your business intelligence (BI) tools to these views and tables from customers - left join customer_orders using (customer_id) + left join customer_orders on customers.customer_id = customer_orders.customer_id ) @@ -395,6 +482,192 @@ Sources make it possible to name and describe the data loaded into your warehous
- +## Add tests to your models + +Adding [tests](/docs/build/data-tests) to a project helps validate that your models are working correctly. + +To add tests to your project: + +1. Create a new YAML file in the `models` directory, named `models/schema.yml` +2. Add the following contents to the file: + + + + ```yaml + version: 2 + + models: + - name: bi_customers + columns: + - name: customer_id + tests: + - unique + - not_null + + - name: stg_customers + columns: + - name: customer_id + tests: + - unique + - not_null + + - name: stg_orders + columns: + - name: order_id + tests: + - unique + - not_null + - name: status + tests: + - accepted_values: + values: ['placed', 'shipped', 'completed', 'return_pending', 'returned'] + - name: customer_id + tests: + - not_null + - relationships: + to: ref('stg_customers') + field: customer_id + + ``` + + + +3. Run `dbt test`, and confirm that all your tests passed. + +When you run `dbt test`, dbt iterates through your YAML files, and constructs a query for each test. Each query will return the number of records that fail the test. If this number is 0, then the test is successful. + +#### FAQs + + + + + + + + + + +## Document your models + +Adding [documentation](/docs/build/documentation) to your project allows you to describe your models in rich detail, and share that information with your team. Here, we're going to add some basic documentation to our project. + +1. Update your `models/schema.yml` file to include some descriptions, such as those below. + + + + ```yaml + version: 2 + + models: + - name: bi_customers + description: One record per customer + columns: + - name: customer_id + description: Primary key + tests: + - unique + - not_null + - name: first_order_date + description: NULL when a customer has not yet placed an order. + + - name: stg_customers + description: This model cleans up customer data + columns: + - name: customer_id + description: Primary key + tests: + - unique + - not_null + + - name: stg_orders + description: This model cleans up order data + columns: + - name: order_id + description: Primary key + tests: + - unique + - not_null + - name: status + tests: + - accepted_values: + values: ['placed', 'shipped', 'completed', 'return_pending', 'returned'] + - name: customer_id + tests: + - not_null + - relationships: + to: ref('stg_customers') + field: customer_id + ``` + + + +2. Run `dbt docs generate` to generate the documentation for your project. dbt introspects your project and your warehouse to generate a file with rich documentation about your project. + + +3. Click the book icon in the Develop interface to launch documentation in a new tab. + +#### FAQs + + + + + + +## Commit your changes + +Now that you've built your customer model, you need to commit the changes you made to the project so that the repository has your latest code. + +**If you edited directly in the protected primary branch:**
+1. Click the **Commit and sync git** button. This action prepares your changes for commit.
+2. In the **Commit to a new branch** modal that appears, name your new branch `add-customers-model`. This branches off from your primary branch with your new changes.
+3. Add a commit message, such as "Add customers model, tests, docs", and commit your changes.
+4. Click **Merge this branch to main** to add these changes to the main branch on your repo.
+
+
**If you created a new branch before editing:**
+1. Since you already branched out of the primary protected branch, go to **Version Control** on the left. +2. Click **Commit and sync** to add a message. +3. Add a commit message, such as "Add customers model, tests, docs." +4. Click **Merge this branch to main** to add these changes to the main branch on your repo. + +## Deploy dbt + +Use dbt Cloud's Scheduler to deploy your production jobs confidently and build observability into your processes. You'll learn to create a deployment environment and run a job in the following steps. + +### Create a deployment environment + +1. In the upper left, select **Deploy**, then click **Environments**. +2. Click **Create Environment**. +3. In the **Name** field, write the name of your deployment environment. For example, "Production." +4. In the **dbt Version** field, select the latest version from the dropdown. +5. Under **Deployment connection**, enter the name of the dataset you want to use as the target, such as `jaffle_shop_prod`. This will allow dbt to build and work with that dataset. +6. Click **Save**. + +### Create and run a job + +Jobs are a set of dbt commands that you want to run on a schedule. For example, `dbt build`. + +As the `jaffle_shop` business gains more customers, and those customers create more orders, you will see more records added to your source data. Because you materialized the `bi_customers` model as a table, you'll need to periodically rebuild your table to ensure that the data stays up-to-date. This update will happen when you run a job. + +1. After creating your deployment environment, you should be directed to the page for a new environment. If not, select **Deploy** in the upper left, then click **Jobs**. +2. Click **+ Create job** and then select **Deploy job**. Provide a name, for example, "Production run", and link it to the Environment you just created. +3. Scroll down to the **Execution Settings** section. +4. Under **Commands**, add this command as part of your job if you don't see it: + * `dbt build` +5. Select the **Generate docs on run** checkbox to automatically [generate updated project docs](/docs/collaborate/build-and-view-your-docs) each time your job runs. +6. For this exercise, do _not_ set a schedule for your project to run — while your organization's project should run regularly, there's no need to run this example project on a schedule. Scheduling a job is sometimes referred to as _deploying a project_. +7. Select **Save**, then click **Run now** to run your job. +8. Click the run and watch its progress under "Run history." +9. Once the run is complete, click **View Documentation** to see the docs for your project. + + +Congratulations 🎉! You've just deployed your first dbt project! + + +#### FAQs + + + + - diff --git a/website/docs/guides/zapier-ms-teams.md b/website/docs/guides/zapier-ms-teams.md index 171ed19193a..e52205d315f 100644 --- a/website/docs/guides/zapier-ms-teams.md +++ b/website/docs/guides/zapier-ms-teams.md @@ -56,7 +56,7 @@ The sample body's values are hard-coded and not reflective of your project, but ## Store secrets -In the next step, you will need the Webhook Secret Key from the prior step, and a dbt Cloud [user token](https://docs.getdbt.com/docs/dbt-cloud-apis/user-tokens) or [service account token](https://docs.getdbt.com/docs/dbt-cloud-apis/service-tokens). 
+In the next step, you will need the Webhook Secret Key from the prior step, and a dbt Cloud [personal access token](https://docs.getdbt.com/docs/dbt-cloud-apis/user-tokens) or [service account token](https://docs.getdbt.com/docs/dbt-cloud-apis/service-tokens). Zapier allows you to [store secrets](https://help.zapier.com/hc/en-us/articles/8496293271053-Save-and-retrieve-data-from-Zaps), which prevents your keys from being displayed in plaintext in the Zap code. You will be able to access them via the [StoreClient utility](https://help.zapier.com/hc/en-us/articles/8496293969549-Store-data-from-code-steps-with-StoreClient). @@ -136,7 +136,7 @@ for step in run_data_results['run_steps']: # Remove timestamp and any colour tags full_log = re.sub('\x1b?\[[0-9]+m[0-9:]*', '', full_log) - summary_start = re.search('(?:Completed with \d+ errors? and \d+ warnings?:|Database Error|Compilation Error|Runtime Error)', full_log) + summary_start = re.search('(?:Completed with \d+ error.* and \d+ warnings?:|Database Error|Compilation Error|Runtime Error)', full_log) line_items = re.findall('(^.*(?:Failure|Error) in .*\n.*\n.*)', full_log, re.MULTILINE) diff --git a/website/docs/guides/zapier-refresh-mode-report.md b/website/docs/guides/zapier-refresh-mode-report.md index c3bd1a11778..23dd19d0b4c 100644 --- a/website/docs/guides/zapier-refresh-mode-report.md +++ b/website/docs/guides/zapier-refresh-mode-report.md @@ -46,7 +46,7 @@ Once you've tested the endpoint in dbt Cloud, go back to Zapier and click **Test The sample body's values are hard-coded and not reflective of your project, but they give Zapier a correctly-shaped object during development. ## Store secrets -In the next step, you will need the Webhook Secret Key from the prior step, and a dbt Cloud [user token](https://docs.getdbt.com/docs/dbt-cloud-apis/user-tokens) or [service account token](https://docs.getdbt.com/docs/dbt-cloud-apis/service-tokens), as well as a [Mode API token and secret](https://mode.com/developer/api-reference/authentication/). +In the next step, you will need the Webhook Secret Key from the prior step, and a dbt Cloud [personal access token](https://docs.getdbt.com/docs/dbt-cloud-apis/user-tokens) or [service account token](https://docs.getdbt.com/docs/dbt-cloud-apis/service-tokens), as well as a [Mode API token and secret](https://mode.com/developer/api-reference/authentication/). Zapier allows you to [store secrets](https://help.zapier.com/hc/en-us/articles/8496293271053-Save-and-retrieve-data-from-Zaps), which prevents your keys from being displayed in plaintext in the Zap code. You will be able to access them via the [StoreClient utility](https://help.zapier.com/hc/en-us/articles/8496293969549-Store-data-from-code-steps-with-StoreClient). diff --git a/website/docs/guides/zapier-slack.md b/website/docs/guides/zapier-slack.md index c3e7383c007..99c8347424a 100644 --- a/website/docs/guides/zapier-slack.md +++ b/website/docs/guides/zapier-slack.md @@ -50,7 +50,7 @@ Once you've tested the endpoint in dbt Cloud, go back to Zapier and click **Test The sample body's values are hardcoded and not reflective of your project, but they give Zapier a correctly-shaped object during development. ## Store secrets -In the next step, you will need the Webhook Secret Key from the prior step, and a dbt Cloud [user token](https://docs.getdbt.com/docs/dbt-cloud-apis/user-tokens) or [service account token](https://docs.getdbt.com/docs/dbt-cloud-apis/service-tokens). 
+In the next step, you will need the Webhook Secret Key from the prior step, and a dbt Cloud [personal access token](https://docs.getdbt.com/docs/dbt-cloud-apis/user-tokens) or [service account token](https://docs.getdbt.com/docs/dbt-cloud-apis/service-tokens). Zapier allows you to [store secrets](https://help.zapier.com/hc/en-us/articles/8496293271053-Save-and-retrieve-data-from-Zaps). This prevents your keys from being displayed as plaintext in the Zap code. You can access them with the [StoreClient utility](https://help.zapier.com/hc/en-us/articles/8496293969549-Store-data-from-code-steps-with-StoreClient). @@ -134,7 +134,7 @@ for step in run_data_results['run_steps']: # Remove timestamp and any colour tags full_log = re.sub('\x1b?\[[0-9]+m[0-9:]*', '', full_log) - summary_start = re.search('(?:Completed with \d+ errors? and \d+ warnings?:|Database Error|Compilation Error|Runtime Error)', full_log) + summary_start = re.search('(?:Completed with \d+ error.* and \d+ warnings?:|Database Error|Compilation Error|Runtime Error)', full_log) line_items = re.findall('(^.*(?:Failure|Error) in .*\n.*\n.*)', full_log, re.MULTILINE) @@ -215,7 +215,7 @@ Sometimes dbt Cloud posts the message about the run failing before the run's art A one-minute delay is generally sufficient. ### 5. Store secrets -In the next step, you will need either a dbt Cloud [user token](https://docs.getdbt.com/docs/dbt-cloud-apis/user-tokens) or [service account token](https://docs.getdbt.com/docs/dbt-cloud-apis/service-tokens). +In the next step, you will need either a dbt Cloud [personal access token](https://docs.getdbt.com/docs/dbt-cloud-apis/user-tokens) or [service account token](https://docs.getdbt.com/docs/dbt-cloud-apis/service-tokens). Zapier allows you to [store secrets](https://help.zapier.com/hc/en-us/articles/8496293271053-Save-and-retrieve-data-from-Zaps). This prevents your keys from being displayed as plaintext in the Zap code. You can access them with the [StoreClient utility](https://help.zapier.com/hc/en-us/articles/8496293969549-Store-data-from-code-steps-with-StoreClient). @@ -277,7 +277,7 @@ for step in results['run_steps']: # Remove timestamp and any colour tags full_log = re.sub('\x1b?\[[0-9]+m[0-9:]*', '', full_log) - summary_start = re.search('(?:Completed with \d+ errors? and \d+ warnings?:|Database Error|Compilation Error|Runtime Error)', full_log) + summary_start = re.search('(?:Completed with \d+ error.* and \d+ warnings?:|Database Error|Compilation Error|Runtime Error)', full_log) line_items = re.findall('(^.*(?:Failure|Error) in .*\n.*\n.*)', full_log, re.MULTILINE) if not summary_start: diff --git a/website/docs/reference/artifacts/run-results-json.md b/website/docs/reference/artifacts/run-results-json.md index ff8da3559fa..13ad528d185 100644 --- a/website/docs/reference/artifacts/run-results-json.md +++ b/website/docs/reference/artifacts/run-results-json.md @@ -44,8 +44,6 @@ import RowsAffected from '/snippets/_run-result.md'; - - The run_results.json includes three attributes related to the `applied` state that complement `unique_id`: - `compiled`: Boolean entry of the node compilation status (`False` after parsing, but `True` after compiling). 
@@ -195,5 +193,3 @@ Here's a printed snippet from the `run_results.json`: } ], ``` - - diff --git a/website/docs/reference/commands/build.md b/website/docs/reference/commands/build.md index c7ac29862c2..9f8e83d2abd 100644 --- a/website/docs/reference/commands/build.md +++ b/website/docs/reference/commands/build.md @@ -31,32 +31,9 @@ In DAG order, for selected resources or an entire project. The `build` command supports the `--empty` flag for building schema-only dry runs. The `--empty` flag limits the refs and sources to zero rows. dbt will still execute the model SQL against the target data warehouse but will avoid expensive reads of input data. This validates dependencies and ensures your models will build properly. -#### SQL compilation error when running the `--empty` flag on a model - -If you encounter the error: `SQL compilation error: syntax error line 1 at position 21 unexpected '('.` when running a model with the `--empty` flag, explicitly call the `.render()` method on that relation. - - - - -```Jinja - --- models/staging/stg_sys__customers.sql -{{ config( - pre_hook = [ - "alter external table {{ source('sys', 'customers').render() }} refresh" - ] -) }} - -with cus as ( - select * from {{ source("sys", "customers") }} -- leave this as is! -) - -select * from cus - -``` - - +import SQLCompilationError from '/snippets/_render-method.md'; + ## Tests diff --git a/website/docs/reference/commands/cmd-docs.md b/website/docs/reference/commands/cmd-docs.md index f20da08a4ae..03e11ae89f0 100644 --- a/website/docs/reference/commands/cmd-docs.md +++ b/website/docs/reference/commands/cmd-docs.md @@ -20,8 +20,6 @@ The command is responsible for generating your project's documentation website b dbt docs generate ``` - - Use the `--select` argument to limit the nodes included within `catalog.json`. When this flag is provided, step (3) will be restricted to the selected nodes. All other nodes will be excluded. Step (2) is unaffected. **Example**: @@ -30,8 +28,6 @@ Use the `--select` argument to limit the nodes included within `catalog.json`. W dbt docs generate --select +orders ``` - - Use the `--no-compile` argument to skip re-compilation. When this flag is provided, `dbt docs generate` will skip step (2) described above. **Example**: diff --git a/website/docs/reference/commands/deps.md b/website/docs/reference/commands/deps.md index 85c103e6337..6755dbbcb3c 100644 --- a/website/docs/reference/commands/deps.md +++ b/website/docs/reference/commands/deps.md @@ -58,29 +58,49 @@ Updates available for packages: ['tailsdotcom/dbt_artifacts', 'dbt-labs/snowplow Update your versions in packages.yml, then run dbt deps ``` - - ## Predictable package installs -Starting in dbt Core v1.7, dbt generates a `package-lock.yml` file in the root of your project. This contains the complete set of resolved packages based on the `packages` configuration in `dependencies.yml` or `packages.yml`. Each subsequent invocation of `dbt deps` will install from the _locked_ set of packages specified in this file. Storing the complete set of required packages (with pinned versions) in version-controlled code ensures predictable installs in production and consistency across all developers and environments. +Starting in dbt v1.7, dbt generates a `package-lock.yml` file in the root of your project. This file ensures consistent and predictable package installs by storing the exact versions (including commit SHAs) of all resolved packages specified in your `packages.yml` or `dependencies.yml`. 
This consistency is crucial for maintaining stability in development and production environments, preventing unexpected issues from new releases with potential bugs. + +When you run `dbt deps`, dbt installs packages based on the locked versions in `package-lock.yml`. To update these locked versions, you must explicitly run `dbt deps --upgrade` and commit the updated `package-lock.yml` file. Storing this file in version control guarantees consistency across all environments and for all developers. + +### Managing `package-lock.yml` + +The `package-lock.yml` file should be committed to Git initially and updated only when you intend to change versions or uninstall a package. For example, run `dbt deps --upgrade` to get updated package versions or `dbt deps --lock` to update the lock file based on changes to the packages config without installing the packages. + +To bypass using `package-lock.yml` entirely, you can add it to your project's `.gitignore`. However, this approach sacrifices the predictability of builds. If you choose this route, we strongly recommend adding version pins for third-party packages in your `packages` config. + +### Detecting changes in `packages` config + +The `package-lock.yml` file includes a `sha1_hash` of your packages config. If you update `packages.yml`, dbt will detect the change and rerun dependency resolution during the next `dbt deps` command. To update the lock file without installing the new packages, use the `--lock` flag: + +```shell +dbt deps --lock +``` + +### Forcing package updates -The `package-lock.yml` file should be committed in Git initially, and then updated and committed only when you want to change versions or uninstall a package (for example `dbt deps --upgrade` or `dbt deps --lock`). +To update all packages, even if `packages.yml` hasn’t changed, use the `--upgrade` flag: -The `package-lock.yml` file includes a `sha1_hash` of the `packages` config. This enables dbt to detect if the `packages` config has been updated, and to rerun dependency resolution. To only check for changes to the `packages` config and update the lock file accordingly without installing those packages, provide the `--lock` flag (that is, `dbt deps --lock`). +```shell -### Forcing upgrades +dbt deps --upgrade + +``` -It's possible to force package resolution to rerun, even if the `packages` config hasn't changed, by running `dbt deps --upgrade`. This enables you to get the latest commits from the `main` branch of an internally maintained `git` package while accepting the risk of unpredictable builds. +This is particularly useful for fetching the latest commits from the `main` branch of an internally maintained Git package. -An alternative to running `dbt deps --upgrade` in production is to "ignore" the lock file by adding `package-lock.yml` to your project's `.gitignore` file. +:::warning +Forcing package upgrades may introduce build inconsistencies unless carefully managed. +::: -If you pursue either approach, dbt Labs strongly recommends adding version pins for third-party packages within your `packages` config. +### Adding specific packages -## Add specific packages +The `dbt deps` command can add or update package configurations directly, saving you from remembering exact syntax. -The `dbt deps` command can add or update an existing package configuration — no need to remember the exact syntax for package configurations. 
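For context, a successful `dbt deps --add-package` call writes the corresponding entry into your packages config for you. A minimal sketch of what that might look like in `packages.yml` (or `dependencies.yml`, whichever your project uses) for a Hub package, with an illustrative version pin:

```yaml
packages:
  - package: dbt-labs/dbt_utils
    version: 1.0.0
```
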
+#### Hub packages (default) -For Hub packages (default), which are the easiest to install: +Hub packages are the default package types and the easiest to install. ```shell dbt deps --add-package dbt-labs/dbt_utils@1.0.0 @@ -89,13 +109,15 @@ dbt deps --add-package dbt-labs/dbt_utils@1.0.0 dbt deps --add-package dbt-labs/snowplow@">=0.7.0,<0.8.0" ``` -For other package types, use the `--source` flag: +#### Non-Hub packages + +Use the `--source` flag to specify the type of package to be installed: + ```shell -# add package from git + +# Git package dbt deps --add-package https://github.com/fivetran/dbt_amplitude@v0.3.0 --source git -# add package from local +# Local package dbt deps --add-package /opt/dbt/redshift --source local ``` - - diff --git a/website/docs/reference/commands/init.md b/website/docs/reference/commands/init.md index 8945eb823db..7b71bf70f45 100644 --- a/website/docs/reference/commands/init.md +++ b/website/docs/reference/commands/init.md @@ -17,15 +17,10 @@ Then, it will: - Create a new folder with your project name and sample files, enough to get you started with dbt - Create a connection profile on your local machine. The default location is `~/.dbt/profiles.yml`. Read more in [configuring your profile](/docs/core/connect-data-platform/connection-profiles). - - When using `dbt init` to initialize your project, include the `--profile` flag to specify an existing `profiles.yml` as the `profile:` key to use instead of creating a new one. For example, `dbt init --profile profile_name`. - - If the profile does not exist in `profiles.yml` or the command is run inside an existing project, the command raises an error. - ## Existing project @@ -36,7 +31,7 @@ If you've just cloned or downloaded an existing dbt project, `dbt init` can stil `dbt init` knows how to prompt for connection information by looking for a file named `profile_template.yml`. It will look for this file in two places: -- **Adapter plugin:** What's the bare minumum Postgres profile? What's the type of each field, what are its defaults? This information is stored in a file called [`dbt/include/postgres/profile_template.yml`](https://github.com/dbt-labs/dbt-core/blob/main/plugins/postgres/dbt/include/postgres/profile_template.yml). If you're the maintainer of an adapter plugin, we highly recommend that you add a `profile_template.yml` to your plugin, too. Refer to the [Build, test, document, and promote adapters](/guides/adapter-creation) guide for more information. +- **Adapter plugin:** What's the bare minumum Postgres profile? What's the type of each field, what are its defaults? This information is stored in a file called [`dbt/include/postgres/profile_template.yml`](https://github.com/dbt-labs/dbt-postgres/blob/main/dbt/include/postgres/profile_template.yml). If you're the maintainer of an adapter plugin, we highly recommend that you add a `profile_template.yml` to your plugin, too. Refer to the [Build, test, document, and promote adapters](/guides/adapter-creation) guide for more information. - **Existing project:** If you're the maintainer of an existing project, and you want to help new users get connected to your database quickly and easily, you can include your own custom `profile_template.yml` in the root of your project, alongside `dbt_project.yml`. For common connection attributes, set the values in `fixed`; leave user-specific attributes in `prompts`, but with custom hints and defaults as you'd like. 
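To make the `fixed` versus `prompts` split concrete, here is a rough sketch (not part of this change) of a project-level `profile_template.yml` for a Snowflake-style connection; the account, database, and warehouse values are placeholders:

```yaml
fixed:
  type: snowflake
  account: abc12345.us-east-1
  database: analytics
  warehouse: transforming
prompts:
  user:
    type: string
    hint: your snowflake username
  schema:
    type: string
    hint: usually dbt_<yourname>
  threads:
    type: int
    hint: "1 or more"
    default: 8
```

With a file like this in the project root, `dbt init` only prompts for the user-specific values and fills in the shared connection attributes automatically.
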
diff --git a/website/docs/reference/commands/run.md b/website/docs/reference/commands/run.md index 26db40cb7e4..58a876f98ef 100644 --- a/website/docs/reference/commands/run.md +++ b/website/docs/reference/commands/run.md @@ -83,4 +83,15 @@ See [global configs](/reference/global-configs/print-output#print-color) The `run` command supports the `--empty` flag for building schema-only dry runs. The `--empty` flag limits the refs and sources to zero rows. dbt will still execute the model SQL against the target data warehouse but will avoid expensive reads of input data. This validates dependencies and ensures your models will build properly. - \ No newline at end of file + + +## Status codes + +When calling the [list_runs api](/dbt-cloud/api-v2#/operations/List%20Runs), you will get a status code for each run returned. The available run status codes are as follows: + +- Starting = 1 +- Running = 3 +- Success = 10 +- Error = 20 +- Canceled = 30 +- Skipped = 40 diff --git a/website/docs/reference/commands/version.md b/website/docs/reference/commands/version.md index 2ed14117828..4d5ce6524dd 100644 --- a/website/docs/reference/commands/version.md +++ b/website/docs/reference/commands/version.md @@ -13,7 +13,7 @@ The `--version` command-line flag returns information about the currently instal ## Versioning To learn more about release versioning for dbt Core, refer to [How dbt Core uses semantic versioning](/docs/dbt-versions/core#how-dbt-core-uses-semantic-versioning). -If using [versionless dbt Cloud](/docs/dbt-versions/upgrade-dbt-version-in-cloud#versionless), then `dbt_version` uses the latest (continuous) release version. This also follows semantic versioning guidelines, using the `YYYY.xx.yy` format, where the year is the major version (for example, `2024.04.1234`) +If using a [dbt Cloud release track](/docs/dbt-versions/cloud-release-tracks), which provide ongoing updates to dbt, then `dbt_version` represents the release version of dbt in dbt Cloud. This also follows semantic versioning guidelines, using the `YYYY.MM.DD+` format. The year, month, and day represent the date the version was built (for example, `2024.10.28+996c6a8`). The suffix provides an additional unique identification for each build. ## Example usages diff --git a/website/docs/reference/data-test-configs.md b/website/docs/reference/data-test-configs.md index e7adc266b07..0044a707db1 100644 --- a/website/docs/reference/data-test-configs.md +++ b/website/docs/reference/data-test-configs.md @@ -275,3 +275,24 @@ tests: ``` + +#### Specify custom configurations for generic data tests + +Beginning in dbt v1.9, you can use any custom config key to specify custom configurations for data tests. For example, the following specifies the `snowflake_warehouse` custom config that dbt should use when executing the `accepted_values` data test: + +```yml + +models: + - name: my_model + columns: + - name: color + tests: + - accepted_values: + values: ['blue', 'red'] + config: + severity: warn + snowflake_warehouse: my_warehouse + +``` + +Given the config, the data test runs on a different Snowflake virtual warehouse than the one in your default connection to enable better price-performance with a different warehouse size or more granular cost allocation and visibility. 
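Circling back to the run status codes listed under the `run` command above: because the `list runs` API returns the numeric code rather than a label, a small lookup table is often handy when scripting against it. The sketch below is illustrative and assumes the run payload exposes `id` and `status` fields:

```python
# Map dbt Cloud run status codes (as documented for the list_runs API) to labels.
RUN_STATUS = {
    1: "Starting",
    3: "Running",
    10: "Success",
    20: "Error",
    30: "Canceled",
    40: "Skipped",
}

def describe_run(run: dict) -> str:
    """Return a one-line, human-readable summary for a run payload."""
    label = RUN_STATUS.get(run.get("status"), "Unknown")
    return f"Run {run.get('id')}: {label}"

print(describe_run({"id": 12345, "status": 10}))  # Run 12345: Success
```
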
diff --git a/website/docs/reference/database-permissions/snowflake-permissions.md b/website/docs/reference/database-permissions/snowflake-permissions.md index 3f474242834..1ab35e46d26 100644 --- a/website/docs/reference/database-permissions/snowflake-permissions.md +++ b/website/docs/reference/database-permissions/snowflake-permissions.md @@ -83,6 +83,7 @@ grant role reporter to user looker_user; -- or mode_user, periscope_user ``` 5. Let loader load data + Give the role unilateral permission to operate on the raw database ``` use role sysadmin; @@ -90,6 +91,7 @@ grant all on database raw to role loader; ``` 6. Let transformer transform data + The transformer role needs to be able to read raw data. If you do this before you have any data loaded, you can run: @@ -110,6 +112,7 @@ transformer also needs to be able to create in the analytics database: grant all on database analytics to role transformer; ``` 7. Let reporter read the transformed data + A previous version of this article recommended this be implemented through hooks in dbt, but this way lets you get away with a one-off statement. ``` grant usage on database analytics to role reporter; @@ -120,10 +123,11 @@ grant select on future views in database analytics to role reporter; Again, if you already have data in your analytics database, make sure you run: ``` grant usage on all schemas in database analytics to role reporter; -grant select on all tables in database analytics to role transformer; -grant select on all views in database analytics to role transformer; +grant select on all tables in database analytics to role reporter; +grant select on all views in database analytics to role reporter; ``` 8. Maintain + When new users are added, make sure you add them to the right role! Everything else should be inherited automatically thanks to those `future` grants. For more discussion and legacy information, refer to [this Discourse article](https://discourse.getdbt.com/t/setting-up-snowflake-the-exact-grant-statements-we-run/439). diff --git a/website/docs/reference/dbt-classes.md b/website/docs/reference/dbt-classes.md index 13f9263e545..a6a8c2d4fa6 100644 --- a/website/docs/reference/dbt-classes.md +++ b/website/docs/reference/dbt-classes.md @@ -98,9 +98,14 @@ col.numeric_type('numeric', 12, 4) # numeric(12,4) ### Properties -- **name**: Returns the name of the column +- **char_size**: Returns the maximum size for character varying columns +- **column**: Returns the name of the column +- **data_type**: Returns the data type of the column (with size/precision/scale included) +- **dtype**: Returns the data type of the column (without any size/precision/scale included) +- **name**: Returns the name of the column (identical to `column`, provided as an alias). +- **numeric_precision**: Returns the maximum precision for fixed decimal columns +- **numeric_scale**: Returns the maximum scale for fixed decimal columns - **quoted**: Returns the name of the column wrapped in quotes -- **data_type**: Returns the data type of the column ### Instance methods diff --git a/website/docs/reference/dbt-commands.md b/website/docs/reference/dbt-commands.md index 8386cf61731..9cbc5e5e38b 100644 --- a/website/docs/reference/dbt-commands.md +++ b/website/docs/reference/dbt-commands.md @@ -11,7 +11,7 @@ A key distinction with the tools mentioned, is that dbt Cloud CLI and IDE are de ## Parallel execution -dbt Cloud allows for parallel execution of commands, enhancing efficiency without compromising data integrity. 
This enables you to run multiple commands at the same time, however it's important to understand which commands can be run in parallel and which can't. +dbt Cloud allows for concurrent execution of commands, enhancing efficiency without compromising data integrity. This enables you to run multiple commands at the same time. However, it's important to understand which commands can be run in parallel and which can't. In contrast, [`dbt-core` _doesn't_ support](/reference/programmatic-invocations#parallel-execution-not-supported) safe parallel execution for multiple invocations in the same process, and requires users to manage concurrency manually to ensure data integrity and system stability. @@ -34,10 +34,10 @@ Commands with a ('❌') indicate write commands, commands with a ('✅') indicat | Command | Description | Parallel execution |
Caveats
| |---------|-------------| :-----------------:| ------------------------------------------ | -| [build](/reference/commands/build) | Build and test all selected resources (models, seeds, snapshots, tests) | ❌ | All tools
All [supported versions](/docs/dbt-versions/core) | +| [build](/reference/commands/build) | Builds and tests all selected resources (models, seeds, snapshots, tests) | ❌ | All tools
All [supported versions](/docs/dbt-versions/core) | | cancel | Cancels the most recent invocation. | N/A | dbt Cloud CLI
Requires [dbt v1.6 or higher](/docs/dbt-versions/core) | | [clean](/reference/commands/clean) | Deletes artifacts present in the dbt project | ✅ | All tools
All [supported versions](/docs/dbt-versions/core) | -| [clone](/reference/commands/clone) | Clone selected models from the specified state | ❌ | All tools
Requires [dbt v1.6 or higher](/docs/dbt-versions/core) | +| [clone](/reference/commands/clone) | Clones selected models from the specified state | ❌ | All tools
Requires [dbt v1.6 or higher](/docs/dbt-versions/core) | | [compile](/reference/commands/compile) | Compiles (but does not run) the models in a project | ✅ | All tools
All [supported versions](/docs/dbt-versions/core) | | [debug](/reference/commands/debug) | Debugs dbt connections and projects | ✅ | dbt Cloud IDE, dbt Core
All [supported versions](/docs/dbt-versions/core) | | [deps](/reference/commands/deps) | Downloads dependencies for a project | ✅ | All tools
All [supported versions](/docs/dbt-versions/core) | @@ -50,9 +50,9 @@ Commands with a ('❌') indicate write commands, commands with a ('✅') indicat | reattach | Reattaches to the most recent invocation to retrieve logs and artifacts. | N/A | dbt Cloud CLI
Requires [dbt v1.6 or higher](/docs/dbt-versions/core) | | [retry](/reference/commands/retry) | Retry the last run `dbt` command from the point of failure | ❌ | All tools
Requires [dbt v1.6 or higher](/docs/dbt-versions/core) | | [run](/reference/commands/run) | Runs the models in a project | ❌ | All tools
All [supported versions](/docs/dbt-versions/core) | -| [run-operation](/reference/commands/run-operation) | Invoke a macro, including running arbitrary maintenance SQL against the database | ❌ | All tools
All [supported versions](/docs/dbt-versions/core) | +| [run-operation](/reference/commands/run-operation) | Invokes a macro, including running arbitrary maintenance SQL against the database | ❌ | All tools
All [supported versions](/docs/dbt-versions/core) | | [seed](/reference/commands/seed) | Loads CSV files into the database | ❌ | All tools
All [supported versions](/docs/dbt-versions/core) | -| [show](/reference/commands/show) | Preview table rows post-transformation | ✅ | All tools
All [supported versions](/docs/dbt-versions/core) | +| [show](/reference/commands/show) | Previews table rows post-transformation | ✅ | All tools
All [supported versions](/docs/dbt-versions/core) | | [snapshot](/reference/commands/snapshot) | Executes "snapshot" jobs defined in a project | ❌ | All tools
All [supported versions](/docs/dbt-versions/core) | | [source](/reference/commands/source) | Provides tools for working with source data (including validating that sources are "fresh") | ✅ | All tools
All [supported versions](/docs/dbt-versions/core) | | [test](/reference/commands/test) | Executes tests defined in a project | ✅ | All tools
All [supported versions](/docs/dbt-versions/core) | diff --git a/website/docs/reference/dbt-jinja-functions/config.md b/website/docs/reference/dbt-jinja-functions/config.md index 3903c82eef7..8083ea2a124 100644 --- a/website/docs/reference/dbt-jinja-functions/config.md +++ b/website/docs/reference/dbt-jinja-functions/config.md @@ -34,13 +34,21 @@ __Args__: The `config.get` function is used to get configurations for a model from the end-user. Configs defined in this way are optional, and a default value can be provided. +There are 3 cases: +1. The configuration variable exists, it is not `None` +1. The configuration variable exists, it is `None` +1. The configuration variable does not exist + Example usage: ```sql {% materialization incremental, default -%} -- Example w/ no default. unique_key will be None if the user does not provide this configuration {%- set unique_key = config.get('unique_key') -%} - -- Example w/ default value. Default to 'id' if 'unique_key' not provided + -- Example w/ alternate value. Use alternative of 'id' if 'unique_key' config is provided, but it is None + {%- set unique_key = config.get('unique_key') or 'id' -%} + + -- Example w/ default value. Default to 'id' if the 'unique_key' config does not exist {%- set unique_key = config.get('unique_key', default='id') -%} ... ``` diff --git a/website/docs/reference/dbt-jinja-functions/model.md b/website/docs/reference/dbt-jinja-functions/model.md index 516981e11e3..b0995ff958c 100644 --- a/website/docs/reference/dbt-jinja-functions/model.md +++ b/website/docs/reference/dbt-jinja-functions/model.md @@ -20,9 +20,9 @@ To view the contents of `model` for a given model: - + -If you're using the CLI, use [log()](/reference/dbt-jinja-functions/log) to print the full contents: +If you're using the command line interface (CLI), use [log()](/reference/dbt-jinja-functions/log) to print the full contents: ```jinja {{ log(model, info=True) }} @@ -42,6 +42,48 @@ If you're using the CLI, use [log()](/reference/dbt-jinja-functions/log) to prin +## Batch properties for microbatch models + +Starting in dbt Core v1.9, the model object includes a `batch` property (`model.batch`), which provides details about the current batch when executing an [incremental microbatch](/docs/build/incremental-microbatch) model. This property is only populated during the batch execution of a microbatch model. + +The following table describes the properties of the `batch` object. Note that dbt appends the property to the `model` and `batch` objects. + +| Property | Description | Example | +| -------- | ----------- | ------- | +| `id` | The unique identifier for the batch within the context of the microbatch model. | `model.batch.id` | +| `event_time_start` | The start time of the batch's [`event_time`](/reference/resource-configs/event-time) filter (inclusive). | `model.batch.event_time_start` | +| `event_time_end` | The end time of the batch's `event_time` filter (exclusive). | `model.batch.event_time_end` | + +### Usage notes + +`model.batch` is only available during the execution of a microbatch model batch. Outside of the microbatch execution, `model.batch` is `None`, and its sub-properties aren't accessible. + +#### Example of safeguarding access to batch properties + +We recommend to always check if `model.batch` is populated before accessing its properties. 
To do this, use an `if` statement for safe access to `batch` properties: + +```jinja +{% if model.batch %} + {{ log(model.batch.id) }} # Log the batch ID # + {{ log(model.batch.event_time_start) }} # Log the start time of the batch # + {{ log(model.batch.event_time_end) }} # Log the end time of the batch # +{% endif %} +``` + +In this example, the `if model.batch` statement makes sure that the code only runs during a batch execution. `log()` is used to print the `batch` properties for debugging. + +#### Example of log batch details + +This is a practical example of how you might use `model.batch` in a microbatch model to log batch details for the `batch.id`: + +```jinja +{% if model.batch %} + {{ log("Processing batch with ID: " ~ model.batch.id, info=True) }} + {{ log("Batch event time range: " ~ model.batch.event_time_start ~ " to " ~ model.batch.event_time_end, info=True) }} +{% endif %} +``` +In this example, the `if model.batch` statement makes sure that the code only runs during a batch execution. `log()` is used to print the `batch` properties for debugging. + ## Model structure and JSON schema To view the structure of `models` and their definitions: diff --git a/website/docs/reference/dbt-jinja-functions/target.md b/website/docs/reference/dbt-jinja-functions/target.md index 968f64d0f8d..d91749277ac 100644 --- a/website/docs/reference/dbt-jinja-functions/target.md +++ b/website/docs/reference/dbt-jinja-functions/target.md @@ -10,7 +10,7 @@ The `target` variable contains information about your connection to the warehous - **dbt Core:** These values are based on the target defined in your [profiles.yml](/docs/core/connect-data-platform/profiles.yml) file. Please note that for certain adapters, additional configuration steps may be required. Refer to the [set up page](/docs/core/connect-data-platform/about-core-connections) for your data platform. - **dbt Cloud** To learn more about setting up your adapter in dbt Cloud, refer to [About data platform connections](/docs/cloud/connect-data-platform/about-connections). - **[dbt Cloud Scheduler](/docs/deploy/job-scheduler)**: `target.name` is defined per job as described in [Custom target names](/docs/build/custom-target-names). For other attributes, values are defined by the deployment connection. To check these values, click **Deploy** and select **Environments**. Then, select the relevant deployment environment, and click **Settings**. - - **[dbt Cloud IDE](/docs/cloud/dbt-cloud-ide/develop-in-the-cloud)**: These values are defined by your connection and credentials. To edit these values, click the gear icon in the top right, select **Profile settings**, and click **Credentials**. Select and edit a project to set up the credentials and target name. + - **[dbt Cloud IDE](/docs/cloud/dbt-cloud-ide/develop-in-the-cloud)**: These values are defined by your connection and credentials. To edit these values, click on your account name in the left side menu and select **Account settings**. Then, click **Credentials**. Select and edit a project to set up the credentials and target name. Some configurations are shared between all adapters, while others are adapter-specific. 
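As a quick illustration of how `target` is typically used (a sketch, not content from this change), `target.name` can gate environment-specific logic in a model; the model name, column, and date function below are assumptions, and the date function is warehouse-specific:

```sql
select *
from {{ ref('stg_orders') }}

{% if target.name == 'dev' %}
-- In development, limit the data scanned to keep iteration fast
where ordered_at >= dateadd('day', -3, current_date)
{% endif %}
```
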
diff --git a/website/docs/reference/dbt-jinja-functions/this.md b/website/docs/reference/dbt-jinja-functions/this.md index f9f2961b08f..7d358cb6299 100644 --- a/website/docs/reference/dbt-jinja-functions/this.md +++ b/website/docs/reference/dbt-jinja-functions/this.md @@ -20,8 +20,6 @@ meta: ## Examples - - ### Configuring incremental models diff --git a/website/docs/reference/dbt_project.yml.md b/website/docs/reference/dbt_project.yml.md index e7cd5bbeb79..1bb9dd2cf9c 100644 --- a/website/docs/reference/dbt_project.yml.md +++ b/website/docs/reference/dbt_project.yml.md @@ -14,8 +14,6 @@ Every [dbt project](/docs/build/projects) needs a `dbt_project.yml` file — thi The following example is a list of all available configurations in the `dbt_project.yml` file: - - ```yml @@ -94,77 +92,6 @@ vars: ``` - - - - - - -```yml -[name](/reference/project-configs/name): string - -[config-version](/reference/project-configs/config-version): 2 -[version](/reference/project-configs/version): version - -[profile](/reference/project-configs/profile): profilename - -[model-paths](/reference/project-configs/model-paths): [directorypath] -[seed-paths](/reference/project-configs/seed-paths): [directorypath] -[test-paths](/reference/project-configs/test-paths): [directorypath] -[analysis-paths](/reference/project-configs/analysis-paths): [directorypath] -[macro-paths](/reference/project-configs/macro-paths): [directorypath] -[snapshot-paths](/reference/project-configs/snapshot-paths): [directorypath] -[docs-paths](/reference/project-configs/docs-paths): [directorypath] -[asset-paths](/reference/project-configs/asset-paths): [directorypath] - -[packages-install-path](/reference/project-configs/packages-install-path): directorypath - -[clean-targets](/reference/project-configs/clean-targets): [directorypath] - -[query-comment](/reference/project-configs/query-comment): string - -[require-dbt-version](/reference/project-configs/require-dbt-version): version-range | [version-range] - -[dbt-cloud](/docs/cloud/cloud-cli-installation): - [project-id](/docs/cloud/configure-cloud-cli#configure-the-dbt-cloud-cli): project_id # Required - [defer-env-id](/docs/cloud/about-cloud-develop-defer#defer-in-dbt-cloud-cli): environment_id # Optional - -[quoting](/reference/project-configs/quoting): - database: true | false - schema: true | false - identifier: true | false - -models: - [](/reference/model-configs) - -seeds: - [](/reference/seed-configs) - -snapshots: - [](/reference/snapshot-configs) - -sources: - [](source-configs) - -tests: - [](/reference/data-test-configs) - -vars: - [](/docs/build/project-variables) - -[on-run-start](/reference/project-configs/on-run-start-on-run-end): sql-statement | [sql-statement] -[on-run-end](/reference/project-configs/on-run-start-on-run-end): sql-statement | [sql-statement] - -[dispatch](/reference/project-configs/dispatch-config): - - macro_namespace: packagename - search_order: [packagename] - -[restrict-access](/docs/collaborate/govern/model-access): true | false - -``` - - - ## Naming convention diff --git a/website/docs/reference/dbtignore.md b/website/docs/reference/dbtignore.md index 8733fc592cd..063b455f5cc 100644 --- a/website/docs/reference/dbtignore.md +++ b/website/docs/reference/dbtignore.md @@ -20,6 +20,13 @@ another-non-dbt-model.py # ignore all .py files with "codegen" in the filename *codegen*.py + +# ignore all folders in a directory +path/to/folders/** + +# ignore some folders in a directory +path/to/folders/subfolder/** + ``` diff --git 
a/website/docs/reference/global-configs/about-global-configs.md b/website/docs/reference/global-configs/about-global-configs.md index 64d56d002fe..435a86d84ba 100644 --- a/website/docs/reference/global-configs/about-global-configs.md +++ b/website/docs/reference/global-configs/about-global-configs.md @@ -95,5 +95,5 @@ Because the values of `flags` can differ across invocations, we strongly advise | [use_experimental_parser](/reference/global-configs/parsing#experimental-parser) | boolean | False | ✅ | `DBT_USE_EXPERIMENTAL_PARSER` | `--use-experimental-parser`, `--no-use-experimental-parser` | ❌ | | [version_check](/reference/global-configs/version-compatibility) | boolean | varies | ✅ | `DBT_VERSION_CHECK` | `--version-check`, `--no-version-check` | ❌ | | [warn_error_options](/reference/global-configs/warnings) | dict | {} | ✅ | `DBT_WARN_ERROR_OPTIONS` | `--warn-error-options` | ✅ | -| [warn_error](/reference/global-configs/warnings) | boolean | False | ✅ | `DBT_WARN_ERROR` | `--warn-error`, `--no-warn-error` | ✅ | +| [warn_error](/reference/global-configs/warnings) | boolean | False | ✅ | `DBT_WARN_ERROR` | `--warn-error` | ✅ | | [write_json](/reference/global-configs/json-artifacts) | boolean | True | ✅ | `DBT_WRITE_JSON` | `--write-json`, `--no-write-json` | ✅ | diff --git a/website/docs/reference/global-configs/adapter-behavior-changes.md b/website/docs/reference/global-configs/adapter-behavior-changes.md index bd0ba9f7404..a755f8cfe50 100644 --- a/website/docs/reference/global-configs/adapter-behavior-changes.md +++ b/website/docs/reference/global-configs/adapter-behavior-changes.md @@ -14,10 +14,17 @@ Some adapters can display behavior changes when certain flags are enabled. The f
+ + + -
\ No newline at end of file +
diff --git a/website/docs/reference/global-configs/behavior-changes.md b/website/docs/reference/global-configs/behavior-changes.md index d35b83765e3..bda4d2b361a 100644 --- a/website/docs/reference/global-configs/behavior-changes.md +++ b/website/docs/reference/global-configs/behavior-changes.md @@ -4,6 +4,8 @@ id: "behavior-changes" sidebar: "Behavior changes" --- +import StateModified from '/snippets/_state-modified-compare.md'; + Most flags exist to configure runtime behaviors with multiple valid choices. The right choice may vary based on the environment, user preference, or the specific invocation. Another category of flags provides existing projects with a migration window for runtime behaviors that are changing in newer releases of dbt. These flags help us achieve a balance between these goals, which can otherwise be in tension, by: @@ -56,20 +58,25 @@ flags: require_model_names_without_spaces: False source_freshness_run_project_hooks: False restrict_direct_pg_catalog_access: False + require_yaml_configuration_for_mf_time_spines: False + require_batched_execution_for_custom_microbatch_strategy: False ``` -When we use dbt Cloud in the following table, we're referring to accounts that have gone "[Versionless](/docs/dbt-versions/upgrade-dbt-version-in-cloud#versionless)." +This table outlines which month of the "Latest" release track in dbt Cloud and which version of dbt Core contains the behavior change's introduction (disabled by default) or maturity (enabled by default). -| Flag | dbt Cloud: Intro | dbt Cloud: Maturity | dbt Core: Intro | dbt Core: Maturity | +| Flag | dbt Cloud "Latest": Intro | dbt Cloud "Latest": Maturity | dbt Core: Intro | dbt Core: Maturity | |-----------------------------------------------------------------|------------------|---------------------|-----------------|--------------------| -| require_explicit_package_overrides_for_builtin_materializations | 2024.04 | 2024.06 | 1.6.14, 1.7.14 | 1.8.0 | -| require_resource_names_without_spaces | 2024.05 | TBD* | 1.8.0 | 1.9.0 | -| source_freshness_run_project_hooks | 2024.03 | TBD* | 1.8.0 | 1.9.0 | +| [require_explicit_package_overrides_for_builtin_materializations](#package-override-for-built-in-materialization) | 2024.04 | 2024.06 | 1.6.14, 1.7.14 | 1.8.0 | +| [require_resource_names_without_spaces](#no-spaces-in-resource-names) | 2024.05 | TBD* | 1.8.0 | 1.10.0 | +| [source_freshness_run_project_hooks](#project-hooks-with-source-freshness) | 2024.03 | TBD* | 1.8.0 | 1.10.0 | | [Redshift] [restrict_direct_pg_catalog_access](/reference/global-configs/redshift-changes#the-restrict_direct_pg_catalog_access-flag) | 2024.09 | TBD* | dbt-redshift v1.9.0 | 1.9.0 | -| skip_nodes_if_on_run_start_fails | 2024.10 | TBD* | 1.9.0 | TBD* | -| state_modified_compare_more_unrendered_values | 2024.10 | TBD* | 1.9.0 | TBD* | +| [skip_nodes_if_on_run_start_fails](#failures-in-on-run-start-hooks) | 2024.10 | TBD* | 1.9.0 | TBD* | +| [state_modified_compare_more_unrendered_values](#source-definitions-for-state) | 2024.10 | TBD* | 1.9.0 | TBD* | +| [require_yaml_configuration_for_mf_time_spines](#metricflow-time-spine-yaml) | 2024.10 | TBD* | 1.9.0 | TBD* | +| [require_batched_execution_for_custom_microbatch_strategy](#custom-microbatch-strategy) | 2024.11 | TBD* | 1.9.0 | TBD* | +| [cumulative_type_params](#cumulative-metrics-parameter) | 2024.11 | TBD* | 1.9.0 | TBD* | When the dbt Cloud Maturity is "TBD," it means we have not yet determined the exact date when these flags' default values will change. 
Affected users will see deprecation warnings in the meantime, and they will receive emails providing advance warning ahead of the maturity date. In the meantime, if you are seeing a deprecation warning, you can either: - Migrate your project to support the new behavior, and then set the flag to `True` to stop seeing the warnings. @@ -83,13 +90,18 @@ Set the `skip_nodes_if_on_run_start_fails` flag to `True` to skip all selected r ### Source definitions for state:modified +:::info + + + +::: + The flag is `False` by default. Set `state_modified_compare_more_unrendered_values` to `True` to reduce false positives during `state:modified` checks (especially when configs differ by target environment like `prod` vs. `dev`). Setting the flag to `True` changes the `state:modified` comparison from using rendered values to unrendered values instead. It accomplishes this by persisting `unrendered_config` during model parsing and `unrendered_database` and `unrendered_schema` configs during source parsing. - ### Package override for built-in materialization Setting the `require_explicit_package_overrides_for_builtin_materializations` flag to `True` prevents this automatic override. @@ -136,7 +148,7 @@ The names of dbt resources (models, sources, etc) should contain letters, number Set the `source_freshness_run_project_hooks` flag to `True` to include "project hooks" ([`on-run-start` / `on-run-end`](/reference/project-configs/on-run-start-on-run-end)) in the `dbt source freshness` command execution. -If you have specific project [`on-run-start` / `on-run-end`](/reference/project-configs/on-run-start-on-run-end) hooks that should not run before/after `source freshness` command, you can add a conditional check to those hooks: +If you have a specific project [`on-run-start` / `on-run-end`](/reference/project-configs/on-run-start-on-run-end) hooks that should not run before/after `source freshness` command, you can add a conditional check to those hooks: @@ -145,3 +157,71 @@ on-run-start: - '{{ ... if flags.WHICH != 'freshness' }}' ``` + + +### MetricFlow time spine YAML +The `require_yaml_configuration_for_mf_time_spines` flag is set to `False` by default. + +In previous versions (dbt Core 1.8 and earlier), the MetricFlow time spine configuration was stored in a `metricflow_time_spine.sql` file. + +When the flag is set to `True`, dbt will continue to support the SQL file configuration. When the flag is set to `False`, dbt will raise a deprecation warning if it detects a MetricFlow time spine configured in a SQL file. + +The MetricFlow YAML file should have the `time_spine:` field. Refer to [MetricFlow timespine](/docs/build/metricflow-time-spine) for more details. + +### Custom microbatch strategy +The `require_batched_execution_for_custom_microbatch_strategy` flag is set to `False` by default and is only relevant if you already have a custom microbatch macro in your project. If you don't have a custom microbatch macro, you don't need to set this flag as dbt will handle microbatching automatically for any model using the [microbatch strategy](/docs/build/incremental-microbatch#how-microbatch-compares-to-other-incremental-strategies). + +Set the flag is set to `True` if you have a custom microbatch macro set up in your project. When the flag is set to `True`, dbt will execute the custom microbatch strategy in batches. + +If you have a custom microbatch macro and the flag is left as `False`, dbt will issue a deprecation warning. 
+ +Previously, users needed to set the `DBT_EXPERIMENTAL_MICROBATCH` environment variable to `True` to prevent unintended interactions with existing custom incremental strategies. But this is no longer necessary, as setting `DBT_EXPERMINENTAL_MICROBATCH` will no longer have an effect on runtime functionality. + +### Cumulative metrics + +[Cumulative-type metrics](/docs/build/cumulative#parameters) are nested under the `cumulative_type_params` field in [the dbt Cloud "Latest" release track](/docs/dbt-versions/cloud-release-tracks), dbt Core v1.9 and newer. Currently, dbt will warn users if they have cumulative metrics improperly nested. To enforce the new format (resulting in an error instead of a warning), set the `require_nested_cumulative_type_params` to `True`. + +Use the following metric configured with the syntax before v1.9 as an example: + +```yaml + + type: cumulative + type_params: + measure: order_count + window: 7 days + +``` + +If you run `dbt parse` with that syntax on Core v1.9 or [the dbt Cloud "Latest" release track](/docs/dbt-versions/cloud-release-tracks), you will receive a warning like: + +```bash + +15:36:22 [WARNING]: Cumulative fields `type_params.window` and +`type_params.grain_to_date` has been moved and will soon be deprecated. Please +nest those values under `type_params.cumulative_type_params.window` and +`type_params.cumulative_type_params.grain_to_date`. See documentation on +behavior changes: +https://docs.getdbt.com/reference/global-configs/behavior-changes. + +``` + +If you set `require_nested_cumulative_type_params` to `True` and re-run `dbt parse` you will now receive an error like: + +```bash + +21:39:18 Cumulative fields `type_params.window` and `type_params.grain_to_date` should be nested under `type_params.cumulative_type_params.window` and `type_params.cumulative_type_params.grain_to_date`. Invalid metrics: orders_last_7_days. See documentation on behavior changes: https://docs.getdbt.com/reference/global-configs/behavior-changes. + +``` + +Once the metric is updated, it will work as expected: + +```yaml + + type: cumulative + type_params: + measure: + name: order_count + cumulative_type_params: + window: 7 days + +``` diff --git a/website/docs/reference/global-configs/cache.md b/website/docs/reference/global-configs/cache.md index 1a74fef8d30..03f33286aa4 100644 --- a/website/docs/reference/global-configs/cache.md +++ b/website/docs/reference/global-configs/cache.md @@ -6,7 +6,7 @@ sidebar: "Cache" ### Cache population -At the start of runs, dbt caches metadata about all the objects in all the schemas where it might materialize resources (such as models). By default, dbt populates the cache with information on all schemas related to the project. +At the start of runs, dbt caches metadata about all the objects in all the schemas where it might materialize resources (such as models). By default, dbt populates the relational cache with information on all schemas related to the project. There are two ways to optionally modify this behavior: - `POPULATE_CACHE` (default: `True`): Whether to populate the cache at all. To skip cache population entirely, use the `--no-populate-cache` flag or `DBT_POPULATE_CACHE: False`. Note that this does not _disable_ the cache; missed cache lookups will run queries, and update the cache afterward. 
@@ -26,3 +26,11 @@ Or, to improve speed and performance while focused on developing Salesforce mode dbt --cache-selected-only run --select salesforce ``` + +### Logging relational cache events + +import LogLevel from '/snippets/_log-relational-cache.md'; + + diff --git a/website/docs/reference/global-configs/databricks-changes.md b/website/docs/reference/global-configs/databricks-changes.md new file mode 100644 index 00000000000..ca24b822ae5 --- /dev/null +++ b/website/docs/reference/global-configs/databricks-changes.md @@ -0,0 +1,26 @@ +--- +title: "Databricks adapter behavior changes" +id: "databricks-changes" +sidebar: "Databricks" +--- + +The following are the current [behavior change flags](/docs/reference/global-configs/behavior-changes.md#behavior-change-flags) that are specific to `dbt-databricks`: + +| Flag | `dbt-databricks`: Intro | `dbt-databricks`: Maturity | +| ----------------------------- | ----------------------- | -------------------------- | +| `use_info_schema_for_columns` | 1.9.0 | TBD | +| `use_user_folder_for_python` | 1.9.0 | TBD | + +### Use information schema for columns + +The `use_info_schema_for_columns` flag is `False` by default. + +Setting this flag to `True` will use `information_schema` rather than `describe extended` to get column metadata for Unity Catalog tables. This setting helps you avoid issues where `describe extended` truncates information when the type is a complex struct. However, this setting is not yet the default behavior, as there are performance impacts due to a Databricks metadata limitation because of the need to run `REPAIR TABLE {{relation}} SYNC METADATA` before querying to ensure the `information_schema` is complete. + +This flag will become the default behavior when this additional query is no longer needed. + +### Use user's folder for Python model notebooks + +The `use_user_folder_for_python` flag is `False` by default and results in writing uploaded python model notebooks to `/Shared/dbt_python_models/{{schema}}/`. Setting this flag to `True` will write notebooks to `/Users/{{current user}}/{{catalog}}/{{schema}}/` Writing to the `Shared` folder is deprecated by Databricks as it does not align with governance best practices. + +We plan to promote this flag to maturity in v1.10.0. diff --git a/website/docs/reference/global-configs/indirect-selection.md b/website/docs/reference/global-configs/indirect-selection.md index 729176a1ff4..03048b57119 100644 --- a/website/docs/reference/global-configs/indirect-selection.md +++ b/website/docs/reference/global-configs/indirect-selection.md @@ -6,7 +6,7 @@ sidebar: "Indirect selection" import IndirSelect from '/snippets/_indirect-selection-definitions.md'; -Use the `--indirect_selection` flag to `dbt test` or `dbt build` to configure which tests to run for the nodes you specify. You can set this as a CLI flag or an environment variable. In dbt Core, you can also configure user configurations in [YAML selectors](/reference/node-selection/yaml-selectors) or in the `flags:` block of `dbt_project.yml`, which sets project-level flags. +Use the `--indirect-selection` flag to `dbt test` or `dbt build` to configure which tests to run for the nodes you specify. You can set this as a CLI flag or an environment variable. In dbt Core, you can also configure user configurations in [YAML selectors](/reference/node-selection/yaml-selectors) or in the `flags:` block of `dbt_project.yml`, which sets project-level flags. When all flags are set, the order of precedence is as follows. 
Refer to [About global configs](/reference/global-configs/about-global-configs) for more details: diff --git a/website/docs/reference/global-configs/logs.md b/website/docs/reference/global-configs/logs.md index 972a731854d..85969a5bc02 100644 --- a/website/docs/reference/global-configs/logs.md +++ b/website/docs/reference/global-configs/logs.md @@ -66,19 +66,28 @@ See [structured logging](/reference/events-logging#structured-logging) for more The `LOG_LEVEL` config sets the minimum severity of events captured in the console and file logs. This is a more flexible alternative to the `--debug` flag. The available options for the log levels are `debug`, `info`, `warn`, `error`, or `none`. -Setting the `--log-level` will configure console and file logs. +- Setting the `--log-level` will configure console and file logs. + ```text + dbt --log-level debug run + ``` -```text -dbt --log-level debug run -``` +- Setting the `LOG_LEVEL` to `none` will disable information from being sent to either the console or file logs. + + ```text + dbt --log-level none + ``` -To set the file log level as a different value than the console, use the `--log-level-file` flag. +- To set the file log level as a different value than the console, use the `--log-level-file` flag. + ```text + dbt --log-level-file error run + ``` -```text -dbt --log-level-file error run -``` +- To only disable writing to the logs file but keep console logs, set `LOG_LEVEL_FILE` config to none. + ```text + dbt --log-level-file none + ``` ### Debug-level logging @@ -137,11 +146,11 @@ You can use either of these parameters to ensure clean output that's compatible ### Logging relational cache events -The `LOG_CACHE_EVENTS` config allows detailed logging for [relational cache](/reference/global-configs/cache) events, which are disabled by default. +import LogLevel from '/snippets/_log-relational-cache.md'; -```text -dbt --log-cache-events compile -``` +relational cache} +/> ### Color diff --git a/website/docs/reference/global-configs/resource-type.md b/website/docs/reference/global-configs/resource-type.md index 9e6ec82df06..9a888c73885 100644 --- a/website/docs/reference/global-configs/resource-type.md +++ b/website/docs/reference/global-configs/resource-type.md @@ -6,7 +6,7 @@ sidebar: "resource type" -The `--resource-type` and `--exclude-resource-type` flags include or exclude resource types from the `dbt build`, `dbt clone`, and `dbt list` commands. In Versionless and from dbt v1.9 onwards, these flags are also supported in the `dbt test` command. +The `--resource-type` and `--exclude-resource-type` flags include or exclude resource types from the `dbt build`, `dbt clone`, and `dbt list` commands. In dbt v1.9 onwards, these flags are also supported in the `dbt test` command. 
@@ -24,20 +24,7 @@ The `--exclude-resource-type` flag is only available in dbt version 1.8 and high The available resource types are: - - -- [`analysis`](/docs/build/analyses) -- [`exposure`](/docs/build/exposures) -- [`metric`](/docs/build/metrics-overview) -- [`model`](/docs/build/models) -- [`seed`](/docs/build/seeds) -- [`snapshot`](/docs/build/snapshots) -- [`source`](/docs/build/sources) -- [`test`](/docs/build/data-tests) - - - - + - [`analysis`](/docs/build/analyses) - [`exposure`](/docs/build/exposures) @@ -82,7 +69,6 @@ Instead of targeting specific resources, use the `--resource-flag` or `--exclude - - In this example, run the following command to include _all_ saved queries with the `--resource-type` flag: @@ -94,8 +80,6 @@ Instead of targeting specific resources, use the `--resource-flag` or `--exclude - - - In this example, use the following command to exclude _all_ unit tests from your dbt build process. Note that the `--exclude-resource-type` flag is only available in dbt version 1.8 and higher: diff --git a/website/docs/reference/global-configs/version-compatibility.md b/website/docs/reference/global-configs/version-compatibility.md index 80841678a85..7667dcfda9c 100644 --- a/website/docs/reference/global-configs/version-compatibility.md +++ b/website/docs/reference/global-configs/version-compatibility.md @@ -14,7 +14,7 @@ Running with dbt=1.0.0 Found 13 models, 2 tests, 1 archives, 0 analyses, 204 macros, 2 operations.... ``` -:::info Versionless +:::info dbt Cloud release tracks ::: diff --git a/website/docs/reference/macro-properties.md b/website/docs/reference/macro-properties.md index 91a616ded0d..69a66f308d9 100644 --- a/website/docs/reference/macro-properties.md +++ b/website/docs/reference/macro-properties.md @@ -19,6 +19,7 @@ macros: [description](/reference/resource-properties/description): [docs](/reference/resource-configs/docs): show: true | false + [meta](/reference/resource-configs/meta): {} arguments: - name: [type](/reference/resource-properties/argument-type): diff --git a/website/docs/reference/model-configs.md b/website/docs/reference/model-configs.md index 65133dcb25a..6c37b69758c 100644 --- a/website/docs/reference/model-configs.md +++ b/website/docs/reference/model-configs.md @@ -36,9 +36,11 @@ models: [+](/reference/resource-configs/plus-prefix)[materialized](/reference/resource-configs/materialized): [+](/reference/resource-configs/plus-prefix)[sql_header](/reference/resource-configs/sql_header): [+](/reference/resource-configs/plus-prefix)[on_configuration_change](/reference/resource-configs/on_configuration_change): apply | continue | fail #only for materialized views on supported adapters + [+](/reference/resource-configs/plus-prefix)[unique_key](/reference/resource-configs/unique_key): ``` + @@ -57,6 +59,7 @@ models: [materialized](/reference/resource-configs/materialized): [sql_header](/reference/resource-configs/sql_header): [on_configuration_change](/reference/resource-configs/on_configuration_change): apply | continue | fail #only for materialized views on supported adapters + [unique_key](/reference/resource-configs/unique_key): ``` @@ -69,12 +72,13 @@ models: -```jinja +```sql {{ config( [materialized](/reference/resource-configs/materialized)="", [sql_header](/reference/resource-configs/sql_header)="" [on_configuration_change](/reference/resource-configs/on_configuration_change): apply | continue | fail #only for materialized views for supported adapters + [unique_key](/reference/resource-configs/unique_key)='column_name_or_expression' ) 
}} ``` @@ -104,6 +108,8 @@ models: + + ```yaml models: [](/reference/resource-configs/resource-path): @@ -121,7 +127,29 @@ models: [+](/reference/resource-configs/plus-prefix)[contract](/reference/resource-configs/contract): {} ``` + + + +```yaml +models: + [](/reference/resource-configs/resource-path): + [+](/reference/resource-configs/plus-prefix)[enabled](/reference/resource-configs/enabled): true | false + [+](/reference/resource-configs/plus-prefix)[tags](/reference/resource-configs/tags): | [] + [+](/reference/resource-configs/plus-prefix)[pre-hook](/reference/resource-configs/pre-hook-post-hook): | [] + [+](/reference/resource-configs/plus-prefix)[post-hook](/reference/resource-configs/pre-hook-post-hook): | [] + [+](/reference/resource-configs/plus-prefix)[database](/reference/resource-configs/database): + [+](/reference/resource-configs/plus-prefix)[schema](/reference/resource-properties/schema): + [+](/reference/resource-configs/plus-prefix)[alias](/reference/resource-configs/alias): + [+](/reference/resource-configs/plus-prefix)[persist_docs](/reference/resource-configs/persist_docs): + [+](/reference/resource-configs/plus-prefix)[full_refresh](/reference/resource-configs/full_refresh): + [+](/reference/resource-configs/plus-prefix)[meta](/reference/resource-configs/meta): {} + [+](/reference/resource-configs/plus-prefix)[grants](/reference/resource-configs/grants): {} + [+](/reference/resource-configs/plus-prefix)[contract](/reference/resource-configs/contract): {} + [+](/reference/resource-configs/plus-prefix)[event_time](/reference/resource-configs/event-time): my_time_field + +``` + @@ -131,6 +159,8 @@ models: + + ```yaml version: 2 @@ -150,18 +180,43 @@ models: [grants](/reference/resource-configs/grants): {} [contract](/reference/resource-configs/contract): {} ``` + - + - +```yaml +version: 2 + +models: + - name: [] + config: + [enabled](/reference/resource-configs/enabled): true | false + [tags](/reference/resource-configs/tags): | [] + [pre_hook](/reference/resource-configs/pre-hook-post-hook): | [] + [post_hook](/reference/resource-configs/pre-hook-post-hook): | [] + [database](/reference/resource-configs/database): + [schema](/reference/resource-properties/schema): + [alias](/reference/resource-configs/alias): + [persist_docs](/reference/resource-configs/persist_docs): + [full_refresh](/reference/resource-configs/full_refresh): + [meta](/reference/resource-configs/meta): {} + [grants](/reference/resource-configs/grants): {} + [contract](/reference/resource-configs/contract): {} + [event_time](/reference/resource-configs/event-time): my_time_field +``` + + + -```jinja + + +```sql {{ config( [enabled](/reference/resource-configs/enabled)=true | false, @@ -178,6 +233,30 @@ models: ) }} ``` + + + + +```sql + +{{ config( + [enabled](/reference/resource-configs/enabled)=true | false, + [tags](/reference/resource-configs/tags)="" | [""], + [pre_hook](/reference/resource-configs/pre-hook-post-hook)="" | [""], + [post_hook](/reference/resource-configs/pre-hook-post-hook)="" | [""], + [database](/reference/resource-configs/database)="", + [schema](/reference/resource-properties/schema)="", + [alias](/reference/resource-configs/alias)="", + [persist_docs](/reference/resource-configs/persist_docs)={}, + [meta](/reference/resource-configs/meta)={}, + [grants](/reference/resource-configs/grants)={}, + [contract](/reference/resource-configs/contract)={}, + [event_time](/reference/resource-configs/event-time)='my_time_field', + +) }} + +``` + diff --git 
a/website/docs/reference/node-selection/defer.md b/website/docs/reference/node-selection/defer.md index 863494de12e..eddb1ece9d4 100644 --- a/website/docs/reference/node-selection/defer.md +++ b/website/docs/reference/node-selection/defer.md @@ -29,11 +29,12 @@ dbt test --models [...] --defer --state path/to/artifacts -When the `--defer` flag is provided, dbt will resolve `ref` calls differently depending on two criteria: -1. Is the referenced node included in the model selection criteria of the current run? -2. Does the referenced node exist as a database object in the current environment? +By default, dbt uses the [`target`](/reference/dbt-jinja-functions/target) namespace to resolve `ref` calls. -If the answer to both is **no**—a node is not included _and_ it does not exist as a database object in the current environment—references to it will use the other namespace instead, provided by the state manifest. +When `--defer` is enabled, dbt resolves ref calls using the state manifest instead, but only if: + +1. The node isn’t among the selected nodes, _and_ +2. It doesn’t exist in the database (or `--favor-state` is used). Ephemeral models are never deferred, since they serve as "passthroughs" for other `ref` calls. @@ -46,7 +47,7 @@ Deferral requires both `--defer` and `--state` to be set, either by passing flag #### Favor state -You can optionally skip the second criterion by passing the `--favor-state` flag. If passed, dbt will favor using the node defined in your `--state` namespace, even if the node exists in the current target. +When `--favor-state` is passed, dbt prioritizes node definitions from the `--state` directory. However, this doesn’t apply if the node is also part of the selected nodes. ### Example diff --git a/website/docs/reference/node-selection/methods.md b/website/docs/reference/node-selection/methods.md index 38484494e4b..7587a9fd2b1 100644 --- a/website/docs/reference/node-selection/methods.md +++ b/website/docs/reference/node-selection/methods.md @@ -310,10 +310,6 @@ dbt list --select "+semantic_model:orders" # list your semantic model named "or ``` ### The "saved_query" method - -Supported in v1.7 or newer. - - The `saved_query` method selects [saved queries](/docs/build/saved-queries). @@ -322,8 +318,6 @@ dbt list --select "saved_query:*" # list all saved queries dbt list --select "+saved_query:orders_saved_query" # list your saved query named "orders_saved_query" and all upstream resources ``` - - ### The "unit_test" method diff --git a/website/docs/reference/node-selection/state-comparison-caveats.md b/website/docs/reference/node-selection/state-comparison-caveats.md index 25301656539..adaf35bd710 100644 --- a/website/docs/reference/node-selection/state-comparison-caveats.md +++ b/website/docs/reference/node-selection/state-comparison-caveats.md @@ -2,6 +2,8 @@ title: "Caveats to state comparison" --- +import StateModified from '/snippets/_state-modified-compare.md'; + The [`state:` selection method](/reference/node-selection/methods#the-state-method) is a powerful feature, with a lot of underlying complexity. Below are a handful of considerations when setting up automated jobs that leverage state comparison. ### Seeds @@ -48,6 +50,8 @@ dbt test -s "state:modified" --exclude "test_name:relationships" To reduce false positives during `state:modified` selection due to env-aware logic, you can set the `state_modified_compare_more_unrendered_values` [behavior flag](/reference/global-configs/behavior-changes#behavior-change-flags) to `True`.
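For example, a minimal sketch of enabling this behavior flag in `dbt_project.yml` (illustrative; behavior change flags are set in the project-level `flags:` block):

```yml
# dbt_project.yml — illustrative sketch
flags:
  state_modified_compare_more_unrendered_values: true
```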
+ + diff --git a/website/docs/reference/programmatic-invocations.md b/website/docs/reference/programmatic-invocations.md index 09e41b1789f..61250e6debb 100644 --- a/website/docs/reference/programmatic-invocations.md +++ b/website/docs/reference/programmatic-invocations.md @@ -25,9 +25,9 @@ for r in res.result: ## Parallel execution not supported -[`dbt-core`](https://pypi.org/project/dbt-core/) doesn't support [safe parallel execution](/reference/dbt-commands#parallel-execution) for multiple invocations in the same process. This means it's not safe to run multiple dbt commands at the same time. It's officially discouraged and requires a wrapping process to handle sub-processes. This is because: +[`dbt-core`](https://pypi.org/project/dbt-core/) doesn't support [safe parallel execution](/reference/dbt-commands#parallel-execution) for multiple invocations in the same process. This means it's not safe to run multiple dbt commands concurrently. It's officially discouraged and requires a wrapping process to handle sub-processes. This is because: -- Running simultaneous commands can unexpectedly interact with the data platform. For example, running `dbt run` and `dbt build` for the same models simultaneously could lead to unpredictable results. +- Running concurrent commands can unexpectedly interact with the data platform. For example, running `dbt run` and `dbt build` for the same models simultaneously could lead to unpredictable results. - Each `dbt-core` command interacts with global Python variables. To ensure safe operation, commands need to be executed in separate processes, which can be achieved using methods like spawning processes or using tools like Celery. To run [safe parallel execution](/reference/dbt-commands#available-commands), you can use the [dbt Cloud CLI](/docs/cloud/cloud-cli-installation) or [dbt Cloud IDE](/docs/cloud/dbt-cloud-ide/develop-in-the-cloud), both of which do that additional work to manage concurrency (multiple processes) on your behalf. diff --git a/website/docs/reference/project-configs/analysis-paths.md b/website/docs/reference/project-configs/analysis-paths.md index 5c3d223a5cb..20e2e65c2ad 100644 --- a/website/docs/reference/project-configs/analysis-paths.md +++ b/website/docs/reference/project-configs/analysis-paths.md @@ -13,12 +13,31 @@ analysis-paths: [directorypath] ## Definition -Specify a custom list of directories where [analyses](/docs/build/analyses) are located. +Specify a custom list of directories where [analyses](/docs/build/analyses) are located. ## Default Without specifying this config, dbt will not compile any `.sql` files as analyses. -However, the [`dbt init` command](/reference/commands/init) populates this value as `analyses` ([source](https://github.com/dbt-labs/dbt-starter-project/blob/HEAD/dbt_project.yml#L15)) +However, the [`dbt init` command](/reference/commands/init) populates this value as `analyses` ([source](https://github.com/dbt-labs/dbt-starter-project/blob/HEAD/dbt_project.yml#L15)).
+ +import RelativePath from '/snippets/_relative-path.md'; + + + +- ✅ **Do** + - Use relative path: + ```yml + analysis-paths: ["analyses"] + ``` + +- ❌ **Don't** + - Avoid absolute paths: + ```yml + analysis-paths: ["/Users/username/project/analyses"] + ``` ## Examples ### Use a subdirectory named `analyses` diff --git a/website/docs/reference/project-configs/asset-paths.md b/website/docs/reference/project-configs/asset-paths.md index 1fb3cf9f260..effae8bad7f 100644 --- a/website/docs/reference/project-configs/asset-paths.md +++ b/website/docs/reference/project-configs/asset-paths.md @@ -15,8 +15,29 @@ asset-paths: [directorypath] ## Definition Optionally specify a custom list of directories to copy to the `target` directory as part of the `docs generate` command. This is useful for rendering images in your repository in your project documentation. + ## Default -By default, dbt will not copy any additional files as part of docs generate, i.e. `asset-paths: []` + +By default, dbt will not copy any additional files as part of docs generate. For example, `asset-paths: []`. + +import RelativePath from '/snippets/_relative-path.md'; + + + +- ✅ **Do** + - Use relative path: + ```yml + asset-paths: ["assets"] + ``` + +- ❌ **Don't** + - Avoid absolute paths: + ```yml + asset-paths: ["/Users/username/project/assets"] + ``` ## Examples ### Compile files in the `assets` subdirectory as part of `docs generate` diff --git a/website/docs/reference/project-configs/docs-paths.md b/website/docs/reference/project-configs/docs-paths.md index 5481c19c9fd..6cd179201fc 100644 --- a/website/docs/reference/project-configs/docs-paths.md +++ b/website/docs/reference/project-configs/docs-paths.md @@ -30,6 +30,25 @@ By default, dbt will search in all resource paths for docs blocks (i.e. the comb +import RelativePath from '/snippets/_relative-path.md'; + + + +- ✅ **Do** + - Use relative path: + ```yml + docs-paths: ["docs"] + ``` + +- ❌ **Don't** + - Avoid absolute paths: + ```yml + docs-paths: ["/Users/username/project/docs"] + ``` + ## Example Use a subdirectory named `docs` for docs blocks: diff --git a/website/docs/reference/project-configs/macro-paths.md b/website/docs/reference/project-configs/macro-paths.md index 486ec08ffdf..d790899689e 100644 --- a/website/docs/reference/project-configs/macro-paths.md +++ b/website/docs/reference/project-configs/macro-paths.md @@ -16,7 +16,26 @@ macro-paths: [directorypath] Optionally specify a custom list of directories where [macros](/docs/build/jinja-macros#macros) are located. Note that you cannot co-locate models and macros. ## Default -By default, dbt will search for macros in a directory named `macros`, i.e. `macro-paths: ["macros"]` +By default, dbt will search for macros in a directory named `macros`. For example, `macro-paths: ["macros"]`. 
+ +import RelativePath from '/snippets/_relative-path.md'; + + + +- ✅ **Do** + - Use relative path: + ```yml + macro-paths: ["macros"] + ``` + +- ❌ **Don't:** + - Avoid absolute paths: + ```yml + macro-paths: ["/Users/username/project/macros"] + ``` ## Examples ### Use a subdirectory named `custom_macros` instead of `macros` diff --git a/website/docs/reference/project-configs/model-paths.md b/website/docs/reference/project-configs/model-paths.md index a0652432787..44a40c33066 100644 --- a/website/docs/reference/project-configs/model-paths.md +++ b/website/docs/reference/project-configs/model-paths.md @@ -12,10 +12,29 @@ model-paths: [directorypath] ## Definition -Optionally specify a custom list of directories where [models](/docs/build/models) and [sources](/docs/build/sources) are located. +Optionally specify a custom list of directories where [models](/docs/build/models), [sources](/docs/build/sources), and [unit tests](/docs/build/unit-tests) are located. ## Default -By default, dbt will search for models and sources in the `models` directory, i.e. `model-paths: ["models"]` +By default, dbt will search for models and sources in the `models` directory. For example, `model-paths: ["models"]`. + +import RelativePath from '/snippets/_relative-path.md'; + + + +- ✅ **Do** + - Use relative path: + ```yml + model-paths: ["models"] + ``` + +- ❌ **Don't:** + - Avoid absolute paths: + ```yml + model-paths: ["/Users/username/project/models"] + ``` ## Examples ### Use a subdirectory named `transformations` instead of `models` diff --git a/website/docs/reference/project-configs/on-run-start-on-run-end.md b/website/docs/reference/project-configs/on-run-start-on-run-end.md index 74557839f11..347ce54ab63 100644 --- a/website/docs/reference/project-configs/on-run-start-on-run-end.md +++ b/website/docs/reference/project-configs/on-run-start-on-run-end.md @@ -27,8 +27,6 @@ A SQL statement (or list of SQL statements) to be run at the start or end of the ## Examples - - ### Grant privileges on all schemas that dbt uses at the end of a run This leverages the [schemas](/reference/dbt-jinja-functions/schemas) variable that is only available in an `on-run-end` hook. diff --git a/website/docs/reference/project-configs/query-comment.md b/website/docs/reference/project-configs/query-comment.md index 7e654350306..f7f9472e947 100644 --- a/website/docs/reference/project-configs/query-comment.md +++ b/website/docs/reference/project-configs/query-comment.md @@ -30,7 +30,7 @@ query-comment: ## Definition -A string to inject as a comment in each query that dbt runs against your database. This comment can be used to attribute SQL statements to specific dbt resources like models and tests. +A string to inject as a comment in each query that dbt runs against your database. This comment can attribute SQL statements to specific dbt resources like models and tests. The `query-comment` configuration can also call a macro that returns a string. @@ -51,7 +51,7 @@ create view analytics.analytics.orders as ( ## Using the dictionary syntax The dictionary syntax includes two keys: - * `comment` (optional, see above for default): The string to be injected to a query as a comment. + * `comment` (optional, for more information, refer to the [default](#default) section): The string to be injected into a query as a comment. * `append` (optional, default=`false`): Whether a comment should be appended (added to the bottom of a query) or not (i.e. added to the top of a query). By default, comments are added to the top of queries (i.e. 
`append: false`). This syntax is useful on databases like Snowflake which [remove leading SQL comments](https://docs.snowflake.com/en/release-notes/2017-04.html#queries-leading-comments-removed-during-execution). @@ -275,4 +275,6 @@ The following context variables are available when generating a query comment: | var | See [var](/reference/dbt-jinja-functions/var) | | target | See [target](/reference/dbt-jinja-functions/target) | | connection_name | A string representing the internal name for the connection. This string is generated by dbt. | -| node | A dictionary representation of the parsed node object. Use `node.unique_id`, `node.database`, `node.schema`, etc | +| node | A dictionary representation of the parsed node object. Use `node.unique_id`, `node.database`, `node.schema`, and so on. | + +Note: The `var()` function in `query-comment` macros can only access variables passed through the `--vars` argument in the CLI. Variables defined in the `vars:` block of your `dbt_project.yml` are not accessible when generating query comments. diff --git a/website/docs/reference/project-configs/require-dbt-version.md b/website/docs/reference/project-configs/require-dbt-version.md index 97b42e036ec..f659370af4e 100644 --- a/website/docs/reference/project-configs/require-dbt-version.md +++ b/website/docs/reference/project-configs/require-dbt-version.md @@ -22,7 +22,7 @@ When you set this configuration, dbt sends a helpful error message for any user If this configuration is not specified, no version check will occur. -:::info Versionless +:::info dbt Cloud release tracks diff --git a/website/docs/reference/project-configs/seed-paths.md b/website/docs/reference/project-configs/seed-paths.md index 614bda62cd2..53e2902cae0 100644 --- a/website/docs/reference/project-configs/seed-paths.md +++ b/website/docs/reference/project-configs/seed-paths.md @@ -16,10 +16,29 @@ Optionally specify a custom list of directories where [seed](/docs/build/seeds) ## Default -By default, dbt expects seeds to be located in the `seeds` directory, i.e. `seed-paths: ["seeds"]` +By default, dbt expects seeds to be located in the `seeds` directory. For example, `seed-paths: ["seeds"]`. + +import RelativePath from '/snippets/_relative-path.md'; + + + +- ✅ **Do** + - Use relative path: + ```yml + seed-paths: ["seed"] + ``` + +- ❌ **Don't:** + - Avoid absolute paths: + ```yml + seed-paths: ["/Users/username/project/seed"] + ``` ## Examples -### Use a subdirectory named `custom_seeds` instead of `seeds` +### Use a directory named `custom_seeds` instead of `seeds` diff --git a/website/docs/reference/project-configs/snapshot-paths.md b/website/docs/reference/project-configs/snapshot-paths.md index 8319833f1e6..a13697fc705 100644 --- a/website/docs/reference/project-configs/snapshot-paths.md +++ b/website/docs/reference/project-configs/snapshot-paths.md @@ -16,15 +16,35 @@ snapshot-paths: [directorypath] Optionally specify a custom list of directories where [snapshots](/docs/build/snapshots) are located. -In [Versionless](/docs/dbt-versions/versionless-cloud) and on dbt v1.9 and higher, you can co-locate your snapshots with models if they are [defined using the latest YAML syntax](/docs/build/snapshots). +In dbt Core v1.9+, you can co-locate your snapshots with models if they are [defined using the latest YAML syntax](/docs/build/snapshots). -Note that you cannot co-locate models and snapshots.
However, in [Versionless](/docs/dbt-versions/versionless-cloud) and on dbt v1.9 and higher, you can co-locate your snapshots with models if they are [defined using the latest YAML syntax](/docs/build/snapshots). +Note that you cannot co-locate models and snapshots. However, in dbt Core v1.9+, you can co-locate your snapshots with models if they are [defined using the latest YAML syntax](/docs/build/snapshots). ## Default -By default, dbt will search for snapshots in the `snapshots` directory, i.e. `snapshot-paths: ["snapshots"]` +By default, dbt will search for snapshots in the `snapshots` directory. For example, `snapshot-paths: ["snapshots"]`. + + +import RelativePath from '/snippets/_relative-path.md'; + + + +- ✅ **Do** + - Use relative path: + ```yml + snapshot-paths: ["snapshots"] + ``` + +- ❌ **Don't:** + - Avoid absolute paths: + ```yml + snapshot-paths: ["/Users/username/project/snapshots"] + ``` ## Examples ### Use a subdirectory named `archives` instead of `snapshots` diff --git a/website/docs/reference/project-configs/test-paths.md b/website/docs/reference/project-configs/test-paths.md index 6749a07d23d..ab816eec973 100644 --- a/website/docs/reference/project-configs/test-paths.md +++ b/website/docs/reference/project-configs/test-paths.md @@ -21,6 +21,25 @@ Without specifying this config, dbt will search for tests in the `tests` directo - Generic test definitions in the `tests/generic` subdirectory - Singular tests (all other files) +import RelativePath from '/snippets/_relative-path.md'; + + + +- ✅ **Do** + - Use relative path: + ```yml + test-paths: ["test"] + ``` + +- ❌ **Don't:** + - Avoid absolute paths: + ```yml + test-paths: ["/Users/username/project/test"] + ``` + ## Examples ### Use a subdirectory named `custom_tests` instead of `tests` for data tests diff --git a/website/docs/reference/resource-configs/access.md b/website/docs/reference/resource-configs/access.md index 0f67a454344..c73e09dd639 100644 --- a/website/docs/reference/resource-configs/access.md +++ b/website/docs/reference/resource-configs/access.md @@ -15,14 +15,6 @@ models: - - -Access modifiers may be applied to models one-by-one in YAML properties. In v1.5 and v1.6, you are unable to configure `access` for multiple models at once. Upgrade to v1.7 for additional configuration options. A group or subfolder contains models with varying access levels, so when you designate a model with `access: public`, make sure you intend for this behavior. - - - - - You can apply access modifiers in config files, including the `dbt_project.yml`, or to models one-by-one in `properties.yml`. Applying access configs to a subfolder modifies the default for all models in that subfolder, so make sure you intend for this behavior. When setting individual model access, a group or subfolder might contain a variety of access levels, so when you designate a model with `access: public` make sure you intend for this behavior. There are multiple approaches to configuring access: @@ -83,8 +75,6 @@ There are multiple approaches to configuring access: ``` - - After you define `access`, rerun a production job to apply the change. ## Definition diff --git a/website/docs/reference/resource-configs/alias.md b/website/docs/reference/resource-configs/alias.md index 3f36bbd0d8f..5beaa238806 100644 --- a/website/docs/reference/resource-configs/alias.md +++ b/website/docs/reference/resource-configs/alias.md @@ -8,9 +8,11 @@ datatype: string -Specify a custom alias for a model in your `dbt_project.yml` file or config block. 
+Specify a custom alias for a model in your `dbt_project.yml` file, `models/properties.yml` file, or config block in a SQL file. -For example, if you have a model that calculates `sales_total` and want to give it a more user-friendly alias, you can alias it like this: +For example, if you have a model that calculates `sales_total` and want to give it a more user-friendly alias, you can alias it as shown in the following examples. + +In the `dbt_project.yml` file, the following example sets a default `alias` for the `sales_total` model at the project level: @@ -22,16 +24,40 @@ models: ``` +The following specifies an `alias` as part of the `models/properties.yml` file metadata, useful for centralized configuration: + + + +```yml +version: 2 + +models: + - name: sales_total + config: + alias: sales_dashboard +``` + + +The following assigns the `alias` directly in the `models/sales_total.sql` file: + + + +```sql +{{ config( + alias="sales_dashboard" +) }} +``` + + This would return `analytics.finance.sales_dashboard` in the database, instead of the default `analytics.finance.sales_total`. +Configure a seed's alias in your `dbt_project.yml` file or a `properties.yml` file. The following examples demonstrate how to `alias` a seed named `product_categories` to `categories_data`. -Configure a seed's alias in your `dbt_project.yml` file or config block. - -For example, if you have a seed that represents `product_categories` and want to alias it as `categories_data`, you would alias like this: +In the `dbt_project.yml` file at the project level: @@ -41,6 +67,21 @@ seeds: product_categories: +alias: categories_data ``` + + +In the `seeds/properties.yml` file: + + + +```yml +version: 2 + +seeds: + - name: product_categories + config: + alias: categories_data +``` + This would return the name `analytics.finance.categories_data` in the database. @@ -55,9 +96,6 @@ seeds: +alias: country_mappings ``` - - - @@ -65,7 +103,9 @@ seeds: Configure a snapshots's alias in your `dbt_project.yml` file or config block. -For example, if you have a snapshot that is named `your_snapshot` and want to alias it as `the_best_snapshot`, you would alias like this: +The following examples demonstrate how to `alias` a snapshot named `your_snapshot` to `the_best_snapshot`. + +In the `dbt_project.yml` file at the project level: @@ -75,20 +115,57 @@ snapshots: your_snapshot: +alias: the_best_snapshot ``` + -This would build your snapshot to `analytics.finance.the_best_snapshot` in the database. +In the `snapshots/properties.yml` file: + + + +```yml +version: 2 + +snapshots: + - name: your_snapshot + config: + alias: the_best_snapshot +``` + +In the `snapshots/your_snapshot.sql` file: + + +```sql +{{ config( + alias="the_best_snapshot" +) }} +``` +This would build your snapshot to `analytics.finance.the_best_snapshot` in the database. + -Configure a test's alias in your `schema.yml` file or config block. +Configure a data test's alias in your `dbt_project.yml` file, `properties.yml` file, or config block in the model file. + +The following examples demonstrate how to `alias` the `unique` data test on the `order_id` column as `unique_order_id_test` to identify this specific data test.
-For example, to add a unique test to the `order_id` column and give it an alias `unique_order_id_test` to identify this specific test, you would alias like this: +In the `dbt_project.yml` file at the project level: - + + +```yml +tests: + your_project: + +alias: unique_order_id_test +``` + +In the `models/properties.yml` file: + + ```yml models: @@ -99,10 +176,22 @@ models: - unique: alias: unique_order_id_test ``` + -When using `--store-failures`, this would return the name `analytics.finance.orders_order_id_unique_order_id_test` in the database. +In the `tests/unique_order_id_test.sql` file: + + +```sql +{{ config( + alias="unique_order_id_test", + severity="error" +) }} +``` + +When using [`store_failures_as`](/reference/resource-configs/store_failures_as), this would return the name `analytics.finance.orders_order_id_unique_order_id_test` in the database. + + diff --git a/website/docs/reference/resource-configs/athena-configs.md b/website/docs/reference/resource-configs/athena-configs.md index f871ede9fab..fd5bc663ee7 100644 --- a/website/docs/reference/resource-configs/athena-configs.md +++ b/website/docs/reference/resource-configs/athena-configs.md @@ -109,7 +109,7 @@ lf_grants={ There are some limitations and recommendations that should be considered: - `lf_tags` and `lf_tags_columns` configs support only attaching lf tags to corresponding resources. -- We recommend managing LF Tags permissions somewhere outside dbt. For example, [terraform](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lakeformation_permissions) or [aws cdk](https://docs.aws.amazon.com/cdk/api/v1/docs/aws-lakeformation-readme.html). +- We recommend managing LF Tags permissions somewhere outside dbt. For example, [terraform](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lakeformation_permissions) or [aws cdk](https://docs.aws.amazon.com/cdk/api/v2/docs/aws-cdk-lib.aws_lakeformation-readme.html). - `data_cell_filters` management can't be automated outside dbt because the filter can't be attached to the table, which doesn't exist. Once you `enable` this config, dbt will set all filters and their permissions during every dbt run. Such an approach keeps the actual state of row-level security configuration after every dbt run and applies changes if they occur: drop, create, and update filters and their permissions. - Any tags listed in `lf_inherited_tags` should be strictly inherited from the database level and never overridden at the table and column level. - Currently, `dbt-athena` does not differentiate between an inherited tag association and an override it made previously. diff --git a/website/docs/reference/resource-configs/batch_size.md b/website/docs/reference/resource-configs/batch_size.md new file mode 100644 index 00000000000..4001545778a --- /dev/null +++ b/website/docs/reference/resource-configs/batch_size.md @@ -0,0 +1,56 @@ +--- +title: "batch_size" +id: "batch-size" +sidebar_label: "batch_size" +resource_types: [models] +description: "dbt uses `batch_size` to determine how large batches are when running a microbatch incremental model." +datatype: hour | day | month | year +--- + +Available in the [dbt Cloud "Latest" release track](/docs/dbt-versions/cloud-release-tracks) and dbt Core v1.9 and higher. + +## Definition + +The `batch_size` config determines how large batches are when running a microbatch incremental model. Accepted values are `hour`, `day`, `month`, or `year`.
You can configure `batch_size` for a [model](/docs/build/models) in your `dbt_project.yml` file, property YAML file, or config block. + +## Examples + +The following examples set `day` as the `batch_size` for the `user_sessions` model. + +Example of the `batch_size` config in the `dbt_project.yml` file: + + + +```yml +models: + my_project: + user_sessions: + +batch_size: day +``` + + +Example in a properties YAML file: + + + +```yml +models: + - name: user_sessions + config: + batch_size: day +``` + + + +Example in a SQL model config block: + + + +```sql +{{ config( + batch_size='day' +) }} +``` + + + diff --git a/website/docs/reference/resource-configs/begin.md b/website/docs/reference/resource-configs/begin.md new file mode 100644 index 00000000000..dd47419be21 --- /dev/null +++ b/website/docs/reference/resource-configs/begin.md @@ -0,0 +1,55 @@ +--- +title: "begin" +id: "begin" +sidebar_label: "begin" +resource_types: [models] +description: "dbt uses `begin` to determine when a microbatch incremental model should begin from. When defined on a microbatch incremental model, `begin` is used as the lower time bound when the model is built for the first time or fully refreshed." +datatype: string +--- + +Available in the [dbt Cloud "Latest" release track](/docs/dbt-versions/cloud-release-tracks) and dbt Core v1.9 and higher. + +## Definition + +Set the `begin` config to the timestamp value at which your microbatch model data should begin — at the point the data becomes relevant for the microbatch model. You can configure `begin` for a [model](/docs/build/models) in your `dbt_project.yml` file, property YAML file, or config block. The value for `begin` must be a string representing an ISO formatted date OR date and time. + +## Examples + +The following examples set `2024-01-01 00:00:00` as the `begin` config for the `user_sessions` model. + +Example in the `dbt_project.yml` file: + + + +```yml +models: + my_project: + user_sessions: + +begin: "2024-01-01 00:00:00" +``` + + +Example in a properties YAML file: + + + +```yml +models: + - name: user_sessions + config: + begin: "2024-01-01 00:00:00" +``` + + + +Example in a SQL model config block: + + + +```sql +{{ config( + begin='2024-01-01 00:00:00' +) }} +``` + + diff --git a/website/docs/reference/resource-configs/bigquery-configs.md b/website/docs/reference/resource-configs/bigquery-configs.md index a6f3036ede8..c912bca0688 100644 --- a/website/docs/reference/resource-configs/bigquery-configs.md +++ b/website/docs/reference/resource-configs/bigquery-configs.md @@ -21,7 +21,7 @@ This will allow you to read and write from multiple BigQuery projects. Same for ### Partition clause -BigQuery supports the use of a [partition by](https://cloud.google.com/bigquery/docs/data-definition-language#specifying_table_partitioning_options) clause to easily partition a by a column or expression. This option can help decrease latency and cost when querying large tables. Note that partition pruning [only works](https://cloud.google.com/bigquery/docs/querying-partitioned-tables#pruning_limiting_partitions) when partitions are filtered using literal values (so selecting partitions using a won't improve performance). +BigQuery supports the use of a [partition by](https://cloud.google.com/bigquery/docs/data-definition-language#specifying_table_partitioning_options) clause to easily partition a table by a column or expression. This option can help decrease latency and cost when querying large tables.
Note that partition pruning [only works](https://cloud.google.com/bigquery/docs/querying-partitioned-tables#use_a_constant_filter_expression) when partitions are filtered using literal values (so selecting partitions using a won't improve performance). The `partition_by` config can be supplied as a dictionary with the following format: @@ -265,7 +265,7 @@ If your model has `partition_by` configured, you may optionally specify two addi -### Clustering Clause +### Clustering clause BigQuery tables can be [clustered](https://cloud.google.com/bigquery/docs/clustered-tables) to colocate related data. @@ -286,7 +286,7 @@ select * from ... -Clustering on a multiple columns: +Clustering on multiple columns: @@ -303,11 +303,11 @@ select * from ... -## Managing KMS Encryption +## Managing KMS encryption [Customer managed encryption keys](https://cloud.google.com/bigquery/docs/customer-managed-encryption) can be configured for BigQuery tables using the `kms_key_name` model configuration. -### Using KMS Encryption +### Using KMS encryption To specify the KMS key name for a model (or a group of models), use the `kms_key_name` model configuration. The following example sets the `kms_key_name` for all of the models in the `encrypted/` directory of your dbt project. @@ -328,7 +328,7 @@ models: -## Labels and Tags +## Labels and tags ### Specifying labels @@ -373,8 +373,6 @@ models: - - ### Specifying tags @@ -427,14 +425,15 @@ Please note that in order for policy tags to take effect, [column-level `persist The [`incremental_strategy` config](/docs/build/incremental-strategy) controls how dbt builds incremental models. dbt uses a [merge statement](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax) on BigQuery to refresh incremental tables. -The `incremental_strategy` config can be set to one of two values: - - `merge` (default) - - `insert_overwrite` +The `incremental_strategy` config can be set to one of the following values: +- `merge` (default) +- `insert_overwrite` +- [`microbatch`](/docs/build/incremental-microbatch) ### Performance and cost The operations performed by dbt while building a BigQuery incremental model can -be made cheaper and faster by using [clustering keys](#clustering-keys) in your +be made cheaper and faster by using a [clustering clause](#clustering-clause) in your model configuration. See [this guide](https://discourse.getdbt.com/t/benchmarking-incremental-strategies-on-bigquery/981) for more information on performance tuning for BigQuery incremental models. **Note:** These performance and cost benefits are applicable to incremental models @@ -563,7 +562,7 @@ If no `partitions` configuration is provided, dbt will instead: 3. Query the destination table to find the _max_ partition in the database When building your model SQL, you can take advantage of the introspection performed -by dbt to filter for only _new_ data. The max partition in the destination table +by dbt to filter for only _new_ data. The maximum value in the partitioned field in the destination table will be available using the `_dbt_max_partition` BigQuery scripting variable. **Note:** this is a BigQuery SQL variable, not a dbt Jinja variable, so no jinja brackets are required to access this variable. @@ -673,7 +672,7 @@ select ... 
-## Authorized Views +## Authorized views If the `grant_access_to` config is specified for a model materialized as a view, dbt will grant the view model access to select from the list of datasets @@ -712,8 +711,6 @@ models: Views with this configuration will be able to select from objects in `project_1.dataset_1` and `project_2.dataset_2`, even when they are located elsewhere and queried by users who do not otherwise have access to `project_1.dataset_1` and `project_2.dataset_2`. - - ## Materialized views The BigQuery adapter supports [materialized views](https://cloud.google.com/bigquery/docs/materialized-views-intro) @@ -896,10 +893,6 @@ As with most data platforms, there are limitations associated with materialized Find more information about materialized view limitations in Google's BigQuery [docs](https://cloud.google.com/bigquery/docs/materialized-views-intro#limitations). - - - - ## Python models The BigQuery adapter supports Python models with the following additional configuration parameters: @@ -916,4 +909,10 @@ By default, this is set to `True` to support the default `intermediate_format` o ### The `intermediate_format` parameter The `intermediate_format` parameter specifies which file format to use when writing records to a table. The default is `parquet`. + + +## Unit test limitations + +You must specify all fields in a BigQuery `STRUCT` for [unit tests](/docs/build/unit-tests). You cannot use only a subset of fields in a `STRUCT`. + diff --git a/website/docs/reference/resource-configs/contract.md b/website/docs/reference/resource-configs/contract.md index 2f52fc26e1f..fb25076b0d9 100644 --- a/website/docs/reference/resource-configs/contract.md +++ b/website/docs/reference/resource-configs/contract.md @@ -16,14 +16,6 @@ This is to ensure that the people querying your model downstream—both inside a ## Data type aliasing - - -The `data_type` defined in your YAML file must match a data type your data platform recognizes. dbt does not do any type aliasing itself. If your data platform recognizes both `int` and `integer` as corresponding to the same type, then they will return a match. - - - - - dbt uses built-in type aliasing for the `data_type` defined in your YAML. For example, you can specify `string` in your contract, and on Postgres/Redshift, dbt will convert it to `text`. If dbt doesn't recognize the `data_type` name among its known aliases, it will pass it through as-is. This is enabled by default, but you can opt-out by setting `alias_types` to `false`. Example for disabling: @@ -42,7 +34,6 @@ models: ``` - ## Size, precision, and scale diff --git a/website/docs/reference/resource-configs/database.md b/website/docs/reference/resource-configs/database.md index 338159b30dc..6c57e7e2c69 100644 --- a/website/docs/reference/resource-configs/database.md +++ b/website/docs/reference/resource-configs/database.md @@ -49,7 +49,7 @@ This would result in the generated relation being located in the `staging` datab -Available for versionless dbt Cloud or dbt Core v1.9+. Select v1.9 or newer from the version dropdown to view the configs. +Available for dbt Cloud release tracks or dbt Core v1.9+. Select v1.9 or newer from the version dropdown to view the configs. @@ -79,22 +79,19 @@ This results in the generated relation being located in the `snapshots` database -Configure a database in your `dbt_project.yml` file. +Customize the database for storing test results in your `dbt_project.yml` file. 
-For example, to load a test into a database called `reporting` instead of the target database, you can configure it like this: +For example, to save test results in a specific database, you can configure it like this: ```yml tests: - - my_not_null_test: - column_name: order_id - type: not_null - +database: reporting + +store_failures: true + +database: test_results ``` -This would result in the generated relation being located in the `reporting` database, so the full relation name would be `reporting.finance.my_not_null_test`. - +This would result in the test results being stored in the `test_results` database. diff --git a/website/docs/reference/resource-configs/databricks-configs.md b/website/docs/reference/resource-configs/databricks-configs.md index 5823fe7d9a4..6ac3e23c113 100644 --- a/website/docs/reference/resource-configs/databricks-configs.md +++ b/website/docs/reference/resource-configs/databricks-configs.md @@ -7,22 +7,7 @@ id: "databricks-configs" When materializing a model as `table`, you may include several optional configs that are specific to the dbt-databricks plugin, in addition to the standard [model configs](/reference/model-configs). - - - -| Option | Description | Required? | Model Support | Example | -|---------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------|---------------|--------------------------| -| file_format | The file format to use when creating tables (`parquet`, `delta`, `hudi`, `csv`, `json`, `text`, `jdbc`, `orc`, `hive` or `libsvm`). | Optional | SQL, Python | `delta` | -| location_root | The created table uses the specified directory to store its data. The table alias is appended to it. | Optional | SQL, Python | `/mnt/root` | -| partition_by | Partition the created table by the specified columns. A directory is created for each partition. | Optional | SQL, Python | `date_day` | -| liquid_clustered_by | Cluster the created table by the specified columns. Clustering method is based on [Delta's Liquid Clustering feature](https://docs.databricks.com/en/delta/clustering.html). Available since dbt-databricks 1.6.2. | Optional | SQL | `date_day` | -| clustered_by | Each partition in the created table will be split into a fixed number of buckets by the specified columns. | Optional | SQL, Python | `country_code` | -| buckets | The number of buckets to create while clustering | Required if `clustered_by` is specified | SQL, Python | `8` | -| tblproperties | [Tblproperties](https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-tblproperties.html) to be set on the created table | Optional | SQL | `{'this.is.my.key': 12}` | - - - - + | Option | Description | Required? | Model Support | Example | @@ -34,13 +19,14 @@ When materializing a model as `table`, you may include several optional configs | clustered_by | Each partition in the created table will be split into a fixed number of buckets by the specified columns. 
| Optional | SQL, Python | `country_code` | | buckets | The number of buckets to create while clustering | Required if `clustered_by` is specified | SQL, Python | `8` | | tblproperties | [Tblproperties](https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-tblproperties.html) to be set on the created table | Optional | SQL, Python* | `{'this.is.my.key': 12}` | +| compression | Set the compression algorithm. | Optional | SQL, Python | `zstd` | \* Beginning in 1.7.12, we have added tblproperties to Python models via an alter statement that runs after table creation. We do not yet have a PySpark API to set tblproperties at table creation, so this feature is primarily to allow users to anotate their python-derived tables with tblproperties. - + 1.8 introduces support for [Tags](https://docs.databricks.com/en/data-governance/unity-catalog/tags.html) at the table level, in addition to all table configuration supported in 1.7. @@ -49,11 +35,12 @@ We do not yet have a PySpark API to set tblproperties at table creation, so this | file_format | The file format to use when creating tables (`parquet`, `delta`, `hudi`, `csv`, `json`, `text`, `jdbc`, `orc`, `hive` or `libsvm`). | Optional | SQL, Python | `delta` | | location_root | The created table uses the specified directory to store its data. The table alias is appended to it. | Optional | SQL, Python | `/mnt/root` | | partition_by | Partition the created table by the specified columns. A directory is created for each partition. | Optional | SQL, Python | `date_day` | -| liquid_clustered_by | Cluster the created table by the specified columns. Clustering method is based on [Delta's Liquid Clustering feature](https://docs.databricks.com/en/delta/clustering.html). Available since dbt-databricks 1.6.2. | Optional | SQL | `date_day` | +| liquid_clustered_by | Cluster the created table by the specified columns. Clustering method is based on [Delta's Liquid Clustering feature](https://docs.databricks.com/en/delta/clustering.html). Available since dbt-databricks 1.6.2. | Optional | SQL, Python | `date_day` | | clustered_by | Each partition in the created table will be split into a fixed number of buckets by the specified columns. | Optional | SQL, Python | `country_code` | | buckets | The number of buckets to create while clustering | Required if `clustered_by` is specified | SQL, Python | `8` | | tblproperties | [Tblproperties](https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-tblproperties.html) to be set on the created table | Optional | SQL, Python* | `{'this.is.my.key': 12}` | -| databricks_tags | [Tags](https://docs.databricks.com/en/data-governance/unity-catalog/tags.html) to be set on the created table | Optional | SQL+, Python+ | `{'my_tag': 'my_value'}` | +| databricks_tags | [Tags](https://docs.databricks.com/en/data-governance/unity-catalog/tags.html) to be set on the created table | Optional | SQL+, Python+ | `{'my_tag': 'my_value'}` | +| compression | Set the compression algorithm. | Optional | SQL, Python | `zstd` | \* Beginning in 1.7.12, we have added tblproperties to Python models via an alter statement that runs after table creation. We do not yet have a PySpark API to set tblproperties at table creation, so this feature is primarily to allow users to anotate their python-derived tables with tblproperties. @@ -62,6 +49,131 @@ We do not yet have a PySpark API to set tblproperties at table creation, so this + + +dbt-databricks v1.9 adds support for the `table_format: iceberg` config. 
Try it now on the [dbt Cloud "Latest" release track](/docs/dbt-versions/cloud-release-tracks). All other table configurations were also supported in 1.8. + +| Option | Description | Required? | Model Support | Example | +|---------------------|-----------------------------|-------------------------------------------|-----------------|--------------------------| +| table_format | Whether or not to provision [Iceberg](https://docs.databricks.com/en/delta/uniform.html) compatibility for the materialization | Optional | SQL, Python | `iceberg` | +| file_format+ | The file format to use when creating tables (`parquet`, `delta`, `hudi`, `csv`, `json`, `text`, `jdbc`, `orc`, `hive` or `libsvm`). | Optional | SQL, Python | `delta` | +| location_root | The created table uses the specified directory to store its data. The table alias is appended to it. | Optional | SQL, Python | `/mnt/root` | +| partition_by | Partition the created table by the specified columns. A directory is created for each partition. | Optional | SQL, Python | `date_day` | +| liquid_clustered_by | Cluster the created table by the specified columns. Clustering method is based on [Delta's Liquid Clustering feature](https://docs.databricks.com/en/delta/clustering.html). Available since dbt-databricks 1.6.2. | Optional | SQL, Python | `date_day` | +| clustered_by | Each partition in the created table will be split into a fixed number of buckets by the specified columns. | Optional | SQL, Python | `country_code` | +| buckets | The number of buckets to create while clustering | Required if `clustered_by` is specified | SQL, Python | `8` | +| tblproperties | [Tblproperties](https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-tblproperties.html) to be set on the created table | Optional | SQL, Python* | `{'this.is.my.key': 12}` | +| databricks_tags | [Tags](https://docs.databricks.com/en/data-governance/unity-catalog/tags.html) to be set on the created table | Optional | SQL++, Python++ | `{'my_tag': 'my_value'}` | +| compression | Set the compression algorithm. | Optional | SQL, Python | `zstd` | + +\* We do not yet have a PySpark API to set tblproperties at table creation, so this feature is primarily to allow users to anotate their python-derived tables with tblproperties. +\+ When `table_format` is `iceberg`, `file_format` must be `delta`. +\++ `databricks_tags` are currently only supported at the table level, and applied via `ALTER` statements. + + + + + +### Python submission methods + +In dbt-databricks v1.9 (try it now in [the dbt Cloud "Latest" release track](/docs/dbt-versions/cloud-release-tracks)), you can use these four options for `submission_method`: + +* `all_purpose_cluster`: Executes the python model either directly using the [command api](https://docs.databricks.com/api/workspace/commandexecution) or by uploading a notebook and creating a one-off job run +* `job_cluster`: Creates a new job cluster to execute an uploaded notebook as a one-off job run +* `serverless_cluster`: Uses a [serverless cluster](https://docs.databricks.com/en/jobs/run-serverless-jobs.html) to execute an uploaded notebook as a one-off job run +* `workflow_job`: Creates/updates a reusable workflow and uploaded notebook, for execution on all-purpose, job, or serverless clusters. + :::caution + This approach gives you maximum flexibility, but will create persistent artifacts in Databricks (the workflow) that users could run outside of dbt. 
+ ::: + +We are currently in a transitionary period where there is a disconnect between old submission methods (which were grouped by compute), and the logically distinct submission methods (command, job run, workflow). + +As such, the supported config matrix is somewhat complicated: + +| Config | Use | Default | `all_purpose_cluster`* | `job_cluster` | `serverless_cluster` | `workflow_job` | +| --------------------- | -------------------------------------------------------------------- | ------------------ | ---------------------- | ------------- | -------------------- | -------------- | +| `create_notebook` | if false, use Command API, otherwise upload notebook and use job run | `false` | ✅ | ❌ | ❌ | ❌ | +| `timeout` | maximum time to wait for command/job to run | `0` (No timeout) | ✅ | ✅ | ✅ | ✅ | +| `job_cluster_config` | configures a [new cluster](https://docs.databricks.com/api/workspace/jobs/submit#tasks-new_cluster) for running the model | `{}` | ❌ | ✅ | ❌ | ✅ | +| `access_control_list` | directly configures [access control](https://docs.databricks.com/api/workspace/jobs/submit#access_control_list) for the job | `{}` | ✅ | ✅ | ✅ | ✅ | +| `packages` | list of packages to install on the executing cluster | `[]` | ✅ | ✅ | ✅ | ✅ | +| `index_url` | url to install `packages` from | `None` (uses pypi) | ✅ | ✅ | ✅ | ✅ | +| `additional_libs` | directly configures [libraries](https://docs.databricks.com/api/workspace/jobs/submit#tasks-libraries) | `[]` | ✅ | ✅ | ✅ | ✅ | +| `python_job_config` | additional configuration for jobs/workflows (see table below) | `{}` | ✅ | ✅ | ✅ | ✅ | +| `cluster_id` | id of existing all purpose cluster to execute against | `None` | ✅ | ❌ | ❌ | ✅ | +| `http_path` | path to existing all purpose cluster to execute against | `None` | ✅ | ❌ | ❌ | ❌ | + +\* Only `timeout` and `cluster_id`/`http_path` are supported when `create_notebook` is false + +With the introduction of the `workflow_job` submission method, we chose to segregate further configuration of the python model submission under a top level configuration named `python_job_config`. This keeps configuration options for jobs and workflows namespaced in such a way that they do not interfere with other model config, allowing us to be much more flexible with what is supported for job execution. + +The support matrix for this feature is divided into `workflow_job` and all others (assuming `all_purpose_cluster` with `create_notebook`==true). 
+Each config option listed must be nested under `python_job_config`: + +| Config | Use | Default | `workflow_job` | All others | +| -------------------------- | ----------------------------------------------------------------------------------------------------------------------- | ------- | -------------- | ---------- | +| `name` | The name to give (or used to look up) the created workflow | `None` | ✅ | ❌ | +| `grants` | A simplified way to specify access control for the workflow | `{}` | ✅ | ✅ | +| `existing_job_id` | Id to use to look up the created workflow (in place of `name`) | `None` | ✅ | ❌ | +| `post_hook_tasks` | [Tasks](https://docs.databricks.com/api/workspace/jobs/create#tasks) to include after the model notebook execution | `[]` | ✅ | ❌ | +| `additional_task_settings` | Additional [task config](https://docs.databricks.com/api/workspace/jobs/create#tasks) to include in the model task | `{}` | ✅ | ❌ | +| [Other job run settings](https://docs.databricks.com/api/workspace/jobs/submit) | Config will be copied into the request, outside of the model task | `None` | ❌ | ✅ | +| [Other workflow settings](https://docs.databricks.com/api/workspace/jobs/create) | Config will be copied into the request, outside of the model task | `None` | ✅ | ❌ | + +This example uses the new configuration options in the previous table: + + + +```yaml +models: + - name: my_model + config: + submission_method: workflow_job + + # Define a job cluster to create for running this workflow + # Alternately, could specify cluster_id to use an existing cluster, or provide neither to use a serverless cluster + job_cluster_config: + spark_version: "15.3.x-scala2.12" + node_type_id: "rd-fleet.2xlarge" + runtime_engine: "{{ var('job_cluster_defaults.runtime_engine') }}" + data_security_mode: "{{ var('job_cluster_defaults.data_security_mode') }}" + autoscale: { "min_workers": 1, "max_workers": 4 } + + python_job_config: + # These settings are passed in, as is, to the request + email_notifications: { on_failure: ["me@example.com"] } + max_retries: 2 + + name: my_workflow_name + + # Override settings for your model's dbt task. For instance, you can + # change the task key + additional_task_settings: { "task_key": "my_dbt_task" } + + # Define tasks to run before/after the model + # This example assumes you have already uploaded a notebook to /my_notebook_path to perform optimize and vacuum + post_hook_tasks: + [ + { + "depends_on": [{ "task_key": "my_dbt_task" }], + "task_key": "OPTIMIZE_AND_VACUUM", + "notebook_task": + { "notebook_path": "/my_notebook_path", "source": "WORKSPACE" }, + }, + ] + + # Simplified structure, rather than having to specify permission separately for each user + grants: + view: [{ "group_name": "marketing-team" }] + run: [{ "user_name": "other_user@example.com" }] + manage: [] +``` + + + + + + ## Incremental models dbt-databricks plugin leans heavily on the [`incremental_strategy` config](/docs/build/incremental-strategy). This config tells the incremental materialization how to build models in runs beyond their first. It can be set to one of four values: @@ -72,6 +184,23 @@ dbt-databricks plugin leans heavily on the [`incremental_strategy` config](/docs Each of these strategies has its pros and cons, which we'll discuss below. As with any model config, `incremental_strategy` may be specified in `dbt_project.yml` or within a model file's `config()` block. + + + + +## Incremental models + +dbt-databricks plugin leans heavily on the [`incremental_strategy` config](/docs/build/incremental-strategy). 
This config tells the incremental materialization how to build models in runs beyond their first. It can be set to one of five values:
+ - **`append`**: Insert new records without updating or overwriting any existing data.
+ - **`insert_overwrite`**: If `partition_by` is specified, overwrite partitions in the table with new data. If no `partition_by` is specified, overwrite the entire table with new data.
+ - **`merge`** (default; Delta and Hudi file format only): Match records based on a `unique_key`, updating old records, and inserting new ones. (If no `unique_key` is specified, all new data is inserted, similar to `append`.)
+ - **`replace_where`** (Delta file format only): Match records based on `incremental_predicates`, replacing all records that match the predicates from the existing table with records matching the predicates from the new data. (If no `incremental_predicates` are specified, all new data is inserted, similar to `append`.)
+ - **`microbatch`** (Delta file format only): Implements the [microbatch strategy](/docs/build/incremental-microbatch) using `replace_where` with predicates generated based on `event_time`.
+
+Each of these strategies has its pros and cons, which we'll discuss below. As with any model config, `incremental_strategy` may be specified in `dbt_project.yml` or within a model file's `config()` block.
+
+
+
 ### The `append` strategy
 
 Following the `append` strategy, dbt will perform an `insert into` statement with all new data. The appeal of this strategy is that it is straightforward and functional across all platforms, file types, connection methods, and Apache Spark versions. However, this strategy _cannot_ update, overwrite, or delete existing data, so it is likely to insert duplicate records for many data sources.
@@ -218,7 +347,7 @@ The `merge` incremental strategy requires:
 - Databricks Runtime 5.1 and above for delta file format
 - Apache Spark for hudi file format
 
-dbt will run an [atomic `merge` statement](https://docs.databricks.com/spark/latest/spark-sql/language-manual/merge-into.html) which looks nearly identical to the default merge behavior on Snowflake and BigQuery. If a `unique_key` is specified (recommended), dbt will update old records with values from new records that match on the key column. If a `unique_key` is not specified, dbt will forgo match criteria and simply insert all new records (similar to `append` strategy).
+The Databricks adapter will run an [atomic `merge` statement](https://docs.databricks.com/spark/latest/spark-sql/language-manual/merge-into.html) similar to the default merge behavior on Snowflake and BigQuery. If a `unique_key` is specified (recommended), dbt will update old records with values from new records that match on the key column. If a `unique_key` is not specified, dbt will forgo match criteria and simply insert all new records (similar to `append` strategy).
 
 Specifying `merge` as the incremental strategy is optional since it's the default strategy used when none is specified.
 
@@ -299,6 +428,123 @@ merge into analytics.merge_incremental as DBT_INTERNAL_DEST
 
+
+
+Beginning with 1.9, `merge` behavior can be modified with the following additional configuration options:
+
+- `target_alias`, `source_alias`: Aliases for the target and source to allow you to describe your merge conditions more naturally. These default to `DBT_INTERNAL_DEST` and `DBT_INTERNAL_SOURCE`, respectively.
+- `skip_matched_step`: If set to `true`, the 'matched' clause of the merge statement will not be included.
+- `skip_not_matched_step`: If set to `true`, the 'not matched' clause will not be included.
+- `matched_condition`: Condition to apply to the `WHEN MATCHED` clause. You should use the `target_alias` and `source_alias` to write a conditional expression, such as `DBT_INTERNAL_DEST.col1 = hash(DBT_INTERNAL_SOURCE.col2, DBT_INTERNAL_SOURCE.col3)`. This condition further restricts the matched set of rows.
+- `not_matched_condition`: Condition to apply to the `WHEN NOT MATCHED [BY TARGET]` clause. This condition further restricts the set of source rows that do not match the target and that will be inserted into the merged table.
+- `not_matched_by_source_condition`: Condition used to further filter the `WHEN NOT MATCHED BY SOURCE` clause. Only used in conjunction with `not_matched_by_source_action: delete`.
+- `not_matched_by_source_action`: If set to `delete`, a `DELETE` clause is added to the merge statement for `WHEN NOT MATCHED BY SOURCE`.
+- `merge_with_schema_evolution`: If set to `true`, the merge statement includes the `WITH SCHEMA EVOLUTION` clause.
+
+For more details on the meaning of each merge clause, please see [the Databricks documentation](https://docs.databricks.com/en/sql/language-manual/delta-merge-into.html).
+
+The following is an example demonstrating the use of these new options:
+
+
+
+
+
+```sql
+{{ config(
+    materialized = 'incremental',
+    unique_key = 'id',
+    incremental_strategy='merge',
+    target_alias='t',
+    source_alias='s',
+    matched_condition='t.tech_change_ts < s.tech_change_ts',
+    not_matched_condition='s.attr1 IS NOT NULL',
+    not_matched_by_source_condition='t.tech_change_ts < current_timestamp()',
+    not_matched_by_source_action='delete',
+    merge_with_schema_evolution=true
+) }}
+
+select
+    id,
+    attr1,
+    attr2,
+    tech_change_ts
+from
+    {{ ref('source_table') }} as s
+```
+
+
+
+
+
+
+```sql
+create temporary view merge_incremental__dbt_tmp as
+
+    select
+        id,
+        attr1,
+        attr2,
+        tech_change_ts
+    from upstream.source_table
+;
+
+merge
+    with schema evolution
+into
+    target_table as t
+using (
+    select
+        id,
+        attr1,
+        attr2,
+        tech_change_ts
+    from
+        source_table as s
+)
+on
+    t.id <=> s.id
+when matched
+    and t.tech_change_ts < s.tech_change_ts
+    then update set
+        id = s.id,
+        attr1 = s.attr1,
+        attr2 = s.attr2,
+        tech_change_ts = s.tech_change_ts
+
+when not matched
+    and s.attr1 IS NOT NULL
+    then insert (
+        id,
+        attr1,
+        attr2,
+        tech_change_ts
+    ) values (
+        s.id,
+        s.attr1,
+        s.attr2,
+        s.tech_change_ts
+    )
+
+when not matched by source
+    and t.tech_change_ts < current_timestamp()
+    then delete
+```
+
+
+
+
+
+
+
+
 ### The `replace_where` strategy
 
 The `replace_where` incremental strategy requires:
@@ -388,7 +634,83 @@ insert into analytics.replace_where_incremental
 
-
+
+
+### The `microbatch` strategy
+
+The Databricks adapter implements the `microbatch` strategy using `replace_where`. Note the requirements and caution statements for `replace_where` above. For more information about this strategy, see the [microbatch reference page](/docs/build/incremental-microbatch).
+
+In the following example, the upstream table `events` has been annotated with an `event_time` column called `ts` in its schema file.
+
+
+
+
+
+
+```sql
+-- Use 'date' as the grain for this microbatch table
+{{ config(
+    materialized='incremental',
+    file_format='delta',
+    incremental_strategy = 'microbatch',
+    event_time='date'
+) }}
+
+with new_events as (
+
+    select * from {{ ref('events') }}
+
+)
+
+select
+    user_id,
+    date,
+    count(*) as visits
+
+from new_events
+group by 1, 2
+```
+
+
+
+
+
+
+
+```sql
+create temporary view replace_where__dbt_tmp as
+
+    with new_events as (
+
+        select * from (select * from analytics.events where ts >= '2024-10-01' and ts < '2024-10-02')
+
+    )
+
+    select
+        user_id,
+        date,
+        count(*) as visits
+    from new_events
+    group by 1, 2
+;
+
+insert into analytics.replace_where_incremental
+    replace where CAST(date as TIMESTAMP) >= '2024-10-01' and CAST(date as TIMESTAMP) < '2024-10-02'
+    table `replace_where__dbt_tmp`
+```
+
+
+
+
+
+
+
+
 ## Selecting compute per model
 
@@ -553,9 +875,15 @@ Databricks adapter ... using compute resource .
 
 Materializing a python model requires execution of SQL as well as python.
 Specifically, if your python model is incremental, the current execution pattern involves executing python to create a staging table that is then merged into your target table using SQL.
+
 The python code needs to run on an all purpose cluster, while the SQL code can run on an all purpose cluster or a SQL Warehouse.
+
+
+The python code needs to run on an all purpose cluster (or serverless cluster, see [Python Submission Methods](#python-submission-methods)), while the SQL code can run on an all purpose cluster or a SQL Warehouse.
+
 When you specify your `databricks_compute` for a python model, you are currently only specifying which compute to use when running the model-specific SQL.
-If you wish to use a different compute for executing the python itself, you must specify an alternate `http_path` in the config for the model. Please note that declaring a separate SQL compute and a python compute for your python dbt models is optional. If you wish to do this:
+If you wish to use a different compute for executing the python itself, you must specify an alternate compute in the config for the model.
+For example:
 
@@ -572,8 +900,6 @@ def model(dbt, session):
 
 If your default compute is a SQL Warehouse, you will need to specify an all purpose cluster `http_path` in this way.
 
-
-
 ## Persisting model descriptions
 
 Relation-level docs persistence is supported in dbt v0.17.0. For more
@@ -705,7 +1031,7 @@ The following table summarizes our configuration support:
         partition_by='id',
         schedule = {
             'cron': '0 0 * * * ? *',
-            'time_zone': 'Etc/UTC'
+            'time_zone_value': 'Etc/UTC'
         },
         tblproperties={
             'key': 'value'
@@ -785,9 +1111,5 @@ One application of this feature is making `delta` tables compatible with `iceber
 ) }}
 ```
 
-
-
 `tblproperties` can be specified for python models, but they will be applied via an `ALTER` statement after table creation. This is due to a limitation in PySpark.
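+
+For example, a minimal sketch of annotating a Python model with `tblproperties` from a properties file; the model name and property keys here are placeholders, and dbt-databricks applies them with an `ALTER` statement after the table is created:
+
+```yaml
+models:
+  - name: my_python_model   # hypothetical Python model
+    config:
+      tblproperties:
+        # Any Delta table property or custom key/value pair can go here
+        delta.enableChangeDataFeed: "true"
+        quality.tier: "silver"
+```
+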
-
-
diff --git a/website/docs/reference/resource-configs/dbt_valid_to_current.md b/website/docs/reference/resource-configs/dbt_valid_to_current.md
new file mode 100644
index 00000000000..2a6cf3abe6d
--- /dev/null
+++ b/website/docs/reference/resource-configs/dbt_valid_to_current.md
@@ -0,0 +1,116 @@
+---
+resource_types: [snapshots]
+description: "Use the `dbt_valid_to_current` config to set a custom indicator for the value of `dbt_valid_to` in current snapshot records"
+datatype: "{}"
+default_value: {NULL}
+id: "dbt_valid_to_current"
+---
+
+Available from dbt v1.9 or with [the dbt Cloud "Latest" release track](/docs/dbt-versions/cloud-release-tracks).
+
+
+
+```yaml
+snapshots:
+  my_project:
+    +dbt_valid_to_current: "to_date('9999-12-31')"
+
+```
+
+
+
+
+
+```sql
+{{
+    config(
+        unique_key='id',
+        strategy='timestamp',
+        updated_at='updated_at',
+        dbt_valid_to_current="to_date('9999-12-31')"
+    )
+}}
+```
+
+
+
+
+
+```yml
+snapshots:
+  [](/reference/resource-configs/resource-path):
+    +dbt_valid_to_current: "to_date('9999-12-31')"
+```
+
+
+
+## Description
+
+Use the `dbt_valid_to_current` config to set a custom indicator for the value of `dbt_valid_to` in current snapshot records (like a future date). By default, this value is `NULL`. When set, dbt will use this specified value instead of `NULL` for `dbt_valid_to` for current records in the snapshot table.
+
+This approach makes it easier to assign a custom date, work in a join, or perform range-based filtering that requires an end date.
+
+:::warning
+
+To avoid any unintentional data modification, dbt will _not_ automatically adjust the current value in the existing `dbt_valid_to` column. Existing current records will still have `dbt_valid_to` set to `NULL`.
+
+Any new records inserted _after_ applying the `dbt_valid_to_current` configuration will have `dbt_valid_to` set to the specified value (like '9999-12-31'), instead of the default `NULL` value.
+
+:::
+
+### Considerations
+
+- **Date expressions** — Provide a hardcoded date expression compatible with your data platform, such as `to_date('9999-12-31')`. Note that syntax may vary by warehouse (for example, `to_date('YYYY-MM-DD')` or `date(YYYY, MM, DD)`).
+
+- **Jinja limitation** — `dbt_valid_to_current` only accepts static SQL expressions. Jinja expressions (like `{{ var('my_future_date') }}`) are not supported.
+
+- **Deferral and `state:modified`** — Changes to `dbt_valid_to_current` are compatible with deferral and `--select state:modified`. When this configuration changes, it'll appear in `state:modified` selections, raising a warning to manually make the necessary snapshot updates.
+
+## Default
+
+By default, `dbt_valid_to` is set to `NULL` for current (most recent) records in your snapshot table. This means that these records are still valid and have no defined end date.
+
+If you prefer to use a specific value instead of `NULL` for `dbt_valid_to` in current and future records, you can use the `dbt_valid_to_current` configuration option. For example, setting a date in the far future, `9999-12-31`.
+
+The value assigned to `dbt_valid_to_current` should be a string representing a valid date or timestamp, depending on your database's requirements. Use expressions that work within the data platform.
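+
+For instance, a minimal sketch in `dbt_project.yml` showing two warehouse-flavored expressions; the project name is a placeholder, and you should keep only the variant your platform understands:
+
+```yaml
+snapshots:
+  my_project:
+    # Snowflake or Databricks style far-future date
+    +dbt_valid_to_current: "to_date('9999-12-31')"
+    # BigQuery style alternative:
+    # +dbt_valid_to_current: "date(9999, 12, 31)"
+```
+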
+ + +## Impact on snapshot records + +When you set `dbt_valid_to_current`, it affects how dbt manages the `dbt_valid_to` column in your snapshot table: + +- **For existing records** — To avoid any unintentional data modification, dbt will _not_ automatically adjust the current value in the existing `dbt_valid_to` column. Existing current records will still have `dbt_valid_to` set to `NULL`. + +- **For new records** — Any new records inserted after applying the `dbt_valid_to_current` configuration will have `dbt_valid_to` set to the specified value (for example, '9999-12-31'), instead of `NULL`. + +This means your snapshot table will have current records with `dbt_valid_to` values of both `NULL` (from existing data) and the new specified value (from new data). If you'd rather have consistent `dbt_valid_to` values for current records, you can manually update existing records in your snapshot table (where `dbt_valid_to` is `NULL`) to match your `dbt_valid_to_current` value. + +## Example + + + +```yaml +snapshots: + - name: my_snapshot + config: + strategy: timestamp + updated_at: updated_at + dbt_valid_to_current: "to_date('9999-12-31')" + columns: + - name: dbt_valid_from + description: The timestamp when the record became valid. + - name: dbt_valid_to + description: > + The timestamp when the record ceased to be valid. For current records, + this is either `NULL` or the value specified in `dbt_valid_to_current` + (like `'9999-12-31'`). +``` + + + +The resulting snapshot table contains the configured dbt_valid_to column value: + +| id | dbt_scd_id | dbt_updated_at | dbt_valid_from | dbt_valid_to | +| -- | -------------------- | -------------------- | -------------------- | -------------------- | +| 1 | 60a1f1dbdf899a4dd... | 2024-10-02 ... | 2024-10-02 ... | 9999-12-31 ... | +| 2 | b1885d098f8bcff51... | 2024-10-02 ... | 2024-10-02 ... | 9999-12-31 ... | diff --git a/website/docs/reference/resource-configs/enabled.md b/website/docs/reference/resource-configs/enabled.md index febf1e50c88..b74d7250907 100644 --- a/website/docs/reference/resource-configs/enabled.md +++ b/website/docs/reference/resource-configs/enabled.md @@ -230,14 +230,6 @@ exposures: - - -Support for disabling semantic models has been added in dbt Core v1.7 - - - - - ```yaml @@ -259,20 +251,10 @@ semantic_models: - - - - -Support for disabling saved queries has been added in dbt Core v1.7. - - - - - ```yaml @@ -294,8 +276,6 @@ saved_queries: - - diff --git a/website/docs/reference/resource-configs/event-time.md b/website/docs/reference/resource-configs/event-time.md new file mode 100644 index 00000000000..c18c8de6397 --- /dev/null +++ b/website/docs/reference/resource-configs/event-time.md @@ -0,0 +1,284 @@ +--- +title: "event_time" +id: "event-time" +sidebar_label: "event_time" +resource_types: [models, seeds, source] +description: "dbt uses event_time to understand when an event occurred. When defined, event_time enables microbatch incremental models and more refined comparison of datasets during Advanced CI." +datatype: string +--- + +Available in [the dbt Cloud "Latest" release track](/docs/dbt-versions/cloud-release-tracks) and dbt Core v1.9 and higher. 
+ + + + + + +```yml +models: + [resource-path:](/reference/resource-configs/resource-path) + +event_time: my_time_field +``` + + + + + +```yml +models: + - name: model_name + [config](/reference/resource-properties/config): + event_time: my_time_field +``` + + + + +```sql +{{ config( + event_time='my_time_field' +) }} +``` + + + + + + + + + +```yml +seeds: + [resource-path:](/reference/resource-configs/resource-path) + +event_time: my_time_field +``` + + + + +```yml +seeds: + - name: seed_name + [config](/reference/resource-properties/config): + event_time: my_time_field +``` + + + + + + + + +```yml +snapshots: + [resource-path:](/reference/resource-configs/resource-path) + +event_time: my_time_field +``` + + + + + +```yml +snapshots: + - name: snapshot_name + [config](/reference/resource-properties/config): + event_time: my_time_field +``` + + + + + + + +```sql + +{{ config( + event_time: 'my_time_field' +) }} +``` + + + + +import SnapshotYaml from '/snippets/_snapshot-yaml-spec.md'; + + + + + + + + + + + + +```yml +sources: + [resource-path:](/reference/resource-configs/resource-path) + +event_time: my_time_field +``` + + + + +```yml +sources: + - name: source_name + [config](/reference/resource-properties/config): + event_time: my_time_field +``` + + + + + +## Definition + +Set the `event_time` to the name of the field that represents the timestamp of the event -- "at what time did the row occur" -- as opposed to an event ingestion date. You can configure `event_time` for a [model](/docs/build/models), [seed](/docs/build/seeds), or [source](/docs/build/sources) in your `dbt_project.yml` file, property YAML file, or config block. + +Here are some examples of good and bad `event_time` columns: + +- ✅ Good: + - `account_created_at` — This represents the specific time when an account was created, making it a fixed event in time. + - `session_began_at` — This captures the exact timestamp when a user session started, which won’t change and directly ties to the event. + +- ❌ Bad: + + - `_fivetran_synced` — This isn't the time that the event happened, it's the time that the event was ingested. + - `last_updated_at` — This isn't a good use case as this will keep changing over time. + +`event_time` is required for [Incremental microbatch](/docs/build/incremental-microbatch) and highly recommended for [Advanced CI's compare changes](/docs/deploy/advanced-ci#optimizing-comparisons) in CI/CD workflows, where it ensures the same time-slice of data is correctly compared between your CI and production environments. + +## Examples + + + + + +Here's an example in the `dbt_project.yml` file: + + + +```yml +models: + my_project: + user_sessions: + +event_time: session_start_time +``` + + +Example in a properties YAML file: + + + +```yml +models: + - name: user_sessions + config: + event_time: session_start_time +``` + + + +Example in sql model config block: + + + +```sql +{{ config( + event_time='session_start_time' +) }} +``` + + + +This setup sets `session_start_time` as the `event_time` for the `user_sessions` model. + + + + +Here's an example in the `dbt_project.yml` file: + + + +```yml +seeds: + my_project: + my_seed: + +event_time: record_timestamp +``` + + + +Example in a seed properties YAML: + + + +```yml +seeds: + - name: my_seed + config: + event_time: record_timestamp +``` + + +This setup sets `record_timestamp` as the `event_time` for `my_seed`. 
+ + + + + +Here's an example in the `dbt_project.yml` file: + + + +```yml +snapshots: + my_project: + my_snapshot: + +event_time: record_timestamp +``` + + + +Example in a snapshot properties YAML: + + + +```yml +snapshots: + - name: my_snapshot + config: + event_time: record_timestamp +``` + + +This setup sets `record_timestamp` as the `event_time` for `my_snapshot`. + + + + + +Here's an example of source properties YAML file: + + + +```yml +sources: + - name: source_name + tables: + - name: table_name + config: + event_time: event_timestamp +``` + + +This setup sets `event_timestamp` as the `event_time` for the specified source table. + + + diff --git a/website/docs/reference/resource-configs/group.md b/website/docs/reference/resource-configs/group.md index 717d7de89f5..cd0ad2683f5 100644 --- a/website/docs/reference/resource-configs/group.md +++ b/website/docs/reference/resource-configs/group.md @@ -218,14 +218,6 @@ metrics: - - -Support for grouping semantic models has been added in dbt Core v1.7. - - - - - ```yaml @@ -247,20 +239,10 @@ semantic_models: - - - - -Support for grouping saved queries has been added in dbt Core v1.7. - - - - - ```yaml @@ -282,8 +264,6 @@ saved_queries: - - diff --git a/website/docs/reference/resource-configs/hard-deletes.md b/website/docs/reference/resource-configs/hard-deletes.md new file mode 100644 index 00000000000..50c8046f4e1 --- /dev/null +++ b/website/docs/reference/resource-configs/hard-deletes.md @@ -0,0 +1,111 @@ +--- +title: hard_deletes +resource_types: [snapshots] +description: "Use the `hard_deletes` config to control how deleted rows are tracked in your snapshot table." +datatype: "boolean" +default_value: {ignore} +id: "hard-deletes" +sidebar_label: "hard_deletes" +--- + +Available from dbt v1.9 or with [dbt Cloud "Latest" release track](/docs/dbt-versions/cloud-release-tracks). + + + + +```yaml +snapshots: + - name: + config: + hard_deletes: 'ignore' | 'invalidate' | 'new_record' +``` + + + + +```yml +snapshots: + [](/reference/resource-configs/resource-path): + +hard_deletes: "ignore" | "invalidate" | "new_record" +``` + + + + + +```sql +{{ + config( + unique_key='id', + strategy='timestamp', + updated_at='updated_at', + hard_deletes='ignore' | 'invalidate' | 'new_record' + ) +}} +``` + + + + +## Description + +The `hard_deletes` config gives you more control on how to handle deleted rows from the source. Supported options are `ignore` (default), `invalidate` (replaces the legacy `invalidate_hard_deletes=true`), and `new_record`. Note that `new_record` will create a new metadata column in the snapshot table. + +import HardDeletes from '/snippets/_hard-deletes.md'; + + + +:::warning + +If you're updating an existing snapshot to use the `hard_deletes` config, dbt _will not_ handle migrations automatically. We recommend either only using these settings for net-new snapshots, or [arranging an update](/reference/snapshot-configs#snapshot-configuration-migration) of pre-existing tables before enabling this setting. +::: + +## Default + +By default, if you don’t specify `hard_deletes`, it'll automatically default to `ignore`. Deleted rows will not be tracked and their `dbt_valid_to` column remains `NULL`. + +The `hard_deletes` config has three methods: + +| Methods | Description | +| --------- | ----------- | +| `ignore` (default) | No action for deleted records. | +| `invalidate` | Behaves the same as the existing `invalidate_hard_deletes=true`, where deleted records are invalidated by setting `dbt_valid_to` to current time. 
This method replaces the `invalidate_hard_deletes` config to give you more control on how to handle deleted rows from the source. | +| `new_record` | Tracks deleted records as new rows using the `dbt_is_deleted` meta field when records are deleted.| + +## Considerations +- **Backward compatibility**: The `invalidate_hard_deletes` config is still supported for existing snapshots but can't be used alongside `hard_deletes`. +- **New snapshots**: For new snapshots, we recommend using `hard_deletes` instead of `invalidate_hard_deletes`. +- **Migration**: If you switch an existing snapshot to use `hard_deletes` without migrating your data, you may encounter inconsistent or incorrect results, such as a mix of old and new data formats. + +## Example + + + +```yaml +snapshots: + - name: my_snapshot + config: + hard_deletes: new_record # options are: 'ignore', 'invalidate', or 'new_record' + strategy: timestamp + updated_at: updated_at + columns: + - name: dbt_valid_from + description: Timestamp when the record became valid. + - name: dbt_valid_to + description: Timestamp when the record stopped being valid. + - name: dbt_is_deleted + description: Indicates whether the record was deleted. +``` + + + +The resulting snapshot table contains the `hard_deletes: new_record` configuration. If a record is deleted and later restored, the resulting snapshot table might look like this: + +| id | dbt_scd_id | Status | dbt_updated_at | dbt_valid_from | dbt_valid_to | dbt_is_deleted | +| -- | -------------------- | ----- | -------------------- | --------------------| -------------------- | ----------- | +| 1 | 60a1f1dbdf899a4dd... | pending | 2024-10-02 ... | 2024-05-19... | 2024-05-20 ... | False | +| 1 | b1885d098f8bcff51... | pending | 2024-10-02 ... | 2024-05-20 ... | 2024-06-03 ... | True | +| 1 | b1885d098f8bcff53... | shipped | 2024-10-02 ... | 2024-06-03 ... | | False | +| 2 | b1885d098f8bcff55... | active | 2024-10-02 ... | 2024-05-19 ... | | False | + +In this example, the `dbt_is_deleted` column is set to `True` when the record is deleted. When the record is restored, the `dbt_is_deleted` column is set to `False`. diff --git a/website/docs/reference/resource-configs/invalidate_hard_deletes.md b/website/docs/reference/resource-configs/invalidate_hard_deletes.md index bdaec7e33a9..67123487fa1 100644 --- a/website/docs/reference/resource-configs/invalidate_hard_deletes.md +++ b/website/docs/reference/resource-configs/invalidate_hard_deletes.md @@ -1,9 +1,17 @@ --- +title: invalidate_hard_deletes (legacy) resource_types: [snapshots] description: "Invalidate_hard_deletes - Read this in-depth guide to learn about configurations in dbt." datatype: column_name +sidebar_label: invalidate_hard_deletes (legacy) --- +:::warning This is a legacy config — Use the [`hard_deletes`](/reference/resource-configs/hard-deletes) config instead. + +In Versionless and dbt Core 1.9 and higher, the [`hard_deletes`](/reference/resource-configs/hard-deletes) config replaces the `invalidate_hard_deletes` config for better control over how to handle deleted rows from the source. + +For new snapshots, set the config to `hard_deletes='invalidate'` instead of `invalidate_hard_deletes=true`. For existing snapshots, [arrange an update](/reference/snapshot-configs#snapshot-configuration-migration) of pre-existing tables before enabling this setting. 
Refer to [`hard_deletes`](/reference/resource-configs/hard-deletes) for more information.
+:::
diff --git a/website/docs/reference/resource-configs/lookback.md b/website/docs/reference/resource-configs/lookback.md
new file mode 100644
index 00000000000..037ffdeb68f
--- /dev/null
+++ b/website/docs/reference/resource-configs/lookback.md
@@ -0,0 +1,55 @@
+---
+title: "lookback"
+id: "lookback"
+sidebar_label: "lookback"
+resource_types: [models]
+description: "dbt uses `lookback` to determine how many 'batches' of `batch_size` to reprocess when a microbatch incremental model is running incrementally."
+datatype: int
+---
+
+Available in the [dbt Cloud "Latest" release track](/docs/dbt-versions/cloud-release-tracks) and dbt Core v1.9 and higher.
+
+## Definition
+
+Set the `lookback` to an integer greater than or equal to zero. The default value is `1`. You can configure `lookback` for a [model](/docs/build/models) in your `dbt_project.yml` file, property YAML file, or config block.
+
+## Examples
+
+The following examples set `2` as the `lookback` config for the `user_sessions` model.
+
+Example in the `dbt_project.yml` file:
+
+
+
+```yml
+models:
+  my_project:
+    user_sessions:
+      +lookback: 2
+```
+
+
+Example in a properties YAML file:
+
+
+
+```yml
+models:
+  - name: user_sessions
+    config:
+      lookback: 2
+```
+
+
+
+Example in sql model config block:
+
+
+
+```sql
+{{ config(
+    lookback=2
+) }}
+```
+
+
diff --git a/website/docs/reference/resource-configs/meta.md b/website/docs/reference/resource-configs/meta.md
index 2bcccdd4141..e1542bdbc82 100644
--- a/website/docs/reference/resource-configs/meta.md
+++ b/website/docs/reference/resource-configs/meta.md
@@ -56,7 +56,7 @@ See [configs and properties](/reference/configs-and-properties) for details.
 ```yml
 version: 2
 
-sources:
+[sources](/reference/source-properties):
   - name: model_name
     config:
       meta: {}
@@ -110,7 +110,7 @@
 version: 2
 
 snapshots:
   - name: snapshot_name
     config:
-      meta: {}
+      [meta](/reference/snapshot-properties): {}
 
     columns:
       - name: column_name
@@ -147,7 +147,7 @@ The `meta` config is not currently supported for analyses.
 ```yml
 version: 2
 
-macros:
+[macros](/reference/macro-properties):
   - name: macro_name
     meta: {}
 
@@ -179,14 +179,6 @@ exposures:
 
-
-
-Support for grouping semantic models was added in dbt Core v1.7
-
-
-
-
-
 ```yml
@@ -201,8 +193,6 @@
 semantic_models:
 
 The `meta` config can also be defined under the `semantic-models` config block in `dbt_project.yml`. See [configs and properties](/reference/configs-and-properties) for details.
 
-
-
@@ -249,14 +239,6 @@ metrics:
 
-
-
-Support for saved queries has been added in dbt Core v1.7.
- - - - - ```yml @@ -268,8 +250,6 @@ saved_queries: - - @@ -307,7 +287,7 @@ models: ```yml version: 2 -sources: +[sources](/reference/source-properties): - name: salesforce tables: diff --git a/website/docs/reference/resource-configs/no-configs.md b/website/docs/reference/resource-configs/no-configs.md index 5eec26917c8..f72b286c837 100644 --- a/website/docs/reference/resource-configs/no-configs.md +++ b/website/docs/reference/resource-configs/no-configs.md @@ -1,11 +1,12 @@ --- -title: "No specifc configurations for this Adapter" +title: "No specific configurations for this adapter" id: "no-configs" --- If you were guided to this page from a data platform setup article, it most likely means: - Setting up the profile is the only action the end-user needs to take on the data platform, or -- The subsequent actions the end-user needs to take are not currently documented +- The subsequent actions the end-user needs to take are not currently documented, or +- Relevant information is provided on the documentation pages of the data platform vendor. If you'd like to contribute to data platform-specific configuration information, refer to [Documenting a new adapter](/guides/adapter-creation) diff --git a/website/docs/reference/resource-configs/postgres-configs.md b/website/docs/reference/resource-configs/postgres-configs.md index 07cfc938f1c..e71c6f1484d 100644 --- a/website/docs/reference/resource-configs/postgres-configs.md +++ b/website/docs/reference/resource-configs/postgres-configs.md @@ -11,6 +11,7 @@ In dbt-postgres, the following incremental materialization strategies are suppor - `append` (default when `unique_key` is not defined) - `merge` - `delete+insert` (default when `unique_key` is defined) +- [`microbatch`](/docs/build/incremental-microbatch) ## Performance optimizations @@ -185,20 +186,3 @@ It's worth noting that, unlike tables, dbt monitors this parameter for changes a This happens via a `DROP/CREATE` of the indexes, which can be thought of as an `ALTER` of the materialized view. Learn more about these parameters in Postgres's [docs](https://www.postgresql.org/docs/current/sql-creatematerializedview.html). - - - -### Limitations - -#### Changing materialization to and from "materialized_view" - -Swapping an already materialized model to a materialized view, and vice versa, is not supported. -The workaround is to manually drop the existing materialization in the data warehouse prior to calling `dbt run`. -Running with `--full-refresh` flag will not work to drop the existing table or view and create the materialized view (and vice versa). -This would only need to be done once as the existing object would then be a materialized view. - -For example,`my_model`, has already been materialized as a table in the underlying data platform via `dbt run`. -If the user changes the model's config to `materialized="materialized_view"`, they will get an error. -The solution is to execute `DROP TABLE my_model` on the data warehouse before trying the model again. - - diff --git a/website/docs/reference/resource-configs/pre-hook-post-hook.md b/website/docs/reference/resource-configs/pre-hook-post-hook.md index ce818768134..ee3c81b0fd6 100644 --- a/website/docs/reference/resource-configs/pre-hook-post-hook.md +++ b/website/docs/reference/resource-configs/pre-hook-post-hook.md @@ -154,9 +154,11 @@ Pre- and post-hooks can also call macros that return SQL statements. 
If your mac dbt aims to provide all the boilerplate SQL you need (DDL, DML, and DCL) via out-of-the-box functionality, which you can configure quickly and concisely. In some cases, there may be SQL that you want or need to run, specific to functionality in your data platform, which dbt does not (yet) offer as a built-in feature. In those cases, you can write the exact SQL you need, using dbt's compilation context, and pass it into a `pre-` or `post-` hook to run before or after your model, seed, or snapshot. -## Examples +import SQLCompilationError from '/snippets/_render-method.md'; + + - +## Examples ### [Redshift] Unload one model to S3 diff --git a/website/docs/reference/resource-configs/redshift-configs.md b/website/docs/reference/resource-configs/redshift-configs.md index e7149ae484e..01c9bffd055 100644 --- a/website/docs/reference/resource-configs/redshift-configs.md +++ b/website/docs/reference/resource-configs/redshift-configs.md @@ -17,6 +17,7 @@ In dbt-redshift, the following incremental materialization strategies are suppor - `append` (default when `unique_key` is not defined) - `merge` - `delete+insert` (default when `unique_key` is defined) +- [`microbatch`](/docs/build/incremental-microbatch) All of these strategies are inherited from dbt-postgres. @@ -230,21 +231,6 @@ As with most data platforms, there are limitations associated with materialized Find more information about materialized view limitations in Redshift's [docs](https://docs.aws.amazon.com/redshift/latest/dg/materialized-view-create-sql-command.html#mv_CREATE_MATERIALIZED_VIEW-limitations). - - -#### Changing materialization from "materialized_view" to "table" or "view" - -Swapping a materialized view to a table or view is not supported. -You must manually drop the existing materialized view in the data warehouse before calling `dbt run`. -Normally, re-running with the `--full-refresh` flag would resolve this, but not in this case. -This would only need to be done once as the existing object would then be a materialized view. - -For example, assume that a materialized view, `my_mv.sql`, has already been materialized to the underlying data platform via `dbt run`. -If the user changes the model's config to `materialized="table"`, they will get an error. -The workaround is to execute `DROP MATERIALIZED VIEW my_mv CASCADE` on the data warehouse before trying the model again. - - - ## Unit test limitations diff --git a/website/docs/reference/resource-configs/schema.md b/website/docs/reference/resource-configs/schema.md index 1e2ff47729c..6f56215de61 100644 --- a/website/docs/reference/resource-configs/schema.md +++ b/website/docs/reference/resource-configs/schema.md @@ -50,7 +50,7 @@ This would result in the generated relation being located in the `mappings` sche -Available for versionless dbt Cloud or dbt Core v1.9+. Select v1.9 or newer from the version dropdown to view the configs. +Available in dbt Core v1.9+. Select v1.9 or newer from the version dropdown to view the configs. Try it now in the [dbt Cloud "Latest" release track](/docs/dbt-versions/cloud-release-tracks). @@ -108,7 +108,9 @@ This would result in the test results being stored in the `test_results` schema. Refer to [Usage](#usage) for more examples. ## Definition -Optionally specify a custom schema for a [model](/docs/build/sql-models) or [seed](/docs/build/seeds). (To specify a schema for a [snapshot](/docs/build/snapshots), use the [`target_schema` config](/reference/resource-configs/target_schema)). 
+Optionally specify a custom schema for a [model](/docs/build/sql-models), [seed](/docs/build/seeds), [snapshot](/docs/build/snapshots), [saved query](/docs/build/saved-queries), or [test](/docs/build/data-tests). + +For users on dbt Cloud v1.8 or earlier, use the [`target_schema` config](/reference/resource-configs/target_schema) to specify a custom schema for a snapshot. When dbt creates a relation (/) in a database, it creates it as: `{{ database }}.{{ schema }}.{{ identifier }}`, e.g. `analytics.finance.payments` diff --git a/website/docs/reference/resource-configs/snapshot_meta_column_names.md b/website/docs/reference/resource-configs/snapshot_meta_column_names.md index 46aba7886d0..f1d29ba8bee 100644 --- a/website/docs/reference/resource-configs/snapshot_meta_column_names.md +++ b/website/docs/reference/resource-configs/snapshot_meta_column_names.md @@ -6,7 +6,7 @@ default_value: {"dbt_valid_from": "dbt_valid_from", "dbt_valid_to": "dbt_valid_t id: "snapshot_meta_column_names" --- -Starting in 1.9 or with [versionless](/docs/dbt-versions/upgrade-dbt-version-in-cloud#versionless) dbt Cloud. +Available in dbt Core v1.9+. Select v1.9 or newer from the version dropdown to view the configs. Try it now in the [dbt Cloud "Latest" release track](/docs/dbt-versions/cloud-release-tracks). @@ -19,6 +19,7 @@ snapshots: dbt_valid_to: dbt_scd_id: dbt_updated_at: + dbt_is_deleted: ``` @@ -34,6 +35,7 @@ snapshots: "dbt_valid_to": "", "dbt_scd_id": "", "dbt_updated_at": "", + "dbt_is_deleted": "", } ) }} @@ -52,7 +54,7 @@ snapshots: dbt_valid_to: dbt_scd_id: dbt_updated_at: - + dbt_is_deleted: ``` @@ -71,6 +73,7 @@ By default, dbt snapshots use the following column names to track change history | `dbt_valid_to` | The timestamp when this row is no longer valid. | | | `dbt_scd_id` | A unique key generated for each snapshot row. | This is used internally by dbt. | | `dbt_updated_at` | The `updated_at` timestamp of the source record when this snapshot row was inserted. | This is used internally by dbt. | +| `dbt_is_deleted` | A boolean value indicating if the record has been deleted. `True` if deleted, `False` otherwise. | Added when `hard_deletes='new_record'` is configured. | However, these column names can be customized using the `snapshot_meta_column_names` config. @@ -92,18 +95,21 @@ snapshots: unique_key: id strategy: check check_cols: all + hard_deletes: new_record snapshot_meta_column_names: dbt_valid_from: start_date dbt_valid_to: end_date dbt_scd_id: scd_id dbt_updated_at: modified_date + dbt_is_deleted: is_deleted ``` The resulting snapshot table contains the configured meta column names: -| id | scd_id | modified_date | start_date | end_date | -| -- | -------------------- | -------------------- | -------------------- | -------------------- | -| 1 | 60a1f1dbdf899a4dd... | 2024-10-02 ... | 2024-10-02 ... | 2024-10-02 ... | -| 2 | b1885d098f8bcff51... | 2024-10-02 ... | 2024-10-02 ... | | +| id | scd_id | modified_date | start_date | end_date | is_deleted | +| -- | -------------------- | -------------------- | -------------------- | -------------------- | ---------- | +| 1 | 60a1f1dbdf899a4dd... | 2024-10-02 ... | 2024-10-02 ... | 2024-10-03 ... | False | +| 1 | 60a1f1dbdf899a4dd... | 2024-10-03 ... | 2024-10-03 ... | | True | +| 2 | b1885d098f8bcff51... | 2024-10-02 ... | 2024-10-02 ... 
| | False | diff --git a/website/docs/reference/resource-configs/snowflake-configs.md b/website/docs/reference/resource-configs/snowflake-configs.md index 342e8290458..d576b195b65 100644 --- a/website/docs/reference/resource-configs/snowflake-configs.md +++ b/website/docs/reference/resource-configs/snowflake-configs.md @@ -38,11 +38,11 @@ flags: The following configurations are supported. For more information, check out the Snowflake reference for [`CREATE ICEBERG TABLE` (Snowflake as the catalog)](https://docs.snowflake.com/en/sql-reference/sql/create-iceberg-table-snowflake). -| Field | Type | Required | Description | Sample input | Note | -| --------------------- | ------ | -------- | -------------------------------------------------------------------------------------------------------------------------- | ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| Table Format | String | Yes | Configures the objects table format. | `iceberg` | `iceberg` is the only accepted value. | +| Field | Type | Required | Description | Sample input | Note | +| ------ | ----- | -------- | ------------- | ------------ | ------ | +| Table Format | String | Yes | Configures the objects table format. | `iceberg` | `iceberg` is the only accepted value. | | External volume | String | Yes(*) | Specifies the identifier (name) of the external volume where Snowflake writes the Iceberg table's metadata and data files. | `my_s3_bucket` | *You don't need to specify this if the account, database, or schema already has an associated external volume. [More info](https://docs.snowflake.com/en/sql-reference/sql/create-iceberg-table-snowflake#:~:text=Snowflake%20Table%20Structures.-,external_volume) | -| Base location Subpath | String | No | An optional suffix to add to the `base_location` path that dbt automatically specifies. | `jaffle_marketing_folder` | We recommend that you do not specify this. Modifying this parameter results in a new Iceberg table. See [Base Location](#base-location) for more info. | +| Base location Subpath | String | No | An optional suffix to add to the `base_location` path that dbt automatically specifies. | `jaffle_marketing_folder` | We recommend that you do not specify this. Modifying this parameter results in a new Iceberg table. See [Base Location](#base-location) for more info. | ### Example configuration @@ -299,7 +299,7 @@ Snowflake allows two configuration scenarios for scheduling automatic refreshes: - **Time-based** — Provide a value of the form ` { seconds | minutes | hours | days }`. For example, if the dynamic table needs to be updated every 30 minutes, use `target_lag='30 minutes'`. - **Downstream** — Applicable when the dynamic table is referenced by other dynamic tables. In this scenario, `target_lag='downstream'` allows for refreshes to be controlled at the target, instead of at each layer. -Learn more about `target_lag` in Snowflake's [docs](https://docs.snowflake.com/en/user-guide/dynamic-tables-refresh#understanding-target-lag). +Learn more about `target_lag` in Snowflake's [docs](https://docs.snowflake.com/en/user-guide/dynamic-tables-refresh#understanding-target-lag). Please note that Snowflake supports a target lag of 1 minute or longer. 
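+
+For instance, a minimal sketch of both scheduling patterns in `dbt_project.yml`; the project and folder names are placeholders:
+
+```yaml
+models:
+  my_project:
+    staging:
+      # These dynamic tables are only read by other dynamic tables,
+      # so let the downstream (target) tables control the refresh cadence
+      +materialized: dynamic_table
+      +target_lag: downstream
+    marts:
+      # Final layer: keep results at most 30 minutes behind the sources
+      +materialized: dynamic_table
+      +target_lag: "30 minutes"
+```
+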
@@ -337,33 +337,6 @@ For dbt limitations, these dbt features are not supported: - [Model contracts](/docs/collaborate/govern/model-contracts) - [Copy grants configuration](/reference/resource-configs/snowflake-configs#copying-grants) - - -#### Changing materialization to and from "dynamic_table" - -Version `1.6.x` does not support altering the materialization from a non-dynamic table be a dynamic table and vice versa. -Re-running with the `--full-refresh` does not resolve this either. -The workaround is manually dropping the existing model in the warehouse prior to calling `dbt run`. -This only needs to be done once for the conversion. - -For example, assume for the example model below, `my_model`, has already been materialized to the underlying data platform via `dbt run`. -If the model config is updated to `materialized="dynamic_table"`, dbt will return an error. -The workaround is to execute `DROP TABLE my_model` on the data warehouse before trying the model again. - - - -```yaml - -{{ config( - materialized="table" # or any model type (e.g. view, incremental) -) }} - -``` - - - - - ## Temporary tables Incremental table merges for Snowflake prefer to utilize a `view` rather than a `temporary table`. The reasoning is to avoid the database write step that a temporary table would initiate and save compile time. @@ -497,8 +470,15 @@ In this example, you can set up a query tag to be applied to every query with th The [`incremental_strategy` config](/docs/build/incremental-strategy) controls how dbt builds incremental models. By default, dbt will use a [merge statement](https://docs.snowflake.net/manuals/sql-reference/sql/merge.html) on Snowflake to refresh incremental tables. +Snowflake supports the following incremental strategies: +- Merge (default) +- Append +- Delete+insert +- [`microbatch`](/docs/build/incremental-microbatch) + Snowflake's `merge` statement fails with a "nondeterministic merge" error if the `unique_key` specified in your model config is not actually unique. If you encounter this error, you can instruct dbt to use a two-step incremental approach by setting the `incremental_strategy` config for your model to `delete+insert`. + ## Configuring table clustering dbt supports [table clustering](https://docs.snowflake.net/manuals/user-guide/tables-clustering-keys.html) on Snowflake. To control clustering for a or incremental model, use the `cluster_by` config. When this configuration is applied, dbt will do two things: @@ -695,277 +675,37 @@ models: +## Source freshness known limitation -The Snowflake adapter supports [dynamic tables](https://docs.snowflake.com/en/user-guide/dynamic-tables-about). -This materialization is specific to Snowflake, which means that any model configuration that -would normally come along for the ride from `dbt-core` (e.g. as with a `view`) may not be available -for dynamic tables. This gap will decrease in future patches and versions. -While this materialization is specific to Snowflake, it very much follows the implementation -of [materialized views](/docs/build/materializations#Materialized-View). -In particular, dynamic tables have access to the `on_configuration_change` setting. 
-Dynamic tables are supported with the following configuration parameters: - - - -| Parameter | Type | Required | Default | Change Monitoring Support | -|--------------------|------------|----------|-------------|---------------------------| -| [`on_configuration_change`](/reference/resource-configs/on_configuration_change) | `` | no | `apply` | n/a | -| [`target_lag`](#target-lag) | `` | yes | | alter | -| [`snowflake_warehouse`](#configuring-virtual-warehouses) | `` | yes | | alter | - - - - -| Parameter | Type | Required | Default | Change Monitoring Support | -|--------------------|------------|----------|-------------|---------------------------| -| [`on_configuration_change`](/reference/resource-configs/on_configuration_change) | `` | no | `apply` | n/a | -| [`target_lag`](#target-lag) | `` | yes | | alter | -| [`snowflake_warehouse`](#configuring-virtual-warehouses) | `` | yes | | alter | -| [`refresh_mode`](#refresh-mode) | `` | no | `AUTO` | refresh | -| [`initialize`](#initialize) | `` | no | `ON_CREATE` | n/a | - - - - - - - - - - - -```yaml -models: - [](/reference/resource-configs/resource-path): - [+](/reference/resource-configs/plus-prefix)[materialized](/reference/resource-configs/materialized): dynamic_table - [+](/reference/resource-configs/plus-prefix)[on_configuration_change](/reference/resource-configs/on_configuration_change): apply | continue | fail - [+](/reference/resource-configs/plus-prefix)[target_lag](#target-lag): downstream | - [+](/reference/resource-configs/plus-prefix)[snowflake_warehouse](#configuring-virtual-warehouses): - -``` - - - - - - - - - - -```yaml -version: 2 - -models: - - name: [] - config: - [materialized](/reference/resource-configs/materialized): dynamic_table - [on_configuration_change](/reference/resource-configs/on_configuration_change): apply | continue | fail - [target_lag](#target-lag): downstream | - [snowflake_warehouse](#configuring-virtual-warehouses): - -``` - - - - - - - - - - -```jinja - -{{ config( - [materialized](/reference/resource-configs/materialized)="dynamic_table", - [on_configuration_change](/reference/resource-configs/on_configuration_change)="apply" | "continue" | "fail", - [target_lag](#target-lag)="downstream" | " seconds | minutes | hours | days", - [snowflake_warehouse](#configuring-virtual-warehouses)="", - -) }} - -``` - - - - - - - - - - - - - - - - - -```yaml -models: - [](/reference/resource-configs/resource-path): - [+](/reference/resource-configs/plus-prefix)[materialized](/reference/resource-configs/materialized): dynamic_table - [+](/reference/resource-configs/plus-prefix)[on_configuration_change](/reference/resource-configs/on_configuration_change): apply | continue | fail - [+](/reference/resource-configs/plus-prefix)[target_lag](#target-lag): downstream | - [+](/reference/resource-configs/plus-prefix)[snowflake_warehouse](#configuring-virtual-warehouses): - [+](/reference/resource-configs/plus-prefix)[refresh_mode](#refresh-mode): AUTO | FULL | INCREMENTAL - [+](/reference/resource-configs/plus-prefix)[initialize](#initialize): ON_CREATE | ON_SCHEDULE - -``` - - - - - - - - - - -```yaml -version: 2 - -models: - - name: [] - config: - [materialized](/reference/resource-configs/materialized): dynamic_table - [on_configuration_change](/reference/resource-configs/on_configuration_change): apply | continue | fail - [target_lag](#target-lag): downstream | - [snowflake_warehouse](#configuring-virtual-warehouses): - [refresh_mode](#refresh-mode): AUTO | FULL | INCREMENTAL - [initialize](#initialize): 
ON_CREATE | ON_SCHEDULE - -``` - - - - - - - - - - -```jinja - -{{ config( - [materialized](/reference/resource-configs/materialized)="dynamic_table", - [on_configuration_change](/reference/resource-configs/on_configuration_change)="apply" | "continue" | "fail", - [target_lag](#target-lag)="downstream" | " seconds | minutes | hours | days", - [snowflake_warehouse](#configuring-virtual-warehouses)="", - [refresh_mode](#refresh-mode)="AUTO" | "FULL" | "INCREMENTAL", - [initialize](#initialize)="ON_CREATE" | "ON_SCHEDULE", - -) }} - -``` - - - - - - - - - -Learn more about these parameters in Snowflake's [docs](https://docs.snowflake.com/en/sql-reference/sql/create-dynamic-table): - -### Target lag +Snowflake calculates source freshness using information from the `LAST_ALTERED` column, meaning it relies on a field updated whenever any object undergoes modification, not only data updates. No action must be taken, but analytics teams should note this caveat. -Snowflake allows two configuration scenarios for scheduling automatic refreshes: -- **Time-based** — Provide a value of the form ` { seconds | minutes | hours | days }`. For example, if the dynamic table needs to be updated every 30 minutes, use `target_lag='30 minutes'`. -- **Downstream** — Applicable when the dynamic table is referenced by other dynamic tables. In this scenario, `target_lag='downstream'` allows for refreshes to be controlled at the target, instead of at each layer. +Per the [Snowflake documentation](https://docs.snowflake.com/en/sql-reference/info-schema/tables#usage-notes): -Learn more about `target_lag` in Snowflake's [docs](https://docs.snowflake.com/en/user-guide/dynamic-tables-refresh#understanding-target-lag). + >The `LAST_ALTERED` column is updated when the following operations are performed on an object: + >- DDL operations. + >- DML operations (for tables only). + >- Background maintenance operations on metadata performed by Snowflake. -### Refresh mode +## Pagination for object results -Snowflake allows three options for refresh mode: -- **AUTO** — Enforces an incremental refresh of the dynamic table by default. If the `CREATE DYNAMIC TABLE` statement does not support the incremental refresh mode, the dynamic table is automatically created with the full refresh mode. -- **FULL** — Enforces a full refresh of the dynamic table, even if the dynamic table can be incrementally refreshed. -- **INCREMENTAL** — Enforces an incremental refresh of the dynamic table. If the query that underlies the dynamic table can’t perform an incremental refresh, dynamic table creation fails and displays an error message. +By default, when dbt encounters a schema with up to 100,000 objects, it will paginate the results from `show objects` at 10,000 per page for up to 10 pages. -Learn more about `refresh_mode` in [Snowflake's docs](https://docs.snowflake.com/en/user-guide/dynamic-tables-refresh). +Environments with more than 100,000 objects in a schema can customize the number of results per page and the page limit using the following [flags](/reference/global-configs/about-global-configs) in the `dbt_project.yml`: -### Initialize +- `list_relations_per_page` — The number of relations on each page (Max 10k as this is the most Snowflake allows). +- `list_relations_page_limit` — The maximum number of pages to include in the results. -Snowflake allows two options for initialize: -- **ON_CREATE** — Refreshes the dynamic table synchronously at creation. If this refresh fails, dynamic table creation fails and displays an error message. 
-- **ON_SCHEDULE** — Refreshes the dynamic table at the next scheduled refresh. - -Learn more about `initialize` in [Snowflake's docs](https://docs.snowflake.com/en/user-guide/dynamic-tables-refresh). - - +For example, if you wanted to include 10,000 objects per page and include up to 100 pages (1 million objects), configure the flags as follows: -### Limitations -As with materialized views on most data platforms, there are limitations associated with dynamic tables. Some worth noting include: +```yml -- Dynamic table SQL has a [limited feature set](https://docs.snowflake.com/en/user-guide/dynamic-tables-tasks-create#query-constructs-not-currently-supported-in-dynamic-tables). -- Dynamic table SQL cannot be updated; the dynamic table must go through a `--full-refresh` (DROP/CREATE). -- Dynamic tables cannot be downstream from: materialized views, external tables, streams. -- Dynamic tables cannot reference a view that is downstream from another dynamic table. - -Find more information about dynamic table limitations in Snowflake's [docs](https://docs.snowflake.com/en/user-guide/dynamic-tables-tasks-create#dynamic-table-limitations-and-supported-functions). - -For dbt limitations, these dbt features are not supported: -- [Model contracts](/docs/collaborate/govern/model-contracts) -- [Copy grants configuration](/reference/resource-configs/snowflake-configs#copying-grants) - - - -#### Changing materialization to and from "dynamic_table" - -Version `1.6.x` does not support altering the materialization from a non-dynamic table be a dynamic table and vice versa. -Re-running with the `--full-refresh` does not resolve this either. -The workaround is manually dropping the existing model in the warehouse prior to calling `dbt run`. -This only needs to be done once for the conversion. - -For example, assume for the example model below, `my_model`, has already been materialized to the underlying data platform via `dbt run`. -If the model config is updated to `materialized="dynamic_table"`, dbt will return an error. -The workaround is to execute `DROP TABLE my_model` on the data warehouse before trying the model again. - - - -```yaml - -{{ config( - materialized="table" # or any model type (e.g. view, incremental) -) }} +flags: + list_relations_per_page: 10000 + list_relations_page_limit: 100 ``` - - - -## Source freshness known limitation - -Snowflake calculates source freshness using information from the `LAST_ALTERED` column, meaning it relies on a field updated whenever any object undergoes modification, not only data updates. No action must be taken, but analytics teams should note this caveat. - -Per the [Snowflake documentation](https://docs.snowflake.com/en/sql-reference/info-schema/tables#usage-notes): - - >The `LAST_ALTERED` column is updated when the following operations are performed on an object: - >- DDL operations. - >- DML operations (for tables only). - >- Background maintenance operations on metadata performed by Snowflake. diff --git a/website/docs/reference/resource-configs/spark-configs.md b/website/docs/reference/resource-configs/spark-configs.md index 3b2174b8ff5..a52fd93eace 100644 --- a/website/docs/reference/resource-configs/spark-configs.md +++ b/website/docs/reference/resource-configs/spark-configs.md @@ -37,7 +37,8 @@ For that reason, the dbt-spark plugin leans heavily on the [`incremental_strateg - **`append`** (default): Insert new records without updating or overwriting any existing data. 
- **`insert_overwrite`**: If `partition_by` is specified, overwrite partitions in the table with new data. If no `partition_by` is specified, overwrite the entire table with new data.
- **`merge`** (Delta, Iceberg and Hudi file format only): Match records based on a `unique_key`; update old records, insert new ones. (If no `unique_key` is specified, all new data is inserted, similar to `append`.)
-
+- **`microbatch`**: Implements the [microbatch strategy](/docs/build/incremental-microbatch) using `event_time` to define time-based ranges for filtering data.
+
Each of these strategies has its pros and cons, which we'll discuss below. As with any model config, `incremental_strategy` may be specified in `dbt_project.yml` or within a model file's `config()` block.
### The `append` strategy
diff --git a/website/docs/reference/resource-configs/target_database.md b/website/docs/reference/resource-configs/target_database.md
index 3c07b442107..f80dd31f214 100644
--- a/website/docs/reference/resource-configs/target_database.md
+++ b/website/docs/reference/resource-configs/target_database.md
@@ -6,7 +6,9 @@ datatype: string
:::note
-For [versionless](/docs/dbt-versions/core-upgrade/upgrading-to-v1.8#versionless) dbt Cloud accounts and dbt Core v1.9+, this functionality is no longer utilized. Use the [database](/reference/resource-configs/database) config as an alternative to define a custom database while still respecting the `generate_database_name` macro.
+Starting in dbt Core v1.9, this functionality is no longer utilized. Use the [database](/reference/resource-configs/database) config as an alternative to define a custom database while still respecting the `generate_database_name` macro.
+
+Try it now in the [dbt Cloud "Latest" release track](/docs/dbt-versions/cloud-release-tracks).
:::
diff --git a/website/docs/reference/resource-configs/target_schema.md b/website/docs/reference/resource-configs/target_schema.md
index ffa95df9be7..1117e3ec42c 100644
--- a/website/docs/reference/resource-configs/target_schema.md
+++ b/website/docs/reference/resource-configs/target_schema.md
@@ -6,7 +6,9 @@ datatype: string
:::info
-For [versionless](/docs/dbt-versions/core-upgrade/upgrading-to-v1.8#versionless) dbt Cloud accounts and dbt Core v1.9+, this configuration is no longer required. Use the [schema](/reference/resource-configs/schema) config as an alternative to define a custom schema while still respecting the `generate_schema_name` macro.
+Starting in dbt Core v1.9, this configuration is no longer required. Use the [schema](/reference/resource-configs/schema) config as an alternative to define a custom schema while still respecting the `generate_schema_name` macro.
+
+Try it now in the [dbt Cloud "Latest" release track](/docs/dbt-versions/cloud-release-tracks).
:::
@@ -40,7 +42,7 @@ On **BigQuery**, this is analogous to a `dataset`.
## Default
This is a required parameter, no default is provided.
-For versionless dbt Cloud accounts and dbt Core v1.9+, this is not a required parameter.
+In dbt Core v1.9+ and the dbt Cloud "Latest" release track, this is not a required parameter.
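+
+As a minimal sketch of the alternative mentioned in the note above, a v1.9+ snapshot can set the [schema](/reference/resource-configs/schema) config directly in its YAML definition while `generate_schema_name` is still respected. The `orders_snapshot` and `jaffle_shop` names here are placeholders, not part of this page:
+
+```yaml
+snapshots:
+  - name: orders_snapshot
+    relation: source('jaffle_shop', 'orders')
+    config:
+      schema: snapshots
+      unique_key: id
+      strategy: timestamp
+      updated_at: updated_at
+```
+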
## Examples ### Build all snapshots in a schema named `snapshots` diff --git a/website/docs/reference/resource-configs/unique_key.md b/website/docs/reference/resource-configs/unique_key.md index 996e7148292..071102bae6d 100644 --- a/website/docs/reference/resource-configs/unique_key.md +++ b/website/docs/reference/resource-configs/unique_key.md @@ -1,12 +1,65 @@ --- -resource_types: [snapshots] -description: "Unique_key - Read this in-depth guide to learn about configurations in dbt." +resource_types: [snapshots, models] +description: "Learn more about unique_key configurations in dbt." datatype: column_name_or_expression --- + + + + +Configure the `unique_key` in the `config` block of your [incremental model's](/docs/build/incremental-models) SQL file, in your `models/properties.yml` file, or in your `dbt_project.yml` file. + + + +```sql +{{ + config( + materialized='incremental', + unique_key='id' + ) +}} + +``` + + + + + +```yaml +models: + - name: my_incremental_model + description: "An incremental model example with a unique key." + config: + materialized: incremental + unique_key: id + +``` + + + + + +```yaml +name: jaffle_shop + +models: + jaffle_shop: + staging: + +unique_key: id +``` + + + + + + + +For [snapshots](/docs/build/snapshots), configure the `unique_key` in the your `snapshot/filename.yml` file or in your `dbt_project.yml` file. + ```yaml @@ -14,7 +67,7 @@ snapshots: - name: orders_snapshot relation: source('my_source', 'my_table') [config](/reference/snapshot-configs): - unique_key: id + unique_key: order_id ``` @@ -23,6 +76,8 @@ snapshots: +Configure the `unique_key` in the `config` block of your snapshot SQL file or in your `dbt_project.yml` file. + import SnapshotYaml from '/snippets/_snapshot-yaml-spec.md'; @@ -49,10 +104,13 @@ snapshots: + + + ## Description -A column name or expression that is unique for the inputs of a snapshot. dbt uses this to match records between a result set and an existing snapshot, so that changes can be captured correctly. +A column name or expression that is unique for the inputs of a snapshot or incremental model. dbt uses this to match records between a result set and an existing snapshot or incremental model, so that changes can be captured correctly. -In Versionless and dbt v1.9 and later, [snapshots](/docs/build/snapshots) are defined and configured in YAML files within your `snapshots/` directory. The `unique_key` is specified within the `config` block of your snapshot YAML file. +In dbt Cloud "Latest" release track and from dbt v1.9, [snapshots](/docs/build/snapshots) are defined and configured in YAML files within your `snapshots/` directory. You can specify one or multiple `unique_key` values within your snapshot YAML file's `config` key. :::caution @@ -67,6 +125,32 @@ This is a **required parameter**. No default is provided. ## Examples ### Use an `id` column as a unique key + + + + +In this example, the `id` column is the unique key for an incremental model. + + + +```sql +{{ + config( + materialized='incremental', + unique_key='id' + ) +}} + +select * from .. +``` + + + + + + +In this example, the `id` column is used as a unique key for a snapshot. + @@ -114,29 +198,88 @@ snapshots: -### Use a combination of two columns as a unique key -This configuration accepts a valid column expression. As such, you can concatenate two columns together as a unique key if required. It's a good idea to use a separator (e.g. `'-'`) to ensure uniqueness. 
+ + +### Use multiple unique keys + + + + +Configure multiple unique keys for an incremental model as a string representing a single column or a list of single-quoted column names that can be used together, for example, `['col1', 'col2', …]`. + +Columns must not contain null values, otherwise the incremental model will fail to match rows and generate duplicate rows. Refer to [Defining a unique key](/docs/build/incremental-models#defining-a-unique-key-optional) for more information. + + + +```sql +{{ config( + materialized='incremental', + unique_key=['order_id', 'location_id'] +) }} + +with... + +``` + + + + + + + +You can configure snapshots to use multiple unique keys for `primary_key` columns. + ```yaml snapshots: - - name: transaction_items_snapshot - relation: source('erp', 'transactions') + - name: orders_snapshot + relation: source('jaffle_shop', 'orders') config: schema: snapshots - unique_key: "transaction_id || '-' || line_item_id" + unique_key: + - order_id + - product_id strategy: timestamp updated_at: updated_at - + ``` + + + +### Use a combination of two columns as a unique key + + + + + + +```sql +{{ config( + materialized='incremental', + unique_key=['order_id', 'location_id'] +) }} + +with... + +``` + + + + + + + +This configuration accepts a valid column expression. As such, you can concatenate two columns together as a unique key if required. It's a good idea to use a separator (for example, `'-'`) to ensure uniqueness. + ```jinja2 @@ -159,26 +302,9 @@ from {{ source('erp', 'transactions') }} ``` - Though, it's probably a better idea to construct this column in your query and use that as the `unique_key`: - - - - -```yaml -snapshots: - - name: transaction_items_snapshot - relation: {{ ref('transaction_items_ephemeral') }} - config: - schema: snapshots - unique_key: id - strategy: timestamp - updated_at: updated_at -``` - - ```sql @@ -195,9 +321,6 @@ from {{ source('erp', 'transactions') }} In this example, we create an ephemeral model `transaction_items_ephemeral` that creates an `id` column that can be used as the `unique_key` our snapshot configuration. - - - ```jinja2 @@ -222,4 +345,6 @@ from {{ source('erp', 'transactions') }} ``` + + diff --git a/website/docs/reference/resource-properties/concurrent_batches.md b/website/docs/reference/resource-properties/concurrent_batches.md new file mode 100644 index 00000000000..4d6b2ea0af4 --- /dev/null +++ b/website/docs/reference/resource-properties/concurrent_batches.md @@ -0,0 +1,90 @@ +--- +title: "concurrent_batches" +resource_types: [models] +datatype: model_name +description: "Learn about concurrent_batches in dbt." +--- + +:::note + +Available in dbt Core v1.9+ or the [dbt Cloud "Latest" release tracks](/docs/dbt-versions/cloud-release-tracks). + +::: + + + + + + + +```yaml +models: + +concurrent_batches: true +``` + + + + + + + + + + +```sql +{{ + config( + materialized='incremental', + concurrent_batches=true, + incremental_strategy='microbatch' + ... + ) +}} +select ... +``` + + + + + + +## Definition + +`concurrent_batches` is an override which allows you to decide whether or not you want to run batches in parallel or sequentially (one at a time). + +For more information, refer to [how batch execution works](/docs/build/incremental-microbatch#how-parallel-batch-execution-works). +## Example + +By default, dbt auto-detects whether batches can run in parallel for microbatch models. 
+However, you can override dbt's detection by setting the `concurrent_batches` config to `false` in your `dbt_project.yml` or model `.sql` file to force sequential execution, given you meet these conditions:
+* You've configured a microbatch incremental strategy.
+* You're working with cumulative metrics or any logic that depends on batch order.
+
+Set the `concurrent_batches` config to `false` to ensure batches are processed sequentially. For example:
+
+
+```yaml
+models:
+  my_project:
+    cumulative_metrics_model:
+      +concurrent_batches: false
+```
+
+
+```sql
+{{
+  config(
+    materialized='incremental',
+    incremental_strategy='microbatch',
+    concurrent_batches=false
+  )
+}}
+select ...
+
+```
+
+
diff --git a/website/docs/reference/resource-properties/config.md b/website/docs/reference/resource-properties/config.md
index 8190c7dd8ca..1e1867dda04 100644
--- a/website/docs/reference/resource-properties/config.md
+++ b/website/docs/reference/resource-properties/config.md
@@ -170,14 +170,6 @@ exposures:
-
-
-Support for the `config` property on `semantic_models` was added in dbt Core v1.7
-
-
-
-
-
```yml
@@ -193,20 +185,10 @@ semantic_models:
-
-
-
-
-Support for the `config` property on `saved queries` was added in dbt Core v1.7.
-
-
-
-
-
```yml
@@ -226,8 +208,6 @@ saved-queries:
-
-
diff --git a/website/docs/reference/resource-properties/constraints.md b/website/docs/reference/resource-properties/constraints.md
index 63582974040..1e418e884be 100644
--- a/website/docs/reference/resource-properties/constraints.md
+++ b/website/docs/reference/resource-properties/constraints.md
@@ -29,7 +29,7 @@ Foreign key constraints accept two additional inputs:
- `to`: A relation input, likely `ref()`, indicating the referenced table.
- `to_columns`: A list of column(s) in that table containing the corresponding primary or unique key.
-This syntax for defining foreign keys uses `ref`, meaning it will capture dependencies and works across different environments. It's available in [dbt Cloud Versionless](/docs/dbt-versions/upgrade-dbt-version-in-cloud#versionless) and versions of dbt Core starting with v1.9.
+This syntax for defining foreign keys uses `ref`, meaning it will capture dependencies and works across different environments. It's available in [dbt Cloud "Latest"](/docs/dbt-versions/cloud-release-tracks) and [dbt Core v1.9+](/docs/dbt-versions/core-upgrade/upgrading-to-v1.9).
@@ -65,7 +65,7 @@ models:
      - type: unique
      - type: foreign_key
        to: ref('other_model_name')
-        to_columns: other_model_column
+        to_columns: [other_model_column]
      - type: ...
``` diff --git a/website/docs/reference/resource-properties/deprecation_date.md b/website/docs/reference/resource-properties/deprecation_date.md index be76ccb07f6..501fdc30237 100644 --- a/website/docs/reference/resource-properties/deprecation_date.md +++ b/website/docs/reference/resource-properties/deprecation_date.md @@ -53,11 +53,11 @@ Additionally, [`WARN_ERROR_OPTIONS`](/reference/global-configs/warnings) gives a |--------------------------------|----------------------------------------------------|------------------------| | `DeprecatedModel` | Parsing a project that defines a deprecated model | Producer | | `DeprecatedReference` | Referencing a model with a past deprecation date | Producer and consumers | -| `UpcomingDeprecationReference` | Referencing a model with a future deprecation date | Producer and consumers | +| `UpcomingReferenceDeprecation` | Referencing a model with a future deprecation date | Producer and consumers | -** Example ** +**Example** -Example output for an `UpcomingDeprecationReference` warning: +Example output for an `UpcomingReferenceDeprecation` warning: ``` $ dbt parse 15:48:14 Running with dbt=1.6.0 diff --git a/website/docs/reference/resource-properties/description.md b/website/docs/reference/resource-properties/description.md index 6f32f75efa4..cf7b2b29a5a 100644 --- a/website/docs/reference/resource-properties/description.md +++ b/website/docs/reference/resource-properties/description.md @@ -13,7 +13,7 @@ description: "This guide explains how to use the description key to add YAML des { label: 'Snapshots', value: 'snapshots', }, { label: 'Analyses', value: 'analyses', }, { label: 'Macros', value: 'macros', }, - { label: 'Singular data tests', value: 'singular_data_tests', }, + { label: 'Data tests', value: 'data_tests', }, ] }> @@ -146,17 +146,17 @@ macros: - + - + ```yml version: 2 data_tests: - - name: singular_data_test_name + - name: data_test_name description: markdown_string ``` @@ -167,13 +167,12 @@ data_tests: -The `description` property is available for singular data tests beginning in dbt v1.9. +The `description` property is available for generic and singular data tests beginning in dbt v1.9. - ## Definition diff --git a/website/docs/reference/resource-properties/freshness.md b/website/docs/reference/resource-properties/freshness.md index 03037e7b681..d68dee4fade 100644 --- a/website/docs/reference/resource-properties/freshness.md +++ b/website/docs/reference/resource-properties/freshness.md @@ -37,8 +37,6 @@ A freshness block is used to define the acceptable amount of time between the mo In the `freshness` block, one or both of `warn_after` and `error_after` can be provided. If neither is provided, then dbt will not calculate freshness snapshots for the tables in this source. - - In most cases, the `loaded_at_field` is required. Some adapters support calculating source freshness from the warehouse metadata tables and can exclude the `loaded_at_field`. If a source has a `freshness:` block, dbt will attempt to calculate freshness for that source: @@ -62,29 +60,9 @@ To exclude a source from freshness calculations, you have two options: - Don't add a `freshness:` block. - Explicitly set `freshness: null`. - - - - -Additionally, the `loaded_at_field` is required to calculate freshness for a table. If a `loaded_at_field` is not provided, then dbt will not calculate freshness for the table. 
- -Freshness blocks are applied hierarchically: -- A `freshness` and `loaded_at_field` property added to a source will be applied to all tables defined in that source -- A `freshness` and `loaded_at_field` property added to a source _table_ will override any properties applied to the source. - -This is useful when all of the tables in a source have the same `loaded_at_field`, as is often the case. - - ## loaded_at_field - -(Optional on adapters that support pulling freshness from warehouse metadata tables, required otherwise.) - - - -(Required) - - +Optional on adapters that support pulling freshness from warehouse metadata tables, required otherwise.
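+
+As a minimal, hypothetical sketch: on an adapter that supports metadata-based freshness (for example, Snowflake via its `LAST_ALTERED` column), a source can declare a `freshness` block without any `loaded_at_field`. The source and table names below are placeholders:
+
+```yaml
+sources:
+  - name: jaffle_shop
+    freshness:
+      warn_after: {count: 12, period: hour}
+    tables:
+      - name: orders
+```
+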

A column name (or expression) that returns a timestamp indicating freshness. If using a date field, you may have to cast it to a timestamp: diff --git a/website/docs/reference/resource-properties/schema.md b/website/docs/reference/resource-properties/schema.md index 017d93e3235..6b5ba66ff8f 100644 --- a/website/docs/reference/resource-properties/schema.md +++ b/website/docs/reference/resource-properties/schema.md @@ -10,7 +10,7 @@ datatype: schema_name ```yml version: 2 -sources: +[sources](/reference/source-properties): - name: database: schema: @@ -25,7 +25,7 @@ sources: ## Definition The schema name as stored in the database. -This parameter is useful if you want to use a source name that differs from the schema name. +This parameter is useful if you want to use a [source](/reference/source-properties) name that differs from the schema name. :::info BigQuery terminology diff --git a/website/docs/reference/resource-properties/unit-tests.md b/website/docs/reference/resource-properties/unit-tests.md index 08081c4c24a..7bc177a133c 100644 --- a/website/docs/reference/resource-properties/unit-tests.md +++ b/website/docs/reference/resource-properties/unit-tests.md @@ -7,7 +7,7 @@ datatype: test :::note -This functionality is only supported in dbt Core v1.8+ or dbt Cloud accounts that have gone ["Versionless"](/docs/dbt-versions/upgrade-dbt-version-in-cloud#versionless). +This functionality is available in dbt Core v1.8+ and [dbt Cloud release tracks](/docs/dbt-versions/cloud-release-tracks). ::: diff --git a/website/docs/reference/resource-properties/versions.md b/website/docs/reference/resource-properties/versions.md index f6b71852aef..748aa477a4f 100644 --- a/website/docs/reference/resource-properties/versions.md +++ b/website/docs/reference/resource-properties/versions.md @@ -73,13 +73,13 @@ Note that the value of `defined_in` and the `alias` configuration of a model are When you use the `state:modified` selection method in Slim CI, dbt will detect changes to versioned model contracts, and raise an error if any of those changes could be breaking for downstream consumers. -Breaking changes include: -- Removing an existing column -- Changing the `data_type` of an existing column -- Removing or modifying one of the `constraints` on an existing column (dbt v1.6 or higher) -- Changing unversioned, contracted models. 
- - dbt also warns if a model has or had a contract but isn't versioned - +import BreakingChanges from '/snippets/_versions-contracts.md'; + + + diff --git a/website/docs/reference/seed-configs.md b/website/docs/reference/seed-configs.md index 5d5c39071d6..a18f1fc28f7 100644 --- a/website/docs/reference/seed-configs.md +++ b/website/docs/reference/seed-configs.md @@ -79,6 +79,8 @@ seeds: + + ```yaml seeds: [](/reference/resource-configs/resource-path): @@ -95,7 +97,28 @@ seeds: [+](/reference/resource-configs/plus-prefix)[grants](/reference/resource-configs/grants): {} ``` + + + + +```yaml +seeds: + [](/reference/resource-configs/resource-path): + [+](/reference/resource-configs/plus-prefix)[enabled](/reference/resource-configs/enabled): true | false + [+](/reference/resource-configs/plus-prefix)[tags](/reference/resource-configs/tags): | [] + [+](/reference/resource-configs/plus-prefix)[pre-hook](/reference/resource-configs/pre-hook-post-hook): | [] + [+](/reference/resource-configs/plus-prefix)[post-hook](/reference/resource-configs/pre-hook-post-hook): | [] + [+](/reference/resource-configs/plus-prefix)[database](/reference/resource-configs/database): + [+](/reference/resource-configs/plus-prefix)[schema](/reference/resource-properties/schema): + [+](/reference/resource-configs/plus-prefix)[alias](/reference/resource-configs/alias): + [+](/reference/resource-configs/plus-prefix)[persist_docs](/reference/resource-configs/persist_docs): + [+](/reference/resource-configs/plus-prefix)[full_refresh](/reference/resource-configs/full_refresh): + [+](/reference/resource-configs/plus-prefix)[meta](/reference/resource-configs/meta): {} + [+](/reference/resource-configs/plus-prefix)[grants](/reference/resource-configs/grants): {} + [+](/reference/resource-configs/plus-prefix)[event_time](/reference/resource-configs/event-time): my_time_field +``` + @@ -105,6 +128,8 @@ seeds: + + ```yaml version: 2 @@ -122,13 +147,36 @@ seeds: [full_refresh](/reference/resource-configs/full_refresh): [meta](/reference/resource-configs/meta): {} [grants](/reference/resource-configs/grants): {} + [event_time](/reference/resource-configs/event-time): my_time_field + +``` + + + + +```yaml +version: 2 +seeds: + - name: [] + config: + [enabled](/reference/resource-configs/enabled): true | false + [tags](/reference/resource-configs/tags): | [] + [pre_hook](/reference/resource-configs/pre-hook-post-hook): | [] + [post_hook](/reference/resource-configs/pre-hook-post-hook): | [] + [database](/reference/resource-configs/database): + [schema](/reference/resource-properties/schema): + [alias](/reference/resource-configs/alias): + [persist_docs](/reference/resource-configs/persist_docs): + [full_refresh](/reference/resource-configs/full_refresh): + [meta](/reference/resource-configs/meta): {} + [grants](/reference/resource-configs/grants): {} ``` +
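+
+For instance, a minimal sketch of applying the new `event_time` config (shown in the v1.9+ listings above) to a single seed in `dbt_project.yml` might look like the following, where the project, seed, and column names are placeholders:
+
+```yaml
+seeds:
+  my_project:
+    orders_seed:
+      +event_time: ordered_at
+```
+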
- ## Configuring seeds diff --git a/website/docs/reference/snapshot-configs.md b/website/docs/reference/snapshot-configs.md index e867747dc96..018988a4934 100644 --- a/website/docs/reference/snapshot-configs.md +++ b/website/docs/reference/snapshot-configs.md @@ -8,30 +8,16 @@ meta: import ConfigResource from '/snippets/_config-description-resource.md'; import ConfigGeneral from '/snippets/_config-description-general.md'; - ## Related documentation * [Snapshots](/docs/build/snapshots) * The `dbt snapshot` [command](/reference/commands/snapshot) - ## Available configurations ### Snapshot-specific configurations - - -import SnapshotYaml from '/snippets/_snapshot-yaml-spec.md'; - - - - - [+](/reference/resource-configs/plus-prefix)[check_cols](/reference/resource-configs/check_cols): [] | all - + [+](/reference/resource-configs/plus-prefix)[invalidate_hard_deletes](/reference/resource-configs/invalidate_hard_deletes) : true | false ```
@@ -79,7 +65,8 @@ snapshots: [+](/reference/resource-configs/plus-prefix)[updated_at](/reference/resource-configs/updated_at): [+](/reference/resource-configs/plus-prefix)[check_cols](/reference/resource-configs/check_cols): [] | all [+](/reference/resource-configs/plus-prefix)[snapshot_meta_column_names](/reference/resource-configs/snapshot_meta_column_names): {} - + [+](/reference/resource-configs/plus-prefix)[dbt_valid_to_current](/reference/resource-configs/dbt_valid_to_current): + [+](/reference/resource-configs/plus-prefix)[hard_deletes](/reference/resource-configs/hard-deletes): string ``` @@ -113,7 +100,8 @@ snapshots: [updated_at](/reference/resource-configs/updated_at): [check_cols](/reference/resource-configs/check_cols): [] | all [snapshot_meta_column_names](/reference/resource-configs/snapshot_meta_column_names): {} - + [hard_deletes](/reference/resource-configs/hard-deletes): string + [dbt_valid_to_current](/reference/resource-configs/dbt_valid_to_current): ``` @@ -123,11 +111,9 @@ snapshots: - - -Configurations can be applied to snapshots using [YAML syntax](/docs/build/snapshots), available in Versionless and dbt v1.9 and higher, in the the `snapshot` directory file. +import LegacySnapshotConfig from '/snippets/_legacy-snapshot-config.md'; - + @@ -140,7 +126,7 @@ Configurations can be applied to snapshots using [YAML syntax](/docs/build/snaps [strategy](/reference/resource-configs/strategy)="timestamp" | "check", [updated_at](/reference/resource-configs/updated_at)="", [check_cols](/reference/resource-configs/check_cols)=[""] | "all" - [snapshot_meta_column_names](/reference/resource-configs/snapshot_meta_column_names)={} + [invalidate_hard_deletes](/reference/resource-configs/invalidate_hard_deletes) : true | false ) }} ``` @@ -150,11 +136,25 @@ Configurations can be applied to snapshots using [YAML syntax](/docs/build/snaps +### Snapshot configuration migration + +The latest snapshot configurations introduced in dbt Core v1.9 (such as [`snapshot_meta_column_names`](/reference/resource-configs/snapshot_meta_column_names), [`dbt_valid_to_current`](/reference/resource-configs/dbt_valid_to_current), and `hard_deletes`) are best suited for new snapshots. For existing snapshots, we recommend the following to avoid any inconsistencies in your snapshots: + +#### For existing snapshots +- Migrate tables — Migrate the previous snapshot to the new table schema and values: + - Create a backup copy of your snapshots. + - Use `alter` statements as needed (or a script to apply `alter` statements) to ensure table consistency. +- New configurations — Convert the configs one at a time, testing as you go. + +:::warning +If you use one of the latest configs, such as `dbt_valid_to_current`, without migrating your data, you may have mixed old and new data, leading to an incorrect downstream result. 
+::: ### General configurations + + + + ```yaml snapshots: [](/reference/resource-configs/resource-path): @@ -178,7 +181,24 @@ snapshots: [+](/reference/resource-configs/plus-prefix)[post-hook](/reference/resource-configs/pre-hook-post-hook): | [] [+](/reference/resource-configs/plus-prefix)[persist_docs](/reference/resource-configs/persist_docs): {} [+](/reference/resource-configs/plus-prefix)[grants](/reference/resource-configs/grants): {} + [+](/reference/resource-configs/plus-prefix)[event_time](/reference/resource-configs/event-time): my_time_field ``` + + + + +```yaml +snapshots: + [](/reference/resource-configs/resource-path): + [+](/reference/resource-configs/plus-prefix)[enabled](/reference/resource-configs/enabled): true | false + [+](/reference/resource-configs/plus-prefix)[tags](/reference/resource-configs/tags): | [] + [+](/reference/resource-configs/plus-prefix)[alias](/reference/resource-configs/alias): + [+](/reference/resource-configs/plus-prefix)[pre-hook](/reference/resource-configs/pre-hook-post-hook): | [] + [+](/reference/resource-configs/plus-prefix)[post-hook](/reference/resource-configs/pre-hook-post-hook): | [] + [+](/reference/resource-configs/plus-prefix)[persist_docs](/reference/resource-configs/persist_docs): {} + [+](/reference/resource-configs/plus-prefix)[grants](/reference/resource-configs/grants): {} +``` + @@ -198,8 +218,8 @@ snapshots: [enabled](/reference/resource-configs/enabled): true | false [tags](/reference/resource-configs/tags): | [] [alias](/reference/resource-configs/alias): - [pre-hook](/reference/resource-configs/pre-hook-post-hook): | [] - [post-hook](/reference/resource-configs/pre-hook-post-hook): | [] + [pre_hook](/reference/resource-configs/pre-hook-post-hook): | [] + [post_hook](/reference/resource-configs/pre-hook-post-hook): | [] [persist_docs](/reference/resource-configs/persist_docs): {} [grants](/reference/resource-configs/grants): {} ``` @@ -221,10 +241,11 @@ snapshots: [enabled](/reference/resource-configs/enabled): true | false [tags](/reference/resource-configs/tags): | [] [alias](/reference/resource-configs/alias): - [pre-hook](/reference/resource-configs/pre-hook-post-hook): | [] - [post-hook](/reference/resource-configs/pre-hook-post-hook): | [] + [pre_hook](/reference/resource-configs/pre-hook-post-hook): | [] + [post_hook](/reference/resource-configs/pre-hook-post-hook): | [] [persist_docs](/reference/resource-configs/persist_docs): {} [grants](/reference/resource-configs/grants): {} + [event_time](/reference/resource-configs/event-time): my_time_field ``` @@ -234,11 +255,7 @@ snapshots: - - -Configurations can be applied to snapshots using [YAML syntax](/docs/build/snapshots), available in Versionless and dbt v1.9 and higher, in the the `snapshot` directory file. - - + @@ -267,24 +284,29 @@ Snapshots can be configured in multiple ways: -1. Defined in YAML files using a `config` [resource property](/reference/model-properties), typically in your [snapshots directory](/reference/project-configs/snapshot-paths) (available in [Versionless](/docs/dbt-versions/versionless-cloud) or and dbt Core v1.9 and higher). +1. Defined in YAML files using a `config` [resource property](/reference/model-properties), typically in your [snapshots directory](/reference/project-configs/snapshot-paths) (available in [the dbt Cloud release track](/docs/dbt-versions/cloud-release-tracks) and dbt v1.9 and higher). 2. From the `dbt_project.yml` file, under the `snapshots:` key. 
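+
+As a quick, hypothetical illustration of the general configs listed above, a project could tag every snapshot under one folder and persist docs for them from `dbt_project.yml`. The project and folder names here are placeholders:
+
+```yaml
+snapshots:
+  my_project:
+    daily:
+      +tags: ['daily']
+      +persist_docs:
+        relation: true
+        columns: true
+```
+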
To apply a configuration to a snapshot, or directory of snapshots, define the resource path as nested dictionary keys. -1. Defined in YAML files using a `config` [resource property](/reference/model-properties), typically in your [snapshots directory](/reference/project-configs/snapshot-paths) (available in [Versionless](/docs/dbt-versions/versionless-cloud) or and dbt Core v1.9 and higher). -2. Using a `config` block within a snapshot defined in Jinja SQL +1. Defined in a YAML file using a `config` [resource property](/reference/model-properties), typically in your [snapshots directory](/reference/project-configs/snapshot-paths) (available in [the dbt Cloud "Latest" release track](/docs/dbt-versions/cloud-release-tracks) and dbt v1.9 and higher). The latest snapshot YAML syntax provides faster and more efficient management. +2. Using a `config` block within a snapshot defined in Jinja SQL. 3. From the `dbt_project.yml` file, under the `snapshots:` key. To apply a configuration to a snapshot, or directory of snapshots, define the resource path as nested dictionary keys. -Note that in Versionless and dbt v1.9 and later, snapshots are defined in an updated syntax using a YAML file within your `snapshots/` directory (as defined by the [`snapshot-paths` config](/reference/project-configs/snapshot-paths)). For faster and more efficient management, consider the updated snapshot YAML syntax, [available in Versionless](/docs/dbt-versions/versionless-cloud) or [dbt Core v1.9 and later](/docs/dbt-versions/core). - Snapshot configurations are applied hierarchically in the order above with higher taking precedence. ### Examples -The following examples demonstrate how to configure snapshots using the `dbt_project.yml` file, a `config` block within a snapshot, and a `.yml` file. + + +The following examples demonstrate how to configure snapshots using the `dbt_project.yml` file and a `.yml` file. + + + +The following examples demonstrate how to configure snapshots using the `dbt_project.yml` file, a `config` block within a snapshot (legacy method), and a `.yml` file. + - #### Apply configurations to all snapshots To apply a configuration to all snapshots, including those in any installed [packages](/docs/build/packages), nest the configuration directly under the `snapshots` key: @@ -292,7 +314,6 @@ The following examples demonstrate how to configure snapshots using the `dbt_pro ```yml - snapshots: +unique_key: id ``` @@ -307,7 +328,6 @@ The following examples demonstrate how to configure snapshots using the `dbt_pro ```yml - snapshots: jaffle_shop: +unique_key: id @@ -329,6 +349,7 @@ The following examples demonstrate how to configure snapshots using the `dbt_pro {{ config( unique_key='id', + target_schema='snapshots', strategy='timestamp', updated_at='updated_at' ) @@ -378,7 +399,7 @@ The following examples demonstrate how to configure snapshots using the `dbt_pro - You can also define some common configs in a snapshot's `config` block. We don't recommend this for a snapshot's required configuration, however. + You can also define some common configs in a snapshot's `config` block. However, we don't recommend this for a snapshot's required configuration. diff --git a/website/docs/reference/snapshot-properties.md b/website/docs/reference/snapshot-properties.md index d940a9f344c..11fb956a163 100644 --- a/website/docs/reference/snapshot-properties.md +++ b/website/docs/reference/snapshot-properties.md @@ -5,7 +5,7 @@ description: "Read this guide to learn about using source properties in dbt." 
-In Versionless and dbt v1.9 and later, snapshots are defined and configured in YAML files within your `snapshots/` directory (as defined by the [`snapshot-paths` config](/reference/project-configs/snapshot-paths)). Snapshot properties are declared within these YAML files, allowing you to define both the snapshot configurations and properties in one place.
+In dbt v1.9 and later, snapshots are defined and configured in YAML files within your `snapshots/` directory (as defined by the [`snapshot-paths` config](/reference/project-configs/snapshot-paths)). Snapshot properties are declared within these YAML files, allowing you to define both the snapshot configurations and properties in one place.
@@ -15,7 +15,7 @@ Snapshots properties can be declared in `.yml` files in:
- your `snapshots/` directory (as defined by the [`snapshot-paths` config](/reference/project-configs/snapshot-paths)).
- your `models/` directory (as defined by the [`model-paths` config](/reference/project-configs/model-paths))
-Note, in Versionless and dbt v1.9 and later, snapshots are defined in an updated syntax using a YAML file within your `snapshots/` directory (as defined by the [`snapshot-paths` config](/reference/project-configs/snapshot-paths)). For faster and more efficient management, consider the updated snapshot YAML syntax, [available in Versionless](/docs/dbt-versions/versionless-cloud) or [dbt Core v1.9 and later](/docs/dbt-versions/core).
+Note that in dbt v1.9 and later, snapshots are defined in an updated syntax using a YAML file within your `snapshots/` directory (as defined by the [`snapshot-paths` config](/reference/project-configs/snapshot-paths)). For faster and more efficient management, consider the updated snapshot YAML syntax, available now in [the dbt Cloud "Latest" release track](/docs/dbt-versions/cloud-release-tracks) and soon in [dbt Core v1.9](/docs/dbt-versions/core-upgrade/upgrading-to-v1.9).
diff --git a/website/docs/reference/source-configs.md b/website/docs/reference/source-configs.md
index 64dda8bffde..959d4c542e9 100644
--- a/website/docs/reference/source-configs.md
+++ b/website/docs/reference/source-configs.md
@@ -8,7 +8,17 @@ import ConfigGeneral from '/snippets/_config-description-general.md';
## Available configurations
-Sources only support one configuration, [`enabled`](/reference/resource-configs/enabled).
+
+
+Sources support [`enabled`](/reference/resource-configs/enabled) and [`meta`](/reference/resource-configs/meta).
+ + + + + +Sources configurations support [`enabled`](/reference/resource-configs/enabled), [`event_time`](/reference/resource-configs/event-time), and [`meta`](/reference/resource-configs/meta) + + ### General configurations @@ -27,12 +37,29 @@ Sources only support one configuration, [`enabled`](/reference/resource-configs/ + + ```yaml sources: [](/reference/resource-configs/resource-path): [+](/reference/resource-configs/plus-prefix)[enabled](/reference/resource-configs/enabled): true | false + [+](/reference/resource-configs/plus-prefix)[event_time](/reference/resource-configs/event-time): my_time_field + [+](/reference/resource-configs/plus-prefix)[meta](/reference/resource-configs/meta): + key: value ``` + + + + +```yaml +sources: + [](/reference/resource-configs/resource-path): + [+](/reference/resource-configs/plus-prefix)[enabled](/reference/resource-configs/enabled): true | false + [+](/reference/resource-configs/plus-prefix)[meta](/reference/resource-configs/meta): + key: value +``` + @@ -43,6 +70,8 @@ sources: + + ```yaml version: 2 @@ -50,12 +79,37 @@ sources: - name: [] [config](/reference/resource-properties/config): [enabled](/reference/resource-configs/enabled): true | false + [event_time](/reference/resource-configs/event-time): my_time_field + [meta](/reference/resource-configs/meta): {} + tables: - name: [] [config](/reference/resource-properties/config): [enabled](/reference/resource-configs/enabled): true | false + [event_time](/reference/resource-configs/event-time): my_time_field + [meta](/reference/resource-configs/meta): {} ``` + + + + +```yaml +version: 2 + +sources: + - name: [] + [config](/reference/resource-properties/config): + [enabled](/reference/resource-configs/enabled): true | false + [meta](/reference/resource-configs/meta): {} + tables: + - name: [] + [config](/reference/resource-properties/config): + [enabled](/reference/resource-configs/enabled): true | false + [meta](/reference/resource-configs/meta): {} + +``` + @@ -74,6 +128,8 @@ You can disable sources imported from a package to prevent them from rendering i + + ```yaml sources: your_project_name: @@ -81,11 +137,34 @@ You can disable sources imported from a package to prevent them from rendering i source_name: source_table_name: +enabled: false + +event_time: my_time_field ``` + + + + + ```yaml + sources: + your_project_name: + subdirectory_name: + source_name: + source_table_name: + +enabled: false + ``` + ### Examples + +The following examples show how to configure sources in your dbt project. + +— [Disable all sources imported from a package](#disable-all-sources-imported-from-a-package)
+— [Conditionally enable a single source](#conditionally-enable-a-single-source)
+— [Disable a single source from a package](#disable-a-single-source-from-a-package)
+— [Configure a source with an `event_time`](#configure-a-source-with-an-event_time)
+— [Configure meta to a source](#configure-meta-to-a-source)
+ #### Disable all sources imported from a package To apply a configuration to all sources included from a [package](/docs/build/packages), state your configuration under the [project name](/reference/project-configs/name.md) in the @@ -172,6 +251,53 @@ sources: +#### Configure a source with an `event_time` + + + +Configuring an [`event_time`](/reference/resource-configs/event-time) for a source is only available in [the dbt Cloud "Latest" release track](/docs/dbt-versions/cloud-release-tracks) or dbt Core versions 1.9 and later. + + + + + +To configure a source with an `event_time`, specify the `event_time` field in the source configuration. This field is used to represent the actual timestamp of the event, rather than something like a loading date. + +For example, if you had a source table called `clickstream` in the `events` source, you can use the timestamp for each event in the `event_timestamp` column as follows: + + + +```yaml +sources: + events: + clickstream: + +event_time: event_timestamp +``` + + +In this example, the `event_time` is set to `event_timestamp`, which has the exact time each clickstream event happened. +Not only is this required for the [incremental microbatching strategy](/docs/build/incremental-microbatch), but when you compare data across [CI and production](/docs/deploy/advanced-ci#speeding-up-comparisons) environments, dbt will use `event_timestamp` to filter and match data by this event-based timeframe, ensuring that only overlapping timeframes are compared. + + + +#### Configure meta to a source + +Use the `meta` field to assign metadata information to sources. This is useful for tracking additional context, documentation, logging, and more. + +For example, you can add `meta` information to a `clickstream` source to include information about the data source system: + + + +```yaml +sources: + events: + clickstream: + +meta: + source_system: "Google analytics" + data_owner: "marketing_team" +``` + + ## Example source configuration The following is a valid source configuration for a project with: * `name: jaffle_shop` diff --git a/website/docusaurus.config.js b/website/docusaurus.config.js index dbd389a2299..a6cb4e40628 100644 --- a/website/docusaurus.config.js +++ b/website/docusaurus.config.js @@ -72,18 +72,17 @@ var siteSettings = { }, announcementBar: { id: "biweekly-demos", - content: - "Register now for Coalesce 2024 ✨ The Analytics Engineering Conference!", - backgroundColor: "#7444FD", + content: "Join our biweekly demos and see dbt Cloud in action!", + backgroundColor: "#047377", textColor: "#fff", isCloseable: true, }, announcementBarActive: true, announcementBarLink: - "https://coalesce.getdbt.com/register/?utm_medium=internal&utm_source=docs&utm_campaign=q3-2025_coalesce-2024_aw&utm_content=coalesce____&utm_term=all_all__", + "https://www.getdbt.com/resources/webinars/dbt-cloud-demos-with-experts/?utm_medium=i[…]ly-demos_aw&utm_content=biweekly-demos____&utm_term=all_all__", // Set community spotlight member on homepage // This is the ID for a specific file under docs/community/spotlight - communitySpotlightMember: "meagan-palmer", + communitySpotlightMember: "original-dbt-athena-maintainers", prism: { theme: (() => { var theme = themes.nightOwl; @@ -206,10 +205,11 @@ var siteSettings = { src="https://solve-widget.forethought.ai/embed.js" id="forethought-widget-embed-script" data-api-key="9d421bf3-96b8-403e-9900-6fb059132264" data-ft-workflow-tag="docs" config-ft-greeting-message="Welcome to dbt Product docs! Ask a question." 
+ config-ft-widget-header-title = "Ask a question" > diff --git a/website/src/components/hero/styles.module.css b/website/src/components/hero/styles.module.css index f596b53762a..67d3c8c5d68 100644 --- a/website/src/components/hero/styles.module.css +++ b/website/src/components/hero/styles.module.css @@ -49,3 +49,34 @@ width: 60%; } } + +.callToActionsTitle { + font-weight: bold; + margin-top: 20px; + margin-bottom: 20px; + font-size: 1.25rem; + display: block; +} + +.callToActions { + display: flex; + flex-flow: wrap; + gap: 0.8rem; + justify-content: center; +} + +.callToAction { + outline: #fff solid 1px; + border-radius: 4px; + padding: 0 12px; + color: #fff; + transition: all .2s; + cursor: pointer; +} + +.callToAction:hover, .callToAction:active, .callToAction:focus { + text-decoration: none; + outline: rgb(4, 115, 119) solid 1px; + background-color: rgb(4, 115, 119); + color: #fff; +} diff --git a/website/src/components/quickstartGuideList/index.js b/website/src/components/quickstartGuideList/index.js index 0f4b5764340..2b87ae3e4a1 100644 --- a/website/src/components/quickstartGuideList/index.js +++ b/website/src/components/quickstartGuideList/index.js @@ -61,7 +61,7 @@ function QuickstartList({ quickstartData }) { // Update the URL with the new search parameters history.replace({ search: params.toString() }); -}; + }; // Handle all filters const handleDataFilter = () => { @@ -98,6 +98,30 @@ function QuickstartList({ quickstartData }) { handleDataFilter(); }, [selectedTags, selectedLevel, searchInput]); // Added searchInput to dependency array + // Set the featured guides that will show as CTAs in the hero section + // The value of the tag must match a tag in the frontmatter of the guides in order for the filter to apply after clicking + const heroCTAs = [ + { + title: 'Quickstart guides', + value: 'Quickstart' + }, + { + title: 'Use Jinja to improve your SQL code', + value: 'Jinja' + }, + { + title: 'Orchestration', + value: 'Orchestration' + }, + ]; + + // Function to handle CTA clicks + const handleCallToActionClick = (value) => { + const params = new URLSearchParams(location.search); + params.set('tags', value); + history.replace({ search: params.toString() }); + }; + return ( @@ -111,6 +135,13 @@ function QuickstartList({ quickstartData }) { showGraphic={false} customStyles={{ marginBottom: 0 }} classNames={styles.quickstartHero} + callToActions={heroCTAs.map(guide => ({ + title: guide.title, + href: guide.href, + onClick: () => handleCallToActionClick(guide.value), + newTab: guide.newTab + }))} + callToActionsTitle={'Popular guides'} />
@@ -135,7 +166,7 @@ function QuickstartList({ quickstartData }) {
- ) + ); } export default QuickstartList; diff --git a/website/src/components/sortableTable/index.js b/website/src/components/sortableTable/index.js new file mode 100644 index 00000000000..93d54252c94 --- /dev/null +++ b/website/src/components/sortableTable/index.js @@ -0,0 +1,114 @@ +import React, { useState, useMemo } from 'react'; +import Markdown from 'markdown-to-jsx'; + +const stripMarkdown = (text) => { + let strippedText = text.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1'); + strippedText = strippedText.replace(/[_*`~]/g, ''); + return strippedText; +}; + +const parseMarkdownTable = (markdown) => { + const rows = markdown.trim().split('\n'); + const headers = rows[0].split('|').map((header) => header.trim()).filter(Boolean); + + const alignmentsRow = rows[1].split('|').map((align) => align.trim()).filter(Boolean); + const columnAlignments = alignmentsRow.map((alignment) => { + if (alignment.startsWith(':') && alignment.endsWith(':')) { + return 'center'; + } else if (alignment.startsWith(':')) { + return 'left'; + } else if (alignment.endsWith(':')) { + return 'right'; + } else { + return 'left'; + } + }); + + const data = rows.slice(2).map(row => row.split('|').map(cell => cell.trim()).filter(Boolean)); + + return { headers, data, columnAlignments }; +}; + +const SortableTable = ({ children }) => { + const { headers, data: initialData, columnAlignments } = useMemo( + () => parseMarkdownTable(children), + [children] + ); + + const [data, setData] = useState(initialData); + const [sortConfig, setSortConfig] = useState({ key: '', direction: 'asc' }); + + const sortTable = (keyIndex) => { + const newDirection = (sortConfig.key === keyIndex && sortConfig.direction === 'asc') ? 'desc' : 'asc'; + setSortConfig({ key: keyIndex, direction: newDirection }); + + const sortedData = [...data].sort((a, b) => { + const aVal = stripMarkdown(a[keyIndex]); + const bVal = stripMarkdown(b[keyIndex]); + if (aVal < bVal) return newDirection === 'asc' ? -1 : 1; + if (aVal > bVal) return newDirection === 'asc' ? 1 : -1; + return 0; + }); + + setData(sortedData); + }; + + return ( + + + + {headers.map((header, index) => ( + + ))} + + + + {data.map((row, rowIndex) => ( + + {row.map((cell, cellIndex) => ( + + ))} + + ))} + +
sortTable(index)} + style={{ + cursor: 'pointer', + position: 'relative', + textAlign: columnAlignments[index], + padding: '10px' + }} + > +
+ {header} + + ↑ + + + ↓ + +
+
+ {cell || '\u00A0'} +
+ ); +}; + +export default SortableTable; diff --git a/website/src/css/custom.css b/website/src/css/custom.css index e240a5dfabf..b8979ffc943 100644 --- a/website/src/css/custom.css +++ b/website/src/css/custom.css @@ -2112,3 +2112,35 @@ h2.anchor.clicked a.hash-link:before { flex-direction: column; } } + +.markdown table th, +.markdown table td { + padding: 8px; + border: 1px solid var(--table-border-color); + word-wrap: break-word; + white-space: normal; + text-align: left; +} + +table th { + background-color: #ED7254; /* Table header background color */ +} + +:root { + --table-border-color: #000000; /* Light mode table border color */ +} + +/* Dark mode border */ +[data-theme="dark"] { + --table-border-color: #ddd; /* Dark mode table border color */ +} +table th { + color: #ffffff; /* White text on lighter background */ + font-weight: bold; +} + +/* Dark mode table header text */ +[data-theme='dark'] table th { + color: #000000; /* Black text on darker background */ + font-weight: bold; +} \ No newline at end of file diff --git a/website/src/pages/styles.js b/website/src/pages/styles.js deleted file mode 100644 index 23d13d10813..00000000000 --- a/website/src/pages/styles.js +++ /dev/null @@ -1,176 +0,0 @@ - -import React from 'react'; -import Layout from '@theme/Layout'; -import CodeBlock from '@theme/CodeBlock'; -import Changelog from '@site/src/components/changelog'; -import CloudCore from '@site/src/components/cloudcore'; -import Collapsible from '@site/src/components/collapsible'; -import FAQ from '@site/src/components/faqs'; -import File from '@site/src/components/file'; -import Lightbox from '@site/src/components/lightbox'; -import LoomVideo from '@site/src/components/loom'; -import Tabs from '@theme/Tabs'; -import TabItem from '@theme/TabItem'; -import YoutubeVideo from '@site/src/components/youtube'; - -function Styles() { - return ( - -
-
-

- The following components are baked into the Markdown compilation context, - so there is no need to import them from Markdown files. Simply add the components - inline to use them. -

-
-
-

Changelog

-
{`
-    

This functionality has changed in dbt v0.16.0

-
-`}
- This functionality has changed in dbt v0.16.0 -
-
-

CloudCore

-
{`
-    
-

The first div contains Cloud info

-
-
-

The second div contains Core info

-
-
-`}
- -
-

The first div contains Cloud info

-
-
-

The second div contains Core info

-
-
-
-
-

Collapsible

-
{`
-    
-

Shows and hides children elements

-
-
-`}
- -
-

Shows and hides children elements

-
-
-
-
-

FAQList

-
{``}
-

(Not shown)

-
- -
-

FAQ

-
{``}
- - -
- -
-

File

-
{`
-
-\`\`\`yml
-password: hunter2
-\`\`\`
-
-
-
-`}
- -
-                        password: hunter2
-                    
-
-
- -
-

Lightbox

-
{``}
- -
- -
-

Markdown Links

- Refer to the Links section of the Content Style Guide to read about how you can use links in the dbt product documentation. -
- -
-

LoomVideo

-
{``}
- -
- -
-

Tabs

-
{`
-
-
-
-\`\`\`sql
-select id from customers
-\`\`\`
-
-
-
-
-\`\`\`sql
-select "ID" from customers
-\`\`\`
-
-
-
-`}
- - - -
-select id from customers
-
- -
- - -
-select "ID" from customers
-
- -
-
-
-
-

YoutubeVideo

-
{``}
- -
-
-
- ); -} - -export default Styles; diff --git a/website/src/theme/DocRoot/Layout/Main/index.js b/website/src/theme/DocRoot/Layout/Main/index.js index a8c9d449b82..154c3cbfab6 100644 --- a/website/src/theme/DocRoot/Layout/Main/index.js +++ b/website/src/theme/DocRoot/Layout/Main/index.js @@ -89,7 +89,7 @@ export default function DocRootLayoutMain({ if (new Date() > new Date(EOLDate)) { setEOLData({ showEOLBanner: true, - EOLBannerText: `This version of dbt Core is no longer supported. There will be no more patches or security fixes. For improved performance, security, and features, upgrade to the latest stable version.`, + EOLBannerText: `This version of dbt Core is no longer supported. There will be no more patches or security fixes. For improved performance, security, and features, upgrade to the latest stable version. Some dbt Cloud customers might have an extended critical support window. `, }); } else if (new Date() > threeMonths) { setEOLData({ diff --git a/website/src/theme/MDXComponents/index.js b/website/src/theme/MDXComponents/index.js index d136222a0ce..422d6c99fab 100644 --- a/website/src/theme/MDXComponents/index.js +++ b/website/src/theme/MDXComponents/index.js @@ -13,6 +13,7 @@ import Mermaid from '@theme/Mermaid'; /* dbt Customizations: * Imports the following components below for export */ +import SortableTable from '@site/src/components/sortableTable'; import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem' import Changelog from '@site/src/components/changelog'; @@ -95,5 +96,6 @@ const MDXComponents = { DetailsToggle: DetailsToggle, Expandable: Expandable, ConfettiTrigger: ConfettiTrigger, + SortableTable: SortableTable, }; export default MDXComponents; diff --git a/website/static/img/adapter-guide/0-full-release-notes.png b/website/static/img/adapter-guide/0-full-release-notes.png index 6cb9f0ae8ed..284343ff955 100644 Binary files a/website/static/img/adapter-guide/0-full-release-notes.png and b/website/static/img/adapter-guide/0-full-release-notes.png differ diff --git a/website/static/img/adapter-guide/1-announcement.png b/website/static/img/adapter-guide/1-announcement.png index 587fee769d3..90bc965278f 100644 Binary files a/website/static/img/adapter-guide/1-announcement.png and b/website/static/img/adapter-guide/1-announcement.png differ diff --git a/website/static/img/adapter-guide/2-short-description.png b/website/static/img/adapter-guide/2-short-description.png index 167457cbcf3..16c128f94c8 100644 Binary files a/website/static/img/adapter-guide/2-short-description.png and b/website/static/img/adapter-guide/2-short-description.png differ diff --git a/website/static/img/adapter-guide/3-additional-resources.png b/website/static/img/adapter-guide/3-additional-resources.png index ba52aed613e..715978a119c 100644 Binary files a/website/static/img/adapter-guide/3-additional-resources.png and b/website/static/img/adapter-guide/3-additional-resources.png differ diff --git a/website/static/img/adapter-guide/4-installation.png b/website/static/img/adapter-guide/4-installation.png index d075b3c0569..80ced6e75dc 100644 Binary files a/website/static/img/adapter-guide/4-installation.png and b/website/static/img/adapter-guide/4-installation.png differ diff --git a/website/static/img/adapter-guide/6-thank-contribs.png b/website/static/img/adapter-guide/6-thank-contribs.png index 289d67ea5b3..815f6235c70 100644 Binary files a/website/static/img/adapter-guide/6-thank-contribs.png and b/website/static/img/adapter-guide/6-thank-contribs.png differ diff --git 
a/website/static/img/best-practices/materializations/model-timing-diagram.png b/website/static/img/best-practices/materializations/model-timing-diagram.png index 75aaf17123f..6dc85a01f1a 100644 Binary files a/website/static/img/best-practices/materializations/model-timing-diagram.png and b/website/static/img/best-practices/materializations/model-timing-diagram.png differ diff --git a/website/static/img/bigquery/bigquery-optional-config.png b/website/static/img/bigquery/bigquery-optional-config.png new file mode 100644 index 00000000000..ba9dba2afac Binary files /dev/null and b/website/static/img/bigquery/bigquery-optional-config.png differ diff --git a/website/static/img/blog/2024-05-07-unit-testing/unit-test-terminal-output.png b/website/static/img/blog/2024-05-07-unit-testing/unit-test-terminal-output.png new file mode 100644 index 00000000000..9e68587fa61 Binary files /dev/null and b/website/static/img/blog/2024-05-07-unit-testing/unit-test-terminal-output.png differ diff --git a/website/static/img/blog/2024-11-27-test-smarter-part-2/testing_pipeline.png b/website/static/img/blog/2024-11-27-test-smarter-part-2/testing_pipeline.png new file mode 100644 index 00000000000..223846b043c Binary files /dev/null and b/website/static/img/blog/2024-11-27-test-smarter-part-2/testing_pipeline.png differ diff --git a/website/static/img/blog/authors/faith_pic.png b/website/static/img/blog/authors/faith_pic.png new file mode 100644 index 00000000000..3635183bba3 Binary files /dev/null and b/website/static/img/blog/authors/faith_pic.png differ diff --git a/website/static/img/blog/authors/jerrie.jpg b/website/static/img/blog/authors/jerrie.jpg new file mode 100644 index 00000000000..9ae49d2fffe Binary files /dev/null and b/website/static/img/blog/authors/jerrie.jpg differ diff --git a/website/static/img/community/spotlight/bruno-souza-de-lima-newimage.jpg b/website/static/img/community/spotlight/bruno-souza-de-lima-newimage.jpg new file mode 100644 index 00000000000..4bcee8d5acc Binary files /dev/null and b/website/static/img/community/spotlight/bruno-souza-de-lima-newimage.jpg differ diff --git a/website/static/img/community/spotlight/christophe-oudar.jpg b/website/static/img/community/spotlight/christophe-oudar.jpg new file mode 100644 index 00000000000..11f31a6a4bd Binary files /dev/null and b/website/static/img/community/spotlight/christophe-oudar.jpg differ diff --git a/website/static/img/community/spotlight/dbt-athena-groupheadshot.jpg b/website/static/img/community/spotlight/dbt-athena-groupheadshot.jpg new file mode 100644 index 00000000000..2cc543890b8 Binary files /dev/null and b/website/static/img/community/spotlight/dbt-athena-groupheadshot.jpg differ diff --git a/website/static/img/community/spotlight/jenna-jordan.jpg b/website/static/img/community/spotlight/jenna-jordan.jpg new file mode 100644 index 00000000000..527bafb469f Binary files /dev/null and b/website/static/img/community/spotlight/jenna-jordan.jpg differ diff --git a/website/static/img/community/spotlight/mike-stanley.jpg b/website/static/img/community/spotlight/mike-stanley.jpg new file mode 100644 index 00000000000..df1c2e98ddf Binary files /dev/null and b/website/static/img/community/spotlight/mike-stanley.jpg differ diff --git a/website/static/img/community/spotlight/ruth-onyekwe.jpeg b/website/static/img/community/spotlight/ruth-onyekwe.jpeg new file mode 100644 index 00000000000..92c470184b1 Binary files /dev/null and b/website/static/img/community/spotlight/ruth-onyekwe.jpeg differ diff --git 
a/website/static/img/dbt-env.png b/website/static/img/dbt-env.png new file mode 100644 index 00000000000..d4cf58d7824 Binary files /dev/null and b/website/static/img/dbt-env.png differ diff --git a/website/static/img/docs/cloud-integrations/example-snowflake-native-app-service-token.png b/website/static/img/docs/cloud-integrations/example-snowflake-native-app-service-token.png index 7e4c7ab99da..930182969c2 100644 Binary files a/website/static/img/docs/cloud-integrations/example-snowflake-native-app-service-token.png and b/website/static/img/docs/cloud-integrations/example-snowflake-native-app-service-token.png differ diff --git a/website/static/img/docs/collaborate/dbt-explorer/example-model-details.png b/website/static/img/docs/collaborate/dbt-explorer/example-model-details.png index 9ceee1b3a23..a46f4d4ac5e 100644 Binary files a/website/static/img/docs/collaborate/dbt-explorer/example-model-details.png and b/website/static/img/docs/collaborate/dbt-explorer/example-model-details.png differ diff --git a/website/static/img/docs/collaborate/dbt-explorer/example-recommendations-tab.png b/website/static/img/docs/collaborate/dbt-explorer/example-recommendations-tab.png index 493930c35db..004019bfa54 100644 Binary files a/website/static/img/docs/collaborate/dbt-explorer/example-recommendations-tab.png and b/website/static/img/docs/collaborate/dbt-explorer/example-recommendations-tab.png differ diff --git a/website/static/img/docs/collaborate/dbt-explorer/explore-staging-env.png b/website/static/img/docs/collaborate/dbt-explorer/explore-staging-env.png index 6cd5d5b379b..61148dab9a9 100644 Binary files a/website/static/img/docs/collaborate/dbt-explorer/explore-staging-env.png and b/website/static/img/docs/collaborate/dbt-explorer/explore-staging-env.png differ diff --git a/website/static/img/docs/collaborate/dbt-explorer/sigma-example.jpg b/website/static/img/docs/collaborate/dbt-explorer/sigma-example.jpg new file mode 100644 index 00000000000..b1aa4533e08 Binary files /dev/null and b/website/static/img/docs/collaborate/dbt-explorer/sigma-example.jpg differ diff --git a/website/static/img/docs/connect-data-platform/choose-a-connection.png b/website/static/img/docs/connect-data-platform/choose-a-connection.png new file mode 100644 index 00000000000..cf8d106dd59 Binary files /dev/null and b/website/static/img/docs/connect-data-platform/choose-a-connection.png differ diff --git a/website/static/img/docs/connect-data-platform/connection-list.png b/website/static/img/docs/connect-data-platform/connection-list.png new file mode 100644 index 00000000000..c499e9baeba Binary files /dev/null and b/website/static/img/docs/connect-data-platform/connection-list.png differ diff --git a/website/static/img/docs/dbt-cloud/Navigate To Account Settings.png b/website/static/img/docs/dbt-cloud/Navigate To Account Settings.png index 08848fe39b1..cd4792b5c34 100644 Binary files a/website/static/img/docs/dbt-cloud/Navigate To Account Settings.png and b/website/static/img/docs/dbt-cloud/Navigate To Account Settings.png differ diff --git a/website/static/img/docs/dbt-cloud/Navigate-to-account-settings.png b/website/static/img/docs/dbt-cloud/Navigate-to-account-settings.png new file mode 100644 index 00000000000..cd4792b5c34 Binary files /dev/null and b/website/static/img/docs/dbt-cloud/Navigate-to-account-settings.png differ diff --git a/website/static/img/docs/dbt-cloud/access-control/sso-uri.png b/website/static/img/docs/dbt-cloud/access-control/sso-uri.png new file mode 100644 index 00000000000..c557b903e57 Binary 
files /dev/null and b/website/static/img/docs/dbt-cloud/access-control/sso-uri.png differ diff --git a/website/static/img/docs/dbt-cloud/account-integration-ai.jpg b/website/static/img/docs/dbt-cloud/account-integration-ai.jpg new file mode 100644 index 00000000000..7dd42ee037b Binary files /dev/null and b/website/static/img/docs/dbt-cloud/account-integration-ai.jpg differ diff --git a/website/static/img/docs/dbt-cloud/account-integration-azure-manual.jpg b/website/static/img/docs/dbt-cloud/account-integration-azure-manual.jpg new file mode 100644 index 00000000000..3b509d1c965 Binary files /dev/null and b/website/static/img/docs/dbt-cloud/account-integration-azure-manual.jpg differ diff --git a/website/static/img/docs/dbt-cloud/account-integration-azure-target.jpg b/website/static/img/docs/dbt-cloud/account-integration-azure-target.jpg new file mode 100644 index 00000000000..c8ff5dd8cf6 Binary files /dev/null and b/website/static/img/docs/dbt-cloud/account-integration-azure-target.jpg differ diff --git a/website/static/img/docs/dbt-cloud/account-integration-dbtlabs.jpg b/website/static/img/docs/dbt-cloud/account-integration-dbtlabs.jpg new file mode 100644 index 00000000000..a2d1386e0fa Binary files /dev/null and b/website/static/img/docs/dbt-cloud/account-integration-dbtlabs.jpg differ diff --git a/website/static/img/docs/dbt-cloud/account-integration-git.jpg b/website/static/img/docs/dbt-cloud/account-integration-git.jpg new file mode 100644 index 00000000000..70a275bd039 Binary files /dev/null and b/website/static/img/docs/dbt-cloud/account-integration-git.jpg differ diff --git a/website/static/img/docs/dbt-cloud/account-integration-oauth.jpg b/website/static/img/docs/dbt-cloud/account-integration-oauth.jpg new file mode 100644 index 00000000000..6efb135c46f Binary files /dev/null and b/website/static/img/docs/dbt-cloud/account-integration-oauth.jpg differ diff --git a/website/static/img/docs/dbt-cloud/account-integration-openai.jpg b/website/static/img/docs/dbt-cloud/account-integration-openai.jpg new file mode 100644 index 00000000000..f92fec5c712 Binary files /dev/null and b/website/static/img/docs/dbt-cloud/account-integration-openai.jpg differ diff --git a/website/static/img/docs/dbt-cloud/account-integrations.jpg b/website/static/img/docs/dbt-cloud/account-integrations.jpg new file mode 100644 index 00000000000..56ff1859636 Binary files /dev/null and b/website/static/img/docs/dbt-cloud/account-integrations.jpg differ diff --git a/website/static/img/docs/dbt-cloud/change_user_to_read_only_20221023.gif b/website/static/img/docs/dbt-cloud/change_user_to_read_only_20221023.gif index 8490393a0e6..703d5516d8b 100644 Binary files a/website/static/img/docs/dbt-cloud/change_user_to_read_only_20221023.gif and b/website/static/img/docs/dbt-cloud/change_user_to_read_only_20221023.gif differ diff --git a/website/static/img/docs/dbt-cloud/change_user_to_read_only_20221023.png b/website/static/img/docs/dbt-cloud/change_user_to_read_only_20221023.png new file mode 100644 index 00000000000..433ce4f3f56 Binary files /dev/null and b/website/static/img/docs/dbt-cloud/change_user_to_read_only_20221023.png differ diff --git a/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/choosing-dbt-version/example-environment-settings.png b/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/choosing-dbt-version/example-environment-settings.png index 02e5073fd16..7e0d2ea747a 100644 Binary files 
a/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/choosing-dbt-version/example-environment-settings.png and b/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/choosing-dbt-version/example-environment-settings.png differ diff --git a/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/choosing-dbt-version/job-settings.png b/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/choosing-dbt-version/job-settings.png index 8048df4c67a..bbbd852efbe 100644 Binary files a/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/choosing-dbt-version/job-settings.png and b/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/choosing-dbt-version/job-settings.png differ diff --git a/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/connecting-github/github-connect-1.png b/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/connecting-github/github-connect-1.png new file mode 100644 index 00000000000..31becd8c453 Binary files /dev/null and b/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/connecting-github/github-connect-1.png differ diff --git a/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/connecting-github/github-connect.png b/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/connecting-github/github-connect.png new file mode 100644 index 00000000000..18869ab426f Binary files /dev/null and b/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/connecting-github/github-connect.png differ diff --git a/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/create-deploy-env.png b/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/create-deploy-env.png new file mode 100644 index 00000000000..5f75707090c Binary files /dev/null and b/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/create-deploy-env.png differ diff --git a/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/delete-environment.png b/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/delete-environment.png new file mode 100644 index 00000000000..58225b53a57 Binary files /dev/null and b/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/delete-environment.png differ diff --git a/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/delete-job.png b/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/delete-job.png new file mode 100644 index 00000000000..c8817e08898 Binary files /dev/null and b/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/delete-job.png differ diff --git a/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/dev-environment-custom-branch.png b/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/dev-environment-custom-branch.png index 2ccf3ff9e76..ca2d0cd4e8e 100644 Binary files a/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/dev-environment-custom-branch.png and b/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/dev-environment-custom-branch.png differ diff --git a/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/managed-repo.png b/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/managed-repo.png index e2014cf3607..d5850f1bed1 100644 Binary files a/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/managed-repo.png and b/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/managed-repo.png differ diff --git a/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/prod-settings.png 
b/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/prod-settings.png new file mode 100644 index 00000000000..c36e9b56d7d Binary files /dev/null and b/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/prod-settings.png differ diff --git a/website/static/img/docs/dbt-cloud/cloud-ide/copilot-sql-generation-prompt.jpg b/website/static/img/docs/dbt-cloud/cloud-ide/copilot-sql-generation-prompt.jpg new file mode 100644 index 00000000000..da42bbd83dd Binary files /dev/null and b/website/static/img/docs/dbt-cloud/cloud-ide/copilot-sql-generation-prompt.jpg differ diff --git a/website/static/img/docs/dbt-cloud/cloud-ide/copilot-sql-generation.gif b/website/static/img/docs/dbt-cloud/cloud-ide/copilot-sql-generation.gif new file mode 100644 index 00000000000..74e6409e34d Binary files /dev/null and b/website/static/img/docs/dbt-cloud/cloud-ide/copilot-sql-generation.gif differ diff --git a/website/static/img/docs/dbt-cloud/cloud-ide/dbt-assist-toggle.jpg b/website/static/img/docs/dbt-cloud/cloud-ide/dbt-assist-toggle.jpg deleted file mode 100644 index 50dfbe7f51a..00000000000 Binary files a/website/static/img/docs/dbt-cloud/cloud-ide/dbt-assist-toggle.jpg and /dev/null differ diff --git a/website/static/img/docs/dbt-cloud/cloud-ide/dbt-assist.gif b/website/static/img/docs/dbt-cloud/cloud-ide/dbt-assist.gif deleted file mode 100644 index be3236a5123..00000000000 Binary files a/website/static/img/docs/dbt-cloud/cloud-ide/dbt-assist.gif and /dev/null differ diff --git a/website/static/img/docs/dbt-cloud/cloud-ide/dbt-copilot-doc.gif b/website/static/img/docs/dbt-cloud/cloud-ide/dbt-copilot-doc.gif index cca8db37a0a..2e4d42e2efe 100644 Binary files a/website/static/img/docs/dbt-cloud/cloud-ide/dbt-copilot-doc.gif and b/website/static/img/docs/dbt-cloud/cloud-ide/dbt-copilot-doc.gif differ diff --git a/website/static/img/docs/dbt-cloud/cloud-ide/editor-tab-menu-with-save.jpg b/website/static/img/docs/dbt-cloud/cloud-ide/editor-tab-menu-with-save.jpg index 73551cbcaa7..deca4bedc43 100644 Binary files a/website/static/img/docs/dbt-cloud/cloud-ide/editor-tab-menu-with-save.jpg and b/website/static/img/docs/dbt-cloud/cloud-ide/editor-tab-menu-with-save.jpg differ diff --git a/website/static/img/docs/dbt-cloud/cloud-ide/gitignore-italics.png b/website/static/img/docs/dbt-cloud/cloud-ide/gitignore-italics.png new file mode 100644 index 00000000000..943bbcfdb3f Binary files /dev/null and b/website/static/img/docs/dbt-cloud/cloud-ide/gitignore-italics.png differ diff --git a/website/static/img/docs/dbt-cloud/cloud-ide/ide-basic-layout.jpg b/website/static/img/docs/dbt-cloud/cloud-ide/ide-basic-layout.jpg index 3960c6a4bff..116644b4764 100644 Binary files a/website/static/img/docs/dbt-cloud/cloud-ide/ide-basic-layout.jpg and b/website/static/img/docs/dbt-cloud/cloud-ide/ide-basic-layout.jpg differ diff --git a/website/static/img/docs/dbt-cloud/cloud-ide/ide-command-bar.jpg b/website/static/img/docs/dbt-cloud/cloud-ide/ide-command-bar.jpg index ba6f8fc22c0..b1d0fd3ec7b 100644 Binary files a/website/static/img/docs/dbt-cloud/cloud-ide/ide-command-bar.jpg and b/website/static/img/docs/dbt-cloud/cloud-ide/ide-command-bar.jpg differ diff --git a/website/static/img/docs/dbt-cloud/cloud-ide/ide-console-overview.jpg b/website/static/img/docs/dbt-cloud/cloud-ide/ide-console-overview.jpg index 8212e9e3311..33780cf76f9 100644 Binary files a/website/static/img/docs/dbt-cloud/cloud-ide/ide-console-overview.jpg and b/website/static/img/docs/dbt-cloud/cloud-ide/ide-console-overview.jpg differ diff --git 
a/website/static/img/docs/dbt-cloud/cloud-ide/ide-editing.jpg b/website/static/img/docs/dbt-cloud/cloud-ide/ide-editing.jpg index 897497efc5b..d35caf29768 100644 Binary files a/website/static/img/docs/dbt-cloud/cloud-ide/ide-editing.jpg and b/website/static/img/docs/dbt-cloud/cloud-ide/ide-editing.jpg differ diff --git a/website/static/img/docs/dbt-cloud/cloud-ide/ide-editor-command-palette-with-save.jpg b/website/static/img/docs/dbt-cloud/cloud-ide/ide-editor-command-palette-with-save.jpg index 25e4f2b32a1..2b50f870251 100644 Binary files a/website/static/img/docs/dbt-cloud/cloud-ide/ide-editor-command-palette-with-save.jpg and b/website/static/img/docs/dbt-cloud/cloud-ide/ide-editor-command-palette-with-save.jpg differ diff --git a/website/static/img/docs/dbt-cloud/cloud-ide/ide-file-search-with-save.jpg b/website/static/img/docs/dbt-cloud/cloud-ide/ide-file-search-with-save.jpg index 9d8e82b98cb..775e1141330 100644 Binary files a/website/static/img/docs/dbt-cloud/cloud-ide/ide-file-search-with-save.jpg and b/website/static/img/docs/dbt-cloud/cloud-ide/ide-file-search-with-save.jpg differ diff --git a/website/static/img/docs/dbt-cloud/cloud-ide/ide-git-diff-view-with-save.jpg b/website/static/img/docs/dbt-cloud/cloud-ide/ide-git-diff-view-with-save.jpg index 777551dc49b..1f92e5a4cb5 100644 Binary files a/website/static/img/docs/dbt-cloud/cloud-ide/ide-git-diff-view-with-save.jpg and b/website/static/img/docs/dbt-cloud/cloud-ide/ide-git-diff-view-with-save.jpg differ diff --git a/website/static/img/docs/dbt-cloud/cloud-ide/ide-global-command-palette-with-save.jpg b/website/static/img/docs/dbt-cloud/cloud-ide/ide-global-command-palette-with-save.jpg index 32ce741269c..d2c86345895 100644 Binary files a/website/static/img/docs/dbt-cloud/cloud-ide/ide-global-command-palette-with-save.jpg and b/website/static/img/docs/dbt-cloud/cloud-ide/ide-global-command-palette-with-save.jpg differ diff --git a/website/static/img/docs/dbt-cloud/cloud-ide/ide-minimap.jpg b/website/static/img/docs/dbt-cloud/cloud-ide/ide-minimap.jpg index 8da575c2034..ca465bf2ec8 100644 Binary files a/website/static/img/docs/dbt-cloud/cloud-ide/ide-minimap.jpg and b/website/static/img/docs/dbt-cloud/cloud-ide/ide-minimap.jpg differ diff --git a/website/static/img/docs/dbt-cloud/cloud-ide/ide-options-menu-with-save.jpg b/website/static/img/docs/dbt-cloud/cloud-ide/ide-options-menu-with-save.jpg index bd57ee514ee..8a968f684bd 100644 Binary files a/website/static/img/docs/dbt-cloud/cloud-ide/ide-options-menu-with-save.jpg and b/website/static/img/docs/dbt-cloud/cloud-ide/ide-options-menu-with-save.jpg differ diff --git a/website/static/img/docs/dbt-cloud/cloud-ide/ide-side-menu.jpg b/website/static/img/docs/dbt-cloud/cloud-ide/ide-side-menu.jpg index 060d273e3f5..71c182c302a 100644 Binary files a/website/static/img/docs/dbt-cloud/cloud-ide/ide-side-menu.jpg and b/website/static/img/docs/dbt-cloud/cloud-ide/ide-side-menu.jpg differ diff --git a/website/static/img/docs/dbt-cloud/cloud-ide/lineage-console-tab.jpg b/website/static/img/docs/dbt-cloud/cloud-ide/lineage-console-tab.jpg index cc0a0ffc41b..7d27314408c 100644 Binary files a/website/static/img/docs/dbt-cloud/cloud-ide/lineage-console-tab.jpg and b/website/static/img/docs/dbt-cloud/cloud-ide/lineage-console-tab.jpg differ diff --git a/website/static/img/docs/dbt-cloud/cloud-ide/restart-ide.jpg b/website/static/img/docs/dbt-cloud/cloud-ide/restart-ide.jpg index 98d71403cdd..031ec19227f 100644 Binary files a/website/static/img/docs/dbt-cloud/cloud-ide/restart-ide.jpg and 
b/website/static/img/docs/dbt-cloud/cloud-ide/restart-ide.jpg differ diff --git a/website/static/img/docs/dbt-cloud/cloud-ide/revert-uncommitted-changes-with-save.jpg b/website/static/img/docs/dbt-cloud/cloud-ide/revert-uncommitted-changes-with-save.jpg index bfd5832c001..7f7f520f5bb 100644 Binary files a/website/static/img/docs/dbt-cloud/cloud-ide/revert-uncommitted-changes-with-save.jpg and b/website/static/img/docs/dbt-cloud/cloud-ide/revert-uncommitted-changes-with-save.jpg differ diff --git a/website/static/img/docs/dbt-cloud/connecting-azure-devops/LinktoAzure.png b/website/static/img/docs/dbt-cloud/connecting-azure-devops/LinktoAzure.png index cd233b3f8e7..6cc30d05c6f 100644 Binary files a/website/static/img/docs/dbt-cloud/connecting-azure-devops/LinktoAzure.png and b/website/static/img/docs/dbt-cloud/connecting-azure-devops/LinktoAzure.png differ diff --git a/website/static/img/docs/dbt-cloud/delete_projects_from_dbt_cloud.png b/website/static/img/docs/dbt-cloud/delete_projects_from_dbt_cloud.png new file mode 100644 index 00000000000..c3a47797e84 Binary files /dev/null and b/website/static/img/docs/dbt-cloud/delete_projects_from_dbt_cloud.png differ diff --git a/website/static/img/docs/dbt-cloud/delete_user.png b/website/static/img/docs/dbt-cloud/delete_user.png new file mode 100644 index 00000000000..e767af673d8 Binary files /dev/null and b/website/static/img/docs/dbt-cloud/delete_user.png differ diff --git a/website/static/img/docs/dbt-cloud/deployment/access-logs.png b/website/static/img/docs/dbt-cloud/deployment/access-logs.png new file mode 100644 index 00000000000..ee8dd5f07fc Binary files /dev/null and b/website/static/img/docs/dbt-cloud/deployment/access-logs.png differ diff --git a/website/static/img/docs/dbt-cloud/deployment/run-history.png b/website/static/img/docs/dbt-cloud/deployment/run-history.png index 17196ba57a1..019a961b257 100644 Binary files a/website/static/img/docs/dbt-cloud/deployment/run-history.png and b/website/static/img/docs/dbt-cloud/deployment/run-history.png differ diff --git a/website/static/img/docs/dbt-cloud/deployment/sl-ci-job.png b/website/static/img/docs/dbt-cloud/deployment/sl-ci-job.png new file mode 100644 index 00000000000..e64822e1fe7 Binary files /dev/null and b/website/static/img/docs/dbt-cloud/deployment/sl-ci-job.png differ diff --git a/website/static/img/docs/dbt-cloud/disconnect-repo.png b/website/static/img/docs/dbt-cloud/disconnect-repo.png new file mode 100644 index 00000000000..084bea9cfd7 Binary files /dev/null and b/website/static/img/docs/dbt-cloud/disconnect-repo.png differ diff --git a/website/static/img/docs/dbt-cloud/discovery-api/model-timing.png b/website/static/img/docs/dbt-cloud/discovery-api/model-timing.png new file mode 100644 index 00000000000..3510473a090 Binary files /dev/null and b/website/static/img/docs/dbt-cloud/discovery-api/model-timing.png differ diff --git a/website/static/img/docs/dbt-cloud/example-enable-model-notifications.png b/website/static/img/docs/dbt-cloud/example-enable-model-notifications.png new file mode 100644 index 00000000000..16cf5457db5 Binary files /dev/null and b/website/static/img/docs/dbt-cloud/example-enable-model-notifications.png differ diff --git a/website/static/img/docs/dbt-cloud/example-git-signed-commits-setting.png b/website/static/img/docs/dbt-cloud/example-git-signed-commits-setting.png new file mode 100644 index 00000000000..bf3f8169359 Binary files /dev/null and b/website/static/img/docs/dbt-cloud/example-git-signed-commits-setting.png differ diff --git 
a/website/static/img/docs/dbt-cloud/git-sign-verified.jpg b/website/static/img/docs/dbt-cloud/git-sign-verified.jpg new file mode 100644 index 00000000000..86fbdd58dc9 Binary files /dev/null and b/website/static/img/docs/dbt-cloud/git-sign-verified.jpg differ diff --git a/website/static/img/docs/dbt-cloud/refresh-ide/new-environment-fields.png b/website/static/img/docs/dbt-cloud/refresh-ide/new-environment-fields.png index b70d047553b..d94122187c2 100644 Binary files a/website/static/img/docs/dbt-cloud/refresh-ide/new-environment-fields.png and b/website/static/img/docs/dbt-cloud/refresh-ide/new-environment-fields.png differ diff --git a/website/static/img/docs/dbt-cloud/semantic-layer/sl-add-service-token.gif b/website/static/img/docs/dbt-cloud/semantic-layer/sl-add-service-token.gif new file mode 100644 index 00000000000..a27df85e8ec Binary files /dev/null and b/website/static/img/docs/dbt-cloud/semantic-layer/sl-add-service-token.gif differ diff --git a/website/static/img/docs/dbt-cloud/semantic-layer/sl-concept.png b/website/static/img/docs/dbt-cloud/semantic-layer/sl-concept.png new file mode 100644 index 00000000000..f1b1a252dc6 Binary files /dev/null and b/website/static/img/docs/dbt-cloud/semantic-layer/sl-concept.png differ diff --git a/website/static/img/docs/dbt-cloud/semantic-layer/sl-credential-no-service-token.jpg b/website/static/img/docs/dbt-cloud/semantic-layer/sl-credential-no-service-token.jpg new file mode 100644 index 00000000000..5a6ab83d96b Binary files /dev/null and b/website/static/img/docs/dbt-cloud/semantic-layer/sl-credential-no-service-token.jpg differ diff --git a/website/static/img/docs/dbt-cloud/using-dbt-cloud/Environment Variables/job-override.gif b/website/static/img/docs/dbt-cloud/using-dbt-cloud/Environment Variables/job-override.gif index 3ce6cee6259..1fb2cbd3e97 100644 Binary files a/website/static/img/docs/dbt-cloud/using-dbt-cloud/Environment Variables/job-override.gif and b/website/static/img/docs/dbt-cloud/using-dbt-cloud/Environment Variables/job-override.gif differ diff --git a/website/static/img/docs/dbt-cloud/using-dbt-cloud/Environment Variables/navigate-to-env-vars.png b/website/static/img/docs/dbt-cloud/using-dbt-cloud/Environment Variables/navigate-to-env-vars.png new file mode 100644 index 00000000000..fc72778ff33 Binary files /dev/null and b/website/static/img/docs/dbt-cloud/using-dbt-cloud/Environment Variables/navigate-to-env-vars.png differ diff --git a/website/static/img/docs/dbt-cloud/using-dbt-cloud/Environment Variables/personal-override.gif b/website/static/img/docs/dbt-cloud/using-dbt-cloud/Environment Variables/personal-override.gif index 4185e3c98d8..d3e64f2c4af 100644 Binary files a/website/static/img/docs/dbt-cloud/using-dbt-cloud/Environment Variables/personal-override.gif and b/website/static/img/docs/dbt-cloud/using-dbt-cloud/Environment Variables/personal-override.gif differ diff --git a/website/static/img/docs/dbt-cloud/using-dbt-cloud/Environment Variables/personal-override.png b/website/static/img/docs/dbt-cloud/using-dbt-cloud/Environment Variables/personal-override.png index 64b0ac8170f..b221a0b73ba 100644 Binary files a/website/static/img/docs/dbt-cloud/using-dbt-cloud/Environment Variables/personal-override.png and b/website/static/img/docs/dbt-cloud/using-dbt-cloud/Environment Variables/personal-override.png differ diff --git a/website/static/img/docs/dbt-cloud/using-dbt-cloud/Environment Variables/refresh-ide.gif b/website/static/img/docs/dbt-cloud/using-dbt-cloud/Environment Variables/refresh-ide.gif deleted 
file mode 100644 index 14b700547ca..00000000000 Binary files a/website/static/img/docs/dbt-cloud/using-dbt-cloud/Environment Variables/refresh-ide.gif and /dev/null differ diff --git a/website/static/img/docs/dbt-cloud/using-dbt-cloud/Environment Variables/refresh-ide.png b/website/static/img/docs/dbt-cloud/using-dbt-cloud/Environment Variables/refresh-ide.png new file mode 100644 index 00000000000..54588f53d5d Binary files /dev/null and b/website/static/img/docs/dbt-cloud/using-dbt-cloud/Environment Variables/refresh-ide.png differ diff --git a/website/static/img/docs/dbt-cloud/using-dbt-cloud/create-ci-job.png b/website/static/img/docs/dbt-cloud/using-dbt-cloud/create-ci-job.png index 4455d52f1a8..e1c94a74539 100644 Binary files a/website/static/img/docs/dbt-cloud/using-dbt-cloud/create-ci-job.png and b/website/static/img/docs/dbt-cloud/using-dbt-cloud/create-ci-job.png differ diff --git a/website/static/img/docs/dbt-cloud/using-dbt-cloud/data-sources.png b/website/static/img/docs/dbt-cloud/using-dbt-cloud/data-sources.png index be7a96f7177..8119f404742 100644 Binary files a/website/static/img/docs/dbt-cloud/using-dbt-cloud/data-sources.png and b/website/static/img/docs/dbt-cloud/using-dbt-cloud/data-sources.png differ diff --git a/website/static/img/docs/dbt-cloud/using-dbt-cloud/dbt-cloud-enterprise/BQ-auth/dbt-cloud-bq-id-secret-02.png b/website/static/img/docs/dbt-cloud/using-dbt-cloud/dbt-cloud-enterprise/BQ-auth/dbt-cloud-bq-id-secret-02.png new file mode 100644 index 00000000000..40d1a6b3be8 Binary files /dev/null and b/website/static/img/docs/dbt-cloud/using-dbt-cloud/dbt-cloud-enterprise/BQ-auth/dbt-cloud-bq-id-secret-02.png differ diff --git a/website/static/img/docs/dbt-cloud/using-dbt-cloud/documentation-job-execution-settings.png b/website/static/img/docs/dbt-cloud/using-dbt-cloud/documentation-job-execution-settings.png index 845e1fcf7a7..0886f82dc0c 100644 Binary files a/website/static/img/docs/dbt-cloud/using-dbt-cloud/documentation-job-execution-settings.png and b/website/static/img/docs/dbt-cloud/using-dbt-cloud/documentation-job-execution-settings.png differ diff --git a/website/static/img/docs/dbt-cloud/using-dbt-cloud/documentation-project-details.png b/website/static/img/docs/dbt-cloud/using-dbt-cloud/documentation-project-details.png index 6c5e845284d..7aae09edc14 100644 Binary files a/website/static/img/docs/dbt-cloud/using-dbt-cloud/documentation-project-details.png and b/website/static/img/docs/dbt-cloud/using-dbt-cloud/documentation-project-details.png differ diff --git a/website/static/img/docs/dbt-cloud/using-dbt-cloud/fail-dbtdeps.png b/website/static/img/docs/dbt-cloud/using-dbt-cloud/fail-dbtdeps.png new file mode 100644 index 00000000000..f7375e9f3db Binary files /dev/null and b/website/static/img/docs/dbt-cloud/using-dbt-cloud/fail-dbtdeps.png differ diff --git a/website/static/img/docs/dbt-cloud/using-dbt-cloud/jobs-settings-target-name.png b/website/static/img/docs/dbt-cloud/using-dbt-cloud/jobs-settings-target-name.png index cdaaef68ed1..3249a01c0db 100644 Binary files a/website/static/img/docs/dbt-cloud/using-dbt-cloud/jobs-settings-target-name.png and b/website/static/img/docs/dbt-cloud/using-dbt-cloud/jobs-settings-target-name.png differ diff --git a/website/static/img/docs/dbt-cloud/using-dbt-cloud/prod-settings-1.png b/website/static/img/docs/dbt-cloud/using-dbt-cloud/prod-settings-1.png new file mode 100644 index 00000000000..5fd53ffde78 Binary files /dev/null and b/website/static/img/docs/dbt-cloud/using-dbt-cloud/prod-settings-1.png differ 
diff --git a/website/static/img/docs/dbt-cloud/using-dbt-cloud/prod-settings.png b/website/static/img/docs/dbt-cloud/using-dbt-cloud/prod-settings.png new file mode 100644 index 00000000000..5f75707090c Binary files /dev/null and b/website/static/img/docs/dbt-cloud/using-dbt-cloud/prod-settings.png differ diff --git a/website/static/img/docs/dbt-cloud/using-dbt-cloud/using_ci_dbt_cloud.png b/website/static/img/docs/dbt-cloud/using-dbt-cloud/using_ci_dbt_cloud.png index 5e89f81c621..352976cd38d 100644 Binary files a/website/static/img/docs/dbt-cloud/using-dbt-cloud/using_ci_dbt_cloud.png and b/website/static/img/docs/dbt-cloud/using-dbt-cloud/using_ci_dbt_cloud.png differ diff --git a/website/static/img/docs/dbt-cloud/visual-editor/built-in-ai.jpg b/website/static/img/docs/dbt-cloud/visual-editor/built-in-ai.jpg new file mode 100644 index 00000000000..bb38140947e Binary files /dev/null and b/website/static/img/docs/dbt-cloud/visual-editor/built-in-ai.jpg differ diff --git a/website/static/img/docs/dbt-cloud/visual-editor/config-panel.jpg b/website/static/img/docs/dbt-cloud/visual-editor/config-panel.jpg new file mode 100644 index 00000000000..3f0cbb5cf4b Binary files /dev/null and b/website/static/img/docs/dbt-cloud/visual-editor/config-panel.jpg differ diff --git a/website/static/img/docs/dbt-cloud/visual-editor/config-panel.png b/website/static/img/docs/dbt-cloud/visual-editor/config-panel.png new file mode 100644 index 00000000000..a568ac0e8a3 Binary files /dev/null and b/website/static/img/docs/dbt-cloud/visual-editor/config-panel.png differ diff --git a/website/static/img/docs/dbt-cloud/visual-editor/connector.jpg b/website/static/img/docs/dbt-cloud/visual-editor/connector.jpg new file mode 100644 index 00000000000..41b23fde089 Binary files /dev/null and b/website/static/img/docs/dbt-cloud/visual-editor/connector.jpg differ diff --git a/website/static/img/docs/dbt-cloud/visual-editor/connector.png b/website/static/img/docs/dbt-cloud/visual-editor/connector.png new file mode 100644 index 00000000000..7dc86be406e Binary files /dev/null and b/website/static/img/docs/dbt-cloud/visual-editor/connector.png differ diff --git a/website/static/img/docs/dbt-cloud/visual-editor/edit-model.jpg b/website/static/img/docs/dbt-cloud/visual-editor/edit-model.jpg new file mode 100644 index 00000000000..0ee8c505ebf Binary files /dev/null and b/website/static/img/docs/dbt-cloud/visual-editor/edit-model.jpg differ diff --git a/website/static/img/docs/dbt-cloud/visual-editor/edit-model.png b/website/static/img/docs/dbt-cloud/visual-editor/edit-model.png new file mode 100644 index 00000000000..06a4ca5c40e Binary files /dev/null and b/website/static/img/docs/dbt-cloud/visual-editor/edit-model.png differ diff --git a/website/static/img/docs/dbt-cloud/visual-editor/operator.jpg b/website/static/img/docs/dbt-cloud/visual-editor/operator.jpg new file mode 100644 index 00000000000..87a49f63a97 Binary files /dev/null and b/website/static/img/docs/dbt-cloud/visual-editor/operator.jpg differ diff --git a/website/static/img/docs/dbt-cloud/visual-editor/operator.png b/website/static/img/docs/dbt-cloud/visual-editor/operator.png new file mode 100644 index 00000000000..e48f6d6e015 Binary files /dev/null and b/website/static/img/docs/dbt-cloud/visual-editor/operator.png differ diff --git a/website/static/img/docs/dbt-cloud/visual-editor/visual-editor.jpg b/website/static/img/docs/dbt-cloud/visual-editor/visual-editor.jpg new file mode 100644 index 00000000000..2e7faa34e02 Binary files /dev/null and 
b/website/static/img/docs/dbt-cloud/visual-editor/visual-editor.jpg differ diff --git a/website/static/img/docs/dbt-cloud/visual-editor/visual-editor.png b/website/static/img/docs/dbt-cloud/visual-editor/visual-editor.png new file mode 100644 index 00000000000..1f3f28bb052 Binary files /dev/null and b/website/static/img/docs/dbt-cloud/visual-editor/visual-editor.png differ diff --git a/website/static/img/docs/dbt-versions/experimental-feats.png b/website/static/img/docs/dbt-versions/experimental-feats.png index 93764f66b7c..fb1a4dbaf87 100644 Binary files a/website/static/img/docs/dbt-versions/experimental-feats.png and b/website/static/img/docs/dbt-versions/experimental-feats.png differ diff --git a/website/static/img/docs/deploy/apples_to_apples.png b/website/static/img/docs/deploy/apples_to_apples.png new file mode 100644 index 00000000000..b1216b6eeb2 Binary files /dev/null and b/website/static/img/docs/deploy/apples_to_apples.png differ diff --git a/website/static/img/docs/deploy/dbt-compare.jpg b/website/static/img/docs/deploy/dbt-compare.jpg new file mode 100644 index 00000000000..a7f27d31efa Binary files /dev/null and b/website/static/img/docs/deploy/dbt-compare.jpg differ diff --git a/website/static/img/guides/dbt-ecosystem/dbt-python-snowpark/5-development-schema-name/1-settings-gear-icon.png b/website/static/img/guides/dbt-ecosystem/dbt-python-snowpark/5-development-schema-name/1-settings-gear-icon.png index c23cc053998..941ac76c093 100644 Binary files a/website/static/img/guides/dbt-ecosystem/dbt-python-snowpark/5-development-schema-name/1-settings-gear-icon.png and b/website/static/img/guides/dbt-ecosystem/dbt-python-snowpark/5-development-schema-name/1-settings-gear-icon.png differ diff --git a/website/static/img/icons/dot-ai.svg b/website/static/img/icons/dot-ai.svg new file mode 100644 index 00000000000..d0223968caa --- /dev/null +++ b/website/static/img/icons/dot-ai.svg @@ -0,0 +1,33441 @@ [33,441 added lines of SVG markup not reproduced here]
\ No newline at end of file diff --git a/website/static/img/icons/white/dot-ai.svg b/website/static/img/icons/white/dot-ai.svg new file mode 100644 index 00000000000..d0223968caa --- /dev/null +++ b/website/static/img/icons/white/dot-ai.svg @@ -0,0 +1,33441 @@ [33,441 added lines of SVG markup not reproduced here]
\ No newline at end of file diff --git a/website/vercel.json b/website/vercel.json index 0674313f3f5..b68dc053db9 100644 --- a/website/vercel.json +++ b/website/vercel.json @@ -2,6 +2,16 @@ "cleanUrls": true, "trailingSlash": false, "redirects": [ + { + "source": "/faqs/API/rotate-token", + "destination": "/docs/dbt-cloud-apis/service-tokens#service-token-update", + "permanent": true + }, + { + "source": "/styles", + "destination": "https://github.com/dbt-labs/docs.getdbt.com/blob/current/contributing/adding-page-components.md", + "permanent": true + }, { "source": "/docs/dbt-cloud-apis/sl-manifest", "destination": "/reference/artifacts/sl-manifest", "permanent": true }, @@ -92,6 +102,11 @@ "destination": "/docs/dbt-versions/core-upgrade/Older%20versions/upgrading-to-v1.4", "permanent": true }, + { + "source": "/docs/dbt-versions/versionless-cloud", + "destination": 
"/docs/dbt-versions/cloud-release-tracks", + "permanent": true + }, { "source": "/best-practices/how-we-mesh/mesh-4-faqs", "destination": "/best-practices/how-we-mesh/mesh-5-faqs", @@ -3636,7 +3651,7 @@ }, { "key": "Content-Security-Policy", - "value": "img-src 'self' data: https:;" + "value": "img-src 'self' data: https:; frame-ancestors 'self' https://*.mutinyhq.com https://*.getdbt.com" }, { "key": "Strict-Transport-Security",