fix: data_warehouse DTS SA, reorganization, fix workflow order
davenportjw authored Jul 20, 2023
1 parent 8ff81d7 commit 88fe3ef
Showing 14 changed files with 440 additions and 335 deletions.
13 changes: 13 additions & 0 deletions examples/scheduled_queries/README.md
@@ -22,3 +22,16 @@ Run the following commands within this directory:
- `terraform plan` to see the infrastructure plan
- `terraform apply` to apply the infrastructure build
- `terraform destroy` to destroy the built infrastructure

<!-- BEGINNING OF PRE-COMMIT-TERRAFORM DOCS HOOK -->
## Inputs

No inputs.

## Outputs

| Name | Description |
|------|-------------|
| query\_names | The resource names of the transfer config |

<!-- END OF PRE-COMMIT-TERRAFORM DOCS HOOK -->
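
The `query_names` output documented above comes from the example's generated docs. A minimal sketch of how such an output is typically wired, assuming the example instantiates the scheduled_queries submodule under the name `scheduled_queries` (that reference is an assumption, not shown in this diff):

```hcl
# Hypothetical outputs.tf for the example; the module name is assumed.
output "query_names" {
  description = "The resource names of the transfer config"
  value       = module.scheduled_queries.query_names
}
```
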
2 changes: 1 addition & 1 deletion metadata.yaml
@@ -24,7 +24,7 @@ spec:
  source:
    repo: https://github.com/terraform-google-modules/terraform-google-bigquery
    sourceType: git
-  version: 6.0.0
+  version: 6.1.0
  actuationTool:
    flavor: Terraform
    version: '>= 0.13'
2 changes: 1 addition & 1 deletion modules/authorization/metadata.yaml
@@ -25,7 +25,7 @@ spec:
    repo: https://github.com/terraform-google-modules/terraform-google-bigquery
    sourceType: git
    dir: /modules/authorization
-  version: 6.0.0
+  version: 6.1.0
  actuationTool:
    flavor: Terraform
    version: '>= 0.13'
217 changes: 217 additions & 0 deletions modules/data_warehouse/bigquery.tf
@@ -0,0 +1,217 @@
/**
* Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

# Set up BigQuery resources
# # Create the BigQuery dataset
resource "google_bigquery_dataset" "ds_edw" {
project = module.project-services.project_id
dataset_id = "ds_edw"
friendly_name = "My EDW Dataset"
description = "My EDW Dataset with tables"
location = var.region
labels = var.labels
delete_contents_on_destroy = var.force_destroy
}

# # Create a BigQuery connection
resource "google_bigquery_connection" "ds_connection" {
project = module.project-services.project_id
connection_id = "ds_connection"
location = var.region
friendly_name = "Storage Bucket Connection"
cloud_resource {}
}
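
The empty `cloud_resource {}` block has Google provision a service account dedicated to this connection; its identity is exposed as an attribute on the resource. A minimal sketch of surfacing it for inspection (the output name is hypothetical):

```hcl
# Hypothetical output; handy when auditing the storage grant below.
output "bq_connection_service_account" {
  description = "Service account provisioned for the BigQuery connection"
  value       = google_bigquery_connection.ds_connection.cloud_resource[0].service_account_id
}
```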

# # Grant IAM access to the BigQuery Connection account for Cloud Storage
resource "google_storage_bucket_iam_binding" "bq_connection_iam_object_viewer" {
bucket = google_storage_bucket.raw_bucket.name
role = "roles/storage.objectViewer"
members = [
"serviceAccount:${google_bigquery_connection.ds_connection.cloud_resource[0].service_account_id}",
]

depends_on = [
google_bigquery_connection.ds_connection,
]
}

# # Create a BigQuery external table
resource "google_bigquery_table" "tbl_edw_taxi" {
dataset_id = google_bigquery_dataset.ds_edw.dataset_id
table_id = "taxi_trips"
project = module.project-services.project_id
deletion_protection = var.deletion_protection

external_data_configuration {
autodetect = true
connection_id = "${module.project-services.project_id}.${var.region}.ds_connection"
source_format = "PARQUET"
source_uris = ["gs://${google_storage_bucket.raw_bucket.name}/new-york-taxi-trips/tlc-yellow-trips-2022/taxi-*.Parquet"]

}

schema = file("${path.module}/src/taxi_trips_schema.json")

depends_on = [
google_bigquery_connection.ds_connection,
google_storage_bucket.raw_bucket,
]
}
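
The `connection_id` above is assembled by string interpolation in the `project.location.connection_id` form. Recent provider versions also accept the fully qualified connection name, so the block could reference the connection resource directly; a sketch, assuming that provider behavior:

```hcl
# Sketch only: derives the ID from the resource instead of rebuilding it by hand.
external_data_configuration {
  autodetect    = true
  connection_id = google_bigquery_connection.ds_connection.name # projects/{p}/locations/{l}/connections/{c}
  source_format = "PARQUET"
  source_uris   = ["gs://${google_storage_bucket.raw_bucket.name}/new-york-taxi-trips/tlc-yellow-trips-2022/taxi-*.Parquet"]
}
```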

# Load Queries for Stored Procedure Execution
# # Load Lookup Data Tables
resource "google_bigquery_routine" "sp_provision_lookup_tables" {
project = module.project-services.project_id
dataset_id = google_bigquery_dataset.ds_edw.dataset_id
routine_id = "sp_provision_lookup_tables"
routine_type = "PROCEDURE"
language = "SQL"
definition_body = templatefile("${path.module}/src/sql/sp_provision_lookup_tables.sql", { project_id = module.project-services.project_id })

depends_on = [
google_bigquery_dataset.ds_edw,
]
}


# # Add Looker Studio Data Report Procedure
resource "google_bigquery_routine" "sproc_sp_demo_datastudio_report" {
project = module.project-services.project_id
dataset_id = google_bigquery_dataset.ds_edw.dataset_id
routine_id = "sp_lookerstudio_report"
routine_type = "PROCEDURE"
language = "SQL"
definition_body = templatefile("${path.module}/src/sql/sp_lookerstudio_report.sql", { project_id = module.project-services.project_id })

depends_on = [
google_bigquery_table.tbl_edw_taxi,
]
}

# # Add Sample Queries
resource "google_bigquery_routine" "sp_sample_queries" {
project = module.project-services.project_id
dataset_id = google_bigquery_dataset.ds_edw.dataset_id
routine_id = "sp_sample_queries"
routine_type = "PROCEDURE"
language = "SQL"
definition_body = templatefile("${path.module}/src/sql/sp_sample_queries.sql", { project_id = module.project-services.project_id })

depends_on = [
google_bigquery_table.tbl_edw_taxi,
]
}

# # Add BigQuery ML Model
resource "google_bigquery_routine" "sp_bigqueryml_model" {
  project         = module.project-services.project_id
  dataset_id      = google_bigquery_dataset.ds_edw.dataset_id
  routine_id      = "sp_bigqueryml_model"
  routine_type    = "PROCEDURE"
  language        = "SQL"
  definition_body = templatefile("${path.module}/src/sql/sp_bigqueryml_model.sql", { project_id = module.project-services.project_id })

  depends_on = [
    google_bigquery_table.tbl_edw_taxi,
  ]
}

# # Add Translation Scripts
resource "google_bigquery_routine" "sp_sample_translation_queries" {
project = module.project-services.project_id
dataset_id = google_bigquery_dataset.ds_edw.dataset_id
routine_id = "sp_sample_translation_queries"
routine_type = "PROCEDURE"
language = "SQL"
definition_body = templatefile("${path.module}/src/sql/sp_sample_translation_queries.sql", { project_id = module.project-services.project_id })

depends_on = [
google_bigquery_table.tbl_edw_taxi,
]
}

# Add Scheduled Query
# # Set up DTS permissions
resource "google_project_service_identity" "bigquery_data_transfer_sa" {
provider = google-beta
project = module.project-services.project_id
service = "bigquerydatatransfer.googleapis.com"
}

# # Grant the DTS service account access
resource "google_project_iam_member" "dts_service_account_roles" {
for_each = toset([
"roles/bigquerydatatransfer.serviceAgent",
])

project = module.project-services.project_id
role = each.key
member = "serviceAccount:${google_project_service_identity.bigquery_data_transfer_sa.email}"
}

# Create a dedicated service account for DTS runs
# # Set up the DTS service account
resource "google_service_account" "dts" {
  project      = module.project-services.project_id
  account_id   = "cloud-dts-sa-${random_id.id.hex}"
  display_name = "Service Account for Data Transfer Service"
}
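
The `random_id.id.hex` suffix keeps the account ID unique across deployments; the resource itself is not part of this hunk. A hedged sketch of what it presumably looks like elsewhere in the module:

```hcl
# Assumed definition (lives in another file); byte_length is illustrative.
resource "random_id" "id" {
  byte_length = 4
}
```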

# # Grant the DTS-specific service account access
resource "google_project_iam_member" "dts_roles" {
  for_each = toset([
    "roles/bigquery.user",
    "roles/bigquery.dataEditor",
  ])

  project = module.project-services.project_id
  role    = each.key
  member  = "serviceAccount:${google_service_account.dts.email}"
}

# # Grant the DTS service agent Token Creator on the DTS-specific service account
resource "google_service_account_iam_binding" "dts_token_creator" {
  service_account_id = google_service_account.dts.id
  role               = "roles/iam.serviceAccountTokenCreator"
  members = [
    "serviceAccount:${google_project_service_identity.bigquery_data_transfer_sa.email}",
  ]

  depends_on = [
    google_project_iam_member.dts_service_account_roles,
  ]
}

# Set up scheduled query
resource "google_bigquery_data_transfer_config" "dts_config" {

display_name = "nightlyloadquery"
project = module.project-services.project_id
location = var.region
data_source_id = "scheduled_query"
schedule = "every day 00:00"
params = {
query = "CALL `${module.project-services.project_id}.ds_edw.sp_bigqueryml_model`()"
}
service_account_name = google_service_account.dts.email

depends_on = [
google_project_iam_member.dts_roles,
google_bigquery_dataset.ds_edw,
google_service_account_iam_binding.dts_token_creator,
time_sleep.wait_to_startfile,
]
}
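
`time_sleep.wait_to_startfile` is only referenced here; per the commit message, it sequences the scheduled query behind the workflow that stages the source data. A hedged sketch of the general shape of such a resource from the hashicorp/time provider (the duration and dependency are assumptions):

```hcl
# Assumption: a pause so the workflow has loaded data before the first scheduled run.
resource "time_sleep" "wait_to_startfile" {
  create_duration = "120s"
  # depends_on would list the workflow resources that populate the bucket
}
```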