diff --git a/0-bootstrap/README.md b/0-bootstrap/README.md
index 6ea7739b..62788a6d 100644
--- a/0-bootstrap/README.md
+++ b/0-bootstrap/README.md
@@ -63,7 +63,10 @@ To run the commands described in this document, install the following:
- [Terraform](https://www.terraform.io/downloads.html) version 1.5.7
- [jq](https://jqlang.github.io/jq/download/) version 1.6.0 or later
-**Note:** Make sure that you use version 1.5.7 of Terraform throughout this series. Otherwise, you might experience Terraform state snapshot lock errors.
+**Notes:**
+
+- Make sure that you use version 1.5.7 of Terraform throughout this series. Otherwise, you might experience Terraform state snapshot lock errors. You can verify the installed versions with the quick check below.
+- It is recommended to use a Bash terminal to deploy the code from this repository. Using other terminals might cause unexpected behaviour.
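+
+A quick sanity check of the installed tooling (the expected values in the comments are assumptions based on the versions listed above):
+
+```bash
+# Verify the pinned Terraform version and the jq release available on PATH.
+terraform version   # expected to report Terraform v1.5.7
+jq --version        # expected to report jq-1.6 or later
+```
+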
Also make sure that you've done the following:
diff --git a/1-org/README.md b/1-org/README.md
index 063d0ded..03f55aca 100644
--- a/1-org/README.md
+++ b/1-org/README.md
@@ -192,6 +192,8 @@ If required, run `terraform output cloudbuild_project_id` in the `0-bootstrap` f
```bash
git checkout -b production
git push origin production
+
+ cd ..
```
1. Proceed to the [2-environments](../2-environments/README.md) step.
diff --git a/1-org/envs/shared/ml_key_rings.tf b/1-org/envs/shared/ml_key_rings.tf
index 9fa6751d..602c46ae 100644
--- a/1-org/envs/shared/ml_key_rings.tf
+++ b/1-org/envs/shared/ml_key_rings.tf
@@ -21,7 +21,8 @@ module "kms_keyring" {
keyring_admins = [
"serviceAccount:${local.projects_step_terraform_service_account_email}"
]
- project_id = module.common_kms.project_id
+
+ project_id = module.org_kms.project_id
keyring_regions = var.keyring_regions
keyring_name = var.keyring_name
}
diff --git a/1-org/envs/shared/projects.tf b/1-org/envs/shared/projects.tf
index f4e88528..768f207d 100644
--- a/1-org/envs/shared/projects.tf
+++ b/1-org/envs/shared/projects.tf
@@ -38,7 +38,7 @@ locals {
module "org_audit_logs" {
source = "terraform-google-modules/project-factory/google"
- version = "~> 14.0"
+ version = "~> 15.0"
random_project_id = true
random_project_id_length = 4
@@ -66,7 +66,7 @@ module "org_audit_logs" {
module "org_billing_logs" {
source = "terraform-google-modules/project-factory/google"
- version = "~> 14.0"
+ version = "~> 15.0"
random_project_id = true
random_project_id_length = 4
@@ -98,7 +98,7 @@ module "org_billing_logs" {
module "org_kms" {
source = "terraform-google-modules/project-factory/google"
- version = "~> 14.0"
+ version = "~> 15.0"
random_project_id = true
random_project_id_length = 4
@@ -131,7 +131,7 @@ module "org_kms" {
module "org_secrets" {
source = "terraform-google-modules/project-factory/google"
- version = "~> 14.0"
+ version = "~> 15.0"
random_project_id = true
random_project_id_length = 4
@@ -163,7 +163,7 @@ module "org_secrets" {
module "interconnect" {
source = "terraform-google-modules/project-factory/google"
- version = "~> 14.0"
+ version = "~> 15.0"
random_project_id = true
random_project_id_length = 4
@@ -195,7 +195,7 @@ module "interconnect" {
module "scc_notifications" {
source = "terraform-google-modules/project-factory/google"
- version = "~> 14.0"
+ version = "~> 15.0"
random_project_id = true
random_project_id_length = 4
@@ -227,7 +227,7 @@ module "scc_notifications" {
module "dns_hub" {
source = "terraform-google-modules/project-factory/google"
- version = "~> 14.0"
+ version = "~> 15.0"
random_project_id = true
random_project_id_length = 4
@@ -267,7 +267,7 @@ module "dns_hub" {
module "base_network_hub" {
source = "terraform-google-modules/project-factory/google"
- version = "~> 14.0"
+ version = "~> 15.0"
count = var.enable_hub_and_spoke ? 1 : 0
random_project_id = true
@@ -316,7 +316,7 @@ resource "google_project_iam_member" "network_sa_base" {
module "restricted_network_hub" {
source = "terraform-google-modules/project-factory/google"
- version = "~> 14.0"
+ version = "~> 15.0"
count = var.enable_hub_and_spoke ? 1 : 0
random_project_id = true
diff --git a/1-org/modules/cai-monitoring/main.tf b/1-org/modules/cai-monitoring/main.tf
index b31da358..8d42fe22 100644
--- a/1-org/modules/cai-monitoring/main.tf
+++ b/1-org/modules/cai-monitoring/main.tf
@@ -71,7 +71,7 @@ data "archive_file" "function_source_zip" {
module "cloudfunction_source_bucket" {
source = "terraform-google-modules/cloud-storage/google//modules/simple_bucket"
- version = "~>3.4"
+ version = "~>5.0"
project_id = var.project_id
name = "bkt-cai-monitoring-${random_id.suffix.hex}-sources-${data.google_project.project.number}-${var.location}"
@@ -121,7 +121,7 @@ resource "google_cloud_asset_organization_feed" "organization_feed" {
module "pubsub_cai_feed" {
source = "terraform-google-modules/pubsub/google"
- version = "~> 5.0"
+ version = "~> 6.0"
topic = "top-cai-monitoring-${random_id.suffix.hex}-event"
project_id = var.project_id
@@ -142,7 +142,7 @@ resource "google_scc_source" "cai_monitoring" {
// Cloud Function
module "cloud_function" {
source = "GoogleCloudPlatform/cloud-functions/google"
- version = "0.4.1"
+ version = "0.5"
function_name = "caiMonitoring"
description = "Check on the Organization for members (users, groups and service accounts) that contains the IAM roles listed."
diff --git a/1-org/modules/cai-monitoring/versions.tf b/1-org/modules/cai-monitoring/versions.tf
index 328699bc..d516241c 100644
--- a/1-org/modules/cai-monitoring/versions.tf
+++ b/1-org/modules/cai-monitoring/versions.tf
@@ -19,11 +19,11 @@ terraform {
required_providers {
google = {
source = "hashicorp/google"
- version = ">= 3.77"
+ version = ">= 3.77, <=5.37"
}
google-beta = {
source = "hashicorp/google-beta"
- version = ">= 3.77"
+ version = ">= 3.77, <=5.37"
}
random = {
source = "hashicorp/random"
diff --git a/1-org/modules/network/main.tf b/1-org/modules/network/main.tf
index 54704f38..7e6b68a7 100644
--- a/1-org/modules/network/main.tf
+++ b/1-org/modules/network/main.tf
@@ -20,7 +20,7 @@
module "base_shared_vpc_host_project" {
source = "terraform-google-modules/project-factory/google"
- version = "~> 14.0"
+ version = "~> 15.0"
random_project_id = true
random_project_id_length = 4
@@ -56,7 +56,7 @@ module "base_shared_vpc_host_project" {
module "restricted_shared_vpc_host_project" {
source = "terraform-google-modules/project-factory/google"
- version = "~> 14.0"
+ version = "~> 15.0"
random_project_id = true
random_project_id_length = 4
diff --git a/2-environments/README.md b/2-environments/README.md
index 132e80e8..2f487a0f 100644
--- a/2-environments/README.md
+++ b/2-environments/README.md
@@ -171,7 +171,7 @@ Run `terraform output cloudbuild_project_id` in the `0-bootstrap` folder to get
git push origin production
```
-### Read this before continuing further
+### `N.B.` Read this before continuing further
A logging project will be created in every environment (`development`, `non-production`, `production`) when running this code. This project contains a storage bucket for the purposes of project logging within its respective environment. This requires the `cloud-storage-analytics@google.com` group permissions for the storage bucket. Since foundations has more restricted security measures, a domain restriction constraint is enforced. This restraint will prevent the google cloud-storage-analytics group to be added to any permissions. In order for this terraform code to execute without error, manual intervention must be made to ensure everything applies without issue.
@@ -196,7 +196,7 @@ You will be doing this procedure for each environment (`development`, `non-produ
Make sure your git is checked out to the development branch by running `git checkout development` on `GCP_ENVIRONMENTS_PATH`.
```bash
- (cd $GCP_ENVIRONMENTS_PATH && git checkout development)
+ (cd $GCP_ENVIRONMENTS_PATH && git checkout development && ./tf-wrapper.sh init development)
```
2. Retrieve the bucket name and project id from terraform outputs.
@@ -244,7 +244,7 @@ You will be doing this procedure for each environment (`development`, `non-produ
Make sure your git is checked out to the `non-production` branch by running `git checkout non-production` on `GCP_ENVIRONMENTS_PATH`.
```bash
- (cd $GCP_ENVIRONMENTS_PATH && git checkout non-production)
+ (cd $GCP_ENVIRONMENTS_PATH && git checkout non-production && ./tf-wrapper.sh init non-production)
```
2. Retrieve the bucket name and project id from terraform outputs.
@@ -292,7 +292,7 @@ You will be doing this procedure for each environment (`development`, `non-produ
Make sure your git is checked out to the `production` branch by running `git checkout production` on `GCP_ENVIRONMENTS_PATH`.
```bash
- (cd $GCP_ENVIRONMENTS_PATH && git checkout production)
+ (cd $GCP_ENVIRONMENTS_PATH && git checkout production && ./tf-wrapper.sh init production)
```
2. Retrieve the bucket name and project id from terraform outputs.
@@ -405,7 +405,6 @@ To use the `validate` option of the `tf-wrapper.sh` script, please follow the [i
export GOOGLE_IMPERSONATE_SERVICE_ACCOUNT=$(terraform -chdir="../0-bootstrap/" output -raw environment_step_terraform_service_account_email)
echo ${GOOGLE_IMPERSONATE_SERVICE_ACCOUNT}
```
-
1. Ensure you [disable The Organization Policy](#read-this-before-continuing-further) on the `development` folder before continuing further.
1. Run `init` and `plan` and review output for environment development.
@@ -447,7 +446,6 @@ To use the `validate` option of the `tf-wrapper.sh` script, please follow the [i
```bash
./tf-wrapper.sh apply non-production
```
-
1. Ensure you [disable The Organization Policy](#read-this-before-continuing-further) on the `non-production` folder before continuing further.
1. Run `init` and `plan` and review output for environment production.
@@ -477,6 +475,6 @@ Before executing the next stages, unset the `GOOGLE_IMPERSONATE_SERVICE_ACCOUNT`
unset GOOGLE_IMPERSONATE_SERVICE_ACCOUNT
cd ../..
-```
+ ```
1. You can now move to the instructions in the network step. To use the [Dual Shared VPC](https://cloud.google.com/architecture/security-foundations/networking#vpcsharedvpc-id7-1-shared-vpc-) network mode go to [3-networks-dual-svpc](../3-networks-dual-svpc/README.md).
diff --git a/2-environments/modules/env_baseline/kms.tf b/2-environments/modules/env_baseline/kms.tf
index 99b96c4d..2920ec54 100644
--- a/2-environments/modules/env_baseline/kms.tf
+++ b/2-environments/modules/env_baseline/kms.tf
@@ -21,7 +21,7 @@
module "env_kms" {
source = "terraform-google-modules/project-factory/google"
- version = "~> 14.0"
+ version = "~> 15.0"
random_project_id = true
random_project_id_length = 4
diff --git a/2-environments/modules/env_baseline/ml_logging.tf b/2-environments/modules/env_baseline/ml_logging.tf
index e247e976..72d2d59f 100644
--- a/2-environments/modules/env_baseline/ml_logging.tf
+++ b/2-environments/modules/env_baseline/ml_logging.tf
@@ -24,7 +24,7 @@ data "google_storage_project_service_account" "gcs_logging_account" {
module "env_logs" {
source = "terraform-google-modules/project-factory/google"
- version = "~> 14.0"
+ version = "~> 15.0"
random_project_id = true
random_project_id_length = 4
diff --git a/2-environments/modules/env_baseline/monitoring.tf b/2-environments/modules/env_baseline/monitoring.tf
index 264d4c88..73e7f85b 100644
--- a/2-environments/modules/env_baseline/monitoring.tf
+++ b/2-environments/modules/env_baseline/monitoring.tf
@@ -20,7 +20,7 @@
module "monitoring_project" {
source = "terraform-google-modules/project-factory/google"
- version = "~> 14.0"
+ version = "~> 15.0"
random_project_id = true
random_project_id_length = 4
diff --git a/2-environments/modules/env_baseline/secrets.tf b/2-environments/modules/env_baseline/secrets.tf
index 03cc9715..6d0ef7a9 100644
--- a/2-environments/modules/env_baseline/secrets.tf
+++ b/2-environments/modules/env_baseline/secrets.tf
@@ -21,7 +21,7 @@
module "env_secrets" {
source = "terraform-google-modules/project-factory/google"
- version = "~> 14.0"
+ version = "~> 15.0"
random_project_id = true
random_project_id_length = 4
diff --git a/3-networks-dual-svpc/README.md b/3-networks-dual-svpc/README.md
index 9e77eb74..b92d77ee 100644
--- a/3-networks-dual-svpc/README.md
+++ b/3-networks-dual-svpc/README.md
@@ -417,6 +417,8 @@ Before executing the next stages, unset the `GOOGLE_IMPERSONATE_SERVICE_ACCOUNT`
```bash
unset GOOGLE_IMPERSONATE_SERVICE_ACCOUNT
+
+cd ../..
```
1. You can now move to the instructions in the [4-projects](../4-projects/README.md) step.
diff --git a/5-app-infra/README.md b/5-app-infra/README.md
index ff835467..ec757db3 100644
--- a/5-app-infra/README.md
+++ b/5-app-infra/README.md
@@ -134,9 +134,9 @@ The Pipeline is connected to a Google Cloud Source Repository with a simple stru
└── tf2-gpu.2-13:0.1
└── Dockerfile
```
-for the purposes of this example, the pipeline is configured to monitor the `main` branch of this repository.
+For the purposes of this example, the pipeline is configured to monitor the `main` branch of this repository.
-each folder under `images` has the full name and tag of the image that must be built. Once a change to the `main` branch is pushed, the pipeline will analyse which files have changed and build that image out and place it in the artifact repository. For example, if there is a change to the Dockerfile in the `tf2-cpu-13:0.1` folder, or if the folder itself has been renamed, it will build out an image and tag it based on the folder name that the Dockerfile has been housed in.
+Each folder under `images` has the full name and tag of the image that must be built. Once a change to the `main` branch is pushed, the pipeline will analyse which files have changed and build that image out and place it in the artifact repository. For example, if there is a change to the Dockerfile in the `tf2-cpu-13:0.1` folder, or if the folder itself has been renamed, it will build out an image and tag it based on the folder name that the Dockerfile has been housed in.
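+
+As a hedged illustration of that behaviour (this is not the repository's actual Cloud Build configuration; the Artifact Registry path and commit range are placeholders), a build step could detect and rebuild changed images roughly like this:
+
+```bash
+# Illustrative sketch: find image folders touched by the last commit and
+# rebuild each one, tagging the image after its folder name (name:tag).
+for dir in $(git diff --name-only HEAD~1 HEAD -- images/ | xargs -n1 dirname | sort -u); do
+  image="${dir#images/}"   # e.g. tf2-cpu.2-13:0.1
+  docker build -t "us-docker.pkg.dev/PROJECT_ID/REPOSITORY/${image}" "${dir}"
+done
+```
+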
Once pushed, the pipeline build logs can be accessed by navigating to the artifacts project name created in step-4:
@@ -363,6 +363,7 @@ The pipeline also listens for changes made to `plan`, `development`, `non-produc
1. Update the `log_bucket` variable with the value of the `logs_export_storage_bucket_name`.
```bash
+ terraform -chdir="../gcp-org/envs/shared" init
export log_bucket=$(terraform -chdir="../gcp-org/envs/shared" output -raw logs_export_storage_bucket_name)
echo "log_bucket = ${log_bucket}"
sed -i "s/REPLACE_LOG_BUCKET/${log_bucket}/" ./common.auto.tfvars
diff --git a/5-app-infra/modules/service_catalog/main.tf b/5-app-infra/modules/service_catalog/main.tf
index 0d9f4763..68eef92d 100644
--- a/5-app-infra/modules/service_catalog/main.tf
+++ b/5-app-infra/modules/service_catalog/main.tf
@@ -57,7 +57,6 @@ resource "google_storage_bucket_iam_member" "bucket_role" {
role = "roles/storage.admin"
member = google_service_account.trigger_sa.member
}
-
resource "google_sourcerepo_repository_iam_member" "read" {
project = var.project_id
repository = var.name
diff --git a/5-app-infra/projects/service-catalog/common.auto.example.tfvars b/5-app-infra/projects/service-catalog/common.auto.example.tfvars
index a9f152f5..0ae3e1fc 100644
--- a/5-app-infra/projects/service-catalog/common.auto.example.tfvars
+++ b/5-app-infra/projects/service-catalog/common.auto.example.tfvars
@@ -20,9 +20,9 @@ remote_state_bucket = "REMOTE_STATE_BUCKET"
log_bucket = "REPLACE_LOG_BUCKET"
-# github_ api_ token = "PUT IN TOKEN"
+# github_api_token = "GITHUB_APP_TOKEN"
-# github_app_installation_id = "18685983"
+# github_app_installation_id = "GITHUB_APP_ID"
-# github_remote_uri = "https://github.com/badal-io/ml-foundations-tf-modules.git"
+# github_remote_uri = "GITHUB_REMOTE_URI"
diff --git a/5-app-infra/source_repos/artifact-publish/images/vertexpipeline:v2/Dockerfile b/5-app-infra/source_repos/artifact-publish/images/vertexpipeline:v2/Dockerfile
index b466e248..5d1f079f 100644
--- a/5-app-infra/source_repos/artifact-publish/images/vertexpipeline:v2/Dockerfile
+++ b/5-app-infra/source_repos/artifact-publish/images/vertexpipeline:v2/Dockerfile
@@ -12,5 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-FROM tensorflow/tensorflow:2.8.0
-RUN pip install tensorflow-io==0.25.0 protobuf==3.20.0 google-cloud-bigquery==3.13.0 pandas==2.0.3 db-dtypes==1.2.0 google-cloud-aiplatform==1.36.0 google-cloud-storage==2.14.0 kfp google-cloud-pipeline-components
+FROM python:3.10
+
+RUN python3 -m pip install --no-cache-dir tensorflow-cpu==2.8.0
+RUN pip install tensorflow-io==0.25.0 protobuf==3.20.3 google-cloud-bigquery==3.13.0 pandas==2.0.3 db-dtypes==1.2.0 google-cloud-aiplatform==1.36.0 google-cloud-storage==2.14.0 kfp google-cloud-pipeline-components numpy==1.26.4
diff --git a/5-app-infra/source_repos/service-catalog/modules/composer/README.md b/5-app-infra/source_repos/service-catalog/modules/composer/README.md
index 0207aec7..1e5b449f 100644
--- a/5-app-infra/source_repos/service-catalog/modules/composer/README.md
+++ b/5-app-infra/source_repos/service-catalog/modules/composer/README.md
@@ -21,7 +21,7 @@ The following table outlines which of the suggested controls for Vertex Generati
|------|-------------|------|---------|:--------:|
| airflow\_config\_overrides | Airflow configuration properties to override. Property keys contain the section and property names, separated by a hyphen, for example "core-dags\_are\_paused\_at\_creation". | `map(string)` | `{}` | no |
| env\_variables | Additional environment variables to provide to the Apache Airflow scheduler, worker, and webserver processes. Environment variable names must match the regular expression [a-zA-Z\_][a-zA-Z0-9\_]*. They cannot specify Apache Airflow software configuration overrides (they cannot match the regular expression AIRFLOW\_\_[A-Z0-9\_]+\_\_[A-Z0-9\_]+), and they cannot match any of the following reserved names: [AIRFLOW\_HOME,C\_FORCE\_ROOT,CONTAINER\_NAME,DAGS\_FOLDER,GCP\_PROJECT,GCS\_BUCKET,GKE\_CLUSTER\_NAME,SQL\_DATABASE,SQL\_INSTANCE,SQL\_PASSWORD,SQL\_PROJECT,SQL\_REGION,SQL\_USER]. | `map(any)` | `{}` | no |
-| github\_app\_installation\_id | The app installation ID that was created when installing Google Cloud Build in GitHub: https://github.com/apps/google-cloud-build. | `number` | n/a | yes |
+| github\_app\_installation\_id | The app installation ID that was created when installing Google Cloud Build in GitHub: https://github.com/apps/google-cloud-build. | `number` | `null` | no |
| github\_name\_prefix | A name for your GitHub connection to Cloud Build. | `string` | `"github-modules"` | no |
| github\_remote\_uri | URL of your GitHub repo. | `string` | n/a | yes |
| github\_secret\_name | Name of the GitHub secret to extract GitHub token info. | `string` | `"github-api-token"` | no |
diff --git a/5-app-infra/source_repos/service-catalog/modules/composer/variables.tf b/5-app-infra/source_repos/service-catalog/modules/composer/variables.tf
index 8f6f1cbc..e19c581d 100644
--- a/5-app-infra/source_repos/service-catalog/modules/composer/variables.tf
+++ b/5-app-infra/source_repos/service-catalog/modules/composer/variables.tf
@@ -110,6 +110,7 @@ variable "github_name_prefix" {
variable "github_app_installation_id" {
type = number
description = "The app installation ID that was created when installing Google Cloud Build in GitHub: https://github.com/apps/google-cloud-build."
+ default = null
}
variable "service_account_prefix" {
diff --git a/docs/assets/terraform/2-environments/ml_logging.tf b/docs/assets/terraform/2-environments/ml_logging.tf
index 61a4e0e5..9bf2c0fc 100644
--- a/docs/assets/terraform/2-environments/ml_logging.tf
+++ b/docs/assets/terraform/2-environments/ml_logging.tf
@@ -24,7 +24,7 @@ data "google_storage_project_service_account" "gcs_logging_account" {
module "env_logs" {
source = "terraform-google-modules/project-factory/google"
- version = "~> 14.0"
+ version = "~> 15.0"
random_project_id = true
random_project_id_length = 4
diff --git a/examples/machine-learning-pipeline/README.md b/examples/machine-learning-pipeline/README.md
index 6368c8f9..6450c298 100644
--- a/examples/machine-learning-pipeline/README.md
+++ b/examples/machine-learning-pipeline/README.md
@@ -1,924 +1,2228 @@
# Machine Learning Pipeline Overview
-This repo is part of a multi-part guide that shows how to configure and deploy
-the example.com reference architecture described in
-[Google Cloud security foundations guide](https://cloud.google.com/architecture/security-foundations). The following table lists the parts of the guide.
-
-
-
-
-0-bootstrap |
-Bootstraps a Google Cloud organization, creating all the required resources
-and permissions to start using the Cloud Foundation Toolkit (CFT). This
-step also configures a CI/CD Pipeline for foundations code in subsequent
-stages. |
-
-
-1-org |
-Sets up top-level shared folders, monitoring and networking projects,
-organization-level logging, and baseline security settings through
-organizational policies. |
-
-
-2-environments |
-Sets up development, non-production, and production environments within the
-Google Cloud organization that you've created. |
-
-
-3-networks-dual-svpc |
-Sets up base and restricted shared VPCs with default DNS, NAT (optional),
-Private Service networking, VPC service controls, on-premises Dedicated
-Interconnect, and baseline firewall rules for each environment. It also sets
-up the global DNS hub. |
-
-
-4-projects |
-Sets up a folder structure, projects, and an application infrastructure pipeline for applications,
- which are connected as service projects to the shared VPC created in the previous stage. |
-
-
-Machine-learning-pipeline(this file) |
-Deploys modules based on the modules created in 5-app-infra |
-
-
-
-
-For an overview of the architecture and the parts, see the
-[terraform-google-enterprise-genai README](https://github.com/GoogleCloudPlatform/terraform-google-enterprise-genai)
-file.
-
-## Purpose
-
-The purpose of this guide is to provide a structured to deploying a machine learning pipeline on Google Cloud Platform using Vertex AI.
+This example demonstrates the process of interactive coding and experimentation using the Google Vertex AI Workbench for data scientists. The guide outlines the creation of a machine learning (ML) pipeline within a notebook on a Google Vertex AI Workbench Instance.
-## Prerequisites
+This environment is set up for interactive coding and experimentation. After the project is up, the Vertex AI Workbench instance is deployed from the base environment module in `/modules/base_env/main.tf`, and data scientists can use it to write their data processing code and pipeline components. In addition, a Cloud Storage bucket is deployed to serve as the storage for these operations. Optionally, a Composer environment can be set up to schedule pipeline runs at intervals.
-1. 0-bootstrap executed successfully.
-2. 1-org executed successfully.
-3. 2-environments executed successfully.
-4. 3-networks executed successfully.
-5. 4-projects executed successfully.
-6. 5-app-infra executed successfully.
-7. The step bellow `VPC-SC` executed successfully.
+Each environment (Development, Non-Production, and Production) has its own purpose and is not a mirror of the previous environment.
-### VPC-SC
+The Development environment is used to create the pipeline components and to make sure there are no issues in the environment.
-By now, `artifact-publish` and `service-catalog` have been deployed. The projects inflated under `machine-learning-pipeline` are set in a service perimiter for added security. As such, several services and accounts must be given ingress and egress policies before `machine-learning-pipeline` has been deployed.
+The non-production environment triggers the pipeline once the merge is approved. The Vertex pipeline takes about 30 minutes to finish.
-cd into gcp-networks
+The production environment provides an endpoint in the project that you can use to make prediction requests, as sketched below.
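+
+As a minimal, hypothetical sketch of such a request (the endpoint ID, project, region, and instance payload below are placeholders, not values produced by this example):
+
+```bash
+# Build a small prediction request and send it to a deployed Vertex AI endpoint.
+cat > request.json <<'EOF'
+{"instances": [{"age": 39, "workclass": "State-gov", "education_num": 13}]}
+EOF
+
+gcloud ai endpoints predict ENDPOINT_ID \
+  --project=PROJECT_ID \
+  --region=us-central1 \
+  --json-request=request.json
+```
+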
- ```bash
- cd gcp-networks/
- ```
+## Steps Involved
-Below, you can find the values that will need to be applied to `common.auto.tfvars` and your `development.auto.tfvars`, `non-production.auto.tfvars` & `production.auto.tfvars`.
+- Creating the ML Pipeline:
+  - Use a notebook on a Google Vertex AI Workbench instance to develop and adjust the ML pipeline in the development environment.
+- Triggering the Pipeline:
+  - The pipeline is set to trigger via Cloud Build upon merges to the non-production branch, after being validated in the development environment.
+- Training and Deploying the Model:
+ - The model is trained and deployed using the census income dataset.
+ - Deployment and monitoring occur in the production environment.
+- A/B Testing:
+ - After successful pipeline runs, a new model version is deployed for A/B testing.
-In `common.auto.tfvars` update your `perimeter_additional_members` to include:
+## Prerequisites
- ```
- "serviceAccount:sa-tf-cb-ml-machine-learning@[prj_c_ml_infra_pipeline_project_id].iam.gserviceaccount.com"
- "serviceAccount:sa-terraform-env@[prj_b_seed_project_id].iam.gserviceaccount.com"
- "serviceAccount:service-[prj_d_logging_project_number]@gs-project-accounts.iam.gserviceaccount.com"
- "serviceAccount:[prj_d_machine_learning_project_number]@cloudbuild.gserviceaccount.com"
- ```
+1. 0-bootstrap executed successfully.
+1. 1-org executed successfully.
+1. 2-environments executed successfully.
+1. 3-networks executed successfully.
+1. 4-projects executed successfully.
+1. 5-app-infra executed successfully.
+1. The step below named `VPC-SC` executed successfully, configuring the VPC-SC rules that allow running the example.
- ```bash
- export prj_c_ml-infra_pipeline_project_id=$(terraform -chdir="../gcp-projects/ml_business_unit/shared/" output -raw cloudbuild_project_id)
- echo "prj_c_ml-infra_pipeline_project_id = ${prj_c_ml_infra_pipeline_project_id}"
+**IMPORTANT**: The steps below apply only if you are deploying via `Cloud Build`. If you are deploying using Local Terraform, skip directly to the `VPC-SC - Infrastructure Deployment with Local Terraform` section.
- export prj_b_seed_project_id=$(terraform -chdir="../terraform-google-enterprise-genai/0-bootstrap/" output -raw seed_project_id)
- echo "prj_b_seed_project_id = ${prj_b_seed_project_id}"
+### VPC-SC - Infrastructure Deployment with Cloud Build
- export prj_b_seed_project_id=$(terraform -chdir="../terraform-google-enterprise-genai/0-bootstrap/" output -raw seed_project_id)
- echo "prj_b_seed_project_id = ${prj_b_seed_project_id}"
+By now, `artifact-publish` and `service-catalog` have been deployed. The projects inflated under `machine-learning-pipeline` are placed in a service perimeter for added security. As such, several services and accounts must be given ingress and egress policies before the notebook and the pipeline are deployed. Below, you can find the values that will need to be applied to `common.auto.tfvars` and to your `development.auto.tfvars`, `non-production.auto.tfvars` & `production.auto.tfvars`, each in its respective environment.
- export prj_b_seed_project_id=$(terraform -chdir="../terraform-google-enterprise-genai/0-bootstrap/" output -raw seed_project_id)
- echo "prj_b_seed_project_id = ${prj_b_seed_project_id}"
+To create new ingress/egress rules on the VPC-SC perimeter, follow the steps below:
- export backend_bucket=$(terraform -chdir="../terraform-google-enterprise-genai/0-bootstrap/" output -raw gcs_bucket_tfstate)
- echo "remote_state_bucket = ${backend_bucket}"
+**IMPORTANT**: Please note that the commands below run `terraform output`, which means each referenced directory must already be initialized with `terraform -chdir="<directory>" init` if it has not been initialized yet.
- export backend_bucket_projects=$(terraform -chdir="../terraform-google-enterprise-genai/0-bootstrap/" output -raw projects_gcs_bucket_tfstate)
- echo "backend_bucket_projects = ${backend_bucket_projects}"
+#### `development` environment
- export project_d_logging_project_number=$(gsutil cat gs://$backend_bucket/terraform/environments/development/default.tfstate | jq -r '.outputs.env_log_project_number.value')
- echo "project_d_logging_project_number = ${project_d_logging_project_number}"
+1. Navigate into `gcp-networks` directory and checkout to `development` branch:
- prj_d_machine_learning_project_number=$(gsutil cat gs://$backend_bucket_projects/terraform/projects/ml_business_unit/development/default.tfstate | jq -r '.outputs.machine_learning_project_number.value')
- echo "project_d_machine_learning_number = ${prj_d_machine_learning_project_number}"
- ```
+ ```bash
+ cd gcp-networks/
+ git checkout development
+ ```
-In each respective environment folders, update your `development.auto.tfvars`, `non-production.auto.tfvars` & `production.auto.tfvars` to include these changes under `ingress_policies`
+1. Retrieve the value for "sa-tf-cb-ml-machine-learning@[prj_c_ml_infra_pipeline_project_id].iam.gserviceaccount.com" on your environment by running:
-You can find the `sources.access_level` information by going to `Security` in your GCP Organization.
-Once there, select the perimeter that is associated with the environment (eg. `development`). Copy the string under Perimeter Name and place it under `YOUR_ACCESS_LEVEL`
+ ```bash
+ export ml_cb_sa=$(terraform -chdir="../gcp-projects/ml_business_unit/shared" output -json terraform_service_accounts | jq -r '."ml-machine-learning"')
+ echo $ml_cb_sa
+ ```
-#### Ingress Policies
+1. Retrieve the value for "sa-terraform-env@[prj_b_seed_project_id].iam.gserviceaccount.com" on your environment by running:
- ```
- ingress_policies = [
-
- // users
- {
- "from" = {
- "identity_type" = "ANY_IDENTITY"
- "sources" = {
- "access_level" = "[YOUR_ACCESS_LEVEL]"
- }
- },
- "to" = {
- "resources" = [
- "projects/[your-environment-shared-restricted-project-number]",
- "projects/[your-environment-kms-project-number]",
- "projects/[your-environment-mlmachine-learning-number]",
- ]
- "operations" = {
- "compute.googleapis.com" = {
- "methods" = ["*"]
- }
- "dns.googleapis.com" = {
- "methods" = ["*"]
- }
- "logging.googleapis.com" = {
- "methods" = ["*"]
- }
- "storage.googleapis.com" = {
- "methods" = ["*"]
- }
- "cloudkms.googleapis.com" = {
- "methods" = ["*"]
- }
- "iam.googleapis.com" = {
- "methods" = ["*"]
- }
- "cloudresourcemanager.googleapis.com" = {
- "methods" = ["*"]
- }
- "pubsub.googleapis.com" = {
- "methods" = ["*"]
- }
- "secretmanager.googleapis.com" = {
- "methods" = ["*"]
- }
- "aiplatform.googleapis.com" = {
- "methods" = ["*"]
- }
- "composer.googleapis.com" = {
- "methods" = ["*"]
- }
- "cloudbuild.googleapis.com" = {
- "methods" = ["*"]
- }
- "bigquery.googleapis.com" = {
- "methods" = ["*"]
- }
- }
- }
- },
- ]
- ```
+ ```bash
+ export env_step_sa=$(terraform -chdir="../gcp-bootstrap/envs/shared" output -raw environment_step_terraform_service_account_email)
+ echo $env_step_sa
+ ```
-#### Egress Policies
+1. Retrieve the value for `prj_d_logging_project_number`:
-For your DEVELOPMENT.AUTO.TFVARS file, also include this as an egress policy:
+ ```bash
+ terraform -chdir="../gcp-environments/envs/development" init
- ```bash
- egress_policies = [
- // notebooks
- {
- "from" = {
- "identity_type" = ""
- "identities" = [
- "serviceAccount:service-[prj-d-ml-machine-learning-project-number]@gcp-sa-notebooks.iam.gserviceaccount.com",
- "serviceAccount:service-[prj-d-ml-machine-learning-project-number]@compute-system.iam.gserviceaccount.com",
- ]
- },
- "to" = {
- "resources" = ["projects/[prj-d-kms-project-number]"]
- "operations" = {
- "compute.googleapis.com" = {
- "methods" = ["*"]
- }
- "cloudkms.googleapis.com" = {
- "methods" = ["*"]
- }
- }
- }
- },
- ]
- ```
+ export prj_d_logging_project_number=$(terraform -chdir="../gcp-environments/envs/development" output -raw env_log_project_number)
+ echo $prj_d_logging_project_number
+ ```
-### Troubleshooting
+1. Retrieve the values for `prj_d_machine_learning_project_id` and `prj_d_machine_learning_project_number`:
-Please refer to [troubleshooting](../docs/TROUBLESHOOTING.md) if you run into issues during this step.
+ ```bash
+ terraform -chdir="../gcp-projects/ml_business_unit/development" init
-## Usage
+ export prj_d_machine_learning_project_id=$(terraform -chdir="../gcp-projects/ml_business_unit/development" output -raw machine_learning_project_id)
+ echo $prj_d_machine_learning_project_id
-**Note:** If you are using MacOS, replace `cp -RT` with `cp -R` in the relevant
-commands. The `-T` flag is needed for Linux, but causes problems for MacOS.
+ export prj_d_machine_learning_project_number=$(terraform -chdir="../gcp-projects/ml_business_unit/development" output -raw machine_learning_project_number)
+ echo $prj_d_machine_learning_project_number
+ ```
-You will need a github repository set up for this step. This repository houses the DAG's for composer. As of this writing, the structure is as follows:
+1. Take note of the following command output, and update `perimeter_additional_members` in `common.auto.tfvars` to include them (an illustrative sketch of the resulting entries follows the note below):
- ```
- .
- ├── README.md
- └── dags
- ├── hello_world.py
- └── strings.py
- ```
+ ```bash
+ cat < envs/development/development.auto.tfvars
+ ```
-1. Rename `common.auto.example.tfvars` to `common.auto.tfvars`.
+> *IMPORTANT*: The command above assumes you are running it on the `gcp-networks` directory.
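+
+As an illustration, based on the previous version of this guide, the entries added to `perimeter_additional_members` would look roughly like the output of the commands below (a sketch only; adapt the exact member list to your organization):
+
+```bash
+# Print the perimeter members built from the values gathered above, ready to
+# be added to perimeter_additional_members in common.auto.tfvars.
+echo "\"serviceAccount:${ml_cb_sa}\","
+echo "\"serviceAccount:${env_step_sa}\","
+echo "\"serviceAccount:service-${prj_d_logging_project_number}@gs-project-accounts.iam.gserviceaccount.com\","
+echo "\"serviceAccount:${prj_d_machine_learning_project_number}@cloudbuild.gserviceaccount.com\","
+```
+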
- ```bash
- mv common.auto.example.tfvars common.auto.tfvars
- ```
+1. Commit the results on `gcp-networks`.
-1. Update the `common.auto.tfvars` file with your github app installation id, along with the url of your repository.
+ ```bash
+ git add .
- ```bash
- GITHUB_APP_ID="YOUR-GITHUB-APP-ID-HERE"
- GITHUB_REMOTE_URI="YOUR-GITHUB-REMOTE-URI"
+ git commit -m 'Update ingress and egress rules'
+ git push origin development
+ ```
- sed -i "s/GITHUB_APP_ID/${GITHUB_APP_ID}/" ./common.auto.tfvars
- sed -i "s/GITHUB_REMOTE_URI/${GITHUB_REMOTE_URI}/" ./common.auto.tfvars
- ```
+> **DISCLAIMER**: Remember that before deleting or destroying the `machine-learning-pipeline` example, you must remove the egress/ingress policies related to the example, to prevent any inconsistencies.
-1. Use `terraform output` to get the project backend bucket value from 0-bootstrap.
+#### `non-production` environment
- ```bash
- export remote_state_bucket=$(terraform -chdir="../terraform-google-enterprise-genai/0-bootstrap/" output -raw projects_gcs_bucket_tfstate)
- echo "remote_state_bucket = ${remote_state_bucket}"
- sed -i "s/REMOTE_STATE_BUCKET/${remote_state_bucket}/" ./common.auto.tfvars
- ```
+1. Checkout to `non-production` branch:
-1. Use `terraform output` to retrieve the Service Catalog project-id from the projects step and update values in `module/base_env`.
+ ```bash
+ git checkout non-production
+ ```
- ```bash
- export service_catalog_project_id=$(terraform -chdir="../gcp-projects/ml_business_unit/shared/" output -raw service_catalog_project_id)
- echo "service_catalog_project_id = ${service_catalog_project_id}"
+1. Retrieve the value for "sa-tf-cb-ml-machine-learning@[prj_c_ml_infra_pipeline_project_id].iam.gserviceaccount.com" on your environment by running:
- ## Linux
- sed -i "s/SERVICE_CATALOG_PROJECT_ID/${service_catalog_project_id}/g" ./modules/base_env/main.tf
- ```
+ ```bash
+ export ml_cb_sa=$(terraform -chdir="../gcp-projects/ml_business_unit/shared" output -json terraform_service_accounts | jq -r '."ml-machine-learning"')
+ echo $ml_cb_sa
+ ```
-1. Update `backend.tf` with your bucket from the infra pipeline output.
+1. Retrieve the value for "sa-terraform-env@[prj_b_seed_project_id].iam.gserviceaccount.com" on your environment by running:
- ```bash
- export backend_bucket=$(terraform -chdir="../gcp-projects/ml_business_unit/shared/" output -json state_buckets | jq '."ml-machine-learning"' --raw-output)
- echo "backend_bucket = ${backend_bucket}"
+ ```bash
+ export env_step_sa=$(terraform -chdir="../gcp-bootstrap/envs/shared" output -raw environment_step_terraform_service_account_email)
+ echo $env_step_sa
+ ```
- ## Linux
- for i in `find . -name 'backend.tf'`; do sed -i "s/UPDATE_APP_INFRA_BUCKET/${backend_bucket}/" $i; done
+1. Retrieve the value for `prj_n_logging_project_number`:
- ## MacOS
- for i in `find . -name 'backend.tf'`; do sed -i "" "s/UPDATE_APP_INFRA_BUCKET/${backend_bucket}/" $i; done
- ```
+ ```bash
+ terraform -chdir="../gcp-environments/envs/non-production" init
-1. Update `modules/base_env/main.tf` with the name of service catalog project id to complete the git fqdn for module sources:
+ export prj_n_logging_project_number=$(terraform -chdir="../gcp-environments/envs/non-production" output -raw env_log_project_number)
+ echo $prj_n_logging_project_number
+ ```
- ```bash
- export service_catalog_project_id=$(terraform -chdir="../gcp-projects/ml_business_unit/shared/" output -raw service_catalog_project_id)
+1. Retrieve the values for `prj_n_machine_learning_project_id` and `prj_n_machine_learning_project_number`:
- ##LINUX
- sed -i "s/SERVICE-CATALOG-PROJECT-ID/${service_catalog_project_id}/" ./modules/base_env/main.tf
+ ```bash
+ terraform -chdir="../gcp-projects/ml_business_unit/non-production" init
- ##MacOS
- sed -i "" "s/SERVICE-CATALOG-PROJECT-ID/${service_catalog_project_id}/" ./modules/base_env/main.tf
- ```
+ export prj_n_machine_learning_project_id=$(terraform -chdir="../gcp-projects/ml_business_unit/non-production" output -raw machine_learning_project_id)
+ echo $prj_n_machine_learning_project_id
-1. Commit changes.
+ export prj_n_machine_learning_project_number=$(terraform -chdir="../gcp-projects/ml_business_unit/non-production" output -raw machine_learning_project_number)
+ echo $prj_n_machine_learning_project_number
+ ```
- ```bash
- git add .
- git commit -m 'Initialize repo'
- ```
+1. Take note of the following command output, and update `perimeter_additional_members` in `common.auto.tfvars` to include them:
-1. Composer will rely on DAG's from a github repository. In `4-projects`, a secret 'github-api-token' was created to house your github's api access key. We need to create a new version for this secret which will be used in the composer module which is called in the `base_env` folder. Use the script below to add the secrets into each machine learnings respective environment:
+ ```bash
+ cat < envs/non-production/non-production.auto.tfvars
+ ```
-1. Update `modules/base_env/main.tf` with Service Catalog Project Id.
+ > *IMPORTANT*: The command above assumes you are running it on the `gcp-networks` directory.
- ```bash
- export service_catalog_project_id=$(terraform -chdir="../../4-projects/ml_business_unit/shared/" output -raw service_catalog_project_id)
- echo "service_catalog_project_id = ${service_catalog_project_id}"
+1. Commit the results on `gcp-networks`.
- ## Linux
- sed -i "s/SERVICE_CATALOG_PROJECT_ID/${service_catalog_project_id}/g" ./modules/base_env/main.tf
- ```
+ ```bash
+ git add .
-We will now deploy each of our environments (development/production/non-production) using this script.
-When using Cloud Build or Jenkins as your CI/CD tool, each environment corresponds to a branch in the repository for the `machine-learning-pipeline` step. Only the corresponding environment is applied.
+ git commit -m 'Update ingress and egress rules'
+ git push origin non-production
+ ```
-To use the `validate` option of the `tf-wrapper.sh` script, please follow the [instructions](https://cloud.google.com/docs/terraform/policy-validation/validate-policies#install) to install the terraform-tools component.
+> **DISCLAIMER**: Remember that before deleting or destroying the `machine-learning-pipeline` example, you must remove the egress/ingress policies related to the example, to prevent any inconsistencies.
-1. Use `terraform output` to get the Infra Pipeline Project ID from 4-projects output.
+#### `production` environment
- ```bash
- export INFRA_PIPELINE_PROJECT_ID=$(terraform -chdir="../../4-projects/ml_business_unit/shared/" output -raw cloudbuild_project_id)
- echo ${INFRA_PIPELINE_PROJECT_ID}
+1. Navigate into `gcp-networks` directory and checkout to `production` branch:
- export GOOGLE_IMPERSONATE_SERVICE_ACCOUNT=$(terraform -chdir="../../4-projects/ml_business_unit/shared/" output -json terraform_service_accounts | jq '."ml-machine-learning"' --raw-output)
- echo ${GOOGLE_IMPERSONATE_SERVICE_ACCOUNT}
- ```
+ ```bash
+ git checkout production
+ ```
-1. Run `init` and `plan` and review output for environment production.
+1. Retrieve the value for "sa-tf-cb-ml-machine-learning@[prj_c_ml_infra_pipeline_project_id].iam.gserviceaccount.com" on your environment by running:
+
+ ```bash
+ export ml_cb_sa=$(terraform -chdir="../gcp-projects/ml_business_unit/shared" output -json terraform_service_accounts | jq -r '."ml-machine-learning"')
+ echo $ml_cb_sa
+ ```
+
+1. Retrieve the value for "sa-terraform-env@[prj_b_seed_project_id].iam.gserviceaccount.com" on your environment by running:
+
+ ```bash
+ export env_step_sa=$(terraform -chdir="../gcp-bootstrap/envs/shared" output -raw environment_step_terraform_service_account_email)
+ echo $env_step_sa
+ ```
+
+1. Retrieve the value for `prj_p_logging_project_number`:
+
+ ```bash
+ terraform -chdir="../gcp-environments/envs/production" init
+
+ export prj_p_logging_project_number=$(terraform -chdir="../gcp-environments/envs/production" output -raw env_log_project_number)
+ echo $prj_p_logging_project_number
+ ```
+
+1. Retrieve the values for `prj_p_machine_learning_project_id` and `prj_p_machine_learning_project_number`:
+
+ ```bash
+ export prj_p_machine_learning_project_id=$(terraform -chdir="../gcp-projects/ml_business_unit/production" output -raw machine_learning_project_id)
+ echo $prj_p_machine_learning_project_id
+
+ export prj_p_machine_learning_project_number=$(terraform -chdir="../gcp-projects/ml_business_unit/production" output -raw machine_learning_project_number)
+ echo $prj_p_machine_learning_project_number
+ ```
+
+1. Take note of the following command output, and update `perimeter_additional_members` in `common.auto.tfvars` to include them:
+
+ ```bash
+ cat < envs/production/production.auto.tfvars
+ ```
+
+ > *IMPORTANT*: The command above assumes you are running it on the `gcp-networks` directory.
+
+1. Commit the results on `gcp-networks`.
+
+ ```bash
+ git add .
+
+ git commit -m 'Update ingress and egress rules'
+ git push origin production
+
+ cd ..
+ ```
+
+> **DISCLAIMER**: Remember that before deleting or destroying the `machine-learning-pipeline` example, you must remove the egress/ingress policies related to the example, to prevent any inconsistencies.
+
+## Usage with Cloud Build
+
+These environmental project inflations are closely tied to the `service-catalog` project that has already been deployed. By now, the `ml-service-catalog` should have been inflated. `service-catalog` contains modules that are deployed in an interactive (development) environment. Since they already exist, they can be used as Terraform modules for operational (non-production, production) environments. This was done to avoid code redundancy and to keep one area for all `machine-learning` deployments.
+
+Under `modules/base_env/main.tf` you will notice all module calls are using `git` links as sources. These links refer to the `service-catalog` cloud source repository we have already set up.
+
+### Infrastructure Deployment with Cloud Build
+
+The GitHub App ID allows you to connect your GitHub repository to Cloud Build; using it is optional.
+
+If you want to integrate GitHub with Cloud Build, you must have a GitHub token with access to your repository ready, along with an [Application Installation Id](https://cloud.google.com/build/docs/automating-builds/github/connect-repo-github#connecting_a_github_host_programmatically) and the remote URI of your repository.
+
+The `GITHUB_APP_ID` value can be retrieved after [installing Cloud Build GitHub App](https://github.com/apps/google-cloud-build) on your GitHub account or in an organization you own.
+
+The ID can be retrieved from the URL of the app configuration page (https://github.com/settings/installations/). To access the app configuration page, go to **Settings -> Applications -> Google Cloud Build (Configure Button)** on your GitHub account.
+
+The `GITHUB_REMOTE_URI` value can be retrieved by creating a new GitHub repository and copying its URL.
+
+1. Clone the `ml-machine-learning` repo.
```bash
- ./tf-wrapper.sh init production
- ./tf-wrapper.sh plan production
+ export INFRA_PIPELINE_PROJECT_ID=$(terraform -chdir="gcp-projects/ml_business_unit/shared/" output -raw cloudbuild_project_id)
+ echo ${INFRA_PIPELINE_PROJECT_ID}
+
+ gcloud source repos clone ml-machine-learning --project=${INFRA_PIPELINE_PROJECT_ID}
```
-1. Run `validate` and check for violations.
+1. Navigate into the repo, change to a non-main branch, and copy the contents of the foundation into the new repo.
+ All subsequent steps assume you are running them from the ml-machine-learning directory.
+ If you run them from another directory, adjust your copy paths accordingly.
```bash
- ./tf-wrapper.sh validate production $(pwd)/../policy-library ${INFRA_PIPELINE_PROJECT_ID}
+ cd ml-machine-learning
+ git checkout -b plan
+
+ cp -RT ../terraform-google-enterprise-genai/examples/machine-learning-pipeline .
+ cp ../terraform-google-enterprise-genai/build/cloudbuild-tf-* .
+ cp ../terraform-google-enterprise-genai/build/tf-wrapper.sh .
+ chmod 755 ./tf-wrapper.sh
```
-1. Run `apply` production.
+1. Rename `common.auto.example.tfvars` to `common.auto.tfvars`.
```bash
- ./tf-wrapper.sh apply production
+ mv common.auto.example.tfvars common.auto.tfvars
```
-1. Run `init` and `plan` and review output for environment non-production.
+1. If you are not integrating GitHub with Cloud Build, you can skip this step. Otherwise, update the `common.auto.tfvars` file with your GitHub App installation ID and the URL of your repository, and remember to uncomment the lines below that refer to GitHub.
```bash
- ./tf-wrapper.sh init non-production
- ./tf-wrapper.sh plan non-production
+ GITHUB_APP_ID="YOUR-GITHUB-APP-ID-HERE"
+ GITHUB_REMOTE_URI="YOUR-GITHUB-REMOTE-URI"
+
+ sed -i "s/GITHUB_APP_ID/${GITHUB_APP_ID}/" ./common.auto.tfvars
+ sed -i "s/GITHUB_REMOTE_URI/${GITHUB_REMOTE_URI}/" ./common.auto.tfvars
```
-1. Run `validate` and check for violations.
+1. Use `terraform output` to get the project backend bucket value from 0-bootstrap.
```bash
- ./tf-wrapper.sh validate non-production $(pwd)/../policy-library ${INFRA_PIPELINE_PROJECT_ID}
+ export remote_state_bucket=$(terraform -chdir="../gcp-bootstrap/envs/shared" output -raw projects_gcs_bucket_tfstate)
+ echo "remote_state_bucket = ${remote_state_bucket}"
+ sed -i "s/REMOTE_STATE_BUCKET/${remote_state_bucket}/" ./common.auto.tfvars
```
-1. Run `apply` non-production.
+1. Use `terraform output` to retrieve the Service Catalog project-id from the projects step and update values in `modules/base_env`.
```bash
- ./tf-wrapper.sh apply non-production
+ export service_catalog_project_id=$(terraform -chdir="../gcp-projects/ml_business_unit/shared/" output -raw service_catalog_project_id)
+ echo "service_catalog_project_id = ${service_catalog_project_id}"
+
+ ## Linux
+ sed -i "s/SERVICE_CATALOG_PROJECT_ID/${service_catalog_project_id}/g" ./modules/base_env/main.tf
```
-1. Run `init` and `plan` and review output for environment development.
+1. Update the bucket variable that is used to retrieve values from the 2-environments step.
+
+ ```bash
+ export seed_state_bucket=$(terraform -chdir="../gcp-bootstrap/envs/shared" output -raw gcs_bucket_tfstate)
+ echo "seed_state_bucket = ${seed_state_bucket}"
+
+ sed -i "s/REPLACE_SEED_TFSTATE_BUCKET/${seed_state_bucket}/" ./common.auto.tfvars
+ ```
+
+1. Update `vpc_project` variable with the development environment host VPC project.
```bash
- ./tf-wrapper.sh init development
- ./tf-wrapper.sh plan development
+ export vpc_project=$(terraform -chdir="../gcp-networks/envs/development" output -raw restricted_host_project_id)
+ echo $vpc_project
+
+ ## Linux
+ sed -i "s/REPLACE_WITH_DEV_VPC_PROJECT/${vpc_project}/g" ./modules/base_env/main.tf
+ ```
+
+1. Update the `instance_owners` variable with your GCP user account email. Replace `INSERT_YOUR_USER_EMAIL_HERE` with your email.
+
+ ```bash
+ export user_email="INSERT_YOUR_USER_EMAIL_HERE"
+
+ ## Linux
+ sed -i "s/REPLACE_WITH_USER_GCP_EMAIL/${user_email}/g" ./modules/base_env/main.tf
+ ```
+
+1. Update `backend.tf` with your bucket from the infra pipeline output.
+
+ ```bash
+ export backend_bucket=$(terraform -chdir="../gcp-projects/ml_business_unit/shared/" output -json state_buckets | jq '."ml-machine-learning"' --raw-output)
+ echo "backend_bucket = ${backend_bucket}"
+
+ ## Linux
+ for i in `find . -name 'backend.tf'`; do sed -i "s/UPDATE_APP_INFRA_BUCKET/${backend_bucket}/" $i; done
+ ```
+
+1. Allow the Cloud Build Service Account to read the 2-environments state:
+
+1. Retrieve the value for "sa-tf-cb-ml-machine-learning@[prj_c_ml_infra_pipeline_project_id].iam.gserviceaccount.com" on your environment by running:
+
+ ```bash
+ export ml_cb_sa=$(terraform -chdir="../gcp-projects/ml_business_unit/shared" output -json terraform_service_accounts | jq -r '."ml-machine-learning"')
+ echo $ml_cb_sa
+ ```
+
+1. Assign Storage Object Viewer on bucket:
+
+ ```bash
+ gcloud storage buckets add-iam-policy-binding gs://$seed_state_bucket \
+ --member=serviceAccount:$ml_cb_sa \
+ --role=roles/storage.objectViewer
+ ```
+
+1. Assign Artifact Registry Admin on the publish artifacts project (if `common_artifacts_project_id` is not set in your shell, see the note after this command):
+
+ ```bash
+ gcloud projects add-iam-policy-binding $common_artifacts_project_id \
+ --member=serviceAccount:$ml_cb_sa \
+ --role=roles/artifactregistry.admin
+ ```
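+
+   If `common_artifacts_project_id` is not already exported in your shell, it can likely be retrieved from the `4-projects` (ml_business_unit shared) outputs; the output name used here is an assumption, so adjust it to your setup:
+
+   ```bash
+   # Assumption: the shared ML stack exposes the artifacts project ID under an
+   # output named "common_artifacts_project_id".
+   export common_artifacts_project_id=$(terraform -chdir="../gcp-projects/ml_business_unit/shared" output -raw common_artifacts_project_id)
+   echo $common_artifacts_project_id
+   ```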
+
+1. Commit changes.
+
+ ```bash
+ git add .
+ git commit -m 'Initialize repo'
+ ```
+
+1. Push your plan branch to trigger a plan for all environments. Because the
+ *plan* branch is not a [named environment branch](../docs/FAQ.md#what-is-a-named-branch), pushing your *plan*
+ branch triggers *terraform plan* but not *terraform apply*. Review the plan output in your Cloud Build project
+
+ ```bash
+ git push --set-upstream origin plan
+ ```
+
+1. Merge changes to development. Because this is a [named environment branch](../docs/FAQ.md#what-is-a-named-branch),
+ pushing to this branch triggers both *terraform plan* and *terraform apply*. Review the apply output in your Cloud Build project
+
+   ```bash
+ git checkout -b development
+ git push origin development
+ ```
+
+   **Note:** If Cloud Build reports the error `Error: Provider produced inconsistent final plan`, retry the build.
+
+1. Merge changes to non-production. Because this is a [named environment branch](../docs/FAQ.md#what-is-a-named-branch),
+ pushing to this branch triggers both *terraform plan* and *terraform apply*. Review the apply output in your Cloud Build project
+
+ ```bash
+ git checkout -b non-production
+ git push origin non-production
+ ```
+
+1. Merge changes to production branch. Because this is a [named environment branch](../docs/FAQ.md#what-is-a-named-branch),
+ pushing to this branch triggers both *terraform plan* and *terraform apply*. Review the apply output in your Cloud Build project
+
+ ```bash
+ git checkout -b production
+ git push origin production
+ ```
+
+1. `cd` out of this directory:
+
+ ```bash
+ cd ..
```
-1. Run `validate` and check for violations.
+### VPC-SC - Infrastructure Deployment with Local Terraform - Only proceed with these steps if you have not used Cloud Build
+
+By now, `artifact-publish` and `service-catalog` have been deployed. The projects inflated under `machine-learning-pipeline` are placed in a service perimeter for added security. As such, several services and accounts must be given ingress and egress policies before the notebook and the pipeline are deployed. Below, you can find the values that will need to be applied to `common.auto.tfvars` and to your `development.auto.tfvars`, `non-production.auto.tfvars` & `production.auto.tfvars`, each in its respective environment.
+
+To create new ingress/egress rules on the VPC-SC perimeter, follow the steps below:
+
+**IMPORTANT**: Please note that the commands below run `terraform output`, which means each referenced directory must already be initialized with `terraform -chdir="<directory>" init` if it has not been initialized yet.
+
+#### `development` environment
+
+1. Navigate into `3-networks-dual-svpc` directory:
+
+ ```bash
+ cd 3-networks-dual-svpc/
+ ```
+
+1. Retrieve the value for "sa-tf-cb-ml-machine-learning@[prj_c_ml_infra_pipeline_project_id].iam.gserviceaccount.com" on your environment by running:
+
+ ```bash
+ export ml_cb_sa=$(terraform -chdir="../4-projects/ml_business_unit/shared" output -json terraform_service_accounts | jq -r '."ml-machine-learning"')
+ echo $ml_cb_sa
+ ```
+
+1. Retrieve the value for "sa-terraform-env@[prj_b_seed_project_id].iam.gserviceaccount.com" on your environment by running:
+
+ ```bash
+ export env_step_sa=$(terraform -chdir="../../gcp-bootstrap/envs/shared" output -raw environment_step_terraform_service_account_email)
+ echo $env_step_sa
+ ```
+
+1. Retrieve the value for `prj_d_logging_project_number`:
+
+ ```bash
+ terraform -chdir="../2-environments/envs/development" init
+
+ export prj_d_logging_project_number=$(terraform -chdir="../2-environments/envs/development" output -raw env_log_project_number)
+ echo $prj_d_logging_project_number
+ ```
+
+1. Retrieve the values for `prj_d_machine_learning_project_id` and `prj_d_machine_learning_project_number`:
+
+ ```bash
+ terraform -chdir="../4-projects/ml_business_unit/development" init
+
+ export prj_d_machine_learning_project_id=$(terraform -chdir="../4-projects/ml_business_unit/development" output -raw machine_learning_project_id)
+ echo $prj_d_machine_learning_project_id
+
+ export prj_d_machine_learning_project_number=$(terraform -chdir="../4-projects/ml_business_unit/development" output -raw machine_learning_project_number)
+ echo $prj_d_machine_learning_project_number
+ ```
+
+1. Take note of the following command output, and update `perimeter_additional_members` in `common.auto.tfvars` to include them:
+
+ ```bash
+ cat < envs/development/development.auto.tfvars
+ ```
+
+1. Apply the results for the development environment on `3-networks-dual-svpc`.
+
+ ```bash
+ ./tf-wrapper.sh plan development
+ ./tf-wrapper.sh apply development
+ ```
+
+> **DISCLAIMER**: Remember that before deleting or destroying the `machine-learning-pipeline` example, you must remove the egress/ingress policies related to the example, to prevent any inconsistencies.
+
+**NOTE:** If you receive an error while running the plan/apply commands, you may need to impersonate the Service Account:
+
+ ```bash
+ export CLOUD_BUILD_PROJECT_ID=$(terraform -chdir="../0-bootstrap/" output -raw cloudbuild_project_id)
+ echo ${CLOUD_BUILD_PROJECT_ID}
+
+ export GOOGLE_IMPERSONATE_SERVICE_ACCOUNT=$(terraform -chdir="../0-bootstrap/" output -raw networks_step_terraform_service_account_email)
+ echo ${GOOGLE_IMPERSONATE_SERVICE_ACCOUNT}
+ ```
+
+
+#### `non-production` environment
+
+1. Retrieve the value for "sa-tf-cb-ml-machine-learning@[prj_c_ml_infra_pipeline_project_id].iam.gserviceaccount.com" in your environment by running the following commands. These commands assume that you are executing them in the 3-networks-dual-svpc directory.
+
+ ```bash
+ export ml_cb_sa=$(terraform -chdir="../4-projects/ml_business_unit/shared" output -json terraform_service_accounts | jq -r '."ml-machine-learning"')
+ echo $ml_cb_sa
+ ```
+
+1. Retrieve the value for "sa-terraform-env@[prj_b_seed_project_id].iam.gserviceaccount.com" on your environment by running:
+
+ ```bash
+ export env_step_sa=$(terraform -chdir="../../gcp-bootstrap/envs/shared" output -raw environment_step_terraform_service_account_email)
+ echo $env_step_sa
+ ```
+
+1. Retrieve the value for `prj_n_logging_project_number`:
+
+ ```bash
+ terraform -chdir="../2-environments/envs/non-production" init
+
+ export prj_n_logging_project_number=$(terraform -chdir="../2-environments/envs/non-production" output -raw env_log_project_number)
+ echo $prj_n_logging_project_number
+ ```
+
+1. Retrieve the values for `prj_n_machine_learning_project_id` and `prj_n_machine_learning_project_number`:
+
+ ```bash
+ terraform -chdir="../4-projects/ml_business_unit/non-production" init
+
+ export prj_n_machine_learning_project_id=$(terraform -chdir="../4-projects/ml_business_unit/non-production" output -raw machine_learning_project_id)
+ echo $prj_n_machine_learning_project_id
+
+ export prj_n_machine_learning_project_number=$(terraform -chdir="../4-projects/ml_business_unit/non-production" output -raw machine_learning_project_number)
+ echo $prj_n_machine_learning_project_number
+ ```
+
+1. Take note of the output of the following command and update `perimeter_additional_members` in `common.auto.tfvars` to include those members:
+
+ ```bash
+ cat < envs/non-production/non-production.auto.tfvars
+ ```
+
+> *IMPORTANT*: The command above assumes you are running it in the `3-networks-dual-svpc` directory.
+
+1. Apply the results for the non-production environment in `3-networks-dual-svpc`.
+
+ ```bash
+ ./tf-wrapper.sh plan non-production
+ ./tf-wrapper.sh apply non-production
+ ```
+
+> **DISCLAIMER**: Remember that before deleting or destroying the `machine-learning-pipeline` example, you must remove the egress/ingress policies related to the example, to prevent any inconsistencies.
+
+#### `production` environment
+
+1. Retrieve the value for "sa-tf-cb-ml-machine-learning@[prj_c_ml_infra_pipeline_project_id].iam.gserviceaccount.com" in your environment by running the following commands. These commands assume that you are executing them in the 3-networks-dual-svpc directory.
+
+ ```bash
+ export ml_cb_sa=$(terraform -chdir="../4-projects/ml_business_unit/shared" output -json terraform_service_accounts | jq -r '."ml-machine-learning"')
+ echo $ml_cb_sa
+ ```
+
+1. Retrieve the value for "sa-terraform-env@[prj_b_seed_project_id].iam.gserviceaccount.com" on your environment by running:
+
+ ```bash
+ export env_step_sa=$(terraform -chdir="../0-bootstrap/envs/shared" output -raw environment_step_terraform_service_account_email)
+ echo $env_step_sa
+ ```
+
+1. Retrieve the value for `prj_p_logging_project_number`:
+
+ ```bash
+ terraform -chdir="../2-environments/envs/production" init
+
+ export prj_p_logging_project_number=$(terraform -chdir="../2-environments/envs/production" output -raw env_log_project_number)
+ echo $prj_p_logging_project_number
+ ```
+
+1. Retrieve the values for `prj_p_machine_learning_project_id` and `prj_p_machine_learning_project_number`:
+
+ ```bash
+ export prj_p_machine_learning_project_id=$(terraform -chdir="../4-projects/ml_business_unit/production" output -raw machine_learning_project_id)
+ echo $prj_p_machine_learning_project_id
+
+ export prj_p_machine_learning_project_number=$(terraform -chdir="../4-projects/ml_business_unit/production" output -raw machine_learning_project_number)
+ echo $prj_p_machine_learning_project_number
+ ```
+
+1. Take note of the output of the following command and update `perimeter_additional_members` in `common.auto.tfvars` to include those members:
+
+ ```bash
+ cat < envs/production/production.auto.tfvars
+ ```
+
+> *IMPORTANT*: The command above assumes you are running it in the `3-networks-dual-svpc` directory.
+
+1. Apply the results for the production environment in `3-networks-dual-svpc`.
+
+ ```bash
+ ./tf-wrapper.sh plan production
+ ./tf-wrapper.sh apply production
+
+ cd ../..
+ ```
+
+> **DISCLAIMER**: Remember that before deleting or destroying the `machine-learning-pipeline` example, you must remove the egress/ingress policies related to the example, to prevent any inconsistencies.
+
+## Usage with Local Terraform
+
+These environment project inflations are closely tied to the `service-catalog` project that has already been deployed. By now, the `ml-service-catalog` should have been inflated. `service-catalog` contains modules that are deployed in an interactive (development) environment. Since they already exist, they can be reused as Terraform modules for the operational (non-production, production) environments. This was done to avoid code redundancy: one place for all `machine-learning` deployments.
+
+Under `modules/base_env/main.tf` you will notice all module calls are using `git` links as sources. These links refer to the `service-catalog` cloud source repository we have already set up.
+
+### Infrastructure Deployment with Local Terraform - Only proceed with these if you have not used Cloud Build
+
+1. The next instructions assume that you are at the same level as the `terraform-google-enterprise-genai` folder. Change into the `machine-learning-pipeline` example folder, copy the Terraform wrapper script, and ensure it can be executed.
+
+ ```bash
+ cd terraform-google-enterprise-genai/examples/machine-learning-pipeline
+ cp ../../build/tf-wrapper.sh .
+ chmod 755 ./tf-wrapper.sh
+ ```
+
+1. Rename the `common.auto.example.tfvars` file to `common.auto.tfvars`.
+
+ ```bash
+ mv common.auto.example.tfvars common.auto.tfvars
+ ```
+
+1. If you are not integrating GitHub with Cloud Build, you can skip this step. Otherwise, update the `common.auto.tfvars` file with your GitHub App installation ID and the URL of your repository, and remember to uncomment the GitHub-related lines in that file.
+
+ ```bash
+ GITHUB_APP_ID="YOUR-GITHUB-APP-ID-HERE"
+ GITHUB_REMOTE_URI="YOUR-GITHUB-REMOTE-URI"
+
+ sed -i "s/GITHUB_APP_ID/${GITHUB_APP_ID}/" ./common.auto.tfvars
+ sed -i "s/GITHUB_REMOTE_URI/${GITHUB_REMOTE_URI}/" ./common.auto.tfvars
+ ```
+
+1. Update the `common.auto.tfvars` file with values from your environment. Use `terraform output` to get the projects backend bucket and seed backend bucket values from 0-bootstrap.
+
+ ```bash
+ export remote_state_bucket=$(terraform -chdir="../../0-bootstrap/" output -raw projects_gcs_bucket_tfstate)
+ echo "remote_state_bucket = ${remote_state_bucket}"
+ sed -i "s/REMOTE_STATE_BUCKET/${remote_state_bucket}/" ./common.auto.tfvars
+
+ export seed_state_bucket=$(terraform -chdir="../../0-bootstrap/" output -raw gcs_bucket_tfstate)
+ echo "seed_state_bucket = ${seed_state_bucket}"
+ sed -i "s/REPLACE_SEED_TFSTATE_BUCKET/${seed_state_bucket}/" ./common.auto.tfvars
+ ```
+
+1. Grant the user who will be running `./tf-wrapper.sh` the Service Account Token Creator role on the ML Terraform service account.
+
+1. Grant the user permission to run Terraform locally by assigning the `serviceAccountTokenCreator` role:
+
+ ```bash
+ member="user:$(gcloud auth list --filter="status=ACTIVE" --format="value(account)")"
+ echo ${member}
+
+ project_id=$(terraform -chdir="../../4-projects/ml_business_unit/shared/" output -raw cloudbuild_project_id)
+ echo ${project_id}
+
+ terraform_sa=$(terraform -chdir="../../4-projects/ml_business_unit/shared/" output -json terraform_service_accounts | jq '."ml-machine-learning"' --raw-output)
+ echo ${terraform_sa}
+
+ gcloud iam service-accounts add-iam-policy-binding ${terraform_sa} --project ${project_id} --member="${member}" --role="roles/iam.serviceAccountTokenCreator"
+ ```
+
+1. Update `backend.tf` with your bucket from the infra pipeline output.
+
+ ```bash
+ export backend_bucket=$(terraform -chdir="../../4-projects/ml_business_unit/shared/" output -json state_buckets | jq '."ml-machine-learning"' --raw-output)
+ echo "backend_bucket = ${backend_bucket}"
+
+ for i in `find -name 'backend.tf'`; do sed -i "s/UPDATE_APP_INFRA_BUCKET/${backend_bucket}/" $i; done
+ ```
+
+1. Update `modules/base_env/main.tf` with Service Catalog Project Id.
+
+ ```bash
+ export service_catalog_project_id=$(terraform -chdir="../../4-projects/ml_business_unit/shared/" output -raw service_catalog_project_id)
+ echo "service_catalog_project_id = ${service_catalog_project_id}"
+
+ ## Linux
+ sed -i "s/SERVICE_CATALOG_PROJECT_ID/${service_catalog_project_id}/g" ./modules/base_env/main.tf
+ ```
+
+1. Update `vpc_project` variable with the development environment host VPC project.
+
+ ```bash
+ export vpc_project=$(terraform -chdir="../../3-networks-dual-svpc/envs/development" output -raw restricted_host_project_id)
+ echo $vpc_project
+
+ ## Linux
+ sed -i "s/REPLACE_WITH_DEV_VPC_PROJECT/${vpc_project}/g" ./modules/base_env/main.tf
+ ```
+
+1. Update the `instance_owners` variable with your GCP user account email. Replace `INSERT_YOUR_USER_EMAIL_HERE` with your email.
+
+ ```bash
+ export user_email="INSERT_YOUR_USER_EMAIL_HERE"
+
+ ## Linux
+ sed -i "s/REPLACE_WITH_USER_GCP_EMAIL/${user_email}/g" ./modules/base_env/main.tf
+ ```
+
+1. Enable the Access Context Manager API for the `cloudbuild` project.
+
+ ```bash
+ export cloudbuild_project_id=$(terraform -chdir="../../4-projects/ml_business_unit/shared" output -raw cloudbuild_project_id)
+ echo $cloudbuild_project_id
+
+ gcloud services enable accesscontextmanager.googleapis.com --project=$cloudbuild_project_id
+ ```
+
+1. Retrieve the value for "sa-tf-cb-ml-machine-learning@[prj_c_ml_infra_pipeline_project_id].iam.gserviceaccount.com" on your environment by running:
+
+ ```bash
+ export ml_cb_sa=$(terraform -chdir="../../4-projects/ml_business_unit/shared" output -json terraform_service_accounts | jq -r '."ml-machine-learning"')
+ echo $ml_cb_sa
+ ```
+
+1. Assign the Storage Object Viewer role on the seed state bucket:
+
+ ```bash
+ gcloud storage buckets add-iam-policy-binding gs://$seed_state_bucket \
+ --member=serviceAccount:$ml_cb_sa \
+ --role=roles/storage.objectViewer
+ ```
+
+1. Assign the Artifact Registry Admin role on the artifacts publish project:
+
+   ```bash
+   # Retrieve the artifacts project ID from the 4-projects shared outputs, if it is not already exported
+   export common_artifacts_project_id=$(terraform -chdir="../../4-projects/ml_business_unit/shared" output -raw common_artifacts_project_id)
+   echo $common_artifacts_project_id
+
+   gcloud projects add-iam-policy-binding $common_artifacts_project_id \
+   --member=serviceAccount:$ml_cb_sa \
+   --role=roles/artifactregistry.admin
+   ```
+
+We will now deploy each of our environments (development/production/non-production) using this script.
+When using Cloud Build or Jenkins as your CI/CD tool, each environment corresponds to a branch in the repository for the `machine-learning-pipeline` step. Only the corresponding environment is applied.
+
+To use the `validate` option of the `tf-wrapper.sh` script, please follow the [instructions](https://cloud.google.com/docs/terraform/policy-validation/validate-policies#install) to install the terraform-tools component.
+
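+If you installed the gcloud CLI through its component manager, the terraform-tools install typically looks like this (see the linked instructions for other installation methods):
+
+```bash
+gcloud components install terraform-tools
+```
+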
+1. Use `terraform output` to get the Infra Pipeline Project ID from 4-projects output.
+
+ ```bash
+ export INFRA_PIPELINE_PROJECT_ID=$(terraform -chdir="../../4-projects/ml_business_unit/shared/" output -raw cloudbuild_project_id)
+ echo ${INFRA_PIPELINE_PROJECT_ID}
+
+ export GOOGLE_IMPERSONATE_SERVICE_ACCOUNT=$(terraform -chdir="../../4-projects/ml_business_unit/shared/" output -json terraform_service_accounts | jq '."ml-machine-learning"' --raw-output)
+ echo ${GOOGLE_IMPERSONATE_SERVICE_ACCOUNT}
+ ```
+
+1. Run `init` and `plan` and review output for environment production.
+
+ ```bash
+ ./tf-wrapper.sh init production
+ ./tf-wrapper.sh plan production
+ ```
+
+- If you are using GitHub and you encounter an error related to Source Repository authentication, access your Service Catalog repository `prj-c-mlservice-catalog-ID` at `https://source.cloud.google.com//service-catalog`, click the `Clone` button on the right side -> How to set up -> Manually generated credentials, and follow the instructions in step one, `Generate and store your Git credentials`. Then re-run the previous step.
+
+1. Run `validate` and check for violations.
+
+ ```bash
+ ./tf-wrapper.sh validate production $(pwd)/../../policy-library ${INFRA_PIPELINE_PROJECT_ID}
+ ```
+
+1. Run `apply` production.
+
+ ```bash
+ ./tf-wrapper.sh apply production
+ ```
+
+1. Run `init` and `plan` and review output for environment non-production.
+
+ ```bash
+ ./tf-wrapper.sh init non-production
+ ./tf-wrapper.sh plan non-production
+ ```
+
+1. Run `validate` and check for violations.
+
+ ```bash
+ ./tf-wrapper.sh validate non-production $(pwd)/../../policy-library ${INFRA_PIPELINE_PROJECT_ID}
+ ```
+
+1. Run `apply` non-production.
+
+ ```bash
+ ./tf-wrapper.sh apply non-production
+ ```
+
+1. Run `init` and `plan` and review output for environment development.
+
+ ```bash
+ ./tf-wrapper.sh init development
+ ./tf-wrapper.sh plan development
+ ```
+
+1. Run `validate` and check for violations.
+
+ ```bash
+ ./tf-wrapper.sh validate development $(pwd)/../../policy-library ${INFRA_PIPELINE_PROJECT_ID}
+ ```
+
+1. Run `apply` development.
+
+ ```bash
+ ./tf-wrapper.sh apply development
+ ```
+
+If you received any errors or made any changes to the Terraform config or `common.auto.tfvars`, you must re-run `./tf-wrapper.sh plan <environment>` before running `./tf-wrapper.sh apply <environment>`.
+
+After executing this stage, unset the `GOOGLE_IMPERSONATE_SERVICE_ACCOUNT` environment variable.
+
+ ```bash
+ unset GOOGLE_IMPERSONATE_SERVICE_ACCOUNT
+ ```
+
+## Post Infrastructure Deployment
+
+### VPC-SC with Cloud Build
+
+For the next step, we need to update the non-production and production VPC-SC perimeters by adding the service accounts listed below.
+
+**IMPORTANT:** The content of `perimeter_additional_members` in the last line needs to follow this format: `"serviceAccount:YOUR-SERVICE_ACCOUNT"]`.
+
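+For illustration only, after the updates in this section the end of the non-production `perimeter_additional_members` list should look roughly like the following; the existing member, project ID, and project number are placeholders for your own values:
+
+```text
+perimeter_additional_members = [
+    "serviceAccount:existing-member@YOUR-EXISTING-PROJECT.iam.gserviceaccount.com",
+    "serviceAccount:trigger-sa@YOUR-NON-PROD-ML-PROJECT-ID.iam.gserviceaccount.com",
+    "serviceAccount:service-YOUR-PROD-ML-PROJECT-NUMBER@gcp-sa-aiplatform.iam.gserviceaccount.com",
+    "serviceAccount:cloud-aiplatform-api-robot-prod@system.gserviceaccount.com"]
+```
+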
+1. Obtain the service accounts to be used:
+
+ ```bash
+ cd gcp-projects/ml_business_unit/non-production
+ git checkout non-production
+
+ export prj_n_machine_learning_project_id=$(terraform output -raw machine_learning_project_id)
+ echo $prj_n_machine_learning_project_id
+
+ cd ../production
+ git checkout production
+
+ export prj_p_machine_learning_project_id=$(terraform output -raw machine_learning_project_id)
+ echo $prj_p_machine_learning_project_id
+
+   # The production project number is also needed to build the AI Platform service agent member below
+   export prj_p_machine_learning_project_number=$(terraform output -raw machine_learning_project_number)
+   echo $prj_p_machine_learning_project_number
+
+   export TRIGGER_SA="serviceAccount:trigger-sa@$prj_n_machine_learning_project_id.iam.gserviceaccount.com"
+   export GCP_SA_AIPLATFORM="serviceAccount:service-$prj_p_machine_learning_project_number@gcp-sa-aiplatform.iam.gserviceaccount.com"
+ export API_ROBOT_SA="serviceAccount:cloud-aiplatform-api-robot-prod@system.gserviceaccount.com"
+
+ echo $TRIGGER_SA
+ echo $GCP_SA_AIPLATFORM
+ echo $API_ROBOT_SA
+ ```
+
+**IMPORTANT:** The commands below assume you are running them in the `terraform-google-enterprise-genai/examples/machine-learning-pipeline` directory.
+
+1. Run the command below to update the `perimeter_additional_members` in `common.auto.tfvars` for the non-production environment. The last line of your `perimeter_additional_members` should end following this format: `"serviceAccount:YOUR-SERVICE-ACCOUNT"]` (with no trailing comma and with the `]` on the same line as the last service account).
+
+ ```bash
+
+ cd ../../../gcp-networks/envs/non-production/
+ git checkout non-production
+
+ UPDATE_SA=$(printf '"%s",\n"%s",\n"%s"]' "$TRIGGER_SA" "$GCP_SA_AIPLATFORM" "$API_ROBOT_SA")
+
+ TEMP_FILE=$(mktemp)
+
+ awk -v new_entries="$UPDATE_SA" '
+ /perimeter_additional_members = \[/ {
+ print
+ in_list=1
+ next
+ }
+ in_list && /\]$/ {
+ sub(/\]$/, "")
+ print $0 ","
+ printf "%s\n", new_entries
+ in_list=0
+ next
+ }
+ {print}
+ ' common.auto.tfvars > "$TEMP_FILE"
+
+ mv "$TEMP_FILE" common.auto.tfvars
+
+ cat common.auto.tfvars ; echo ""
+ ```
+
+1. Commit the results on gcp-networks.
+
+ ```bash
+ git add .
+
+ git commit -m 'Update perimeter additional members'
+ git push origin non-production
+ ```
+
+1. Run the command below to update the `perimeter_additional_members` in `common.auto.tfvars` for the production environment. The last line of your `perimeter_additional_members` should end following this format: `"serviceAccount:YOUR-SERVICE-ACCOUNT"]` (with no trailing comma and with the `]` on the same line as the last service account).
+
+ ```bash
+
+ cd ../production/
+ git checkout production
+
+ var_global=$(printf '"%s"]' "$GCP_SA_AIPLATFORM")
+
+ TEMP_FILE=$(mktemp)
+
+ awk -v new_entry="$var_global" '
+ /perimeter_additional_members = \[/ {
+ print
+ in_list=1
+ next
+ }
+ in_list && /\]$/ {
+ sub(/\]$/, "")
+ print $0 ","
+ printf "%s\n", new_entry
+ in_list=0
+ next
+ }
+ {print}
+ ' common.auto.tfvars > "$TEMP_FILE"
+
+ mv "$TEMP_FILE" common.auto.tfvars
+
+ cat common.auto.tfvars ; echo ""
+ ```
+
+1. Commit the results on gcp-networks.
+
+ ```bash
+ git add .
+
+ git commit -m 'Update perimeter additional members'
+ git push origin production
+ ```
+
+### VPC-SC with Local Terraform - Only proceed with these if you have not used Cloud Build
+
+For the next step, we need to update the non-production and production VPC-SC perimeters by adding the service accounts listed below.
+
+**IMPORTANT:** The content of `perimeter_additional_members` in the last line needs to follow this format: `"serviceAccount:YOUR-SERVICE_ACCOUNT"]`.
+
+1. Obtain the service accounts to be used:
+
+ ```bash
+ export prj_n_machine_learning_project_id=$(terraform -chdir="../../4-projects/ml_business_unit/non-production" output -raw machine_learning_project_id)
+ echo $prj_n_machine_learning_project_id
+
+ export prj_p_machine_learning_project_id=$(terraform -chdir="../../4-projects/ml_business_unit/production" output -raw machine_learning_project_id)
+ echo $prj_p_machine_learning_project_id
+
+   # The production project number is also needed to build the AI Platform service agent member below
+   export prj_p_machine_learning_project_number=$(terraform -chdir="../../4-projects/ml_business_unit/production" output -raw machine_learning_project_number)
+   echo $prj_p_machine_learning_project_number
+
+   export TRIGGER_SA="serviceAccount:trigger-sa@$prj_n_machine_learning_project_id.iam.gserviceaccount.com"
+   export GCP_SA_AIPLATFORM="serviceAccount:service-$prj_p_machine_learning_project_number@gcp-sa-aiplatform.iam.gserviceaccount.com"
+ export API_ROBOT_SA="serviceAccount:cloud-aiplatform-api-robot-prod@system.gserviceaccount.com"
+
+ echo $TRIGGER_SA
+ echo $GCP_SA_AIPLATFORM
+ echo $API_ROBOT_SA
+ ```
+
+1. Use `terraform output` to get the Cloud Build project ID and the networks step Terraform service account from the 0-bootstrap output. An environment variable `GOOGLE_IMPERSONATE_SERVICE_ACCOUNT` will be set using the Terraform Service Account to enable impersonation.
+
+ ```bash
+ export CLOUD_BUILD_PROJECT_ID=$(terraform -chdir="../0-bootstrap/" output -raw cloudbuild_project_id)
+ echo ${CLOUD_BUILD_PROJECT_ID}
+
+ export GOOGLE_IMPERSONATE_SERVICE_ACCOUNT=$(terraform -chdir="../0-bootstrap/" output -raw networks_step_terraform_service_account_email)
+ echo ${GOOGLE_IMPERSONATE_SERVICE_ACCOUNT}
+ ```
+
+**IMPORTANT:** The commands below assume you are running them in the `terraform-google-enterprise-genai/examples/machine-learning-pipeline` directory.
+
+1. Run the command below to update the `perimeter_additional_members` in `common.auto.tfvars` for the non-production environment. The last line of your `perimeter_additional_members` should end following this format: `"serviceAccount:YOUR-SERVICE-ACCOUNT"]` (with no trailing comma and with the `]` on the same line as the last service account).
+
+ ```bash
+
+ cd ../../3-networks-dual-svpc/envs/non-production/
+
+ UPDATE_SA=$(printf '"%s",\n"%s",\n"%s"]' "$TRIGGER_SA" "$GCP_SA_AIPLATFORM" "$API_ROBOT_SA")
+
+ TEMP_FILE=$(mktemp)
+
+ awk -v new_entries="$UPDATE_SA" '
+ /perimeter_additional_members = \[/ {
+ print
+ in_list=1
+ next
+ }
+ in_list && /\]$/ {
+ sub(/\]$/, "")
+ print $0 ","
+ printf "%s\n", new_entries
+ in_list=0
+ next
+ }
+ {print}
+ ' common.auto.tfvars > "$TEMP_FILE"
+
+ mv "$TEMP_FILE" common.auto.tfvars
+
+ cat common.auto.tfvars ; echo ""
+ ```
+
+1. Apply the results for the non-production environment in `3-networks-dual-svpc`.
+
+ ```bash
+ cd ../..
+
+ ./tf-wrapper.sh plan non-production
+ ./tf-wrapper.sh apply non-production
+ ```
+
+1. Apply the results for the production environment in `3-networks-dual-svpc`.
+
+ ```bash
+ ./tf-wrapper.sh plan production
+ ./tf-wrapper.sh apply production
+ ```
+
+1. Unset the `GOOGLE_IMPERSONATE_SERVICE_ACCOUNT` environment variable.
+
+ ```bash
+ unset GOOGLE_IMPERSONATE_SERVICE_ACCOUNT
+ ```
+
+### Permissions (steps for Cloud Build and Local Terraform)
+
+1. The default Compute Engine service account from the non-production project must have `roles/aiplatform.admin` on the production project. Run the command below to assign the permission:
+
+ ```bash
+ gcloud projects add-iam-policy-binding $prj_p_machine_learning_project_id \
+ --member="serviceAccount:$prj_n_machine_learning_project_number-compute@developer.gserviceaccount.com" --role='roles/aiplatform.admin'
+ ```
+
+1. The AI Platform Service Agent from the production project must have `roles/storage.admin` on the non-production bucket. Run the command below to assign the permission:
+
+ ```bash
+ export non_production_bucket_name=$(gcloud storage buckets list --project $prj_n_machine_learning_project_id --format="value(name)" |grep bkt)
+ echo $non_production_bucket_name
+
+ gcloud storage buckets add-iam-policy-binding gs://$non_production_bucket_name \
+ --member="serviceAccount:service-$prj_p_machine_learning_project_number@gcp-sa-aiplatform.iam.gserviceaccount.com" --role='roles/storage.admin'
+ ```
+
+**NOTE:** If `$non_production_bucket_name` is empty, you may need to unset your billing quota project with the command below:
+
+```bash
+ gcloud config unset billing/quota_project
+```
+
+1. The Default Compute Engine SA from the production project must have `roles/storage.admin` on the non-production bucket. Run the command below to assign the permission:
+
+   ```bash
+   gcloud storage buckets add-iam-policy-binding gs://$non_production_bucket_name \
+   --member="serviceAccount:$prj_p_machine_learning_project_number-compute@developer.gserviceaccount.com" \
+   --role='roles/storage.admin'
+   ```
+
+### Big Query with Cloud Build
+
+**IMPORTANT**: The steps below apply only if you are deploying via `Cloud Build`. If you are deploying using Local Terraform, skip directly to the `Big Query with Local Terraform` section. The commands below assume you are running them in the `terraform-google-enterprise-genai/examples/machine-learning-pipeline` directory.
+
+ 1. In order to avoid having to specify a KMS key for every query against a BigQuery resource, we set the default project encryption key to the corresponding environment key in advance:
+
+ ```bash
+ ml_project_dev=$(terraform -chdir="../../../gcp-projects/ml_business_unit/development" output -raw machine_learning_project_id)
+ ml_project_dev_key=$(terraform -chdir="../../../gcp-projects/ml_business_unit/development" output -json machine_learning_kms_keys)
+ ml_project_nonprd=$(terraform -chdir="../../../gcp-projects/ml_business_unit/non-production" output -raw machine_learning_project_id)
+ ml_project_nonprod_key=$(terraform -chdir="../../../gcp-projects/ml_business_unit/non-production" output -json machine_learning_kms_keys)
+ ml_project_prd=$(terraform -chdir="../../../gcp-projects/ml_business_unit/production" output -raw machine_learning_project_id)
+ ml_project_prod_key=$(terraform -chdir="../../../gcp-projects/ml_business_unit/production" output -json machine_learning_kms_keys)
+
+ project_key=$(echo "$ml_project_dev_key "| jq -r '."us-central1".id')
+ echo "ALTER PROJECT \`$ml_project_dev\` SET OPTIONS (\`region-us-central1.default_kms_key_name\`=\"$project_key\");" | bq query --project_id "$ml_project_dev" --nouse_legacy_sql
+
+ project_key=$(echo "$ml_project_nonprod_key "| jq -r '."us-central1".id')
+ echo "ALTER PROJECT \`$ml_project_nonprd\` SET OPTIONS (\`region-us-central1.default_kms_key_name\`=\"$project_key\");" | bq query --project_id "$ml_project_nonprd" --nouse_legacy_sql
+
+ project_key=$(echo "$ml_project_prod_key "| jq -r '."us-central1".id')
+ echo "ALTER PROJECT \`$ml_project_prd\` SET OPTIONS (\`region-us-central1.default_kms_key_name\`=\"$project_key\");" | bq query --project_id "$ml_project_prd" --nouse_legacy_sql
+ ```
+
+1. Many of the necessary service agents and permissions were deployed in all project environments for machine-learning. Additional entries may be needed for each environment.
+
+1. Add more agents to the DEVELOPMENT.AUTO.TFVARS file under `egress_policies`. This file is in the `gcp-networks` directory. Make sure you are on the `development` branch.
+
+ - "serviceAccount:bq-[prj-d-ml-machine-learning-project-number]@bigquery-encryption.iam.gserviceaccount.com"
+
+ This should be added under egress_policies -> notebooks -> identities. It should look like this:
+
+ ```text
+ egress_policies = [
+ // notebooks
+ {
+ "from" = {
+ "identity_type" = ""
+ "identities" = [
+ "serviceAccount:bq-[prj-d-ml-machine-learning-project-number]@bigquery-encryption.iam.gserviceaccount.com", // << New Addition
+ "serviceAccount:service-[prj-d-ml-machine-learning-project-number]@gcp-sa-notebooks.iam.gserviceaccount.com",
+ "serviceAccount:service-[prj-d-ml-machine-learning-project-number]@compute-system.iam.gserviceaccount.com",
+ ]
+ },
+ "to" = {
+ "resources" = ["projects/[prj-d-kms-project-number]"]
+ "operations" = {
+ "compute.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "cloudkms.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ }
+ }
+ },
+ ]
+ ```
+
+1. Once this addition has been made, you need to trigger the Cloud Build pipeline for `gcp-networks` for the development environment:
+
+ ```bash
+ cd gcp-networks
+ git add .
+
+ git commit -m 'Update egress rules'
+ git push origin development
+ ```
+
+### Big Query with Local Terraform - Only proceed with these if you have not used Cloud Build
+
+ 1. In order to avoid having to specify a KMS key for every query against a BigQuery resource, we set the default project encryption key to the corresponding environment key in advance. The commands below assume you are running them in the `terraform-google-enterprise-genai/examples/machine-learning-pipeline` directory.
+
+ ```bash
+ ml_project_dev=$(terraform -chdir="../../4-projects/ml_business_unit/development" output -raw machine_learning_project_id)
+ ml_project_dev_key=$(terraform -chdir="../../4-projects/ml_business_unit/development" output -json machine_learning_kms_keys)
+ ml_project_nonprd=$(terraform -chdir="../../4-projects/ml_business_unit/non-production" output -raw machine_learning_project_id)
+ ml_project_nonprod_key=$(terraform -chdir="../../4-projects/ml_business_unit/non-production" output -json machine_learning_kms_keys)
+ ml_project_prd=$(terraform -chdir="../../4-projects/ml_business_unit/production" output -raw machine_learning_project_id)
+ ml_project_prod_key=$(terraform -chdir="../../4-projects/ml_business_unit/production" output -json machine_learning_kms_keys)
+
+ project_key=$(echo "$ml_project_dev_key "| jq -r '."us-central1".id')
+ echo "ALTER PROJECT \`$ml_project_dev\` SET OPTIONS (\`region-us-central1.default_kms_key_name\`=\"$project_key\");" | bq query --project_id "$ml_project_dev" --nouse_legacy_sql
+
+ project_key=$(echo "$ml_project_nonprod_key "| jq -r '."us-central1".id')
+ echo "ALTER PROJECT \`$ml_project_nonprd\` SET OPTIONS (\`region-us-central1.default_kms_key_name\`=\"$project_key\");" | bq query --project_id "$ml_project_nonprd" --nouse_legacy_sql
+
+ project_key=$(echo "$ml_project_prod_key "| jq -r '."us-central1".id')
+ echo "ALTER PROJECT \`$ml_project_prd\` SET OPTIONS (\`region-us-central1.default_kms_key_name\`=\"$project_key\");" | bq query --project_id "$ml_project_prd" --nouse_legacy_sql
+ ```
+
+1. Many of the necessary service agents and permissions were deployed in all project environments for machine-learning. Additional entries may be needed for each environment.
+
+1. Add more agents to the DEVELOPMENT.AUTO.TFVARS file under `egress_policies`. This file is in the `3-networks-dual-svpc/envs/development` directory.
+
+ - "serviceAccount:bq-[prj-d-ml-machine-learning-project-number]@bigquery-encryption.iam.gserviceaccount.com"
+
+ This should be added under egress_policies -> notebooks -> identities. It should look like this:
+
+ ```text
+ egress_policies = [
+ // notebooks
+ {
+ "from" = {
+ "identity_type" = ""
+ "identities" = [
+ "serviceAccount:bq-[prj-d-ml-machine-learning-project-number]@bigquery-encryption.iam.gserviceaccount.com", // << New Addition
+ "serviceAccount:service-[prj-d-ml-machine-learning-project-number]@gcp-sa-notebooks.iam.gserviceaccount.com",
+ "serviceAccount:service-[prj-d-ml-machine-learning-project-number]@compute-system.iam.gserviceaccount.com",
+ ]
+ },
+ "to" = {
+ "resources" = ["projects/[prj-d-kms-project-number]"]
+ "operations" = {
+ "compute.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "cloudkms.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ }
+ }
+ },
+ ]
+ ```
+
+1. Use `terraform output` to get the Cloud Build project ID and the networks step Terraform service account from the 0-bootstrap output. An environment variable `GOOGLE_IMPERSONATE_SERVICE_ACCOUNT` will be set using the Terraform Service Account to enable impersonation.
+
+ ```bash
+ export CLOUD_BUILD_PROJECT_ID=$(terraform -chdir="../0-bootstrap/" output -raw cloudbuild_project_id)
+ echo ${CLOUD_BUILD_PROJECT_ID}
+
+ export GOOGLE_IMPERSONATE_SERVICE_ACCOUNT=$(terraform -chdir="../0-bootstrap/" output -raw networks_step_terraform_service_account_email)
+ echo ${GOOGLE_IMPERSONATE_SERVICE_ACCOUNT}
+ ```
+
+1. Once this addition has been made, you need to apply the changes in `3-networks-dual-svpc` for the development environment:
+
+ ```bash
+ cd ../../3-networks-dual-svpc
+
+ ./tf-wrapper.sh plan development
+ ./tf-wrapper.sh apply development
+ ```
+
+1. Unset the `GOOGLE_IMPERSONATE_SERVICE_ACCOUNT` environment variable.
+
+ ```bash
+ unset GOOGLE_IMPERSONATE_SERVICE_ACCOUNT
+ ```
+
+## Running the Machine Learning Pipeline
+
+Each environment (Development, Non-production, and Production) has its own purpose; they are not mirrors of one another, as you can see in the diagram below:
+
+```text
++---------------+ +-----------------------------+ +----------------+
+| | | | | |
+| Development | | Non-production | | Production |
+| | | | | |
+| | | | | |
+| Notebook | | Promotion Pipeline | | |
+| | | | (Cloud Build) ----------+--+--> ML Model |
+| | | | deploys | | |
+| |deploys | | | | |
+| | | | | | |
+| v | | | | |
+| ML Model | | | | |
+| | | | | |
+| | | | | |
++---------------+ +-----------------------------+ +----------------+
+```
+
+The Development environment is responsible for creating the pipeline components and making sure there are no issues in the environment. After running the notebook in the development environment you will have a Machine Learning model deployed, which can be viewed at the following link ``, and a Vertex AI Workbench instance that incurs billing; refer to the [following link](https://cloud.google.com/vertex-ai/pricing#notebooks) for more detailed billing information.
+
+The non-production environment triggers the pipeline if the merge is approved. The Vertex AI pipeline takes about 30 minutes to finish and deploys the model to the production environment.
+
+The production environment will provide an endpoint in the project which you can use to make prediction requests to the Machine Learning Model.
+
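+For example, once the model has been deployed you can send a test prediction with the gcloud CLI. This is only an illustrative sketch: the endpoint ID, project ID, region, and request file are placeholders, and `request.json` must match the model's expected input schema:
+
+```bash
+# List the endpoints in the production ML project to find the endpoint ID
+gcloud ai endpoints list \
+  --project=YOUR-PROD-ML-PROJECT-ID \
+  --region=us-central1
+
+# Send a test prediction request to the deployed model
+gcloud ai endpoints predict ENDPOINT_ID \
+  --project=YOUR-PROD-ML-PROJECT-ID \
+  --region=us-central1 \
+  --json-request=request.json
+```
+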
+For our pipeline which trains and deploys a model on the [census income dataset](https://archive.ics.uci.edu/dataset/20/census+income), we use a notebook in the development environment workbench to create our pipeline components, put them together into a pipeline and do a dry run of the pipeline to make sure there are no issues. You can access the repository that contains assets for the notebook [here](./assets/Vertexpipeline/).
+
+There is a [Dockerfile](../../5-app-infra/source_repos/artifact-publish/images/vertexpipeline:v2/Dockerfile) in the repo which defines the Docker image used to run all pipeline steps and Cloud Build steps. In the non-prod and prod environments, the only NIST-compliant way to access additional dependencies and requirements is via Docker images uploaded to Artifact Registry. We have baked everything for running the pipeline into this image, which exists in the shared Artifact Registry.
+
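+If you want to confirm that the image is available before running the pipeline, you can list the repositories and images in the shared Artifact Registry project. The project ID, region, and repository name below are placeholders; adjust them to your deployment:
+
+```bash
+# List the Artifact Registry repositories in the shared artifacts project
+gcloud artifacts repositories list --project=YOUR-COMMON-ARTIFACTS-PROJECT-ID
+
+# List the images in a given repository (region and repository name are assumptions)
+gcloud artifacts docker images list \
+  us-central1-docker.pkg.dev/YOUR-COMMON-ARTIFACTS-PROJECT-ID/YOUR-REPOSITORY-NAME
+```
+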
+Once confident that the pipeline runs successfully in the development environment, we divide the code into two separate files to use in our CI/CD process in the non-production environment. The first file is *compile_pipeline.py*, which includes the code to build the pipeline and compile it into a package (in our case, `common/vertex-ai-pipeline/pipeline_package.yaml`).
+
+The second file, *runpipeline.py*, includes the code for running the compiled pipeline. This is where the correct environment variables for non-production and production (for example, the service accounts to use for each stage of the pipeline, the KMS keys corresponding to each step, buckets, and so on) are set. Eventually the pipeline is loaded from the YAML file at *common/vertex-ai-pipeline/pipeline_package.yaml* and submitted to Vertex AI.
+
+There should be a *cloudbuild.yaml* template file at `examples/machine-learning-pipeline/assets/Vertexpipeline/cloudbuild.yaml` in this repository with the CI/CD steps as follows:
+
+1. Upload the Dataflow src file to the bucket in non-prod
+2. Upload the dataset to the bucket
+3. Run *compile_pipeline.py* to compile the pipeline
+4. Run the pipeline via *runpipeline.py*
+5. Optionally, upload the pipeline's yaml file to the composer bucket to make it available for scheduled pipeline runs
+
+The Cloud Build trigger will be set up in the non-production project, which is where the previously validated ML pipeline will run. There should be three branches on the repo, namely dev, non-prod, and prod. Cloud Build will trigger the pipeline once there is a merge into the non-prod branch from dev. However, model deployment and monitoring steps take place in the production environment. As a result, the service agents and service accounts of the non-prod environment are given some permissions on the prod environment and vice versa.
+
+Each time a pipeline job finishes successfully, a new version of the census income bracket predictor model will be deployed on the endpoint. The new version only takes 25 percent of the traffic, whereas the other 75 percent goes to the previous version of the model, to enable A/B testing.
+
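+The traffic split is handled by the pipeline's deployment step, not by you. Purely as an illustration of the same idea, an equivalent manual deployment with the gcloud CLI would look roughly like this; the endpoint, model, and previously deployed model IDs are placeholders, and `0` refers to the model being deployed:
+
+```bash
+gcloud ai endpoints deploy-model ENDPOINT_ID \
+  --project=YOUR-PROD-ML-PROJECT-ID \
+  --region=us-central1 \
+  --model=MODEL_ID \
+  --display-name=census-income-model \
+  --traffic-split=0=25,PREVIOUS_DEPLOYED_MODEL_ID=75
+```
+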
+You can read more about the details of the pipeline components in the [pipeline's repo](./assets/Vertexpipeline/).
+
+### Step by step
+
+If you are using GitHub, make sure you have your personal Git access token ready. The Git menu option on the left bar of the workbench requires the personal token to connect to Git and clone the repo.
+
+Also make sure to have a GCS bucket ready to store the artifacts for the tutorial. To deploy a new bucket, you can go to Service Catalog and create a new deployment from the storage bucket solution.
+
+#### Creating the Vertex AI Workbench Instance
+
+- The workbench instance was deployed on `modules/base_env/main.tf` when running the infrastructure pipeline. You can also deploy notebook instances using Service Catalog, after configuring it, refer to [Google Docs for more information](https://cloud.google.com/service-catalog/docs/create-solutions).
+
+#### 1. Run the notebook with Cloud Build
+
+**IMPORTANT**: The steps below are specific if you are deploying via `Cloud Build`. If you are deploying using Local Terraform, skip directly to the `Run the notebook with Local Terraform` section.
+
+1. Before running the notebook, create a new Git repository with the content of the `examples/machine-learning-pipeline/assets/Vertexpipeline` folder at the same level as your `terraform-google-enterprise-genai` folder. Take note of this Git repository URL; you will need it to clone the repository into your notebooks. You need to create `development` and `non-prod` branches in this repo; a sketch of one way to do this is shown below.
+
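+   A minimal sketch of one way to do this, assuming you have already created an empty remote repository and substitute its URL for `YOUR_GIT_REMOTE_URL`:
+
+   ```bash
+   # Run from the folder that contains terraform-google-enterprise-genai
+   cp -r terraform-google-enterprise-genai/examples/machine-learning-pipeline/assets/Vertexpipeline ./Vertexpipeline
+   cd Vertexpipeline
+
+   git init
+   git add .
+   git commit -m "Add Vertex pipeline assets"
+
+   # Create the branches this walkthrough expects
+   git branch -M development
+   git branch non-prod
+
+   git remote add origin YOUR_GIT_REMOTE_URL
+   git push -u origin development non-prod
+   ```
+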
+1. Export the email address that will be used to monitor the configuration in the notebook. To do this, execute the following code:
+
+ ```bash
+ export your_monitoring_email="YOUR-EMAIL@YOUR-COMPANY.COM"
+ echo $your_monitoring_email
+ ```
+
+1. In the next step, you can use the following commands to update the placeholders used in the file `census_pipeline.ipynb`. The commands below assume that you are in the new Git repository you created, on the development branch.
+
+ ```bash
+ export prj_d_machine_learning_project_id=$(terraform -chdir="../gcp-projects/ml_business_unit/development" output -raw machine_learning_project_id)
+ echo $prj_d_machine_learning_project_id
+
+ export prj_d_machine_learning_project_number=$(terraform -chdir="../gcp-projects/ml_business_unit/development" output -raw machine_learning_project_number)
+ echo $prj_d_machine_learning_project_number
+
+ export prj_d_shared_restricted_id=$(terraform -chdir="../gcp-networks/envs/development" output -raw restricted_host_project_id)
+ echo $prj_d_shared_restricted_id
+
+ export prj_d_kms_id=$(terraform -chdir="../gcp-environments/envs/development" output -raw env_kms_project_id)
+ echo $prj_d_kms_id
+
+ export common_artifacts_project_id=$(terraform -chdir="../gcp-projects/ml_business_unit/shared" output -raw common_artifacts_project_id)
+ echo $common_artifacts_project_id
+
+ export development_bucket_name=$(gcloud storage buckets list --project $prj_d_machine_learning_project_id --format="value(name)" |grep bkt)
+ echo $development_bucket_name
+
+
+ sed -i \
+ -e "s/MACHINE_LEARNING_PROJECT_ID/$prj_d_machine_learning_project_id/g" \
+ -e "s/MACHINE_LEARNING_PROJECT_BUCKET_ID/$development_bucket_name/g" \
+ -e "s/YOUR_PROJECT_D_SHARED_ID/$prj_d_shared_restricted_id/g" \
+ -e "s/MACHINE_LEARNING_PROJECT_NUMBER/$prj_d_machine_learning_project_number/g" \
+ -e "s/KMS_D_PROJECT_ID/$prj_d_kms_id/g" \
+ -e "s/PRJ_C_ML_ARTIFACTS_ID/$common_artifacts_project_id/g" \
+ -e "s/YOUR-EMAIL@YOUR-COMPANY.COM/$your_monitoring_email/g" \
+ ./census_pipeline.ipynb
+ ```
+
+1. Push the changes to your Git Vertex repository (development branch):
+
+ ```bash
+ git add .
+ git commit -m 'Update census_pipeline.ipynb'
+ git push --set-upstream origin development
+ ```
+
+1. Access workbench in your development project at the `https://console.cloud.google.com/vertex-ai/workbench/instances` link.
+
+1. Click `Open Jupyterlab` button on the instance created, this will take you to an interactive environment inside Vertex AI.
+
+1. Click the Git Icon (left side bar) and clone over HTTPS the repository you created, select the development branch.
+
+1. Navigate to the directory that contains `census_pipeline.ipynb` file and execute [the notebook](https://github.com/GoogleCloudPlatform/terraform-google-enterprise-genai/blob/main/examples/machine-learning-pipeline/assets/Vertexpipeline/census_pipeline.ipynb) cell by cell. Pay attention to the instructions and comments in the notebook, ensuring that you set the correct values for your development project. If a message pops up asking which kernel to use, select Python 3. Make sure you are in the `development branch` and the fields were populated properly.
+
+***NOTE:*** If you get an error in the first run related to `bq-jobs`, you may be facing a propagation issue. Re-running the last step of `census_pipeline.ipynb` should fix it.
+
+
+#### 2. Configure cloud build trigger with Cloud Build
+
+After the notebook runs successfully and the pipeline's test run finishes in the development environment, create a Cloud Build trigger in your non-production project. Configure the trigger to run when there is a merge into the non-prod branch, using the settings below.
+
+1. You can use the command below to get the `NON-PROD_MACHINE_LEARNING_PROJECT_ID`. It assumes you are at the same level as the `terraform-google-enterprise-genai` folder. A gcloud CLI alternative for creating the trigger is sketched after the settings table.
+
+ ```bash
+ export prj_n_machine_learning_project_id=$(terraform -chdir="gcp-projects/ml_business_unit/non-production" output -raw machine_learning_project_id)
+ echo $prj_n_machine_learning_project_id
+ echo "trigger-sa@"$prj_n_machine_learning_project_id".iam.gserviceaccount.com"
+ ```
+
+ |Setting|Value|
+ |-------|-----|
+ |Event|push to branch|
+ |Repository generation|1st gen|
+ |Repository|the url to your fork of the repo|
+ |Branch|non-prod|
+ |Configuration|Autodetected/Cloud Build configuration file (yaml or json)|
+ |Location|Repository|
+ |Cloud Build configuration file location|cloudbuild.yaml (only if you chose Cloud Build configuration file)|
+ |Service Account|trigger-sa@YOUR_NON-PROD_MACHINE_LEARNING_PROJECT_ID.iam.gserviceaccount.com|
+
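+   If you prefer the command line and your fork is connected to Cloud Build as a 1st-gen GitHub repository, the same trigger can be created roughly as follows. The trigger name, repository owner, and repository name are placeholders, and the console settings above remain the reference:
+
+   ```bash
+   gcloud builds triggers create github \
+     --project=$prj_n_machine_learning_project_id \
+     --name=ml-pipeline-non-prod \
+     --repo-owner=YOUR-GITHUB-OWNER \
+     --repo-name=YOUR-REPOSITORY-NAME \
+     --branch-pattern="^non-prod$" \
+     --build-config=cloudbuild.yaml \
+     --service-account="projects/$prj_n_machine_learning_project_id/serviceAccounts/trigger-sa@$prj_n_machine_learning_project_id.iam.gserviceaccount.com"
+   ```
+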
+1. Execute the following commands to update the `cloudbuild.yaml` file. These commands assume that you are in the cloned Git repository and on the development branch. They replace the placeholders with your bucket name and artifacts project ID. You can find the template at `assets/Vertexpipeline/cloudbuild.yaml`.
+
+ ```bash
+ export directory="../gcp-projects/ml_business_unit/non-production"
+ (cd $directory && git checkout production)
+
+ export prj_n_machine_learning_project_id=$(terraform -chdir=$directory output -raw machine_learning_project_id)
+ echo $prj_n_machine_learning_project_id
+
+ export non_prod_bucket_name=$(gsutil ls -p $prj_n_machine_learning_project_id | grep -o 'gs://bkt-n-ml[^/]*')
+ non_prod_bucket_name=$(echo $non_prod_bucket_name | sed 's#gs://##')
+ echo $non_prod_bucket_name
+
+ export directory="../gcp-projects/ml_business_unit/shared"
+ (cd $directory && git checkout production)
+
+ export common_artifacts_project_id=$(terraform -chdir="$directory" output -raw common_artifacts_project_id)
+ echo $common_artifacts_project_id
+
+ sed -i\
+ -e "s/{NON_PROD_BUCKET_NAME}/$non_prod_bucket_name/g" \
+ -e "s/{COMMOM_ARTIFACTS_PRJ_ID}/$common_artifacts_project_id/g" \
+ ./cloudbuild.yaml
+ ```
+
+1. Optionally, if you want to schedule pipeline runs on regular intervals, uncomment the last two steps and replace `{your-composer-bucket}` with the name of your Composer bucket. The first step uploads the pipeline's YAML to the bucket and the second step uploads the DAG that reads that YAML and triggers the Vertex AI pipeline:
+
+ ```yaml
+ # upload to composer
+ - name: 'gcr.io/cloud-builders/gsutil'
+ args: ['cp', './common/vertex-ai-pipeline/pipeline_package.yaml', 'gs://{your-composer-bucket}/dags/common/vertex-ai-pipeline/']
+ id: 'upload_composer_file'
+
+ # upload pipeline dag to composer
+ - name: 'gcr.io/cloud-builders/gsutil'
+ args: ['cp', './composer/dags/dag.py', 'gs://{your-composer-bucket}/dags/']
+ id: 'upload dag'
+ ```
+
+1. Execute the following commands to update the `runpipeline.py` file. These commands assume that you are in the same Git repository from the previous step and on the development branch. They replace the placeholders with values from the projects that were deployed. You can find the example template at `assets/Vertexpipeline/runpipeline.py`.
+
+ ```bash
+ export directory="../gcp-projects/ml_business_unit/shared"
+ (cd $directory && git checkout production)
+
+ export common_artifacts_project_id=$(terraform -chdir="$directory" output -raw common_artifacts_project_id)
+ echo $common_artifacts_project_id
+
+ export directory="../gcp-environments/envs/non-production"
+ (cd $directory && git checkout non-production)
- ```bash
- ./tf-wrapper.sh validate development $(pwd)/../policy-library ${INFRA_PIPELINE_PROJECT_ID}
- ```
+ export prj_n_kms_id=$(terraform -chdir="../gcp-environments/envs/non-production" output -raw env_kms_project_id)
+ echo $prj_n_kms_id
-1. Run `apply` development.
+ export directory="../gcp-networks/envs/non-production"
+ (cd $directory && git checkout non-production)
- ```bash
- ./tf-wrapper.sh apply development
- ```
+ export prj_n_shared_restricted_id=$(terraform -chdir="$directory" output -raw restricted_host_project_id)
+ echo $prj_n_shared_restricted_id
-If you received any errors or made any changes to the Terraform config or `common.auto.tfvars` you must re-run `./tf-wrapper.sh plan ` before running `./tf-wrapper.sh apply `.
+ export directory="../gcp-projects/ml_business_unit/non-production"
+ (cd $directory && git checkout non-production)
-After executing this stage, unset the `GOOGLE_IMPERSONATE_SERVICE_ACCOUNT` environment variable.
+ export prj_n_machine_learning_project_number=$(terraform -chdir=$directory output -raw machine_learning_project_number)
+ echo $prj_n_machine_learning_project_number
- ```bash
- unset GOOGLE_IMPERSONATE_SERVICE_ACCOUNT
- ```
+ export prj_n_machine_learning_project_id=$(terraform -chdir=$directory output -raw machine_learning_project_id)
+ echo $prj_n_machine_learning_project_id
-## Post Deployment
+ export non_prod_bucket_name=$(gsutil ls -p $prj_n_machine_learning_project_id | grep -o 'gs://bkt-n-ml[^/]*')
+ non_prod_bucket_name=$(echo $non_prod_bucket_name | sed 's#gs://##')
+ echo $non_prod_bucket_name
-### Big Query
+ export dataflow_sa="dataflow-sa@${prj_n_machine_learning_project_id}.iam.gserviceaccount.com"
+ echo $dataflow_sa
- In order to avoid having to specify a kms key for every query against a bigquery resource, we set the default project encryption key to the corresponding environment key in advance
- ```bash
- ml_project_dev=$(terraform -chdir="gcp-projects/ml_business_unit/development" output -json)
- ml_project_nonprd=$(terraform -chdir="gcp-projects/ml_business_unit/non-production" output -json)
- ml_project_prd=$(terraform -chdir="gcp-projects/ml_business_unit/production" output -json)
+ export directory="../gcp-projects/ml_business_unit/production"
+ (cd $directory && git checkout production)
- projects=( "$ml_project_dev" "$ml_project_nonprd" "$ml_project_prd" )
+ export prj_p_machine_learning_project_number=$(terraform -chdir=$directory output -raw machine_learning_project_number)
+ echo $prj_p_machine_learning_project_number
- for project in "${projects[@]}"; do
- project_id=$(echo "$project" | jq -r '.machine_learning_project_id.value')
- project_key=$(echo "$project "| jq -r '.machine_learning_kms_keys.value."us-central1".id')
- echo "ALTER PROJECT \`$project_id\` SET OPTIONS (\`region-us-central1.default_kms_key_name\`=\"$project_key\");" | bq query --project_id "$project_id" --nouse_legacy_sql
- done
- ```
+ export prj_p_machine_learning_project_id=$(terraform -chdir=$directory output -raw machine_learning_project_id)
+ echo $prj_p_machine_learning_project_id
-### VPC-SC
+ export directory="../gcp-environments/envs/production"
+ (cd $directory && git checkout production)
-1. Now that machine learning's projects have all been inflated, please _return to gcp-projects_ and update COMMON.AUTO.TFVARS with this __additional__ information under `perimeter_additional_members`:
+ export prj_p_kms_id=$(terraform -chdir="../gcp-environments/envs/production" output -raw env_kms_project_id)
+ echo $prj_p_kms_id
- ```
- "serviceAccount:service-[prj-n-ml-machine-learning-number]@dataflow-service-producer-prod.iam.gserviceaccount.com",
- "serviceAccount:[prj-n-ml-machine-learning-number]@cloudbuild.gserviceaccount.com",
- "serviceAccount:[prj-n-ml-machine-learning-number]-compute@developer.gserviceaccount.com",
- "serviceAccount:[prj-p-ml-machine-learning-number]@cloudbuild.gserviceaccount.com",
- "serviceAccount:service-[prj-p-ml-machine-learning-number]@gcp-sa-aiplatform.iam.gserviceaccount.com",
+ sed -i \
+ -e "s/{PRJ_C_MLARTIFACTS_ID}/$common_artifacts_project_id/g" \
+ -e "s/{PRJ_N_KMS_ID}/$prj_n_kms_id/g" \
+ -e "s/{PRJ_N_SHARED_RESTRICTED_ID}/$prj_n_shared_restricted_id/g" \
+ -e "s/{PRJ_N_MACHINE_LEARNING_NUMBER}/$prj_n_machine_learning_project_number/g" \
+ -e "s/{PRJ_N_MACHINE_LEARNING_ID}/$prj_n_machine_learning_project_id/g" \
+ -e "s/{NON_PROD_BUCKET_NAME}/${non_prod_bucket_name}/g" \
+ -e "s/{DATAFLOW_SA}/$dataflow_sa/g" \
+ -e "s/{PRJ_P_MACHINE_LEARNING_NUMBER}/$prj_p_machine_learning_project_number/g" \
+ -e "s/{PRJ_P_MACHINE_LEARNING_ID}/$prj_p_machine_learning_project_id/g" \
+ -e "s/{PRJ_P_KMS_ID}/$prj_p_kms_id/g" \
+ -e "s/YOUR-EMAIL@YOUR-COMPANY.COM/$your_monitoring_email/g" \
+ ./runpipeline.py
```
-2. optional - run the below command to generate a list of the above changes needed to COMMON.AUTO.TFVARS
+1. Execute the following commands to update the `compile_pipeline.py` file. These commands assume that you are in the same Git repository from the previous step and on the development branch. They replace the placeholders with values from the projects that were deployed. You can find the example template at `assets/Vertexpipeline/compile_pipeline.py`.
```bash
- ml_n=$(terraform -chdir="gcp-projects/ml_business_unit/non-production" output -raw machine_learning_project_number)
- ml_p=$(terraform -chdir="gcp-projects/ml_business_unit/production" output -raw machine_learning_project_number)
+ export directory="../gcp-projects/ml_business_unit/shared"
+ (cd $directory && git checkout production)
- echo "serviceAccount:service-${ml_n}@dataflow-service-producer-prod.iam.gserviceaccount.com",
- echo "serviceAccount:${ml_n}@cloudbuild.gserviceaccount.com",
- echo "serviceAccount:${ml_n}-compute@developer.gserviceaccount.com",
- echo "serviceAccount:${ml_p}@cloudbuild.gserviceaccount.com",
- echo "serviceAccount:service-${ml_p}@gcp-sa-aiplatform.iam.gserviceaccount.com",
- ```
+ export common_artifacts_project_id=$(terraform -chdir="$directory" output -raw common_artifacts_project_id)
+ echo $common_artifacts_project_id
-1. Many of the necessary service agents and permissions were deployed in all project environments for machine-learning. Additional entries will be needed for each environment.
+ export directory="../gcp-projects/ml_business_unit/non-production"
+ (cd $directory && git checkout non-production)
-1. Add in more agents to the DEVELOPMENT.AUTO.TFVARS file under `egress_policies`.
-Notably:
+ export prj_n_machine_learning_project_id=$(terraform -chdir=$directory output -raw machine_learning_project_id)
+ echo $prj_n_machine_learning_project_id
- * "serviceAccount:bq-[prj-d-ml-machine-learning-project-number]@bigquery-encryption.iam.gserviceaccount.com"
+ export non_prod_bucket_name=$(gsutil ls -p $prj_n_machine_learning_project_id | grep -o 'gs://bkt-n-ml[^/]*')
+ non_prod_bucket_name=$(echo $non_prod_bucket_name | sed 's#gs://##')
+ echo $non_prod_bucket_name
- This should be added under identities. It should look like this::
+ sed -i \
+ -e "s/{NON_PROD_BUCKET_NAME}/$non_prod_bucket_name/g" \
+ -e "s/{COMMOM_ARTIFACTS_PRJ_ID}/$common_artifacts_project_id/g" \
+ -e "s/{PRJ_N_MACHINE_LEARNING_ID}/$prj_n_machine_learning_project_id/g" \
+ ./compile_pipeline.py
```
- egress_policies = [
- // notebooks
- {
- "from" = {
- "identity_type" = ""
- "identities" = [
- "serviceAccount:bq-[prj-d-ml-machine-learning-project-number]@bigquery-encryption.iam.gserviceaccount.com" << New Addition
- "serviceAccount:service-[prj-d-ml-machine-learning-project-number]@gcp-sa-notebooks.iam.gserviceaccount.com",
- "serviceAccount:service-[prj-d-ml-machine-learning-project-number]@compute-system.iam.gserviceaccount.com",
- ]
- },
- "to" = {
- "resources" = ["projects/[prj-d-kms-project-number]"]
- "operations" = {
- "compute.googleapis.com" = {
- "methods" = ["*"]
- }
- "cloudkms.googleapis.com" = {
- "methods" = ["*"]
- }
- }
- }
- },
- ]
- ```
-1. Remain in DEVELOPMENT.AUTO.TFVARS and include this entry under `egress_policies`. Ensure you replace all [project numbers] with their corresponding project:
+***NOTE:*** If you get an error in the first run related to `bq-jobs`, you may be facing a propagation issue. Re-running the previously created trigger should fix it.
- ```
- // artifact Registry
- {
- "from" = {
- "identity_type" = ""
- "identities" = [
- "serviceAccount:service-[prj-d-ml-machine-learning-number]@gcp-sa-aiplatform-cc.iam.gserviceaccount.com",
- ]
- },
- "to" = {
- "resources" = ["projects/[prj-c-ml-artifacts-number]"]
- "operations" = {
- "artifactregistry.googleapis.com" = {
- "methods" = ["*"]
- }
- }
- }
- },
- // Dataflow
- {
- "from" = {
- "identity_type" = ""
- "identities" = [
- "serviceAccount:service-[prj-n-ml-machine-learning-number]@dataflow-service-producer-prod.iam.gserviceaccount.com",
- ]
- },
- "to" = {
- "resources" = ["projects/[prj-n-ml-machine-learning-number]"]
- "operations" = {
- "compute.googleapis.com" = {
- "methods" = ["*"]
- }
- }
- }
- },
- ```
+#### 1. Run the notebook with Local Terraform - Only proceed with these if you have not used Cloud Build
-1. Under NON-PRODUCTION.AUTO.TFVARS, add these entries under `egress_policies`:
- ```
- {
- "from" = {
- "identity_type" = "ANY_IDENTITY"
- "identities" = []
- },
- "to" = {
- "resources" = [
- "projects/[prj-c-ml-artifacts-number]"
- ]
- "operations" = {
- "artifactregistry.googleapis.com" = {
- "methods" = ["*"]
- }
- }
- }
- },
- // artifact Registry
- {
- "from" = {
- "identity_type" = ""
- "identities" = [
- "serviceAccount:service-[prj-n-ml-machine-learning-number]@gcp-sa-aiplatform-cc.iam.gserviceaccount.com",
- ]
- },
- "to" = {
- "resources" = ["projects/[prj-c-ml-artifacts-number]"]
- "operations" = {
- "artifactregistry.googleapis.com" = {
- "methods" = ["*"]
- }
- }
- }
- },
- // DataFlow
- {
- "from" = {
- "identity_type" = ""
- "identities" = [
- "serviceAccount:service-[prj-n-ml-machine-learning-number]@dataflow-service-producer-prod.iam.gserviceaccount.com",
- ]
- },
- "to" = {
- "resources" = ["projects/[prj-d-shared-restricted-number]"]
- "operations" = {
- "compute.googleapis.com" = {
- "methods" = ["*"]
- }
- }
- }
- },
- {
- "from" = {
- "identity_type" = ""
- "identities" = [
- "serviceAccount:[prj-n-ml-machine-learning-number]-compute@developer.gserviceaccount.com",
- "serviceAccount:service-[prj-d-ml-machine-learning-number]@gcp-sa-aiplatform.iam.gserviceaccount.com",
- ]
- },
- "to" = {
- "resources" = ["projects/[prj-p-ml-machine-learning-number]"]
- "operations" = {
- "aiplatform.googleapis.com" = {
- "methods" = ["*"]
- },
- "storage.googleapis.com" = {
- "methods" = ["*"]
- },
- "bigquery.googleapis.com" = {
- "methods" = ["*"]
- }
- }
- }
- },
- ```
+1. Before running the notebook, create a Git repository with the content of the `examples/machine-learning-pipeline/assets/Vertexpipeline` folder at the same level as your `terraform-google-enterprise-genai` folder. Take note of this Git repository URL; you will need it to clone the repository into your notebooks. You need to create `development` and `non-prod` branches in this repo.
-1. Under PRODUCTION.AUTO.TFVARS, add these entries under `egress_policies`:
+1. Export the email address that will be used to monitor the configuration in the notebook. To do this, execute the following code:
- ```
- {
- "from" = {
- "identity_type" = ""
- "identities" = [
- "serviceAccount:service-[prj-p-ml-machine-learning-number]@gcp-sa-aiplatform.iam.gserviceaccount.com",
- "serviceAccount:service-[prj-p-ml-machine-learning-number]@gcp-sa-aiplatform-cc.iam.gserviceaccount.com",
- "serviceAccount:cloud-cicd-artifact-registry-copier@system.gserviceaccount.com",
- ]
- },
- "to" = {
- "resources" = [
- "projects/[prj-n-ml-machine-learning-number]",
- "projects/[prj-c-ml-artifacts-number]",
- ]
- "operations" = {
- "artifactregistry.googleapis.com" = {
- "methods" = ["*"]
- },
- "storage.googleapis.com" = {
- "methods" = ["*"]
- },
- "bigquery.googleapis.com" = {
- "methods" = ["*"]
- }
- }
- }
- },
+ ```bash
+ export your_monitoring_email="YOUR-EMAIL@YOUR-COMPANY.COM"
+ echo $your_monitoring_email
```
-### Service Catalog
+1. In the next step, you can use the following commands to update the placeholders used in the file `census_pipeline.ipynb`. The commands below assume that you are in the new Git repository you created, on the development branch.
-Once you have set up service catalog and attempt to deploy out terraform code, there is a high chance you will encounter this error:
-`Permission denied; please check you have the correct IAM permissions and APIs enabled.`
-This is due to a VPC Service control error that until now, is impossible to add into the egress policy. Go to `prj-d-ml-machine-learning` project and view the logs, filtering for ERRORS. There will be a VPC Service Controls entry that has an `egressViolation`. It should look something like the following:
-```
-egressViolations: [
- 0: {
- servicePerimeter: "accessPolicies/1066661933618/servicePerimeters/sp_d_shared_restricted_default_perimeter_f3fv"
- source: "projects/[machine-learning-project-number]"
- sourceType: "Resource"
- targetResource: "projects/[unknown-project-number]"
- }
-]
-```
-
-we want the `unknown-project-number` here. Add this into your `egress_policies` in `3-networks` under DEVELOPMENT.AUTO.TFVARS, NON-PRODUCTION.AUTO.TFVARS & PRODUCTION.AUTO.TFVARS
-
-```
-// Service Catalog
- {
- "from" = {
- "identity_type" = "ANY_IDENTITY"
- "identities" = []
- },
- "to" = {
- "resources" = ["projects/[unknown-project-number]"]
- "operations" = {
- "cloudbuild.googleapis.com" = {
- "methods" = ["*"]
- }
- }
- }
- },
-```
-
-### Machine Learning Pipeline
+ ```bash
+ export prj_d_machine_learning_project_id=$(terraform -chdir="../terraform-google-enterprise-genai/4-projects/ml_business_unit/development" output -raw machine_learning_project_id)
+ echo $prj_d_machine_learning_project_id
-This environment is set up for interactive coding and experimentations. After the project is up, the vertex workbench is deployed from service catalog and The datascientis can use it to write their code including any experiments, data processing code and pipeline components. In addition, a cloud storage bucket is deployed to use as the storage for our operations. Optionally a composer environment is which will later be used to schedule the pipeline run on intervals.
+ export prj_d_machine_learning_project_number=$(terraform -chdir="../terraform-google-enterprise-genai/4-projects/ml_business_unit/development" output -raw machine_learning_project_number)
+ echo $prj_d_machine_learning_project_number
-For our pipeline which trains and deploys a model on the [census income dataset](https://archive.ics.uci.edu/dataset/20/census+income), we use a notebook in the dev workbench to create our pipeline components, put them together into a pipeline and do a dry run of the pipeline to make sure there are no issues. You can access the repository [here](https://github.com/GoogleCloudPlatform/terraform-google-enterprise-genai/tree/main/7-vertexpipeline). [^1]
+ export prj_d_shared_restricted_id=$(terraform -chdir="../terraform-google-enterprise-genai/3-networks-dual-svpc/envs/development" output -raw restricted_host_project_id)
+ echo $prj_d_shared_restricted_id
-[^1]: There is a Dockerfile in the repo which is the docker image used to run all pipeline steps and cloud build steps. In non-prod and prod environments, the only NIST compliant way to access additional dependencies and requirements is via docker images uploaded to artifact registry. We have baked everything for running the pipeline into this docker which exsits in the shared artifact registry.
+ export prj_d_kms_id=$(terraform -chdir="../terraform-google-enterprise-genai/2-environments/envs/development" output -raw env_kms_project_id)
+ echo $prj_d_kms_id
-Once confident, we divide the code in two separate files to use in our CI/CD process in the non-prod environment. First is *compile_pipeline.py* which includes the code to build the pipeline and compile it into a directory (in our case, /common/vertex-ai-pipeline/pipeline_package.yaml)
+ export common_artifacts_project_id=$(terraform -chdir="../terraform-google-enterprise-genai/4-projects/ml_business_unit/shared" output -raw common_artifacts_project_id)
+ echo $common_artifacts_project_id
-The second file, i.e. *runpipeline.py* includes the code for running the compiled pipeline. This is where the correct environemnt variables for non-prod nad prod (e.g., service accounts to use for each stage of the pipeline, kms keys corresponding to each step, buckets, etc.) are set. And eventually the pipeline is loaded from the yaml file at *common/vertex-ai-pipeline/pipeline_package.yaml* and submitted to vertex ai.
+ export development_bucket_name=$(gcloud storage buckets list --project $prj_d_machine_learning_project_id --format="value(name)" |grep bkt)
+ echo $development_bucket_name
+ sed -i \
+ -e "s/MACHINE_LEARNING_PROJECT_ID/$prj_d_machine_learning_project_id/g" \
+ -e "s/MACHINE_LEARNING_PROJECT_BUCKET_ID/$development_bucket_name/g" \
+ -e "s/YOUR_PROJECT_D_SHARED_ID/$prj_d_shared_restricted_id/g" \
+ -e "s/MACHINE_LEARNING_PROJECT_NUMBER/$prj_d_machine_learning_project_number/g" \
+ -e "s/KMS_D_PROJECT_ID/$prj_d_kms_id/g" \
+ -e "s/PRJ_C_ML_ARTIFACTS_ID/$common_artifacts_project_id/g" \
+ -e "s/YOUR-EMAIL@YOUR-COMPANY.COM/$your_monitoring_email/g" \
+ ./census_pipeline.ipynb
+ ```
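+
+1. Optionally, you can sanity-check the substitution before committing. The sketch below is only a convenience: it greps for a few of the placeholder tokens replaced by the `sed` command above and reports whether any remain in `census_pipeline.ipynb`.
+
+ ```bash
+ if grep -qE "MACHINE_LEARNING_PROJECT_ID|PRJ_C_ML_ARTIFACTS_ID|KMS_D_PROJECT_ID" ./census_pipeline.ipynb; then
+   echo "Some placeholders were not replaced; review the sed command above."
+ else
+   echo "All checked placeholders were replaced."
+ fi
+ ```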
-There is a *cloudbuild.yaml* file in the repo with the CI/CD steps as follows:
+1. Push the changes to your Git Vertex repository (development branch):
-1. Upload the Dataflow src file to the bucket in non-prod
-2. Upload the dataset to the bucket
-3. Run *compile_pipeline.py* to compile the pipeline
-4. Run the pipeline via *runpipeline.py*
-5. Optionally, upload the pipeline's yaml file to the composer bucket to make it available for scheduled pipeline runs
+ ```bash
+ git add .
+ git commit -m 'Update census_pipeline.ipynb'
+ git push --set-upstream origin development
+ ```
-The cloud build trigger will be setup in the non-prod project which is where the ML pipeline will run. There are currently three branches on the repo namely dev, staging (non-prod), and prod. Cloud build will trigger the pipeline once there is a merge into the staging (non-prod) branch from dev. However, model deployment and monitorings steps take place in the prod environment. As a result, the service agents and service accounts of the non-prod environment are given some permission on the prod environment and vice versa.
+1. Access the Workbench instances in your development project at `https://console.cloud.google.com/vertex-ai/workbench/instances` (a CLI sketch for listing the instances from the terminal follows these steps).
-Each time a pipeline job finishes successfully, a new version of the census income bracket predictor model will be deployed on the endpoint which will only take 25 percent of the traffic wherease the other 75 percent goes to the previous version of the model to enable A/B testing.
+1. Click the `Open JupyterLab` button on the instance that was created; this will take you to an interactive environment inside Vertex AI.
-You can read more about the details of the pipeline components on the [pipeline's repo](https://github.com/GoogleCloudPlatform/terraform-google-enterprise-genai/tree/main/7-vertexpipeline#readme)
+1. Click the Git icon (left sidebar), clone the repository you created over HTTPS, and select the `development` branch.
-### Step by step
+1. Navigate to the directory that contains the `census_pipeline.ipynb` file and execute [the notebook](https://github.com/GoogleCloudPlatform/terraform-google-enterprise-genai/blob/main/examples/machine-learning-pipeline/assets/Vertexpipeline/census_pipeline.ipynb) cell by cell. Pay attention to the instructions and comments in the notebook, ensuring that you set the correct values for your development project. If a message pops up asking which kernel to use, select Python 3. Make sure you are on the `development` branch and that the placeholder fields were populated properly.
-Before you start, make sure you have your personal git access token ready. The git menu option on the left bar of the workbench requires the personal token to connect to git and clone the repo.
-Also make sure to have a gcs bucket ready to store the artifacts for the tutorial. To deploy the bucket, you can go to service catalog and create a new deployment from the storage bucket solution.
+***NOTE:*** If you get an error related to `bq-jobs` on the first run, you may be facing a propagation issue. Re-running the last step of `census_pipeline.ipynb` should fix it.
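+
+1. Optionally, you can list the Workbench instances from the terminal instead of the console. This is only a sketch and assumes the `gcloud workbench` command group is available in your gcloud version and that the instance lives in the `us-central1-a` zone; adjust `--location` to wherever your instance was created:
+
+ ```bash
+ gcloud workbench instances list \
+   --project=$prj_d_machine_learning_project_id \
+   --location=us-central1-a
+ ```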
-#### 1. Run the notebook
-- Take 7-vertexpipeline folder and make you own copy as a standalone git repository and clone it in the workbench in your dev project. Create a dev branch of the new repository. Switch to the dev branch by choosing it in the branch section of the git view. Now go back to the file browser view by clicking the first option on the left bar menu. Navigate to the directory you just clone and run [the notebook](https://github.com/GoogleCloudPlatform/terraform-google-enterprise-genai/blob/main/7-vertexpipeline/census_pipeline.ipynb) cell by cell. Pay attention to the instructions and comments in the notebook and don't forget to set the correct values corresponding to your dev project.
+#### 2. Configure Cloud Build trigger with Local Terraform - Only proceed with these steps if you have not used Cloud Build
-#### 2. Configure cloud build
+After the notebook runs successfully and the pipeline's test run finishes in the development environment, create a Cloud Build trigger in your non-production project. Configure the trigger to run when there is a merge into the non-prod branch, using the settings below (a CLI sketch with equivalent settings follows the table).
-- After the notebook runs successfully and the pipeline's test run finishes in the dev environment, create a cloud build trigger in your non-prod project. Configure the trigger to run when there is a merge into the staging (non-prod) branch by following the below settings.
+1. You can use the commands below to get the `NON-PROD_MACHINE_LEARNING_PROJECT_ID` and the trigger service account email.
+ ```bash
+ export prj_n_machine_learning_project_id=$(terraform -chdir="../terraform-google-enterprise-genai/4-projects/ml_business_unit/non-production" output -raw machine_learning_project_id)
+ echo $prj_n_machine_learning_project_id
+ echo "trigger-sa@"$prj_n_machine_learning_project_id".iam.gserviceaccount.com"
+ ```
|Setting|Value|
|-------|-----|
|Event|push to branch|
|Repository generation|1st gen|
|Repository|the url to your fork of the repo|
- |Branch|staging|
+ |Branch|non-prod|
|Configuration|Autodetected/Cloud Build configuration file (yaml or json)|
|Location|Repository|
- |Cloud Build configuration file location|cloudbuild.yaml|
+ |Cloud Build configuration file location|cloudbuild.yaml (only if you chose Cloud Build configuration file)|
+ |Service Account|trigger-sa@YOUR_NON-PROD_MACHINE_LEARNING_PROJECT_ID.iam.gserviceaccount.com|
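+
+1. Optionally, an equivalent trigger can be created from the CLI instead of the console. The sketch below is only an approximation of the settings in the table above for a 1st gen GitHub-connected repository; the trigger name is arbitrary, and `YOUR_GITHUB_ORG`/`YOUR_FORKED_REPO` are placeholders for your fork:
+
+ ```bash
+ gcloud builds triggers create github \
+   --project="$prj_n_machine_learning_project_id" \
+   --name="ml-pipeline-non-prod" \
+   --repo-owner="YOUR_GITHUB_ORG" \
+   --repo-name="YOUR_FORKED_REPO" \
+   --branch-pattern="^non-prod$" \
+   --build-config="cloudbuild.yaml" \
+   --service-account="projects/$prj_n_machine_learning_project_id/serviceAccounts/trigger-sa@$prj_n_machine_learning_project_id.iam.gserviceaccount.com"
+ ```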
+1. Execute the following commands to update the `cloudbuild.yaml` file. These commands assume that you are in the cloned Git repository and on the `development` branch. The file contains placeholders for the bucket name and the artifact project, which the commands below replace with values from the deployed projects. You can find the template at `assets/Vertexpipeline/cloudbuild.yaml`.
-- Open the cloudbuild.yaml file in your workbench and for steps 1 which uploads the source code for the dataflow job to your bucket.
+ ```bash
+ export prj_n_machine_learning_project_id=$(terraform -chdir="../terraform-google-enterprise-genai/4-projects/ml_business_unit/non-production" output -raw machine_learning_project_id)
+ echo $prj_n_machine_learning_project_id
+ export non_prod_bucket_name=$(gsutil ls -p $prj_n_machine_learning_project_id | grep -o 'gs://bkt-n-ml[^/]*')
+ non_prod_bucket_name=$(echo $non_prod_bucket_name | sed 's#gs://##')
+ echo $non_prod_bucket_name
+
+ export common_artifacts_project_id=$(terraform -chdir="../terraform-google-enterprise-genai/4-projects/ml_business_unit/shared" output -raw common_artifacts_project_id)
+ echo $common_artifacts_project_id
+
+ sed -i \
+ -e "s/{NON_PROD_BUCKET_NAME}/$non_prod_bucket_name/g" \
+ -e "s/{COMMOM_ARTIFACTS_PRJ_ID}/$common_artifacts_project_id/g" \
+ ./cloudbuild.yaml
```
- name: 'gcr.io/cloud-builders/gsutil'
- args: ['cp', '-r', './src', 'gs://{your-bucket-name}']
- ```
-- Similarly in step 2, replace the bucket name with the name of your own bucket in the non-prod project in order to upload the data to your bucket:
+1. Optionally, if you want to schedule pipeline runs at regular intervals, uncomment the last two steps and replace the composer bucket with the name of your Composer environment's bucket (a lookup sketch follows the snippet). The first step uploads the pipeline's yaml to the bucket and the second step uploads the DAG that reads that yaml and triggers the Vertex pipeline:
+
+ ```yaml
+ # upload to composer
+ - name: 'gcr.io/cloud-builders/gsutil'
+ args: ['cp', './common/vertex-ai-pipeline/pipeline_package.yaml', 'gs://{your-composer-bucket}/dags/common/vertex-ai-pipeline/']
+ id: 'upload_composer_file'
+
+ # upload pipeline dag to composer
+ - name: 'gcr.io/cloud-builders/gsutil'
+ args: ['cp', './composer/dags/dag.py', 'gs://{your-composer-bucket}/dags/']
+ id: 'upload dag'
```
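+
+ ***NOTE:*** If you deployed the optional Composer environment, one way to look up the bucket that backs its `dags/` folder is through `gcloud composer environments describe`. This is only a sketch; the environment name, project, and location below are assumptions, so replace them with your own:
+
+ ```bash
+ gcloud composer environments describe YOUR_COMPOSER_ENV_NAME \
+   --project=$prj_n_machine_learning_project_id \
+   --location=us-central1 \
+   --format="value(config.dagGcsPrefix)"
+ ```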
- name: 'gcr.io/cloud-builders/gsutil'
- args: ['cp', '-r', './data', 'gs://{your-bucket-name}']
+
+1. Execute the following commands to update the `runpipeline.py` file. These commands assume that you are in the same Git repository from the previous step and on the `development` branch. The file contains placeholders that the commands below replace with values from the projects that were deployed. You can find the example template at `assets/Vertexpipeline/runpipeline.py`.
+
+ ```bash
+ export common_artifacts_project_id=$(terraform -chdir=../terraform-google-enterprise-genai/4-projects/ml_business_unit/shared output -raw common_artifacts_project_id)
+ echo $common_artifacts_project_id
+
+ export prj_n_kms_id=$(terraform -chdir="../terraform-google-enterprise-genai/2-environments/envs/non-production" output -raw env_kms_project_id)
+ echo $prj_n_kms_id
+
+ export prj_n_shared_restricted_id=$(terraform -chdir="../terraform-google-enterprise-genai/3-networks-dual-svpc/envs/non-production" output -raw restricted_host_project_id)
+ echo $prj_n_shared_restricted_id
+
+ export prj_n_machine_learning_project_number=$(terraform -chdir="../terraform-google-enterprise-genai/4-projects/ml_business_unit/non-production" output -raw machine_learning_project_number)
+ echo $prj_n_machine_learning_project_number
+
+ export prj_n_machine_learning_project_id=$(terraform -chdir="../terraform-google-enterprise-genai/4-projects/ml_business_unit/non-production" output -raw machine_learning_project_id)
+ echo $prj_n_machine_learning_project_id
+
+ export non_prod_bucket_name=$(gsutil ls -p $prj_n_machine_learning_project_id | grep -o 'gs://bkt-n-ml[^/]*')
+ non_prod_bucket_name=$(echo $non_prod_bucket_name | sed 's#gs://##')
+ echo $non_prod_bucket_name
+
+ export dataflow_sa="dataflow-sa@${prj_n_machine_learning_project_id}.iam.gserviceaccount.com"
+ echo $dataflow_sa
+
+ export prj_p_machine_learning_project_number=$(terraform -chdir="../terraform-google-enterprise-genai/4-projects/ml_business_unit/production" output -raw machine_learning_project_number)
+ echo $prj_p_machine_learning_project_number
+
+ export prj_p_machine_learning_project_id=$(terraform -chdir="../terraform-google-enterprise-genai/4-projects/ml_business_unit/production" output -raw machine_learning_project_id)
+ echo $prj_p_machine_learning_project_id
+
+ export prj_p_kms_id=$(terraform -chdir="../terraform-google-enterprise-genai/2-environments/envs/production" output -raw env_kms_project_id)
+ echo $prj_p_kms_id
+
+ sed -i \
+ -e "s/{PRJ_C_MLARTIFACTS_ID}/$common_artifacts_project_id/g" \
+ -e "s/{PRJ_N_KMS_ID}/$prj_n_kms_id/g" \
+ -e "s/{PRJ_N_SHARED_RESTRICTED_ID}/$prj_n_shared_restricted_id/g" \
+ -e "s/{PRJ_N_MACHINE_LEARNING_NUMBER}/$prj_n_machine_learning_project_number/g" \
+ -e "s/{PRJ_N_MACHINE_LEARNING_ID}/$prj_n_machine_learning_project_id/g" \
+ -e "s/{NON_PROD_BUCKET_NAME}/${non_prod_bucket_name}/g" \
+ -e "s/{DATAFLOW_SA}/$dataflow_sa/g" \
+ -e "s/{PRJ_P_MACHINE_LEARNING_NUMBER}/$prj_p_machine_learning_project_number/g" \
+ -e "s/{PRJ_P_MACHINE_LEARNING_ID}/$prj_p_machine_learning_project_id/g" \
+ -e "s/{PRJ_P_KMS_ID}/$prj_p_kms_id/g" \
+ -e "s/YOUR-EMAIL@YOUR-COMPANY.COM/$your_monitoring_email/g" \
+ ./runpipeline.py
```
-- Change the name of the image for step 3 and 4 to that of your own artifact project, i.e., `us-central1-docker.pkg.dev/{artifact_project_id}/c-publish-artifacts/vertexpipeline:v2` This is the project with artifact registry that houses the image required to run the pipeline.
+1. Execute the following commands to update the `compile_pipeline.py` file. These commands assume that you are in the same Git repository from the previous step and on the `development` branch. The file contains placeholders that the commands below replace with values from the projects that were deployed. You can find the example template at `assets/Vertexpipeline/compile_pipeline.py`.
-```
- - name: 'us-central1-docker.pkg.dev/{your-artifact-project}/c-publish-artifacts/vertexpipeline:v2'
- entrypoint: 'python'
- args: ['compile_pipeline.py']
- id: 'compile_job'
-
- # run pipeline
- - name: 'us-central1-docker.pkg.dev/{your-artifact-project}/c-publish-artifacts/vertexpipeline:v2'
- entrypoint: 'python'
- args: ['runpipeline.py']
- id: 'run_job'
- waitFor: ['compile_job']
-```
+ ```bash
+ export common_artifacts_project_id=$(terraform -chdir="../terraform-google-enterprise-genai/4-projects/ml_business_unit/shared" output -raw common_artifacts_project_id)
+ echo $common_artifacts_project_id
-- Optionally, if you want to schedule pipeline runs on regular intervals, uncomment the last two steps and replace the composer bucket with the name of your composer's bucket. The first step uploads the pipeline's yaml to the bucket and the second step uploads the dag to read that yaml and trigger the vertex pipeline:
-```
- # upload to composer
- - name: 'gcr.io/cloud-builders/gsutil'
- args: ['cp', './common/vertex-ai-pipeline/pipeline_package.yaml', 'gs://{your-composer-bucket}/dags/common/vertex-ai-pipeline/']
- id: 'upload_composer_file'
-
- # upload pipeline dag to composer
- - name: 'gcr.io/cloud-builders/gsutil'
- args: ['cp', './composer/dags/dag.py', 'gs://{your-composer-bucket}/dags/']
- id: 'upload dag'
-```
+ export prj_n_machine_learning_project_id=$(terraform -chdir="../terraform-google-enterprise-genai/4-projects/ml_business_unit/non-production" output -raw machine_learning_project_id)
+ echo $prj_n_machine_learning_project_id
+
+ export non_prod_bucket_name=$(gsutil ls -p $prj_n_machine_learning_project_id | grep -o 'gs://bkt-n-ml[^/]*')
+ non_prod_bucket_name=$(echo $non_prod_bucket_name | sed 's#gs://##')
+ echo $non_prod_bucket_name
+
+ sed -i \
+ -e "s/{NON_PROD_BUCKET_NAME}/$non_prod_bucket_name/g" \
+ -e "s/{COMMOM_ARTIFACTS_PRJ_ID}/$common_artifacts_project_id/g" \
+ -e "s/{PRJ_N_MACHINE_LEARNING_ID}/$prj_n_machine_learning_project_id/g" \
+ ./compile_pipeline.py
+ ```
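+
+1. Optionally, before committing, you can check that none of the curly-brace placeholders remain in the three files you just edited. The sketch below only checks a subset of the tokens replaced by the `sed` commands above:
+
+ ```bash
+ grep -nF \
+   -e "{NON_PROD_BUCKET_NAME}" \
+   -e "{COMMOM_ARTIFACTS_PRJ_ID}" \
+   -e "{PRJ_N_MACHINE_LEARNING_ID}" \
+   -e "{PRJ_P_MACHINE_LEARNING_ID}" \
+   -e "{PRJ_P_KMS_ID}" \
+   ./cloudbuild.yaml ./runpipeline.py ./compile_pipeline.py \
+   || echo "No leftover placeholders found."
+ ```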
#### 3. Configure variables in compile_pipeline.py and runpipeline.py
@@ -926,13 +2230,13 @@ Also make sure to have a gcs bucket ready to store the artifacts for the tutoria
|variable|definition|example value|How to obtain|
|--------|----------|-------------|-------------|
- |PROJECT_ID|The id of the non-prod project|`{none-prod-project-id}`|From the project's menu in console navigate to the `fldr-non-production/fldr-non-production-ml` folder; here you can find the machine learning project in non-prod (`prj-n-ml-machine-learning`) and obtain its' ID|
+ |PROJECT_ID|The ID of the non-prod project|`{non-prod-project-id}`|From the project's menu in console navigate to the `fldr-non-production/fldr-non-production-ml` folder; here you can find the machine learning project in non-prod (`prj-n-ml-machine-learning`) and obtain its ID|
|BUCKET_URI|URI of the non-prod bucket|`gs://non-prod-bucket`|From the project menu in console navigate to the non-prod ML project `fldr-non-production/fldr-non-production-ml/prj-n-ml-machine-learning` project, navigate to cloud storage and copy the name of the bucket available there|
|REGION|The region for pipeline jobs|Can be left as default `us-central1`|
|PROD_PROJECT_ID|ID of the prod project|`prod-project-id`|In console's project menu, navigate to the `fldr-production/fldr-production-ml` folder; here you can find the machine learning project in prod (`prj-p-ml-machine-learning`) and obtain its' ID|
- |Image|The image artifact used to run the pipeline components. The image is already built and pushed to the artifact repository in your artifact project under the common folder|`f"us-central1-docker.pkg.dev/{{artifact-project}}/{{artifact-repository}}/vertexpipeline:v2"`|Navigate to `fldr-common/prj-c-ml-artifacts` project. Navigate to the artifact registry repositories in the project to find the full name of the image artifact.|
+ |Image|The image artifact used to run the pipeline components. The image is already built and pushed to the artifact repository in your artifact project under the common folder|`f"us-central1-docker.pkg.dev/{artifact-project}/{artifact-repository}/vertexpipeline:v2"`|Navigate to `fldr-common/prj-c-ml-artifacts` project. Navigate to the artifact registry repositories in the project to find the full name of the image artifact.|
|DATAFLOW_SUBNET|The shared subnet in non-prod env required to run the dataflow job|`https://www.googleapis.com/compute/v1/projects/{non-prod-network-project}/regions/us-central1/subnetworks/{subnetwork-name}`|Navigate to the `fldr-network/prj-n-shared-restricted` project. Navigate to the VPC networks and under the subnets tab, find the name of the network associated with your region (us-central1)|
- |SERVICE_ACCOUNT|The service account used to run the pipeline and it's components such as the model monitoring job. This is the compute default service account of non-prod if you don't plan on using another costume service account|`{non-prod-project_number}-compute@developer.gserviceaccount.com`|Head over to the IAM page in the non-prod project `fldr-non-production/fldr-non-production-ml/prj-n-ml-machine-learning`, check the box for `Include Google-provided role grants` and look for the service account with the `{project_number}-compute@developer.gserviceaccount.com`|
+ |SERVICE_ACCOUNT|The service account used to run the pipeline and its components such as the model monitoring job. This is the compute default service account of non-prod if you don't plan on using another custom service account|`{non-prod-project_number}-compute@developer.gserviceaccount.com`|Head over to the IAM page in the non-prod project `fldr-non-production/fldr-non-production-ml/prj-n-mlmachine-learning`, check the box for `Include Google-provided role grants` and look for the service account of the form `{project_number}-compute@developer.gserviceaccount.com`|
|PROD_SERICE_ACCOUNT|The service account used to create endpoint, upload the model, and deploy the model in the prod project. This is the compute default service account of prod if you don't plan on using another costume service account|`{prod-project_number}-compute@developer.gserviceaccount.com`|Head over to the IAM page in the prod project `fldr-production/fldr-production-ml/prj-p-ml-machine-learning`, check the box for `Include Google-provided role grants` and look for the service account with the `{project_number}-compute@developer.gserviceaccount.com`|
|deployment_config['encryption']|The kms key for the prod env. This key is used to encrypt the vertex model, endpoint, model deployment, and model monitoring.|`projects/{prod-kms-project}/locations/us-central1/keyRings/{keyring-name}/cryptoKeys/{key-name}`|Navigate to `fldr-production/prj-n-kms`, navigate to the Security/Key management in that project to find the key in `sample-keyring` keyring of your target region `us-central1`|
|encryption_spec_key_name|The name of the encryption key for the non-prod env. This key is used to create the vertex pipeline job and it's associated metadata store|`projects/{non-prod-kms-project}/locations/us-central1/keyRings/{keyring-name}/cryptoKeys/{key-name}`|Navigate to `fldr-non-production/prj-n-kms`, navigate to the Security/Key management in that project to find the key in `sample-keyring` keyring of your target region `us-central1`|
@@ -942,61 +2246,174 @@ The compile_pipeline.py and runpipeline.py files are commented to point out thes
#### 4. Merge and deploy
-- Once everything is configured, you can commit your changes and push to the dev branch. Then, create a PR to from dev to staging(non-prod) which will result in triggering the pipeline if approved. The vertex pipeline takes about 30 minutes to finish and if there are no errors, a trained model will be deployed to and endpoint in the prod project which you can use to make prediction requests.
-
-### 5. Model Validation
-
-Once you have the model running at an endpoint in the production project, you will be able to test it.
-Here are step-by-step instructions to make a request to your model using `gcloud` and `curl`:
-
-1. Initialize variables on your terminal session
+Once everything is configured, commit your changes and push them to the `development` branch. Then create a PR from `development` to `non-prod` (a `gh` CLI sketch follows the commands below); if the PR is approved and merged, the pipeline is triggered. The Vertex pipeline takes about 30 minutes to finish, and if there are no errors, a trained model will be deployed to an endpoint in the prod project, which you can then use to make prediction requests.
+1. The commands below assume that you are in the Git repository you cloned in the `Configure cloud build trigger` step and that you are on the `development` branch.
```bash
- ENDPOINT_ID=
- PROJECT_ID=
- INPUT_DATA_FILE="body.json"
+ git add .
+
+ git commit -m 'Update notebook files'
+ git push origin development
```
- > You can retrieve your ENDPOINT_ID by running `gcloud ai endpoints list --region=us-central1 --project=` or by navigating to it on the Google Cloud Console (https://console.cloud.google.com/vertex-ai/online-prediction/endpoints?project=`)
+ ***NOTE:*** If you get an error related to `bq-jobs` on the first run, you may be facing a propagation issue. Re-running the previously created trigger should fix it.
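+
+1. Optionally, if your fork is hosted on GitHub and the `gh` CLI is installed and authenticated, the pull request from `development` into `non-prod` can be opened from the terminal. This is only a sketch; the title and body are placeholders:
+
+ ```bash
+ gh pr create \
+   --base non-prod \
+   --head development \
+   --title "Promote census pipeline changes" \
+   --body "Merging triggers the Vertex AI pipeline build."
+ ```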
-2. Create a file named `body.json` and put some sample data into it:
+#### 5. Model Validation
- ```json
- {
- "instances": [
- {
- "features/gender": "Female",
- "features/workclass": "Private",
- "features/occupation": "Tech-support",
- "features/marital_status": "Married-civ-spouse",
- "features/race": "White",
- "features/capital_gain": 0,
- "features/education": "9th",
- "features/age": 33,
- "features/hours_per_week": 40,
- "features/relationship": "Wife",
- "features/native_country": "Canada",
- "features/capital_loss": 0
- }
- ]
- }
- ```
+Once you have the model running at an endpoint in the production project, you will be able to test it. The commands below assume that you are in the Git repository you created in the previous steps.
+Here are the instructions to make a request to your model using `gcloud` and `curl`:
-3. Run a curl request using `body.json` file as the JSON Body.
+1. Initialize variables on your terminal session
```bash
+ export ENDPOINT_ID=$(gcloud ai endpoints list --region=us-central1 --project=$prj_p_machine_learning_project_id |awk 'NR==2 {print $1}')
+ echo $ENDPOINT_ID
+
+ echo $prj_p_machine_learning_project_id
+ export INPUT_DATA_FILE="body.json"
+
curl -X POST -H "Authorization: Bearer $(gcloud auth print-access-token)" \
-H "Content-Type: application/json" \
- https://us-central1-aiplatform.googleapis.com/v1/projects/${PROJECT_ID}/locations/us-central1/endpoints/${ENDPOINT_ID}:predict -d "@${INPUT_DATA_FILE}"
+ https://us-central1-aiplatform.googleapis.com/v1/projects/${prj_p_machine_learning_project_id}/locations/us-central1/endpoints/${ENDPOINT_ID}:predict -d "@${INPUT_DATA_FILE}"
```
- You should get an output from 0 to 1, indicating the level of confidence of the binary classification based on the parameters above.
 Values closer to 1 mean the individual is more likely to be included in the income_bracket greater than 50K.
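+
+Alternatively, you can send the same request through `gcloud` instead of `curl`. The sketch below simply reuses the variables exported above:
+
+ ```bash
+ gcloud ai endpoints predict $ENDPOINT_ID \
+   --project=$prj_p_machine_learning_project_id \
+   --region=us-central1 \
+   --json-request=$INPUT_DATA_FILE
+ ```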
-#### Common errors
+## Optional: Composer
+
+**Note 1:** If you are using macOS, replace `cp -RT` with `cp -R` in the relevant commands. The `-T` flag is needed for Linux but causes problems on macOS.
+
+**Note 2:** If you are deploying using Local Terraform, you need to change the output line to `3-networks-dual-svpc` instead of `gcp-projects`.
+
+If you have chosen to deploy Composer with the pipeline, you will need a GitHub repository set up for this step. This repository houses the DAGs for Composer. As of this writing, the structure is as follows:
+
+ ```
+ .
+ ├── README.md
+ └── dags
+ ├── hello_world.py
+ └── strings.py
+ ```
+
+Add your DAGs to the `dags` folder. Any change to this folder will trigger a pipeline and place the DAGs in the appropriate Composer environment, depending on which branch the change is pushed to (`development`, `non-production`, `production`).
+
+1. Composer will rely on DAGs from a GitHub repository. In `4-projects`, a secret `github-api-token` was created to house your GitHub API access token. We need to create a new version of this secret, which will be used by the Composer module called in the `base_env` folder. Use the script below to add the secret version to each machine learning project's respective environment:
+
+ ```bash
+ envs=(development non-production production)
+ project_ids=()
+ github_token="YOUR-GITHUB-TOKEN"
+
+ for env in "${envs[@]}"; do
+ output=$(terraform -chdir="../gcp-projects/ml_business_unit/${env}" output -raw machine_learning_project_id)
+ project_ids+=("$output")
+ done
+
+ for project in "${project_ids[@]}"; do
+ echo -n $github_token | gcloud secrets versions add github-api-token --data-file=- --project=${project}
+ done
+ ```
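+
+ To confirm that the new versions were added, you can list the versions of the secret in each project. This small sketch reuses the `project_ids` array from the loop above, so run it in the same shell session:
+
+ ```bash
+ for project in "${project_ids[@]}"; do
+   echo "Versions of github-api-token in ${project}:"
+   gcloud secrets versions list github-api-token --project=${project}
+ done
+ ```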
+
+## Common errors
- ***google.api_core.exceptions.ResourceExhausted: 429 The following quotas are exceeded: ```CustomModelServingCPUsPerProjectPerRegion 8: The following quotas are exceeded: CustomModelServingCPUsPerProjectPerRegion``` or similar error***:
-This is likely due to the fact that you have too many models uploaded and deployed in Vertex AI. To resolve the issue, you can either submit a quota increase request or undeploy and delete a few models to free up resources
+This is most likely because you have too many models uploaded and deployed in Vertex AI. To resolve the issue, you can either submit a quota increase request or undeploy and delete a few models to free up resources (see the sketch after this list).
- ***Google Compute Engine Metadata service not available/found***:
-You might encounter this when the vertex pipeline job attempts to run even though it is an obsolete issue according to [this thread](https://issuetracker.google.com/issues/229537245#comment9). It'll most likely resolve by re-running the vertex pipeline
+You might encounter this when the Vertex pipeline job attempts to run, even though the issue is reported as obsolete in [this thread](https://issuetracker.google.com/issues/229537245#comment9). It will most likely be resolved by re-running the Vertex pipeline.
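+
+For the quota error above, a quick way to see what is currently deployed is to list the models and endpoints in the production machine-learning project and then undeploy or delete what you no longer need. This is only a sketch and assumes `prj_p_machine_learning_project_id` is still exported from the Model Validation step; otherwise substitute your production ML project ID:
+
+ ```bash
+ # Inspect what is currently deployed.
+ gcloud ai endpoints list --region=us-central1 --project=$prj_p_machine_learning_project_id
+ gcloud ai models list --region=us-central1 --project=$prj_p_machine_learning_project_id
+
+ # Then, if needed, undeploy a model version and delete the model:
+ # gcloud ai endpoints undeploy-model ENDPOINT_ID --deployed-model-id=DEPLOYED_MODEL_ID --region=us-central1 --project=$prj_p_machine_learning_project_id
+ # gcloud ai models delete MODEL_ID --region=us-central1 --project=$prj_p_machine_learning_project_id
+ ```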
+
+### Troubleshooting
+
+#### Service Agent not existent
+
+##### Storage
+
+- Error: Error updating AccessLevel "accessPolicies/POLICY_ID/accessLevels/ACCESS_LEVEL": googleapi: Error 400: The email address '' is invalid or non-existent.
+ - To fix, run: `gcloud storage service-agent --project=project_id_here`
+
+- If you get the error below when trying to list a bucket, it may be related to a quota project being used. To resolve this error, you may need to unset the quota_project from your gcloud config.
+
+ ```bash
+ ERROR: (gcloud.storage.buckets.list) HTTPError 403: Request is prohibited by organization's policy. vpcServiceControlsUniqueIdentifier: XxxxIqGYRNlbbDfpK4PxxxxS5mX3uln6o2sKd_B6RRYiFR_wfSyXxx. This command is authenticated as your_user@your-company.com which is the active account specified by the [core/account] property
+ ```
+
+ ```bash
+ gcloud config list
+ gcloud config unset billing/quota_project
+ ```
+
+
+##### Vertex AI Platform
+
+- Error: Request `Create IAM Members roles/bigquery.dataViewer serviceAccount:service-gcp-sa-aiplatform.iam.gserviceaccount.com for project "project_id"` returned error: Batch request and retried single request "Create IAM Members roles/bigquery.dataViewer serviceAccount:service-gcp-sa-aiplatform.iam.gserviceaccount.com for project \"project_id\"" both failed. Final error: Error applying IAM policy for project "project_id": Error setting IAM policy for project "project_id": googleapi: Error 400: Invalid service account (service-gcp-sa-aiplatform.iam.gserviceaccount.com)., badRequest
+
+ - To fix, run: `gcloud beta services identity create --service=aiplatform.googleapis.com --project=`
+
+#### VPC-SC
+
+If you face a permission-denied error like the one below, you may need to add the following service accounts to the `common.auto.tfvars` file.
+
+```bash
+"run_job": google.api_core.exceptions.PermissionDenied:
+```
+
+- Under `COMMON.AUTO.TFVARS`, add these entries for the `DEVELOPMENT` environment:
+
+ ```bash
+ "serviceAccount:[prj-d-mlmachine-learning-number]@cloudbuild.gserviceaccount.com",
+ "serviceAccount:service-[prj-p-mlmachine-learning-number]@gcp-sa-aiplatform.iam.gserviceaccount.com",
+ ```
+
+- Under `COMMON.AUTO.TFVARS`, add these entries for the `NON-PRODUCTION` environment:
+
+ ```bash
+ "serviceAccount:[prj-d-mlmachine-learning-number]@cloudbuild.gserviceaccount.com",
+ "serviceAccount:service-[prj-p-mlmachine-learning-number]@gcp-sa-aiplatform.iam.gserviceaccount.com",
+ ```
+
+- Under `COMMON.AUTO.TFVARS`, add these entries for the `PRODUCTION` environment:
+
+ ```bash
+ "serviceAccount:[prj-d-mlmachine-learning-number]@cloudbuild.gserviceaccount.com",
+ "serviceAccount:cloud-aiplatform-api-robot-prod@system.gserviceaccount.com",
+ ```
+
+#### Service Catalog
+
+- If you have set up Service Catalog and attempt to deploy the Terraform code, there is a high chance you will encounter this error:
+`Permission denied; please check you have the correct IAM permissions and APIs enabled.`
+This is due to a VPC Service Controls restriction that, until now, has been impossible to add to the egress policy up front. Go to the `prj-d-ml-machine-learning` project and view the logs, filtering for errors (a CLI sketch for pulling these entries follows the policy example below). There will be a VPC Service Controls entry that has an `egressViolation`. It should look something like the following:
+
+ ```
+ egressViolations: [
+ 0: {
+ servicePerimeter: "accessPolicies/1066661933618/servicePerimeters/sp_d_shared_restricted_default_perimeter_f3fv"
+ source: "projects/[machine-learning-project-number]"
+ sourceType: "Resource"
+ targetResource: "projects/[unknown-project-number]"
+ }
+ ]
+ ```
+
+- We want the `unknown-project-number` here. Add it to your `egress_policies` in `3-networks` under `DEVELOPMENT.AUTO.TFVARS`, `NON-PRODUCTION.AUTO.TFVARS`, and `PRODUCTION.AUTO.TFVARS`:
+
+ ```
+ // Service Catalog
+ {
+ "from" = {
+ "identity_type" = "ANY_IDENTITY"
+ "identities" = []
+ },
+ "to" = {
+ "resources" = ["projects/[unknown-project-number]"]
+ "operations" = {
+ "cloudbuild.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ }
+ }
+ },
+ ```
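+
+- If you prefer to pull the violating project number from the CLI rather than the console Logs Explorer, the rough sketch below reads recent VPC Service Controls egress violations from the machine-learning project. The project ID shown is the placeholder name used in this guide (your actual ID has a random suffix), and the filter may need tweaking for your log volume:
+
+ ```bash
+ gcloud logging read \
+   'severity>=ERROR AND protoPayload.metadata.egressViolations:*' \
+   --project=prj-d-ml-machine-learning \
+   --limit=5 \
+   --format=json
+ ```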
+
+Please refer to [troubleshooting](../docs/TROUBLESHOOTING.md) if you run into issues during this step.
diff --git a/7-composer/.gitignore b/examples/machine-learning-pipeline/assets/Composer/.gitignore
similarity index 100%
rename from 7-composer/.gitignore
rename to examples/machine-learning-pipeline/assets/Composer/.gitignore
diff --git a/7-composer/components/deployment.py b/examples/machine-learning-pipeline/assets/Composer/components/deployment.py
similarity index 100%
rename from 7-composer/components/deployment.py
rename to examples/machine-learning-pipeline/assets/Composer/components/deployment.py
diff --git a/7-composer/components/eval.py b/examples/machine-learning-pipeline/assets/Composer/components/eval.py
similarity index 100%
rename from 7-composer/components/eval.py
rename to examples/machine-learning-pipeline/assets/Composer/components/eval.py
diff --git a/7-composer/components/monitoring.py b/examples/machine-learning-pipeline/assets/Composer/components/monitoring.py
similarity index 100%
rename from 7-composer/components/monitoring.py
rename to examples/machine-learning-pipeline/assets/Composer/components/monitoring.py
diff --git a/7-composer/components/training.py b/examples/machine-learning-pipeline/assets/Composer/components/training.py
similarity index 100%
rename from 7-composer/components/training.py
rename to examples/machine-learning-pipeline/assets/Composer/components/training.py
diff --git a/7-composer/components/utils.py b/examples/machine-learning-pipeline/assets/Composer/components/utils.py
similarity index 100%
rename from 7-composer/components/utils.py
rename to examples/machine-learning-pipeline/assets/Composer/components/utils.py
diff --git a/7-composer/dag.py b/examples/machine-learning-pipeline/assets/Composer/dag.py
similarity index 100%
rename from 7-composer/dag.py
rename to examples/machine-learning-pipeline/assets/Composer/dag.py
diff --git a/7-composer/monitoring_schema.yaml b/examples/machine-learning-pipeline/assets/Composer/monitoring_schema.yaml
similarity index 100%
rename from 7-composer/monitoring_schema.yaml
rename to examples/machine-learning-pipeline/assets/Composer/monitoring_schema.yaml
diff --git a/7-composer/requirements.txt b/examples/machine-learning-pipeline/assets/Composer/requirements.txt
similarity index 100%
rename from 7-composer/requirements.txt
rename to examples/machine-learning-pipeline/assets/Composer/requirements.txt
diff --git a/7-composer/us-central1-test-census-034e6abc-bucket/dags b/examples/machine-learning-pipeline/assets/Composer/us-central1-test-census-034e6abc-bucket/dags
similarity index 100%
rename from 7-composer/us-central1-test-census-034e6abc-bucket/dags
rename to examples/machine-learning-pipeline/assets/Composer/us-central1-test-census-034e6abc-bucket/dags
diff --git a/7-vertexpipeline/Dockerfile b/examples/machine-learning-pipeline/assets/Vertexpipeline/Dockerfile
similarity index 100%
rename from 7-vertexpipeline/Dockerfile
rename to examples/machine-learning-pipeline/assets/Vertexpipeline/Dockerfile
diff --git a/7-vertexpipeline/Readme.md b/examples/machine-learning-pipeline/assets/Vertexpipeline/Readme.md
similarity index 100%
rename from 7-vertexpipeline/Readme.md
rename to examples/machine-learning-pipeline/assets/Vertexpipeline/Readme.md
diff --git a/examples/machine-learning-pipeline/assets/Vertexpipeline/body.json b/examples/machine-learning-pipeline/assets/Vertexpipeline/body.json
new file mode 100644
index 00000000..a6523027
--- /dev/null
+++ b/examples/machine-learning-pipeline/assets/Vertexpipeline/body.json
@@ -0,0 +1,18 @@
+{
+ "instances": [
+ {
+ "features/gender": "Female",
+ "features/workclass": "Private",
+ "features/occupation": "Tech-support",
+ "features/marital_status": "Married-civ-spouse",
+ "features/race": "White",
+ "features/capital_gain": 0,
+ "features/education": "9th",
+ "features/age": 33,
+ "features/hours_per_week": 40,
+ "features/relationship": "Wife",
+ "features/native_country": "Canada",
+ "features/capital_loss": 0
+ }
+ ]
+}
diff --git a/7-vertexpipeline/census_pipeline.ipynb b/examples/machine-learning-pipeline/assets/Vertexpipeline/census_pipeline.ipynb
similarity index 63%
rename from 7-vertexpipeline/census_pipeline.ipynb
rename to examples/machine-learning-pipeline/assets/Vertexpipeline/census_pipeline.ipynb
index 1e10de64..d2264ae9 100644
--- a/7-vertexpipeline/census_pipeline.ipynb
+++ b/examples/machine-learning-pipeline/assets/Vertexpipeline/census_pipeline.ipynb
@@ -15,21 +15,27 @@
"tags": []
},
"source": [
- "Set the values for PROJECT_ID, REGION, and BUCKET_URI according to your project."
+ "Set the values below according to your project."
]
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"id": "9030884f-2daf-4bf5-abbc-c9366b82d649",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
- "PROJECT_ID = \"prj-d-ml-machine-learning-gxcv\"\n",
+ "PROJECT_ID = \"MACHINE_LEARNING_PROJECT_ID\"\n",
"REGION = \"us-central1\"\n",
- "BUCKET_URI = \"gs://bkt-d-vertexpipe-test-dev\""
+ "BUCKET_URI = \"gs://MACHINE_LEARNING_PROJECT_BUCKET_ID\"\n",
+ "DATAFLOW_SUBNET=\"https://www.googleapis.com/compute/v1/projects/YOUR_PROJECT_D_SHARED_ID/regions/us-central1/subnetworks/sb-d-shared-restricted-us-central1\"\n",
+ "KMS_KEY = \"projects/KMS_D_PROJECT_ID/locations/us-central1/keyRings/sample-keyring/cryptoKeys/prj-d-mlmachine-learning\"\n",
+ "COMPUTE_ENGINE_SA = \"MACHINE_LEARNING_PROJECT_NUMBER-compute@developer.gserviceaccount.com\"\n",
+ "DATAFLOW_RUNNER_SA = \"dataflow-sa@MACHINE_LEARNING_PROJECT_ID.iam.gserviceaccount.com\"\n",
+ "VERTEX_SA = \"vertex-sa@MACHINE_LEARNING_PROJECT_ID.iam.gserviceaccount.com\"\n",
+ "VERTEX_MODEL_SA = \"vertex-model@MACHINE_LEARNING_PROJECT_ID.iam.gserviceaccount.com\""
]
},
{
@@ -42,7 +48,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"id": "8231198a-8389-4cc0-a698-c2b1c24b1d1b",
"metadata": {
"tags": []
@@ -55,7 +61,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"id": "331e585f-bb53-495e-80d7-c963a2fb3bb3",
"metadata": {
"tags": []
@@ -78,14 +84,14 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"id": "33be8e30-0c49-43ca-ae3e-2eb644012f87",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
- "Image = \"us-central1-docker.pkg.dev/prj-c-ml-artifacts-5wdo/c-publish-artifacts/vertexpipeline:v2\""
+ "Image = \"us-central1-docker.pkg.dev/PRJ_C_ML_ARTIFACTS_ID/c-publish-artifacts/vertexpipeline:v2\""
]
},
{
@@ -98,7 +104,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"id": "f60c6bc3-d97f-41c8-879c-1edb59a6cfb9",
"metadata": {
"collapsed": true,
@@ -107,138 +113,34 @@
},
"tags": []
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Requirement already satisfied: tensorflow==2.8.0 in /opt/conda/lib/python3.10/site-packages (2.8.0)\n",
- "Requirement already satisfied: tensorflow-hub==0.13.0 in /opt/conda/lib/python3.10/site-packages (0.13.0)\n",
- "Requirement already satisfied: kfp in /opt/conda/lib/python3.10/site-packages (2.7.0)\n",
- "Requirement already satisfied: absl-py>=0.4.0 in /opt/conda/lib/python3.10/site-packages (from tensorflow==2.8.0) (2.1.0)\n",
- "Requirement already satisfied: astunparse>=1.6.0 in /opt/conda/lib/python3.10/site-packages (from tensorflow==2.8.0) (1.6.3)\n",
- "Requirement already satisfied: flatbuffers>=1.12 in /opt/conda/lib/python3.10/site-packages (from tensorflow==2.8.0) (24.3.7)\n",
- "Requirement already satisfied: gast>=0.2.1 in /opt/conda/lib/python3.10/site-packages (from tensorflow==2.8.0) (0.5.4)\n",
- "Requirement already satisfied: google-pasta>=0.1.1 in /opt/conda/lib/python3.10/site-packages (from tensorflow==2.8.0) (0.2.0)\n",
- "Requirement already satisfied: h5py>=2.9.0 in /opt/conda/lib/python3.10/site-packages (from tensorflow==2.8.0) (3.10.0)\n",
- "Requirement already satisfied: keras-preprocessing>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from tensorflow==2.8.0) (1.1.2)\n",
- "Requirement already satisfied: libclang>=9.0.1 in /opt/conda/lib/python3.10/site-packages (from tensorflow==2.8.0) (16.0.6)\n",
- "Requirement already satisfied: numpy>=1.20 in /opt/conda/lib/python3.10/site-packages (from tensorflow==2.8.0) (1.25.2)\n",
- "Requirement already satisfied: opt-einsum>=2.3.2 in /opt/conda/lib/python3.10/site-packages (from tensorflow==2.8.0) (3.3.0)\n",
- "Requirement already satisfied: protobuf>=3.9.2 in /opt/conda/lib/python3.10/site-packages (from tensorflow==2.8.0) (3.20.0)\n",
- "Requirement already satisfied: setuptools in /opt/conda/lib/python3.10/site-packages (from tensorflow==2.8.0) (69.1.1)\n",
- "Requirement already satisfied: six>=1.12.0 in /opt/conda/lib/python3.10/site-packages (from tensorflow==2.8.0) (1.16.0)\n",
- "Requirement already satisfied: termcolor>=1.1.0 in /opt/conda/lib/python3.10/site-packages (from tensorflow==2.8.0) (2.4.0)\n",
- "Requirement already satisfied: typing-extensions>=3.6.6 in /opt/conda/lib/python3.10/site-packages (from tensorflow==2.8.0) (4.10.0)\n",
- "Requirement already satisfied: wrapt>=1.11.0 in /opt/conda/lib/python3.10/site-packages (from tensorflow==2.8.0) (1.16.0)\n",
- "Requirement already satisfied: tensorboard<2.9,>=2.8 in /opt/conda/lib/python3.10/site-packages (from tensorflow==2.8.0) (2.8.0)\n",
- "Requirement already satisfied: tf-estimator-nightly==2.8.0.dev2021122109 in /opt/conda/lib/python3.10/site-packages (from tensorflow==2.8.0) (2.8.0.dev2021122109)\n",
- "Requirement already satisfied: keras<2.9,>=2.8.0rc0 in /opt/conda/lib/python3.10/site-packages (from tensorflow==2.8.0) (2.8.0)\n",
- "Requirement already satisfied: tensorflow-io-gcs-filesystem>=0.23.1 in /opt/conda/lib/python3.10/site-packages (from tensorflow==2.8.0) (0.36.0)\n",
- "Requirement already satisfied: grpcio<2.0,>=1.24.3 in /opt/conda/lib/python3.10/site-packages (from tensorflow==2.8.0) (1.62.1)\n",
- "Requirement already satisfied: click<9,>=8.0.0 in /opt/conda/lib/python3.10/site-packages (from kfp) (8.1.7)\n",
- "Requirement already satisfied: docstring-parser<1,>=0.7.3 in /opt/conda/lib/python3.10/site-packages (from kfp) (0.15)\n",
- "Requirement already satisfied: google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5 in /opt/conda/lib/python3.10/site-packages (from kfp) (2.17.1)\n",
- "Requirement already satisfied: google-auth<3,>=1.6.1 in /opt/conda/lib/python3.10/site-packages (from kfp) (2.28.2)\n",
- "Requirement already satisfied: google-cloud-storage<3,>=2.2.1 in /opt/conda/lib/python3.10/site-packages (from kfp) (2.14.0)\n",
- "Requirement already satisfied: kfp-pipeline-spec==0.3.0 in /opt/conda/lib/python3.10/site-packages (from kfp) (0.3.0)\n",
- "Requirement already satisfied: kfp-server-api<2.1.0,>=2.0.0 in /opt/conda/lib/python3.10/site-packages (from kfp) (2.0.5)\n",
- "Requirement already satisfied: kubernetes<27,>=8.0.0 in /opt/conda/lib/python3.10/site-packages (from kfp) (26.1.0)\n",
- "Collecting protobuf>=3.9.2 (from tensorflow==2.8.0)\n",
- " Using cached protobuf-4.25.3-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)\n",
- "Requirement already satisfied: PyYAML<7,>=5.3 in /opt/conda/lib/python3.10/site-packages (from kfp) (6.0.1)\n",
- "Requirement already satisfied: requests-toolbelt<1,>=0.8.0 in /opt/conda/lib/python3.10/site-packages (from kfp) (0.10.1)\n",
- "Requirement already satisfied: tabulate<1,>=0.8.6 in /opt/conda/lib/python3.10/site-packages (from kfp) (0.9.0)\n",
- "Requirement already satisfied: urllib3<2.0.0 in /opt/conda/lib/python3.10/site-packages (from kfp) (1.26.18)\n",
- "Requirement already satisfied: wheel<1.0,>=0.23.0 in /opt/conda/lib/python3.10/site-packages (from astunparse>=1.6.0->tensorflow==2.8.0) (0.42.0)\n",
- "Requirement already satisfied: googleapis-common-protos<2.0.dev0,>=1.56.2 in /opt/conda/lib/python3.10/site-packages (from google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5->kfp) (1.63.0)\n",
- "Requirement already satisfied: requests<3.0.0.dev0,>=2.18.0 in /opt/conda/lib/python3.10/site-packages (from google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5->kfp) (2.31.0)\n",
- "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/conda/lib/python3.10/site-packages (from google-auth<3,>=1.6.1->kfp) (5.3.3)\n",
- "Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/conda/lib/python3.10/site-packages (from google-auth<3,>=1.6.1->kfp) (0.3.0)\n",
- "Requirement already satisfied: rsa<5,>=3.1.4 in /opt/conda/lib/python3.10/site-packages (from google-auth<3,>=1.6.1->kfp) (4.9)\n",
- "Requirement already satisfied: google-cloud-core<3.0dev,>=2.3.0 in /opt/conda/lib/python3.10/site-packages (from google-cloud-storage<3,>=2.2.1->kfp) (2.4.1)\n",
- "Requirement already satisfied: google-resumable-media>=2.6.0 in /opt/conda/lib/python3.10/site-packages (from google-cloud-storage<3,>=2.2.1->kfp) (2.7.0)\n",
- "Requirement already satisfied: google-crc32c<2.0dev,>=1.0 in /opt/conda/lib/python3.10/site-packages (from google-cloud-storage<3,>=2.2.1->kfp) (1.5.0)\n",
- "Requirement already satisfied: certifi in /opt/conda/lib/python3.10/site-packages (from kfp-server-api<2.1.0,>=2.0.0->kfp) (2024.2.2)\n",
- "Requirement already satisfied: python-dateutil in /opt/conda/lib/python3.10/site-packages (from kfp-server-api<2.1.0,>=2.0.0->kfp) (2.9.0)\n",
- "Requirement already satisfied: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in /opt/conda/lib/python3.10/site-packages (from kubernetes<27,>=8.0.0->kfp) (1.7.0)\n",
- "Requirement already satisfied: requests-oauthlib in /opt/conda/lib/python3.10/site-packages (from kubernetes<27,>=8.0.0->kfp) (1.4.0)\n",
- "Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /opt/conda/lib/python3.10/site-packages (from tensorboard<2.9,>=2.8->tensorflow==2.8.0) (0.4.6)\n",
- "Requirement already satisfied: markdown>=2.6.8 in /opt/conda/lib/python3.10/site-packages (from tensorboard<2.9,>=2.8->tensorflow==2.8.0) (3.6)\n",
- "Requirement already satisfied: tensorboard-data-server<0.7.0,>=0.6.0 in /opt/conda/lib/python3.10/site-packages (from tensorboard<2.9,>=2.8->tensorflow==2.8.0) (0.6.1)\n",
- "Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /opt/conda/lib/python3.10/site-packages (from tensorboard<2.9,>=2.8->tensorflow==2.8.0) (1.8.1)\n",
- "Requirement already satisfied: werkzeug>=0.11.15 in /opt/conda/lib/python3.10/site-packages (from tensorboard<2.9,>=2.8->tensorflow==2.8.0) (3.0.1)\n",
- "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /opt/conda/lib/python3.10/site-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.1->kfp) (0.5.1)\n",
- "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests<3.0.0.dev0,>=2.18.0->google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5->kfp) (3.3.2)\n",
- "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests<3.0.0.dev0,>=2.18.0->google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5->kfp) (3.6)\n",
- "Requirement already satisfied: oauthlib>=3.0.0 in /opt/conda/lib/python3.10/site-packages (from requests-oauthlib->kubernetes<27,>=8.0.0->kfp) (3.2.2)\n",
- "Requirement already satisfied: MarkupSafe>=2.1.1 in /opt/conda/lib/python3.10/site-packages (from werkzeug>=0.11.15->tensorboard<2.9,>=2.8->tensorflow==2.8.0) (2.1.5)\n",
- "Using cached protobuf-4.25.3-cp37-abi3-manylinux2014_x86_64.whl (294 kB)\n",
- "Installing collected packages: protobuf\n",
- " Attempting uninstall: protobuf\n",
- " Found existing installation: protobuf 3.20.0\n",
- " Uninstalling protobuf-3.20.0:\n",
- " Successfully uninstalled protobuf-3.20.0\n",
- "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
- "google-api-python-client 1.8.0 requires google-api-core<2dev,>=1.13.0, but you have google-api-core 2.17.1 which is incompatible.\n",
- "google-cloud-datastore 1.15.5 requires protobuf<4.0.0dev, but you have protobuf 4.25.3 which is incompatible.\u001b[0m\u001b[31m\n",
- "\u001b[0mSuccessfully installed protobuf-4.25.3\n",
- "Requirement already satisfied: google-cloud-pipeline-components in /opt/conda/lib/python3.10/site-packages (2.10.0)\n",
- "Requirement already satisfied: google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5 in /opt/conda/lib/python3.10/site-packages (from google-cloud-pipeline-components) (2.17.1)\n",
- "Requirement already satisfied: kfp<=2.7.0,>=2.6.0 in /opt/conda/lib/python3.10/site-packages (from google-cloud-pipeline-components) (2.7.0)\n",
- "Requirement already satisfied: google-cloud-aiplatform<2,>=1.14.0 in /opt/conda/lib/python3.10/site-packages (from google-cloud-pipeline-components) (1.43.0)\n",
- "Requirement already satisfied: Jinja2<4,>=3.1.2 in /opt/conda/lib/python3.10/site-packages (from google-cloud-pipeline-components) (3.1.3)\n",
- "Requirement already satisfied: googleapis-common-protos<2.0.dev0,>=1.56.2 in /opt/conda/lib/python3.10/site-packages (from google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5->google-cloud-pipeline-components) (1.63.0)\n",
- "Requirement already satisfied: protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0.dev0,>=3.19.5 in /opt/conda/lib/python3.10/site-packages (from google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5->google-cloud-pipeline-components) (4.25.3)\n",
- "Requirement already satisfied: google-auth<3.0.dev0,>=2.14.1 in /opt/conda/lib/python3.10/site-packages (from google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5->google-cloud-pipeline-components) (2.28.2)\n",
- "Requirement already satisfied: requests<3.0.0.dev0,>=2.18.0 in /opt/conda/lib/python3.10/site-packages (from google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5->google-cloud-pipeline-components) (2.31.0)\n",
- "Requirement already satisfied: proto-plus<2.0.0dev,>=1.22.0 in /opt/conda/lib/python3.10/site-packages (from google-cloud-aiplatform<2,>=1.14.0->google-cloud-pipeline-components) (1.23.0)\n",
- "Requirement already satisfied: packaging>=14.3 in /opt/conda/lib/python3.10/site-packages (from google-cloud-aiplatform<2,>=1.14.0->google-cloud-pipeline-components) (24.0)\n",
- "Requirement already satisfied: google-cloud-storage<3.0.0dev,>=1.32.0 in /opt/conda/lib/python3.10/site-packages (from google-cloud-aiplatform<2,>=1.14.0->google-cloud-pipeline-components) (2.14.0)\n",
- "Requirement already satisfied: google-cloud-bigquery<4.0.0dev,>=1.15.0 in /opt/conda/lib/python3.10/site-packages (from google-cloud-aiplatform<2,>=1.14.0->google-cloud-pipeline-components) (3.19.0)\n",
- "Requirement already satisfied: google-cloud-resource-manager<3.0.0dev,>=1.3.3 in /opt/conda/lib/python3.10/site-packages (from google-cloud-aiplatform<2,>=1.14.0->google-cloud-pipeline-components) (1.12.3)\n",
- "Requirement already satisfied: shapely<3.0.0dev in /opt/conda/lib/python3.10/site-packages (from google-cloud-aiplatform<2,>=1.14.0->google-cloud-pipeline-components) (2.0.3)\n",
- "Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.10/site-packages (from Jinja2<4,>=3.1.2->google-cloud-pipeline-components) (2.1.5)\n",
- "Requirement already satisfied: click<9,>=8.0.0 in /opt/conda/lib/python3.10/site-packages (from kfp<=2.7.0,>=2.6.0->google-cloud-pipeline-components) (8.1.7)\n",
- "Requirement already satisfied: docstring-parser<1,>=0.7.3 in /opt/conda/lib/python3.10/site-packages (from kfp<=2.7.0,>=2.6.0->google-cloud-pipeline-components) (0.15)\n",
- "Requirement already satisfied: kfp-pipeline-spec==0.3.0 in /opt/conda/lib/python3.10/site-packages (from kfp<=2.7.0,>=2.6.0->google-cloud-pipeline-components) (0.3.0)\n",
- "Requirement already satisfied: kfp-server-api<2.1.0,>=2.0.0 in /opt/conda/lib/python3.10/site-packages (from kfp<=2.7.0,>=2.6.0->google-cloud-pipeline-components) (2.0.5)\n",
- "Requirement already satisfied: kubernetes<27,>=8.0.0 in /opt/conda/lib/python3.10/site-packages (from kfp<=2.7.0,>=2.6.0->google-cloud-pipeline-components) (26.1.0)\n",
- "Requirement already satisfied: PyYAML<7,>=5.3 in /opt/conda/lib/python3.10/site-packages (from kfp<=2.7.0,>=2.6.0->google-cloud-pipeline-components) (6.0.1)\n",
- "Requirement already satisfied: requests-toolbelt<1,>=0.8.0 in /opt/conda/lib/python3.10/site-packages (from kfp<=2.7.0,>=2.6.0->google-cloud-pipeline-components) (0.10.1)\n",
- "Requirement already satisfied: tabulate<1,>=0.8.6 in /opt/conda/lib/python3.10/site-packages (from kfp<=2.7.0,>=2.6.0->google-cloud-pipeline-components) (0.9.0)\n",
- "Requirement already satisfied: urllib3<2.0.0 in /opt/conda/lib/python3.10/site-packages (from kfp<=2.7.0,>=2.6.0->google-cloud-pipeline-components) (1.26.18)\n",
- "Requirement already satisfied: grpcio<2.0dev,>=1.33.2 in /opt/conda/lib/python3.10/site-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.34.1->google-cloud-aiplatform<2,>=1.14.0->google-cloud-pipeline-components) (1.62.1)\n",
- "Requirement already satisfied: grpcio-status<2.0.dev0,>=1.33.2 in /opt/conda/lib/python3.10/site-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.34.1->google-cloud-aiplatform<2,>=1.14.0->google-cloud-pipeline-components) (1.48.2)\n",
- "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/conda/lib/python3.10/site-packages (from google-auth<3.0.dev0,>=2.14.1->google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5->google-cloud-pipeline-components) (5.3.3)\n",
- "Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/conda/lib/python3.10/site-packages (from google-auth<3.0.dev0,>=2.14.1->google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5->google-cloud-pipeline-components) (0.3.0)\n",
- "Requirement already satisfied: rsa<5,>=3.1.4 in /opt/conda/lib/python3.10/site-packages (from google-auth<3.0.dev0,>=2.14.1->google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5->google-cloud-pipeline-components) (4.9)\n",
- "Requirement already satisfied: google-cloud-core<3.0.0dev,>=1.6.0 in /opt/conda/lib/python3.10/site-packages (from google-cloud-bigquery<4.0.0dev,>=1.15.0->google-cloud-aiplatform<2,>=1.14.0->google-cloud-pipeline-components) (2.4.1)\n",
- "Requirement already satisfied: google-resumable-media<3.0dev,>=0.6.0 in /opt/conda/lib/python3.10/site-packages (from google-cloud-bigquery<4.0.0dev,>=1.15.0->google-cloud-aiplatform<2,>=1.14.0->google-cloud-pipeline-components) (2.7.0)\n",
- "Requirement already satisfied: python-dateutil<3.0dev,>=2.7.2 in /opt/conda/lib/python3.10/site-packages (from google-cloud-bigquery<4.0.0dev,>=1.15.0->google-cloud-aiplatform<2,>=1.14.0->google-cloud-pipeline-components) (2.9.0)\n",
- "Requirement already satisfied: grpc-google-iam-v1<1.0.0dev,>=0.12.4 in /opt/conda/lib/python3.10/site-packages (from google-cloud-resource-manager<3.0.0dev,>=1.3.3->google-cloud-aiplatform<2,>=1.14.0->google-cloud-pipeline-components) (0.13.0)\n",
- "Requirement already satisfied: google-crc32c<2.0dev,>=1.0 in /opt/conda/lib/python3.10/site-packages (from google-cloud-storage<3.0.0dev,>=1.32.0->google-cloud-aiplatform<2,>=1.14.0->google-cloud-pipeline-components) (1.5.0)\n",
- "Requirement already satisfied: six>=1.10 in /opt/conda/lib/python3.10/site-packages (from kfp-server-api<2.1.0,>=2.0.0->kfp<=2.7.0,>=2.6.0->google-cloud-pipeline-components) (1.16.0)\n",
- "Requirement already satisfied: certifi in /opt/conda/lib/python3.10/site-packages (from kfp-server-api<2.1.0,>=2.0.0->kfp<=2.7.0,>=2.6.0->google-cloud-pipeline-components) (2024.2.2)\n",
- "Requirement already satisfied: setuptools>=21.0.0 in /opt/conda/lib/python3.10/site-packages (from kubernetes<27,>=8.0.0->kfp<=2.7.0,>=2.6.0->google-cloud-pipeline-components) (69.1.1)\n",
- "Requirement already satisfied: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in /opt/conda/lib/python3.10/site-packages (from kubernetes<27,>=8.0.0->kfp<=2.7.0,>=2.6.0->google-cloud-pipeline-components) (1.7.0)\n",
- "Requirement already satisfied: requests-oauthlib in /opt/conda/lib/python3.10/site-packages (from kubernetes<27,>=8.0.0->kfp<=2.7.0,>=2.6.0->google-cloud-pipeline-components) (1.4.0)\n",
- "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests<3.0.0.dev0,>=2.18.0->google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5->google-cloud-pipeline-components) (3.3.2)\n",
- "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests<3.0.0.dev0,>=2.18.0->google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5->google-cloud-pipeline-components) (3.6)\n",
- "Requirement already satisfied: numpy<2,>=1.14 in /opt/conda/lib/python3.10/site-packages (from shapely<3.0.0dev->google-cloud-aiplatform<2,>=1.14.0->google-cloud-pipeline-components) (1.25.2)\n",
- "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /opt/conda/lib/python3.10/site-packages (from pyasn1-modules>=0.2.1->google-auth<3.0.dev0,>=2.14.1->google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5->google-cloud-pipeline-components) (0.5.1)\n",
- "Requirement already satisfied: oauthlib>=3.0.0 in /opt/conda/lib/python3.10/site-packages (from requests-oauthlib->kubernetes<27,>=8.0.0->kfp<=2.7.0,>=2.6.0->google-cloud-pipeline-components) (3.2.2)\n"
- ]
- }
- ],
+ "outputs": [],
+ "source": [
+ "!pip install --upgrade pip"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bb95e1cc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!pip install tensorflow==2.8.0 tensorflow-hub==0.13.0"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c5c6beec",
+ "metadata": {},
+ "outputs": [],
"source": [
- "!pip install --upgrade tensorflow==2.8.0 tensorflow-hub==0.13.0 kfp\n",
- "!pip install --upgrade google-cloud-pipeline-components"
+ "!pip install kfp==2.4.0"
]
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"id": "2ddb16f5-a901-4a41-981e-5ec33d482715",
"metadata": {
"collapsed": true,
@@ -247,30 +149,9 @@
},
"tags": []
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
- "google-api-core 2.17.1 requires protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0.dev0,>=3.19.5, but you have protobuf 3.20.0 which is incompatible.\n",
- "google-api-python-client 1.8.0 requires google-api-core<2dev,>=1.13.0, but you have google-api-core 2.17.1 which is incompatible.\n",
- "google-cloud-aiplatform 1.43.0 requires protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5, but you have protobuf 3.20.0 which is incompatible.\n",
- "google-cloud-artifact-registry 1.11.3 requires protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5, but you have protobuf 3.20.0 which is incompatible.\n",
- "google-cloud-bigquery-storage 2.24.0 requires protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5, but you have protobuf 3.20.0 which is incompatible.\n",
- "google-cloud-language 2.13.3 requires protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5, but you have protobuf 3.20.0 which is incompatible.\n",
- "google-cloud-monitoring 2.19.3 requires protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5, but you have protobuf 3.20.0 which is incompatible.\n",
- "google-cloud-resource-manager 1.12.3 requires protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5, but you have protobuf 3.20.0 which is incompatible.\n",
- "googleapis-common-protos 1.63.0 requires protobuf!=3.20.0,!=3.20.1,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0.dev0,>=3.19.5, but you have protobuf 3.20.0 which is incompatible.\n",
- "grpc-google-iam-v1 0.13.0 requires protobuf!=3.20.0,!=3.20.1,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5, but you have protobuf 3.20.0 which is incompatible.\n",
- "kfp 2.7.0 requires protobuf<5,>=4.21.1, but you have protobuf 3.20.0 which is incompatible.\n",
- "kfp-pipeline-spec 0.3.0 requires protobuf<5,>=4.21.1, but you have protobuf 3.20.0 which is incompatible.\u001b[0m\u001b[31m\n",
- "\u001b[0m"
- ]
- }
- ],
+ "outputs": [],
"source": [
- "!pip install --quiet protobuf==3.20.0"
+ "!pip install protobuf==3.20.3"
]
},
{
@@ -283,23 +164,12 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"id": "3040dc95-9cf7-4815-8091-2bcfb03f32d3",
"metadata": {
"tags": []
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Copying file://data/adult.data.csv [Content-Type=text/csv]...\n",
- "Copying file://data/adult.test.csv [Content-Type=text/csv]... \n",
- "- [2 files][ 5.4 MiB/ 5.4 MiB] \n",
- "Operation completed over 2 objects/5.4 MiB. \n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"!gsutil cp -r data {BUCKET_URI}"
]
@@ -314,7 +184,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": null,
"id": "02c4447b-b375-4595-82c3-706bd18ee837",
"metadata": {
"tags": []
@@ -354,7 +224,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": null,
"id": "a4223f59-9012-4064-bee9-67c7aa96cbbf",
"metadata": {
"tags": []
@@ -405,7 +275,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": null,
"id": "dd5ace09-afd5-4135-8e28-9f8b6c2b1c96",
"metadata": {
"tags": []
@@ -418,7 +288,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": null,
"id": "69b38ab3-f86c-4905-badc-7daaf2f61dbb",
"metadata": {
"tags": []
@@ -449,7 +319,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": null,
"id": "459aac56-76d0-4e0d-bc39-b48e180f78fa",
"metadata": {
"tags": []
@@ -461,20 +331,12 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": null,
"id": "28a08119-b131-4100-a2f2-944088d21a3f",
"metadata": {
"tags": []
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Overwriting src/ingest_pipeline.py\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"%%writefile src/ingest_pipeline.py\n",
"from __future__ import absolute_import\n",
@@ -584,24 +446,12 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": null,
"id": "76881a2e-0ed8-4d0c-9e1f-512a93130312",
"metadata": {
"tags": []
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Copying file://src/ingest_pipeline.py [Content-Type=text/x-python]...\n",
- "Copying file://src/__init__.py [Content-Type=text/x-python]... \n",
- "Copying file://src/.ipynb_checkpoints/ingest_pipeline-checkpoint.py [Content-Type=text/x-python]...\n",
- "- [3 files][ 7.5 KiB/ 7.5 KiB] \n",
- "Operation completed over 3 objects/7.5 KiB. \n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"!gsutil cp -R {SRC} {BUCKET_URI}"
]
@@ -619,7 +469,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": null,
"id": "167c96f7-6416-464e-9326-6350b1345cbb",
"metadata": {
"tags": []
@@ -635,6 +485,7 @@
" runner: str,\n",
" bq_project: str,\n",
" subnet: str,\n",
+ " dataflow_sa: str,\n",
") -> list:\n",
" return [\n",
" \"--job_name\",\n",
@@ -654,6 +505,8 @@
" \"--no_use_public_ips\",\n",
" \"--worker_zone\",\n",
" \"us-central1-c\",\n",
+ " \"--service_account_email\",\n",
+ " dataflow_sa,\n",
" ]"
]
},
@@ -675,7 +528,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": null,
"id": "b026b981-cbc0-43bd-af56-36a9f1a2f514",
"metadata": {
"tags": []
@@ -687,23 +540,12 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": null,
"id": "309ed10c-2434-4dd5-9f92-4352ff9e4b78",
"metadata": {
"tags": []
},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/var/tmp/ipykernel_174003/1588383553.py:1: DeprecationWarning: output_component_file parameter is deprecated and will eventually be removed. Please use `Compiler().compile()` to compile a component instead.\n",
- " @component(\n",
- "/var/tmp/ipykernel_174003/1588383553.py:5: DeprecationWarning: output_component_file parameter is deprecated and will eventually be removed. Please use `Compiler().compile()` to compile a component instead.\n",
- " def custom_train_model(\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"@component(\n",
" base_image=Image,\n",
@@ -831,7 +673,17 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": null,
+ "id": "924205fb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!pip install google-cloud-pipeline-components==2.16.1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
"id": "4ac20209-57ca-4b3e-9740-e2fc531591a8",
"metadata": {
"tags": []
@@ -857,7 +709,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": null,
"id": "a3daaae2-229c-4deb-b264-6b86a80364b6",
"metadata": {
"tags": []
@@ -869,23 +721,12 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": null,
"id": "7b8f2b3a-0562-465f-987c-a68782b48c0c",
"metadata": {
"tags": []
},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/var/tmp/ipykernel_174003/1314921640.py:2: DeprecationWarning: output_component_file parameter is deprecated and will eventually be removed. Please use `Compiler().compile()` to compile a component instead.\n",
- " @component(\n",
- "/var/tmp/ipykernel_174003/1314921640.py:6: DeprecationWarning: output_component_file parameter is deprecated and will eventually be removed. Please use `Compiler().compile()` to compile a component instead.\n",
- " def custom_eval_model(\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# evaluation component\n",
"@component(\n",
@@ -986,7 +827,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": null,
"id": "0ab4b7c5-f952-4710-8d4e-8105f3a1cb4e",
"metadata": {
"tags": []
@@ -998,23 +839,12 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": null,
"id": "2797433c-4d68-4ee4-8b64-c27520a6e503",
"metadata": {
"tags": []
},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/var/tmp/ipykernel_174003/1703232000.py:1: DeprecationWarning: output_component_file parameter is deprecated and will eventually be removed. Please use `Compiler().compile()` to compile a component instead.\n",
- " @component(\n",
- "/var/tmp/ipykernel_174003/1703232000.py:5: DeprecationWarning: output_component_file parameter is deprecated and will eventually be removed. Please use `Compiler().compile()` to compile a component instead.\n",
- " def deploy_model(\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"@component(\n",
" base_image=Image,\n",
@@ -1098,7 +928,6 @@
" latest_model_id = deployed_models[-1].id\n",
" print(\"your objects properties:\", deployed_models[0].create_time.__dir__())\n",
" model_deploy = uploaded_model.deploy(\n",
- " # machine_type=\"n1-standard-4\",\n",
" endpoint=endpoint,\n",
" traffic_split={\"0\": 25, latest_model_id: 75},\n",
" deployed_model_display_name=model_name,\n",
@@ -1109,7 +938,6 @@
" )\n",
" else:\n",
" model_deploy = uploaded_model.deploy(\n",
- " # machine_type=\"n1-standard-4\",\n",
" endpoint=endpoint,\n",
" traffic_split={\"0\": 100},\n",
" min_replica_count=min_nodes,\n",
@@ -1136,7 +964,7 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": null,
"id": "d191bd2b-41bc-4367-b292-a366b63561da",
"metadata": {
"tags": []
@@ -1148,23 +976,12 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": null,
"id": "57a49a83-cc9d-4c7d-8260-85451063968f",
"metadata": {
"tags": []
},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/var/tmp/ipykernel_174003/3082954866.py:1: DeprecationWarning: output_component_file parameter is deprecated and will eventually be removed. Please use `Compiler().compile()` to compile a component instead.\n",
- " @component(\n",
- "/var/tmp/ipykernel_174003/3082954866.py:5: DeprecationWarning: output_component_file parameter is deprecated and will eventually be removed. Please use `Compiler().compile()` to compile a component instead.\n",
- " def create_monitoring(\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"@component(\n",
" base_image=Image,\n",
@@ -1282,7 +1099,7 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": null,
"id": "3bfb7d67-49b9-416b-90c1-568648631f7d",
"metadata": {
"tags": []
@@ -1323,6 +1140,7 @@
" min_nodes: int=2,\n",
" max_nodes: int=4,\n",
" traffic_split: int=25,\n",
+ " dataflow_sa: str=DATAFLOW_RUNNER_SA,\n",
"):\n",
" from google_cloud_pipeline_components.v1.bigquery import (\n",
" BigqueryQueryJobOp)\n",
@@ -1353,7 +1171,8 @@
" bq_table=bq_train_table,\n",
" runner=runner,\n",
" bq_project=project,\n",
- " subnet=dataflow_subnet\n",
+ " subnet=dataflow_subnet,\n",
+ " dataflow_sa=dataflow_sa,\n",
" ).after(bq_dataset_op)\n",
" dataflow_args_eval = build_dataflow_args(\n",
" job_name=f\"{job_name}eval\",\n",
@@ -1362,7 +1181,8 @@
" bq_table=bq_eval_table,\n",
" runner=runner,\n",
" bq_project=project,\n",
- " subnet=dataflow_subnet\n",
+ " subnet=dataflow_subnet,\n",
+ " dataflow_sa=dataflow_sa,\n",
" ).after(bq_dataset_op)\n",
"\n",
" # run dataflow job\n",
@@ -1447,7 +1267,7 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": null,
"id": "777fbce5-34fa-43e8-b1b8-0e95d6f20e3c",
"metadata": {
"tags": []
@@ -1475,54 +1295,26 @@
},
"source": [
"Finally to test the pipeline end to end, set the input arguments for each component accordingly.\n",
- "Note that there are two service accounts supplied. One for the current project and one for the prod environment. The reason behind it the CI/CD design that runs the pipeline in one environment (non-prod) and deploys the model to prod."
+ "Note that there are two service accounts supplied. One for the current project and one for the prod environment. The reason behind it the CI/CD design that runs the pipeline in one environment (non-prod) and deploys the model to prod. \n",
+ "\n",
+ "Remember to update the monitoring_config parameter to the email that will be used."
]
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": null,
"id": "78d133e2-a353-45bb-bd5d-f741f626e777",
"metadata": {
"tags": []
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Creating PipelineJob\n",
- "PipelineJob created. Resource name: projects/659816794461/locations/us-central1/pipelineJobs/census-income-pipeline-20240315155933\n",
- "To use this PipelineJob in another session:\n",
- "pipeline_job = aiplatform.PipelineJob.get('projects/659816794461/locations/us-central1/pipelineJobs/census-income-pipeline-20240315155933')\n",
- "View Pipeline Job:\n",
- "https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/census-income-pipeline-20240315155933?project=659816794461\n",
- "PipelineJob projects/659816794461/locations/us-central1/pipelineJobs/census-income-pipeline-20240315155933 current state:\n",
- "PipelineState.PIPELINE_STATE_RUNNING\n",
- "PipelineJob projects/659816794461/locations/us-central1/pipelineJobs/census-income-pipeline-20240315155933 current state:\n",
- "PipelineState.PIPELINE_STATE_RUNNING\n",
- "PipelineJob projects/659816794461/locations/us-central1/pipelineJobs/census-income-pipeline-20240315155933 current state:\n",
- "PipelineState.PIPELINE_STATE_RUNNING\n",
- "PipelineJob projects/659816794461/locations/us-central1/pipelineJobs/census-income-pipeline-20240315155933 current state:\n",
- "PipelineState.PIPELINE_STATE_RUNNING\n",
- "PipelineJob projects/659816794461/locations/us-central1/pipelineJobs/census-income-pipeline-20240315155933 current state:\n",
- "PipelineState.PIPELINE_STATE_RUNNING\n",
- "PipelineJob projects/659816794461/locations/us-central1/pipelineJobs/census-income-pipeline-20240315155933 current state:\n",
- "PipelineState.PIPELINE_STATE_RUNNING\n",
- "PipelineJob projects/659816794461/locations/us-central1/pipelineJobs/census-income-pipeline-20240315155933 current state:\n",
- "PipelineState.PIPELINE_STATE_RUNNING\n",
- "PipelineJob projects/659816794461/locations/us-central1/pipelineJobs/census-income-pipeline-20240315155933 current state:\n",
- "PipelineState.PIPELINE_STATE_RUNNING\n",
- "PipelineJob run completed. Resource name: projects/659816794461/locations/us-central1/pipelineJobs/census-income-pipeline-20240315155933\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"from datetime import datetime\n",
"timestamp = datetime.now().strftime(\"%d_%H_%M_%S\")\n",
"pipelineroot = f'{BUCKET_URI}/pipelineroot'\n",
"# In dev, these two service accounts are the same as the deployment environment is the same as where pipeline runs\n",
- "service_account = \"659816794461-compute@developer.gserviceaccount.com\"\n",
- "prod_service_account = \"659816794461-compute@developer.gserviceaccount.com\"\n",
+ "service_account = VERTEX_SA\n",
+ "prod_service_account = VERTEX_MODEL_SA\n",
"\n",
"data_config={\n",
" \"train_data_url\": TRAINING_URL,\n",
@@ -1537,7 +1329,7 @@
" \"python_file_path\": f'{BUCKET_URI}/src/ingest_pipeline.py',\n",
" \"temp_location\": f'{BUCKET_URI}/temp_dataflow',\n",
" \"runner\": RUNNER,\n",
- " \"subnet\": \"https://www.googleapis.com/compute/v1/projects/prj-d-shared-restricted-trxa/regions/us-central1/subnetworks/sb-d-shared-restricted-us-central1\"\n",
+ " \"subnet\": DATAFLOW_SUBNET,\n",
"}\n",
"\n",
"train_config={\n",
@@ -1554,16 +1346,16 @@
" 'min_nodes': 2,\n",
" 'max_nodes': 4,\n",
" 'deployment_project': PROJECT_ID,\n",
- " # important to replace the envryption key here with the key in your own dev environment.\n",
+ " # important to replace the encryption key here with the key in your own dev environment.\n",
" # format would be: projects/prj-d-kms-####/locations/us-central1/keyRings/sample-keyring/cryptoKeys/prj-d-ml-machine-learning\n",
- " \"encryption\": 'projects/prj-d-kms-3yzc/locations/us-central1/keyRings/sample-keyring/cryptoKeys/prj-d-ml-machine-learning',\n",
+ " \"encryption\": KMS_KEY,\n",
" \"service_account\": service_account,\n",
" \"prod_service_account\": prod_service_account,\n",
"}\n",
"\n",
"monitoring_config={\n",
- " 'email': 'majid.alikhani@badal.io', \n",
- " 'name': 'census_monitoring' \n",
+ " 'email': 'YOUR-EMAIL@YOUR-COMPANY.COM',\n",
+ " 'name': 'census_monitoring'\n",
"}\n",
"\n",
"\n",
@@ -1574,7 +1366,7 @@
" pipeline_root=pipelineroot,\n",
" # important to replace the envryption key here with the key in your own dev environment.\n",
" # format would be: projects/prj-d-kms-####/locations/us-central1/keyRings/sample-keyring/cryptoKeys/prj-d-ml-machine-learning\n",
- " encryption_spec_key_name='projects/prj-d-kms-3yzc/locations/us-central1/keyRings/sample-keyring/cryptoKeys/prj-d-ml-machine-learning',\n",
+ " encryption_spec_key_name=KMS_KEY,\n",
" parameter_values={\n",
" \"create_bq_dataset_query\": create_bq_dataset_query,\n",
" \"bq_dataset\": data_config['bq_dataset'],\n",
diff --git a/7-vertexpipeline/cloudbuild.yaml b/examples/machine-learning-pipeline/assets/Vertexpipeline/cloudbuild.yaml
similarity index 54%
rename from 7-vertexpipeline/cloudbuild.yaml
rename to examples/machine-learning-pipeline/assets/Vertexpipeline/cloudbuild.yaml
index f2f7a620..a4c19a2f 100644
--- a/7-vertexpipeline/cloudbuild.yaml
+++ b/examples/machine-learning-pipeline/assets/Vertexpipeline/cloudbuild.yaml
@@ -13,21 +13,25 @@
# limitations under the License.
#
steps:
- # upload dataflow src file to bucket
+ # Upload dataflow src file to bucket
+ # Replace {NON_PROD_BUCKET_NAME} with your non-prod bucket name
- name: 'gcr.io/cloud-builders/gsutil'
- args: ['cp', '-r', './src', 'gs://bkt-n-ml-storage-akdv']
+ args: ['cp', '-r', './src', 'gs://{NON_PROD_BUCKET_NAME}']
- name: 'gcr.io/cloud-builders/gsutil'
- args: ['cp', '-r', './data', 'gs://bkt-n-ml-storage-akdv']
+ # Replace {NON_PROD_BUCKET_NAME} with your non-prod bucket name
+ args: ['cp', '-r', './data', 'gs://{NON_PROD_BUCKET_NAME}']
# compile pipeline
- - name: 'us-central1-docker.pkg.dev/prj-c-bu3artifacts-5wdo/c-publish-artifacts/vertexpipeline:v2'
+ # Replace {COMMOM_ARTIFACTS_PRJ_ID} with your common artifacts project ID
+ - name: 'us-central1-docker.pkg.dev/{COMMOM_ARTIFACTS_PRJ_ID}/c-publish-artifacts/vertexpipeline:v2'
entrypoint: 'python'
args: ['compile_pipeline.py']
id: 'compile_job'
# run pipeline
- - name: 'us-central1-docker.pkg.dev/prj-c-bu3artifacts-5wdo/c-publish-artifacts/vertexpipeline:v2'
+ # Replace {COMMOM_ARTIFACTS_PRJ_ID} with your common artifacts project ID
+ - name: 'us-central1-docker.pkg.dev/{COMMOM_ARTIFACTS_PRJ_ID}/c-publish-artifacts/vertexpipeline:v2'
entrypoint: 'python'
args: ['runpipeline.py']
id: 'run_job'
@@ -35,11 +39,15 @@ steps:
# # upload pipeline yaml to composer
# - name: 'gcr.io/cloud-builders/gsutil'
- # args: ['cp', './common/vertex-ai-pipeline/pipeline_package.yaml', 'gs://us-central1-d-isolated-comp-8f58e4b5-bucket/dags/common/vertex-ai-pipeline/']
+ # Replace {your-composer-bucket} with your Composer bucket name
+ # args: ['cp', './common/vertex-ai-pipeline/pipeline_package.yaml', 'gs://{your-composer-bucket}/dags/common/vertex-ai-pipeline/']
# id: 'upload_composer_file'
# # upload pipeline dag to composer
# - name: 'gcr.io/cloud-builders/gsutil'
- # args: ['cp', './composer/dags/dag.py', 'gs://us-central1-d-isolated-comp-8f58e4b5-bucket/dags/']
+ # Replace {your-composer-bucket} with your Composer bucket name
+ # args: ['cp', './composer/dags/dag.py', 'gs://{your-composer-bucket}/dags/']
# id: 'upload dag'
+options:
+ logging: CLOUD_LOGGING_ONLY
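cloudbuild.yaml now carries placeholder tokens instead of project-specific bucket and registry names. Any sed-style substitution works; below is a small, optional Python sketch of the same idea. The file path and the example values are assumptions, not resources created by this change.

```python
from pathlib import Path

# Hypothetical values -- replace with the outputs of your own deployment.
replacements = {
    "{NON_PROD_BUCKET_NAME}": "bkt-n-ml-storage-example",
    "{COMMOM_ARTIFACTS_PRJ_ID}": "prj-c-ml-artifacts-example",
}

path = Path("cloudbuild.yaml")
text = path.read_text()
for token, value in replacements.items():
    text = text.replace(token, value)  # substitute every occurrence of the token
path.write_text(text)
```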
diff --git a/7-vertexpipeline/common/vertex-ai-pipeline/pipeline_package.yaml b/examples/machine-learning-pipeline/assets/Vertexpipeline/common/vertex-ai-pipeline/pipeline_package.yaml
similarity index 100%
rename from 7-vertexpipeline/common/vertex-ai-pipeline/pipeline_package.yaml
rename to examples/machine-learning-pipeline/assets/Vertexpipeline/common/vertex-ai-pipeline/pipeline_package.yaml
diff --git a/7-vertexpipeline/compile_pipeline.py b/examples/machine-learning-pipeline/assets/Vertexpipeline/compile_pipeline.py
similarity index 98%
rename from 7-vertexpipeline/compile_pipeline.py
rename to examples/machine-learning-pipeline/assets/Vertexpipeline/compile_pipeline.py
index 31898def..00ad5437 100644
--- a/7-vertexpipeline/compile_pipeline.py
+++ b/examples/machine-learning-pipeline/assets/Vertexpipeline/compile_pipeline.py
@@ -21,18 +21,18 @@
from typing import NamedTuple
# Replace with your non-prod project ID
-PROJECT_ID = "{project-id}"
+PROJECT_ID = "{PRJ_N_MACHINE_LEARNING_ID}"
# Replace with your region only if different
REGION = "us-central1"
-# Repalce with your bucket's uri
-BUCKET_URI = "gs://{bucket-name}"
+# Replace with your bucket's uri
+BUCKET_URI = "gs://{NON_PROD_BUCKET_NAME}"
KFP_COMPONENTS_PATH = "components"
SRC = "src"
BUILD = "build"
# Replace {artifact-project} and {artifact-repository}
# with your artifact project and repository
-Image = f"us-central1-docker.pkg.dev/{{artifact-project}}/{{artifact-repository}}/vertexpipeline:v2"
+Image = f"us-central1-docker.pkg.dev/{COMMOM_ARTIFACTS_PRJ_ID}/c-publish-artifacts/vertexpipeline:v2"
DATA_URL = f'{BUCKET_URI}/data'
@@ -58,6 +58,7 @@ def build_dataflow_args(
runner: str,
bq_project: str,
subnet: str,
+ dataflow_sa: str,
) -> list:
return [
"--job_name",
@@ -77,6 +78,8 @@ def build_dataflow_args(
"--no_use_public_ips",
"--worker_zone",
"us-central1-c",
+ "--service_account_email",
+ dataflow_sa,
]
# build_dataflow_args = components.create_component_from_func(
# build_dataflow_args_fun, base_image='python:3.8-slim')
@@ -550,6 +553,7 @@ def pipeline(
min_nodes: int = 2,
max_nodes: int = 4,
traffic_split: int = 25,
+ dataflow_sa: str = "",
):
from google_cloud_pipeline_components.v1.bigquery import (
BigqueryQueryJobOp)
@@ -580,7 +584,8 @@ def pipeline(
bq_table=bq_train_table,
runner=runner,
bq_project=project,
- subnet=dataflow_subnet
+ subnet=dataflow_subnet,
+ dataflow_sa=dataflow_sa,
).after(bq_dataset_op)
dataflow_args_eval = build_dataflow_args(
job_name=f"{job_name}eval",
@@ -589,7 +594,8 @@ def pipeline(
bq_table=bq_eval_table,
runner=runner,
bq_project=project,
- subnet=dataflow_subnet
+ subnet=dataflow_subnet,
+ dataflow_sa=dataflow_sa,
).after(bq_dataset_op)
# run dataflow job
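The hunks above thread a dedicated Dataflow service account through build_dataflow_args and emit it as a --service_account_email flag, so the Dataflow workers no longer run as the compute default service account. A minimal sketch of the resulting flag list follows; the real function takes more parameters and emits more flags than shown here, and the values are illustrative.

```python
# A sketch only: the real build_dataflow_args also takes project, temp location,
# BigQuery table, runner and subnet parameters and emits the matching flags.
def build_dataflow_args_sketch(job_name: str, dataflow_sa: str) -> list:
    return [
        "--job_name",
        job_name,
        "--no_use_public_ips",
        "--worker_zone",
        "us-central1-c",
        "--service_account_email",  # new in this change
        dataflow_sa,                # Dataflow workers now run as this service account
    ]

print(build_dataflow_args_sketch(
    job_name="census-ingest-train",
    dataflow_sa="dataflow-sa@{PRJ_N_MACHINE_LEARNING_ID}.iam.gserviceaccount.com",
))
```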
diff --git a/7-vertexpipeline/components/bq_dataset_component/create_bq_dataset.sql b/examples/machine-learning-pipeline/assets/Vertexpipeline/components/bq_dataset_component/create_bq_dataset.sql
similarity index 100%
rename from 7-vertexpipeline/components/bq_dataset_component/create_bq_dataset.sql
rename to examples/machine-learning-pipeline/assets/Vertexpipeline/components/bq_dataset_component/create_bq_dataset.sql
diff --git a/7-vertexpipeline/components/custom_eval_component/eval.yaml b/examples/machine-learning-pipeline/assets/Vertexpipeline/components/custom_eval_component/eval.yaml
similarity index 100%
rename from 7-vertexpipeline/components/custom_eval_component/eval.yaml
rename to examples/machine-learning-pipeline/assets/Vertexpipeline/components/custom_eval_component/eval.yaml
diff --git a/7-vertexpipeline/components/custom_training_component/training.yaml b/examples/machine-learning-pipeline/assets/Vertexpipeline/components/custom_training_component/training.yaml
similarity index 100%
rename from 7-vertexpipeline/components/custom_training_component/training.yaml
rename to examples/machine-learning-pipeline/assets/Vertexpipeline/components/custom_training_component/training.yaml
diff --git a/7-vertexpipeline/components/deployment_component/deploy.yaml b/examples/machine-learning-pipeline/assets/Vertexpipeline/components/deployment_component/deploy.yaml
similarity index 100%
rename from 7-vertexpipeline/components/deployment_component/deploy.yaml
rename to examples/machine-learning-pipeline/assets/Vertexpipeline/components/deployment_component/deploy.yaml
diff --git a/7-vertexpipeline/components/monitoring_component/monitoring.yaml b/examples/machine-learning-pipeline/assets/Vertexpipeline/components/monitoring_component/monitoring.yaml
similarity index 100%
rename from 7-vertexpipeline/components/monitoring_component/monitoring.yaml
rename to examples/machine-learning-pipeline/assets/Vertexpipeline/components/monitoring_component/monitoring.yaml
diff --git a/7-vertexpipeline/composer/dags/dag.py b/examples/machine-learning-pipeline/assets/Vertexpipeline/composer/dags/dag.py
similarity index 100%
rename from 7-vertexpipeline/composer/dags/dag.py
rename to examples/machine-learning-pipeline/assets/Vertexpipeline/composer/dags/dag.py
diff --git a/7-vertexpipeline/data/adult.data.csv b/examples/machine-learning-pipeline/assets/Vertexpipeline/data/adult.data.csv
similarity index 100%
rename from 7-vertexpipeline/data/adult.data.csv
rename to examples/machine-learning-pipeline/assets/Vertexpipeline/data/adult.data.csv
diff --git a/7-vertexpipeline/data/adult.test.csv b/examples/machine-learning-pipeline/assets/Vertexpipeline/data/adult.test.csv
similarity index 100%
rename from 7-vertexpipeline/data/adult.test.csv
rename to examples/machine-learning-pipeline/assets/Vertexpipeline/data/adult.test.csv
diff --git a/7-vertexpipeline/monitoring_schema.yaml b/examples/machine-learning-pipeline/assets/Vertexpipeline/monitoring_schema.yaml
similarity index 100%
rename from 7-vertexpipeline/monitoring_schema.yaml
rename to examples/machine-learning-pipeline/assets/Vertexpipeline/monitoring_schema.yaml
diff --git a/7-vertexpipeline/runpipeline.py b/examples/machine-learning-pipeline/assets/Vertexpipeline/runpipeline.py
similarity index 71%
rename from 7-vertexpipeline/runpipeline.py
rename to examples/machine-learning-pipeline/assets/Vertexpipeline/runpipeline.py
index b737ebf5..07b6b7ea 100644
--- a/7-vertexpipeline/runpipeline.py
+++ b/examples/machine-learning-pipeline/assets/Vertexpipeline/runpipeline.py
@@ -33,8 +33,10 @@
class vertex_ai_pipeline:
def __init__(self,
- PROJECT_ID: str = "non-prod-projectID",
- PROD_PROJECT_ID: str = 'prod-projectID',
+ # Replace {PRJ_N_MACHINE_LEARNING_ID} with your non-prod Machine Learning project ID
+ PROJECT_ID: str = "{PRJ_N_MACHINE_LEARNING_ID}",
+ # Replace {PRJ_P_MACHINE_LEARNING_ID} with your prod Machine Learning project ID
+ PROD_PROJECT_ID: str = "{PRJ_P_MACHINE_LEARNING_ID}",
REGION: str = "us-central1",
BUCKET_URI: str = "bucket_uri",
DATA_PATH: str = "data",
@@ -47,10 +49,15 @@ def __init__(self,
TRAINING_TABLE_ID: str = 'census_train_table',
EVAL_TABLE_ID: str = 'census_eval_table',
RUNNER: str = "DataflowRunner",
- DATAFLOW_SUBNET: str = "https://www.googleapis.com/compute/v1/projects/prj-n-shared-restricted-wooh/regions/us-central1/subnetworks/sb-n-shared-restricted-us-central1",
+ # Replace {PRJ_N_SHARED_RESTRICTED_ID} with your non-prod shared restricted project ID
+ DATAFLOW_SUBNET: str = "https://www.googleapis.com/compute/v1/projects/{PRJ_N_SHARED_RESTRICTED_ID}/regions/us-central1/subnetworks/sb-n-shared-restricted-us-central1",
JOB_NAME: str = "census-ingest",
- SERVICE_ACCOUNT: str = "1053774269887-compute@developer.gserviceaccount.com",
- PROD_SERVICE_ACCOUNT: str = "941180056038-compute@developer.gserviceaccount.com"
+ # Replace {PRJ_N_MACHINE_LEARNING_NUMBER} with your non-prod Machine Learning project number
+ SERVICE_ACCOUNT: str = "{PRJ_N_MACHINE_LEARNING_NUMBER}-compute@developer.gserviceaccount.com",
+ # Replace {PRJ_P_MACHINE_LEARNING_NUMBER} with your prod Machine Learning project number
+ PROD_SERVICE_ACCOUNT: str = "{PRJ_P_MACHINE_LEARNING_NUMBER}-compute@developer.gserviceaccount.com",
+ # Replace {DATAFLOW_SA} with the dataflow-sa email from your non-prod Machine Learning project
+ DATAFLOW_SA: str = "{DATAFLOW_SA}",
):
self.timestamp = datetime.now().strftime("%d_%H_%M_%S")
@@ -59,6 +66,7 @@ def __init__(self,
self.REGION = REGION
self.BUCKET_URI = BUCKET_URI
self.DATA_PATH = DATA_PATH
+ self.DATAFLOW_SA = DATAFLOW_SA
DAGS_FOLDER = os.environ.get("DAGS_FOLDER", "./")
COMMON_FOLDER = os.path.join(DAGS_FOLDER, "common")
@@ -68,8 +76,8 @@ def __init__(self,
self.KFP_COMPONENTS_PATH = KFP_COMPONENTS_PATH
self.SRC = SRC
self.BUILD = BUILD
- # Replace with the name of the image in artifact project of the common folder
- self.Image = "us-central1-docker.pkg.dev/prj-c-bu3artifacts-5wdo/c-publish-artifacts/vertexpipeline:v2"
+        # Replace {PRJ_C_MLARTIFACTS_ID} with the ID of the artifacts project in the common folder
+ self.Image = "us-central1-docker.pkg.dev/{PRJ_C_MLARTIFACTS_ID}/c-publish-artifacts/vertexpipeline:v2"
self.DATA_URL = f'{BUCKET_URI}/data'
self.TRAINING_FILE = 'adult.data.csv'
@@ -116,14 +124,15 @@ def __init__(self,
'min_nodes': 2,
'max_nodes': 4,
'deployment_project': self.PROD_PROJECT_ID,
- # Raplace encryption with the name of the kms key in the kms project of the prod folder
- "encryption": 'projects/prj-p-kms-lkuy/locations/us-central1/keyRings/sample-keyring/cryptoKeys/prj-p-ml-machine-learning',
+            # Replace {PRJ_P_KMS_ID} with the prod KMS project ID and set the key name to the KMS key in the prod folder's KMS project
+ "encryption": 'projects/{PRJ_P_KMS_ID}/locations/us-central1/keyRings/sample-keyring/cryptoKeys/prj-p-mlmachine-learning',
"service_account": self.SERVICE_ACCOUNT,
"prod_service_account": self.PROD_SERVICE_ACCOUNT
}
self.monitoring_config = {
- 'email': 'my.email@myorg.com',
+ # Replace the email with your email address
+ 'email': '{YOUR-EMAIL@YOUR-COMPANY.COM}',
'name': 'census_monitoring'
}
@@ -134,8 +143,8 @@ def execute(self):
display_name=f"census_income_{self.timestamp}",
template_path=self.yaml_file_path,
pipeline_root=self.pipelineroot,
- # Raplace encryption with the name of the kms key in the kms project of the non-prod folder
- encryption_spec_key_name='projects/prj-n-kms-gi2r/locations/us-central1/keyRings/sample-keyring/cryptoKeys/prj-n-ml-machine-learning',
+        # Replace {PRJ_N_KMS_ID} with the non-prod KMS project ID and set the key name to the KMS key in the non-prod folder's KMS project
+ encryption_spec_key_name='projects/{PRJ_N_KMS_ID}/locations/us-central1/keyRings/sample-keyring/cryptoKeys/prj-n-mlmachine-learning',
parameter_values={
"create_bq_dataset_query": self.create_bq_dataset_query,
"bq_dataset": self.data_config['bq_dataset'],
@@ -167,7 +176,7 @@ def execute(self):
"prod_service_account": self.deployment_config["prod_service_account"],
"monitoring_name": self.monitoring_config['name'],
"monitoring_email": self.monitoring_config['email'],
-
+ "dataflow_sa": self.DATAFLOW_SA,
},
enable_caching=False,
)
@@ -177,11 +186,13 @@ def execute(self):
if __name__ == "__main__":
pipeline = vertex_ai_pipeline(
- # Replace with your non-prod project Id
- PROJECT_ID="prj-n-bu3machine-learning-brk1", \
- PROD_PROJECT_ID='prj-p-ml-machine-learning-skc4', \ # Replace with your prod project Id
+        # Replace with your Machine Learning non-prod project ID
+ PROJECT_ID="{PRJ_N_MACHINE_LEARNING_ID}", \
+ # Replace with your Machine Learning prod project ID
+ PROD_PROJECT_ID='{PRJ_P_MACHINE_LEARNING_ID}', \
REGION="us-central1", \
- BUCKET_URI="gs://bkt-n-ml-storage-akdv", \ # Replace with your bucket in non-prod
+        # Replace {NON_PROD_BUCKET_NAME} with the name of your non-prod bucket
+ BUCKET_URI="gs://{NON_PROD_BUCKET_NAME}", \
DATA_PATH="data", \
KFP_COMPONENTS_PATH="components", \
SRC="src", \
@@ -193,12 +204,14 @@ def execute(self):
EVAL_TABLE_ID='census_eval_table', \
RUNNER="DataflowRunner", \
# Replace with the name of the subnet in your shared-restricted project in the non-prod environment
- DATAFLOW_SUBNET="https://www.googleapis.com/compute/v1/projects/prj-n-shared-restricted-wooh/regions/us-central1/subnetworks/sb-n-shared-restricted-us-central1",
+ DATAFLOW_SUBNET="https://www.googleapis.com/compute/v1/projects/{PRJ_N_SHARED_RESTRICTED_ID}/regions/us-central1/subnetworks/sb-n-shared-restricted-us-central1", \
JOB_NAME="census-ingest", \
- # Replace with the compute default service account of your non-prod project
- SERVICE_ACCOUNT="1053774269887-compute@developer.gserviceaccount.com", \
- # Replace with the compute default service account of your prod project
- PROD_SERVICE_ACCOUNT="941180056038-compute@developer.gserviceaccount.com"
+ # Replace the {PRJ_N_MACHINE_LEARNING_NUMBER} with your Non-production Machine Learning Project Number
+ SERVICE_ACCOUNT="{PRJ_N_MACHINE_LEARNING_NUMBER}-compute@developer.gserviceaccount.com", \
+ # Replace the {PRJ_P_MACHINE_LEARNING_NUMBER} with your Production Machine Learning Project Number
+ PROD_SERVICE_ACCOUNT="{PRJ_P_MACHINE_LEARNING_NUMBER}-compute@developer.gserviceaccount.com",
+ # Replace the {PRJ_N_MACHINE_LEARNING_ID} with your Non-production Machine Learning Project ID
+ DATAFLOW_SA="dataflow-sa@{PRJ_N_MACHINE_LEARNING_ID}.iam.gserviceaccount.com",
)
pipeline.execute()
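runpipeline.py is now a template whose resource names are {PLACEHOLDER} tokens. An optional pre-flight check, sketched below, fails fast if any token was left unreplaced; it is not part of runpipeline.py itself, and the token list is taken directly from the hunks above.

```python
from pathlib import Path

# Hypothetical pre-flight check -- run it before kicking off the pipeline.
PLACEHOLDERS = [
    "{PRJ_N_MACHINE_LEARNING_ID}",
    "{PRJ_P_MACHINE_LEARNING_ID}",
    "{PRJ_N_SHARED_RESTRICTED_ID}",
    "{PRJ_N_MACHINE_LEARNING_NUMBER}",
    "{PRJ_P_MACHINE_LEARNING_NUMBER}",
    "{PRJ_C_MLARTIFACTS_ID}",
    "{PRJ_P_KMS_ID}",
    "{PRJ_N_KMS_ID}",
    "{NON_PROD_BUCKET_NAME}",
    "{YOUR-EMAIL@YOUR-COMPANY.COM}",
    "{DATAFLOW_SA}",
]

text = Path("runpipeline.py").read_text()
leftover = [token for token in PLACEHOLDERS if token in text]
if leftover:
    raise SystemExit(f"Replace these placeholders before running: {leftover}")
```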
diff --git a/7-vertexpipeline/src/__init__.py b/examples/machine-learning-pipeline/assets/Vertexpipeline/src/__init__.py
similarity index 100%
rename from 7-vertexpipeline/src/__init__.py
rename to examples/machine-learning-pipeline/assets/Vertexpipeline/src/__init__.py
diff --git a/7-vertexpipeline/src/ingest_pipeline.py b/examples/machine-learning-pipeline/assets/Vertexpipeline/src/ingest_pipeline.py
similarity index 100%
rename from 7-vertexpipeline/src/ingest_pipeline.py
rename to examples/machine-learning-pipeline/assets/Vertexpipeline/src/ingest_pipeline.py
diff --git a/examples/machine-learning-pipeline/assets/vpc-sc-policies/development.tf.example b/examples/machine-learning-pipeline/assets/vpc-sc-policies/development.tf.example
new file mode 100644
index 00000000..fb7f8a8f
--- /dev/null
+++ b/examples/machine-learning-pipeline/assets/vpc-sc-policies/development.tf.example
@@ -0,0 +1,154 @@
+ingress_policies = [
+ // users
+ {
+ "from" = {
+ "identity_type" = "ANY_IDENTITY"
+ "sources" = {
+ "access_level" = "REPLACE_WITH_ACCESS_LEVEL"
+ }
+ },
+ "to" = {
+ "resources" = [
+ "projects/REPLACE_WITH_SHARED_RESTRICTED_VPC_PROJECT_NUMBER",
+ "projects/REPLACE_WITH_ENV_KMS_PROJECT_NUMBER",
+ "projects/REPLACE_WITH_ENV_ML_PROJECT_NUMBER",
+ ]
+ "operations" = {
+ "compute.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "dns.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "dataproc.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "logging.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "storage.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "cloudkms.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "iam.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "cloudresourcemanager.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "pubsub.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "secretmanager.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "aiplatform.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "composer.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "cloudbuild.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "bigquery.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ }
+ }
+ },
+]
+
+egress_policies = [
+ // notebooks
+ {
+ "from" = {
+ "identity_type" = ""
+ "identities" = [
+ "serviceAccount:service-REPLACE_WITH_ENV_ML_PROJECT_NUMBER@gcp-sa-notebooks.iam.gserviceaccount.com",
+ "serviceAccount:service-REPLACE_WITH_ENV_ML_PROJECT_NUMBER@compute-system.iam.gserviceaccount.com",
+ ]
+ },
+ "to" = {
+ "resources" = ["projects/REPLACE_WITH_ENV_KMS_PROJECT_NUMBER"]
+ "operations" = {
+ "compute.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "cloudkms.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ }
+ }
+ },
+ {
+ "from" = {
+ "identity_type" = "ANY_IDENTITY"
+ "identities" = []
+ },
+ "to" = {
+ "resources" = ["projects/REPLACE_WITH_ARTIFACTS_PROJECT_NUMBER"]
+ "operations" = {
+ "artifactregistry.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "cloudbuild.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ }
+ }
+ },
+ {
+ "from" = {
+ "identity_type" = "ANY_IDENTITY"
+ "identities" = []
+ },
+ "to" = {
+ "resources" = ["projects/REPLACE_WITH_ENV_ML_PROJECT_NUMBER"]
+ "operations" = {
+ "aiplatform.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ }
+ }
+ },
+ {
+ "from" = {
+ "identity_type" = "ANY_IDENTITY"
+ "identities" = []
+ },
+ "to" = {
+ "resources" = ["projects/REPLACE_WITH_ENV_KMS_PROJECT_NUMBER"]
+ "operations" = {
+ "cloudkms.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ }
+ }
+ },
+ {
+ "from" = {
+ "identity_type" = "ANY_IDENTITY"
+ "identities" = []
+ },
+ "to" = {
+ "resources" = ["projects/REPLACE_WITH_LOGGING_PROJECT_NUMBER",
+ "projects/REPLACE_WITH_NON_PROD_PROJECT_NUMBER",
+ "projects/REPLACE_WITH_ENV_ML_PROJECT_NUMBER",
+ ]
+ "operations" = {
+ "storage.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "logging.googleapis.com" = {
+ methods = ["*"]
+ }
+ "cloudbuild.googleapis.com" = {
+ methods = ["*"]
+ }
+ }
+ }
+ },
+]
diff --git a/examples/machine-learning-pipeline/assets/vpc-sc-policies/non-production.tf.example b/examples/machine-learning-pipeline/assets/vpc-sc-policies/non-production.tf.example
new file mode 100644
index 00000000..1d434b19
--- /dev/null
+++ b/examples/machine-learning-pipeline/assets/vpc-sc-policies/non-production.tf.example
@@ -0,0 +1,222 @@
+ingress_policies = [
+ // users
+ {
+ "from" = {
+ "identity_type" = "ANY_IDENTITY"
+ "sources" = {
+ "access_level" = "REPLACE_WITH_ACCESS_LEVEL"
+ }
+ },
+ "to" = {
+ "resources" = [
+ "projects/REPLACE_WITH_SHARED_RESTRICTED_VPC_PROJECT_NUMBER",
+ "projects/REPLACE_WITH_ENV_KMS_PROJECT_NUMBER",
+ "projects/REPLACE_WITH_ENV_ML_PROJECT_NUMBER",
+ ]
+ "operations" = {
+ "compute.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "dns.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "dataproc.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "logging.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "storage.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "cloudkms.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "iam.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "cloudresourcemanager.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "pubsub.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "secretmanager.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "aiplatform.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "composer.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "cloudbuild.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "bigquery.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ }
+ }
+ },
+]
+
+egress_policies = [
+ // notebooks
+ {
+ "from" = {
+ "identity_type" = ""
+ "identities" = [
+ "serviceAccount:service-REPLACE_WITH_ENV_ML_PROJECT_NUMBER@gcp-sa-notebooks.iam.gserviceaccount.com",
+ "serviceAccount:service-REPLACE_WITH_ENV_ML_PROJECT_NUMBER@compute-system.iam.gserviceaccount.com",
+ "serviceAccount:REPLACE_WITH_ENV_ML_PROJECT_NUMBER-compute@developer.gserviceaccount.com",
+ "serviceAccount:cloud-aiplatform-api-robot-prod@system.gserviceaccount.com",
+ ]
+ },
+ "to" = {
+ "resources" = ["projects/REPLACE_WITH_ENV_KMS_PROJECT_NUMBER"]
+ "operations" = {
+ "compute.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "cloudkms.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "storage.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ }
+ }
+ },
+ {
+ "from" = {
+ "identity_type" = "ANY_IDENTITY"
+ "identities" = []
+ },
+ "to" = {
+ "resources" = ["projects/REPLACE_WITH_ARTIFACTS_PROJECT_NUMBER"]
+ "operations" = {
+ "artifactregistry.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "cloudbuild.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ }
+ }
+ },
+ {
+ "from" = {
+ "identity_type" = "ANY_IDENTITY"
+ "identities" = []
+ },
+ "to" = {
+ "resources" = ["projects/REPLACE_WITH_ENV_ML_PROJECT_NUMBER",
+ "projects/REPLACE_WITH_PROD_ML_PROJECT_NUMBER",
+ "projects/REPLACE_WITH_DEV_LOG_PROJECT_NUMBER",
+ "projects/REPLACE_WITH_DEV_ML_PROJECT_NUMBER",
+ ]
+ "operations" = {
+ "aiplatform.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "storage.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "bigquery.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "logging.googleapis.com" = {
+ methods = ["*"]
+ }
+ "cloudbuild.googleapis.com" = {
+ methods = ["*"]
+ }
+ }
+ }
+ },
+ {
+ "from" = {
+ "identity_type" = ""
+ "identities" = [
+ "serviceAccount:service-REPLACE_WITH_ENV_ML_PROJECT_NUMBER@gcp-sa-aiplatform-cc.iam.gserviceaccount.com",
+ ]
+ },
+ "to" = {
+ "resources" = ["projects/REPLACE_WITH_ARTIFACTS_PROJECT_NUMBER"]
+ "operations" = {
+ "artifactregistry.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ }
+ }
+ },
+// DataFlow
+ {
+ "from" = {
+ "identity_type" = ""
+ "identities" = [
+ "serviceAccount:service-REPLACE_WITH_ENV_ML_PROJECT_NUMBER@dataflow-service-producer-prod.iam.gserviceaccount.com",
+ ]
+ },
+ "to" = {
+ "resources" = ["projects/REPLACE_WITH_DEV_SHARED_RESTRICTED_VPC_PROJECT_NUMBER"]
+ "operations" = {
+ "compute.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ }
+ }
+ },
+ {
+ "from" = {
+ "identity_type" = ""
+ "identities" = [
+ "serviceAccount:REPLACE_WITH_ENV_ML_PROJECT_NUMBER-compute@developer.gserviceaccount.com",
+ "serviceAccount:service-REPLACE_WITH_DEV_ML_PROJECT_NUMBER@gcp-sa-aiplatform.iam.gserviceaccount.com",
+ ]
+ },
+ "to" = {
+ "resources" = ["projects/REPLACE_WITH_PROD_ML_PROJECT_NUMBER"]
+ "operations" = {
+ "aiplatform.googleapis.com" = {
+ "methods" = ["*"]
+ },
+ "storage.googleapis.com" = {
+ "methods" = ["*"]
+ },
+ "bigquery.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ }
+ }
+ },
+ {
+ "from" = {
+ "identity_type" = "ANY_IDENTITY"
+ "identities" = []
+ },
+ "to" = {
+ "resources" = ["projects/REPLACE_WITH_ENV_KMS_PROJECT_NUMBER"]
+ "operations" = {
+ "cloudkms.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ }
+ }
+ },
+ {
+ "from" = {
+ "identity_type" = "ANY_IDENTITY"
+ "identities" = []
+ },
+ "to" = {
+ "resources" = ["projects/REPLACE_WITH_LOGGING_PROJECT_NUMBER"]
+ "operations" = {
+ "storage.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ }
+ }
+ },
+]
diff --git a/examples/machine-learning-pipeline/assets/vpc-sc-policies/production.tf.example b/examples/machine-learning-pipeline/assets/vpc-sc-policies/production.tf.example
new file mode 100644
index 00000000..4179606b
--- /dev/null
+++ b/examples/machine-learning-pipeline/assets/vpc-sc-policies/production.tf.example
@@ -0,0 +1,196 @@
+ingress_policies = [
+ // users
+ {
+ "from" = {
+ "identity_type" = "ANY_IDENTITY"
+ "sources" = {
+ "access_level" = "REPLACE_WITH_ACCESS_LEVEL"
+ }
+ },
+ "to" = {
+ "resources" = [
+ "projects/REPLACE_WITH_SHARED_RESTRICTED_VPC_PROJECT_NUMBER",
+ "projects/REPLACE_WITH_ENV_KMS_PROJECT_NUMBER",
+ "projects/REPLACE_WITH_ENV_ML_PROJECT_NUMBER",
+ ]
+ "operations" = {
+ "compute.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "dns.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "dataproc.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "logging.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "storage.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "cloudkms.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "iam.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "cloudresourcemanager.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "pubsub.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "secretmanager.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "aiplatform.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "composer.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "cloudbuild.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "bigquery.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ }
+ }
+ },
+]
+
+egress_policies = [
+ // notebooks
+ {
+ "from" = {
+ "identity_type" = ""
+ "identities" = [
+ "serviceAccount:service-REPLACE_WITH_ENV_ML_PROJECT_NUMBER@gcp-sa-notebooks.iam.gserviceaccount.com",
+ "serviceAccount:service-REPLACE_WITH_ENV_ML_PROJECT_NUMBER@compute-system.iam.gserviceaccount.com",
+ "serviceAccount:service-REPLACE_WITH_ENV_ML_PROJECT_NUMBER@gcp-sa-aiplatform.iam.gserviceaccount.com",
+ ]
+ },
+ "to" = {
+ "resources" = ["projects/REPLACE_WITH_ENV_KMS_PROJECT_NUMBER"]
+ "operations" = {
+ "compute.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "cloudkms.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "storage.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ }
+ }
+ },
+ {
+ "from" = {
+ "identity_type" = ""
+ "identities" = [
+ "serviceAccount:service-REPLACE_WITH_ENV_ML_PROJECT_NUMBER@gcp-sa-aiplatform.iam.gserviceaccount.com",
+ "serviceAccount:service-REPLACE_WITH_ENV_ML_PROJECT_NUMBER@gcp-sa-aiplatform-cc.iam.gserviceaccount.com",
+ "serviceAccount:cloud-cicd-artifact-registry-copier@system.gserviceaccount.com",
+ ]
+ },
+ "to" = {
+ "resources" = [
+ "projects/REPLACE_WITH_NON_PROD_PROJECT_NUMBER",
+ "projects/REPLACE_WITH_ARTIFACTS_PROJECT_NUMBER",
+ ]
+ "operations" = {
+ "artifactregistry.googleapis.com" = {
+ "methods" = ["*"]
+ },
+ "storage.googleapis.com" = {
+ "methods" = ["*"]
+ },
+ "bigquery.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ }
+ }
+ },
+ {
+ "from" = {
+ "identity_type" = "ANY_IDENTITY"
+ "identities" = []
+ },
+ "to" = {
+ "resources" = ["projects/REPLACE_WITH_ARTIFACTS_PROJECT_NUMBER"]
+ "operations" = {
+ "artifactregistry.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "cloudbuild.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ }
+ }
+ },
+ {
+ "from" = {
+ "identity_type" = "ANY_IDENTITY"
+ "identities" = []
+ },
+ "to" = {
+ "resources" = ["projects/REPLACE_WITH_ENV_ML_PROJECT_NUMBER"]
+ "operations" = {
+ "aiplatform.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ }
+ }
+ },
+ {
+ "from" = {
+ "identity_type" = "ANY_IDENTITY"
+ "identities" = []
+ },
+ "to" = {
+ "resources" = ["projects/REPLACE_WITH_ENV_KMS_PROJECT_NUMBER"]
+ "operations" = {
+ "cloudkms.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ }
+ }
+ },
+ {
+ "from" = {
+ "identity_type" = "ANY_IDENTITY"
+ "identities" = []
+ },
+ "to" = {
+ "resources" = ["projects/REPLACE_WITH_LOGGING_PROJECT_NUMBER"]
+ "operations" = {
+ "storage.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ }
+ }
+ },
+ {
+ "from" = {
+ "identity_type" = "ANY_IDENTITY"
+ "identities" = []
+ },
+ "to" = {
+ "resources" = ["projects/REPLACE_WITH_NON_PROD_PROJECT_NUMBER"]
+ "operations" = {
+ "storage.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "aiplatform.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ "bigquery.googleapis.com" = {
+ "methods" = ["*"]
+ }
+ }
+ }
+ },
+]
diff --git a/examples/machine-learning-pipeline/common.auto.example.tfvars b/examples/machine-learning-pipeline/common.auto.example.tfvars
index da78518b..66aa9627 100644
--- a/examples/machine-learning-pipeline/common.auto.example.tfvars
+++ b/examples/machine-learning-pipeline/common.auto.example.tfvars
@@ -18,6 +18,8 @@ instance_region = "us-central1" // should be one of the regions used to create n
remote_state_bucket = "REMOTE_STATE_BUCKET"
-github_app_installation_id = "GITHUB_APP_ID"
+#github_app_installation_id = "GITHUB_APP_ID"
-github_remote_uri = "GITHUB_REMOTE_URI"
+#github_remote_uri = "GITHUB_REMOTE_URI"
+
+seed_state_bucket = "REPLACE_SEED_TFSTATE_BUCKET"
\ No newline at end of file
diff --git a/examples/machine-learning-pipeline/ml_business_unit/development/README.md b/examples/machine-learning-pipeline/ml_business_unit/development/README.md
index a782b0fb..a2ca5946 100644
--- a/examples/machine-learning-pipeline/ml_business_unit/development/README.md
+++ b/examples/machine-learning-pipeline/ml_business_unit/development/README.md
@@ -3,10 +3,12 @@
| Name | Description | Type | Default | Required |
|------|-------------|------|---------|:--------:|
-| github\_app\_installation\_id | The app installation ID that was created when installing Google Cloud Build in Github: https://github.com/apps/google-cloud-build | `number` | n/a | yes |
-| github\_remote\_uri | The remote uri of your github repository | `string` | n/a | yes |
+| github\_app\_installation\_id | The app installation ID that was created when installing Google Cloud Build in Github: https://github.com/apps/google-cloud-build | `number` | `null` | no |
+| github\_remote\_uri | The remote uri of your github repository | `string` | `null` | no |
| instance\_region | The region where notebook instance will be created. A subnetwork must exists in the instance region. | `string` | n/a | yes |
| remote\_state\_bucket | Backend bucket to load remote state information from previous steps. | `string` | n/a | yes |
+| repository\_id | Common artifacts repository id | `string` | `"c-publish-artifacts"` | no |
+| seed\_state\_bucket | Remote state bucket from 0-bootstrap | `string` | n/a | yes |
## Outputs
diff --git a/examples/machine-learning-pipeline/ml_business_unit/development/locals.tf b/examples/machine-learning-pipeline/ml_business_unit/development/locals.tf
index 593d0e47..64f403ec 100644
--- a/examples/machine-learning-pipeline/ml_business_unit/development/locals.tf
+++ b/examples/machine-learning-pipeline/ml_business_unit/development/locals.tf
@@ -15,9 +15,17 @@
*/
locals {
- business_unit = "ml_business_unit"
- business_code = "ml"
- env = "development"
- environment_code = "d"
+ business_unit = "ml_business_unit"
+ business_code = "ml"
+ env = "development"
+ environment_code = "d"
+ region_kms_keyring = [for i in local.env_keyrings : i if split("/", i)[3] == var.instance_region]
+ roles = [
+ "roles/bigquery.admin",
+ "roles/dataflow.admin",
+ "roles/dataflow.worker",
+ "roles/storage.admin",
+ "roles/aiplatform.admin",
+ ]
}
diff --git a/examples/machine-learning-pipeline/ml_business_unit/development/main.tf b/examples/machine-learning-pipeline/ml_business_unit/development/main.tf
index f6874834..19940198 100644
--- a/examples/machine-learning-pipeline/ml_business_unit/development/main.tf
+++ b/examples/machine-learning-pipeline/ml_business_unit/development/main.tf
@@ -14,13 +14,91 @@
* limitations under the License.
*/
+data "google_project" "project" {
+ project_id = local.machine_learning_project_id
+}
+
+resource "google_artifact_registry_repository_iam_member" "member" {
+ for_each = {
+ "aiplatform-sa" = "serviceAccount:service-${data.google_project.project.number}@gcp-sa-aiplatform-cc.iam.gserviceaccount.com",
+ "vertex-sa" = google_service_account.vertex_sa.member,
+ }
+
+ project = local.common_artifacts_project_id
+ location = var.instance_region
+ repository = var.repository_id
+ role = "roles/artifactregistry.reader"
+ member = each.value
+}
+
+resource "google_service_account" "dataflow_sa" {
+ project = local.machine_learning_project_id
+ account_id = "dataflow-sa"
+}
+
+resource "google_service_account" "vertex_sa" {
+ project = local.machine_learning_project_id
+ account_id = "vertex-sa"
+}
+
+resource "google_service_account" "vertex_model" {
+ project = local.machine_learning_project_id
+ account_id = "vertex-model"
+}
+
+resource "google_project_iam_member" "dataflow_sa" {
+ for_each = toset(local.roles)
+ project = local.machine_learning_project_id
+ member = google_service_account.dataflow_sa.member
+ role = each.key
+}
+
+resource "google_project_iam_member" "vertex_sa" {
+ for_each = toset(local.roles)
+ project = local.machine_learning_project_id
+ member = google_service_account.vertex_sa.member
+ role = each.key
+}
+
+resource "google_service_account_iam_member" "compute_impersonate_vertex" {
+ service_account_id = google_service_account.vertex_sa.id
+ role = "roles/iam.serviceAccountUser"
+ member = "serviceAccount:${data.google_project.project.number}-compute@developer.gserviceaccount.com"
+}
+
+resource "google_service_account_iam_member" "vertex_impersonate_model" {
+ service_account_id = google_service_account.vertex_model.id
+ role = "roles/iam.serviceAccountUser"
+ member = google_service_account.vertex_sa.member
+}
+
+resource "google_service_account_iam_member" "impersonate_dataflow" {
+ service_account_id = google_service_account.dataflow_sa.id
+ role = "roles/iam.serviceAccountUser"
+ member = google_service_account.vertex_sa.member
+}
+
+resource "random_string" "suffix" {
+ length = 8
+ special = false
+ upper = false
+}
module "base_env" {
source = "../../modules/base_env"
- env = local.env
- environment_code = local.environment_code
- business_code = local.business_code
- project_id = local.machine_learning_project_id
- kms_keys = local.machine_learning_kms_keys
+ env = local.env
+ environment_code = local.environment_code
+ business_code = local.business_code
+ non_production_project_id = local.non_production_project_id
+ non_production_project_number = local.non_production_project_number
+ production_project_id = local.production_project_id
+ production_project_number = local.production_project_number
+ project_id = local.machine_learning_project_id
+ kms_keys = local.machine_learning_kms_keys
+
+ bucket_name = "ml-storage-${random_string.suffix.result}"
+
+ log_bucket = local.env_log_bucket
+ keyring = one(local.region_kms_keyring)
}
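The vertex-sa, vertex-model and dataflow-sa service accounts created above are the identities the pipeline components run as, and the roles/iam.serviceAccountUser bindings let the caller attach them to jobs. Below is a hedged sketch of submitting the compiled pipeline as vertex-sa from the development environment; the project ID, bucket and KMS key placeholders are assumptions, not values produced by this change.

```python
from google.cloud import aiplatform

# All values below are illustrative placeholders.
PROJECT_ID = "{PRJ_D_MACHINE_LEARNING_ID}"
VERTEX_SA = f"vertex-sa@{PROJECT_ID}.iam.gserviceaccount.com"   # created by main.tf above

aiplatform.init(project=PROJECT_ID, location="us-central1")

job = aiplatform.PipelineJob(
    display_name="census-income-pipeline",
    template_path="common/vertex-ai-pipeline/pipeline_package.yaml",
    pipeline_root="gs://{DEV_BUCKET_NAME}/pipelineroot",
    encryption_spec_key_name=(
        "projects/{PRJ_D_KMS_ID}/locations/us-central1/keyRings/sample-keyring"
        "/cryptoKeys/prj-d-ml-machine-learning"
    ),
)

# The serviceAccountUser bindings above are what allow the caller to attach
# vertex-sa here; the job then runs as vertex-sa rather than the compute
# default service account.
job.run(service_account=VERTEX_SA)
```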
diff --git a/examples/machine-learning-pipeline/ml_business_unit/development/remote.tf b/examples/machine-learning-pipeline/ml_business_unit/development/remote.tf
index dc533538..db614c37 100644
--- a/examples/machine-learning-pipeline/ml_business_unit/development/remote.tf
+++ b/examples/machine-learning-pipeline/ml_business_unit/development/remote.tf
@@ -15,10 +15,26 @@
*/
locals {
- machine_learning_project_id = data.terraform_remote_state.projects_env.outputs.machine_learning_project_id
- machine_learning_kms_keys = data.terraform_remote_state.projects_env.outputs.machine_learning_kms_keys
- service_catalog_repo_name = data.terraform_remote_state.projects_shared.outputs.service_catalog_repo_name
- service_catalog_project_id = data.terraform_remote_state.projects_shared.outputs.service_catalog_project_id
+ machine_learning_project_id = data.terraform_remote_state.projects_env.outputs.machine_learning_project_id
+ machine_learning_kms_keys = data.terraform_remote_state.projects_env.outputs.machine_learning_kms_keys
+ service_catalog_repo_name = data.terraform_remote_state.projects_shared.outputs.service_catalog_repo_name
+ service_catalog_project_id = data.terraform_remote_state.projects_shared.outputs.service_catalog_project_id
+ non_production_project_number = data.terraform_remote_state.projects_nonproduction.outputs.machine_learning_project_number
+ non_production_project_id = data.terraform_remote_state.projects_nonproduction.outputs.machine_learning_project_id
+ production_project_number = data.terraform_remote_state.projects_production.outputs.machine_learning_project_number
+ production_project_id = data.terraform_remote_state.projects_production.outputs.machine_learning_project_id
+ env_log_bucket = data.terraform_remote_state.environments_env.outputs.env_log_bucket_name
+ env_keyrings = data.terraform_remote_state.environments_env.outputs.key_rings
+ common_artifacts_project_id = data.terraform_remote_state.projects_shared.outputs.common_artifacts_project_id
+}
+
+data "terraform_remote_state" "environments_env" {
+ backend = "gcs"
+
+ config = {
+ bucket = var.seed_state_bucket
+ prefix = "terraform/environments/${local.env}"
+ }
}
data "terraform_remote_state" "projects_env" {
@@ -38,3 +54,21 @@ data "terraform_remote_state" "projects_shared" {
prefix = "terraform/projects/${local.business_unit}/shared"
}
}
+
+data "terraform_remote_state" "projects_production" {
+ backend = "gcs"
+
+ config = {
+ bucket = var.remote_state_bucket
+ prefix = "terraform/projects/${local.business_unit}/production"
+ }
+}
+
+data "terraform_remote_state" "projects_nonproduction" {
+ backend = "gcs"
+
+ config = {
+ bucket = var.remote_state_bucket
+ prefix = "terraform/projects/${local.business_unit}/non-production"
+ }
+}
diff --git a/examples/machine-learning-pipeline/ml_business_unit/development/variables.tf b/examples/machine-learning-pipeline/ml_business_unit/development/variables.tf
index c26d31ca..c8ede2c4 100644
--- a/examples/machine-learning-pipeline/ml_business_unit/development/variables.tf
+++ b/examples/machine-learning-pipeline/ml_business_unit/development/variables.tf
@@ -27,9 +27,21 @@ variable "remote_state_bucket" {
variable "github_app_installation_id" {
description = "The app installation ID that was created when installing Google Cloud Build in Github: https://github.com/apps/google-cloud-build"
type = number
-
+ default = null
}
variable "github_remote_uri" {
description = "The remote uri of your github repository"
type = string
+ default = null
+}
+
+variable "seed_state_bucket" {
+ description = "Remote state bucket from 0-bootstrap"
+ type = string
+}
+
+variable "repository_id" {
+ description = "Common artifacts repository id"
+ type = string
+ default = "c-publish-artifacts"
}
diff --git a/examples/machine-learning-pipeline/ml_business_unit/non-production/README.md b/examples/machine-learning-pipeline/ml_business_unit/non-production/README.md
index 3255fd1a..d407b53e 100644
--- a/examples/machine-learning-pipeline/ml_business_unit/non-production/README.md
+++ b/examples/machine-learning-pipeline/ml_business_unit/non-production/README.md
@@ -3,10 +3,12 @@
| Name | Description | Type | Default | Required |
|------|-------------|------|---------|:--------:|
-| github\_app\_installation\_id | The app installation ID that was created when installing Google Cloud Build in Github: https://github.com/apps/google-cloud-build | `number` | n/a | yes |
-| github\_remote\_uri | The remote uri of your github repository | `string` | n/a | yes |
+| github\_app\_installation\_id | The app installation ID that was created when installing Google Cloud Build in Github: https://github.com/apps/google-cloud-build | `number` | `null` | no |
+| github\_remote\_uri | The remote uri of your github repository | `string` | `null` | no |
| instance\_region | The region where notebook instance will be created. A subnetwork must exists in the instance region. | `string` | n/a | yes |
| remote\_state\_bucket | Backend bucket to load remote state information from previous steps. | `string` | n/a | yes |
+| repository\_id | Common artifacts repository id | `string` | `"c-publish-artifacts"` | no |
+| seed\_state\_bucket | Remote state bucket from 0-bootstrap | `string` | n/a | yes |
## Outputs
diff --git a/examples/machine-learning-pipeline/ml_business_unit/non-production/locals.tf b/examples/machine-learning-pipeline/ml_business_unit/non-production/locals.tf
index e59d1a15..c638e586 100644
--- a/examples/machine-learning-pipeline/ml_business_unit/non-production/locals.tf
+++ b/examples/machine-learning-pipeline/ml_business_unit/non-production/locals.tf
@@ -15,9 +15,10 @@
*/
locals {
- business_unit = "ml_business_unit"
- business_code = "ml"
- env = "non-production"
- environment_code = "n"
+ business_unit = "ml_business_unit"
+ business_code = "ml"
+ env = "non-production"
+ environment_code = "n"
+ region_kms_keyring = [for i in local.env_keyrings : i if split("/", i)[3] == var.instance_region]
}
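The added `region_kms_keyring` expression leans on the KMS resource-name layout, `projects/PROJECT/locations/LOCATION/keyRings/NAME`: splitting on `/` puts the location at index 3, so the comprehension keeps only the keyring created in `var.instance_region`. A standalone illustration with made-up keyring names:

```hcl
locals {
  instance_region = "us-central1" # stand-in for var.instance_region

  # Example keyring resource names in the canonical
  # projects/PROJECT/locations/LOCATION/keyRings/NAME format.
  env_keyrings = [
    "projects/prj-n-kms-0000/locations/us-central1/keyRings/sample-keyring",
    "projects/prj-n-kms-0000/locations/us-east4/keyRings/sample-keyring",
  ]

  # split("/", i)[3] extracts LOCATION, so only the us-central1 entry survives.
  region_kms_keyring = [for i in local.env_keyrings : i if split("/", i)[3] == local.instance_region]

  # one() returns the single matching name, null if nothing matched,
  # and an error if more than one keyring matched the region.
  keyring = one(local.region_kms_keyring)
}

output "selected_keyring" {
  value = local.keyring
}
```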
diff --git a/examples/machine-learning-pipeline/ml_business_unit/non-production/main.tf b/examples/machine-learning-pipeline/ml_business_unit/non-production/main.tf
index 705f28c6..c6b40e31 100644
--- a/examples/machine-learning-pipeline/ml_business_unit/non-production/main.tf
+++ b/examples/machine-learning-pipeline/ml_business_unit/non-production/main.tf
@@ -14,19 +14,98 @@
* limitations under the License.
*/
+data "google_compute_default_service_account" "non_prod" {
+ project = local.machine_learning_project_id
+}
+
+resource "google_service_account" "dataflow_sa" {
+ project = local.machine_learning_project_id
+ account_id = "dataflow-sa"
+}
+
+resource "google_project_iam_member" "dataflow_sa" {
+ for_each = toset([
+ "roles/bigquery.admin",
+ "roles/dataflow.admin",
+ "roles/dataflow.worker",
+ "roles/storage.admin",
+ "roles/aiplatform.admin",
+ ])
+ project = local.machine_learning_project_id
+ member = google_service_account.dataflow_sa.member
+ role = each.key
+}
+
+resource "google_service_account_iam_member" "impersonate_dataflow" {
+ service_account_id = google_service_account.dataflow_sa.id
+ role = "roles/iam.serviceAccountUser"
+ member = data.google_compute_default_service_account.non_prod.member
+}
+
+resource "google_service_account" "trigger_sa" {
+ project = local.machine_learning_project_id
+ account_id = "trigger-sa"
+}
+
+resource "google_storage_bucket_iam_member" "bucket" {
+ bucket = module.base_env.bucket.storage_bucket.name
+ role = "roles/storage.admin"
+ member = google_service_account.trigger_sa.member
+}
+
+resource "google_project_iam_member" "trigger_sa" {
+ for_each = toset([
+ "roles/logging.logWriter",
+ "roles/aiplatform.admin"
+ ])
+ project = local.machine_learning_project_id
+ member = google_service_account.trigger_sa.member
+ role = each.key
+}
+
+resource "google_service_account_iam_member" "impersonate" {
+ service_account_id = data.google_compute_default_service_account.non_prod.id
+ role = "roles/iam.serviceAccountUser"
+ member = google_service_account.trigger_sa.member
+}
+
+resource "google_artifact_registry_repository_iam_member" "ar_member" {
+ for_each = {
+ "compute-sa" = "serviceAccount:${local.non_production_project_number}-compute@developer.gserviceaccount.com",
+ "trigger-sa" = google_service_account.trigger_sa.member,
+ "aiplatform-sa" = "serviceAccount:service-${local.non_production_project_number}@gcp-sa-aiplatform-cc.iam.gserviceaccount.com",
+ }
+
+
+ project = local.common_artifacts_project_id
+ location = var.instance_region
+ repository = var.repository_id
+ role = "roles/artifactregistry.reader"
+ member = each.value
+}
+
+resource "random_string" "suffix" {
+ length = 8
+ special = false
+ upper = false
+}
module "base_env" {
source = "../../modules/base_env"
- env = local.env
- environment_code = local.environment_code
- business_code = local.business_code
- project_id = local.machine_learning_project_id
+ env = local.env
+ environment_code = local.environment_code
+ business_code = local.business_code
+ non_production_project_id = local.non_production_project_id
+ non_production_project_number = local.non_production_project_number
+ production_project_id = local.production_project_id
+ production_project_number = local.production_project_number
+ project_id = local.machine_learning_project_id
kms_keys = local.machine_learning_kms_keys
// Composer
- composer_enabled = true
+ composer_enabled = false
composer_name = "composer"
composer_airflow_config_overrides = {
@@ -51,8 +130,11 @@ module "base_env" {
metadata_name = "metadata-store-${local.env}"
// Bucket
- bucket_name = "ml-storage-akdv"
+ bucket_name = "ml-storage-${random_string.suffix.result}"
// TensorBoard
tensorboard_name = "ml-tensorboard-${local.env}"
+
+ log_bucket = local.env_log_bucket
+ keyring = one(local.region_kms_keyring)
}
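The service-account plumbing above repeats one idiom worth calling out: create the account, then attach a set of project roles with a single `for_each` over `toset(...)`, using the resource's `member` attribute instead of hand-building the `serviceAccount:` prefix. A reduced sketch of that idiom — the project ID and role list are illustrative only:

```hcl
resource "google_service_account" "pipeline_sa" {
  project    = "example-ml-project" # placeholder project ID
  account_id = "pipeline-sa"
}

# One google_project_iam_member instance per role; adding or removing a role
# changes only the corresponding binding, never the whole set.
resource "google_project_iam_member" "pipeline_sa_roles" {
  for_each = toset([
    "roles/dataflow.worker",
    "roles/storage.admin",
  ])

  project = "example-ml-project" # placeholder project ID
  role    = each.key
  member  = google_service_account.pipeline_sa.member # expands to "serviceAccount:<email>"
}
```

Note that `google_project_iam_member` is additive: it grants the listed roles without touching bindings managed elsewhere, which is why these grants can safely be spread across steps.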
diff --git a/examples/machine-learning-pipeline/ml_business_unit/non-production/remote.tf b/examples/machine-learning-pipeline/ml_business_unit/non-production/remote.tf
index dc533538..5d256c2c 100644
--- a/examples/machine-learning-pipeline/ml_business_unit/non-production/remote.tf
+++ b/examples/machine-learning-pipeline/ml_business_unit/non-production/remote.tf
@@ -15,10 +15,26 @@
*/
locals {
- machine_learning_project_id = data.terraform_remote_state.projects_env.outputs.machine_learning_project_id
- machine_learning_kms_keys = data.terraform_remote_state.projects_env.outputs.machine_learning_kms_keys
- service_catalog_repo_name = data.terraform_remote_state.projects_shared.outputs.service_catalog_repo_name
- service_catalog_project_id = data.terraform_remote_state.projects_shared.outputs.service_catalog_project_id
+ machine_learning_project_id = data.terraform_remote_state.projects_env.outputs.machine_learning_project_id
+ machine_learning_kms_keys = data.terraform_remote_state.projects_env.outputs.machine_learning_kms_keys
+ service_catalog_repo_name = data.terraform_remote_state.projects_shared.outputs.service_catalog_repo_name
+ service_catalog_project_id = data.terraform_remote_state.projects_shared.outputs.service_catalog_project_id
+ non_production_project_number = data.terraform_remote_state.projects_nonproduction.outputs.machine_learning_project_number
+ non_production_project_id = data.terraform_remote_state.projects_nonproduction.outputs.machine_learning_project_id
+ production_project_number = data.terraform_remote_state.projects_production.outputs.machine_learning_project_number
+ production_project_id = data.terraform_remote_state.projects_production.outputs.machine_learning_project_id
+ env_log_bucket = data.terraform_remote_state.environments_env.outputs.env_log_bucket_name
+ env_keyrings = data.terraform_remote_state.environments_env.outputs.key_rings
+ common_artifacts_project_id = data.terraform_remote_state.projects_shared.outputs.common_artifacts_project_id
+}
+
+data "terraform_remote_state" "environments_env" {
+ backend = "gcs"
+
+ config = {
+ bucket = var.seed_state_bucket
+ prefix = "terraform/environments/${local.env}"
+ }
}
data "terraform_remote_state" "projects_env" {
@@ -38,3 +54,21 @@ data "terraform_remote_state" "projects_shared" {
prefix = "terraform/projects/${local.business_unit}/shared"
}
}
+
+data "terraform_remote_state" "projects_production" {
+ backend = "gcs"
+
+ config = {
+ bucket = var.remote_state_bucket
+ prefix = "terraform/projects/${local.business_unit}/production"
+ }
+}
+
+data "terraform_remote_state" "projects_nonproduction" {
+ backend = "gcs"
+
+ config = {
+ bucket = var.remote_state_bucket
+ prefix = "terraform/projects/${local.business_unit}/non-production"
+ }
+}
\ No newline at end of file
diff --git a/examples/machine-learning-pipeline/ml_business_unit/non-production/variables.tf b/examples/machine-learning-pipeline/ml_business_unit/non-production/variables.tf
index c26d31ca..baad685c 100644
--- a/examples/machine-learning-pipeline/ml_business_unit/non-production/variables.tf
+++ b/examples/machine-learning-pipeline/ml_business_unit/non-production/variables.tf
@@ -27,9 +27,22 @@ variable "remote_state_bucket" {
variable "github_app_installation_id" {
description = "The app installation ID that was created when installing Google Cloud Build in Github: https://github.com/apps/google-cloud-build"
type = number
+ default = null
}
variable "github_remote_uri" {
description = "The remote uri of your github repository"
type = string
+ default = null
+}
+
+variable "seed_state_bucket" {
+ description = "Remote state bucket from 0-bootstrap"
+ type = string
+}
+
+variable "repository_id" {
+ description = "Common artifacts repository id"
+ type = string
+ default = "c-publish-artifacts"
}
diff --git a/examples/machine-learning-pipeline/ml_business_unit/production/README.md b/examples/machine-learning-pipeline/ml_business_unit/production/README.md
index 3255fd1a..d407b53e 100644
--- a/examples/machine-learning-pipeline/ml_business_unit/production/README.md
+++ b/examples/machine-learning-pipeline/ml_business_unit/production/README.md
@@ -3,10 +3,12 @@
| Name | Description | Type | Default | Required |
|------|-------------|------|---------|:--------:|
-| github\_app\_installation\_id | The app installation ID that was created when installing Google Cloud Build in Github: https://github.com/apps/google-cloud-build | `number` | n/a | yes |
-| github\_remote\_uri | The remote uri of your github repository | `string` | n/a | yes |
+| github\_app\_installation\_id | The app installation ID that was created when installing Google Cloud Build in Github: https://github.com/apps/google-cloud-build | `number` | `null` | no |
+| github\_remote\_uri | The remote uri of your github repository | `string` | `null` | no |
| instance\_region | The region where notebook instance will be created. A subnetwork must exists in the instance region. | `string` | n/a | yes |
| remote\_state\_bucket | Backend bucket to load remote state information from previous steps. | `string` | n/a | yes |
+| repository\_id | Common artifacts repository id | `string` | `"c-publish-artifacts"` | no |
+| seed\_state\_bucket | Remote state bucket from 0-bootstrap | `string` | n/a | yes |
## Outputs
diff --git a/examples/machine-learning-pipeline/ml_business_unit/production/locals.tf b/examples/machine-learning-pipeline/ml_business_unit/production/locals.tf
index daf31502..d96d7691 100644
--- a/examples/machine-learning-pipeline/ml_business_unit/production/locals.tf
+++ b/examples/machine-learning-pipeline/ml_business_unit/production/locals.tf
@@ -15,9 +15,10 @@
*/
locals {
- business_unit = "ml_business_unit"
- business_code = "ml"
- env = "production"
- environment_code = "p"
+ business_unit = "ml_business_unit"
+ business_code = "ml"
+ env = "production"
+ environment_code = "p"
+ region_kms_keyring = [for i in local.env_keyrings : i if split("/", i)[3] == var.instance_region]
}
diff --git a/examples/machine-learning-pipeline/ml_business_unit/production/main.tf b/examples/machine-learning-pipeline/ml_business_unit/production/main.tf
index 717e2c9d..0196970e 100644
--- a/examples/machine-learning-pipeline/ml_business_unit/production/main.tf
+++ b/examples/machine-learning-pipeline/ml_business_unit/production/main.tf
@@ -14,14 +14,23 @@
* limitations under the License.
*/
+resource "random_string" "suffix" {
+ length = 8
+ special = false
+ upper = false
+}
module "base_env" {
source = "../../modules/base_env"
- env = local.env
- environment_code = local.environment_code
- business_code = local.business_code
- project_id = local.machine_learning_project_id
+ env = local.env
+ environment_code = local.environment_code
+ business_code = local.business_code
+ non_production_project_id = local.non_production_project_id
+ non_production_project_number = local.non_production_project_number
+ production_project_id = local.production_project_id
+ production_project_number = local.production_project_number
+ project_id = local.machine_learning_project_id
kms_keys = local.machine_learning_kms_keys
@@ -50,8 +59,11 @@ module "base_env" {
metadata_name = "metadata-store-${local.env}"
// Bucket
- bucket_name = "ml-storage-gvdf"
+ bucket_name = "ml-storage-${random_string.suffix.result}"
// TensorBoard
tensorboard_name = "ml-tensorboard-${local.env}"
+
+ log_bucket = local.env_log_bucket
+ keyring = one(local.region_kms_keyring)
}
diff --git a/examples/machine-learning-pipeline/ml_business_unit/production/remote.tf b/examples/machine-learning-pipeline/ml_business_unit/production/remote.tf
index dc533538..db614c37 100644
--- a/examples/machine-learning-pipeline/ml_business_unit/production/remote.tf
+++ b/examples/machine-learning-pipeline/ml_business_unit/production/remote.tf
@@ -15,10 +15,26 @@
*/
locals {
- machine_learning_project_id = data.terraform_remote_state.projects_env.outputs.machine_learning_project_id
- machine_learning_kms_keys = data.terraform_remote_state.projects_env.outputs.machine_learning_kms_keys
- service_catalog_repo_name = data.terraform_remote_state.projects_shared.outputs.service_catalog_repo_name
- service_catalog_project_id = data.terraform_remote_state.projects_shared.outputs.service_catalog_project_id
+ machine_learning_project_id = data.terraform_remote_state.projects_env.outputs.machine_learning_project_id
+ machine_learning_kms_keys = data.terraform_remote_state.projects_env.outputs.machine_learning_kms_keys
+ service_catalog_repo_name = data.terraform_remote_state.projects_shared.outputs.service_catalog_repo_name
+ service_catalog_project_id = data.terraform_remote_state.projects_shared.outputs.service_catalog_project_id
+ non_production_project_number = data.terraform_remote_state.projects_nonproduction.outputs.machine_learning_project_number
+ non_production_project_id = data.terraform_remote_state.projects_nonproduction.outputs.machine_learning_project_id
+ production_project_number = data.terraform_remote_state.projects_production.outputs.machine_learning_project_number
+ production_project_id = data.terraform_remote_state.projects_production.outputs.machine_learning_project_id
+ env_log_bucket = data.terraform_remote_state.environments_env.outputs.env_log_bucket_name
+ env_keyrings = data.terraform_remote_state.environments_env.outputs.key_rings
+ common_artifacts_project_id = data.terraform_remote_state.projects_shared.outputs.common_artifacts_project_id
+}
+
+data "terraform_remote_state" "environments_env" {
+ backend = "gcs"
+
+ config = {
+ bucket = var.seed_state_bucket
+ prefix = "terraform/environments/${local.env}"
+ }
}
data "terraform_remote_state" "projects_env" {
@@ -38,3 +54,21 @@ data "terraform_remote_state" "projects_shared" {
prefix = "terraform/projects/${local.business_unit}/shared"
}
}
+
+data "terraform_remote_state" "projects_production" {
+ backend = "gcs"
+
+ config = {
+ bucket = var.remote_state_bucket
+ prefix = "terraform/projects/${local.business_unit}/production"
+ }
+}
+
+data "terraform_remote_state" "projects_nonproduction" {
+ backend = "gcs"
+
+ config = {
+ bucket = var.remote_state_bucket
+ prefix = "terraform/projects/${local.business_unit}/non-production"
+ }
+}
diff --git a/examples/machine-learning-pipeline/ml_business_unit/production/variables.tf b/examples/machine-learning-pipeline/ml_business_unit/production/variables.tf
index c26d31ca..baad685c 100644
--- a/examples/machine-learning-pipeline/ml_business_unit/production/variables.tf
+++ b/examples/machine-learning-pipeline/ml_business_unit/production/variables.tf
@@ -27,9 +27,22 @@ variable "remote_state_bucket" {
variable "github_app_installation_id" {
description = "The app installation ID that was created when installing Google Cloud Build in Github: https://github.com/apps/google-cloud-build"
type = number
+ default = null
}
variable "github_remote_uri" {
description = "The remote uri of your github repository"
type = string
+ default = null
+}
+
+variable "seed_state_bucket" {
+ description = "Remote state bucket from 0-bootstrap"
+ type = string
+}
+
+variable "repository_id" {
+ description = "Common artifacts repository id"
+ type = string
+ default = "c-publish-artifacts"
}
diff --git a/examples/machine-learning-pipeline/modules/base_env/data.tf b/examples/machine-learning-pipeline/modules/base_env/data.tf
index eac4af39..3526f31d 100644
--- a/examples/machine-learning-pipeline/modules/base_env/data.tf
+++ b/examples/machine-learning-pipeline/modules/base_env/data.tf
@@ -18,20 +18,12 @@ data "google_project" "project" {
project_id = var.project_id
}
-data "google_projects" "non-production" {
- filter = "labels.application_name:machine-learning labels.env_code:n"
-}
-
-data "google_projects" "production" {
- filter = "labels.application_name:machine-learning labels.env_code:p"
-}
-
data "google_service_account" "non-production" {
- project = data.google_projects.non-production.projects.0.project_id
- account_id = "${data.google_projects.non-production.projects.0.number}-compute@developer.gserviceaccount.com"
+ project = var.non_production_project_id
+ account_id = "${var.non_production_project_number}-compute@developer.gserviceaccount.com"
}
data "google_service_account" "production" {
- project = data.google_projects.production.projects.0.project_id
- account_id = "${data.google_projects.production.projects.0.number}-compute@developer.gserviceaccount.com"
+ project = var.production_project_id
+ account_id = "${var.production_project_number}-compute@developer.gserviceaccount.com"
}
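The rewritten data sources drop the organization-wide `google_projects` label search in favor of explicit project ID and number inputs; the only derived value left is the default Compute Engine service account, whose email always has the form `PROJECT_NUMBER-compute@developer.gserviceaccount.com`. A minimal sketch of the same lookup with placeholder values:

```hcl
variable "peer_project_id" {
  description = "Project ID of the peer environment (placeholder)."
  type        = string
  default     = "example-ml-project"
}

variable "peer_project_number" {
  description = "Project number of the peer environment (placeholder)."
  type        = string
  default     = "123456789012"
}

# Resolves the peer project's default Compute Engine service account directly,
# with no need to scan the organization for projects by label.
data "google_service_account" "peer_compute_default" {
  project    = var.peer_project_id
  account_id = "${var.peer_project_number}-compute@developer.gserviceaccount.com"
}
```

Passing the IDs and numbers in as variables also removes the hidden requirement that the Terraform service account be able to list projects across the organization.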
diff --git a/examples/machine-learning-pipeline/modules/base_env/iam.tf b/examples/machine-learning-pipeline/modules/base_env/iam.tf
index 2aa2efcb..5ebf4a3d 100644
--- a/examples/machine-learning-pipeline/modules/base_env/iam.tf
+++ b/examples/machine-learning-pipeline/modules/base_env/iam.tf
@@ -27,7 +27,7 @@ locals {
"roles/bigquery.admin",
"roles/dataflow.admin",
"roles/dataflow.worker",
- "roles/storage.objectUser",
+ "roles/storage.admin",
"roles/aiplatform.admin",
]
@@ -86,13 +86,13 @@ locals {
]
aiplatform_non_prod_sa = [
- "serviceAccount:service-${data.google_projects.non-production.projects.0.number}@gcp-sa-aiplatform.iam.gserviceaccount.com",
- "serviceAccount:service-${data.google_projects.non-production.projects.0.number}@gcp-sa-aiplatform-cc.iam.gserviceaccount.com",
+ "serviceAccount:service-${var.non_production_project_number}@gcp-sa-aiplatform.iam.gserviceaccount.com",
+ "serviceAccount:service-${var.non_production_project_number}@gcp-sa-aiplatform-cc.iam.gserviceaccount.com",
]
aiplatform_prod_sa = [
- "serviceAccount:service-${data.google_projects.production.projects.0.number}@gcp-sa-aiplatform.iam.gserviceaccount.com",
- "serviceAccount:service-${data.google_projects.production.projects.0.number}@gcp-sa-aiplatform-cc.iam.gserviceaccount.com",
+ "serviceAccount:service-${var.production_project_number}@gcp-sa-aiplatform.iam.gserviceaccount.com",
+ "serviceAccount:service-${var.production_project_number}@gcp-sa-aiplatform-cc.iam.gserviceaccount.com",
]
service_agent_key_binding = flatten([
@@ -132,7 +132,7 @@ resource "google_kms_crypto_key_iam_member" "composer_kms_key_binding_non_prod"
for_each = { for k, v in var.kms_keys : k => v if var.env == "non-production" }
crypto_key_id = each.value.id
role = "roles/cloudkms.cryptoKeyEncrypterDecrypter"
- member = "serviceAccount:service-${data.google_projects.production.projects.0.number}@gcp-sa-aiplatform.iam.gserviceaccount.com"
+ member = "serviceAccount:service-${var.production_project_number}@gcp-sa-aiplatform.iam.gserviceaccount.com"
}
resource "google_service_account_iam_member" "composer_service_agent" {
@@ -147,7 +147,7 @@ resource "google_service_account_iam_member" "compute_non_production" {
# provider = google-beta
service_account_id = data.google_service_account.non-production.name
role = "roles/iam.serviceAccountUser"
- member = "serviceAccount:${data.google_projects.production.projects.0.number}-compute@developer.gserviceaccount.com"
+ member = "serviceAccount:${var.production_project_number}-compute@developer.gserviceaccount.com"
}
resource "google_service_account_iam_member" "compute_production" {
@@ -155,7 +155,7 @@ resource "google_service_account_iam_member" "compute_production" {
# provider = google-beta
service_account_id = data.google_service_account.production.name
role = "roles/iam.serviceAccountUser"
- member = "serviceAccount:${data.google_projects.non-production.projects.0.number}-compute@developer.gserviceaccount.com"
+ member = "serviceAccount:${var.non_production_project_number}-compute@developer.gserviceaccount.com"
}
######################
@@ -207,7 +207,7 @@ resource "google_project_iam_member" "bq_pipeline_prod_vertex_admin" {
count = var.env == "production" ? 1 : 0
provider = google-beta
project = var.project_id
- member = "serviceAccount:service-${data.google_projects.non-production.projects.0.number}@gcp-sa-aiplatform-cc.iam.gserviceaccount.com"
+ member = "serviceAccount:service-${var.non_production_project_number}@gcp-sa-aiplatform-cc.iam.gserviceaccount.com"
role = "roles/aiplatform.admin"
}
@@ -231,7 +231,7 @@ resource "google_project_iam_member" "monitoring" {
count = var.env == "non-production" ? 1 : 0
project = var.project_id
role = "roles/bigquery.admin"
- member = "serviceAccount:service-${data.google_projects.production.projects.0.number}@gcp-sa-aiplatform.iam.gserviceaccount.com"
+ member = "serviceAccount:service-${var.production_project_number}@gcp-sa-aiplatform.iam.gserviceaccount.com"
}
resource "google_project_iam_member" "compute_roles" {
@@ -261,7 +261,7 @@ resource "google_storage_bucket_iam_member" "prod_access" {
count = var.env == "non-production" ? 1 : 0
bucket = join("-", [var.gcs_bucket_prefix, data.google_project.project.labels.env_code, var.bucket_name])
role = "roles/storage.objectViewer"
- member = "serviceAccount:service-${data.google_projects.production.projects.0.number}@gcp-sa-aiplatform.iam.gserviceaccount.com"
+ member = "serviceAccount:service-${var.production_project_number}@gcp-sa-aiplatform.iam.gserviceaccount.com"
depends_on = [module.bucket]
}
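Every substitution in this file follows from the Vertex AI service-agent naming scheme: the agents are addressed as `service-PROJECT_NUMBER@gcp-sa-aiplatform.iam.gserviceaccount.com` (and `...@gcp-sa-aiplatform-cc...` for the custom-code agent), so a project number variable is all that is needed to rebuild each member string. A small sketch of the construction, with a placeholder project number:

```hcl
variable "peer_project_number" {
  description = "Project number of the peer ML project (placeholder)."
  type        = string
  default     = "123456789012"
}

locals {
  # Vertex AI service agent and custom-code service agent, built the same
  # way as the aiplatform_* member strings above.
  aiplatform_members = [
    "serviceAccount:service-${var.peer_project_number}@gcp-sa-aiplatform.iam.gserviceaccount.com",
    "serviceAccount:service-${var.peer_project_number}@gcp-sa-aiplatform-cc.iam.gserviceaccount.com",
  ]
}

output "aiplatform_members" {
  value = local.aiplatform_members
}
```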
diff --git a/examples/machine-learning-pipeline/modules/base_env/main.tf b/examples/machine-learning-pipeline/modules/base_env/main.tf
index 812d6d5d..01911a5c 100644
--- a/examples/machine-learning-pipeline/modules/base_env/main.tf
+++ b/examples/machine-learning-pipeline/modules/base_env/main.tf
@@ -37,7 +37,8 @@ module "composer" {
python_version = var.composer_python_version
web_server_allowed_ip_ranges = var.composer_web_server_allowed_ip_ranges
- depends_on = [google_service_account.composer, google_kms_crypto_key_iam_member.service_agent_kms_key_binding]
+ kms_keyring = var.keyring
+ depends_on = [google_service_account.composer, google_kms_crypto_key_iam_member.service_agent_kms_key_binding]
}
########################
@@ -58,7 +59,8 @@ module "big_query" {
default_table_expiration_ms = var.big_query_default_table_expiration_ms
delete_contents_on_destroy = var.big_query_delete_contents_on_destroy
- depends_on = [google_kms_crypto_key_iam_member.service_agent_kms_key_binding]
+ kms_keyring = var.keyring
+ depends_on = [google_kms_crypto_key_iam_member.service_agent_kms_key_binding]
}
########################
@@ -66,7 +68,6 @@ module "big_query" {
########################
module "metadata" {
- count = var.env != "development" ? 1 : 0
source = "git::https://source.developers.google.com/p/SERVICE_CATALOG_PROJECT_ID/r/service-catalog//modules/metadata?ref=main"
project_id = var.project_id
@@ -74,6 +75,8 @@ module "metadata" {
region = var.region
+ kms_keyring = var.keyring
+
depends_on = [google_kms_crypto_key_iam_member.service_agent_kms_key_binding]
}
@@ -82,7 +85,6 @@ module "metadata" {
########################
module "bucket" {
- count = var.env != "development" ? 1 : 0
source = "git::https://source.developers.google.com/p/SERVICE_CATALOG_PROJECT_ID/r/service-catalog//modules/bucket?ref=main"
project_id = var.project_id
@@ -100,8 +102,9 @@ module "bucket" {
storage_class = var.bucket_storage_class
requester_pays = var.bucket_requester_pays
gcs_bucket_prefix = var.gcs_bucket_prefix
-
- depends_on = [google_kms_crypto_key_iam_member.service_agent_kms_key_binding]
+ kms_keyring = var.keyring
+ log_bucket = var.log_bucket
+ depends_on = [google_kms_crypto_key_iam_member.service_agent_kms_key_binding]
}
########################
@@ -112,10 +115,27 @@ module "tensorboard" {
count = var.env != "development" ? 1 : 0
source = "git::https://source.developers.google.com/p/SERVICE_CATALOG_PROJECT_ID/r/service-catalog//modules/tensorboard?ref=main"
- project_id = var.project_id
- name = var.tensorboard_name
+ project_id = var.project_id
+ name = var.tensorboard_name
+ kms_keyring = var.keyring
+ region = var.region
- region = var.region
+ depends_on = [google_kms_crypto_key_iam_member.service_agent_kms_key_binding]
+}
+
+########################
+# Notebook #
+########################
+
+module "notebook" {
+ count = var.env == "development" ? 1 : 0
+ source = "git::https://source.developers.google.com/p/SERVICE_CATALOG_PROJECT_ID/r/service-catalog//modules/notebook?ref=main"
+
+ project_id = var.project_id
+ kms_keyring = var.keyring
+ instance_owners = toset(["REPLACE_WITH_USER_GCP_EMAIL"])
+ name = "ml-instance"
+ vpc_project = "REPLACE_WITH_DEV_VPC_PROJECT"
depends_on = [google_kms_crypto_key_iam_member.service_agent_kms_key_binding]
}
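The TensorBoard and notebook blocks above use `count` as a per-environment feature flag: a module gets zero or one instance depending on `var.env`, and anything that consumes it has to index into the result. A generic sketch of the idiom — the module source, variable, and output names are placeholders, not modules from this repository:

```hcl
variable "env" {
  type    = string
  default = "development"
}

# Instantiated only in development; every other environment plans zero instances.
module "dev_only_example" {
  count  = var.env == "development" ? 1 : 0
  source = "./modules/example" # placeholder path; assumes a `name` input and output

  name = "ml-instance"
}

output "dev_only_example_name" {
  # The splat plus one() yields the single instance's output, or null when count is 0.
  value = one(module.dev_only_example[*].name)
}
```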
diff --git a/examples/machine-learning-pipeline/modules/base_env/variables.tf b/examples/machine-learning-pipeline/modules/base_env/variables.tf
index 848a9ad4..b1492721 100644
--- a/examples/machine-learning-pipeline/modules/base_env/variables.tf
+++ b/examples/machine-learning-pipeline/modules/base_env/variables.tf
@@ -18,6 +18,16 @@
# Global Inputs #
########################
+variable "log_bucket" {
+ description = "Log Bucket to be used."
+ type = string
+}
+
+variable "keyring" {
+ description = "Keyring to be used."
+ type = string
+}
+
variable "env" {
description = "Environment name. (ex. production)"
type = string
@@ -39,6 +49,26 @@ variable "project_id" {
type = string
}
+variable "non_production_project_number" {
+ description = "Non-production Machine Learning Project Number"
+ type = string
+}
+
+variable "non_production_project_id" {
+ description = "Non-production Machine Learning Project ID"
+ type = string
+}
+
+variable "production_project_number" {
+ description = "Production Machine Learning Project Number"
+ type = string
+}
+
+variable "production_project_id" {
+ description = "Production Machine Learning Project ID"
+ type = string
+}
+
variable "region" {
type = string
description = "The resource region, one of [us-central1, us-east4]."
@@ -216,7 +246,7 @@ variable "big_query_delete_contents_on_destroy" {
variable "metadata_name" {
type = string
description = "The name of the metadata store instance"
- default = null
+ default = "default"
}
########################
diff --git a/test/setup/main.tf b/test/setup/main.tf
index 2439902a..7b85df91 100644
--- a/test/setup/main.tf
+++ b/test/setup/main.tf
@@ -46,7 +46,7 @@ resource "google_folder" "test_folder" {
module "project" {
source = "terraform-google-modules/project-factory/google"
- version = "~> 14.0"
+ version = "~> 15.0"
name = "ci-foundation-${random_string.suffix.result}"
random_project_id = true