diff --git a/README.md b/README.md index 37ecf4c..a13e5df 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,82 @@ -# Azure <> Terraform module -Terraform module for creation Azure <> +# AWS Databricks Workspace Terraform module +Terraform module for creating an AWS Databricks Workspace ## Usage +## Requirements +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 1.8 | +| [aws](#requirement\_aws) | >= 5.0 | +| [databricks](#requirement\_databricks) | >= 1.55 | +| [time](#requirement\_time) | ~> 0.11 | + +## Providers + +| Name | Version | +|------|---------| +| [aws](#provider\_aws) | >= 5.0 | +| [databricks](#provider\_databricks) | >= 1.55 | +| [time](#provider\_time) | ~> 0.11 | + +## Modules + +| Name | Source | Version | +|------|--------|---------| +| [iam\_cross\_account\_workspace\_policy](#module\_iam\_cross\_account\_workspace\_policy) | terraform-aws-modules/iam/aws//modules/iam-policy | 5.41.0 | +| [iam\_cross\_account\_workspace\_role](#module\_iam\_cross\_account\_workspace\_role) | terraform-aws-modules/iam/aws//modules/iam-assumable-role | 5.41.0 | +| [privatelink\_vpce](#module\_privatelink\_vpce) | ./modules/privatelink/ | n/a | +| [storage\_configuration\_dbfs\_bucket](#module\_storage\_configuration\_dbfs\_bucket) | terraform-aws-modules/s3-bucket/aws | 4.1.2 | + +## Resources + +| Name | Type | +|------|------| +| [aws_s3_bucket_policy.databricks_aws_bucket_policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket_policy) | resource | +| [databricks_mws_credentials.this](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/mws_credentials) | resource | +| [databricks_mws_networks.this](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/mws_networks) | resource | +| 
[databricks_mws_private_access_settings.this](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/mws_private_access_settings) | resource | +| [databricks_mws_storage_configurations.this](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/mws_storage_configurations) | resource | +| [databricks_mws_workspaces.this](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/mws_workspaces) | resource | +| [time_sleep.wait_30_seconds](https://registry.terraform.io/providers/hashicorp/time/latest/docs/resources/sleep) | resource | +| [databricks_aws_assume_role_policy.this](https://registry.terraform.io/providers/databricks/databricks/latest/docs/data-sources/aws_assume_role_policy) | data source | +| [databricks_aws_bucket_policy.this](https://registry.terraform.io/providers/databricks/databricks/latest/docs/data-sources/aws_bucket_policy) | data source | +| [databricks_aws_crossaccount_policy.this](https://registry.terraform.io/providers/databricks/databricks/latest/docs/data-sources/aws_crossaccount_policy) | data source | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [account\_id](#input\_account\_id) | Databricks Account ID | `string` | n/a | yes | +| [iam\_cross\_account\_workspace\_role\_config](#input\_iam\_cross\_account\_workspace\_role\_config) | Configuration object for setting the IAM cross-account role for the Databricks workspace |
object({
role_name = optional(string, null)
policy_name = optional(string, null)
permission_boundary_arn = optional(string, null)
role_description = optional(string, "Databricks IAM Role to launch clusters in your AWS account, you must create a cross-account IAM role that gives access to Databricks.")
})
| `{}` | no | +| [iam\_cross\_account\_workspace\_role\_enabled](#input\_iam\_cross\_account\_workspace\_role\_enabled) | A boolean flag to determine if the cross-account IAM role for Databricks workspace access should be created | `bool` | `true` | no | +| [label](#input\_label) | A customizable string used as a prefix for naming Databricks resources | `string` | n/a | yes | +| [private\_access\_settings\_config](#input\_private\_access\_settings\_config) | Configuration for private access settings |
object({
name = optional(string, null)
allowed_vpc_endpoint_ids = optional(list(string), [])
public_access_enabled = optional(bool, true)
})
| `{}` | no | +| [private\_access\_settings\_enabled](#input\_private\_access\_settings\_enabled) | Indicates whether private access settings should be enabled for the Databricks workspace. Set to true to activate these settings | `bool` | `true` | no | +| [privatelink\_dedicated\_vpce\_config](#input\_privatelink\_dedicated\_vpce\_config) | Configuration object for AWS PrivateLink dedicated VPC Endpoints (VPCe) |
object({
rest_vpc_endpoint_name = optional(string, null)
relay_vpc_endpoint_name = optional(string, null)
rest_aws_vpc_endpoint_id = optional(string, null)
relay_aws_vpc_endpoint_id = optional(string, null)
})
| `{}` | no | +| [privatelink\_dedicated\_vpce\_enabled](#input\_privatelink\_dedicated\_vpce\_enabled) | Boolean flag to enable or disable the creation of dedicated AWS VPC Endpoints (VPCe) for Databricks PrivateLink | `bool` | `false` | no | +| [privatelink\_enabled](#input\_privatelink\_enabled) | Boolean flag to enabled registration of Privatelink VPC Endpoints (REST API and SCC Relay) in target Databricks Network Config | `bool` | `false` | no | +| [privatelink\_relay\_vpce\_id](#input\_privatelink\_relay\_vpce\_id) | AWS VPC Endpoint ID used for Databricks SCC Relay when PrivateLink is enabled | `string` | `null` | no | +| [privatelink\_rest\_vpce\_id](#input\_privatelink\_rest\_vpce\_id) | AWS VPC Endpoint ID used for Databricks REST API if PrivateLink is enabled | `string` | `null` | no | +| [region](#input\_region) | AWS region | `string` | n/a | yes | +| [security\_group\_ids](#input\_security\_group\_ids) | Set of AWS security group IDs for Databricks Account network configuration | `set(string)` | n/a | yes | +| [storage\_dbfs\_config](#input\_storage\_dbfs\_config) | Configuration for the Databricks File System (DBFS) storage |
object({
bucket_name = optional(string)
})
| `{}` | no |
+| [storage\_dbfs\_enabled](#input\_storage\_dbfs\_enabled) | Flag to enable or disable the use of DBFS (Databricks File System) storage in the Databricks workspace | `bool` | `true` | no |
+| [subnet\_ids](#input\_subnet\_ids) | Set of AWS subnet IDs for Databricks Account network configuration | `set(string)` | n/a | yes |
+| [tags](#input\_tags) | Assigned tags to AWS services | `map(string)` | `{}` | no |
+| [vpc\_id](#input\_vpc\_id) | AWS VPC ID | `string` | n/a | yes |
+| [workspace\_creator\_token\_enabled](#input\_workspace\_creator\_token\_enabled) | Indicates whether to enable the creation of a token for workspace creators in Databricks | `bool` | `false` | no |
+
+## Outputs
+
+| Name | Description |
+|------|-------------|
+| [iam\_role](#output\_iam\_role) | The IAM role created for cross-account access to the Databricks workspace |
+| [storage](#output\_storage) | The storage configuration for the DBFS bucket associated with the workspace |
+| [workspace](#output\_workspace) | The Databricks workspace resource that has been created |
+| [workspace\_url](#output\_workspace\_url) | The URL for accessing the Databricks workspace |
 ## License
diff --git a/main.tf b/main.tf
new file mode 100644
index 0000000..1d7d81d
--- /dev/null
+++ b/main.tf
@@ -0,0 +1,188 @@
+################################################################################
+# Databricks Workspace
+################################################################################
+# Account-level workspace wired to the credentials, storage configuration,
+# network and (optional) private access settings declared in this file.
+resource "databricks_mws_workspaces" "this" {
+  account_id     = var.account_id
+  aws_region     = var.region
+  workspace_name = var.label
+
+  credentials_id             = databricks_mws_credentials.this.credentials_id
+  storage_configuration_id   = databricks_mws_storage_configurations.this.storage_configuration_id
+  network_id                 = databricks_mws_networks.this.network_id
+  private_access_settings_id = try(databricks_mws_private_access_settings.this[0].private_access_settings_id, null)
+
+  # Emit the token block only when a workspace creator token was requested.
+  dynamic "token" {
+    for_each = var.workspace_creator_token_enabled ? [1] : []
+    content {
+      comment = "Workspace creator token managed by Terraform"
+    }
+  }
+
+  # Recreate the workspace whenever the credentials resource is replaced.
+  lifecycle {
+    replace_triggered_by = [databricks_mws_credentials.this]
+  }
+}
+
+# Private access settings; created only when private_access_settings_enabled
+# is true.
+resource "databricks_mws_private_access_settings" "this" {
+  count = var.private_access_settings_enabled ? 1 : 0
+
+  private_access_settings_name = coalesce(var.private_access_settings_config.name, var.label)
+  region                       = var.region
+  public_access_enabled        = var.private_access_settings_config.public_access_enabled
+  private_access_level         = "ENDPOINT"
+
+  # allowed_vpc_endpoint_ids defaults to [] (never null), so coalesce() could
+  # never reach its fallback. Test for emptiness instead and fall back to the
+  # REST endpoint ID; compact() drops that ID again when it is null.
+  allowed_vpc_endpoint_ids = (
+    length(var.private_access_settings_config.allowed_vpc_endpoint_ids) > 0
+    ? var.private_access_settings_config.allowed_vpc_endpoint_ids
+    : compact([var.privatelink_rest_vpce_id])
+  )
+}
+
+################################################################################
+# Network
+################################################################################
+resource "databricks_mws_networks" "this" {
+  account_id         = var.account_id
+  network_name       = var.label
+  vpc_id             = var.vpc_id
+  subnet_ids         = var.subnet_ids
+  security_group_ids = var.security_group_ids
+
+  # Attach the PrivateLink endpoints only when privatelink_enabled is true.
+  # module.privatelink_vpce uses count, so its outputs are read through [0];
+  # an unindexed reference never resolves to the output, which made try()
+  # fall back to null and ignore the endpoints created by the submodule.
+  dynamic "vpc_endpoints" {
+    for_each = var.privatelink_enabled ? [1] : []
+    content {
+      dataplane_relay = [coalesce(try(module.privatelink_vpce[0].relay_vpce_id, null), var.privatelink_relay_vpce_id)]
+      rest_api        = [coalesce(try(module.privatelink_vpce[0].rest_vpce_id, null), var.privatelink_rest_vpce_id)]
+    }
+  }
+}
+
+################################################################################
+# Privatelink dedicated VPC Endpoints (REST/Relay)
+################################################################################
+# Instantiated only when privatelink_dedicated_vpce_enabled is true.
+module "privatelink_vpce" {
+  count  = var.privatelink_dedicated_vpce_enabled ? 1 : 0
+  source = "./modules/privatelink/"
+
+  account_id = var.account_id
+  region     = var.region
+
+  rest_vpc_endpoint_name    = var.privatelink_dedicated_vpce_config.rest_vpc_endpoint_name
+  rest_aws_vpc_endpoint_id  = var.privatelink_dedicated_vpce_config.rest_aws_vpc_endpoint_id
+  relay_vpc_endpoint_name   = var.privatelink_dedicated_vpce_config.relay_vpc_endpoint_name
+  relay_aws_vpc_endpoint_id = var.privatelink_dedicated_vpce_config.relay_aws_vpc_endpoint_id
+}
+
+################################################################################
+# IAM
+################################################################################
+data "databricks_aws_assume_role_policy" "this" {
+  external_id = var.account_id
+}
+
+data "databricks_aws_crossaccount_policy" "this" {}
+
+# NOTE(review): this policy is created even when the cross-account role is
+# disabled - confirm that is intended.
+module "iam_cross_account_workspace_policy" {
+  source  = "terraform-aws-modules/iam/aws//modules/iam-policy"
+  version = "5.41.0"
+
+  name   = coalesce(var.iam_cross_account_workspace_role_config.policy_name, "${var.label}-dbx-crossaccount-policy")
+  policy = data.databricks_aws_crossaccount_policy.this.json
+}
+
+module "iam_cross_account_workspace_role" {
+  count   = var.iam_cross_account_workspace_role_enabled ? 1 : 0
+  source  = "terraform-aws-modules/iam/aws//modules/iam-assumable-role"
+  version = "5.41.0"
+
+  role_name                       = coalesce(var.iam_cross_account_workspace_role_config.role_name, "${var.label}-dbx-cross-account")
+  create_role                     = var.iam_cross_account_workspace_role_enabled
+  create_custom_role_trust_policy = true
+  custom_role_trust_policy        = data.databricks_aws_assume_role_policy.this.json
+  role_permissions_boundary_arn   = var.iam_cross_account_workspace_role_config.permission_boundary_arn
+  role_description                = var.iam_cross_account_workspace_role_config.role_description
+  custom_role_policy_arns         = [module.iam_cross_account_workspace_policy.arn]
+  tags                            = var.tags
+}
+
+# It is required to wait up to 30 seconds after role creation so Databricks can successfully reference it
+resource "time_sleep" "wait_30_seconds" {
+  depends_on = [module.iam_cross_account_workspace_role]
+
+  create_duration = "30s"
+}
+
+# NOTE(review): role_arn indexes [0] unconditionally, so evaluation fails when
+# iam_cross_account_workspace_role_enabled is false - there is currently no
+# input for supplying an existing role ARN.
+resource "databricks_mws_credentials" "this" {
+  account_id       = var.account_id
+  credentials_name = "${var.label}-credentials"
+  role_arn         = module.iam_cross_account_workspace_role[0].iam_role_arn
+
+  depends_on = [time_sleep.wait_30_seconds]
+}
+
+################################################################################
+# Storage Configuration
+################################################################################
+# NOTE(review): the data source, bucket policy and storage configuration below
+# all index [0] on the bucket module, so evaluation fails when
+# storage_dbfs_enabled is false.
+data "databricks_aws_bucket_policy" "this" {
+  bucket = module.storage_configuration_dbfs_bucket[0].s3_bucket_id
+}
+
+module "storage_configuration_dbfs_bucket" {
+  count   = var.storage_dbfs_enabled ? 1 : 0
+  source  = "terraform-aws-modules/s3-bucket/aws"
+  version = "4.1.2"
+
+  # bucket_name is passed as a name prefix, not as the exact bucket name.
+  bucket_prefix = coalesce(var.storage_dbfs_config.bucket_name, "${var.label}-dbfs-")
+  acl           = "private"
+
+  force_destroy = true
+
+  control_object_ownership = true
+  object_ownership         = "BucketOwnerPreferred"
+
+  server_side_encryption_configuration = {
+    rule = {
+      apply_server_side_encryption_by_default = {
+        sse_algorithm = "AES256"
+      }
+    }
+  }
+
+  versioning = {
+    status = "Disabled"
+  }
+}
+
+resource "aws_s3_bucket_policy" "databricks_aws_bucket_policy" {
+  bucket = module.storage_configuration_dbfs_bucket[0].s3_bucket_id
+  policy = data.databricks_aws_bucket_policy.this.json
+}
+
+resource "databricks_mws_storage_configurations" "this" {
+  account_id                 = var.account_id
+  storage_configuration_name = var.label
+  bucket_name                = module.storage_configuration_dbfs_bucket[0].s3_bucket_id
+}
diff --git a/modules/privatelink/main.tf b/modules/privatelink/main.tf
new file mode 100644
index 0000000..005e2b1
--- /dev/null
+++ b/modules/privatelink/main.tf
@@ -0,0 +1,15 @@
+# Databricks VPC endpoint entry for the REST API, pointing at rest_aws_vpc_endpoint_id.
+resource "databricks_mws_vpc_endpoint" "rest" {
+  account_id          = var.account_id
+  region              = var.region
+  vpc_endpoint_name   = var.rest_vpc_endpoint_name
+  aws_vpc_endpoint_id = var.rest_aws_vpc_endpoint_id
+}
+
+# Databricks VPC endpoint entry for the relay service, pointing at relay_aws_vpc_endpoint_id.
+resource "databricks_mws_vpc_endpoint" "relay" {
+  account_id          = var.account_id
+  region              = var.region
+  vpc_endpoint_name   = var.relay_vpc_endpoint_name
+  aws_vpc_endpoint_id = var.relay_aws_vpc_endpoint_id
+}
diff --git a/modules/privatelink/outputs.tf b/modules/privatelink/outputs.tf
new file mode 100644
index 0000000..62f6995
--- /dev/null
+++ b/modules/privatelink/outputs.tf
@@ -0,0 +1,9 @@
+output "rest_vpce_id" {
+  description = "The ID of the AWS VPC endpoint associated with the Databricks REST API"
+  value       = databricks_mws_vpc_endpoint.rest.vpc_endpoint_id
+}
+
+output "relay_vpce_id" {
+  description = "The ID of the AWS VPC endpoint associated with the Databricks Relay service"
+  value       = databricks_mws_vpc_endpoint.relay.vpc_endpoint_id
+}
diff --git a/modules/privatelink/variables.tf b/modules/privatelink/variables.tf
new file mode 100644
index 0000000..66c2050
--- /dev/null
+++ b/modules/privatelink/variables.tf
@@ -0,0 +1,29 @@
+variable "region" {
+  description = "AWS region"
+  type        = string
+}
+
+variable "rest_vpc_endpoint_name" {
+  description = "The name to assign to the AWS VPC endpoint for the Databricks REST API"
+  type        = string
+}
+
+variable "rest_aws_vpc_endpoint_id" {
+  description = "The AWS VPC endpoint ID for the Databricks REST API"
+  type        = string
+}
+
+variable "relay_vpc_endpoint_name" {
+  description = "The name to assign to the AWS VPC endpoint for the Databricks Relay service"
+  type        = string
+}
+
+variable "relay_aws_vpc_endpoint_id" {
+  description = "The AWS VPC endpoint ID for the Databricks Relay service"
+  type        = string
+}
+
+variable "account_id" {
+  description = "Databricks Account ID"
+  type        = string
+}
diff --git a/modules/privatelink/versions.tf b/modules/privatelink/versions.tf
new file mode 100644
index 0000000..21f9c42
--- /dev/null
+++ b/modules/privatelink/versions.tf
@@ -0,0 +1,10 @@
+terraform {
+  required_version = ">= 1.0"
+
+  required_providers {
+    databricks = {
+      source  = "databricks/databricks"
+      version = ">= 1.55"
+    }
+  }
+}
diff --git a/outputs.tf b/outputs.tf
new file mode 100644
index 0000000..d3d7247
--- /dev/null
+++ b/outputs.tf
@@ -0,0 +1,19 @@
+output "workspace" {
+  description = "The Databricks workspace resource that has been created"
+  value       = databricks_mws_workspaces.this
+}
+
+output "storage" {
+  description = "The storage configuration for the DBFS bucket associated with the workspace"
+  value       = try(module.storage_configuration_dbfs_bucket[0], null)
+}
+
+output "iam_role" {
+  description = "The IAM role created for cross-account access to the Databricks workspace"
+  value       = try(module.iam_cross_account_workspace_role[0], null)
+}
+
+output "workspace_url" {
+  description = "The URL for accessing the Databricks workspace"
+  value       = databricks_mws_workspaces.this.workspace_url
+}
diff --git a/variables.tf b/variables.tf
new file mode 100644
index 0000000..2a974fb
--- /dev/null
+++ b/variables.tf
@@ -0,0 +1,158 @@
+################################################################################
+# General
+################################################################################
+variable "label" {
+  description = "A customizable string used as a prefix for naming Databricks resources"
+  type        = string
+}
+
+variable "region" {
+  description = "AWS region"
+  type        = string
+}
+
+variable "tags" {
+  description = "Assigned tags to AWS services"
+  type        = map(string)
+  default     = {}
+}
+
+variable "account_id" {
+  description = "Databricks Account ID"
+  type        = string
+}
+
+################################################################################
+# Network configuration
+################################################################################
+variable "vpc_id" {
+  description = "AWS VPC ID"
+  type        = string
+}
+
+variable "security_group_ids" {
+  description = "Set of AWS security group IDs for Databricks Account network configuration"
+  type        = set(string)
+}
+
+variable "subnet_ids" {
+  description = "Set of AWS subnet IDs for Databricks Account network configuration"
+  type        = set(string)
+}
+
+################################################################################
+# Privatelink configuration
+################################################################################
+variable "privatelink_enabled" {
+  description = "Boolean flag to enabled registration of Privatelink VPC Endpoints (REST API and SCC Relay) in target Databricks Network Config"
+  type        = bool
+  default     = false
+}
+
+variable "privatelink_rest_vpce_id" {
+  description = "AWS VPC Endpoint ID used for Databricks REST API if PrivateLink is enabled"
+  type        = string
+  default     = null
+
+  # The ID is mandatory only when PrivateLink is enabled and the endpoints are
+  # not created by the dedicated VPCe submodule. Referencing other variables
+  # in a validation block requires Terraform >= 1.9.
+  validation {
+    condition     = !var.privatelink_enabled || var.privatelink_dedicated_vpce_enabled || var.privatelink_rest_vpce_id != null
+    error_message = "It is required to provide AWS VPC Endpoints for Databricks REST API in case Privatelink enabled"
+  }
+}
+
+variable "privatelink_relay_vpce_id" {
+  description = "AWS VPC Endpoint ID used for Databricks SCC Relay when PrivateLink is enabled"
+  type        = string
+  default     = null
+
+  # Same rule as privatelink_rest_vpce_id, applied to the SCC Relay endpoint.
+  validation {
+    condition     = !var.privatelink_enabled || var.privatelink_dedicated_vpce_enabled || var.privatelink_relay_vpce_id != null
+    error_message = "It is required to provide AWS VPC Endpoints for Databricks SCC Relay in case Privatelink enabled"
+  }
+}
+
+variable "privatelink_dedicated_vpce_enabled" {
+  description = "Boolean flag to enable or disable the creation of dedicated AWS VPC Endpoints (VPCe) for Databricks PrivateLink"
+  type        = bool
+  default     = false
+}
+
+variable "privatelink_dedicated_vpce_config" {
+  description = "Configuration object for AWS PrivateLink dedicated VPC Endpoints (VPCe)"
+  type = object({
+    rest_vpc_endpoint_name    = optional(string, null)
+    relay_vpc_endpoint_name   = optional(string, null)
+    rest_aws_vpc_endpoint_id  = optional(string, null)
+    relay_aws_vpc_endpoint_id = optional(string, null)
+  })
+  default = {}
+}
+
+################################################################################
+# Databricks Workspace
+################################################################################
+variable "iam_cross_account_workspace_role_enabled" {
+  description = "A boolean flag to determine if the cross-account IAM role for Databricks workspace access should be created"
+  type        = bool
+  default     = true
+}
+
+variable "iam_cross_account_workspace_role_config" {
+  description = "Configuration object for setting the IAM cross-account role for the Databricks workspace"
+  type = object({
+    role_name               = optional(string, null)
+    policy_name             = optional(string, null)
+    permission_boundary_arn = optional(string, null)
+    role_description        = optional(string, "Databricks IAM Role to launch clusters in your AWS account, you must create a cross-account IAM role that gives access to Databricks.")
+  })
+  default = {}
+}
+
+################################################################################
+# Storage root bucket config
+################################################################################
+variable "storage_dbfs_enabled" {
+  description = "Flag to enable or disable the use of DBFS (Databricks File System) storage in the Databricks workspace"
+  type        = bool
+  default     = true
+}
+
+variable "storage_dbfs_config" {
+  description = "Configuration for the Databricks File System (DBFS) storage"
+  type = object({
+    bucket_name = optional(string)
+  })
+  default = {}
+}
+
+################################################################################
+# Workspace
+################################################################################
+variable "workspace_creator_token_enabled" {
+  description = "Indicates whether to enable the creation of a token for workspace creators in Databricks"
+  type        = bool
+  default     = false
+}
+
+################################################################################
+# Workspace access config
+################################################################################
+variable "private_access_settings_enabled" {
+  description = "Indicates whether private access settings should be enabled for the Databricks workspace. Set to true to activate these settings"
+  type        = bool
+  default     = true
+}
+
+variable "private_access_settings_config" {
+  description = "Configuration for private access settings"
+  type = object({
+    name                     = optional(string, null)
+    allowed_vpc_endpoint_ids = optional(list(string), [])
+    public_access_enabled    = optional(bool, true)
+  })
+  default = {}
+}
diff --git a/versions.tf b/versions.tf
new file mode 100644
index 0000000..2a9a86d
--- /dev/null
+++ b/versions.tf
@@ -0,0 +1,20 @@
+terraform {
+  # >= 1.9 is needed because validation blocks in variables.tf reference other
+  # input variables, which earlier Terraform releases reject.
+  required_version = ">= 1.9"
+
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = ">= 5.0"
+    }
+    databricks = {
+      source  = "databricks/databricks"
+      version = ">= 1.55"
+    }
+    time = {
+      source  = "hashicorp/time"
+      version = "~> 0.11"
+    }
+  }
+}