Skip to content

Commit

Permalink
Reserve IP addresses for Slurm instances.
Browse files Browse the repository at this point in the history
Reserve IP addresses using google_compute_address for all Slurm
instances so during the reconfiguration they maintain the same IP
address, regardless of whether static IP addresses were provided or not.
  • Loading branch information
wiktorn committed Jun 1, 2024
1 parent 8e41adf commit 6284b41
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ No modules.

| Name | Type |
|------|------|
| [google_compute_address.static_ip](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_address) | resource |
| [google_compute_instance_from_template.slurm_instance](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_instance_from_template) | resource |
| [null_resource.replace_trigger](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource |
| [google_compute_instance_template.base](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/compute_instance_template) | data source |
Expand Down
71 changes: 53 additions & 18 deletions terraform/slurm_cluster/modules/_slurm_instance/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@
##########

locals {
hostname = var.hostname == "" ? "default" : var.hostname
_hostname = var.hostname == "" ? "default" : var.hostname
_hostnames = [for index in range(local.num_instances) : var.add_hostname_suffix ? format("%s%s%s", local._hostname, var.hostname_suffix_separator, format("%03d", index + 1)) : local._hostname]
// hostname = var.hostname == "" ? "default" : var.hostname

num_instances = length(var.static_ips) == 0 ? var.num_instances : length(var.static_ips)

# local.static_ips is the same as var.static_ips with a dummy element appended
Expand All @@ -34,24 +37,39 @@ locals {
#################

locals {
network_interfaces = [for index in range(local.num_instances) :
concat([
{
access_config = var.access_config
alias_ip_range = []
ipv6_access_config = []
network = var.network
network_ip = length(var.static_ips) == 0 ? "" : element(local.static_ips, index)
nic_type = null
queue_count = null
stack_type = null
subnetwork = var.subnetwork
subnetwork_project = var.subnetwork_project
}
_network_interfaces = [for index in range(local.num_instances) :
concat(
[
{
access_config = var.access_config
alias_ip_range = []
ipv6_access_config = []
network = var.network
network_ip = length(var.static_ips) == 0 ? "" : element(local.static_ips, index)
nic_type = null
queue_count = null
stack_type = null
subnetwork = var.subnetwork
subnetwork_project = var.subnetwork_project
}
],
var.additional_networks
)
]
network_config = [
for index in range(local.num_instances) : {
hostname = local._hostnames[index]
network_interfaces = [
for nic in local._network_interfaces[index] : merge(
nic,
{
# generate unique name for ip address name based on hostname and first 16 characters of sha256 of (sub)network id
ip_address_name = "${local._hostnames[index]}-${substr(sha256(coalesce(nic.subnetwork, nic.network)), 0, 16)}"
}
)
]
}
]

slurm_instance_role = lower(var.slurm_instance_role)

Expand Down Expand Up @@ -87,14 +105,14 @@ resource "null_resource" "replace_trigger" {

resource "google_compute_instance_from_template" "slurm_instance" {
count = local.num_instances
name = var.add_hostname_suffix ? format("%s%s%s", local.hostname, var.hostname_suffix_separator, format("%03d", count.index + 1)) : local.hostname
name = local.network_config[count.index].hostname
project = var.project_id
zone = var.zone == null ? data.google_compute_zones.available.names[count.index % length(data.google_compute_zones.available.names)] : var.zone

allow_stopping_for_update = true

dynamic "network_interface" {
for_each = local.network_interfaces[count.index]
for_each = local.network_config[count.index].network_interfaces
iterator = nic
content {
dynamic "access_config" {
Expand All @@ -119,7 +137,7 @@ resource "google_compute_instance_from_template" "slurm_instance" {
}
}
network = nic.value.network
network_ip = nic.value.network_ip
network_ip = google_compute_address.static_ip[nic.value.ip_address_name].address
nic_type = nic.value.nic_type
queue_count = nic.value.queue_count
subnetwork = nic.value.subnetwork
Expand Down Expand Up @@ -153,3 +171,20 @@ resource "google_compute_instance_from_template" "slurm_instance" {
replace_triggered_by = [null_resource.replace_trigger.id]
}
}

##############
# IP ADDRESS #
##############

# Reserve one internal IP address per network interface of every Slurm
# instance, so that an instance keeps the same address when it is re-created.
#
# The map is keyed by the per-NIC `ip_address_name` computed in
# local.network_config; the instance resource looks its reserved address up
# by that same key.
resource "google_compute_address" "static_ip" {
  for_each = {
    # Flatten the per-instance NIC lists into a single list of NICs.
    for nic in flatten([
      for net_cfg in local.network_config : net_cfg.network_interfaces
    ]) : nic.ip_address_name => nic
  }

  name = each.value.ip_address_name
  # Reserve the address in the same project the instances are created in,
  # instead of silently falling back to the provider's default project.
  project      = var.project_id
  subnetwork   = each.value.subnetwork
  address_type = "INTERNAL"
  region       = var.region
  # network_ip is "" when no static IP was requested for this NIC; pass null
  # in that case so the provider allocates a free address from the subnetwork
  # rather than receiving an empty address string.
  address = each.value.network_ip == "" ? null : each.value.network_ip
}

0 comments on commit 6284b41

Please sign in to comment.