From 79676634030b4f59431988d87b231cd2a3d5e191 Mon Sep 17 00:00:00 2001 From: Luiz Aoqui Date: Thu, 18 Jan 2024 10:21:16 -0500 Subject: [PATCH] ha: add support for specifying the lock namespace (#832) When HA mode is enabled, the leader lock must be managed in a specific namespace, but when setting the `nomad.namespace` configuration to the wildcard `*` value the Nomad API client was trying to write to this namespace value, which is not allowed. This commit adds a new `high_availability.lock_namespace` configuration to allow operators to specify the exact lock variable namespace. --- CHANGELOG.md | 1 + agent/config/config.go | 18 ++++++++++--- agent/config/config_test.go | 17 ++++++++----- command/agent.go | 22 +++++++++++----- command/agent_test.go | 28 ++++++++++++--------- command/test-fixtures/agent_config_full.hcl | 9 ++++--- 6 files changed, 63 insertions(+), 32 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a2ac5b0..241f0304 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ IMPROVEMENTS: * agent: Add `BlockQueryWaitTime` config option for Nomad API connectivity [[GH-755](https://github.com/hashicorp/nomad-autoscaler/pull/755)] + * agent: Add `high_availability.lock_namespace` configuration to specify the namespace used for writing the high availability lock variable. [[GH-832](https://github.com/hashicorp/nomad-autoscaler/pull/832)] * metrics: Add `policy_id` and `target_name` labels to `scale.invoke.success_count` and `scale.invoke.error_count` metrics [[GH-814](https://github.com/hashicorp/nomad-autoscaler/pull/814)] * plugin/target/aws: Add `scale_in_protection` configuration [[GH-807](https://github.com/hashicorp/nomad-autoscaler/pull/807)] * scaleutils: Add new node filter option `node_pool` to select nodes by their node pool value [[GH-810](https://github.com/hashicorp/nomad-autoscaler/pull/810)] diff --git a/agent/config/config.go b/agent/config/config.go index da243c7e..cdbf23e6 100644 --- a/agent/config/config.go +++ b/agent/config/config.go @@ -16,6 +16,7 @@ import ( "github.com/hashicorp/nomad-autoscaler/plugins" "github.com/hashicorp/nomad-autoscaler/sdk/helper/file" "github.com/hashicorp/nomad-autoscaler/sdk/helper/ptr" + "github.com/hashicorp/nomad/api" "github.com/mitchellh/copystructure" ) @@ -288,6 +289,10 @@ type HighAvailability struct { // is successfully acquired. Enabled *bool `hcl:"enabled"` + // LockNamespace defines the namespace where the high availability lock + // variable is written. + LockNamespace string `hcl:"lock_namespace,optional" json:"-"` + // LockPath defines the path of the variable that will be used to sync the // leader when running on high availability mode. LockPath string `hcl:"lock_path,optional" json:"-"` @@ -472,10 +477,11 @@ func Default() (*Agent, error) { {Name: plugins.InternalTargetNomad, Driver: plugins.InternalTargetNomad}, }, HighAvailability: &HighAvailability{ - Enabled: ptr.Of(false), - LockPath: defaultLockPath, - LockTTL: defaultLockTTL, - LockDelay: defaultLockDelay, + Enabled: ptr.Of(false), + LockNamespace: api.DefaultNamespace, + LockPath: defaultLockPath, + LockTTL: defaultLockTTL, + LockDelay: defaultLockDelay, }, }, nil } @@ -768,6 +774,10 @@ func (ha *HighAvailability) merge(b *HighAvailability) *HighAvailability { result.Enabled = b.Enabled } + if b.LockNamespace != "" { + result.LockNamespace = b.LockNamespace + } + if b.LockPath != "" { result.LockPath = b.LockPath } diff --git a/agent/config/config_test.go b/agent/config/config_test.go index bfaa7c39..bd7d24e1 100644 --- a/agent/config/config_test.go +++ b/agent/config/config_test.go @@ -12,6 +12,7 @@ import ( "time" "github.com/hashicorp/nomad-autoscaler/sdk/helper/ptr" + "github.com/hashicorp/nomad/api" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -38,6 +39,7 @@ func Test_Default(t *testing.T) { assert.Equal(t, 1*time.Second, def.Telemetry.CollectionInterval) assert.False(t, def.EnableDebug, "ensure debugging is disabled by default") assert.False(t, *def.HighAvailability.Enabled, "ensure high availability is disabled by default") + assert.Equal(t, api.DefaultNamespace, def.HighAvailability.LockNamespace) assert.Equal(t, defaultLockPath, def.HighAvailability.LockPath) assert.Equal(t, defaultLockTTL, def.HighAvailability.LockTTL) assert.Equal(t, defaultLockDelay, def.HighAvailability.LockDelay) @@ -80,8 +82,9 @@ func TestAgent_Merge(t *testing.T) { }, }, HighAvailability: &HighAvailability{ - Enabled: ptr.Of(false), - LockPath: "original/path", + Enabled: ptr.Of(false), + LockNamespace: "ns-1", + LockPath: "original/path", }, } @@ -183,8 +186,9 @@ func TestAgent_Merge(t *testing.T) { }, }, HighAvailability: &HighAvailability{ - Enabled: ptr.Of(true), - LockPath: "second/path", + Enabled: ptr.Of(true), + LockNamespace: "ns-2", + LockPath: "second/path", }, } @@ -316,8 +320,9 @@ func TestAgent_Merge(t *testing.T) { }, }, HighAvailability: &HighAvailability{ - Enabled: ptr.Of(true), - LockPath: "second/path", + Enabled: ptr.Of(true), + LockNamespace: "ns-2", + LockPath: "second/path", }, } diff --git a/command/agent.go b/command/agent.go index 124ac2e5..591923a3 100644 --- a/command/agent.go +++ b/command/agent.go @@ -268,18 +268,24 @@ Telemetry Options: -telemetry-circonus-broker-select-tag A tag which is used to select a broker ID when an explicit broker ID is not provided. - + High Availability Options: -high-availability-enabled On cases when multiple instances of the autoscaler need to be run at the same - time, the high-availability option triggers a leader election using a lock + time, the high-availability option triggers a leader election using a lock for sync among the different lock instances. It defaults to false. - + + -high-availability-lock-namepsace + When using the high-availability mode, the namepsace where the lock is + written for leader election can be provided using the + high-availability-lock-namespace flag. The same namespace must be provided + to every instance of the autoscaler in order to be included in the election. + -high-availability-lock-path When using the high-availability mode, the path to the lock to be used for the - leader election can be provided using the high-availability-lock-path flag. - The same path must be provided to every instance of the autoscaler in order + leader election can be provided using the high-availability-lock-path flag. + The same path must be provided to every instance of the autoscaler in order to be included in the election. -high-availability-lock-ttl @@ -396,6 +402,7 @@ func (c *AgentCommand) Run(args []string) int { switch *parsedConfig.HighAvailability.Enabled { case true: logger.Info("running in HA mode", + "lock_namespace", parsedConfig.HighAvailability.LockNamespace, "lock_path", parsedConfig.HighAvailability.LockPath, "lock_ttl", parsedConfig.HighAvailability.LockTTL, "lock_delay", parsedConfig.HighAvailability.LockDelay) @@ -408,7 +415,9 @@ func (c *AgentCommand) Run(args []string) int { }, } - locker, err := c.agent.NomadClient.Locks(api.WriteOptions{}, asLock) + locker, err := c.agent.NomadClient.Locks(api.WriteOptions{ + Namespace: parsedConfig.HighAvailability.LockNamespace, + }, asLock) if err != nil { logger.Error("failed to start locker", "error", err) return 1 @@ -571,6 +580,7 @@ func (c *AgentCommand) readConfig() (*config.Agent, []string) { // Specify our High Availability flags. flags.BoolVar(&enableHighAvailability, "high-availability-enabled", false, "") + flags.StringVar(&cmdConfig.HighAvailability.LockNamespace, "high-availability-lock-namespace", "", "") flags.StringVar(&cmdConfig.HighAvailability.LockPath, "high-availability-lock-path", "", "") flags.DurationVar(&cmdConfig.HighAvailability.LockTTL, "high-availability-lock-ttl", 0, "") flags.DurationVar(&cmdConfig.HighAvailability.LockDelay, "high-availability-lock-delay", 0, "") diff --git a/command/agent_test.go b/command/agent_test.go index a2b2a293..ff958b67 100644 --- a/command/agent_test.go +++ b/command/agent_test.go @@ -195,16 +195,18 @@ func TestCommandAgent_readConfig(t *testing.T) { name: "high availlability flags", args: []string{ "-high-availability-enabled", + "-high-availability-lock-namespace", "my-namespace", "-high-availability-lock-path", "test/merge/path", "-high-availability-lock-ttl", "4m", "-high-availability-lock-delay", "2m", }, want: defaultConfig.Merge(&config.Agent{ HighAvailability: &config.HighAvailability{ - Enabled: ptr.Of(true), - LockPath: "test/merge/path", - LockTTL: 4 * time.Minute, - LockDelay: 2 * time.Minute, + Enabled: ptr.Of(true), + LockNamespace: "my-namespace", + LockPath: "test/merge/path", + LockTTL: 4 * time.Minute, + LockDelay: 2 * time.Minute, }, }), }, @@ -255,10 +257,11 @@ func TestCommandAgent_readConfig(t *testing.T) { }, }, HighAvailability: &config.HighAvailability{ - Enabled: ptr.Of(true), - LockPath: "my/custom/path", - LockTTL: 30 * time.Second, - LockDelay: 15 * time.Second, + Enabled: ptr.Of(true), + LockNamespace: "my-namespace", + LockPath: "my/custom/path", + LockTTL: 30 * time.Second, + LockDelay: 15 * time.Second, }, }), }, @@ -341,10 +344,11 @@ func TestCommandAgent_readConfig(t *testing.T) { }, }, HighAvailability: &config.HighAvailability{ - Enabled: ptr.Of(true), - LockPath: "my/custom/path", - LockTTL: 30 * time.Second, - LockDelay: 15 * time.Second, + Enabled: ptr.Of(true), + LockNamespace: "my-namespace", + LockPath: "my/custom/path", + LockTTL: 30 * time.Second, + LockDelay: 15 * time.Second, }, }), }, diff --git a/command/test-fixtures/agent_config_full.hcl b/command/test-fixtures/agent_config_full.hcl index e45d79f7..0c5af87c 100644 --- a/command/test-fixtures/agent_config_full.hcl +++ b/command/test-fixtures/agent_config_full.hcl @@ -51,8 +51,9 @@ policy_eval { } high_availability { - enabled = true - lock_path = "my/custom/path" - lock_ttl = "30s" - lock_delay = "15s" + enabled = true + lock_namespace = "my-namespace" + lock_path = "my/custom/path" + lock_ttl = "30s" + lock_delay = "15s" }