
Commit f722ac5

Merge pull request #59 from kamil-holubicki/DISTMYSQL-418
DISTMYSQL-418: Introduce separate discovery queue for unhealthy instances
2 parents: b5f9738 + e0484be

8 files changed: +375 −16 lines changed
docs/configuration-discovery-advanced.md

Lines changed: 79 additions & 0 deletions
@@ -0,0 +1,79 @@
# Configuration: advanced discovery

The Orchestrator uses an internal queue to manage instances for discovery. When an instance is ready for discovery, it is added to the queue, and discovery workers process the queue. The `DiscoveryMaxConcurrency` setting in the configuration file controls the number of workers, and therefore determines how many discoveries can happen in parallel.

The Orchestrator uses this mechanism to periodically monitor all instances. The `InstancePollSeconds` configuration parameter determines how often the Orchestrator refreshes the information.
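For example (a minimal sketch; `InstancePollSeconds: 5` mirrors the default set in `go/config/config.go` below, while the `DiscoveryMaxConcurrency` value is illustrative, not a recommendation):

```json
{
  "DiscoveryMaxConcurrency": 300,
  "InstancePollSeconds": 5
}
```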
When there are many inaccessible or unhealthy instances, the Orchestrator may lose its proper view of the cluster and be late with needed recovery actions. Discoveries of such instances may take a long time and finish with failure anyway, consuming workers from the discovery pool in the meantime. Healthy instances wait in the queue and are not checked in a timely manner.

To avoid this, the Orchestrator can be configured to maintain a separate discovery queue for unhealthy instances, processed by a separate pool of workers. Additionally, an exponential backoff mechanism can be applied when rechecking such instances.
Configuration example:

```json
{
  "DeadInstanceDiscoveryMaxConcurrency": 100,
  "DeadInstancePollSecondsMultiplyFactor": 1.5,
  "DeadInstancePollSecondsMax": 60,
  "DeadInstanceDiscoveryLogsEnabled": true
}
```
`DeadInstanceDiscoveryMaxConcurrency` (default: 0) - Determines the number of discovery workers dedicated to dead instances. If this pool size is greater than 0, the Orchestrator maintains a separate queue for dead instances.

`DeadInstancePollSecondsMultiplyFactor` (default: 1) - Floating-point number; allowed values are >= 1. Determines how aggressive the backoff mechanism is. By default, when `DeadInstancePollSecondsMultiplyFactor = 1`, the instance is checked every `InstancePollSeconds` seconds. If the parameter value is greater than 1, every consecutive try `n` is done after a period calculated according to the formula:

dT(n) = InstancePollSeconds * DeadInstancePollSecondsMultiplyFactor ^ (n-1)
Example:

Let `D` stand for `DeadInstancePollSecondsMultiplyFactor`:

f(1) = 1\
f(2) = f(1) * D\
f(3) = f(2) * D\
f(4) = f(3) * D

That means:

f(4) = 1 * D * D * D = D^3

or, in other words:

f(n) = DeadInstancePollSecondsMultiplyFactor ^ (n-1)

so:

dT(n) = InstancePollSeconds * f(n)\
dT(n) = InstancePollSeconds * DeadInstancePollSecondsMultiplyFactor ^ (n-1)
Note that `DeadInstanceDiscoveryMaxConcurrency` controls whether the separate pool of discovery workers is created, but it has no impact on the backoff mechanism controlled by `DeadInstancePollSecondsMultiplyFactor`. This has the following implications:

1. `DeadInstanceDiscoveryMaxConcurrency > 0` and `DeadInstancePollSecondsMultiplyFactor > 1`:\
A separate discovery queue for dead instances is created, dead instances are checked by a dedicated pool of Go workers, and exponential backoff is applied between rechecks.
2. `DeadInstanceDiscoveryMaxConcurrency = 0` and `DeadInstancePollSecondsMultiplyFactor > 1`:\
No separate discovery queue for dead instances is created, and dead instances are checked by the same pool of Go workers as healthy instances; however, exponential backoff is applied for dead instances.
3. `DeadInstanceDiscoveryMaxConcurrency > 0` and `DeadInstancePollSecondsMultiplyFactor = 1`:\
A separate discovery queue for dead instances is created, and dead instances are checked by a dedicated pool of Go workers. No exponential backoff is applied for dead instances.
4. `DeadInstanceDiscoveryMaxConcurrency = 0` and `DeadInstancePollSecondsMultiplyFactor = 1`:\
There is no separate discovery queue for dead instances, no dedicated Go workers, and no backoff mechanism. This is the default working mode.

`DeadInstancePollSecondsMax` (default: 300) - Controls the maximum delay of the backoff mechanism. If the backoff calculation goes beyond this value, the delay is considered saturated and stays at `DeadInstancePollSecondsMax`.
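To make the schedule concrete, here is a minimal standalone sketch (an illustration, not code from the Orchestrator source) that prints the recheck delays for the example configuration above (`InstancePollSeconds = 5`, factor `1.5`, max `60`), including saturation:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	instancePollSeconds := 5.0 // InstancePollSeconds
	factor := 1.5              // DeadInstancePollSecondsMultiplyFactor
	maxSeconds := 60.0         // DeadInstancePollSecondsMax

	// dT(n) = InstancePollSeconds * factor^(n-1), capped at maxSeconds.
	delay := instancePollSeconds
	for n := 1; n <= 10; n++ {
		fmt.Printf("try %2d: next recheck in %v\n", n, time.Duration(delay*float64(time.Second)))
		delay *= factor
		if delay > maxSeconds {
			delay = maxSeconds // saturation
		}
	}
}
```

For these values the sequence is 5s, 7.5s, 11.25s, 16.875s, ... until it saturates at 60s.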
## Diagnostics

The Orchestrator provides the `debug/metrics` web endpoint for diagnostics.

`discoveries.dead_instances` - provides the number of instances currently registered as dead.\
`discoveries.dead_instances_queue_length` - provides the current length of the queue dedicated to dead instances. Note this is valid only when `DeadInstanceDiscoveryMaxConcurrency > 0`, i.e. when a separate queue is used. In other cases it is always zero.

Other diagnostics endpoints:

`api/discovery-queue-metrics-raw/:seconds` - provides the raw metrics for a given time for the `DEFAULT` discovery queue.\
`api/discovery-queue-metrics-raw/:queue/:seconds` - provides the raw metrics for a given time for the supplied (`DEFAULT` or `DEADINSTANCES`) discovery queue.\
`api/discovery-queue-metrics-aggregated/:seconds` - provides aggregated metrics for a given time for the `DEFAULT` discovery queue.\
`api/discovery-queue-metrics-aggregated/:queue/:seconds` - provides aggregated metrics for a given time for the supplied (`DEFAULT` or `DEADINSTANCES`) discovery queue.

Note that the `DEADINSTANCES` queue is available only if `DeadInstanceDiscoveryMaxConcurrency > 0`.
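Usage sketch (assuming an Orchestrator instance listening on the default `localhost:3000`):

```
# Raw metrics for the DEADINSTANCES queue over the last 60 seconds.
# Responds with an API error when DeadInstanceDiscoveryMaxConcurrency = 0,
# because the queue is then never created.
curl -s "http://localhost:3000/api/discovery-queue-metrics-raw/DEADINSTANCES/60"
```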
## Logging

Logging of the dead instance discovery process is controlled via the boolean `DeadInstanceDiscoveryLogsEnabled` parameter. It is disabled by default.
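With logging enabled, debug entries follow the formats used in this commit's `go/inst/dead_instance_filter.go`; for example (hostname and values hypothetical, timestamp elided):

```
Dead instance registered db1.example.com:3306. Iteration: 3. Current delay factor: 2.25 (next check in 11.25s (on ...))
```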

docs/configuration.md

Lines changed: 1 addition & 0 deletions

```diff
@@ -10,6 +10,7 @@ Use the following small steps to configure `orchestrator`:
 
 - [Backend](configuration-backend.md)
 - [Discovery: basic](configuration-discovery-basic.md)
+- [Discovery: advanced](configuration-discovery-advanced.md)
 - [Discovery: resolving names](configuration-discovery-resolve.md)
 - [Discovery: classifying servers](configuration-discovery-classifying.md)
 - [Discovery: Pseudo-GTID](configuration-discovery-pseudo-gtid.md)
```

go/config/config.go

Lines changed: 15 additions & 0 deletions

```diff
@@ -142,6 +142,10 @@ type Configuration struct {
 	DiscoverByShowSlaveHosts bool // Attempt SHOW SLAVE HOSTS before PROCESSLIST
 	UseSuperReadOnly         bool // Should orchestrator super_read_only any time it sets read_only
 	InstancePollSeconds      uint // Number of seconds between instance reads
+	DeadInstancePollSecondsMultiplyFactor float32 // InstancePollSeconds increase factor for dead instance read time calculation
+	DeadInstancePollSecondsMax            uint    // Maximum delay between dead instance read attempts
+	DeadInstanceDiscoveryMaxConcurrency   uint    // Number of goroutines doing dead hosts discovery
+	DeadInstanceDiscoveryLogsEnabled      bool    // Enable logs related to dead instance discoveries
 	ReasonableInstanceCheckSeconds uint // Number of seconds an instance read is allowed to take before it is considered invalid, i.e. before LastCheckValid will be false
 	InstanceWriteBufferSize        int  // Instance write buffer size (max number of instances to flush in one INSERT ODKU)
 	BufferInstanceWrites           bool // Set to 'true' for write-optimization on backend table (compromise: writes can be stale and overwrite non stale data)
@@ -332,6 +336,10 @@ func newConfiguration() *Configuration {
 		DefaultInstancePort: 3306,
 		TLSCacheTTLFactor:   100,
 		InstancePollSeconds: 5,
+		DeadInstancePollSecondsMultiplyFactor: 1,
+		DeadInstancePollSecondsMax:            5 * 60,
+		DeadInstanceDiscoveryMaxConcurrency:   0,
+		DeadInstanceDiscoveryLogsEnabled:      false,
 		ReasonableInstanceCheckSeconds: 1,
 		InstanceWriteBufferSize:        100,
 		BufferInstanceWrites:           false,
@@ -630,6 +638,13 @@ func (this *Configuration) postReadAdjustments() error {
 		this.ReasonableLockedSemiSyncMasterSeconds = uint(this.ReasonableReplicationLagSeconds)
 	}
 
+	if this.DeadInstancePollSecondsMultiplyFactor < 1 {
+		return fmt.Errorf("DeadInstancePollSecondsMultiplyFactor can not be smaller than 1")
+	}
+
+	if this.DeadInstancePollSecondsMax < this.InstancePollSeconds {
+		return fmt.Errorf("DeadInstancePollSecondsMax can not be smaller than InstancePollSeconds")
+	}
 	return nil
 }
```
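As a usage note on the checks above, a sketch of a configuration that would fail `postReadAdjustments` at startup, given the default `InstancePollSeconds` of 5 (values chosen only to trip the validation):

```json
{
  "DeadInstancePollSecondsMultiplyFactor": 0.5,
  "DeadInstancePollSecondsMax": 2
}
```

Either value alone is enough to make configuration loading return the corresponding error.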

go/discovery/queue.go

Lines changed: 9 additions & 0 deletions

```diff
@@ -69,6 +69,15 @@ func StopMonitoring() {
 	}
 }
 
+func ReturnQueue(name string) *Queue {
+	dcLock.Lock()
+	defer dcLock.Unlock()
+	if q, found := discoveryQueue[name]; found {
+		return q
+	}
+	return nil
+}
+
 // CreateOrReturnQueue allows for creation of a new discovery queue or
 // returning a pointer to an existing one given the name.
 func CreateOrReturnQueue(name string) *Queue {
```
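A short sketch of how the two lookups differ (assumes the orchestrator repo is importable; this is an illustration, not part of the commit):

```go
package main

import (
	"fmt"

	"github.com/openark/orchestrator/go/discovery"
)

func main() {
	// CreateOrReturnQueue creates the queue on first use.
	q := discovery.CreateOrReturnQueue("DEFAULT")
	fmt.Println(q != nil) // true

	// ReturnQueue only looks up existing queues and yields nil for unknown
	// names; the new HTTP handlers rely on this to reject unknown queues.
	if discovery.ReturnQueue("DEADINSTANCES") == nil {
		fmt.Println("no DEADINSTANCES queue (DeadInstanceDiscoveryMaxConcurrency = 0)")
	}
}
```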

go/http/api.go

Lines changed: 56 additions & 13 deletions

```diff
@@ -2395,39 +2395,80 @@ func (this *HttpAPI) DiscoveryMetricsAggregated(params martini.Params, r render.
 	r.JSON(http.StatusOK, aggregated)
 }
 
-// DiscoveryQueueMetricsRaw returns the raw queue metrics (active and
-// queued values), data taken secondly for the last N seconds.
-func (this *HttpAPI) DiscoveryQueueMetricsRaw(params martini.Params, r render.Render, req *http.Request, user auth.User) {
+func (this *HttpAPI) discoveryQueueMetricsAggregatedCommon(params martini.Params, r render.Render, req *http.Request, user auth.User, queueName string) {
+	seconds, err := strconv.Atoi(params["seconds"])
+	log.Debugf("DiscoveryQueueMetricsAggregated: queue: %s, seconds: %d", queueName, seconds)
+	if err != nil {
+		Respond(r, &APIResponse{Code: ERROR, Message: "Unable to generate discovery queue aggregated metrics"})
+		return
+	}
+
+	queue := discovery.ReturnQueue(queueName)
+	if queue == nil {
+		Respond(r, &APIResponse{Code: ERROR, Message: "Unable to generate discovery queue aggregated metrics for unknown queue"})
+		return
+	}
+	aggregated := queue.AggregatedDiscoveryQueueMetrics(seconds)
+	log.Debugf("DiscoveryQueueMetricsAggregated data: %+v", aggregated)
+
+	r.JSON(http.StatusOK, aggregated)
+}
+
+func (this *HttpAPI) discoveryQueueMetricsRawCommon(params martini.Params, r render.Render, req *http.Request, user auth.User, queueName string) {
 	seconds, err := strconv.Atoi(params["seconds"])
 	log.Debugf("DiscoveryQueueMetricsRaw: seconds: %d", seconds)
 	if err != nil {
-		Respond(r, &APIResponse{Code: ERROR, Message: "Unable to generate discovery queue aggregated metrics"})
+		Respond(r, &APIResponse{Code: ERROR, Message: "Unable to generate discovery queue raw metrics"})
 		return
 	}
 
-	queue := discovery.CreateOrReturnQueue("DEFAULT")
+	queue := discovery.ReturnQueue(queueName)
+	if queue == nil {
+		Respond(r, &APIResponse{Code: ERROR, Message: "Unable to generate discovery queue raw metrics for unknown queue"})
+		return
+	}
 	metrics := queue.DiscoveryQueueMetrics(seconds)
 	log.Debugf("DiscoveryQueueMetricsRaw data: %+v", metrics)
 
 	r.JSON(http.StatusOK, metrics)
 }
 
+// DiscoveryQueueMetricsRaw returns the raw queue metrics (active and
+// queued values), data taken secondly for the last N seconds.
+func (this *HttpAPI) DiscoveryQueueMetricsRaw(params martini.Params, r render.Render, req *http.Request, user auth.User) {
+	this.discoveryQueueMetricsRawCommon(params, r, req, user, "DEFAULT")
+}
+
 // DiscoveryQueueMetricsAggregated returns a single value showing the metrics of the discovery queue over the last N seconds.
 // This is expected to be called every 60 seconds (?) and the config setting of the retention period is currently hard-coded.
 // See go/discovery/ for more information.
 func (this *HttpAPI) DiscoveryQueueMetricsAggregated(params martini.Params, r render.Render, req *http.Request, user auth.User) {
-	seconds, err := strconv.Atoi(params["seconds"])
-	log.Debugf("DiscoveryQueueMetricsAggregated: seconds: %d", seconds)
-	if err != nil {
-		Respond(r, &APIResponse{Code: ERROR, Message: "Unable to generate discovery queue aggregated metrics"})
+	this.discoveryQueueMetricsAggregatedCommon(params, r, req, user, "DEFAULT")
+}
+
+// DiscoveryQueueMetricsRaw2 returns the raw queue metrics (active and
+// queued values), data taken secondly for the last N seconds.
+func (this *HttpAPI) DiscoveryQueueMetricsRaw2(params martini.Params, r render.Render, req *http.Request, user auth.User) {
+	queue, found := params["queue"]
+	if !found {
+		Respond(r, &APIResponse{Code: ERROR, Message: "Unable to generate discovery queue raw metrics"})
 		return
 	}
 
-	queue := discovery.CreateOrReturnQueue("DEFAULT")
-	aggregated := queue.AggregatedDiscoveryQueueMetrics(seconds)
-	log.Debugf("DiscoveryQueueMetricsAggregated data: %+v", aggregated)
+	this.discoveryQueueMetricsRawCommon(params, r, req, user, queue)
+}
 
-	r.JSON(http.StatusOK, aggregated)
+// DiscoveryQueueMetricsAggregated2 returns a single value showing the metrics of the discovery queue over the last N seconds.
+// This is expected to be called every 60 seconds (?) and the config setting of the retention period is currently hard-coded.
+// See go/discovery/ for more information.
+func (this *HttpAPI) DiscoveryQueueMetricsAggregated2(params martini.Params, r render.Render, req *http.Request, user auth.User) {
+	queue, found := params["queue"]
+	if !found {
+		Respond(r, &APIResponse{Code: ERROR, Message: "Unable to generate discovery queue aggregated metrics"})
+		return
+	}
+
+	this.discoveryQueueMetricsAggregatedCommon(params, r, req, user, queue)
 }
 
 // BackendQueryMetricsRaw returns the raw backend query metrics
@@ -3982,6 +4023,8 @@ func (this *HttpAPI) RegisterRequests(m *martini.ClassicMartini) {
 	this.registerAPIRequest(m, "discovery-metrics-aggregated/:seconds", this.DiscoveryMetricsAggregated)
 	this.registerAPIRequest(m, "discovery-queue-metrics-raw/:seconds", this.DiscoveryQueueMetricsRaw)
 	this.registerAPIRequest(m, "discovery-queue-metrics-aggregated/:seconds", this.DiscoveryQueueMetricsAggregated)
+	this.registerAPIRequest(m, "discovery-queue-metrics-raw/:queue/:seconds", this.DiscoveryQueueMetricsRaw2)
+	this.registerAPIRequest(m, "discovery-queue-metrics-aggregated/:queue/:seconds", this.DiscoveryQueueMetricsAggregated2)
 	this.registerAPIRequest(m, "backend-query-metrics-raw/:seconds", this.BackendQueryMetricsRaw)
 	this.registerAPIRequest(m, "backend-query-metrics-aggregated/:seconds", this.BackendQueryMetricsAggregated)
 	this.registerAPIRequest(m, "write-buffer-metrics-raw/:seconds", this.WriteBufferMetricsRaw)
```
go/inst/dead_instance_filter.go

Lines changed: 141 additions & 0 deletions
@@ -0,0 +1,141 @@

```go
package inst

import (
	"sync"
	"time"

	"github.com/openark/golib/log"
	"github.com/openark/orchestrator/go/config"
	"github.com/rcrowley/go-metrics"
)

// The behavior depends on settings:
// 1. DeadInstanceDiscoveryMaxConcurrency > 0 and DeadInstancePollSecondsMultiplyFactor > 1:
//    A separate discovery queue for dead instances is created, dead instances
//    are checked by a dedicated pool of Go workers, and exponential backoff
//    is applied between checks.
// 2. DeadInstanceDiscoveryMaxConcurrency = 0 and DeadInstancePollSecondsMultiplyFactor > 1:
//    No separate discovery queue for dead instances is created and dead instances
//    are checked by the same pool of Go workers as healthy instances; however,
//    exponential backoff is applied for dead instances.
// 3. DeadInstanceDiscoveryMaxConcurrency > 0 and DeadInstancePollSecondsMultiplyFactor = 1:
//    A separate discovery queue for dead instances is created and dead instances
//    are checked by a dedicated pool of Go workers. No exponential backoff
//    is applied for dead instances.
// 4. DeadInstanceDiscoveryMaxConcurrency = 0 and DeadInstancePollSecondsMultiplyFactor = 1:
//    No separate discovery queue for dead instances, no dedicated Go workers,
//    no backoff mechanism. This is the default working mode.
//
// We always register a dead instance. It shouldn't be a big overhead,
// and we get the info about the dead instances count.

type deadInstance struct {
	DelayFactor   float32
	NextCheckTime time.Time
	TryCnt        int
}

type deadInstancesFilter struct {
	deadInstances      map[InstanceKey]deadInstance
	deadInstancesMutex sync.RWMutex
}

var DeadInstancesFilter deadInstancesFilter

var deadInstancesCounter = metrics.NewCounter()

func init() {
	metrics.Register("discoveries.dead_instances", deadInstancesCounter)
	DeadInstancesFilter.deadInstances = make(map[InstanceKey]deadInstance)
	DeadInstancesFilter.deadInstancesMutex = sync.RWMutex{}
}

// RegisterInstance registers a given instance in the dead instances cache.
// Once the instance is registered, its discovery can be delayed with an
// exponential backoff mechanism according to the
// DeadInstancePollSecondsMultiplyFactor value.
// During registration, the next desired check time is calculated based on
// the current delay factor and the DeadInstancePollSecondsMultiplyFactor and
// DeadInstancePollSecondsMax parameters.
func (f *deadInstancesFilter) RegisterInstance(instanceKey *InstanceKey) {
	delayFactor := float32(1)
	previousTry := 0

	f.deadInstancesMutex.Lock()
	defer f.deadInstancesMutex.Unlock()

	instance, exists := f.deadInstances[*instanceKey]
	if exists {
		delayFactor = config.Config.DeadInstancePollSecondsMultiplyFactor * instance.DelayFactor
		previousTry = instance.TryCnt
	} else {
		deadInstancesCounter.Inc(1)
	}

	maxDelay := time.Duration(config.Config.DeadInstancePollSecondsMax) * time.Second
	currentDelay := time.Duration(delayFactor*float32(config.Config.InstancePollSeconds)) * time.Second

	// needed only for the debug log below
	delayFactorTmp := delayFactor

	if currentDelay > maxDelay {
		// saturation
		currentDelay = maxDelay
		delayFactor = instance.DelayFactor // back to the previous one
	}
	nextCheck := time.Now().Add(currentDelay)

	instance = deadInstance{
		DelayFactor:   delayFactor,
		NextCheckTime: nextCheck,
		TryCnt:        previousTry + 1,
	}
	f.deadInstances[*instanceKey] = instance

	if config.Config.DeadInstanceDiscoveryLogsEnabled {
		log.Debugf("Dead instance registered %v:%v. Iteration: %v. Current delay factor: %v (next check in %v (on %v))",
			instanceKey.Hostname, instanceKey.Port, instance.TryCnt, delayFactorTmp, currentDelay, instance.NextCheckTime)
	}
}

// UnregisterInstance removes the given instance from the dead instances cache.
func (f *deadInstancesFilter) UnregisterInstance(instanceKey *InstanceKey) {
	f.deadInstancesMutex.Lock()
	defer f.deadInstancesMutex.Unlock()

	instance, exists := f.deadInstances[*instanceKey]
	if exists {
		if config.Config.DeadInstanceDiscoveryLogsEnabled {
			log.Debugf("Dead instance unregistered: %v:%v after iteration: %v",
				instanceKey.Hostname, instanceKey.Port, instance.TryCnt)
		}
		deadInstancesCounter.Dec(1)
		delete(f.deadInstances, *instanceKey)
	}
}

// InstanceRecheckNeeded checks whether a given instance is registered in the
// dead instances cache and, if it is, whether it is time to rediscover it.
// It returns two boolean values:
// - the first indicates whether the instance is registered;
// - the second indicates whether it is time to rediscover the node.
func (f *deadInstancesFilter) InstanceRecheckNeeded(instanceKey *InstanceKey) (bool, bool) {
	f.deadInstancesMutex.RLock()
	defer f.deadInstancesMutex.RUnlock()

	instance, exists := f.deadInstances[*instanceKey]

	if !exists {
		return exists, false
	}

	if instance.NextCheckTime.After(time.Now()) {
		// recheck time is still in the future
		return exists, false
	}

	if config.Config.DeadInstanceDiscoveryLogsEnabled {
		log.Debugf("Dead instance recheck: %v:%v. Iteration: %v",
			instanceKey.Hostname, instanceKey.Port, instance.TryCnt)
	}
	return exists, true
}
```
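A sketch of the intended call sequence for this filter (assumes the orchestrator repo is importable; the discovery code wiring these calls together is not part of this file):

```go
package main

import (
	"fmt"

	"github.com/openark/orchestrator/go/inst"
)

func main() {
	key := inst.InstanceKey{Hostname: "db1.example.com", Port: 3306}

	// A failed discovery registers (or re-registers) the instance,
	// pushing NextCheckTime further out on each consecutive failure.
	inst.DeadInstancesFilter.RegisterInstance(&key)

	// Before rediscovering, the poller asks whether the backoff has elapsed.
	registered, recheck := inst.DeadInstancesFilter.InstanceRecheckNeeded(&key)
	fmt.Println(registered, recheck) // true, false (next check is still in the future)

	// A successful discovery removes the instance from the cache.
	inst.DeadInstancesFilter.UnregisterInstance(&key)
}
```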
