From 8ac6b84ffdbd5dda4a6fb1e2d6f9c25261e34fdf Mon Sep 17 00:00:00 2001
From: Tiago Castro
Date: Tue, 17 Dec 2024 15:56:36 +0000
Subject: [PATCH] fix(topology): use topology when decreasing replicas

When removing volume replicas, take the pool topology into account and
prefer to remove replicas which don't line up with the volume topology.
This can happen when a volume's topology has been modified.

Signed-off-by: Tiago Castro
---
 .../src/bin/core/controller/scheduling/mod.rs |  8 +++++
 .../controller/scheduling/resources/mod.rs    | 13 +++++++
 .../bin/core/controller/scheduling/volume.rs  |  8 +++++
 .../scheduling/volume_policy/pool.rs          | 35 ++++++++++++-------
 4 files changed, 52 insertions(+), 12 deletions(-)

diff --git a/control-plane/agents/src/bin/core/controller/scheduling/mod.rs b/control-plane/agents/src/bin/core/controller/scheduling/mod.rs
index 147e0e475..bc48d0f00 100644
--- a/control-plane/agents/src/bin/core/controller/scheduling/mod.rs
+++ b/control-plane/agents/src/bin/core/controller/scheduling/mod.rs
@@ -163,6 +163,14 @@ impl ChildSorters {
         match Self::sort_by_health(a, b) {
             Ordering::Equal => match Self::sort_by_child(a, b) {
                 Ordering::Equal => {
+                    // remove mismatched topology replicas first
+                    if let (Some(a), Some(b)) = (a.valid_pool_topology(), b.valid_pool_topology()) {
+                        match a.cmp(b) {
+                            Ordering::Equal => {}
+                            _else => return _else,
+                        }
+                    }
+
                     let childa_is_local = !a.spec().share.shared();
                     let childb_is_local = !b.spec().share.shared();
                     if childa_is_local == childb_is_local {
diff --git a/control-plane/agents/src/bin/core/controller/scheduling/resources/mod.rs b/control-plane/agents/src/bin/core/controller/scheduling/resources/mod.rs
index 6193d57c6..b0efe210a 100644
--- a/control-plane/agents/src/bin/core/controller/scheduling/resources/mod.rs
+++ b/control-plane/agents/src/bin/core/controller/scheduling/resources/mod.rs
@@ -176,6 +176,7 @@ pub(crate) struct ReplicaItem {
     /// a Affinity Group and the already created volumes have replicas
     /// on this pool.
     ag_replicas_on_pool: Option<u64>,
+    valid_pool_topology: Option<bool>,
 }
 
 impl ReplicaItem {
@@ -197,8 +198,20 @@ impl ReplicaItem {
             child_spec,
             child_info,
             ag_replicas_on_pool,
+            valid_pool_topology: None,
         }
     }
+    /// Set the validity of the replica's pool topology.
+    /// If set to false, this means the replica is not in sync with the volume topology and
+    /// therefore should be replaced by another replica which is in sync.
+    pub(crate) fn with_pool_topology(mut self, valid_pool_topology: Option<bool>) -> ReplicaItem {
+        self.valid_pool_topology = valid_pool_topology;
+        self
+    }
+    /// Get a reference to the topology validity flag.
+    pub(crate) fn valid_pool_topology(&self) -> &Option<bool> {
+        &self.valid_pool_topology
+    }
     /// Get a reference to the replica spec.
     pub(crate) fn spec(&self) -> &ReplicaSpec {
         &self.replica_spec
diff --git a/control-plane/agents/src/bin/core/controller/scheduling/volume.rs b/control-plane/agents/src/bin/core/controller/scheduling/volume.rs
index e3a1ef513..edaf1becb 100644
--- a/control-plane/agents/src/bin/core/controller/scheduling/volume.rs
+++ b/control-plane/agents/src/bin/core/controller/scheduling/volume.rs
@@ -343,6 +343,14 @@ impl GetChildForRemovalContext {
                     }),
                     ag_rep_count,
                 )
+                .with_pool_topology({
+                    use crate::controller::scheduling::volume_policy::pool::PoolBaseFilters;
+                    Some(PoolBaseFilters::topology_(
+                        &self.spec,
+                        &self.registry,
+                        replica_spec.pool_name(),
+                    ))
+                })
             })
             .collect::<Vec<_>>()
     }
diff --git a/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/pool.rs b/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/pool.rs
index 6dd80ef93..4be06eaba 100644
--- a/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/pool.rs
+++ b/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/pool.rs
@@ -1,10 +1,16 @@
-use crate::controller::scheduling::{
-    resources::{ChildItem, PoolItem},
-    volume::{GetSuitablePoolsContext, ReplicaResizePoolsContext},
-    volume_policy::qualifies_label_criteria,
+use crate::controller::{
+    registry::Registry,
+    scheduling::{
+        resources::{ChildItem, PoolItem},
+        volume::{GetSuitablePoolsContext, ReplicaResizePoolsContext},
+        volume_policy::qualifies_label_criteria,
+    },
 };
 use std::collections::HashMap;
-use stor_port::types::v0::transport::{PoolStatus, PoolTopology};
+use stor_port::types::v0::{
+    store::volume::VolumeSpec,
+    transport::{PoolId, PoolStatus, PoolTopology},
+};
 
 /// Filter pools used for replica creation.
 pub(crate) struct PoolBaseFilters {}
@@ -77,27 +83,32 @@ impl PoolBaseFilters {
     /// Should only attempt to use pools having specific creation label if topology has it.
     pub(crate) fn topology(request: &GetSuitablePoolsContext, item: &PoolItem) -> bool {
+        Self::topology_(request, request.registry(), &item.pool.id)
+    }
+
+    /// Should only attempt to use pools having specific creation label if topology has it.
+    pub(crate) fn topology_(volume: &VolumeSpec, registry: &Registry, pool_id: &PoolId) -> bool {
         let volume_pool_topology_inclusion_labels: HashMap<String, String>;
-        match request.topology.clone() {
+        match &volume.topology {
             None => return true,
-            Some(topology) => match topology.pool {
+            Some(topology) => match &topology.pool {
                 None => return true,
-                Some(pool_topology) => match pool_topology {
+                Some(pool_topology) => match &pool_topology {
                     PoolTopology::Labelled(labelled_topology) => {
                         // The labels in Volume Pool Topology should match the pool labels if
                         // present, otherwise selection of any pool is allowed.
-                        if !labelled_topology.inclusion.is_empty() {
-                            volume_pool_topology_inclusion_labels = labelled_topology.inclusion
-                        } else {
+                        if labelled_topology.inclusion.is_empty() {
+                            // todo: missing exclusion check?
                             return true;
                         }
+                        volume_pool_topology_inclusion_labels = labelled_topology.inclusion.clone()
                     }
                 },
             },
         };
         // We will reach this part of code only if the volume has inclusion/exclusion labels.
-        match request.registry().specs().pool(&item.pool.id) {
+        match registry.specs().pool(pool_id) {
             Ok(spec) => match spec.labels {
                 None => false,
                 Some(pool_labels) => {
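
Reviewer note (not part of the patch): a minimal standalone sketch of the new
removal tie-breaker. The `Candidate` struct and `main` below are hypothetical
stand-ins for `ReplicaItem`; only the ordering logic mirrors the patch. Since
`bool` orders `false` before `true`, an ascending sort on the validity flag
puts replicas whose pool no longer matches the volume topology at the front of
the removal candidate list, once the health and child-state comparisons tie.

use std::cmp::Ordering;

/// Hypothetical stand-in for `ReplicaItem` (illustration only).
#[derive(Debug)]
struct Candidate {
    name: &'static str,
    /// Some(false) = pool out of sync with the volume topology,
    /// Some(true) = still in sync, None = not evaluated.
    valid_pool_topology: Option<bool>,
}

/// Mirrors the tie-breaker added to `ChildSorters::sort`: when both flags
/// are known, `false` sorts before `true`, so mismatched replicas come first.
fn sort_by_topology(a: &Candidate, b: &Candidate) -> Ordering {
    if let (Some(a), Some(b)) = (a.valid_pool_topology, b.valid_pool_topology) {
        return a.cmp(&b);
    }
    Ordering::Equal
}

fn main() {
    let mut candidates = vec![
        Candidate { name: "replica-1", valid_pool_topology: Some(true) },
        Candidate { name: "replica-2", valid_pool_topology: Some(false) },
    ];
    candidates.sort_by(sort_by_topology);
    // replica-2 (topology mismatch) is now the first removal candidate.
    assert_eq!(candidates[0].name, "replica-2");
    println!("{candidates:?}");
}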
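
The refactored filter can be sketched in isolation as well. This is a
simplified model of the decision `topology_` makes, with plain `HashMap`s
standing in for the real `VolumeSpec`/pool label types and exact key/value
matching standing in for `qualifies_label_criteria` (whose exact semantics
are not shown in this patch): no pool topology, or an empty inclusion set,
allows any pool; otherwise the pool must carry every inclusion label.

use std::collections::HashMap;

/// Hypothetical stand-in for the labelled pool topology (illustration only).
struct LabelledTopology {
    inclusion: HashMap<String, String>,
}

/// Simplified shape of the `topology_` check.
fn pool_matches(
    topology: Option<&LabelledTopology>,
    pool_labels: Option<&HashMap<String, String>>,
) -> bool {
    // No topology, or no inclusion labels: any pool qualifies.
    let Some(topo) = topology else { return true };
    if topo.inclusion.is_empty() {
        return true;
    }
    // Inclusion labels present: an unlabelled pool never qualifies, and a
    // labelled pool must match every inclusion key/value pair.
    match pool_labels {
        None => false,
        Some(labels) => topo.inclusion.iter().all(|(k, v)| labels.get(k) == Some(v)),
    }
}

fn main() {
    let inclusion = HashMap::from([("zone".to_string(), "us-east-1a".to_string())]);
    let topo = LabelledTopology { inclusion };
    let pool = HashMap::from([("zone".to_string(), "us-east-1b".to_string())]);

    // The pool's zone label no longer matches the (modified) volume topology,
    // so a replica on this pool becomes the preferred removal candidate.
    assert!(!pool_matches(Some(&topo), Some(&pool)));
}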