diff --git a/src/base/CMakeLists.txt b/src/base/CMakeLists.txt index 382f6c139..7a5f1e5e3 100644 --- a/src/base/CMakeLists.txt +++ b/src/base/CMakeLists.txt @@ -16,6 +16,7 @@ set(BASE_PROTOBUFS base/pod_anti_affinity.proto base/reference_desc.proto base/taints.proto + base/avoid_pods_annotation.proto base/resource_desc.proto base/resource_stats.proto base/resource_topology_node_desc.proto diff --git a/src/base/avoid_pods_annotation.proto b/src/base/avoid_pods_annotation.proto new file mode 100644 index 000000000..b3136bfce --- /dev/null +++ b/src/base/avoid_pods_annotation.proto @@ -0,0 +1,12 @@ +// The Firmament project +// Copyright (c) The Firmament Authors. +// + +syntax = "proto3"; + +package firmament; + +message AvoidPodsAnnotation { + string kind = 1; + string uid = 2; +} diff --git a/src/base/resource_desc.proto b/src/base/resource_desc.proto index d2c40a61a..b3d463883 100644 --- a/src/base/resource_desc.proto +++ b/src/base/resource_desc.proto @@ -12,6 +12,7 @@ import "base/label.proto"; import "base/resource_vector.proto"; import "base/whare_map_stats.proto"; import "base/taints.proto"; +import "base/avoid_pods_annotation.proto"; message ResourceDescriptor { enum ResourceState { @@ -65,4 +66,6 @@ message ResourceDescriptor { repeated Label labels = 32; //Taints repeated Taint taints = 33; + // Avoid pods annotations (owner kind/uid pairs) + repeated AvoidPodsAnnotation avoids = 34; } diff --git a/src/base/task_desc.proto b/src/base/task_desc.proto index b8d41e85d..2b85c7f96 100644 --- a/src/base/task_desc.proto +++ b/src/base/task_desc.proto @@ -94,4 +94,8 @@ message TaskDescriptor { string task_namespace = 35; // Tolerations repeated Toleration tolerations = 36; + // Owner reference kind + string owner_ref_kind = 37; + // Owner reference uid + string owner_ref_uid = 38; } diff --git a/src/scheduling/flow/cpu_cost_model.cc b/src/scheduling/flow/cpu_cost_model.cc index f04c01a02..a536cf3a7 100644 --- a/src/scheduling/flow/cpu_cost_model.cc +++ 
b/src/scheduling/flow/cpu_cost_model.cc @@ -323,6 +323,22 @@ ArcDescriptor CpuCostModel::EquivClassToEquivClass(EquivClass_t ec1, } else { taints_score.final_score = 0; } + + // Express the node's prefer-avoid-pods priority score as a cost + if (rd.avoids_size()) { + unordered_map>* avoid_pods_priority_scores_ptr = + FindOrNull(ec_to_node_priority_scores, ec1); + CHECK_NOTNULL(avoid_pods_priority_scores_ptr); + PriorityScoresList_t* avoid_pods_scores_struct_ptr = + FindOrNull(*avoid_pods_priority_scores_ptr, *machine_res_id); + CHECK_NOTNULL(avoid_pods_scores_struct_ptr); + PriorityScore_t& prefer_avoid_pods_score = + avoid_pods_scores_struct_ptr->prefer_avoid_pods_priority; + cost_vector.prefer_avoid_pods_cost_ = prefer_avoid_pods_score.score; + } else { + cost_vector.prefer_avoid_pods_cost_ = 0; + } cost_vector.node_affinity_soft_cost_ = omega_ - node_affinity_normalized_score; @@ -355,6 +371,7 @@ Cost_t CpuCostModel::FlattenCostVector(CpuMemCostVector_t cv) { accumulator += cv.node_affinity_soft_cost_; accumulator += cv.pod_affinity_soft_cost_; accumulator += cv.intolerable_taints_cost_; + accumulator += cv.prefer_avoid_pods_cost_; if (accumulator > infinity_) infinity_ = accumulator + 1; return accumulator; } @@ -1191,6 +1208,49 @@ void CpuCostModel::CalculatePodAffinityAntiAffinityPreference( } } +void CpuCostModel::CalculateNodePreferAvoidPodsPriority( + const ResourceDescriptor rd, const TaskDescriptor td, + const EquivClass_t ec) { + unordered_map>* nodes_priority_scores_ptr = + FindOrNull(ec_to_node_priority_scores, ec); + if (!nodes_priority_scores_ptr) { + unordered_map> node_to_priority_scores_map; + InsertIfNotPresent(&ec_to_node_priority_scores, ec, + node_to_priority_scores_map); + nodes_priority_scores_ptr = FindOrNull(ec_to_node_priority_scores, ec); + } + CHECK_NOTNULL(nodes_priority_scores_ptr); + ResourceID_t res_id = ResourceIDFromString(rd.uuid()); + PriorityScoresList_t* priority_scores_struct_ptr = + FindOrNull(*nodes_priority_scores_ptr, res_id); + if 
(!priority_scores_struct_ptr) { + PriorityScoresList_t priority_scores_list; + InsertIfNotPresent(nodes_priority_scores_ptr, res_id, priority_scores_list); + priority_scores_struct_ptr = FindOrNull(*nodes_priority_scores_ptr, res_id); + } + CHECK_NOTNULL(priority_scores_struct_ptr); + PriorityScore_t& prefer_avoid_pods_priority = + priority_scores_struct_ptr->prefer_avoid_pods_priority; + if ((rd.avoids_size()) + && (!td.owner_ref_kind().compare(string("ReplicationController")) + || !td.owner_ref_kind().compare(string("ReplicaSet")))) { + for (auto avoid : rd.avoids()) { + if ((!td.owner_ref_kind().compare(avoid.kind())) + && (!td.owner_ref_uid().compare(avoid.uid()))) { + // An avoid pods annotation matches the task's owner kind and uid. + // Set the score to omega_ so that the resulting cost is high. + prefer_avoid_pods_priority.score = omega_; + return; + } + } + } + // No match for avoid pods annotations. + // Score should be zero so that cost is not affected by this. + prefer_avoid_pods_priority.score = 0; +} + // Pod affinity/anti-affinity symmetry. 
void CpuCostModel::UpdateResourceToTaskSymmetryMap(ResourceID_t res_id, TaskID_t task_id) { @@ -1444,6 +1504,11 @@ vector* CpuCostModel::GetEquivClassToEquivClassesArcs( } } } + + // Calculate the prefer-avoid-pods priority score for this node + if (rd.avoids_size()) { + CalculateNodePreferAvoidPodsPriority(rd, *td_ptr, ec); + } } CpuMemResVector_t available_resources; available_resources.cpu_cores_ = diff --git a/src/scheduling/flow/cpu_cost_model.h b/src/scheduling/flow/cpu_cost_model.h index d0b7c4bbb..748f1b1dc 100644 --- a/src/scheduling/flow/cpu_cost_model.h +++ b/src/scheduling/flow/cpu_cost_model.h @@ -43,12 +43,14 @@ struct CpuMemCostVector_t { uint64_t node_affinity_soft_cost_; uint64_t pod_affinity_soft_cost_; uint64_t intolerable_taints_cost_; + uint64_t prefer_avoid_pods_cost_; CpuMemCostVector_t() : cpu_mem_cost_(0), balanced_res_cost_(0), node_affinity_soft_cost_(0), pod_affinity_soft_cost_(0), - intolerable_taints_cost_(0) {} + intolerable_taints_cost_(0), + prefer_avoid_pods_cost_(0) {} }; struct CpuMemResVector_t { @@ -81,6 +83,7 @@ struct PriorityScoresList_t { PriorityScore_t node_affinity_priority; PriorityScore_t pod_affinity_priority; PriorityScore_t intolerable_taints_priority; + PriorityScore_t prefer_avoid_pods_priority; }; class CpuCostModel : public CostModelInterface { @@ -157,6 +160,9 @@ class CpuCostModel : public CostModelInterface { void CalculateIntolerableTaintsCost(const ResourceDescriptor& rd, const TaskDescriptor* td, const EquivClass_t ec); + void CalculateNodePreferAvoidPodsPriority(const ResourceDescriptor rd, + const TaskDescriptor td, + const EquivClass_t ec); // Pod affinity/anti-affinity symmetry bool CheckPodAffinityAntiAffinitySymmetryConflict(TaskDescriptor* td_ptr); void UpdateResourceToTaskSymmetryMap(ResourceID_t res_id, TaskID_t td);