Skip to content
This repository was archived by the owner on Oct 16, 2024. It is now read-only.

Commit 3754524

Browse files
authored
Merge pull request #319 from Yelp/u/wilmerrafael/CLUSTERMAN-808_adding_alert_option_on_max_cap_pools
Adding alert on maximum capacity option for pools
2 parents 7dfaf51 + 754f31d commit 3754524

File tree

9 files changed

+22
-3
lines changed

9 files changed

+22
-3
lines changed

acceptance/srv-configs/clusterman-clusters/local-dev/default.kubernetes

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,3 +19,5 @@ autoscale_signal:
1919
autoscaling:
2020
prevent_scale_down_after_capacity_loss: true
2121
instance_loss_threshold: 3
22+
23+
alert_on_max_capacity: false

acceptance/srv-configs/clusterman-clusters/local-dev/default.mesos

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,3 +27,5 @@ autoscale_signal:
2727
- name: disk_allocated
2828
type: system_metrics
2929
minute_range: 10
30+
31+
alert_on_max_capacity: false

clusterman/autoscaler/autoscaler.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -179,7 +179,10 @@ def run(self, dry_run: bool = False, timestamp: Optional[arrow.Arrow] = None) ->
179 179           capacity_offset = get_capacity_offset(self.cluster, self.pool, self.scheduler, timestamp)
180 180           new_target_capacity = self._compute_target_capacity(resource_request) + capacity_offset
181 181           self.target_capacity_gauge.set(new_target_capacity, {"dry_run": dry_run})
182     -         self.max_capacity_gauge.set(self.pool_manager.max_capacity, {"dry_run": dry_run})
    182 +         self.max_capacity_gauge.set(
    183 +             self.pool_manager.max_capacity,
    184 +             {"dry_run": dry_run, "alert_on_max_capacity": self.pool_manager.alert_on_max_capacity},
    185 +         )
183 186           self.setpoint_gauge.set(self.autoscaling_config.setpoint, {"dry_run": dry_run})
184 187           self._emit_requested_resource_metrics(resource_request, dry_run=dry_run)
185 188

clusterman/autoscaler/pool_manager.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -85,6 +85,7 @@ def __init__(
8585
self.killable_nodes_prioritizing_v2 = self.pool_config.read_bool(
8686
"autoscaling.killable_nodes_prioritizing_v2", default=False
8787
)
88+
self.alert_on_max_capacity = self.pool_config.read_bool("alert_on_max_capacity", default=True)
8889
monitoring_info = {"cluster": cluster, "pool": pool}
8990
self.killable_nodes_counter = get_monitoring_client().create_counter(SFX_KILLABLE_NODES_COUNT, monitoring_info)
9091

clusterman/simulator/simulated_pool_manager.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,7 @@ def __init__(
5858
self.pool_config.read_int("scaling_limits.min_node_scalein_uptime_seconds", default=-1),
5959
MAX_MIN_NODE_SCALEIN_UPTIME_SECONDS,
6060
)
61+
self.alert_on_max_capacity = self.pool_config.read_bool("alert_on_max_capacity", default=True)
6162
self.killable_nodes_prioritizing_v2 = self.pool_config.read_bool(
6263
"autoscaling.killable_nodes_prioritizing_v2", default=False
6364
)

examples/schemas/pool.json

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -63,7 +63,8 @@
6363
},
6464
"additionalProperties": false
6565
},
66-
"sensu_config": {"$ref": "definitions.json#sensu_config"}
66+
"sensu_config": {"$ref": "definitions.json#sensu_config"},
67+
"alert_on_max_capacity": {"type": "boolean"}
6768
},
6869
"additionalProperties": false
6970
}

itests/environment.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -120,6 +120,7 @@ def setup_configurations(context):
120120
{"name": "cost", "type": APP_METRICS, "minute_range": 30},
121121
],
122122
},
123+
"alert_on_max_capacity": True,
123124
}
124125
kube_pool_config = {
125126
"resource_groups": [
@@ -142,6 +143,7 @@ def setup_configurations(context):
142143
"internal": True,
143144
"period_minutes": 7,
144145
},
146+
"alert_on_max_capacity": True,
145147
}
146148
with staticconf.testing.MockConfiguration(
147149
boto_config, namespace=CREDENTIALS_NAMESPACE

tests/autoscaler/autoscaler_test.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,6 +48,7 @@ def pool_configs():
4848
"max_weight_to_add": 200,
4949
"max_weight_to_remove": 10,
5050
},
51+
"alert_on_max_capacity": True,
5152
},
5253
namespace=POOL_NAMESPACE.format(pool="bar", scheduler="mesos"),
5354
):
@@ -86,6 +87,10 @@ def mock_autoscaler():
8687
"scaling_limits.max_capacity",
8788
namespace=POOL_NAMESPACE.format(pool="bar", scheduler="mesos"),
8889
)
90+
mock_autoscaler.pool_manager.alert_on_max_capacity = staticconf.read_bool(
91+
"alert_on_max_capacity",
92+
namespace=POOL_NAMESPACE.format(pool="bar", scheduler="mesos"),
93+
)
8994
mock_autoscaler.pool_manager.non_orphan_fulfilled_capacity = 0
9095

9196
mock_autoscaler.target_capacity_gauge = mock.Mock(spec=GaugeProtocol)
@@ -155,7 +160,7 @@ def test_autoscaler_run(dry_run, mock_autoscaler, run_timestamp):
155160

156161
assert mock_autoscaler.target_capacity_gauge.set.call_args == mock.call(100, {"dry_run": dry_run})
157162
assert mock_autoscaler.max_capacity_gauge.set.call_args == mock.call(
158-
mock_autoscaler.pool_manager.max_capacity, {"dry_run": dry_run}
163+
mock_autoscaler.pool_manager.max_capacity, {"dry_run": dry_run, "alert_on_max_capacity": True}
159164
)
160165
assert mock_autoscaler.setpoint_gauge.set.call_args == mock.call(0.7, {"dry_run": dry_run})
161166
assert mock_autoscaler._compute_target_capacity.call_args == mock.call(resource_request)

tests/conftest.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -144,6 +144,7 @@ def clusterman_pool_config():
144144
{"name": "cost", "type": APP_METRICS, "minute_range": 30},
145145
],
146146
},
147+
"alert_on_max_capacity": True,
147148
}
148149
with staticconf.testing.MockConfiguration(config, namespace="bar.mesos_config"):
149150
yield
@@ -200,6 +201,7 @@ def clusterman_k8s_pool_config():
200201
},
201202
"disable_autoscaling": False,
202203
},
204+
"alert_on_max_capacity": False,
203205
}
204206
with staticconf.testing.MockConfiguration(config, namespace="bar.kubernetes_config"):
205207
yield

0 commit comments

Comments
 (0)