From 69ad0dd69b1191a6aa97c369780270ae2d4095da Mon Sep 17 00:00:00 2001 From: Aleksandr Bykov Date: Mon, 9 Dec 2024 15:21:19 +0700 Subject: [PATCH] fix(nemesis-target-pool): set method attribute for target pool Decorator `disrupt_method_wrapper` rewrites target node pool and always set Nemesis._target_node_pool as data_nodes. This cause that target node is chosen from cluster.data_nodes always. This happemed because we have decorators chain: disrupt_method_wrapper -> target_(data/zero/all)_nodes -> disrupt_* Target node is chosen in `disrupt_method_wrapper`, and any changes in target_*_pool does not affect of target node if it is run in complex nemesis as SisyphusMonkey and also has affect in parallel nemesis run Set method attribute disrupt_*.target_pool for later use in decorator `disrupt_method_wrapper`, allows correctly set target pool for disrupt nemesis Using solution from PR: #9502 to store pool type instead of cache of filtered nodes --- sdcm/nemesis.py | 64 +++++++++++++++++++++---------------------------- 1 file changed, 27 insertions(+), 37 deletions(-) diff --git a/sdcm/nemesis.py b/sdcm/nemesis.py index 6b4f5bf34e..7bca9bd460 100644 --- a/sdcm/nemesis.py +++ b/sdcm/nemesis.py @@ -29,6 +29,7 @@ import traceback import json import itertools +import enum from distutils.version import LooseVersion from contextlib import ExitStack, contextmanager from typing import Any, List, Optional, Type, Tuple, Callable, Dict, Set, Union, Iterable @@ -171,6 +172,13 @@ ) NEMESIS_TARGET_SELECTION_LOCK = Lock() +DISRUPT_POOL_PROPERTY_NAME = "target_pool" + + +class NEMESIS_TARGET_POOLS(enum.Enum): + data_nodes = "data_nodes" + zero_nodes = "zero_nodes" + all_nodes = "nodes" class DefaultValue: # pylint: disable=too-few-public-methods @@ -180,37 +188,19 @@ class DefaultValue: # pylint: disable=too-few-public-methods ... -def target_data_nodes(func): - @wraps(func) - def wrapper(*args, **kwargs): - try: - args[0].set_target_node_pool(args[0].cluster.data_nodes) - return func(*args, **kwargs) - finally: - args[0].set_target_node_pool(args[0].cluster.data_nodes) - return wrapper +def target_data_nodes(func: Callable) -> Callable: + setattr(func, DISRUPT_POOL_PROPERTY_NAME, NEMESIS_TARGET_POOLS.data_nodes) + return func -def target_zero_nodes(func): - @wraps(func) - def wrapper(*args, **kwargs): - try: - args[0].set_target_node_pool(args[0].cluster.zero_nodes) - return func(*args, **kwargs) - finally: - args[0].set_target_node_pool(args[0].cluster.data_nodes) - return wrapper +def target_zero_nodes(func: Callable) -> Callable: + setattr(func, DISRUPT_POOL_PROPERTY_NAME, NEMESIS_TARGET_POOLS.zero_nodes) + return func -def target_all_nodes(func): - @wraps(func) - def wrapper(*args, **kwargs): - try: - args[0].set_target_node_pool(args[0].cluster.nodes) - return func(*args, **kwargs) - finally: - args[0].set_target_node_pool(args[0].cluster.data_nodes) - return wrapper +def target_all_nodes(func: Callable) -> Callable: + setattr(func, DISRUPT_POOL_PROPERTY_NAME, NEMESIS_TARGET_POOLS.all_nodes) + return func class Nemesis: # pylint: disable=too-many-instance-attributes,too-many-public-methods @@ -287,7 +277,7 @@ def __init__(self, tester_obj, termination_event, *args, nemesis_selector=None, } self.es_publisher = NemesisElasticSearchPublisher(self.tester) self._init_num_deletions_factor() - self._target_node_pool = self.cluster.data_nodes + self._target_node_pool_type = NEMESIS_TARGET_POOLS.data_nodes def _init_num_deletions_factor(self): # num_deletions_factor is a numeric divisor. It's a factor by which the available-partitions-for-deletion @@ -397,12 +387,9 @@ def unset_current_running_nemesis(node): with NEMESIS_TARGET_SELECTION_LOCK: node.running_nemesis = None - def set_target_node_pool(self, nodelist: list[BaseNode] | None = None): - """Set pool of nodes to choose target node """ - if not nodelist: - self._target_node_pool = self.cluster.data_nodes - else: - self._target_node_pool = nodelist + def set_target_node_pool_type(self, pool_type: NEMESIS_TARGET_POOLS = NEMESIS_TARGET_POOLS.data_nodes): + """Set pool type to choose nodes for target node """ + self._target_node_pool_type = pool_type def _get_target_nodes( self, @@ -424,7 +411,8 @@ def _get_target_nodes( """ if is_seed is DefaultValue: is_seed = False if self.filter_seed else None - nodes = [node for node in self._target_node_pool if not node.running_nemesis] + self.log.debug("Target node pool type: %s", self._target_node_pool_type) + nodes = [node for node in getattr(self.cluster, self._target_node_pool_type.value) if not node.running_nemesis] if is_seed is not None: nodes = [node for node in nodes if node.is_seed == is_seed] if dc_idx is not None: @@ -4220,7 +4208,7 @@ def _decommission_nodes(self, nodes_number, rack, is_seed: Optional[Union[bool, if self._is_it_on_kubernetes(): if rack is None and self._is_it_on_kubernetes(): rack = 0 - self.set_target_node_pool(self.cluster.data_nodes) + self.set_target_node_pool_type(NEMESIS_TARGET_POOLS.data_nodes) self.set_target_node(rack=rack, is_seed=is_seed, allow_only_last_node_in_rack=True) else: rack_idx = rack if rack is not None else idx % self.cluster.racks_count @@ -5397,6 +5385,7 @@ def wrapper(*args, **kwargs): # pylint: disable=too-many-statements # noqa: PL # pylint: disable=too-many-locals # pylint: disable=too-many-branches method_name = method.__name__ + target_pool_type = getattr(method, DISRUPT_POOL_PROPERTY_NAME, NEMESIS_TARGET_POOLS.data_nodes) nemesis_run_info_key = f"{id(args[0])}--{method_name}" try: NEMESIS_LOCK.acquire() # pylint: disable=consider-using-with @@ -5409,6 +5398,7 @@ def wrapper(*args, **kwargs): # pylint: disable=too-many-statements # noqa: PL time.sleep(10) current_disruption = "".join(p.capitalize() for p in method_name.replace("disrupt_", "").split("_")) + args[0].set_target_node_pool_type(target_pool_type) args[0].set_target_node(current_disruption=current_disruption) args[0].cluster.check_cluster_health() @@ -5523,7 +5513,7 @@ def wrapper(*args, **kwargs): # pylint: disable=too-many-statements # noqa: PL # gets killed/aborted. So, use safe 'pop' call with the default 'None' value. NEMESIS_RUN_INFO.pop(nemesis_run_info_key, None) - args[0].set_target_node_pool(args[0].cluster.data_nodes) + args[0].set_target_node_pool_type(NEMESIS_TARGET_POOLS.data_nodes) return result