Skip to content

Commit

Permalink
fix(nemesis-target-pool): set method attribute for target pool
Browse files Browse the repository at this point in the history
Decorator `disrupt_method_wrapper` rewrites target node pool and
always set Nemesis._target_node_pool as data_nodes. This cause
that target node is chosen from cluster.data_nodes always.
This happemed because we have decorators chain:
disrupt_method_wrapper -> target_(data/zero/all)_nodes -> disrupt_*

Target node is chosen in `disrupt_method_wrapper`, and any changes in
target_*_pool does not affect of target node if it is run in complex
nemesis as SisyphusMonkey and also has affect in parallel nemesis run

Set method attribute disrupt_*.target_pool for later use in
decorator `disrupt_method_wrapper`, allows correctly set
target pool for disrupt nemesis

Using solution from PR: #9502 to store pool type instead of
cache of filtered nodes

(cherry picked from commit 69ad0dd)
  • Loading branch information
aleksbykov authored and fruch committed Dec 11, 2024
1 parent 9630f55 commit 6be2880
Showing 1 changed file with 27 additions and 37 deletions.
64 changes: 27 additions & 37 deletions sdcm/nemesis.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import traceback
import json
import itertools
import enum
from distutils.version import LooseVersion
from contextlib import ExitStack, contextmanager
from typing import Any, List, Optional, Type, Tuple, Callable, Dict, Set, Union, Iterable
Expand Down Expand Up @@ -172,6 +173,13 @@
)

NEMESIS_TARGET_SELECTION_LOCK = Lock()
DISRUPT_POOL_PROPERTY_NAME = "target_pool"


class NEMESIS_TARGET_POOLS(enum.Enum):
data_nodes = "data_nodes"
zero_nodes = "zero_nodes"
all_nodes = "nodes"


class DefaultValue: # pylint: disable=too-few-public-methods
Expand All @@ -181,37 +189,19 @@ class DefaultValue: # pylint: disable=too-few-public-methods
...


def target_data_nodes(func):
@wraps(func)
def wrapper(*args, **kwargs):
try:
args[0].set_target_node_pool(args[0].cluster.data_nodes)
return func(*args, **kwargs)
finally:
args[0].set_target_node_pool(args[0].cluster.data_nodes)
return wrapper
def target_data_nodes(func: Callable) -> Callable:
setattr(func, DISRUPT_POOL_PROPERTY_NAME, NEMESIS_TARGET_POOLS.data_nodes)
return func


def target_zero_nodes(func):
@wraps(func)
def wrapper(*args, **kwargs):
try:
args[0].set_target_node_pool(args[0].cluster.zero_nodes)
return func(*args, **kwargs)
finally:
args[0].set_target_node_pool(args[0].cluster.data_nodes)
return wrapper
def target_zero_nodes(func: Callable) -> Callable:
setattr(func, DISRUPT_POOL_PROPERTY_NAME, NEMESIS_TARGET_POOLS.zero_nodes)
return func


def target_all_nodes(func):
@wraps(func)
def wrapper(*args, **kwargs):
try:
args[0].set_target_node_pool(args[0].cluster.nodes)
return func(*args, **kwargs)
finally:
args[0].set_target_node_pool(args[0].cluster.data_nodes)
return wrapper
def target_all_nodes(func: Callable) -> Callable:
setattr(func, DISRUPT_POOL_PROPERTY_NAME, NEMESIS_TARGET_POOLS.all_nodes)
return func


class Nemesis: # pylint: disable=too-many-instance-attributes,too-many-public-methods
Expand Down Expand Up @@ -288,7 +278,7 @@ def __init__(self, tester_obj, termination_event, *args, nemesis_selector=None,
}
self.es_publisher = NemesisElasticSearchPublisher(self.tester)
self._init_num_deletions_factor()
self._target_node_pool = self.cluster.data_nodes
self._target_node_pool_type = NEMESIS_TARGET_POOLS.data_nodes

def _init_num_deletions_factor(self):
# num_deletions_factor is a numeric divisor. It's a factor by which the available-partitions-for-deletion
Expand Down Expand Up @@ -391,12 +381,9 @@ def unset_current_running_nemesis(node):
with NEMESIS_TARGET_SELECTION_LOCK:
node.running_nemesis = None

def set_target_node_pool(self, nodelist: list[BaseNode] | None = None):
"""Set pool of nodes to choose target node """
if not nodelist:
self._target_node_pool = self.cluster.data_nodes
else:
self._target_node_pool = nodelist
def set_target_node_pool_type(self, pool_type: NEMESIS_TARGET_POOLS = NEMESIS_TARGET_POOLS.data_nodes):
"""Set pool type to choose nodes for target node """
self._target_node_pool_type = pool_type

def _get_target_nodes(
self,
Expand All @@ -418,7 +405,8 @@ def _get_target_nodes(
"""
if is_seed is DefaultValue:
is_seed = False if self.filter_seed else None
nodes = [node for node in self._target_node_pool if not node.running_nemesis]
self.log.debug("Target node pool type: %s", self._target_node_pool_type)
nodes = [node for node in getattr(self.cluster, self._target_node_pool_type.value) if not node.running_nemesis]
if is_seed is not None:
nodes = [node for node in nodes if node.is_seed == is_seed]
if dc_idx is not None:
Expand Down Expand Up @@ -4168,7 +4156,7 @@ def _decommission_nodes(self, nodes_number, rack, is_seed: Optional[Union[bool,
if self._is_it_on_kubernetes():
if rack is None and self._is_it_on_kubernetes():
rack = 0
self.set_target_node_pool(self.cluster.data_nodes)
self.set_target_node_pool_type(NEMESIS_TARGET_POOLS.data_nodes)
self.set_target_node(rack=rack, is_seed=is_seed, allow_only_last_node_in_rack=True)
else:
rack_idx = rack if rack is not None else idx % self.cluster.racks_count
Expand Down Expand Up @@ -5312,6 +5300,7 @@ def wrapper(*args, **kwargs): # pylint: disable=too-many-statements # noqa: PL
# pylint: disable=too-many-locals
# pylint: disable=too-many-branches
method_name = method.__name__
target_pool_type = getattr(method, DISRUPT_POOL_PROPERTY_NAME, NEMESIS_TARGET_POOLS.data_nodes)
nemesis_run_info_key = f"{id(args[0])}--{method_name}"
try:
NEMESIS_LOCK.acquire() # pylint: disable=consider-using-with
Expand All @@ -5324,6 +5313,7 @@ def wrapper(*args, **kwargs): # pylint: disable=too-many-statements # noqa: PL
time.sleep(10)

current_disruption = "".join(p.capitalize() for p in method_name.replace("disrupt_", "").split("_"))
args[0].set_target_node_pool_type(target_pool_type)
args[0].set_target_node(current_disruption=current_disruption)

args[0].cluster.check_cluster_health()
Expand Down Expand Up @@ -5438,7 +5428,7 @@ def wrapper(*args, **kwargs): # pylint: disable=too-many-statements # noqa: PL
# gets killed/aborted. So, use safe 'pop' call with the default 'None' value.
NEMESIS_RUN_INFO.pop(nemesis_run_info_key, None)

args[0].set_target_node_pool(args[0].cluster.data_nodes)
args[0].set_target_node_pool_type(NEMESIS_TARGET_POOLS.data_nodes)

return result

Expand Down

0 comments on commit 6be2880

Please sign in to comment.