Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport 6.2] fix(nemesis-target-pool): set method attribute for target pool #9527

Merged
merged 1 commit into from
Dec 11, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 27 additions & 37 deletions sdcm/nemesis.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import traceback
import json
import itertools
import enum
from distutils.version import LooseVersion
from contextlib import ExitStack, contextmanager
from typing import Any, List, Optional, Type, Tuple, Callable, Dict, Set, Union, Iterable
Expand Down Expand Up @@ -172,6 +173,13 @@
)

NEMESIS_TARGET_SELECTION_LOCK = Lock()
DISRUPT_POOL_PROPERTY_NAME = "target_pool"


class NEMESIS_TARGET_POOLS(enum.Enum):
data_nodes = "data_nodes"
zero_nodes = "zero_nodes"
all_nodes = "nodes"


class DefaultValue: # pylint: disable=too-few-public-methods
Expand All @@ -181,37 +189,19 @@ class DefaultValue: # pylint: disable=too-few-public-methods
...


def target_data_nodes(func):
@wraps(func)
def wrapper(*args, **kwargs):
try:
args[0].set_target_node_pool(args[0].cluster.data_nodes)
return func(*args, **kwargs)
finally:
args[0].set_target_node_pool(args[0].cluster.data_nodes)
return wrapper
def target_data_nodes(func: Callable) -> Callable:
setattr(func, DISRUPT_POOL_PROPERTY_NAME, NEMESIS_TARGET_POOLS.data_nodes)
return func


def target_zero_nodes(func):
@wraps(func)
def wrapper(*args, **kwargs):
try:
args[0].set_target_node_pool(args[0].cluster.zero_nodes)
return func(*args, **kwargs)
finally:
args[0].set_target_node_pool(args[0].cluster.data_nodes)
return wrapper
def target_zero_nodes(func: Callable) -> Callable:
setattr(func, DISRUPT_POOL_PROPERTY_NAME, NEMESIS_TARGET_POOLS.zero_nodes)
return func


def target_all_nodes(func):
@wraps(func)
def wrapper(*args, **kwargs):
try:
args[0].set_target_node_pool(args[0].cluster.nodes)
return func(*args, **kwargs)
finally:
args[0].set_target_node_pool(args[0].cluster.data_nodes)
return wrapper
def target_all_nodes(func: Callable) -> Callable:
setattr(func, DISRUPT_POOL_PROPERTY_NAME, NEMESIS_TARGET_POOLS.all_nodes)
return func


class Nemesis: # pylint: disable=too-many-instance-attributes,too-many-public-methods
Expand Down Expand Up @@ -288,7 +278,7 @@ def __init__(self, tester_obj, termination_event, *args, nemesis_selector=None,
}
self.es_publisher = NemesisElasticSearchPublisher(self.tester)
self._init_num_deletions_factor()
self._target_node_pool = self.cluster.data_nodes
self._target_node_pool_type = NEMESIS_TARGET_POOLS.data_nodes

def _init_num_deletions_factor(self):
# num_deletions_factor is a numeric divisor. It's a factor by which the available-partitions-for-deletion
Expand Down Expand Up @@ -391,12 +381,9 @@ def unset_current_running_nemesis(node):
with NEMESIS_TARGET_SELECTION_LOCK:
node.running_nemesis = None

def set_target_node_pool(self, nodelist: list[BaseNode] | None = None):
"""Set pool of nodes to choose target node """
if not nodelist:
self._target_node_pool = self.cluster.data_nodes
else:
self._target_node_pool = nodelist
def set_target_node_pool_type(self, pool_type: NEMESIS_TARGET_POOLS = NEMESIS_TARGET_POOLS.data_nodes):
"""Set pool type to choose nodes for target node """
self._target_node_pool_type = pool_type

def _get_target_nodes(
self,
Expand All @@ -418,7 +405,8 @@ def _get_target_nodes(
"""
if is_seed is DefaultValue:
is_seed = False if self.filter_seed else None
nodes = [node for node in self._target_node_pool if not node.running_nemesis]
self.log.debug("Target node pool type: %s", self._target_node_pool_type)
nodes = [node for node in getattr(self.cluster, self._target_node_pool_type.value) if not node.running_nemesis]
if is_seed is not None:
nodes = [node for node in nodes if node.is_seed == is_seed]
if dc_idx is not None:
Expand Down Expand Up @@ -4167,7 +4155,7 @@ def _decommission_nodes(self, nodes_number, rack, is_seed: Optional[Union[bool,
if self._is_it_on_kubernetes():
if rack is None and self._is_it_on_kubernetes():
rack = 0
self.set_target_node_pool(self.cluster.data_nodes)
self.set_target_node_pool_type(NEMESIS_TARGET_POOLS.data_nodes)
self.set_target_node(rack=rack, is_seed=is_seed, allow_only_last_node_in_rack=True)
else:
rack_idx = rack if rack is not None else idx % self.cluster.racks_count
Expand Down Expand Up @@ -5311,6 +5299,7 @@ def wrapper(*args, **kwargs): # pylint: disable=too-many-statements # noqa: PL
# pylint: disable=too-many-locals
# pylint: disable=too-many-branches
method_name = method.__name__
target_pool_type = getattr(method, DISRUPT_POOL_PROPERTY_NAME, NEMESIS_TARGET_POOLS.data_nodes)
nemesis_run_info_key = f"{id(args[0])}--{method_name}"
try:
NEMESIS_LOCK.acquire() # pylint: disable=consider-using-with
Expand All @@ -5323,6 +5312,7 @@ def wrapper(*args, **kwargs): # pylint: disable=too-many-statements # noqa: PL
time.sleep(10)

current_disruption = "".join(p.capitalize() for p in method_name.replace("disrupt_", "").split("_"))
args[0].set_target_node_pool_type(target_pool_type)
args[0].set_target_node(current_disruption=current_disruption)

args[0].cluster.check_cluster_health()
Expand Down Expand Up @@ -5437,7 +5427,7 @@ def wrapper(*args, **kwargs): # pylint: disable=too-many-statements # noqa: PL
# gets killed/aborted. So, use safe 'pop' call with the default 'None' value.
NEMESIS_RUN_INFO.pop(nemesis_run_info_key, None)

args[0].set_target_node_pool(args[0].cluster.data_nodes)
args[0].set_target_node_pool_type(NEMESIS_TARGET_POOLS.data_nodes)

return result

Expand Down
Loading