test(perf): grow shrink cluster 3 nodes in parallel #7504

Closed
1 change: 1 addition & 0 deletions configurations/grow-shrink-cluster-parallel-nemesis.yaml
@@ -0,0 +1 @@
nemesis_class_name: 'GrowShrinkClusterParallelNemesis'
5 changes: 5 additions & 0 deletions configurations/tablets-initial-128.yaml
@@ -0,0 +1,5 @@
append_scylla_yaml: |
experimental_features:
- tablets
- consistent-topology-changes
tablets_initial_scale_factor: 16
5 changes: 5 additions & 0 deletions configurations/tablets-initial-256.yaml
@@ -0,0 +1,5 @@
append_scylla_yaml: |
experimental_features:
- tablets
- consistent-topology-changes
tablets_initial_scale_factor: 32
3 changes: 1 addition & 2 deletions defaults/aws_config.yaml
@@ -7,14 +7,13 @@ user_credentials_path: '~/.ssh/scylla_test_id_ed25519'
instance_type_loader: 'c6i.xlarge'
instance_type_monitor: 't3.large'
# manual on creating loader AMI's: see docs/new_loader_ami.md
ami_id_loader: 'scylla-qa-loader-ami-v20-ubuntu22'
ami_id_loader: 'ami-07dc305835cc4ccbf'
# manual on updating monitor images see: docs/monitoring-images.md
ami_id_monitor: 'scylladb-monitor-4-7-2-2024-05-13t08-38-47z'

availability_zone: 'a'
root_disk_size_monitor: 50 # GB, remove this field if default disk size should be used
root_disk_size_db: 30 # GB, increase root disk for larger swap (maximum: 16G)
root_disk_size_loader: 20
ami_db_scylla_user: 'scyllaadm'
ami_loader_user: 'ubuntu'
ami_monitor_user: 'ubuntu'
4 changes: 2 additions & 2 deletions defaults/test_default.yaml
@@ -71,7 +71,7 @@ email_subject_postfix: ''
collect_logs: false

hinted_handoff: 'disabled'
parallel_node_operations: false # supported from Scylla 6.0
parallel_node_operations: true # supported from Scylla 6.0

server_encrypt: false
client_encrypt: false
@@ -216,7 +216,7 @@ stress_image:
kcl: 'scylladb/hydra-loaders:kcl-jdk8-20210526-ShardSyncStrategyType-PERIODIC'
harry: 'scylladb/hydra-loaders:cassandra-harry-jdk11-20220816'
latte: 'scylladb/hydra-loaders:latte-0.25.2-scylladb'
cql-stress-cassandra-stress: 'scylladb/hydra-loaders:cql-stress-cassandra-stress-20240119'
cql-stress-cassandra-stress: 'scylladb/hydra-loaders:cql-stress-cassandra-stress-20240606'

service_level_shares: [1000]

@@ -5,9 +5,9 @@ def lib = library identifier: 'sct@snapshot', retriever: legacySCM(scm)

perfRegressionParallelPipeline(
backend: "aws",
availability_zone: 'a,b,c',
availability_zone: 'a',
test_name: "performance_regression_test.PerformanceRegressionTest",
test_config: """["test-cases/performance/perf-regression-latency-650gb-grow-shrink.yaml", "configurations/tablets-initial-32.yaml"]""",
test_config: """["test-cases/performance/perf-regression-latency-650gb-grow-shrink.yaml", "configurations/tablets-initial-128.yaml", "configurations/grow-shrink-cluster-parallel-nemesis.yaml"]""",
sub_tests: ["test_latency_write_with_nemesis", "test_latency_read_with_nemesis", "test_latency_mixed_with_nemesis"],
test_email_title: "latency during grow-shrink (tablets)",
)
@@ -5,9 +5,9 @@ def lib = library identifier: 'sct@snapshot', retriever: legacySCM(scm)

perfRegressionParallelPipeline(
backend: "aws",
availability_zone: 'a,b,c',
availability_zone: 'a',
test_name: "performance_regression_test.PerformanceRegressionTest",
test_config: """["test-cases/performance/perf-regression-latency-650gb-grow-shrink.yaml", "configurations/tablets-initial-32.yaml"]""",
test_config: """["test-cases/performance/perf-regression-latency-650gb-grow-shrink.yaml", "configurations/tablets-initial-128.yaml", "configurations/grow-shrink-cluster-parallel-nemesis.yaml"]""",
sub_tests: ["test_latency_write_with_nemesis", "test_latency_read_with_nemesis", "test_latency_mixed_with_nemesis"],
test_email_title: "latency during grow-shrink (tablets)",
)
4 changes: 3 additions & 1 deletion performance_regression_test.py
@@ -325,12 +325,14 @@ def run_workload(self, stress_cmd, nemesis=False, sub_type=None):
# allow to correctly save results for future compare
if sub_type is None:
sub_type = 'read' if ' read ' in stress_cmd else 'write' if ' write ' in stress_cmd else 'mixed'
# TODO: replace self.stress_cmd hack with proper implementation
self.stress_cmd = stress_cmd # pylint: disable=attribute-defined-outside-init
test_index = f'latency-during-ops-{sub_type}'
self.create_test_stats(sub_type=sub_type, append_sub_test_to_name=False, test_index=test_index)
stress_queue = self.run_stress_thread(stress_cmd=stress_cmd, stress_num=1, stats_aggregate_cmds=False)
if nemesis:
interval = self.params.get('nemesis_interval')
time.sleep(interval * 60) # Sleeping one interval (in minutes) before starting the nemesis
time.sleep(5 * 60) # Sleeping one interval (in minutes) before starting the nemesis
Contributor: This is just for testing, right?

Contributor Author: I'm trying to get this working ASAP, so I took some shortcuts here and there; I'll remove them later.

self.db_cluster.add_nemesis(nemesis=self.get_nemesis_class(), tester_obj=self)
self.db_cluster.start_nemesis(interval=interval, cycles_count=1)
self._stop_load_when_nemesis_threads_end()
1 change: 1 addition & 0 deletions sdcm/db_stats.py
@@ -638,6 +638,7 @@ def get_setup_details(self):

setup_details['db_cluster_node_details'] = {}
setup_details['sysctl_output'] = []
setup_details['append_scylla_yaml'] = str(self.params.get('append_scylla_yaml') or '')
return setup_details

def get_test_details(self):
108 changes: 103 additions & 5 deletions sdcm/nemesis.py
@@ -73,6 +73,7 @@
from sdcm.provision.scylla_yaml import SeedProvider
from sdcm.provision.helpers.certificate import update_certificate, TLSAssets
from sdcm.remote.libssh2_client.exceptions import UnexpectedExit as Libssh2UnexpectedExit
from sdcm.rest.remote_curl_client import RemoteCurlClient
from sdcm.sct_events import Severity
from sdcm.sct_events.database import DatabaseLogEvent
from sdcm.sct_events.decorators import raise_event_on_failure
@@ -1264,6 +1265,30 @@ def _add_and_init_new_cluster_node(self, old_node_ip=None, host_id=None,
InfoEvent(message="FinishEvent - New Node is up and normal").publish()
return new_node

def _add_and_init_new_cluster_node_parallel(self, count, timeout=MAX_TIME_WAIT_FOR_NEW_NODE_UP, rack=0):
self.log.info("Adding %s new nodes to cluster...", count)
InfoEvent(message=f'StartEvent - Adding {count} new nodes to cluster').publish()
new_nodes = self.cluster.add_nodes(
count=count, dc_idx=self.target_node.dc_idx, enable_auto_bootstrap=True, rack=rack)
self.monitoring_set.reconfigure_scylla_monitoring()

try:
with adaptive_timeout(Operations.NEW_NODE, node=self.cluster.nodes[0], timeout=timeout):
self.cluster.wait_for_init(node_list=new_nodes, timeout=timeout, check_node_health=False)
self.cluster.set_seeds()
Contributor: Messing with the seeds isn't really needed anymore, only just before starting a node.

Contributor Author: OK, I'll remove it. Note that I trimmed the original procedure (e.g. setting the nemesis target) and I'm not sure about it yet.

self.cluster.update_seed_provider()
except (NodeSetupFailed, NodeSetupTimeout):
self.log.warning("TestConfig of the '%s' failed, removing them from list of nodes" % new_nodes)
for node in new_nodes:
self.cluster.nodes.remove(node)
self.log.warning("Nodes will not be terminated. Please terminate manually!!!")
raise
for new_node in new_nodes:
new_node.wait_native_transport()
self.cluster.wait_for_nodes_up_and_normal(nodes=new_nodes)
InfoEvent(message="FinishEvent - New Nodes are up and normal").publish()
return new_nodes

@decorate_with_context(ignore_ycsb_connection_refused)
def _terminate_cluster_node(self, node):
self.cluster.terminate_node(node)
@@ -3989,6 +4014,12 @@ def disrupt_corrupt_then_scrub(self):
def add_new_node(self, rack=0):
return self._add_and_init_new_cluster_node(rack=rack)

@latency_calculator_decorator(legend="Adding new nodes parallel")
def add_new_nodes_parallel(self, count=1, rack=0):
nodes = self._add_and_init_new_cluster_node_parallel(count=count, rack=rack)
self._wait_for_tablets_balanced()
return nodes

@latency_calculator_decorator(legend="Decommission node: remove node from cluster")
def decommission_node(self, node):
self.cluster.decommission(node)
@@ -4014,6 +4045,18 @@ def decommission_nodes(self, add_nodes_number, rack, is_seed: Optional[Union[boo
InfoEvent(f'FinishEvent - ShrinkCluster failed decommissioning a node {self.target_node} with error '
f'{str(exc)}').publish()

@latency_calculator_decorator(legend="Decommission node: remove node from cluster")
def decommission_nodes_parallel(self, add_nodes_number, rack, is_seed: Optional[Union[bool, DefaultValue]] = DefaultValue,
dc_idx: Optional[int] = None):
parallel_obj = ParallelObject(objects=self.cluster.nodes[:add_nodes_number], timeout=7200)
try:
InfoEvent(f'StartEvent - ShrinkCluster started decommissioning {add_nodes_number} nodes').publish()
parallel_obj.run(self.cluster.decommission, ignore_exceptions=False, unpack_objects=True)
InfoEvent(f'FinishEvent - ShrinkCluster has done decommissioning {add_nodes_number} nodes').publish()
except Exception as exc: # pylint: disable=broad-except
InfoEvent(f'FinishEvent - ShrinkCluster failed decommissioning a node {self.target_node} with error '
f'{str(exc)}').publish()

def disrupt_grow_shrink_cluster(self):
sleep_time_between_ops = self.cluster.params.get('nemesis_sequence_sleep_between_ops')
if not self.has_steady_run and sleep_time_between_ops:
@@ -4022,6 +4065,19 @@ def disrupt_grow_shrink_cluster(self):
self._grow_cluster(rack=None)
self._shrink_cluster(rack=None)

def disrupt_grow_shrink_cluster_parallel(self):
Contributor: Later we might want to use the same nemesis, but have it configurable based on Scylla capabilities and SCT configuration.
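A minimal sketch of that suggestion, assuming the `parallel_startup` flag (which this PR already uses to gate parallel decommission in `_shrink_cluster()`) is an acceptable stand-in for "Scylla capabilities plus SCT configuration":

```python
def disrupt_grow_shrink_cluster(self):
    sleep_time_between_ops = self.cluster.params.get('nemesis_sequence_sleep_between_ops')
    if not self.has_steady_run and sleep_time_between_ops:
        self.steady_state_latency()
        self.has_steady_run = True
    # Assumption: cluster.parallel_startup reflects both what the Scylla version
    # supports and what the SCT configuration asks for, as it already does for
    # the parallel decommission branch in _shrink_cluster() below.
    if self.cluster.parallel_startup:
        self._grow_cluster_parallel(rack=None)
    else:
        self._grow_cluster(rack=None)
    self._shrink_cluster(rack=None)
```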

sleep_time_between_ops = self.cluster.params.get('nemesis_sequence_sleep_between_ops')
if not self.has_steady_run and sleep_time_between_ops:
self.steady_state_latency()
self.has_steady_run = True
self._grow_cluster_parallel(rack=None)
self.log.info("Doubling the load on the cluster")
stress_queue = self.tester.run_stress_thread(
stress_cmd=self.tester.stress_cmd, stress_num=1, stats_aggregate_cmds=False, duration=30)
results = self.tester.get_stress_results(queue=stress_queue, store_results=False)
self.log.info(f"Double load results: {results}")
self._shrink_cluster(rack=None)
Contributor: We need here the part that waits for the cluster to be balanced and then doubles the load; while doubling the load we should also measure latency. I think that's more important to deal with than the parallel decommission for now.
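A rough sketch of that flow, reusing pieces already in this diff: `add_new_nodes_parallel()` already waits for tablets to balance, so the missing part is running the doubled load inside a method wrapped by `latency_calculator_decorator`, so latency during that window is reported as its own step. The helper name `_run_doubled_load_after_grow` is hypothetical:

```python
@latency_calculator_decorator(legend="Doubled load after parallel grow")
def _run_doubled_load_after_grow(self, duration=30):
    # The decorator measures latency for the duration of this call, so start the
    # extra stress thread and block until it finishes.
    stress_queue = self.tester.run_stress_thread(
        stress_cmd=self.tester.stress_cmd, stress_num=1,
        stats_aggregate_cmds=False, duration=duration)
    return self.tester.get_stress_results(queue=stress_queue, store_results=False)

# disrupt_grow_shrink_cluster_parallel() would then call, in order:
#     self._grow_cluster_parallel(rack=None)   # balance wait happens inside add_new_nodes_parallel
#     self._run_doubled_load_after_grow()
#     self._shrink_cluster(rack=None)
```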


# NOTE: version limitation is caused by the following:
# - https://github.com/scylladb/scylla-enterprise/issues/3211
# - https://github.com/scylladb/scylladb/issues/14184
@@ -4049,6 +4105,16 @@ def _grow_cluster(self, rack=None):
self.log.info("Finish cluster grow")
time.sleep(self.interval)

def _grow_cluster_parallel(self, rack=None):
if rack is None:
rack = 0
add_nodes_number = self.tester.params.get('nemesis_add_node_cnt')
self.log.info("Start grow cluster on %s nodes", add_nodes_number)
InfoEvent(message=f"Start grow cluster on {add_nodes_number} nodes").publish()
self.add_new_nodes_parallel(count=add_nodes_number, rack=rack)
self.log.info("Finish cluster grow")
time.sleep(300) # TODO: currently, just in case, to be removed
Contributor: Consider whether we can remove this sleep.


def _shrink_cluster(self, rack=None):
add_nodes_number = self.tester.params.get('nemesis_add_node_cnt')
self.log.info("Start shrink cluster by %s nodes", add_nodes_number)
@@ -4074,11 +4140,18 @@ def _shrink_cluster(self, rack=None):
# Currently on kubernetes first two nodes of each rack are getting seed status
# Because of such behavior only way to get them decommission is to enable decommissioning
# TBD: After https://github.com/scylladb/scylla-operator/issues/292 is fixed remove is_seed parameter
self.decommission_nodes(
decommission_nodes_number,
rack,
is_seed=None if self._is_it_on_kubernetes() else DefaultValue,
dc_idx=self.target_node.dc_idx)
if self.cluster.parallel_startup:
self.decommission_nodes_parallel(
decommission_nodes_number,
rack,
is_seed=None if self._is_it_on_kubernetes() else DefaultValue,
dc_idx=self.target_node.dc_idx)
else:
self.decommission_nodes(
decommission_nodes_number,
rack,
is_seed=None if self._is_it_on_kubernetes() else DefaultValue,
dc_idx=self.target_node.dc_idx)
num_of_nodes = len(self.cluster.nodes)
self.log.info("Cluster shrink finished. Current number of nodes %s", num_of_nodes)
InfoEvent(message=f'Cluster shrink finished. Current number of nodes {num_of_nodes}').publish()
@@ -5019,6 +5092,22 @@ def disrupt_disable_binary_gossip_execute_major_compaction(self):
self.target_node.restart_scylla_server()
raise

def _wait_for_tablets_balanced(self):
Contributor: I would put this into sdcm.utils.

"""
Waiting for tablets to be balanced using REST API.

doing it several times as there's a risk of:
"currently a small time window after adding nodes and before load balancing starts during which
topology may appear as quiesced because the state machine goes through an idle state before it enters load balancing state"
"""
time.sleep(60) # one minute gap before checking, just to give some time to state machine
client = RemoteCurlClient(host="127.0.0.1:10000", endpoint="", node=self.cluster.nodes[0])
self.log.info("Waiting for tablets to be balanced")
for _ in range(3):
client.run_remoter_curl(method="POST", path="storage_service/quiesce_topology", params={}, timeout=3600)
time.sleep(5)
self.log.info("Tablets are balanced")


def disrupt_method_wrapper(method, is_exclusive=False): # pylint: disable=too-many-statements # noqa: PLR0915
"""
@@ -5262,6 +5351,15 @@ def disrupt(self):
self.disrupt_grow_shrink_cluster()


class GrowShrinkClusterParallelNemesis(Nemesis):
disruptive = True
kubernetes = False
topology_changes = True

def disrupt(self):
self.disrupt_grow_shrink_cluster_parallel()


class AddRemoveRackNemesis(Nemesis):
disruptive = True
kubernetes = True
1 change: 1 addition & 0 deletions sdcm/provision/scylla_yaml/scylla_yaml.py
@@ -339,6 +339,7 @@ def set_authorizer(cls, authorizer: str):
audit_categories: str = None # None
audit_tables: str = None # None
audit_keyspaces: str = None # None
enable_tablets: bool = None # False, but default scylla.yaml sets to true in some versions

compaction_collection_items_count_warning_threshold: int = None # None

5 changes: 4 additions & 1 deletion sdcm/tester.py
@@ -910,7 +910,7 @@ def setUp(self): # pylint: disable=too-many-branches,too-many-statements
self.download_db_packages()
if self.is_encrypt_keys_needed:
self.download_encrypt_keys()
self.prepare_kms_host()
# self.prepare_kms_host()

self.init_resources()

@@ -1953,6 +1953,7 @@ def run_stress_cassandra_thread( # noqa: PLR0913
# stress_cmd = self._cs_add_node_flag(stress_cmd)
if duration:
timeout = self.get_duration(duration)
stress_cmd = re.sub(r'\sduration=\d+[mhd]\s', f' duration={duration}m ', stress_cmd)
elif self._stress_duration and ' duration=' in stress_cmd:
timeout = self.get_duration(self._stress_duration)
stress_cmd = re.sub(r'\sduration=\d+[mhd]\s', f' duration={self._stress_duration}m ', stress_cmd)
@@ -1986,6 +1987,7 @@ def run_cql_stress_cassandra_thread( # noqa: PLR0913
# pylint: disable=too-many-locals
if duration:
timeout = self.get_duration(duration)
stress_cmd = re.sub(r'\sduration=\d+[mhd]\s', f' duration={duration}m ', stress_cmd)
elif self._stress_duration and ' duration=' in stress_cmd:
timeout = self.get_duration(self._stress_duration)
stress_cmd = re.sub(r'\sduration=\d+[mhd]\s', f' duration={self._stress_duration}m ', stress_cmd)
@@ -2018,6 +2020,7 @@ def run_stress_thread_bench(self, stress_cmd, duration=None, round_robin=False,

if duration:
timeout = self.get_duration(duration)
stress_cmd = re.sub(r'\sduration=\d+[mhd]\s', f' duration={duration}m ', stress_cmd)
elif self._stress_duration and '-duration=' in stress_cmd:
timeout = self.get_duration(self._stress_duration)
stress_cmd = re.sub(r'\s-duration[=\s]+\d+[mhd]+\s*', f' -duration={self._stress_duration}m ', stress_cmd)
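For reference, the duration rewrite added in the hunks above behaves like this on a made-up cassandra-stress command (values are illustrative):

```python
import re

stress_cmd = "cassandra-stress write duration=60m -rate threads=100"
duration = 30  # minutes, as passed into the run_* helpers above

# The stress command's own duration is kept in sync with the requested one.
print(re.sub(r'\sduration=\d+[mhd]\s', f' duration={duration}m ', stress_cmd))
# cassandra-stress write duration=30m -rate threads=100
```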