From 3dc80611dd4387b63fbfb4cb167bdc41f947b9d1 Mon Sep 17 00:00:00 2001 From: yarongilor Date: Sun, 8 Dec 2024 10:50:28 +0200 Subject: [PATCH] test(elasticity with nemesis): test 90 percent with nemesis test supported nemesis with 90 percent --- data_dir/nemesis.yml | 34 +++++++++++++++ ...ticity-90-percent-with-nemesis.jenkinsfile | 12 ++++++ sdcm/nemesis.py | 41 +++++++++++++++++++ .../elasticity-90-percent-with-nemesis.yaml | 35 ++++++++++++++++ 4 files changed, 122 insertions(+) create mode 100644 jenkins-pipelines/oss/features/elasticity-90-percent-with-nemesis.jenkinsfile create mode 100644 test-cases/features/elasticity-90-percent-with-nemesis.yaml diff --git a/data_dir/nemesis.yml b/data_dir/nemesis.yml index 718f24ef81d..b05dfb0f45b 100644 --- a/data_dir/nemesis.yml +++ b/data_dir/nemesis.yml @@ -55,9 +55,11 @@ - kubernetes = True - disrupt_disable_binary_gossip_execute_major_compaction: - disruptive = True + - elasticity = True - kubernetes = True - disrupt_disable_enable_ldap_authorization: - disruptive = True + - elasticity = True - limited = True - disrupt_drain_kubernetes_node_then_decommission_and_add_scylla_node: - disruptive = True @@ -84,36 +86,44 @@ - config_changes = True - disrupt_hard_reboot_node: - disruptive = True + - elasticity = True - kubernetes = True - limited = True - free_tier_set = True - disrupt_hot_reloading_internode_certificate: - disruptive = False - config_changes = True + - elasticity = True - disrupt_increase_shares_by_attach_another_sl_during_load: - disruptive = True + - elasticity = True - sla = True - disrupt_kill_scylla: - disruptive = True + - elasticity = True - kubernetes = True - free_tier_set = True - disrupt_ldap_connection_toggle: - disruptive = False - limited = True + - elasticity = True - disrupt_load_and_stream: - disruptive = False - run_with_gemini = False - kubernetes = True - limited = True + - elasticity = True - disrupt_major_compaction: - disruptive = False - kubernetes = True - limited = True + - 
elasticity = True - disrupt_maximum_allowed_sls_with_max_shares_during_load: - disruptive = False - sla = True - disrupt_memory_stress: - disruptive = True + - elasticity = True - free_tier_set = True - disrupt_mgmt_backup: - manager_operation = True @@ -143,41 +153,50 @@ - limited = True - schema_changes = True - free_tier_set = True + - elasticity = True - disrupt_multiple_hard_reboot_node: - disruptive = True + - elasticity = True - kubernetes = True - free_tier_set = True - disrupt_network_block: - disruptive = True + - elasticity = True - networking = True - run_with_gemini = False - kubernetes = True - disrupt_network_random_interruptions: - disruptive = True + - elasticity = True - networking = True - run_with_gemini = False - kubernetes = True - disrupt_network_reject_inter_node_communication: - disruptive = True + - elasticity = True - networking = True - run_with_gemini = False - free_tier_set = True - disrupt_network_reject_node_exporter: - disruptive = True + - elasticity = True - networking = True - run_with_gemini = False - disrupt_network_reject_thrift: - disruptive = True + - elasticity = True - networking = True - run_with_gemini = False - disrupt_network_start_stop_interface: - disruptive = True + - elasticity = True - networking = True - run_with_gemini = False - disrupt_no_corrupt_repair: - disruptive = False - kubernetes = True - limited = True + - elasticity = True - disrupt_nodetool_cleanup: - disruptive = False - kubernetes = True @@ -206,6 +225,7 @@ - run_with_gemini = False - kubernetes = True - limited = True + - elasticity = True - disrupt_nodetool_refresh: - disruptive = False - run_with_gemini = False @@ -221,6 +241,7 @@ - topology_changes = True - disrupt_remove_service_level_while_load: - disruptive = True + - elasticity = True - sla = True - disrupt_repair_streaming_err: - disruptive = True @@ -237,10 +258,12 @@ - disrupt_resetlocalschema: - disruptive = False - config_changes = True + - elasticity = True - free_tier_set = True - 
disrupt_restart_then_repair_node: - disruptive = True - kubernetes = True + - elasticity = True - disrupt_restart_with_resharding: - disruptive = True - kubernetes = True @@ -250,14 +273,17 @@ - disruptive = True - full_cluster_restart = True - config_changes = True + - elasticity = True - disrupt_rolling_restart_cluster: - disruptive = True + - elasticity = True - kubernetes = True - free_tier_set = True - disrupt_rolling_restart_cluster: - disruptive = True - kubernetes = True - config_changes = True + - elasticity = True - free_tier_set = True - disrupt_run_cdcstressor_tool: - disruptive = False @@ -282,6 +308,7 @@ - limited = True - disrupt_soft_reboot_node: - disruptive = True + - elasticity = True - kubernetes = True - limited = True - free_tier_set = True @@ -295,14 +322,17 @@ - disruptive = False - disrupt_stop_start_scylla_server: - disruptive = True + - elasticity = True - kubernetes = True - limited = True - disrupt_stop_wait_start_scylla_server: - disruptive = True + - elasticity = True - kubernetes = True - limited = True - disrupt_switch_between_password_authenticator_and_saslauthd_authenticator_and_back: - disruptive = True + - elasticity = True - config_changes = True - disrupt_terminate_and_replace_node: - disruptive = True @@ -316,6 +346,7 @@ - kubernetes = True - disrupt_toggle_audit_syslog: - disruptive = True + - elasticity = True - schema_changes = True - config_changes = True - free_tier_set = True @@ -323,6 +354,7 @@ - disruptive = False - schema_changes = True - config_changes = True + - elasticity = True - free_tier_set = True - disrupt_toggle_table_gc_mode: - kubernetes = True @@ -338,11 +370,13 @@ - kubernetes = True - limited = True - free_tier_set = True + - elasticity = True - disrupt_truncate_large_partition: - disruptive = False - kubernetes = True - free_tier_set = True - disrupt_validate_hh_short_downtime: - disruptive = True + - elasticity = True - kubernetes = True - free_tier_set = True diff --git 
a/jenkins-pipelines/oss/features/elasticity-90-percent-with-nemesis.jenkinsfile b/jenkins-pipelines/oss/features/elasticity-90-percent-with-nemesis.jenkinsfile new file mode 100644 index 00000000000..d5f5e63cdf8 --- /dev/null +++ b/jenkins-pipelines/oss/features/elasticity-90-percent-with-nemesis.jenkinsfile @@ -0,0 +1,12 @@ +#!groovy + +// trick from https://github.com/jenkinsci/workflow-cps-global-lib-plugin/pull/43 +def lib = library identifier: 'sct@snapshot', retriever: legacySCM(scm) + +perfRegressionParallelPipeline( + backend: "aws", + availability_zone: 'a', + test_name: "performance_regression_test.PerformanceRegressionTest", + test_config: """["test-cases/features/elasticity-90-percent-with-nemesis.yaml"]""", + sub_tests: ["test_latency_write_with_nemesis", "test_latency_read_with_nemesis", "test_latency_mixed_with_nemesis"], +) diff --git a/sdcm/nemesis.py b/sdcm/nemesis.py index a3862136f57..40d1f465899 100644 --- a/sdcm/nemesis.py +++ b/sdcm/nemesis.py @@ -213,6 +213,7 @@ class Nemesis: # pylint: disable=too-many-instance-attributes,too-many-public-m # i.e. 
adding/removing nodes/data centers disruptive: bool = False # flag that signal that nemesis disrupts node/cluster, # i.e reboot,kill, hardreboot, terminate + elasticity: bool = False # supported in a 90% disk utilization scenario run_with_gemini: bool = True # flag that signal that nemesis runs with gemini tests networking: bool = False # flag that signal that nemesis interact with nemesis, # i.e switch off/on network interface, network issues @@ -504,6 +505,7 @@ def report(self): def get_list_of_methods_compatible_with_backend( self, disruptive: Optional[bool] = None, + elasticity: Optional[bool] = None, run_with_gemini: Optional[bool] = None, networking: Optional[bool] = None, limited: Optional[bool] = None, @@ -516,6 +518,7 @@ def get_list_of_methods_compatible_with_backend( ) -> List[str]: return self.get_list_of_methods_by_flags( disruptive=disruptive, + elasticity=elasticity, run_with_gemini=run_with_gemini, networking=networking, kubernetes=self._is_it_on_kubernetes() or None, @@ -535,6 +538,7 @@ def _is_it_on_kubernetes(self) -> bool: def get_list_of_methods_by_flags( # pylint: disable=too-many-locals # noqa: PLR0913 self, disruptive: Optional[bool] = None, + elasticity: Optional[bool] = None, run_with_gemini: Optional[bool] = None, networking: Optional[bool] = None, kubernetes: Optional[bool] = None, @@ -549,6 +553,7 @@ def get_list_of_methods_by_flags( # pylint: disable=too-many-locals # noqa: PL ) -> List[str]: subclasses_list = self._get_subclasses( disruptive=disruptive, + elasticity=elasticity, run_with_gemini=run_with_gemini, networking=networking, kubernetes=kubernetes, @@ -5577,6 +5582,7 @@ def wrapper(*args, **kwargs): # pylint: disable=too-many-statements # noqa: PL class SslHotReloadingNemesis(Nemesis): disruptive = False config_changes = True + elasticity = True def disrupt(self): self.disrupt_hot_reloading_internode_certificate() @@ -5637,6 +5643,7 @@ def disrupt(self): class StopWaitStartMonkey(Nemesis): disruptive = True + elasticity = True 
kubernetes = True limited = True zero_node_changes = True @@ -5647,6 +5654,7 @@ def disrupt(self): class StopStartMonkey(Nemesis): disruptive = True + elasticity = True kubernetes = True limited = True @@ -5688,6 +5696,7 @@ def disrupt(self): class RestartThenRepairNodeMonkey(Nemesis): disruptive = True kubernetes = True + elasticity = True def disrupt(self): self.disrupt_restart_then_repair_node() @@ -5695,6 +5704,7 @@ def disrupt(self): class MultipleHardRebootNodeMonkey(Nemesis): disruptive = True + elasticity = True kubernetes = True free_tier_set = True @@ -5704,6 +5714,7 @@ def disrupt(self): class HardRebootNodeMonkey(Nemesis): disruptive = True + elasticity = True kubernetes = True limited = True free_tier_set = True @@ -5714,6 +5725,7 @@ def disrupt(self): class SoftRebootNodeMonkey(Nemesis): disruptive = True + elasticity = True kubernetes = True limited = True free_tier_set = True @@ -5735,6 +5747,7 @@ def disrupt(self): class CorruptThenRepairMonkey(Nemesis): disruptive = True kubernetes = True + elasticity = True def disrupt(self): self.disrupt_destroy_data_then_repair() @@ -5778,6 +5791,7 @@ class MajorCompactionMonkey(Nemesis): disruptive = False kubernetes = True limited = True + elasticity = True def disrupt(self): self.disrupt_major_compaction() @@ -5788,6 +5802,7 @@ class RefreshMonkey(Nemesis): run_with_gemini = False kubernetes = True limited = True + elasticity = False def disrupt(self): self.disrupt_nodetool_refresh(big_sstable=False) @@ -5798,6 +5813,7 @@ class LoadAndStreamMonkey(Nemesis): run_with_gemini = False kubernetes = True limited = True + elasticity = True def disrupt(self): self.disrupt_load_and_stream() @@ -5807,6 +5823,7 @@ class RefreshBigMonkey(Nemesis): disruptive = False run_with_gemini = False kubernetes = True + elasticity = False def disrupt(self): self.disrupt_nodetool_refresh(big_sstable=True) @@ -5824,6 +5841,7 @@ class EnospcMonkey(Nemesis): disruptive = True kubernetes = True limited = True + elasticity = True def 
disrupt(self): self.disrupt_nodetool_enospc() @@ -5832,6 +5850,7 @@ def disrupt(self): class EnospcAllNodesMonkey(Nemesis): disruptive = True kubernetes = True + elasticity = True def disrupt(self): self.disrupt_nodetool_enospc(all_nodes=True) @@ -5841,6 +5860,7 @@ class NodeToolCleanupMonkey(Nemesis): disruptive = False kubernetes = True limited = True + elasticity = True def disrupt(self): self.disrupt_nodetool_cleanup() @@ -5851,6 +5871,7 @@ class TruncateMonkey(Nemesis): kubernetes = True limited = True free_tier_set = True + elasticity = True def disrupt(self): self.disrupt_truncate() @@ -5860,6 +5881,7 @@ class TruncateLargeParititionMonkey(Nemesis): disruptive = False kubernetes = True free_tier_set = True + elasticity = True def disrupt(self): self.disrupt_truncate_large_partition() @@ -6171,6 +6193,7 @@ class ModifyTableMonkey(Nemesis): limited = True schema_changes = True free_tier_set = True + elasticity = True def disrupt(self): self.disrupt_modify_table() @@ -6184,6 +6207,7 @@ class AddDropColumnMonkey(Nemesis): limited = True schema_changes = True free_tier_set = True + elasticity = True def disrupt(self): self.disrupt_add_drop_column() @@ -6203,6 +6227,7 @@ class ToggleGcModeMonkey(Nemesis): disruptive = False schema_changes = True free_tier_set = True + elasticity = True def disrupt(self): self.disrupt_toggle_table_gc_mode() @@ -6241,6 +6266,7 @@ class MgmtRepair(Nemesis): disruptive = False kubernetes = True limited = True + elasticity = True def disrupt(self): self.log.info('disrupt_mgmt_repair_cli Nemesis begin') @@ -6253,6 +6279,7 @@ class MgmtCorruptThenRepair(Nemesis): manager_operation = True disruptive = True kubernetes = True + elasticity = True def disrupt(self): self.disrupt_mgmt_corrupt_then_repair() @@ -6262,6 +6289,7 @@ class AbortRepairMonkey(Nemesis): disruptive = False kubernetes = True limited = True + elasticity = True def disrupt(self): self.disrupt_abort_repair() @@ -6378,6 +6406,7 @@ def disrupt(self): class 
ScyllaKillMonkey(Nemesis): disruptive = True + elasticity = True kubernetes = True free_tier_set = True @@ -6398,6 +6427,7 @@ class SnapshotOperations(Nemesis): disruptive = False kubernetes = True limited = True + elasticity = True def disrupt(self): self.disrupt_snapshot_operations() @@ -6415,6 +6445,7 @@ def disrupt(self): class ClusterRollingRestart(Nemesis): disruptive = True + elasticity = True kubernetes = True free_tier_set = True @@ -6424,6 +6455,7 @@ def disrupt(self): class RollingRestartConfigChangeInternodeCompression(Nemesis): disruptive = True + elasticity = True full_cluster_restart = True config_changes = True @@ -6433,6 +6465,7 @@ def disrupt(self): class ClusterRollingRestartRandomOrder(Nemesis): disruptive = True + elasticity = True kubernetes = True free_tier_set = True @@ -6452,6 +6485,7 @@ class TopPartitions(Nemesis): disruptive = False kubernetes = True limited = True + elasticity = True def disrupt(self): self.disrupt_show_toppartitions() @@ -6718,6 +6752,7 @@ def disrupt(self): class MemoryStressMonkey(Nemesis): disruptive = True + elasticity = True free_tier_set = True def disrupt(self): @@ -6735,6 +6770,7 @@ def disrupt(self): class StartStopMajorCompaction(Nemesis): disruptive = False + elasticity = True def disrupt(self): self.disrupt_start_stop_major_compaction() @@ -6749,6 +6785,7 @@ def disrupt(self): class StartStopCleanupCompaction(Nemesis): disruptive = False + elasticity = True def disrupt(self): self.disrupt_start_stop_cleanup_compaction() @@ -6868,6 +6905,7 @@ def disrupt(self): class ToggleAuditNemesisSyslog(Nemesis): disruptive = True + elasticity = True schema_changes = True config_changes = True free_tier_set = True @@ -6880,6 +6918,7 @@ class BootstrapStreamingErrorNemesis(Nemesis): disruptive = True topology_changes = True + elasticity = True def disrupt(self): self.disrupt_bootstrap_streaming_error() @@ -6887,6 +6926,7 @@ def disrupt(self): class DisableBinaryGossipExecuteMajorCompaction(Nemesis): disruptive = True + 
elasticity = True kubernetes = True def disrupt(self): @@ -6930,6 +6970,7 @@ class SerialRestartOfElectedTopologyCoordinatorNemesis(Nemesis): disruptive = True topology_changes = True + elasticity = True def disrupt(self): self.disrupt_serial_restart_elected_topology_coordinator() diff --git a/test-cases/features/elasticity-90-percent-with-nemesis.yaml b/test-cases/features/elasticity-90-percent-with-nemesis.yaml new file mode 100644 index 00000000000..83bd302e7f1 --- /dev/null +++ b/test-cases/features/elasticity-90-percent-with-nemesis.yaml @@ -0,0 +1,35 @@ +test_duration: 3000 + +prepare_write_cmd: + - "cassandra-stress write no-warmup cl=ALL n=78125000 -schema 'replication(strategy=NetworkTopologyStrategy,replication_factor=3)' -mode cql3 native -rate threads=200 -col 'size=FIXED(128) n=FIXED(8)' -pop seq=1..78125000" + - "cassandra-stress write no-warmup cl=ALL n=78125000 -schema 'replication(strategy=NetworkTopologyStrategy,replication_factor=3)' -mode cql3 native -rate threads=200 -col 'size=FIXED(128) n=FIXED(8)' -pop seq=78125001..156250000" + - "cassandra-stress write no-warmup cl=ALL n=78125000 -schema 'replication(strategy=NetworkTopologyStrategy,replication_factor=3)' -mode cql3 native -rate threads=200 -col 'size=FIXED(128) n=FIXED(8)' -pop seq=156250001..234375000" + - "cassandra-stress write no-warmup cl=ALL n=78125000 -schema 'replication(strategy=NetworkTopologyStrategy,replication_factor=3)' -mode cql3 native -rate threads=200 -col 'size=FIXED(128) n=FIXED(8)' -pop seq=234375001..312500000" + +stress_cmd: + - "cassandra-stress read no-warmup cl=QUORUM duration=2400m -schema 'replication(strategy=NetworkTopologyStrategy,replication_factor=3)' -mode cql3 native -rate 'threads=200 fixed=3000/s' -col 'size=FIXED(128) n=FIXED(8)' -pop 'dist=gauss(1..162500000,81250000,1625000)'" + - "cassandra-stress write no-warmup cl=ONE duration=2400m -schema 'replication(strategy=NetworkTopologyStrategy,replication_factor=3)' -mode cql3 native -rate 'threads=1 
fixed=10/s' -col 'size=FIXED(128) n=FIXED(8)' -pop 'dist=gauss(1..162500000,81250000,102500)'" + +pre_create_keyspace: "CREATE KEYSPACE IF NOT EXISTS keyspace1 WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 3} AND tablets = {'initial': 64};" + +n_db_nodes: 3 +nemesis_add_node_cnt: 3 +n_loaders: 4 +n_monitor_nodes: 1 +nemesis_grow_shrink_instance_type: 'i4i.large' + +instance_type_loader: 'c6i.2xlarge' +instance_type_monitor: 't3.large' +instance_type_db: 'i4i.large' + +nemesis_class_name: 'SisyphusMonkey' +nemesis_selector: ['elasticity'] +nemesis_seed: '019' +nemesis_interval: 5 +nemesis_during_prepare: false + +user_prefix: 'elasticity-test-nemesis' +space_node_threshold: 6442450 +ami_id_db_scylla_desc: 'VERSION_DESC' +round_robin: true +append_scylla_args: '--blocked-reactor-notify-ms 5 --abort-on-lsa-bad-alloc 1 --abort-on-seastar-bad-alloc --abort-on-internal-error 1 --abort-on-ebadf 1'