Skip to content

Commit

Permalink
fix(nemesis): filter raft-topology errors when starting/stopping nodes
Browse files Browse the repository at this point in the history
Raft generates the following error:
raft_topology - topology change coordinator fiber got error std::runtime_error
when one of the nodes is stopped. It was decided that these errors can be safely ignored.
fixes:9031

(cherry picked from commit 2093714)
  • Loading branch information
timtimb0t authored and fruch committed Dec 23, 2024
1 parent a31a657 commit b60c19e
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions sdcm/nemesis.py
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,7 @@ def _kill_scylla_daemon(self):
self.target_node.wait_jmx_up()
self.cluster.wait_for_schema_agreement()

@decorate_with_context(ignore_raft_topology_cmd_failing)
def disrupt_stop_wait_start_scylla_server(self, sleep_time=300): # pylint: disable=invalid-name
self.target_node.stop_scylla_server(verify_up=False, verify_down=True)
self.log.info("Sleep for %s seconds", sleep_time)
Expand Down Expand Up @@ -1097,7 +1098,7 @@ def get_all_sstables(self, tables: list[str], node: BaseNode = None):

return sstables

@decorate_with_context(ignore_ycsb_connection_refused)
@decorate_with_context([ignore_ycsb_connection_refused, ignore_raft_topology_cmd_failing])
def _destroy_data_and_restart_scylla(self, keyspaces_for_destroy: list = None, sstables_to_destroy_perc: int = 50): # pylint: disable=too-many-statements
tables = self.cluster.get_non_system_ks_cf_list(db_node=self.target_node, filter_empty_tables=False,
filter_by_keyspace=keyspaces_for_destroy)
Expand Down Expand Up @@ -1304,7 +1305,7 @@ def _add_and_init_new_cluster_nodes(self, count, timeout=MAX_TIME_WAIT_FOR_NEW_N
InfoEvent(message="FinishEvent - New Nodes are up and normal").publish()
return new_nodes

@decorate_with_context(ignore_ycsb_connection_refused)
@decorate_with_context([ignore_ycsb_connection_refused, ignore_raft_topology_cmd_failing])
def _terminate_cluster_node(self, node):
self.cluster.terminate_node(node)
self.monitoring_set.reconfigure_scylla_monitoring()
Expand Down Expand Up @@ -1623,7 +1624,7 @@ def wait_for_old_node_to_removed():
new_node.set_seed_flag(True)
self.cluster.update_seed_provider()

@decorate_with_context(ignore_ycsb_connection_refused)
@decorate_with_context([ignore_ycsb_connection_refused, ignore_raft_topology_cmd_failing])
def disrupt_kill_scylla(self):
self._kill_scylla_daemon()

Expand Down

0 comments on commit b60c19e

Please sign in to comment.