From bc3552ae58212493d4af3cf3fe01e21671095b59 Mon Sep 17 00:00:00 2001 From: Lukasz Sojka Date: Wed, 20 Nov 2024 10:54:28 +0100 Subject: [PATCH] improvement(perf): add validation rules for latency decorator Added validation rules for results sent by `latency_calculator_decorator` to Argus. Each workload and result name (nemesis, predefined step) may set own rules. Current rules were created based on existing results - to pass typical good results. closes: https://github.com/scylladb/scylla-cluster-tests/issues/9237 --- ...-error-thresholds-nemesis-ent-tablets.yaml | 51 +++++++++++++ ...r-error-thresholds-nemesis-ent-vnodes.yaml | 51 +++++++++++++ ...or-error-thresholds-steps-ent-tablets.yaml | 73 +++++++++++++++++++ ...tor-error-thresholds-steps-ent-vnodes.yaml | 73 +++++++++++++++++++ defaults/test_default.yaml | 24 ++++++ docs/configuration_options.md | 7 ++ ...ncy-650gb-with-nemesis-tablets.jenkinsfile | 2 +- ...ion-latency-650gb-with-nemesis.jenkinsfile | 2 +- ...efined-throughput-steps-vnodes.jenkinsfile | 2 +- ...fined-throughput-steps-tablets.jenkinsfile | 2 +- ...efined-throughput-steps-vnodes.jenkinsfile | 2 +- ...throughput-steps-write-tablets.jenkinsfile | 2 +- ...-throughput-steps-write-vnodes.jenkinsfile | 2 +- sdcm/argus_results.py | 15 ++-- sdcm/sct_config.py | 4 + sdcm/utils/decorators.py | 4 +- unit_tests/test_argus_results.py | 14 ++-- 17 files changed, 308 insertions(+), 22 deletions(-) create mode 100644 configurations/performance/latency-decorator-error-thresholds-nemesis-ent-tablets.yaml create mode 100644 configurations/performance/latency-decorator-error-thresholds-nemesis-ent-vnodes.yaml create mode 100644 configurations/performance/latency-decorator-error-thresholds-steps-ent-tablets.yaml create mode 100644 configurations/performance/latency-decorator-error-thresholds-steps-ent-vnodes.yaml diff --git a/configurations/performance/latency-decorator-error-thresholds-nemesis-ent-tablets.yaml b/configurations/performance/latency-decorator-error-thresholds-nemesis-ent-tablets.yaml new file mode 100644 index 0000000000..d5a726de3f --- /dev/null +++ b/configurations/performance/latency-decorator-error-thresholds-nemesis-ent-tablets.yaml @@ -0,0 +1,51 @@ +latency_decorator_error_thresholds: + write: + _mgmt_repair_cli: + duration: + fixed_limit: 7200 + _terminate_and_wait: + duration: + fixed_limit: 450 + add_new_nodes: + duration: + fixed_limit: 2500 + decommission_nodes: + duration: + fixed_limit: 1800 + replace_node: + duration: + fixed_limit: 3600 + + read: + _mgmt_repair_cli: + duration: + fixed_limit: 3200 + _terminate_and_wait: + duration: + fixed_limit: 450 + add_new_nodes: + duration: + fixed_limit: 3200 + decommission_nodes: + duration: + fixed_limit: 1800 + replace_node: + duration: + fixed_limit: 3000 + + mixed: + _mgmt_repair_cli: + duration: + fixed_limit: 4200 + _terminate_and_wait: + duration: + fixed_limit: 450 + add_new_nodes: + duration: + fixed_limit: 2500 + decommission_nodes: + duration: + fixed_limit: 1600 + replace_node: + duration: + fixed_limit: 3000 diff --git a/configurations/performance/latency-decorator-error-thresholds-nemesis-ent-vnodes.yaml b/configurations/performance/latency-decorator-error-thresholds-nemesis-ent-vnodes.yaml new file mode 100644 index 0000000000..691c44217b --- /dev/null +++ b/configurations/performance/latency-decorator-error-thresholds-nemesis-ent-vnodes.yaml @@ -0,0 +1,51 @@ +latency_decorator_error_thresholds: + write: + _mgmt_repair_cli: + duration: + fixed_limit: 7200 + _terminate_and_wait: + duration: + 
fixed_limit: 450 + add_new_nodes: + duration: + fixed_limit: 4200 + decommission_nodes: + duration: + fixed_limit: 5200 + replace_node: + duration: + fixed_limit: 1800 + + read: + _mgmt_repair_cli: + duration: + fixed_limit: 2000 + _terminate_and_wait: + duration: + fixed_limit: 450 + add_new_nodes: + duration: + fixed_limit: 1800 + decommission_nodes: + duration: + fixed_limit: 2500 + replace_node: + duration: + fixed_limit: 1300 + + mixed: + _mgmt_repair_cli: + duration: + fixed_limit: 2500 + _terminate_and_wait: + duration: + fixed_limit: 450 + add_new_nodes: + duration: + fixed_limit: 2400 + decommission_nodes: + duration: + fixed_limit: 2800 + replace_node: + duration: + fixed_limit: 1500 diff --git a/configurations/performance/latency-decorator-error-thresholds-steps-ent-tablets.yaml b/configurations/performance/latency-decorator-error-thresholds-steps-ent-tablets.yaml new file mode 100644 index 0000000000..85731d8090 --- /dev/null +++ b/configurations/performance/latency-decorator-error-thresholds-steps-ent-tablets.yaml @@ -0,0 +1,73 @@ +latency_decorator_error_thresholds: + write: + unthrottled: + P90 write: + fixed_limit: null + P99 write: + fixed_limit: null + Throughput write: + best_pct: 5 + + read: + "150000": + P90 read: + fixed_limit: 1 + P99 read: + fixed_limit: 1 + "300000": + P90 read: + fixed_limit: 1 + P99 read: + fixed_limit: 1 + "450000": + P90 read: + fixed_limit: 1 + P99 read: + fixed_limit: 3 + unthrottled: + P90 read: + fixed_limit: null + P99 read: + fixed_limit: null + Throughput read: + best_pct: 5 + + mixed: + "50000": + P90 write: + fixed_limit: 1 + P90 read: + fixed_limit: 1 + P99 write: + fixed_limit: 3 + P99 read: + fixed_limit: 3 + "150000": + P90 write: + fixed_limit: 1 + P90 read: + fixed_limit: 2 + P99 write: + fixed_limit: 3 + P99 read: + fixed_limit: 3 + "300000": + P90 write: + fixed_limit: 3 + P90 read: + fixed_limit: 3 + P99 write: + fixed_limit: 5 + P99 read: + fixed_limit: 5 + unthrottled: + P90 write: + fixed_limit: null + P90 read: + fixed_limit: null + P99 write: + fixed_limit: null + P99 read: + fixed_limit: null + Throughput write: + best_pct: 5 diff --git a/configurations/performance/latency-decorator-error-thresholds-steps-ent-vnodes.yaml b/configurations/performance/latency-decorator-error-thresholds-steps-ent-vnodes.yaml new file mode 100644 index 0000000000..b2a440f2e3 --- /dev/null +++ b/configurations/performance/latency-decorator-error-thresholds-steps-ent-vnodes.yaml @@ -0,0 +1,73 @@ +latency_decorator_error_thresholds: + write: + unthrottled: + P90 write: + fixed_limit: null + P99 write: + fixed_limit: null + Throughput write: + best_pct: 5 + + read: + "150000": + P90 read: + fixed_limit: 1 + P99 read: + fixed_limit: 1 + "300000": + P90 read: + fixed_limit: 1 + P99 read: + fixed_limit: 1 + "450000": + P90 read: + fixed_limit: 1 + P99 read: + fixed_limit: 5 + unthrottled: + P90 read: + fixed_limit: null + P99 read: + fixed_limit: null + Throughput read: + best_pct: 5 + + mixed: + "50000": + P90 write: + fixed_limit: 1 + P90 read: + fixed_limit: 1 + P99 write: + fixed_limit: 3 + P99 read: + fixed_limit: 3 + "150000": + P90 write: + fixed_limit: 1 + P90 read: + fixed_limit: 2 + P99 write: + fixed_limit: 3 + P99 read: + fixed_limit: 3 + "300000": + P90 write: + fixed_limit: 3 + P90 read: + fixed_limit: 3 + P99 write: + fixed_limit: 5 + P99 read: + fixed_limit: 5 + unthrottled: + P90 write: + fixed_limit: null + P90 read: + fixed_limit: null + P99 write: + fixed_limit: null + P99 read: + fixed_limit: null + Throughput write: + 
best_pct: 5 diff --git a/defaults/test_default.yaml b/defaults/test_default.yaml index 80fd85736f..1837a8ba46 100644 --- a/defaults/test_default.yaml +++ b/defaults/test_default.yaml @@ -268,3 +268,27 @@ zero_token_instance_type_db: 'i4i.large' use_zero_nodes: false latte_schema_parameters: {} + +latency_decorator_error_thresholds: + write: + default: + P90 write: + fixed_limit: 5 + P99 write: + fixed_limit: 10 + read: + default: + P90 read: + fixed_limit: 5 + P99 read: + fixed_limit: 10 + mixed: + default: + P90 write: + fixed_limit: 5 + P90 read: + fixed_limit: 5 + P99 write: + fixed_limit: 10 + P99 read: + fixed_limit: 10 diff --git a/docs/configuration_options.md b/docs/configuration_options.md index f42e51ed68..c372308353 100644 --- a/docs/configuration_options.md +++ b/docs/configuration_options.md @@ -2735,3 +2735,10 @@ Instance type for zero token node AWS account id on behalf of which the test is run **default:** N/A + + +## **latency_decorator_error_thresholds** / SCT_LATENCY_DECORATOR_ERROR_THRESHOLDS + +Error thresholds for latency decorator. Defined by dict: {: {:{: {: }}} + +**default:** {'write': {'default': {'P90 write': {'fixed_limit': 5}, 'P99 write': {'fixed_limit': 10}}}, 'read': {'default': {'P90 read': {'fixed_limit': 5}, 'P99 read': {'fixed_limit': 10}}}, 'mixed': {'default': {'P90 write': {'fixed_limit': 5}, 'P90 read': {'fixed_limit': 5}, 'P99 write': {'fixed_limit': 10}, 'P99 read': {'fixed_limit': 10}}}} diff --git a/jenkins-pipelines/performance/branch-perf-v15/scylla-enterprise/perf-regression/scylla-enterprise-perf-regression-latency-650gb-with-nemesis-tablets.jenkinsfile b/jenkins-pipelines/performance/branch-perf-v15/scylla-enterprise/perf-regression/scylla-enterprise-perf-regression-latency-650gb-with-nemesis-tablets.jenkinsfile index 2a1807b27c..3967f50a13 100644 --- a/jenkins-pipelines/performance/branch-perf-v15/scylla-enterprise/perf-regression/scylla-enterprise-perf-regression-latency-650gb-with-nemesis-tablets.jenkinsfile +++ b/jenkins-pipelines/performance/branch-perf-v15/scylla-enterprise/perf-regression/scylla-enterprise-perf-regression-latency-650gb-with-nemesis-tablets.jenkinsfile @@ -6,7 +6,7 @@ def lib = library identifier: 'sct@snapshot', retriever: legacySCM(scm) perfRegressionParallelPipeline( backend: "aws", test_name: "performance_regression_test.PerformanceRegressionTest", - test_config: """["test-cases/performance/perf-regression-latency-650gb-with-nemesis.yaml", "configurations/disable_kms.yaml"]""", + test_config: """["test-cases/performance/perf-regression-latency-650gb-with-nemesis.yaml", "configurations/disable_kms.yaml", "configurations/performance/latency-decorator-error-thresholds-nemesis-ent-tablets.yaml"]""", sub_tests: ["test_latency_write_with_nemesis", "test_latency_read_with_nemesis", "test_latency_mixed_with_nemesis"], test_email_title: "latency during operations / tablets", perf_extra_jobs_to_compare: "scylla-master/perf-regression/scylla-master-perf-regression-latency-650gb-with-nemesis-tablets", diff --git a/jenkins-pipelines/performance/branch-perf-v15/scylla-enterprise/perf-regression/scylla-enterprise-perf-regression-latency-650gb-with-nemesis.jenkinsfile b/jenkins-pipelines/performance/branch-perf-v15/scylla-enterprise/perf-regression/scylla-enterprise-perf-regression-latency-650gb-with-nemesis.jenkinsfile index 21cacdb0ba..9c87619e86 100644 --- a/jenkins-pipelines/performance/branch-perf-v15/scylla-enterprise/perf-regression/scylla-enterprise-perf-regression-latency-650gb-with-nemesis.jenkinsfile +++ 
b/jenkins-pipelines/performance/branch-perf-v15/scylla-enterprise/perf-regression/scylla-enterprise-perf-regression-latency-650gb-with-nemesis.jenkinsfile @@ -6,7 +6,7 @@ def lib = library identifier: 'sct@snapshot', retriever: legacySCM(scm) perfRegressionParallelPipeline( backend: "aws", test_name: "performance_regression_test.PerformanceRegressionTest", - test_config: """["test-cases/performance/perf-regression-latency-650gb-with-nemesis.yaml", "configurations/tablets_disabled.yaml", "configurations/disable_kms.yaml"]""", + test_config: """["test-cases/performance/perf-regression-latency-650gb-with-nemesis.yaml", "configurations/tablets_disabled.yaml", "configurations/disable_kms.yaml", "configurations/performance/latency-decorator-error-thresholds-nemesis-ent-vnodes.yaml"]""", sub_tests: ["test_latency_write_with_nemesis", "test_latency_read_with_nemesis", "test_latency_mixed_with_nemesis"], perf_extra_jobs_to_compare: """["scylla-enterprise/scylla-enterprise-perf-regression-latency-650gb-with-nemesis","scylla-enterprise/perf-regression/scylla-enterprise-perf-regression-latency-650gb-with-nemesis"]""", ) diff --git a/jenkins-pipelines/performance/branch-perf-v15/scylla-enterprise/perf-regression/scylla-enterprise-perf-regression-predefined-throughput-steps-vnodes.jenkinsfile b/jenkins-pipelines/performance/branch-perf-v15/scylla-enterprise/perf-regression/scylla-enterprise-perf-regression-predefined-throughput-steps-vnodes.jenkinsfile index 40b3952ce3..88a2ad2171 100644 --- a/jenkins-pipelines/performance/branch-perf-v15/scylla-enterprise/perf-regression/scylla-enterprise-perf-regression-predefined-throughput-steps-vnodes.jenkinsfile +++ b/jenkins-pipelines/performance/branch-perf-v15/scylla-enterprise/perf-regression/scylla-enterprise-perf-regression-predefined-throughput-steps-vnodes.jenkinsfile @@ -7,6 +7,6 @@ perfRegressionParallelPipeline( backend: "aws", aws_region: "us-east-1", test_name: "performance_regression_gradual_grow_throughput.PerformanceRegressionPredefinedStepsTest", - test_config: '''["test-cases/performance/perf-regression-predefined-throughput-steps.yaml", "configurations/performance/cassandra_stress_gradual_load_steps_enterprise.yaml", "configurations/disable_kms.yaml", "configurations/tablets_disabled.yaml", "configurations/disable_speculative_retry.yaml"]''', + test_config: '''["test-cases/performance/perf-regression-predefined-throughput-steps.yaml", "configurations/performance/cassandra_stress_gradual_load_steps_enterprise.yaml", "configurations/disable_kms.yaml", "configurations/tablets_disabled.yaml", "configurations/disable_speculative_retry.yaml", "configurations/performance/latency-decorator-error-thresholds-steps-ent-vnodes.yaml"]''', sub_tests: ["test_write_gradual_increase_load", "test_read_gradual_increase_load", "test_mixed_gradual_increase_load"], ) diff --git a/jenkins-pipelines/performance/branch-perf-v16/scylla-enterprise/perf-regression/scylla-enterprise-perf-regression-predefined-throughput-steps-tablets.jenkinsfile b/jenkins-pipelines/performance/branch-perf-v16/scylla-enterprise/perf-regression/scylla-enterprise-perf-regression-predefined-throughput-steps-tablets.jenkinsfile index df9ea40138..9c8771530b 100644 --- a/jenkins-pipelines/performance/branch-perf-v16/scylla-enterprise/perf-regression/scylla-enterprise-perf-regression-predefined-throughput-steps-tablets.jenkinsfile +++ b/jenkins-pipelines/performance/branch-perf-v16/scylla-enterprise/perf-regression/scylla-enterprise-perf-regression-predefined-throughput-steps-tablets.jenkinsfile @@ 
-7,6 +7,6 @@ perfRegressionParallelPipeline( backend: "aws", aws_region: "us-east-1", test_name: "performance_regression_gradual_grow_throughput.PerformanceRegressionPredefinedStepsTest", - test_config: '''["test-cases/performance/perf-regression-predefined-throughput-steps.yaml", "configurations/performance/cassandra_stress_gradual_load_steps_enterprise.yaml", "configurations/disable_kms.yaml", "configurations/disable_speculative_retry.yaml"]''', + test_config: '''["test-cases/performance/perf-regression-predefined-throughput-steps.yaml", "configurations/performance/cassandra_stress_gradual_load_steps_enterprise.yaml", "configurations/disable_kms.yaml", "configurations/disable_speculative_retry.yaml", "configurations/performance/latency-decorator-error-thresholds-steps-ent-tablets.yaml"]''', sub_tests: ["test_read_gradual_increase_load", "test_mixed_gradual_increase_load"], ) diff --git a/jenkins-pipelines/performance/branch-perf-v16/scylla-enterprise/perf-regression/scylla-enterprise-perf-regression-predefined-throughput-steps-vnodes.jenkinsfile b/jenkins-pipelines/performance/branch-perf-v16/scylla-enterprise/perf-regression/scylla-enterprise-perf-regression-predefined-throughput-steps-vnodes.jenkinsfile index 939768fba9..19d99da9e9 100644 --- a/jenkins-pipelines/performance/branch-perf-v16/scylla-enterprise/perf-regression/scylla-enterprise-perf-regression-predefined-throughput-steps-vnodes.jenkinsfile +++ b/jenkins-pipelines/performance/branch-perf-v16/scylla-enterprise/perf-regression/scylla-enterprise-perf-regression-predefined-throughput-steps-vnodes.jenkinsfile @@ -7,6 +7,6 @@ perfRegressionParallelPipeline( backend: "aws", aws_region: "us-east-1", test_name: "performance_regression_gradual_grow_throughput.PerformanceRegressionPredefinedStepsTest", - test_config: '''["test-cases/performance/perf-regression-predefined-throughput-steps.yaml", "configurations/performance/cassandra_stress_gradual_load_steps_enterprise.yaml", "configurations/disable_kms.yaml", "configurations/tablets_disabled.yaml", "configurations/disable_speculative_retry.yaml"]''', + test_config: '''["test-cases/performance/perf-regression-predefined-throughput-steps.yaml", "configurations/performance/cassandra_stress_gradual_load_steps_enterprise.yaml", "configurations/disable_kms.yaml", "configurations/tablets_disabled.yaml", "configurations/disable_speculative_retry.yaml", "configurations/performance/latency-decorator-error-thresholds-steps-ent-vnodes.yaml"]''', sub_tests: ["test_read_gradual_increase_load", "test_mixed_gradual_increase_load"], ) diff --git a/jenkins-pipelines/performance/branch-perf-v16/scylla-enterprise/perf-regression/scylla-enterprise-perf-regression-predefined-throughput-steps-write-tablets.jenkinsfile b/jenkins-pipelines/performance/branch-perf-v16/scylla-enterprise/perf-regression/scylla-enterprise-perf-regression-predefined-throughput-steps-write-tablets.jenkinsfile index ab623dcded..d313c69019 100644 --- a/jenkins-pipelines/performance/branch-perf-v16/scylla-enterprise/perf-regression/scylla-enterprise-perf-regression-predefined-throughput-steps-write-tablets.jenkinsfile +++ b/jenkins-pipelines/performance/branch-perf-v16/scylla-enterprise/perf-regression/scylla-enterprise-perf-regression-predefined-throughput-steps-write-tablets.jenkinsfile @@ -7,6 +7,6 @@ perfRegressionParallelPipeline( backend: "aws", aws_region: "us-east-1", test_name: "performance_regression_gradual_grow_throughput.PerformanceRegressionPredefinedStepsTest", - test_config: 
'''["test-cases/performance/perf-regression-predefined-throughput-steps.yaml", "configurations/performance/cassandra_stress_gradual_load_steps_enterprise.yaml", "configurations/disable_kms.yaml", "configurations/disable_speculative_retry.yaml","configurations/perf-loaders-shard-aware-config.yaml"]''', + test_config: '''["test-cases/performance/perf-regression-predefined-throughput-steps.yaml", "configurations/performance/cassandra_stress_gradual_load_steps_enterprise.yaml", "configurations/disable_kms.yaml", "configurations/disable_speculative_retry.yaml","configurations/perf-loaders-shard-aware-config.yaml", "configurations/performance/latency-decorator-error-thresholds-steps-ent-tablets.yaml"]''', sub_tests: ["test_write_gradual_increase_load"], ) diff --git a/jenkins-pipelines/performance/branch-perf-v16/scylla-enterprise/perf-regression/scylla-enterprise-perf-regression-predefined-throughput-steps-write-vnodes.jenkinsfile b/jenkins-pipelines/performance/branch-perf-v16/scylla-enterprise/perf-regression/scylla-enterprise-perf-regression-predefined-throughput-steps-write-vnodes.jenkinsfile index c55db32132..7d118b6772 100644 --- a/jenkins-pipelines/performance/branch-perf-v16/scylla-enterprise/perf-regression/scylla-enterprise-perf-regression-predefined-throughput-steps-write-vnodes.jenkinsfile +++ b/jenkins-pipelines/performance/branch-perf-v16/scylla-enterprise/perf-regression/scylla-enterprise-perf-regression-predefined-throughput-steps-write-vnodes.jenkinsfile @@ -7,6 +7,6 @@ perfRegressionParallelPipeline( backend: "aws", aws_region: "us-east-1", test_name: "performance_regression_gradual_grow_throughput.PerformanceRegressionPredefinedStepsTest", - test_config: '''["test-cases/performance/perf-regression-predefined-throughput-steps.yaml", "configurations/performance/cassandra_stress_gradual_load_steps_enterprise.yaml", "configurations/disable_kms.yaml", "configurations/tablets_disabled.yaml", "configurations/disable_speculative_retry.yaml","configurations/perf-loaders-shard-aware-config.yaml"]''', + test_config: '''["test-cases/performance/perf-regression-predefined-throughput-steps.yaml", "configurations/performance/cassandra_stress_gradual_load_steps_enterprise.yaml", "configurations/disable_kms.yaml", "configurations/tablets_disabled.yaml", "configurations/disable_speculative_retry.yaml","configurations/perf-loaders-shard-aware-config.yaml", "configurations/performance/latency-decorator-error-thresholds-steps-ent-vnodes.yaml"]''', sub_tests: ["test_write_gradual_increase_load"], ) diff --git a/sdcm/argus_results.py b/sdcm/argus_results.py index b265c5b686..d3e0ee3b9f 100644 --- a/sdcm/argus_results.py +++ b/sdcm/argus_results.py @@ -154,11 +154,13 @@ def submit_results_to_argus(argus_client: ArgusClient, result_table: GenericResu def send_result_to_argus(argus_client: ArgusClient, workload: str, name: str, description: str, cycle: int, result: dict, - start_time: float = 0): + start_time: float = 0, error_thresholds: dict = None): result_table = workload_to_table[workload]() result_table.name = f"{workload} - {name} - latencies" result_table.description = f"{workload} workload - {description}" - operation_error_thresholds = LATENCY_ERROR_THRESHOLDS.get(name, LATENCY_ERROR_THRESHOLDS["default"]) + if error_thresholds: + error_thresholds = error_thresholds[workload]["default"] | error_thresholds[workload].get(name, {}) + result_table.validation_rules = {metric: ValidationRule(**rules) for metric, rules in error_thresholds.items()} try: start_time = datetime.fromtimestamp(start_time
or time.time(), tz=timezone.utc).strftime('%H:%M:%S') except ValueError: @@ -172,16 +174,15 @@ def send_result_to_argus(argus_client: ArgusClient, workload: str, name: str, de result_table.add_result(column=f"P{percentile} {operation}", row=f"Cycle #{cycle}", value=value, - status=Status.PASS if value < operation_error_thresholds[f"percentile_{percentile}"] else Status.ERROR) + status=Status.UNSET) if value := summary[operation.upper()].get("throughput", None): - # TODO: This column will be validated in the gradual test. `PASS` is temporary status. Should be handled later result_table.add_result(column=f"Throughput {operation.lower()}", row=f"Cycle #{cycle}", value=value, status=Status.UNSET) result_table.add_result(column="duration", row=f"Cycle #{cycle}", - value=result["duration_in_sec"], status=Status.PASS) + value=result["duration_in_sec"], status=Status.UNSET) try: overview_screenshot = [screenshot for screenshot in result["screenshots"] if "overview" in screenshot][0] result_table.add_result(column="Overview", row=f"Cycle #{cycle}", @@ -205,10 +206,10 @@ def send_result_to_argus(argus_client: ArgusClient, workload: str, name: str, de result_table.name = f"{workload} - {name} - stalls - {event_name}" result_table.description = f"{event_name} event counts" result_table.add_result(column="total", row=f"Cycle #{cycle}", - value=stall_stats["counter"], status=Status.PASS) + value=stall_stats["counter"], status=Status.UNSET) for interval, value in stall_stats["ms"].items(): result_table.add_result(column=f"{interval}ms", row=f"Cycle #{cycle}", - value=value, status=Status.PASS) + value=value, status=Status.UNSET) submit_results_to_argus(argus_client, result_table) diff --git a/sdcm/sct_config.py b/sdcm/sct_config.py index 706aaccbf1..93bf2b79ae 100644 --- a/sdcm/sct_config.py +++ b/sdcm/sct_config.py @@ -1688,6 +1688,10 @@ class SCTConfiguration(dict): dict(name="sct_aws_account_id", env="SCT_AWS_ACCOUNT_ID", type=str, help="AWS account id on behalf of which the test is run"), + dict(name="latency_decorator_error_thresholds", env="SCT_LATENCY_DECORATOR_ERROR_THRESHOLDS", type=dict_or_str, + help="Error thresholds for latency decorator." 
+ " Defined by dict: {<workload>: {<name or 'default'>: {<metric>: {<rule>: <value>}}}}"), + ] required_params = ['cluster_backend', 'test_duration', 'n_db_nodes', 'n_loaders', 'use_preinstalled_scylla', diff --git a/sdcm/utils/decorators.py b/sdcm/utils/decorators.py index 0298da30b9..704513f4c2 100644 --- a/sdcm/utils/decorators.py +++ b/sdcm/utils/decorators.py @@ -254,7 +254,7 @@ def wrapped(*args, **kwargs): # noqa: PLR0914 hdr_throughput += values["throughput"] result["cycle_hdr_throughput"] = round(hdr_throughput) result["reactor_stalls_stats"] = reactor_stall_stats - + error_thresholds = tester.params.get("latency_decorator_error_thresholds") if "steady" in func_name.lower(): if 'Steady State' not in latency_results: latency_results['Steady State'] = result @@ -266,6 +266,7 @@ def wrapped(*args, **kwargs): # noqa: PLR0914 cycle=0, result=result, start_time=start, + error_thresholds=error_thresholds, ) else: latency_results[func_name]['cycles'].append(result) @@ -277,6 +278,7 @@ def wrapped(*args, **kwargs): # noqa: PLR0914 cycle=len(latency_results[func_name]['cycles']), result=result, start_time=start, + error_thresholds=error_thresholds, ) with open(latency_results_file_path, 'w', encoding="utf-8") as file: diff --git a/unit_tests/test_argus_results.py b/unit_tests/test_argus_results.py index 4a16306f41..c0464e239d 100644 --- a/unit_tests/test_argus_results.py +++ b/unit_tests/test_argus_results.py @@ -38,11 +38,11 @@ def test_send_latency_decorator_result_to_argus(): sut_timestamp=0, sut_details='', results=[ - Cell(column='P90 write', row='Cycle #1', value=2.15, status=Status.PASS), - Cell(column='P99 write', row='Cycle #1', value=3.62, status=Status.PASS), - Cell(column='P90 read', row='Cycle #1', value=2.86, status=Status.PASS), - Cell(column='P99 read', row='Cycle #1', value=5.36, status=Status.PASS), - Cell(column='duration', row='Cycle #1', value=2654, status=Status.PASS), + Cell(column='P90 write', row='Cycle #1', value=2.15, status=Status.UNSET), + Cell(column='P99 write', row='Cycle #1', value=3.62, status=Status.UNSET), + Cell(column='P90 read', row='Cycle #1', value=2.86, status=Status.UNSET), + Cell(column='P99 read', row='Cycle #1', value=5.36, status=Status.UNSET), + Cell(column='duration', row='Cycle #1', value=2654, status=Status.UNSET), Cell(column='Overview', row='Cycle #1', value='https://cloudius-jenkins-test.s3.amazonaws.com/a9b9a308-6ff8-4cc8-b33d-c439f75c9949/20240721_125838/' 'grafana-screenshot-overview-20240721_125838-perf-latency-grow-shrink-ubuntu-monitor-node-a9b9a308-1.png', @@ -59,8 +59,8 @@ def test_send_latency_decorator_result_to_argus(): sut_timestamp=0, sut_details='', results=[ - Cell(column='total', row='Cycle #1', value=18, status=Status.PASS), - Cell(column='10ms', row='Cycle #1', value=18, status=Status.PASS) + Cell(column='total', row='Cycle #1', value=18, status=Status.UNSET), + Cell(column='10ms', row='Cycle #1', value=18, status=Status.UNSET) ] )) ]
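
Note on how the thresholds are resolved: every workload section ("write", "read", "mixed") carries a "default" entry plus optional per-result-name entries (a nemesis name or a predefined throughput step), and send_result_to_argus() merges the two with the per-name rules taking precedence. The sketch below only illustrates that lookup and the two rule kinds used in the YAML files above; the passes() helper and the exact meaning of best_pct (allowed regression against the best known result) are assumptions made for the example, not code or semantics taken from this patch.

# Illustration only (not part of the patch): resolving and applying the rules.
error_thresholds = {
    "write": {
        "default": {"P90 write": {"fixed_limit": 5}, "P99 write": {"fixed_limit": 10}},
        "_terminate_and_wait": {"duration": {"fixed_limit": 450}},
        "unthrottled": {"Throughput write": {"best_pct": 5}},
    },
}

def rules_for(workload: str, name: str) -> dict:
    # Same dict union as in send_result_to_argus(): per-name rules override the defaults.
    per_workload = error_thresholds[workload]
    return per_workload["default"] | per_workload.get(name, {})

def passes(metric: str, value: float, rules: dict, best=None) -> bool:
    # Assumed semantics: fixed_limit is an upper bound on the measured value;
    # best_pct allows the value to drop at most N percent below the best known
    # result (used here for throughput, where lower is worse).
    rule = rules.get(metric, {})
    if rule.get("fixed_limit") is not None and value > rule["fixed_limit"]:
        return False
    if rule.get("best_pct") is not None and best is not None:
        if value < best * (1 - rule["best_pct"] / 100):
            return False
    return True

rules = rules_for("write", "_terminate_and_wait")
print(passes("P99 write", 7.5, rules))   # True  - below the default limit of 10
print(passes("duration", 600, rules))    # False - above the nemesis limit of 450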
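
The statuses in the result tables flip from PASS to UNSET because SCT no longer decides pass/fail itself: send_result_to_argus() attaches a ValidationRule per metric and leaves the verdict to Argus. Below is a minimal stand-in sketch of that construction; the dataclass is only a placeholder for the real ValidationRule class from the Argus client, and just the keyword names fixed_limit and best_pct come from this patch.

# Illustration only: how the merged rules become per-metric validation rules.
from dataclasses import dataclass

@dataclass
class ValidationRule:  # placeholder for the real Argus client class
    fixed_limit: float = None
    best_pct: float = None

workload, name = "mixed", "decommission_nodes"
error_thresholds = {
    "mixed": {
        "default": {"P99 read": {"fixed_limit": 10}},
        "decommission_nodes": {"duration": {"fixed_limit": 1600}},
    },
}

# Same merge as in sdcm/argus_results.py: per-name rules override the workload defaults.
merged = error_thresholds[workload]["default"] | error_thresholds[workload].get(name, {})
validation_rules = {metric: ValidationRule(**rules) for metric, rules in merged.items()}
print(validation_rules["duration"])  # ValidationRule(fixed_limit=1600, best_pct=None)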
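
Per-job overrides do not require touching defaults/test_default.yaml: latency_decorator_error_thresholds is a regular SCT option (type dict_or_str), so it can be supplied through an extra configuration YAML, as the Jenkins pipelines above now do, or through SCT_LATENCY_DECORATOR_ERROR_THRESHOLDS. The exact string format accepted for the environment variable is not shown in this patch, so the YAML-in-a-string form below is only an assumed illustration of the nesting, and the override values are hypothetical.

# Illustration only: the nesting a per-job override would use
# (workload -> result name -> metric -> rule -> value).
import yaml  # PyYAML, assumed available in the SCT environment

override = yaml.safe_load("""
latency_decorator_error_thresholds:
  write:
    default:
      P90 write:
        fixed_limit: 6
    replace_node:
      duration:
        fixed_limit: 3600
""")

thresholds = override["latency_decorator_error_thresholds"]
assert thresholds["write"]["replace_node"]["duration"]["fixed_limit"] == 3600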