rptest: Handle tier name before cluster creation

savex · savex · commit c992293e21e4 · 2023-09-20T11:37:34.000-05:00
diff --git a/tests/rptest/redpanda_cloud_tests/high_throughput_test.py b/tests/rptest/redpanda_cloud_tests/high_throughput_test.py
@@ -14,7 +14,7 @@
 import time
 import json
 
-from ducktape.mark import ignore, ok_to_fail
+from ducktape.mark import ignore, ok_to_fail, parametrize
 from ducktape.tests.test import TestContext
 from ducktape.utils.util import wait_until
 from rptest.clients.rpk import RpkTool
@@ -30,7 +30,7 @@
 from rptest.services.openmessaging_benchmark_configs import \
     OMBSampleConfigurations
 from rptest.services.producer_swarm import ProducerSwarm
-from rptest.services.redpanda_cloud import AdvertisedTierConfigs, CloudTierName
+from rptest.services.redpanda_cloud import AdvertisedTierConfigs, CloudTierName, get_tier_name
 from rptest.services.redpanda import (RESTART_LOG_ALLOW_LIST, MetricsEndpoint,
                                       SISettings, RedpandaServiceCloud)
 from rptest.services.rpk_consumer import RpkConsumer
@@ -163,13 +163,13 @@ def traffic_generator(context, redpanda, tier_cfg, *args, **kwargs):
         ) >= tier_cfg.egress_rate, f"Observed consumer throughput {consumer_throughput} too low, expected: {tier_cfg.egress_rate}"
 
 
-def get_globals_value(globals, key_name, default=None):
+def get_cloud_globals(globals):
     _config = {}
     if RedpandaServiceCloud.GLOBAL_CLOUD_CLUSTER_CONFIG in globals:
         # Load needed config values from cloud section
         # of globals prior to actual cluster creation
         _config = globals[RedpandaServiceCloud.GLOBAL_CLOUD_CLUSTER_CONFIG]
-    return _config.get(key_name, default)
+    return _config
 
 
 class HighThroughputTest(RedpandaTest):
@@ -179,11 +179,8 @@ class HighThroughputTest(RedpandaTest):
 
     def __init__(self, test_ctx: TestContext, *args, **kwargs):
         self._ctx = test_ctx
-        # Get tier value
-        cloud_tier_str = get_globals_value(self._ctx.globals,
-                                           "config_profile_name",
-                                           default="tier-1-aws")
-        cloud_tier = CloudTierName(cloud_tier_str)
+        # Get tier name
+        cloud_tier = get_tier_name(get_cloud_globals(self._ctx.globals))
         extra_rp_conf = None
         num_brokers = None
 
@@ -1152,3 +1149,93 @@ def producer_complete():
 
         consumer.stop()
         consumer.free()
+
+    def _prepare_omb_workload(self, ramp_time, duration, partitions, rate,
+                              msg_size):
+        """Build a single-topic OMB workload; durations are in minutes."""
+        return dict(
+            name="HT004-MINPARTOMB",
+            topics=1,
+            partitions_per_topic=partitions,
+            subscriptions_per_topic=1,
+            consumer_per_subscription=3,
+            producers_per_topic=1,
+            producer_rate=rate,
+            message_size=msg_size,
+            consumer_backlog_size_GB=0,
+            test_duration_minutes=duration,
+            warmup_duration_minutes=ramp_time,
+            use_randomized_payloads=True,
+            random_bytes_ratio=0.5,
+            randomized_payload_pool_size=100)
+
+    def _run_bench(self, workload, validator_overrides):
+        # Per-test overrides win over the default validator on key clashes
+        validator = (OMBSampleConfigurations.UNIT_TEST_LATENCY_VALIDATOR
+                     | validator_overrides)
+        bench = OpenMessagingBenchmark(self._ctx, self.redpanda,
+                                       "SIMPLE_DRIVER", (workload, validator))
+        bench.start()
+        bench.wait(timeout_sec=(bench.benchmark_time() + 1) * 60)
+        raw_metrics = bench.node.account.ssh_output(bench.chart_cmd)
+        return bench, list(json.loads(raw_metrics).values())[0]
+
+    @cluster(num_nodes=6, log_allow_list=RESTART_LOG_ALLOW_LIST)
+    @parametrize(partitions="min")
+    @parametrize(partitions="max")
+    def test_htt_partitions_omb(self, partitions):
+        def _format_metrics(idle, tier):
+            keys = idle.keys()
+            return "\n".join([f"{k} = {idle[k]} / {tier[k]} " for k in keys])
+
+        # Latency ceiling for the near-idle baseline run (presumably in ms)
+        _min_idle_lat = 1000
+        # Assume 1 partition per shard: with as many partitions as
+        # CPUs (8 is assumed here) the cluster sees idle-like
+        # activity during the baseline run
+        _num_partitions = 8
+
+        if partitions not in ["min", "max"]:
+            raise RuntimeError("Test parameter for partitions invalid")
+
+        idle_validators = {
+            OMBSampleConfigurations.E2E_LATENCY_50PCT:
+            [OMBSampleConfigurations.lte(_min_idle_lat)],
+            OMBSampleConfigurations.E2E_LATENCY_AVG:
+            [OMBSampleConfigurations.lte(_min_idle_lat * 3)],
+        }
+        idle_workload = self._prepare_omb_workload(1, 2, _num_partitions,
+                                                   1 * MiB, 8 * KiB)
+        _, idle_metrics = self._run_bench(idle_workload, idle_validators)
+
+        # Baseline latencies observed under the near-idle workload
+        k_e2e_50pct = idle_metrics[OMBSampleConfigurations.E2E_LATENCY_50PCT]
+        k_e2e_avg = idle_metrics[OMBSampleConfigurations.E2E_LATENCY_AVG]
+
+        # Latency targets: idle baseline plus a fixed allowance (51 / 145)
+        target_e2e_50pct = k_e2e_50pct + 51
+        target_e2e_avg = k_e2e_avg + 145
+
+        # Validators for the run at the tier's target load
+        validator_overrides = {
+            OMBSampleConfigurations.E2E_LATENCY_50PCT:
+            [OMBSampleConfigurations.lte(target_e2e_50pct)],
+            OMBSampleConfigurations.E2E_LATENCY_AVG:
+            [OMBSampleConfigurations.lte(target_e2e_avg)],
+        }
+        # Select the partition count for the requested test parameter
+        if partitions == "min":
+            _num_partitions = self.tier_config.partitions_min
+        elif partitions == "max":
+            _num_partitions = self.tier_config.partitions_upper_limit
+
+        workload = self._prepare_omb_workload(1, 2, _num_partitions,
+                                              self.tier_config.ingress_rate,
+                                              8 * KiB)
+        benchmark, metrics = self._run_bench(workload, validator_overrides)
+        benchmark.check_succeed()
+
+        # Tier metrics should not deviate from idle
+        # metrics by more than 145 ms on average
+        self.logger.info('Workload metrics (idle/tier): '
+                         '"{}"'.format(_format_metrics(idle_metrics, metrics)))
diff --git a/tests/rptest/services/redpanda_cloud.py b/tests/rptest/services/redpanda_cloud.py
@@ -37,6 +37,20 @@ def load_tier_profiles():
 TIER_DEFAULTS = {PROVIDER_AWS: "tier-1-aws", PROVIDER_GCP: "tier-1-gcp"}
 
 
+def get_tier_name(config):
+    """
+    Get the tier name before cluster creation: an empty config means
+    docker-local, a "default" profile maps to the provider's default tier.
+    """
+    if not config:
+        return CloudTierName("docker-local")
+    _profile = config['config_profile_name']
+    if _profile == "default":
+        # Provider is upper-cased before the TIER_DEFAULTS lookup
+        return CloudTierName(TIER_DEFAULTS[config['provider'].upper()])
+    return CloudTierName(_profile)
+
+
 class CloudTierName(Enum):
     DOCKER = 'docker-local'
     AWS_1 = 'tier-1-aws'