Merge pull request #24710 from redpanda-data/stephan/mpt-new-limits
ducktape: Increase partition density to 3k per shard in the MPT
StephanDollberg authored Jan 17, 2025
2 parents fef74e4 + 49e4fb0 commit 564fbb5
Showing 3 changed files with 22 additions and 12 deletions.
tests/rptest/scale_tests/many_partitions_test.py (16 changes: 12 additions & 4 deletions)
@@ -43,12 +43,16 @@
 BIG_FETCH = 104857600

 # How much memory to assign to redpanda per partition. Redpanda will be started
-# with MIB_PER_PARTITION * PARTITIONS_PER_SHARD * CORE_COUNT memory
-DEFAULT_MIB_PER_PARTITION = 4
+# with (MIB_PER_PARTITION * PARTITIONS_PER_SHARD * CORE_COUNT) / (PARTITIONS_MEMORY_ALLOCATION_PERCENT / 100) memory
+DEFAULT_MIB_PER_PARTITION = 0.2

 # How many partitions we will create per shard: this is the primary scaling
 # factor that controls how many partitions a given cluster will get.
-DEFAULT_PARTITIONS_PER_SHARD = 1000
+DEFAULT_PARTITIONS_PER_SHARD = 3000
+
+# How much memory is reserved for partitions
+# aka: topic_partitions_memory_allocation_percent config
+DEFAULT_PARTITIONS_MEMORY_ALLOCATION_PERCENT = 15

 # Large volume of data to write. If tiered storage is enabled this is the
 # amount of data to retain total. Otherwise, this can be used as a large volume
@@ -812,7 +816,9 @@ def _test_many_partitions(self,
             replication_factor,
             mib_per_partition,
             topic_partitions_per_shard,
-            tiered_storage_enabled=tiered_storage_enabled)
+            tiered_storage_enabled=tiered_storage_enabled,
+            partition_memory_reserve_percentage=
+            DEFAULT_PARTITIONS_MEMORY_ALLOCATION_PERCENT)

         # Run with one huge topic: it is more stressful for redpanda when clients
         # request the metadata for many partitions at once, and the simplest way
@@ -849,6 +855,8 @@ def _test_many_partitions(self,
                 topic_partitions_per_shard,
                 'topic_memory_per_partition':
                 mib_per_partition * 1024 * 1024,
+                'topic_partitions_memory_allocation_percent':
+                DEFAULT_PARTITIONS_MEMORY_ALLOCATION_PERCENT,
             })

         self.redpanda.start()
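For orientation, a rough sketch of the memory arithmetic described by the updated comment above, using the new defaults from this file; the core/shard count here is an assumed example value, not something taken from the test.

# Rough sketch of the sizing arithmetic described in the updated comment.
# The core count below is an assumed example value, not from the test.
MIB_PER_PARTITION = 0.2
PARTITIONS_PER_SHARD = 3000
PARTITIONS_MEMORY_ALLOCATION_PERCENT = 15
CORE_COUNT = 8  # assumption for illustration

# Memory the partitions themselves need on one node:
partition_memory_mib = MIB_PER_PARTITION * PARTITIONS_PER_SHARD * CORE_COUNT  # 4800.0

# Only PARTITIONS_MEMORY_ALLOCATION_PERCENT of redpanda's memory is reserved
# for partitions, so the node is sized up by that factor:
total_memory_mib = partition_memory_mib / (PARTITIONS_MEMORY_ALLOCATION_PERCENT / 100)
print(int(total_memory_mib))  # 32000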
tests/rptest/services/kgo_repeater_service.py (2 changes: 1 addition & 1 deletion)
@@ -333,7 +333,7 @@ def group_ready():

     def _get_status_reports(self):
         for node in self.nodes:
-            r = requests.get(self._remote_url(node, "status"), timeout=10)
+            r = requests.get(self._remote_url(node, "status"), timeout=30)
             r.raise_for_status()
             node_status = r.json()
             for worker_status in node_status:
tests/rptest/utils/scale_parameters.py (16 changes: 9 additions & 7 deletions)
@@ -25,9 +25,11 @@ def __init__(self,
                  replication_factor,
                  mib_per_partition,
                  topic_partitions_per_shard,
-                 tiered_storage_enabled=False):
+                 tiered_storage_enabled=False,
+                 partition_memory_reserve_percentage=10):
         self.redpanda = redpanda
         self.tiered_storage_enabled = tiered_storage_enabled
+        self.partition_memory_reserve_percentage = partition_memory_reserve_percentage

         node_count = len(self.redpanda.nodes)

@@ -171,23 +173,23 @@ def __init__(self,
         # Not all internal partitions have rf=replication_factor so this
         # over-allocates but making it more accurate would be complicated.
         per_node_slack = internal_partition_slack * replication_factor / node_count
-        partition_mem_total_per_node = mib_per_partition * (
-            partition_replicas_per_node + per_node_slack)
+        memory_setting = mib_per_partition * (
+            partition_replicas_per_node +
+            per_node_slack) / (self.partition_memory_reserve_percentage / 100.)

         resource_settings_args = {}
         if not self.redpanda.dedicated_nodes:
             # In docker, assume we're on a laptop drive and not doing
             # real testing, so disable fsync to make test run faster.
             resource_settings_args['bypass_fsync'] = True

-            partition_mem_total_per_node = max(partition_mem_total_per_node,
-                                               500)
+            memory_setting = max(memory_setting, 500)
         else:
             # On dedicated nodes we will use an explicit reactor stall threshold
             # as a success condition.
             resource_settings_args['reactor_stall_threshold'] = 100

-        resource_settings_args['memory_mb'] = int(partition_mem_total_per_node)
+        resource_settings_args['memory_mb'] = int(memory_setting)

         self.redpanda.set_resource_settings(
             ResourceSettings(**resource_settings_args))
@@ -198,7 +200,7 @@ def __init__(self,

         # Should not happen on the expected EC2 instance types where
         # the cores-RAM ratio is sufficient to meet our shards-per-core
-        if effective_node_memory < partition_mem_total_per_node:
+        if effective_node_memory < memory_setting:
             raise RuntimeError(
                 f"Node memory is too small ({node_memory}MB - {reserved_memory}MB)"
             )
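To make the revised sizing in scale_parameters.py easier to follow, a standalone sketch of the same computation; the inputs in the usage line at the bottom are assumed example values, not numbers taken from the harness.

# Standalone sketch of the revised sizing logic above; the example inputs
# at the bottom are assumptions for illustration, not harness values.
def node_memory_mb(mib_per_partition: float,
                   partition_replicas_per_node: float,
                   per_node_slack: float,
                   partition_memory_reserve_percentage: float,
                   dedicated_nodes: bool) -> int:
    # Memory the partition replicas need, scaled up because only
    # `partition_memory_reserve_percentage` of total memory is
    # reserved for partitions.
    memory_setting = mib_per_partition * (
        partition_replicas_per_node +
        per_node_slack) / (partition_memory_reserve_percentage / 100.)
    if not dedicated_nodes:
        # Non-dedicated (docker) runs get a 500 MB floor so tiny clusters still start.
        memory_setting = max(memory_setting, 500)
    return int(memory_setting)

# Example: 6000 partition replicas per node, 50 replicas of internal-topic
# slack, 15% of memory reserved for partitions.
print(node_memory_mb(0.2, 6000, 50, 15, dedicated_nodes=True))  # 8066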
