Skip to content

Commit 3db527f

Browse files
nipunn1313Convex, Inc.
authored andcommitted
Split the reader/writer in system table cleanup. Parallelize writes (#36014)
This should help increase throughput of the system table cleanup worker. The unit test helps confirm it's working. GitOrigin-RevId: 4af2239a1a8914d728a03f97ee47eccdebbaeeec
1 parent 2c0c0a2 commit 3db527f

File tree

2 files changed

+108
-29
lines changed

2 files changed

+108
-29
lines changed

crates/application/src/system_table_cleanup/mod.rs

Lines changed: 104 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,20 @@ use common::{
1515
ParseDocument,
1616
ParsedDocument,
1717
CREATION_TIME_FIELD_PATH,
18+
ID_FIELD_PATH,
1819
},
1920
errors::report_error,
2021
knobs::{
2122
MAX_EXPIRED_SNAPSHOT_AGE,
2223
MAX_IMPORT_AGE,
2324
MAX_SESSION_CLEANUP_DURATION,
25+
SESSION_CLEANUP_DELETE_CONCURRENCY,
2426
SYSTEM_TABLE_CLEANUP_CHUNK_SIZE,
2527
SYSTEM_TABLE_CLEANUP_FREQUENCY,
2628
SYSTEM_TABLE_ROWS_PER_SECOND,
2729
},
2830
query::{
31+
Expression,
2932
IndexRange,
3033
IndexRangeExpression,
3134
Order,
@@ -49,7 +52,11 @@ use database::{
4952
SystemMetadataModel,
5053
TableModel,
5154
};
52-
use futures::Future;
55+
use futures::{
56+
Future,
57+
StreamExt,
58+
TryStreamExt,
59+
};
5360
use governor::Quota;
5461
use keybroker::Identity;
5562
use metrics::{
@@ -64,7 +71,10 @@ use model::{
6471
};
6572
use rand::Rng;
6673
use storage::Storage;
74+
use tokio_stream::wrappers::ReceiverStream;
6775
use value::{
76+
ConvexValue,
77+
ResolvedDocumentId,
6878
TableNamespace,
6979
TabletId,
7080
};
@@ -136,6 +146,7 @@ impl<RT: Runtime> SystemTableCleanupWorker<RT> {
136146
session_requests_cutoff
137147
.map_or(CreationTimeInterval::None, CreationTimeInterval::Before),
138148
&rate_limiter,
149+
*SESSION_CLEANUP_DELETE_CONCURRENCY,
139150
)
140151
.await?;
141152
}
@@ -256,19 +267,17 @@ impl<RT: Runtime> SystemTableCleanupWorker<RT> {
256267
table: &TableName,
257268
to_delete: CreationTimeInterval,
258269
rate_limiter: &RateLimiter<RT>,
270+
num_deleters: usize,
259271
) -> anyhow::Result<usize> {
272+
let _timer = system_table_cleanup_timer();
260273
let mut cursor = None;
261274

262-
let mut deleted = 0;
263-
loop {
264-
let _timer = system_table_cleanup_timer();
275+
let (tx, rx) = tokio::sync::mpsc::channel(1);
276+
let deleter = |chunk: Vec<ResolvedDocumentId>| async {
265277
let deleted_chunk = self
266-
.cleanup_system_table_chunk(namespace, table, to_delete, &mut cursor)
278+
.cleanup_system_table_delete_chunk(namespace, table, chunk)
267279
.await?;
268-
deleted += deleted_chunk;
269-
if deleted_chunk == 0 {
270-
break Ok(deleted);
271-
}
280+
272281
for _ in 0..deleted_chunk {
273282
// Don't rate limit within transactions, because that would just increase
274283
// contention. Rate limit between transactions to limit
@@ -278,53 +287,104 @@ impl<RT: Runtime> SystemTableCleanupWorker<RT> {
278287
self.runtime.wait(delay).await;
279288
}
280289
}
281-
}
290+
Ok(deleted_chunk)
291+
};
292+
let deleters = ReceiverStream::new(rx)
293+
.map(deleter)
294+
.buffer_unordered(num_deleters)
295+
.try_fold(0, |acc, x| async move { Ok(acc + x) });
296+
297+
let reader = async move {
298+
loop {
299+
let deleted_chunk = self
300+
.cleanup_system_table_read_chunk(namespace, table, to_delete, &mut cursor)
301+
.await?;
302+
if deleted_chunk.is_empty() {
303+
return Ok::<_, anyhow::Error>(());
304+
}
305+
tx.send(deleted_chunk).await?;
306+
}
307+
};
308+
309+
let ((), deleted) = futures::try_join!(reader, deleters)?;
310+
Ok(deleted)
282311
}
283312

284-
async fn cleanup_system_table_chunk(
313+
async fn cleanup_system_table_read_chunk(
285314
&self,
286315
namespace: TableNamespace,
287316
table: &TableName,
288317
to_delete: CreationTimeInterval,
289-
cursor: &mut Option<CreationTime>,
290-
) -> anyhow::Result<usize> {
318+
cursor: &mut Option<(CreationTime, ResolvedDocumentId)>,
319+
) -> anyhow::Result<Vec<ResolvedDocumentId>> {
291320
let mut tx = self.database.begin(Identity::system()).await?;
292321
if !TableModel::new(&mut tx).table_exists(namespace, table) {
293-
return Ok(0);
322+
return Ok(vec![]);
294323
}
295324
if matches!(to_delete, CreationTimeInterval::None) {
296-
return Ok(0);
325+
return Ok(vec![]);
297326
}
298327
let mut range = match to_delete {
299-
CreationTimeInterval::None => return Ok(0),
328+
CreationTimeInterval::None => return Ok(vec![]),
300329
CreationTimeInterval::All => vec![],
301330
CreationTimeInterval::Before(cutoff) => vec![IndexRangeExpression::Lt(
302331
CREATION_TIME_FIELD_PATH.clone(),
303332
f64::from(cutoff).into(),
304333
)],
305334
};
306-
if let Some(cursor) = cursor {
335+
if let Some((creation_time, _id)) = cursor {
307336
// The semantics of the cursor mean that all documents <= cursor have been
308337
// deleted, but retention might not have run yet, so we skip over their
309338
// tombstones.
310339
range.push(IndexRangeExpression::Gte(
311340
CREATION_TIME_FIELD_PATH.clone(),
312-
f64::from(*cursor).into(),
341+
f64::from(*creation_time).into(),
313342
));
314343
}
315-
let index_scan = Query::index_range(IndexRange {
344+
let mut index_scan = Query::index_range(IndexRange {
316345
index_name: IndexName::by_creation_time(table.clone()),
317346
range,
318347
order: Order::Asc,
319-
})
320-
.limit(*SYSTEM_TABLE_CLEANUP_CHUNK_SIZE);
348+
});
349+
if let Some((creation_time, id)) = cursor {
350+
index_scan = index_scan.filter(Expression::Or(vec![
351+
Expression::Neq(
352+
Box::new(Expression::Field(CREATION_TIME_FIELD_PATH.clone())),
353+
Box::new(Expression::Literal(
354+
ConvexValue::from(f64::from(*creation_time)).into(),
355+
)),
356+
),
357+
Expression::Gt(
358+
Box::new(Expression::Field(ID_FIELD_PATH.clone())),
359+
Box::new(Expression::Literal(ConvexValue::from(*id).into())),
360+
),
361+
]));
362+
}
363+
index_scan = index_scan.limit(*SYSTEM_TABLE_CLEANUP_CHUNK_SIZE);
321364
let mut query = ResolvedQuery::new(&mut tx, namespace, index_scan)?;
322-
let mut deleted_count = 0;
365+
let mut docs = vec![];
323366
while let Some(document) = query.next(&mut tx, None).await? {
367+
docs.push(document.id());
368+
*cursor = Some((document.creation_time(), document.id()));
369+
}
370+
if let Some((creation_time, _id)) = cursor {
371+
log_system_table_cursor_lag(table, *creation_time);
372+
}
373+
Ok(docs)
374+
}
375+
376+
async fn cleanup_system_table_delete_chunk(
377+
&self,
378+
namespace: TableNamespace,
379+
table: &TableName,
380+
docs: Vec<ResolvedDocumentId>,
381+
) -> anyhow::Result<usize> {
382+
let mut tx = self.database.begin(Identity::system()).await?;
383+
let mut deleted_count = 0;
384+
for doc in docs {
324385
SystemMetadataModel::new(&mut tx, namespace)
325-
.delete(document.id())
386+
.delete(doc)
326387
.await?;
327-
*cursor = Some(document.creation_time());
328388
deleted_count += 1;
329389
}
330390
if deleted_count == 0 {
@@ -335,9 +395,6 @@ impl<RT: Runtime> SystemTableCleanupWorker<RT> {
335395
.await?;
336396
tracing::info!("deleted {deleted_count} documents from {table}");
337397
log_system_table_cleanup_rows(table, deleted_count);
338-
if let Some(cursor) = cursor {
339-
log_system_table_cursor_lag(table, *cursor);
340-
}
341398
Ok(deleted_count)
342399
}
343400

@@ -413,8 +470,10 @@ mod tests {
413470
SystemTableCleanupWorker,
414471
};
415472

416-
#[convex_macro::test_runtime]
417-
async fn test_system_table_cleanup(rt: TestRuntime) -> anyhow::Result<()> {
473+
async fn test_system_table_cleanup_helper(
474+
rt: TestRuntime,
475+
num_deleters: usize,
476+
) -> anyhow::Result<()> {
418477
let DbFixtures { db, .. } = DbFixtures::new_with_model(&rt).await?;
419478
let exports_storage = Arc::new(LocalDirStorage::new(rt.clone())?);
420479
let worker = SystemTableCleanupWorker {
@@ -455,6 +514,7 @@ mod tests {
455514
&SESSION_REQUESTS_TABLE,
456515
CreationTimeInterval::Before(cutoff),
457516
&rate_limiter,
517+
num_deleters,
458518
)
459519
.await?;
460520
assert_eq!(deleted, 3);
@@ -467,4 +527,19 @@ mod tests {
467527
assert_eq!(count, Some(7));
468528
Ok(())
469529
}
530+
531+
#[convex_macro::test_runtime]
532+
async fn test_system_table_cleanup_1(rt: TestRuntime) -> anyhow::Result<()> {
533+
test_system_table_cleanup_helper(rt, 1).await
534+
}
535+
536+
#[convex_macro::test_runtime]
537+
async fn test_system_table_cleanup_2(rt: TestRuntime) -> anyhow::Result<()> {
538+
test_system_table_cleanup_helper(rt, 2).await
539+
}
540+
541+
#[convex_macro::test_runtime]
542+
async fn test_system_table_cleanup_8(rt: TestRuntime) -> anyhow::Result<()> {
543+
test_system_table_cleanup_helper(rt, 8).await
544+
}
470545
}

crates/common/src/knobs.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -595,6 +595,10 @@ pub static MAX_SESSION_CLEANUP_DURATION: LazyLock<Option<Duration>> = LazyLock::
595595
}
596596
});
597597

598+
/// Number of concurrent commits to use for deleting session requests.
599+
pub static SESSION_CLEANUP_DELETE_CONCURRENCY: LazyLock<usize> =
600+
LazyLock::new(|| env_config("SESSION_CLEANUP_DELETE_CONCURRENCY", 1));
601+
598602
/// Snapshots that expired more than this number of days ago are purged
599603
/// from storage.
600604
pub static MAX_EXPIRED_SNAPSHOT_AGE: LazyLock<Duration> = LazyLock::new(|| {

0 commit comments

Comments
 (0)