Skip to content

Commit f21daaa

Browse files
Fix the deadlock during statements gossiping (#9868)
# Description During statement store benchmarking we experienced deadlock-like behavior which we found happened during statement propagation. Every second statements were propagating, locking the index which possibly caused the deadlock. After the fix, the observed behavior no longer occurs. Even though there is a possibility to unsync the DB and the index for read operations and release locks earlier, which should be harmless, it leads to regressions. I suspect because of concurrent access to many calls of db.get(). Checked with the benchmarks in #9884 ## Integration This PR should not affect downstream projects. --------- Co-authored-by: cmd[bot] <41898282+github-actions[bot]@users.noreply.github.com> (cherry picked from commit ed4eebb)
1 parent a75af0e commit f21daaa

File tree

2 files changed

+21
-13
lines changed

2 files changed

+21
-13
lines changed

prdoc/pr_9868.prdoc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
title: Fix the deadlock during statements gossiping
2+
doc:
3+
- audience: Node Dev
4+
description: |-
5+
During high load the statement store experiences deadlock-like behavior: every second statements were gossiping, locking the index which possibly caused the deadlock. After the fix, the observed behavior no longer occurs.
6+
7+
crates:
8+
- name: sc-statement-store
9+
bump: patch

substrate/client/statement-store/src/lib.rs

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -488,12 +488,7 @@ impl Store {
488488
where
489489
Block: BlockT,
490490
Block::Hash: From<BlockHash>,
491-
Client: ProvideRuntimeApi<Block>
492-
+ HeaderBackend<Block>
493-
+ sc_client_api::ExecutorProvider<Block>
494-
+ Send
495-
+ Sync
496-
+ 'static,
491+
Client: ProvideRuntimeApi<Block> + HeaderBackend<Block> + Send + Sync + 'static,
497492
Client::Api: ValidateStatement<Block>,
498493
{
499494
let store = Arc::new(Self::new(path, options, client, keystore, prometheus)?);
@@ -671,21 +666,25 @@ impl Store {
671666
/// Perform periodic store maintenance
672667
pub fn maintain(&self) {
673668
log::trace!(target: LOG_TARGET, "Started store maintenance");
674-
let deleted = self.index.write().maintain(self.timestamp());
669+
let (deleted, active_count, expired_count): (Vec<_>, usize, usize) = {
670+
let mut index = self.index.write();
671+
let deleted = index.maintain(self.timestamp());
672+
(deleted, index.entries.len(), index.expired.len())
673+
};
675674
let deleted: Vec<_> =
676675
deleted.into_iter().map(|hash| (col::EXPIRED, hash.to_vec(), None)).collect();
677-
let count = deleted.len() as u64;
676+
let deleted_count = deleted.len() as u64;
678677
if let Err(e) = self.db.commit(deleted) {
679678
log::warn!(target: LOG_TARGET, "Error writing to the statement database: {:?}", e);
680679
} else {
681-
self.metrics.report(|metrics| metrics.statements_pruned.inc_by(count));
680+
self.metrics.report(|metrics| metrics.statements_pruned.inc_by(deleted_count));
682681
}
683682
log::trace!(
684683
target: LOG_TARGET,
685684
"Completed store maintenance. Purged: {}, Active: {}, Expired: {}",
686-
count,
687-
self.index.read().entries.len(),
688-
self.index.read().expired.len()
685+
deleted_count,
686+
active_count,
687+
expired_count
689688
);
690689
}
691690

@@ -764,7 +763,7 @@ impl StatementStore for Store {
764763
fn statements(&self) -> Result<Vec<(Hash, Statement)>> {
765764
let index = self.index.read();
766765
let mut result = Vec::with_capacity(index.entries.len());
767-
for h in self.index.read().entries.keys() {
766+
for h in index.entries.keys() {
768767
let encoded = self.db.get(col::STATEMENTS, h).map_err(|e| Error::Db(e.to_string()))?;
769768
if let Some(encoded) = encoded {
770769
if let Ok(statement) = Statement::decode(&mut encoded.as_slice()) {

0 commit comments

Comments
 (0)