Skip to content

Commit d97b534

Browse files
Improve support for sparse cache.
With the addition of a notes-backed cache we will have the possibility of avoiding a "cold start" by transferring just the notes to another repo. To keep the amount of extra data small the notes cache is kept sparse: Not all commits have an entry, and the cache is also split into shards by sequence number, as old entries are unlikely to be relevant. The old traversal logic did not perform very well with a sparse cache. Especially "find_known" proved to be a bottleneck. So the traversal has now been revised to work better in the sparse cache case. Change: cache-sparse-shards
1 parent e8feff5 commit d97b534

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+325
-119
lines changed

josh-core/src/cache.rs

Lines changed: 99 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,20 @@ use std::sync::{LazyLock, RwLock};
99
pub(crate) const CACHE_VERSION: u64 = 24;
1010

1111
pub trait CacheBackend: Send + Sync {
12-
fn read(&self, filter: filter::Filter, from: git2::Oid) -> JoshResult<Option<git2::Oid>>;
13-
14-
fn write(&self, filter: filter::Filter, from: git2::Oid, to: git2::Oid) -> JoshResult<()>;
12+
fn read(
13+
&self,
14+
filter: filter::Filter,
15+
from: git2::Oid,
16+
sequence_number: u128,
17+
) -> JoshResult<Option<git2::Oid>>;
18+
19+
fn write(
20+
&self,
21+
filter: filter::Filter,
22+
from: git2::Oid,
23+
to: git2::Oid,
24+
sequence_number: u128,
25+
) -> JoshResult<()>;
1526
}
1627

1728
pub trait FilterHook {
@@ -323,6 +334,11 @@ impl Transaction {
323334
}
324335

325336
pub fn insert(&self, filter: filter::Filter, from: git2::Oid, to: git2::Oid, store: bool) {
337+
let sequence_number = if filter != filter::sequence_number() {
338+
compute_sequence_number(self, from).expect("compute_sequence_number failed")
339+
} else {
340+
0
341+
};
326342
let mut t2 = self.t2.borrow_mut();
327343
t2.commit_map
328344
.entry(filter.id())
@@ -334,14 +350,13 @@ impl Transaction {
334350
// the history length by a very large factor.
335351
if store || from.as_bytes()[0] == 0 {
336352
t2.cache
337-
.write_all(filter, from, to)
353+
.write_all(filter, from, to, sequence_number)
338354
.expect("Failed to write cache");
339355
}
340356
}
341357

342358
pub fn get_missing(&self) -> Vec<(filter::Filter, git2::Oid)> {
343359
let mut missing = self.t2.borrow().missing.clone();
344-
missing.sort_by_key(|(f, i)| (filter::nesting(*f), *f, *i));
345360
missing.dedup();
346361
missing.retain(|(f, i)| !self.known(*f, *i));
347362
self.t2.borrow_mut().missing = missing.clone();
@@ -358,7 +373,9 @@ impl Transaction {
358373
} else {
359374
let mut t2 = self.t2.borrow_mut();
360375
t2.misses += 1;
361-
t2.missing.push((filter, from));
376+
if !t2.missing.contains(&(filter, from)) {
377+
t2.missing.insert(0, (filter, from));
378+
}
362379
None
363380
}
364381
}
@@ -367,6 +384,11 @@ impl Transaction {
367384
if filter == filter::nop() {
368385
return Some(from);
369386
}
387+
let sequence_number = if filter != filter::sequence_number() {
388+
compute_sequence_number(self, from).expect("compute_sequence_number failed")
389+
} else {
390+
0
391+
};
370392
let t2 = self.t2.borrow_mut();
371393
if let Some(m) = t2.commit_map.get(&filter.id()) {
372394
if let Some(oid) = m.get(&from).cloned() {
@@ -376,7 +398,7 @@ impl Transaction {
376398

377399
let oid = t2
378400
.cache
379-
.read_propagate(filter, from)
401+
.read_propagate(filter, from, sequence_number)
380402
.expect("Failed to read from cache backend");
381403

382404
let oid = if let Some(oid) = oid { Some(oid) } else { None };
@@ -385,6 +407,9 @@ impl Transaction {
385407
if oid == git2::Oid::zero() {
386408
return Some(oid);
387409
}
410+
if filter == filter::sequence_number() {
411+
return Some(oid);
412+
}
388413

389414
if self.repo.odb().unwrap().exists(oid) {
390415
// Only report an object as cached if it exists in the object database.
@@ -396,3 +421,70 @@ impl Transaction {
396421
None
397422
}
398423
}
424+
425+
/// Encode a `u128` into a 20-byte git OID (SHA-1 sized).
426+
/// The high 4 bytes of the OID are zero; the low 16 bytes
427+
/// contain the big-endian integer.
428+
pub fn oid_from_u128(n: u128) -> git2::Oid {
429+
let mut bytes = [0u8; 20];
430+
// place the 16 integer bytes at the end (big-endian)
431+
bytes[20 - 16..].copy_from_slice(&n.to_be_bytes());
432+
// Safe: length is exactly 20
433+
git2::Oid::from_bytes(&bytes).expect("20-byte OID construction cannot fail")
434+
}
435+
436+
/// Decode a `u128` previously encoded by `oid_from_u128`.
437+
pub fn u128_from_oid(oid: git2::Oid) -> u128 {
438+
let b = oid.as_bytes();
439+
let mut n = [0u8; 16];
440+
n.copy_from_slice(&b[20 - 16..]); // take the last 16 bytes
441+
u128::from_be_bytes(n)
442+
}
443+
444+
pub fn compute_sequence_number(
445+
transaction: &cache::Transaction,
446+
input: git2::Oid,
447+
) -> JoshResult<u128> {
448+
if let Some(count) = transaction.get(filter::sequence_number(), input) {
449+
return Ok(u128_from_oid(count));
450+
}
451+
452+
let commit = transaction.repo().find_commit(input)?;
453+
if let Some(p) = commit.parent_ids().next() {
454+
if let Some(count) = transaction.get(filter::sequence_number(), p) {
455+
let pc = u128_from_oid(count);
456+
transaction.insert(
457+
filter::sequence_number(),
458+
input,
459+
oid_from_u128(pc + 1),
460+
true,
461+
);
462+
return Ok(pc + 1);
463+
}
464+
}
465+
466+
let mut walk = transaction.repo().revwalk()?;
467+
walk.set_sorting(git2::Sort::REVERSE | git2::Sort::TOPOLOGICAL)?;
468+
walk.push(input)?;
469+
470+
for c in walk {
471+
let commit = transaction.repo().find_commit(c?)?;
472+
let pc = if let Some(p) = commit.parent_ids().next() {
473+
compute_sequence_number(transaction, p)?
474+
} else {
475+
0
476+
};
477+
478+
transaction.insert(
479+
filter::sequence_number(),
480+
commit.id(),
481+
oid_from_u128(pc + 1),
482+
true,
483+
);
484+
}
485+
if let Some(count) = transaction.get(filter::sequence_number(), input) {
486+
Ok(u128_from_oid(count))
487+
} else {
488+
Err(josh_error("missing sequence_number"))
489+
}
490+
}

josh-core/src/cache_notes.rs

Lines changed: 53 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use crate::JoshResult;
22
use crate::cache::{CACHE_VERSION, CacheBackend};
3+
use crate::filter;
34
use crate::filter::Filter;
45

56
pub struct NotesCacheBackend {
@@ -15,24 +16,53 @@ impl NotesCacheBackend {
1516
}
1617
}
1718

18-
fn is_note_eligible(oid: git2::Oid) -> bool {
19-
oid.as_bytes()[0] == 0
19+
// The notes cache is meant to be sparse. That is, not all entries are actually persisted.
20+
// This makes it smaller and faster to download.
21+
// It is expected that on any node (server, proxy, local repo) a full "dense" local cache
22+
// is used in addition to the sparse note cache.
23+
// The note cache is mostly only used for initial "cold starts" or longer "catch up".
24+
// For incremental filtering it's fine to re-filter commits and rely on the local "dense" cache.
25+
// We store entries for 1% of all commits, and additionally all merges and orphans.
26+
fn is_note_eligible(repo: &git2::Repository, oid: git2::Oid, sequence_number: u128) -> bool {
27+
let parent_count = if let Ok(c) = repo.find_commit(oid) {
28+
c.parent_ids().count()
29+
} else {
30+
return false;
31+
};
32+
33+
sequence_number % 100 == 0 || parent_count != 1
2034
}
2135

22-
fn note_path(key: git2::Oid) -> String {
23-
format!("refs/josh/{}/{}", CACHE_VERSION, key)
36+
// To additionally limit the size of the note trees the cache is also sharded by sequence
37+
// number in groups of 10000. Note that this does not limit the number of entries per bucket
38+
// as branches mean many commits share the same sequence number.
39+
fn note_path(key: git2::Oid, sequence_number: u128) -> String {
40+
format!(
41+
"refs/josh/{}/{}/{}",
42+
CACHE_VERSION,
43+
sequence_number / 10000,
44+
key,
45+
)
2446
}
2547

2648
impl CacheBackend for NotesCacheBackend {
27-
fn read(&self, filter: Filter, from: git2::Oid) -> JoshResult<Option<git2::Oid>> {
49+
fn read(
50+
&self,
51+
filter: Filter,
52+
from: git2::Oid,
53+
sequence_number: u128,
54+
) -> JoshResult<Option<git2::Oid>> {
55+
if filter == filter::sequence_number() {
56+
return Ok(None);
57+
}
2858
let repo = self.repo.lock()?;
29-
let key = crate::filter::as_tree(&repo, filter)?;
30-
31-
if !is_note_eligible(from) {
59+
if !is_note_eligible(&repo, from, sequence_number) {
3260
return Ok(None);
3361
}
3462

35-
if let Ok(note) = repo.find_note(Some(&note_path(key)), from) {
63+
let key = crate::filter::as_tree(&*repo, filter)?;
64+
65+
if let Ok(note) = repo.find_note(Some(&note_path(key, sequence_number)), from) {
3666
let message = note.message().unwrap_or("");
3767
let result = git2::Oid::from_str(message)?;
3868

@@ -42,20 +72,29 @@ impl CacheBackend for NotesCacheBackend {
4272
}
4373
}
4474

45-
fn write(&self, filter: Filter, from: git2::Oid, to: git2::Oid) -> JoshResult<()> {
46-
let repo = self.repo.lock()?;
47-
let key = crate::filter::as_tree(&repo, filter)?;
75+
fn write(
76+
&self,
77+
filter: Filter,
78+
from: git2::Oid,
79+
to: git2::Oid,
80+
sequence_number: u128,
81+
) -> JoshResult<()> {
82+
if filter == filter::sequence_number() {
83+
return Ok(());
84+
}
4885

49-
if !is_note_eligible(from) {
86+
let repo = self.repo.lock()?;
87+
if !is_note_eligible(&*repo, from, sequence_number) {
5088
return Ok(());
5189
}
5290

91+
let key = crate::filter::as_tree(&*repo, filter)?;
5392
let signature = crate::cache::josh_commit_signature()?;
5493

5594
repo.note(
5695
&signature,
5796
&signature,
58-
Some(&note_path(key)),
97+
Some(&note_path(key, sequence_number)),
5998
from,
6099
&to.to_string(),
61100
true,

josh-core/src/cache_sled.rs

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,12 @@ fn insert_sled_tree(filter: Filter) -> sled::Tree {
8080
}
8181

8282
impl CacheBackend for SledCacheBackend {
83-
fn read(&self, filter: Filter, from: git2::Oid) -> JoshResult<Option<git2::Oid>> {
83+
fn read(
84+
&self,
85+
filter: Filter,
86+
from: git2::Oid,
87+
_sequence_number: u128,
88+
) -> JoshResult<Option<git2::Oid>> {
8489
let mut trees = self.trees.lock()?;
8590
let tree = trees
8691
.entry(filter.id())
@@ -94,7 +99,13 @@ impl CacheBackend for SledCacheBackend {
9499
}
95100
}
96101

97-
fn write(&self, filter: Filter, from: git2::Oid, to: git2::Oid) -> JoshResult<()> {
102+
fn write(
103+
&self,
104+
filter: Filter,
105+
from: git2::Oid,
106+
to: git2::Oid,
107+
_sequence_number: u128,
108+
) -> JoshResult<()> {
98109
let mut trees = self.trees.lock()?;
99110
let tree = trees
100111
.entry(filter.id())

josh-core/src/cache_stack.rs

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,10 @@ impl CacheStack {
3333
filter: filter::Filter,
3434
from: git2::Oid,
3535
to: git2::Oid,
36+
sequence_number: u128,
3637
) -> JoshResult<()> {
3738
for backend in &self.backends {
38-
backend.write(filter, from, to)?;
39+
backend.write(filter, from, to, sequence_number)?;
3940
}
4041

4142
Ok(())
@@ -51,16 +52,19 @@ impl CacheStack {
5152
&self,
5253
filter: filter::Filter,
5354
from: git2::Oid,
55+
sequence_number: u128,
5456
) -> JoshResult<Option<git2::Oid>> {
5557
let values = self
5658
.backends
5759
.iter()
5860
.enumerate()
59-
.find_map(|(index, backend)| match backend.read(filter, from) {
60-
Ok(None) => None,
61-
Ok(Some(oid)) => Some(Ok((index, oid))),
62-
Err(e) => Some(Err(e)),
63-
});
61+
.find_map(
62+
|(index, backend)| match backend.read(filter, from, sequence_number) {
63+
Ok(None) => None,
64+
Ok(Some(oid)) => Some(Ok((index, oid))),
65+
Err(e) => Some(Err(e)),
66+
},
67+
);
6468

6569
let (index, oid) = match values {
6670
// None of the backends had the value
@@ -74,7 +78,7 @@ impl CacheStack {
7478
self.backends
7579
.iter()
7680
.take(index)
77-
.try_for_each(|backend| backend.write(filter, from, oid))?;
81+
.try_for_each(|backend| backend.write(filter, from, oid, sequence_number))?;
7882

7983
Ok(Some(oid))
8084
}

0 commit comments

Comments
 (0)