From 8138341bedd418bf5d28ef742ee1ee61cc8997b2 Mon Sep 17 00:00:00 2001 From: David Buchanan Date: Thu, 12 Dec 2024 12:51:47 +0000 Subject: [PATCH] inclusion/exclusion proofs - very untested --- README.md | 2 +- src/atmst/blockstore/car_file.py | 2 +- src/atmst/cartool.py | 2 +- src/atmst/mst/diff.py | 8 ++--- src/atmst/mst/node_walker.py | 52 +++++++++++++++++++++++--------- src/atmst/mst/proof.py | 33 ++++++++++++++++++++ tests/test_mst_diff.py | 6 ++-- 7 files changed, 81 insertions(+), 24 deletions(-) create mode 100644 src/atmst/mst/proof.py diff --git a/README.md b/README.md index 127d164..3894178 100644 --- a/README.md +++ b/README.md @@ -39,5 +39,5 @@ publishing to pypi: (this one is mainly for my benefit!) ``` python3 -m build -python3 -m twine upload --repository pypi dist/* +python3 -m twine upload --repository pypi dist/atmst-*.tar.gz ``` diff --git a/src/atmst/blockstore/car_file.py b/src/atmst/blockstore/car_file.py index 7b36d35..6dbb40a 100644 --- a/src/atmst/blockstore/car_file.py +++ b/src/atmst/blockstore/car_file.py @@ -38,7 +38,7 @@ class ReadOnlyCARBlockStore(BlockStore): proofs provided in CAR format, and for testing. 
""" - car_roots: List[CID] + car_root: CID block_offsets: Dict[bytes, Tuple[int, int]] # CID -> (offset, length) def __init__(self, file: BinaryIO, validate_hashes: bool=True) -> None: diff --git a/src/atmst/cartool.py b/src/atmst/cartool.py index 3b97a54..bc97c89 100644 --- a/src/atmst/cartool.py +++ b/src/atmst/cartool.py @@ -57,7 +57,7 @@ def dump_all(car_path: str): def dump_record(car_path: str, key: str): bs, commit = open_car(car_path) - val = NodeWalker(NodeStore(bs), commit["data"]).find_value(key) + val = NodeWalker(NodeStore(bs), commit["data"]).find_rpath(key) if val is None: print("Record not found!", file=sys.stderr) sys.exit(-1) diff --git a/src/atmst/mst/diff.py b/src/atmst/mst/diff.py index a9b0130..fff0b65 100644 --- a/src/atmst/mst/diff.py +++ b/src/atmst/mst/diff.py @@ -134,7 +134,7 @@ def _mst_diff_recursive(created: Set[CID], deleted: Set[CID], a: NodeWalker, b: a.down() deleted.add(a.frame.node.cid) else: - a.right() + a.right_or_up() # catch up cursor b, likewise while b.rpath < a.rpath and not b.is_final: @@ -142,7 +142,7 @@ def _mst_diff_recursive(created: Set[CID], deleted: Set[CID], a: NodeWalker, b: b.down() created.add(b.frame.node.cid) else: - b.right() + b.right_or_up() # the rpaths now match, but the subrees below us might not @@ -153,5 +153,5 @@ def _mst_diff_recursive(created: Set[CID], deleted: Set[CID], a: NodeWalker, b: if a.rpath == a.stack[0].rpath and b.rpath == b.stack[0].rpath: break - a.right() - b.right() + a.right_or_up() + b.right_or_up() diff --git a/src/atmst/mst/node_walker.py b/src/atmst/mst/node_walker.py index 4321d08..ffa7fca 100644 --- a/src/atmst/mst/node_walker.py +++ b/src/atmst/mst/node_walker.py @@ -54,6 +54,10 @@ def subtree_walker(self) -> "Self": def frame(self) -> StackFrame: return self.stack[-1] + @property + def height(self) -> int: + return self.frame.node.height + @property def lpath(self) -> str: return self.frame.lpath if self.frame.idx == 0 else self.frame.node.keys[self.frame.idx - 1] @@ 
-76,21 +80,31 @@ def rval(self) -> Optional[CID]: @property def is_final(self) -> bool: + # is (not self.stack) really necessary here? is that a reachable state? return (not self.stack) or (self.subtree is None and self.rpath == self.stack[0].rpath) - def right(self) -> None: - if (self.frame.idx + 1) >= len(self.frame.node.subtrees): + @property + def can_go_right(self) -> bool: + return (self.frame.idx + 1) < len(self.frame.node.subtrees) + + def right_or_up(self) -> None: + if not self.can_go_right: # we reached the end of this node, go up a level self.stack.pop() if not self.stack: raise StopIteration # you probably want to check .final instead of hitting this - return self.right() # we need to recurse, to skip over empty intermediates on the way back up + return self.right_or_up() # we need to recurse, to skip over empty intermediates on the way back up + self.frame.idx += 1 + + def right(self) -> None: + if not self.can_go_right: + raise Exception("cursor is already at rightmost position in node") self.frame.idx += 1 def down(self) -> None: subtree = self.frame.node.subtrees[self.frame.idx] if subtree is None: - raise Exception("oi, you can't recurse here mate") + raise Exception("oi, you can't recurse here mate (subtree is None)") self.stack.append(self.StackFrame( node=self.ns.get_node(subtree), @@ -105,7 +119,7 @@ def down(self) -> None: def next_kv(self) -> Tuple[str, CID]: while self.subtree: # recurse down every subtree self.down() - self.right() + self.right_or_up() return self.lpath, self.lval # the kv pair we just jumped over # iterate over every k/v pair in key-sorted order @@ -121,7 +135,7 @@ def iter_nodes(self) -> Iterable[MSTNode]: while self.subtree: # recurse down every subtree self.down() yield self.frame.node - self.right() + self.right_or_up() def iter_node_cids(self) -> Iterable[CID]: for node in self.iter_nodes(): @@ -131,7 +145,7 @@ def iter_node_cids(self) -> Iterable[CID]: def iter_kv_range(self, start: str, end: str, end_inclusive: 
bool=False) -> Iterable[Tuple[str, CID]]: while True: while self.rpath < start: - self.right() + self.right_or_up() if not self.subtree: break self.down() @@ -140,14 +154,22 @@ def iter_kv_range(self, start: str, end: str, end_inclusive: bool=False) -> Iter if k > end or (not end_inclusive and k == end): break yield k, v - - def find_value(self, key: str) -> Optional[CID]: + + # TODO: we need to make this early-exit so that it can work with concise deletion proofs, maybe + # (early exit based on key height - might need significant rewrite) + def find_rpath(self, rpath: str) -> Optional[CID]: + rpath_height = MSTNode.key_height(rpath) while True: - while self.rpath < key: + # if the rpath we're looking for is higher than the current cursor, + # we're never going to find it (i.e. we early-exit) + if rpath_height > self.height: + return None + while self.rpath < rpath: # either look for the rpath, or the right point to go down + if not self.can_go_right: + return None self.right() - if self.rpath == key or not self.subtree: - break + if self.rpath == rpath: + return self.rval # found it! 
+ if not self.subtree: + return None # need to go down, but we can't self.down() - if self.rpath != key: - return None - return self.rval diff --git a/src/atmst/mst/proof.py b/src/atmst/mst/proof.py new file mode 100644 index 0000000..55b05dc --- /dev/null +++ b/src/atmst/mst/proof.py @@ -0,0 +1,33 @@ +from typing import Set, Tuple, Optional + +from cbrrr import CID + +from .node import MSTNode +from .node_store import NodeStore +from .node_walker import NodeWalker + +class InvalidProof(Exception): + pass + +# works for both inclusion and exclusion proofs +def find_rpath_and_build_proof(ns: NodeStore, root_cid: CID, rpath: str) -> Tuple[Optional[CID], Set[CID]]: + walker = NodeWalker(ns, root_cid) + value = walker.find_rpath(rpath) # returns None if not found + proof = {frame.node.cid for frame in walker.stack} + return value, proof + +def verify_inclusion(ns: NodeStore, root_cid: CID, rpath: str) -> None: + walker = NodeWalker(ns, root_cid) + try: + if walker.find_rpath(rpath) is None: + raise InvalidProof("rpath not present in MST") + except KeyError: + raise InvalidProof("missing MST blocks") + +def verify_exclusion(ns: NodeStore, root_cid: CID, rpath: str) -> None: + walker = NodeWalker(ns, root_cid) + try: + if walker.find_rpath(rpath) is not None: + raise InvalidProof("rpath *is* present in MST") + except KeyError: + raise InvalidProof("missing MST blocks") diff --git a/tests/test_mst_diff.py b/tests/test_mst_diff.py index 7b5a58d..31f71bf 100644 --- a/tests/test_mst_diff.py +++ b/tests/test_mst_diff.py @@ -19,12 +19,14 @@ def setUp(self): i = 0 for height in [0, 1, 0, 2, 0, 1, 0]: # if all these keys are added to a MST, it'll form a perfect binary tree. while True: - key = f"{i:04d}" + key = f"k/{i:02d}" i += 1 if MSTNode.key_height(key) == height: keys.append(key) break - + + #print(keys) + bs = MemoryBlockStore() self.ns = NodeStore(bs) wrangler = NodeWrangler(self.ns)