From 349b4669eb1b12d758e6ca8c25043246283f44b2 Mon Sep 17 00:00:00 2001 From: Cisphyx Date: Mon, 17 Jun 2024 12:37:06 -0400 Subject: [PATCH] Add nodup option to ScanKeys (SYN-7497) (#3755) --- synapse/lib/layer.py | 8 ++++---- synapse/lib/lmdbslab.py | 27 +++++++++++++++++---------- synapse/tests/test_lib_lmdbslab.py | 13 +++++++++++++ 3 files changed, 34 insertions(+), 14 deletions(-) diff --git a/synapse/lib/layer.py b/synapse/lib/layer.py index 9df33318a7f..41ffede69ff 100644 --- a/synapse/lib/layer.py +++ b/synapse/lib/layer.py @@ -3817,7 +3817,7 @@ def _editNodeEdgeDel(self, buid, form, edit, sode, meta): async def getEdgeVerbs(self): - for lkey in self.layrslab.scanKeys(db=self.byverb): + for lkey in self.layrslab.scanKeys(db=self.byverb, nodup=True): yield lkey.decode() async def getEdges(self, verb=None): @@ -4089,7 +4089,7 @@ async def iterNodeDataKeys(self, buid): ''' Return a generator of all a buid's node data keys ''' - for lkey in self.dataslab.scanKeysByPref(buid, db=self.nodedata): + for lkey in self.dataslab.scanKeysByPref(buid, db=self.nodedata, nodup=True): abrv = lkey[32:] prop = self.getAbrvProp(abrv) yield prop[0] @@ -4160,14 +4160,14 @@ async def confirmLayerEditPerms(self, user, gateiden, delete=False): # nodedata if not allow_ndata: - async for abrv in s_coro.pause(self.dataslab.scanKeys(db=self.dataname)): + async for abrv in s_coro.pause(self.dataslab.scanKeys(db=self.dataname, nodup=True)): name, _ = self.getAbrvProp(abrv) perm = perm_ndata + (name,) user.confirm(perm, gateiden=gateiden) # edges if not allow_edges: - async for verb in s_coro.pause(self.layrslab.scanKeys(db=self.byverb)): + async for verb in s_coro.pause(self.layrslab.scanKeys(db=self.byverb, nodup=True)): perm = perm_edges + (verb.decode(),) user.confirm(perm, gateiden=gateiden) diff --git a/synapse/lib/lmdbslab.py b/synapse/lib/lmdbslab.py index 8fbbd1cd4a5..db0667ec9dc 100644 --- a/synapse/lib/lmdbslab.py +++ b/synapse/lib/lmdbslab.py @@ -1253,18 +1253,18 @@ def stat(self, db=None): finally: self._relXactForReading() - def scanKeys(self, db=None): + def scanKeys(self, db=None, nodup=False): - with ScanKeys(self, db) as scan: + with ScanKeys(self, db, nodup=nodup) as scan: if not scan.first(): return yield from scan.iternext() - def scanKeysByPref(self, byts, db=None): + def scanKeysByPref(self, byts, db=None, nodup=False): - with ScanKeys(self, db) as scan: + with ScanKeys(self, db, nodup=nodup) as scan: if not scan.set_range(byts): return @@ -1283,7 +1283,7 @@ async def countByPref(self, byts, db=None, maxsize=None): ''' count = 0 size = len(byts) - with ScanKeys(self, db) as scan: + with ScanKeys(self, db, nodup=True) as scan: if not scan.set_range(byts): return 0 @@ -1293,7 +1293,7 @@ async def countByPref(self, byts, db=None, maxsize=None): if lkey[:size] != byts: return count - count += 1 + count += scan.curs.count() if maxsize is not None and maxsize == count: return count @@ -1756,14 +1756,21 @@ class ScanKeys(Scan): An iterator over the keys of the database. If the database is dupsort, a key with multiple values with be yielded once for each value. ''' + def __init__(self, slab, db, nodup=False): + Scan.__init__(self, slab, db) + self.nodup = nodup + def iterfunc(self): if self.dupsort: - return Scan.iterfunc(self) + if self.nodup: + return self.curs.iternext_nodup(keys=True, values=False) + else: + return Scan.iterfunc(self) return self.curs.iternext(keys=True, values=False) def resume(self): - if self.dupsort: + if self.dupsort and not self.nodup: return Scan.resume(self) return self.curs.set_range(self.atitem) @@ -1772,13 +1779,13 @@ def isatitem(self): ''' Returns if the cursor is at the value in atitem ''' - if self.dupsort: + if self.dupsort and not self.nodup: return Scan.isatitem(self) return self.atitem == self.curs.key() def iternext(self): - if self.dupsort: + if self.dupsort and not self.nodup: yield from (item[0] for item in Scan.iternext(self)) return diff --git a/synapse/tests/test_lib_lmdbslab.py b/synapse/tests/test_lib_lmdbslab.py index 2585ff27a28..8980c888e03 100644 --- a/synapse/tests/test_lib_lmdbslab.py +++ b/synapse/tests/test_lib_lmdbslab.py @@ -57,9 +57,11 @@ async def test_lmdbslab_scankeys(self): testgenr = slab.scanKeys(db=testdb) dupsgenr = slab.scanKeys(db=dupsdb) + nodupsgenr = slab.scanKeys(db=dupsdb, nodup=True) testlist = [next(testgenr)] dupslist = [next(dupsgenr)] + nodupslist = [next(nodupsgenr)] slab.put(b'derp', b'derp', db=editdb) @@ -68,16 +70,25 @@ async def test_lmdbslab_scankeys(self): testlist.extend(testgenr) dupslist.extend(dupsgenr) + nodupslist.extend(nodupsgenr) self.eq(testlist, (b'hehe', b'hoho')) self.eq(dupslist, (b'hehe', b'hehe', b'hoho')) + self.eq(nodupslist, (b'hehe', b'hoho')) + + self.eq([b'hehe', b'hehe', b'hoho'], list(slab.scanKeysByPref(b'h', db=dupsdb))) + self.eq([b'hehe', b'hoho'], list(slab.scanKeysByPref(b'h', db=dupsdb, nodup=True))) + + self.eq(3, await slab.countByPref(b'h', db=dupsdb)) # now lets delete the key we're on testgenr = slab.scanKeys(db=testdb) dupsgenr = slab.scanKeys(db=testdb) + nodupsgenr = slab.scanKeys(db=testdb, nodup=True) testlist = [next(testgenr)] dupslist = [next(dupsgenr)] + nodupslist = [next(nodupsgenr)] slab.delete(b'hehe', db=testdb) for lkey, lval in slab.scanByDups(b'hehe', db=dupsdb): @@ -87,9 +98,11 @@ async def test_lmdbslab_scankeys(self): testlist.extend(testgenr) dupslist.extend(dupsgenr) + nodupslist.extend(nodupsgenr) self.eq(testlist, (b'hehe', b'hoho')) self.eq(dupslist, (b'hehe', b'hoho')) + self.eq(nodupslist, (b'hehe', b'hoho')) # by pref self.eq([b'hoho'], list(slab.scanKeysByPref(b'h', db=dupsdb)))