Commit 1e6624d

add db cache
1 parent 5aa8192 commit 1e6624d

2 files changed: 95 additions, 20 deletions


elixir/data.py

Lines changed: 85 additions & 8 deletions
@@ -18,6 +18,7 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with Elixir. If not, see <http://www.gnu.org/licenses/>.

+from typing import OrderedDict
 import berkeleydb
 import re
 from . import lib

@@ -208,18 +209,94 @@ def close(self):
     def __len__(self):
         return self.db.stat()["nkeys"]

+class CachedBsdDB:
+    def __init__(self, filename, readonly, contentType, cachesize):
+        self.filename = filename
+        self.db = berkeleydb.db.DB()
+        flags = 0
+
+        self.cachesize = cachesize
+        self.cache = OrderedDict()
+
+        if readonly:
+            flags |= berkeleydb.db.DB_RDONLY
+            self.db.open(filename, flags=flags)
+        else:
+            flags |= berkeleydb.db.DB_CREATE
+            self.db.open(filename, flags=flags, mode=0o644, dbtype=berkeleydb.db.DB_BTREE)
+        self.ctype = contentType
+
+    def exists(self, key):
+        if key in self.cache:
+            return True
+
+        key = autoBytes(key)
+        return self.db.exists(key)
+
+    def get(self, key):
+        if key in self.cache:
+            self.cache.move_to_end(key)
+            return self.cache[key]
+
+        key = autoBytes(key)
+        p = self.db.get(key)
+        if p is None:
+            return None
+        p = self.ctype(p)
+
+        self.cache[key] = p
+        self.cache.move_to_end(key)
+        if len(self.cache) > self.cachesize:
+            old_k, old_v = self.cache.popitem(last=False)
+            self.put_raw(old_k, old_v)
+
+        return p
+
+    def get_keys(self):
+        return self.db.keys()
+
+    def put(self, key, val):
+        self.cache[key] = val
+        self.cache.move_to_end(key)
+        if len(self.cache) > self.cachesize:
+            old_k, old_v = self.cache.popitem(last=False)
+            self.put_raw(old_k, old_v)
+
+    def put_raw(self, key, val, sync=False):
+        key = autoBytes(key)
+        val = autoBytes(val)
+        if type(val) is not bytes:
+            val = val.pack()
+        self.db.put(key, val)
+        if sync:
+            self.db.sync()
+
+    def sync(self):
+        for k, v in self.cache.items():
+            self.put_raw(k, v)
+
+        self.db.sync()
+
+    def close(self):
+        self.sync()
+        self.db.close()
+
+    def __len__(self):
+        return self.db.stat()["nkeys"]
+
 class DB:
-    def __init__(self, dir, readonly=True, dtscomp=False, shared=False, update_cache=False):
+    def __init__(self, dir, readonly=True, dtscomp=False, shared=False, update_cache=None):
         if os.path.isdir(dir):
             self.dir = dir
         else:
             raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), dir)

         ro = readonly
-        cachesize = None

         if update_cache:
-            cachesize = CACHESIZE
+            db_cls = lambda dir, ro, ctype: CachedBsdDB(dir, ro, ctype, cachesize=update_cache)
+        else:
+            db_cls = lambda dir, ro, ctype: BsdDB(dir, ro, ctype, shared=shared)

         self.vars = BsdDB(dir + '/variables.db', ro, lambda x: int(x.decode()), shared=shared)
         # Key-value store of basic information

@@ -230,20 +307,20 @@ def __init__(self, dir, readonly=True, dtscomp=False, shared=False, update_cache
         self.file = BsdDB(dir + '/filenames.db', ro, lambda x: x.decode(), shared=shared)
         # Map serial number to filename
         self.vers = BsdDB(dir + '/versions.db', ro, PathList, shared=shared)
-        self.defs = BsdDB(dir + '/definitions.db', ro, DefList, shared=shared, cachesize=cachesize)
+        self.defs = db_cls(dir + '/definitions.db', ro, DefList)
         self.defs_cache = {}
         NOOP = lambda x: x
         self.defs_cache['C'] = BsdDB(dir + '/definitions-cache-C.db', ro, NOOP, shared=shared)
         self.defs_cache['K'] = BsdDB(dir + '/definitions-cache-K.db', ro, NOOP, shared=shared)
         self.defs_cache['D'] = BsdDB(dir + '/definitions-cache-D.db', ro, NOOP, shared=shared)
         self.defs_cache['M'] = BsdDB(dir + '/definitions-cache-M.db', ro, NOOP, shared=shared)
         assert sorted(self.defs_cache.keys()) == sorted(lib.CACHED_DEFINITIONS_FAMILIES)
-        self.refs = BsdDB(dir + '/references.db', ro, RefList, shared=shared, cachesize=cachesize)
-        self.docs = BsdDB(dir + '/doccomments.db', ro, RefList, shared=shared, cachesize=cachesize)
+        self.refs = db_cls(dir + '/references.db', ro, RefList)
+        self.docs = db_cls(dir + '/doccomments.db', ro, RefList)
         self.dtscomp = dtscomp
         if dtscomp:
-            self.comps = BsdDB(dir + '/compatibledts.db', ro, RefList, shared=shared, cachesize=cachesize)
-            self.comps_docs = BsdDB(dir + '/compatibledts_docs.db', ro, RefList, shared=shared, cachesize=cachesize)
+            self.comps = db_cls(dir + '/compatibledts.db', ro, RefList)
+            self.comps_docs = db_cls(dir + '/compatibledts_docs.db', ro, RefList)
             # Use a RefList in case there are multiple doc comments for an identifier

     def close(self):
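
The new CachedBsdDB wraps a BerkeleyDB file with a write-back LRU cache: get() and put() keep the most recently used entries in an OrderedDict, an entry is only serialized and written to disk (via put_raw()) once it is evicted as least recently used, and sync() flushes whatever is still cached. The sketch below shows the same pattern in isolation; it is illustrative only, with a plain dict (backing) standing in for the on-disk B-tree and the class name WriteBackLRU invented for the example.

    from collections import OrderedDict

    class WriteBackLRU:
        """Illustrative write-back LRU cache in the style of CachedBsdDB."""

        def __init__(self, capacity):
            self.capacity = capacity
            self.cache = OrderedDict()   # key -> value, ordered by recency
            self.backing = {}            # stand-in for the BerkeleyDB file

        def get(self, key):
            if key in self.cache:
                self.cache.move_to_end(key)      # mark as most recently used
                return self.cache[key]
            val = self.backing.get(key)
            if val is None:
                return None
            self.cache[key] = val                # promote into the cache
            self._evict_if_needed()
            return val

        def put(self, key, val):
            # Writes land in the cache only; disk is touched on eviction or sync().
            self.cache[key] = val
            self.cache.move_to_end(key)
            self._evict_if_needed()

        def _evict_if_needed(self):
            if len(self.cache) > self.capacity:
                old_key, old_val = self.cache.popitem(last=False)   # least recently used
                self.backing[old_key] = old_val                     # write back on eviction

        def sync(self):
            # Flush every cached entry while keeping the cache warm,
            # mirroring CachedBsdDB.sync().
            self.backing.update(self.cache)

    lru = WriteBackLRU(capacity=2)
    lru.put(b'a', 1); lru.put(b'b', 2); lru.put(b'c', 3)   # b'a' is written back
    assert lru.backing == {b'a': 1} and list(lru.cache) == [b'b', b'c']

Compared with writing through on every put(), this batches the serialization and db.put() work during indexing: an identifier that is touched many times while a tag is being processed only reaches the disk when it falls out of the cache or when sync()/close() runs.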

elixir/update.py

Lines changed: 10 additions & 12 deletions
@@ -1,6 +1,6 @@
 import os.path
 import logging
-from multiprocessing import cpu_count
+from multiprocessing import cpu_count, set_start_method
 from multiprocessing.pool import Pool
 from typing import Dict, Iterable, List, Optional, Tuple, Set

@@ -36,17 +36,13 @@
 IdxCache = Dict[int, Tuple[bytes, str, bool]]

 # Check if definition for ident is visible in current version
-def def_in_version(db: DB, def_cache: Set[bytes], idx_to_hash_and_filename: IdxCache, ident: bytes) -> bool:
-    if ident in def_cache:
-        return True
-
+def def_in_version(db: DB, idx_to_hash_and_filename: IdxCache, ident: bytes) -> bool:
     defs_this_ident = db.defs.get(ident)
     if not defs_this_ident:
         return False

     for def_idx, _, _, _ in defs_this_ident.iter():
         if def_idx in idx_to_hash_and_filename:
-            def_cache.add(ident)
             return True

     return False

@@ -64,12 +60,14 @@ def add_defs(db: DB, defs: DefsDict):
         db.defs.put(ident, obj)

 # Add references to database
-def add_refs(db: DB, def_cache: Set[bytes], idx_to_hash_and_filename: IdxCache, refs: RefsDict):
+def add_refs(db: DB, idx_to_hash_and_filename: IdxCache, refs: RefsDict):
     for ident, idx_to_lines in refs.items():
         deflist = db.defs.get(ident)
-        in_version = def_in_version(db, def_cache, idx_to_hash_and_filename, ident)
+        if deflist is None:
+            continue

-        if deflist is None or not in_version:
+        in_version = def_in_version(db, idx_to_hash_and_filename, ident)
+        if not in_version:
             continue

         def deflist_exists(idx: int, line: int):

@@ -320,10 +318,9 @@ def update_version(db: DB, tag: bytes, pool: Pool, dts_comp_support: bool):

     logger.info("dts comps docs done")

-    def_cache = set()
     for result in pool.imap_unordered(get_refs, idxes, chunksize):
         if result is not None:
-            add_refs(db, def_cache, idx_to_hash_and_filename, result)
+            add_refs(db, idx_to_hash_and_filename, result)

     logger.info("refs done")

@@ -334,10 +331,11 @@ def update_version(db: DB, tag: bytes, pool: Pool, dts_comp_support: bool):
     dts_comp_support = bool(int(script('dts-comp')))
     db = None

+    set_start_method('spawn')
     with Pool() as pool:
         for tag in scriptLines('list-tags'):
             if db is None:
-                db = DB(getDataDir(), readonly=False, dtscomp=dts_comp_support, shared=False, update_cache=True)
+                db = DB(getDataDir(), readonly=False, dtscomp=dts_comp_support, shared=False, update_cache=50000)

             if not db.vers.exists(tag):
                 logger.info("updating tag %s", tag)
