-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
99 lines (76 loc) · 2.87 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import time
import yaml
from dbs import DBS
from dvclive import Live
from helpers.data_processor import DocumentProcessor
from helpers.dp_connector import DBConector
from tqdm import tqdm
# --------------------------------------------------------------------------------------
# constants
# --------------------------------------------------------------------------------------
# Multiply a duration in nanoseconds by this factor to obtain milliseconds.
NS2MS = 1e-6
# --------------------------------------------------------------------------------------
# helper funcs
# --------------------------------------------------------------------------------------
def benchmark_insert(dp: DocumentProcessor, db: DBConector, n_documents: int) -> int:
    """Time the insertion of up to ``n_documents`` documents into ``db``.

    ``dp.reset_parser()`` and ``dp.fill_queue()`` are called before the clock
    starts. The measured span covers both pulling each document out of ``dp``
    and the ``db.insert_document`` call.

    Args:
        dp: Document source; iterated to obtain the documents to insert.
        db: Database connector under test.
        n_documents: Maximum number of documents to insert.

    Returns:
        Elapsed time of the whole loop in nanoseconds.
    """
    dp.reset_parser()
    dp.fill_queue()
    # perf_counter_ns is monotonic and high-resolution; the wall clock
    # (time_ns) can jump if the system time is adjusted mid-benchmark.
    start = time.perf_counter_ns()
    for i, doc in tqdm(enumerate(dp), leave=False):
        # NOTE: one document beyond the limit is fetched from dp before this
        # check fires; it is discarded without being inserted.
        if i >= n_documents:
            break
        db.insert_document(doc)
    return time.perf_counter_ns() - start
def benchmark_query(
    dp: DocumentProcessor, db: DBConector, n_documents: int, n_results: int
) -> int:
    """Time up to ``n_documents`` queries against ``db``.

    ``dp.fill_queue()`` is called before the clock starts. The measured span
    covers both pulling each document out of ``dp`` and the ``db.query_db``
    call.

    NOTE(review): unlike the insert and remove benchmarks, this one does not
    call ``dp.reset_parser()`` first -- presumably so that queries continue
    from the parser's current position; confirm that this is intended.

    Args:
        dp: Document source; each yielded document is used as a query.
        db: Database connector under test.
        n_documents: Maximum number of queries to issue.
        n_results: Passed through to ``db.query_db`` as its second argument.

    Returns:
        Elapsed time of the whole loop in nanoseconds.
    """
    dp.fill_queue()
    # perf_counter_ns is monotonic and high-resolution; the wall clock
    # (time_ns) can jump if the system time is adjusted mid-benchmark.
    start = time.perf_counter_ns()
    for i, doc in tqdm(enumerate(dp), leave=False):
        # NOTE: one document beyond the limit is fetched from dp before this
        # check fires; it is discarded without being queried.
        if i >= n_documents:
            break
        db.query_db(doc, n_results)
    return time.perf_counter_ns() - start
def benchmark_remove(dp: DocumentProcessor, db: DBConector, n_documents: int) -> int:
    """Time the removal of up to ``n_documents`` documents from ``db``.

    ``dp.reset_parser()`` and ``dp.fill_queue()`` are called before the clock
    starts. Each document yielded by ``dp`` is removed via its ``id``
    attribute. The measured span covers both pulling each document out of
    ``dp`` and the ``db.remove_document`` call.

    Args:
        dp: Document source; iterated to obtain the ids to remove.
        db: Database connector under test.
        n_documents: Maximum number of documents to remove.

    Returns:
        Elapsed time of the whole loop in nanoseconds.
    """
    dp.reset_parser()
    dp.fill_queue()
    # perf_counter_ns is monotonic and high-resolution; the wall clock
    # (time_ns) can jump if the system time is adjusted mid-benchmark.
    start = time.perf_counter_ns()
    for i, doc in tqdm(enumerate(dp), leave=False):
        # NOTE: one document beyond the limit is fetched from dp before this
        # check fires; it is discarded without being removed.
        if i >= n_documents:
            break
        db.remove_document(doc.id)
    return time.perf_counter_ns() - start
# --------------------------------------------------------------------------------------
# main
# --------------------------------------------------------------------------------------
def main():
    """Run the insert, query and remove benchmarks for every entry in ``DBS``.

    Settings are read from ``params.yaml`` in the current working directory
    (keys: ``data_path``, ``multiprocess``, ``n_docs``, ``n_query_results``).
    For each entry, the total time of each benchmark is converted to
    milliseconds and divided by ``n_docs``; the result is logged through
    dvclive and echoed to the console.
    """
    # params
    # Context manager so the file handle is closed right after parsing.
    with open("params.yaml") as params_file:
        cfg = yaml.safe_load(params_file)
    data_path = cfg["data_path"]
    multiprocess = cfg["multiprocess"]
    n_docs = cfg["n_docs"]
    n_query_results = cfg["n_query_results"]

    # document processor
    dp = DocumentProcessor(file_path=data_path, multiprocess=multiprocess, qsize=n_docs)
    dp.start_parser()

    try:
        # benchmarks
        with Live() as live:
            for m in (pbar := tqdm(DBS, leave=False)):
                pbar.desc = m.name
                # Each member's value is called with no arguments to build
                # the connector for this run.
                db = m.value()
                tqdm.write(f"{m.name}:")

                # document insert
                ins = benchmark_insert(dp, db, n_docs) * NS2MS / n_docs
                live.log_metric(f"{m.name}/insert", ins)
                tqdm.write(f"insert: {ins} ms/insert")

                # document query
                qry = benchmark_query(dp, db, n_docs, n_query_results) * NS2MS / n_docs
                live.log_metric(f"{m.name}/query", qry)
                tqdm.write(f"query: {qry} ms/query")

                # document removal
                rem = benchmark_remove(dp, db, n_docs) * NS2MS / n_docs
                live.log_metric(f"{m.name}/remove", rem)
                tqdm.write(f"remove: {rem} ms/removal")
    finally:
        # Always stop the parser, even when a benchmark raises, so a started
        # parser is never left running.
        dp.stop_parser()
# Run the benchmarks only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()