-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbenchmark_to_dict_nested_directory_search.py
127 lines (114 loc) · 3.95 KB
/
benchmark_to_dict_nested_directory_search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import itertools
import json
import os
import pickle
import sys
from glob import glob
from typing import Dict, List, Tuple, Union

import pandas as pd
import yaml
# Print DataFrames with every column on a single line and without
# truncating cell contents.
pd.set_option("display.max_columns", None)
pd.set_option("display.expand_frame_repr", False)
# ``None`` is the supported "no width limit" value; the old ``-1`` sentinel
# is deprecated and rejected by newer pandas releases.
pd.set_option("display.max_colwidth", None)
def load_data(path: str) -> Dict:
    """Load one result file, choosing the parser from the path suffix.

    Supported suffixes are ``json``, ``pkl`` / ``pickle`` and
    ``yaml`` / ``yml``.

    Args:
        path: Path of the file to read.

    Returns:
        The deserialized content of the file.

    Raises:
        ValueError: If ``path`` does not end with a supported suffix.
    """
    if path.endswith("json"):
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    if path.endswith(("pkl", "pickle")):
        # NOTE: unpickling can execute arbitrary code; only read trusted files.
        with open(path, "rb") as f:
            return pickle.load(f)
    if path.endswith(("yaml", "yml")):
        # NOTE: FullLoader is not meant for untrusted input either.
        with open(path, "r", encoding="utf-8") as f:
            return yaml.load(f, Loader=yaml.FullLoader)
    # Fail loudly instead of silently returning None, which would only
    # surface later as an unrelated-looking error in the caller.
    raise ValueError(f"unsupported file type: {path}")
def modify_data(path: str, data: Dict) -> Dict:
    """Hook for trimming or reshaping the content of one loaded file.

    As shipped, this hands ``data`` back untouched. Edit the body to keep
    only the fields of interest, branching on ``path`` when different files
    need different treatment.

    Args:
        path: Path the data was loaded from (unused by the default body).
        data: Content loaded from ``path``.

    Returns:
        The data to record for this file; by default the same object
        that was passed in.
    """
    # Example customisation:
    #     if path.endswith("summary.json"):
    #         return {"AUROC": data["AUROC"]}
    #     if path.endswith("stats.json"):
    #         data.pop("num_samples")
    #         return data
    return data
class LoopsNestedDictionary:
    """Gather result files from a nested directory sweep into one table.

    Each keyword argument names one directory level and lists the directory
    names to try at that level. The Cartesian product of those lists gives
    the directory chains ("pairs") to visit; for example
    ``dataset=["d1"], models=["m1", "m2"]`` visits ``<mother_root>/d1/m1``
    and ``<mother_root>/d1/m2``. Below each chain, the files named in
    ``root_names`` are searched for recursively.
    """

    def __init__(
        self,
        mother_root: str,
        root_names: List[str],
        *args,
        **kwargs,
    ):
        """Store the search configuration and precompute the pairs.

        Args:
            mother_root: Directory that every searched path starts from.
            root_names: File names to look for below each pair.
            *args: Accepted for backward compatibility; ignored.
            **kwargs: One entry per directory level, mapping the level name
                to the list of directory names to try. Keyword order is
                the directory nesting order.
        """
        # kwargs keeps insertion order, which defines the nesting order.
        self.mother_root = mother_root
        self.root_names = root_names
        self.all_keys = list(kwargs.keys())
        self.pairs = self.get_pairs(kwargs)

    def get_pairs(self, kwargs: Dict[str, List]) -> List[List[str]]:
        """Return every combination taking one value from each level."""
        return [list(combo) for combo in itertools.product(*kwargs.values())]

    def search(self, pair: List[str]) -> Tuple[str, Union[Dict, List[Dict]]]:
        """Load and merge the ``root_names`` files found below one pair.

        Args:
            pair: Directory names, one per level, in nesting order.

        Returns:
            A tuple ``(name, data)`` where ``name`` is ``pair`` joined with
            ``/``. ``data`` is a single dict while every root name matched
            at most one file, and a list of dicts (one per match) as soon
            as any root name matched several files. Data from a root name
            with exactly one match is copied into every dict of the list.
        """
        shared: Dict = {}  # merged data of all single-match root names
        rows: List[Dict] = []  # one dict per match, used once needed
        for root_name in self.root_names:
            pattern = "/".join([self.mother_root, *pair, "**", root_name])
            # glob returns matches in arbitrary order; sorting keeps the
            # i-th match of each root name aligned across root names.
            matches = sorted(glob(pattern, recursive=True))
            if not matches:
                print(f"not found p: {pattern}")
                continue
            loaded = [modify_data(path, load_data(path)) for path in matches]
            if len(loaded) == 1:
                # A single match applies to the pair as a whole.
                shared.update(loaded[0])
                for row in rows:
                    row.update(loaded[0])
            else:
                # Grow to one row per match; new rows start from the data
                # already collected for the pair so nothing is dropped.
                rows.extend(
                    dict(shared) for _ in range(len(loaded) - len(rows))
                )
                for row, data in zip(rows, loaded):
                    row.update(data)
        return "/".join(pair), (rows if rows else shared)

    def to_dict(self) -> pd.DataFrame:
        """Build the result table for all pairs.

        Returns:
            A DataFrame with one row per pair, or one row per match when a
            pair has several. Each row holds the level names from the
            constructor keywords plus the loaded data; loaded data wins on
            key collisions.
        """
        result_list = []
        for pair in self.pairs:
            base = dict(zip(self.all_keys, pair))
            _, found = self.search(pair)
            if isinstance(found, list):
                result_list.extend({**base, **item} for item in found)
            else:
                result_list.append({**base, **found})
        return pd.DataFrame(result_list)
if __name__ == "__main__":
    # Demo sweep: look below ./<dataset>/<model>/ for the two result files
    # and print the combined table.
    frame = LoopsNestedDictionary(
        mother_root=".",
        root_names=["summary.json", "stats.json"],
        dataset=["dataset1", "dataset2"],
        models=["model1", "model2", "model3"],
    ).to_dict()
    print(frame)