-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathislandora_tree.py
105 lines (62 loc) · 2.3 KB
/
islandora_tree.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#!/usr/bin/env python
# coding: utf-8
# In[ ]:
# Builds a directed-graph data structure of Islandora collections
# In[ ]:
import pandas as pd
import collections
import json
# In[ ]:
# Path of the CSV export that is loaded into a DataFrame with pd.read_csv below.
data_file = "data/report_data_may_2019.csv"
# NOTE(review): presumably an output directory for generated reports; it is not
# referenced anywhere in the visible script -- confirm whether it is still needed.
reports_dir = "report_prod_may_2019/"
# In[ ]:
# Build a list of (parent, child) tuples by walking the collections recursively
def get_collection_members(df, collection_tree, collection):
    """Collect (parent, child) pairs for every collection nested under ``collection``.

    The membership table is walked depth-first. For each row whose
    ``isMemberOfCollection`` equals the current collection URI and whose
    ``cmodel`` is the Islandora collection content model, the child's own
    sub-collections are gathered first and then ``(parent_name, child_pid)``
    is appended. Pairs for deeper levels therefore come before the pair that
    links their parent to its own parent. Rows with any other content model
    are ignored.

    Args:
        df: pandas DataFrame with ``PID``, ``isMemberOfCollection`` and
            ``cmodel`` columns. ``isMemberOfCollection`` is compared against
            URIs carrying the ``info:fedora/`` prefix, while ``PID`` values
            are expected without it (the prefix is added before recursing).
        collection_tree: list that is extended in place with the tuples.
        collection: URI of the collection to start from, for example
            ``"info:fedora/dsu:root"``.

    Returns:
        The same ``collection_tree`` list that was passed in.

    Raises:
        ValueError: if a collection is (transitively) a member of itself.
            Without this check the walk would recurse until RecursionError.
    """
    uri_prefix = "info:fedora/"
    collection_cmodel = "info:fedora/islandora:collectionCModel"

    # Only collection rows can contribute edges, so filter them once up front
    # instead of re-checking the content model row by row at every level.
    collection_rows = df.loc[
        df["cmodel"] == collection_cmodel, ["PID", "isMemberOfCollection"]
    ]

    def _walk(parent_uri, ancestors):
        # Append the pairs for everything below parent_uri, deepest first.
        parent_name = parent_uri.replace(uri_prefix, "")
        is_member = collection_rows["isMemberOfCollection"] == parent_uri
        for pid in collection_rows.loc[is_member, "PID"]:
            child_uri = uri_prefix + pid
            if child_uri in ancestors:
                raise ValueError(
                    "cyclic collection membership detected at {!r}".format(child_uri)
                )
            _walk(child_uri, ancestors | {child_uri})
            collection_tree.append((parent_name, pid))

    _walk(collection, frozenset([collection]))
    return collection_tree
# In[ ]:
# Load the report export; get_collection_members reads its PID,
# isMemberOfCollection and cmodel columns.
df = pd.read_csv(data_file)
# In[ ]:
# NOTE(review): `tree` and `path_list` are not referenced anywhere else in the
# visible script; they are kept only so existing module-level names stay defined.
tree = lambda: collections.defaultdict(tree)
# URI of the top-level collection the walk starts from.
collection = "info:fedora/dsu:root"
path_list = ["root"]
## Build the list of (parent, child) tuples, recursively.
## The accumulator starts as a fresh empty list; the earlier defaultdict
## assignment was overwritten before it was ever read, so it has been removed.
collection_tree = get_collection_members(df, [], collection)
print(collection_tree)
# In[ ]:
# Build a Directed Graph
# https://stackoverflow.com/questions/45460653/given-a-flat-list-of-parent-child-create-a-hierarchical-dictionary-tree
lst = collection_tree
# Adjacency map: every name seen in an edge (as parent or as child) gets its
# own, initially empty, set of children.
graph = {member: set() for edge in lst for member in edge}
# Same keys in the same order, all flagged as "no parent seen yet".
has_parent = dict.fromkeys(graph, False)
for parent, child in lst:
    has_parent[child] = True
    graph[parent].add(child)
# Names that never appeared as a child are the roots of the hierarchy.
roots = [name for name in has_parent if not has_parent[name]]
# NOTE(review): treeData is not referenced anywhere in the visible script.
treeData = []


# Traversal of the graph. A name reachable through several parents is expanded
# under each of them; a cycle raises ValueError instead of recursing forever.
def traverse(hierarchy, graph, names):
    """Nest ``names`` and all of their descendants into ``hierarchy``.

    Args:
        hierarchy: dict to fill in place; each name becomes a key whose value
            is the dict built the same way from that name's children.
        graph: mapping from every name to an iterable of its child names.
        names: iterable of names to place at this level.

    Returns:
        The same ``hierarchy`` dict that was passed in.

    Raises:
        KeyError: if a visited name has no entry in ``graph``.
        ValueError: if a name is reachable from itself (a cycle in ``graph``).
    """
    def _fill(target, children, ancestors):
        # ancestors holds only the names on the path from the top level down
        # to here, so shared descendants (duplicates) remain allowed.
        for name in children:
            if name in ancestors:
                raise ValueError("cycle detected in graph at {!r}".format(name))
            target[name] = _fill({}, graph[name], ancestors | {name})
        return target

    return _fill(hierarchy, names, frozenset())
# Nest every root and its descendants into a single dictionary.
aa = traverse({}, graph, roots)
# A bare json.dumps(...) expression is only displayed inside a notebook cell;
# when the file runs as a script the result was silently discarded, so print it.
print(json.dumps(aa))
# In[ ]: