-
Notifications
You must be signed in to change notification settings - Fork 0
/
analyse_schedules.py
131 lines (103 loc) · 3.72 KB
/
analyse_schedules.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/bin/python3
import os
import sys
from typing import Dict, List, Optional, Tuple, Union
# Paths of the CSV files found for each kind of input; filled in at import time.
household = []
person = []
activities = []

# One entry per kind of input file:
# (list collecting the paths, fragment the file name must contain,
#  name of the key column checked for duplicates, or None to skip that check).
important_files = [
    (household, "household", "hid"),
    (person, "person", "pid"),
    (activities, "locat", None),
]

# Numeric activity_type code -> readable label. The code is the position in
# this tuple, so the codes always form the contiguous range 0..n-1.
activity_type_dictionary = dict(enumerate((
    "trip",
    "home",
    "work",
    "shop",
    "other",
    "school",
    "unknown_other",
    "religious",
)))

# Get the required files from the passed directory
# Only names with a real ".csv" extension are considered. os.listdir() returns
# entries in arbitrary order, so they are sorted to make the order in which
# files are read (and hence the order of the reported days) reproducible.
for f in sorted(os.listdir(sys.argv[1])):
    if not f.endswith(".csv"):
        continue
    for x in important_files:
        # x is (path list, file-name fragment, key column); a file is filed
        # under every kind whose fragment occurs in its name.
        if x[1] in f:
            x[0].append(os.path.join(sys.argv[1], f))
print(activities)
def test_duplicate(files: List[str], key: str) -> Tuple[int, Dict[str, int]]:
    """Count the distinct values of a key column and report the repeated ones.

    Each file is read as comma-separated text whose first line holds the
    column names. Values are compared across all files, so a value that shows
    up in two different files counts as a duplicate. Empty files and blank
    rows are skipped.

    Args:
        files: Paths of the files to scan.
        key: Name of the column whose values are expected to be unique.

    Returns:
        A tuple ``(unique, duplicates)``: the number of distinct values seen,
        and a dict mapping every repeated value to the number of occurrences
        beyond its first one.

    Raises:
        KeyError: If ``key`` is not among a file's column names, or a row has
            too few fields to reach that column.
    """
    seen = set()  # set membership keeps the scan linear in the number of rows
    duplicates = {}
    for path in files:
        with open(path, 'r') as fi:
            lines = fi.read().splitlines()
        if not lines:
            continue  # empty file: no header line, nothing to check
        headers = lines[0].split(",")
        for row in lines[1:]:
            if not row:
                continue  # blank line, e.g. at the end of the file
            value = dict(zip(headers, row.split(",")))[key]
            if value in seen:
                duplicates[value] = duplicates.get(value, 0) + 1
            else:
                seen.add(value)
    return len(seen), duplicates


# Despite its name this is a data check, not a unit test: keep pytest from
# collecting it when the module's names are imported into a test file.
test_duplicate.__test__ = False
def read_activities(files: Optional[List[str]] = None) -> List[Dict[str, str]]:
    """Read activity files into a list of row dictionaries.

    Each file is read as comma-separated text whose first line holds the
    column names. Line endings are stripped before splitting, so the last
    column's name and value carry no trailing newline. Empty files and blank
    rows are skipped.

    Args:
        files: Paths of the files to read. When omitted, the module-level
            ``activities`` list is used.

    Returns:
        One dict per data row, mapping column name to the raw string value,
        in file order.
    """
    if files is None:
        files = activities
    activity_schedule = []
    for path in files:
        with open(path, 'r') as fi:
            lines = fi.read().splitlines()
        if not lines:
            continue  # empty file: no header line, no rows
        headers = lines[0].split(",")
        for row in lines[1:]:
            if row:  # ignore blank lines
                activity_schedule.append(dict(zip(headers, row.split(","))))
    return activity_schedule
def group_activities_per_day(activities):
    """Aggregate activity records per day and per activity type.

    Args:
        activities: Iterable of records providing the keys "start_time",
            "activity_type" (a numeric code) and "duration"; values may be
            strings or ints.

    Returns:
        A tuple ``(activities_per_day, total_per_day)``. The first maps
        day -> activity label -> ``{"count": ..., "duration": ...}`` with the
        number of records and their summed duration; the second maps
        day -> total number of records on that day.

    Raises:
        KeyError: If a record's activity type code has no label.
    """
    per_day = {}
    day_totals = {}
    for record in activities:
        # Convert everything first so a malformed record leaves the tallies untouched.
        day = start_time_to_day(record["start_time"])
        label = activity_type_dictionary[int(record["activity_type"])]
        length = int(record["duration"])

        day_stats = per_day.setdefault(day, {})
        day_totals.setdefault(day, 0)
        type_stats = day_stats.setdefault(label, {"count": 0, "duration": 0})

        type_stats["count"] += 1
        type_stats["duration"] += length
        day_totals[day] += 1
    return per_day, day_totals
def start_time_to_day(start_time: Union[int, str]) -> int:
    """Convert a start time (seconds since sunday midnight) to a day of week.

    Args:
        start_time: Number of seconds, as an int or as a string that ``int()``
            can parse.

    Returns:
        The number of whole days contained in ``start_time``, truncated
        towards zero. The result is not wrapped, so a start time of a week
        or more yields 7 or higher.

    Raises:
        ValueError: If ``start_time`` is a string that is not an integer.
    """
    # seconds -> minutes -> hours -> days
    return int(int(start_time) / 60 / 60 / 24)
def test_duplicates():
    """Print a duplicate-key summary for every input kind that has a key column.

    For each entry of ``important_files`` with a key column, prints the
    entry's name, the number of distinct key values and the number of key
    values that occur more than once.
    """
    for paths, label, key in important_files:
        if key is None:
            continue  # no key column configured for this kind of file
        unique, duplicate_dict = test_duplicate(paths, key)
        summary = "{1} values with unique key '{0}', {2} entries occur more than once".format(
            key, unique, len(duplicate_dict))
        print(label, summary)
def process_activities_per_day(activities_per_day, total_per_day):
    """Print a per-day report of the grouped activities.

    For every day the day key is printed first, then one tab-indented line
    per activity type (highest count first) with its count and summed
    duration, and finally the total number of activities of that day.

    Args:
        activities_per_day: Mapping day -> activity type -> dict with the
            keys "count" and "duration".
        total_per_day: Mapping day -> total number of activities.
    """
    for day, per_type in activities_per_day.items():
        print(day)
        # Stable sort: types with equal counts keep their insertion order.
        ranked = sorted(per_type.items(), key=lambda entry: entry[1]["count"], reverse=True)
        for name, stats in ranked:
            print("\t{type} Count: {count} duration: {duration}".format(type=name, **stats))
        print("\tDay total: ", total_per_day[day])
def write_activities_per_day(out, activities_per_day, total_per_day):
    """Write the per-day activity summary to a semicolon-separated file.

    The header is ``day;total`` followed by a ``<type>_count`` column for
    every activity type and then a ``<type>_duration`` column for every
    activity type, in activity code order. Each day becomes one row; a type
    that did not occur on that day gets an empty cell.

    Args:
        out: Path of the file to (over)write.
        activities_per_day: Mapping day -> activity type -> dict with the
            keys "count" and "duration".
        total_per_day: Mapping day -> total number of activities.
    """
    # Labels in code order; relies on the codes being 0..n-1.
    type_names = [activity_type_dictionary[code] for code in range(len(activity_type_dictionary))]
    columns = [f"{name}_count" for name in type_names]
    columns += [f"{name}_duration" for name in type_names]

    with open(out, 'w') as fo:
        fo.write("day;total;" + ";".join(columns) + "\n")
        for day, per_type in activities_per_day.items():
            cells = [f"{day}", f"{total_per_day[day]}"]
            for field in ("count", "duration"):
                for name in type_names:
                    cells.append(f"{per_type[name][field]}" if name in per_type else "")
            fo.write(";".join(cells) + "\n")

    print(f"Stored activity summary for {sys.argv[1]} in {out}")
if __name__ == "__main__":
    # Usage: analyse_schedules.py <input directory> [<summary output file>]
    # Uncomment to also check the household and person key columns for duplicates:
    # test_duplicates()

    # Distinct names on purpose: the module-level ``activities`` list holds the
    # input file paths read by read_activities() and must not be rebound to
    # the grouped result.
    activities_per_day, total_per_day = group_activities_per_day(read_activities())
    process_activities_per_day(activities_per_day, total_per_day)
    # The summary file is only written when an output path was given.
    if len(sys.argv) > 2:
        write_activities_per_day(sys.argv[2], activities_per_day, total_per_day)