-
Notifications
You must be signed in to change notification settings - Fork 0
/
WikiAnalyzeLinks2.py
89 lines (75 loc) · 4.22 KB
/
WikiAnalyzeLinks2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import csv
import os
import sys
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import numpy as np
# Slice of data rows (0-based, header already skipped) that is analysed:
# the 3rd through 58th data rows, i.e. at most 56 daily values.
_FIRST_ROW = 2
_END_ROW = 58


def _window_mean(values, center, radius=2):
    """Return the rounded mean of ``values`` within ``radius`` of ``center``.

    The window is clipped to the bounds of ``values`` and the sum is divided
    by the number of samples actually present.  An unclipped
    ``values[center-2:center+3]`` gets a negative start when ``center`` is 0
    or 1, which yields an empty (or wrong) slice, and a fixed divisor of 5
    under-weights a window truncated at the end of the series.
    """
    start = max(center - radius, 0)
    window = values[start:center + radius + 1]
    return round(sum(window) / len(window), 2)


def _summarize_csv(csv_path):
    """Summarize one CSV file as a baseline/peak row.

    Column 0 of each row is read as a ``YYYYMMDD`` date and column 1 as an
    integer view count.

    Returns:
        ``[name, baseline, baseline_date, peak, peak_date, percent_increase]``
        where ``name`` is the file name without extension, or ``None`` when
        the file has no rows in the analysed range.
    """
    with open(csv_path, 'r', newline='') as f:
        csv_reader = csv.reader(f)
        next(csv_reader, None)  # skip header row; tolerate an empty file
        rows = list(csv_reader)

    views = [int(row[1]) for row in rows[_FIRST_ROW:_END_ROW]]
    if not views:
        return None

    def describe(index):
        # ``index`` is relative to ``views``; shift back to index ``rows``.
        raw_date = rows[index + _FIRST_ROW][0]
        date = datetime.strptime(raw_date, '%Y%m%d').strftime('%m/%d/%Y')
        return _window_mean(views, index), date

    baseline_value, baseline_date = describe(views.index(min(views)))
    peak_value, peak_date = describe(views.index(max(views)))

    if baseline_value == 0:
        # A zero baseline makes the relative increase undefined; report 0.
        percent_increase = 0
    else:
        percent_increase = round(
            (peak_value - baseline_value) / baseline_value * 100, 2)

    name = os.path.splitext(os.path.basename(csv_path))[0]
    return [name, baseline_value, baseline_date,
            peak_value, peak_date, percent_increase]


def _plot_top(percent_data, title, limit=25):
    """Show a bar chart of the first ``limit`` ``[name, percent]`` pairs."""
    top = percent_data[:limit]
    labels = [name for name, _ in top]
    heights = [int(percent) for _, percent in top]

    fig = plt.figure()
    # NOTE(review): the width is given in inches, so 500 is extremely wide;
    # kept as in the original -- confirm this is intended.
    fig.set_figwidth(500)
    plt.bar(labels, heights)
    plt.xticks(rotation=90)
    plt.margins(x=0, y=0)
    plt.xlabel('Topic', labelpad=2)
    plt.ylabel('Percent Increase')
    plt.title(title)
    plt.show()


def _write_csv(path, header, rows):
    """Write ``header`` followed by ``rows`` to ``path`` as CSV."""
    with open(path, 'w', newline='') as f:
        csv_writer = csv.writer(f)
        csv_writer.writerow(header)
        csv_writer.writerows(rows)


def main(topic='Brexit', title=None):
    """Analyse the CSV files under ``<sys.argv[1]>/<topic>``.

    For every sub-folder that contains ``.csv`` files, each file is reduced
    to a baseline (window around the minimum), a peak (window around the
    maximum) and the percent increase between them.  The results, sorted by
    percent increase (descending), are shown as a bar chart of the top 25
    and written to ``<sys.argv[1]>/Analysis/<folder>.csv`` (all columns) and
    ``<sys.argv[1]>/Graph Analysis/<folder>.csv`` (name and percent only).

    Args:
        topic: Name of the folder under the base directory to analyse.
            Other folders previously used with this script were
            ``'DonaldTrump'`` and ``'GravitationalWave'``.
        title: Chart title; defaults to ``topic``.  Titles previously used
            for the other folders were ``'Trump Election Victory, 2016'``
            and ``'Observation of Gravitational Waves'``.
    """
    if len(sys.argv) < 2:
        sys.exit('usage: pass the data directory as the first argument')
    if title is None:
        title = topic

    base_dir = sys.argv[1]
    # Get the directory to search for CSV files.
    root_dir = os.path.join(base_dir, topic)
    # Resolve the output locations up front so the final message never
    # refers to an unbound name when no folder contains CSV files.
    output_dir = os.path.join(base_dir, 'Analysis')
    output_dir1 = os.path.join(base_dir, 'Graph Analysis')

    # Loop over the folders in the root directory.
    for folder_name in os.listdir(root_dir):
        folder_path = os.path.join(root_dir, folder_name)
        if not os.path.isdir(folder_path):
            continue
        csv_files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]
        if not csv_files:
            continue

        views_data = []
        for csv_file in csv_files:
            csv_path = os.path.join(folder_path, csv_file)
            summary = _summarize_csv(csv_path)
            if summary is None:
                print(f'Skipping {csv_path}: no rows in the analysed range',
                      file=sys.stderr)
                continue
            views_data.append(summary)
        if not views_data:
            continue

        # One stable sort after the loop gives the same order as re-sorting
        # after every append.
        views_data.sort(key=lambda row: row[5], reverse=True)
        percent_data = [[row[0], row[5]] for row in views_data]

        _plot_top(percent_data, title)

        # Write the full summary and the name/percent pairs to CSV files.
        os.makedirs(output_dir, exist_ok=True)
        _write_csv(
            os.path.join(output_dir, f'{folder_name}.csv'),
            ['File', 'Baseline', 'Baseline Date', 'Peak', 'Peak Date',
             'Percent Increase'],
            views_data,
        )
        os.makedirs(output_dir1, exist_ok=True)
        _write_csv(
            os.path.join(output_dir1, f'{folder_name}.csv'),
            ['File', 'Percent Increase'],
            percent_data,
        )

    print('Analysis complete. Output saved in:', output_dir + " and " + output_dir1 + "/")
# Run the analysis only when this file is executed as a script.
if __name__ == '__main__':
    main()