#!/usr/bin/python3
# Coronavirus Disease Tracker v10.7d
# By Ellie Morissette (@memmam on Github)
# Last updated: 2020-04-15
#
# A Twitter/Discord bot for posting information on the spread of the COVID-19
# outbreak
#
# Uses Requests, Tweepy, pandas, discord-webhook, and matplotlib libraries
#
# File: nCoV_parse.py
# Purpose: Methods for parsing Johns Hopkins data

import sys

# Import data-fetching and Twitter helper methods
from nCoV_fetch import *
from nCoV_twitter import *
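
# The get_jh() call used below is assumed to return two dicts keyed by
# region name; the first maps each region to its cumulative counts, e.g.
# (shape inferred from usage in this module, values illustrative only):
#
#   jh_dict = {
#       "Worldwide": {"Confirmed": 2000000, "Deaths": 130000,
#                     "Recovered": 500000},
#       "US": {"Confirmed": 600000, "Deaths": 30000, "Recovered": 50000},
#   }
#
# All functions in this module index jh_dict with that shape.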

# Build individual report field for Discord embed
def build_discord(c_flag, c_name, diff_arr, diff_offset, jh_dict,
                  friendlyname=None):
    # Create title for field
    if friendlyname is None:
        friendlyname = c_name
    stats_label = f"{c_flag} {friendlyname}:"
    # Generate statistics figures for fields
    # Total cases
    stats_content = f"😷 {jh_dict[c_name]['Confirmed']:,}"
    # Deltas?
    if diff_arr[diff_offset] != 0:
        stats_content = f"{stats_content} ({diff_arr[diff_offset]:+,})\n"
    else:
        stats_content = f"{stats_content}\n"
    # Active cases
    active_cases = (jh_dict[c_name]['Confirmed'] - jh_dict[c_name]['Deaths']
                    - jh_dict[c_name]['Recovered'])
    stats_content = f"{stats_content}🏥 {active_cases:,}"
    # Deltas?
    active_case_delta = (diff_arr[diff_offset] - diff_arr[diff_offset + 1]
                         - diff_arr[diff_offset + 2])
    if active_case_delta != 0:
        stats_content = f"{stats_content} ({active_case_delta:+,})\n"
    else:
        stats_content = f"{stats_content}\n"
    # Dead
    stats_content = f"{stats_content}💀 {jh_dict[c_name]['Deaths']:,}"
    # Deltas?
    if diff_arr[diff_offset + 1] != 0:
        stats_content = f"{stats_content} ({diff_arr[diff_offset + 1]:+,})\n"
    else:
        stats_content = f"{stats_content}\n"
    # Recovered
    stats_content = f"{stats_content}👍 {jh_dict[c_name]['Recovered']:,}"
    # Deltas?
    if diff_arr[diff_offset + 2] != 0:
        stats_content = f"{stats_content} ({diff_arr[diff_offset + 2]:+,})"
    return stats_label, stats_content
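
# Example of one field produced by build_discord (counts illustrative):
#
#   stats_label:   "🇺🇸 United States:"
#   stats_content: "😷 600,000 (+25,000)\n🏥 520,000 (+20,000)\n"
#                  "💀 30,000 (+2,000)\n👍 50,000 (+3,000)"
#
# A zero delta simply omits the parenthesized "(+n)" term on that line.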

# Build individual reports for tweet
def build_twitter(c_flag, c_name, jh_dict, stats_twitter):
    # Total cases
    stats_twitter = f"{stats_twitter}{c_flag}😷{jh_dict[c_name]['Confirmed']:,}"
    # Active
    active_cases = (jh_dict[c_name]['Confirmed'] - jh_dict[c_name]['Deaths']
                    - jh_dict[c_name]['Recovered'])
    stats_twitter = f"{stats_twitter} 🏥{active_cases:,}"
    # Dead
    stats_twitter = f"{stats_twitter} 💀{jh_dict[c_name]['Deaths']:,}"
    # Recovered
    stats_twitter = f"{stats_twitter} 👍{jh_dict[c_name]['Recovered']:,}\n"
    return stats_twitter
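
# Each call appends one line to the running tweet body, e.g.
# (counts illustrative):
#
#   "🇺🇸😷600,000 🏥520,000 💀30,000 👍50,000"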

# Build stats tweet from requested data
def build_stats(send_flag, api, datecode, datecode_hour, hour):
    # Scrape data
    jh_dict, jh_csv_dict = get_jh()
    # Allows clock emoji in timestamps to reflect current hour
    clocks = ["🕛", "🕐", "🕑", "🕒", "🕓", "🕔",
              "🕕", "🕖", "🕗", "🕘", "🕙", "🕚"]
    # Using scraped data and config/data file, generate the following:
    # prev_arr:   data as of last update
    # header_arr: config data for each country
    # curr_arr:   data as of now
    # update_arr: new config/data file to be saved
    prev_arr, header_arr, curr_arr, update_arr = comp_nCoV(
        clocks[hour % 12], send_flag, api, datecode, jh_dict)
    # Create array of deltas from last update to now
    diff_arr = []
    for i in range(len(prev_arr)):
        diff_arr.append(curr_arr[i] - prev_arr[i])
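
    # prev_arr, curr_arr, and diff_arr are flat arrays with three entries
    # per configured country, in config-file order, e.g. (illustrative):
    #
    #   [confirmed_0, deaths_0, recovered_0, confirmed_1, deaths_1, ...]
    #
    # so build_discord reads one country's deltas at offsets
    # diff_offset .. diff_offset + 2.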
    # Choose correct world emoji for 'Worldwide' count to reflect current
    # real-world daytime
    if hour >= 23 or hour <= 7:
        globe = "🌏"
    elif 8 <= hour <= 13:
        globe = "🌍"
    else:
        globe = "🌎"
    # Update globe in header array based on previous
    header_arr[0] = globe
    # Create empty string to store tweet data
    stats_twitter = ""
    # Generate tweet data for the topmost five entries in the config file
    # (header_arr holds five fields per country)
    for i in range(0, 25, 5):
        stats_twitter = build_twitter(header_arr[i], header_arr[i + 1],
                                      jh_dict, stats_twitter)
    # Append emoji key footer to tweet
    stats_twitter = f"{stats_twitter}\n😷Total 🏥Active 💀Dead 👍Recovered"
    # If a webhook file exists, create Discord embed data
    try:
        # Probe for the webhook file; its contents are consumed elsewhere
        with open("webhooks.txt", "r"):
            pass
        length = len(header_arr)
        stats_discord = {}
        diff_offset = 0
        # Iterate along header data five entries at a time, creating a
        # Discord embed field for each entry in the config file
        for i in range(0, length, 5):
            stats_label, stats_content = build_discord(
                header_arr[i], header_arr[i + 1], diff_arr, diff_offset,
                jh_dict, friendlyname=header_arr[i + 2])
            stats_discord[stats_label] = stats_content
            diff_offset = diff_offset + 3
        # Create Discord datestamp
        date_discord = f"{clocks[hour % 12]} Last Updated: {datecode}"
    except FileNotFoundError:
        stats_discord = ''
        date_discord = ''
    # Build statistics tweet
    stats_tweet = (f"{clocks[hour % 12]}{datecode_hour} #COVID19 #Coronavirus\n\n"
                   f"{stats_twitter}")
    # Note: stats_tweet appears in two positions to preserve the tuple
    # shape existing callers expect
    return (stats_tweet, stats_discord, date_discord, stats_tweet, jh_dict,
            jh_csv_dict, header_arr, update_arr)
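
# comp_nCoV below parses config.txt as a flat text file with eight lines
# per tracked country. Layout inferred from the parsing code (field
# descriptions here are assumptions, not from the original source):
#
#   🇺🇸                 line 0: flag emoji
#   US                  line 1: JHU region name (key into jh_dict)
#   United States       line 2: friendly display name
#   1                   line 3: an integer config value
#   New York,Texas      line 4: comma-separated filter list (may be empty)
#   600000              line 5: confirmed count as of last update
#   30000               line 6: death count as of last update
#   50000               line 7: recovered count as of last update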

# Generate case data based on config/data file and scraped JHU web data
def comp_nCoV(clock, send_flag, api, datecode, jh_dict):
    # Check for existence of config/data file
    try:
        with open("config.txt", "r") as config_file:
            config_data = config_file.readlines()
    except FileNotFoundError:
        print("No config file!")
        sys.exit()
    length = len(config_data)
    # Strip newlines from config data, and convert non-string fields from
    # strings to their proper types
    for i in range(length):
        config_data[i] = config_data[i].replace("\n", "")
        if i % 8 == 3:
            config_data[i] = int(config_data[i])
        elif i % 8 == 4:
            # Empty filter lists are often misread from the file; treat the
            # known empty representations ('[]', '', and "['']") as empty.
            # (The original compared the string against the list [''],
            # which can never match; "['']" is assumed to be the intended
            # third form.)
            if config_data[i] not in ('[]', '', "['']"):
                config_data[i] = config_data[i].split(',')
            else:
                config_data[i] = []
    # Create array of last-run case data, for use in generating deltas
    compare_data = []
    for i in range(length):
        if i % 8 > 4:
            compare_data.append(int(config_data[i]))
    # Create array of per-country metadata, for use in generating output
    header_data = []
    for i in range(length):
        if i % 8 < 5:
            header_data.append(config_data[i])
    # Create new config file data
    update_data = []
    length = len(header_data)
    for i in range(length):
        if i % 5 == 4:
            update_data.append(','.join(header_data[i]))
            update_data.append(jh_dict[header_data[i - 3]]['Confirmed'])
            update_data.append(jh_dict[header_data[i - 3]]['Deaths'])
            update_data.append(jh_dict[header_data[i - 3]]['Recovered'])
        else:
            update_data.append(header_data[i])
    # Create array of current case data, for use...basically everywhere
    tweet_data_arr = []
    for i in range(0, length, 5):
        tweet_data_arr.append(jh_dict[header_data[i + 1]]['Confirmed'])
        tweet_data_arr.append(jh_dict[header_data[i + 1]]['Deaths'])
        tweet_data_arr.append(jh_dict[header_data[i + 1]]['Recovered'])
    # Check if data has changed since last run; if not, abort (but only if
    # posting an update)
    if send_flag:
        new_data = False
        length = len(compare_data)
        for i in range(length):
            if tweet_data_arr[i] != compare_data[i]:
                new_data = True
        if not new_data:
            print("No new data")
            lastcheckedupdate(clock, send_flag, api, datecode)
            sys.exit()
    return compare_data, header_data, tweet_data_arr, update_data
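
# Illustrative driver call (not part of this module); the Tweepy `api`
# object and the datecode string formats are assumptions based on the
# signatures above, not confirmed by the original source:
#
#   from datetime import datetime, timezone
#   now = datetime.now(timezone.utc)
#   results = build_stats(send_flag=True, api=api,
#                         datecode=now.strftime("%Y-%m-%d %H:%M UTC"),
#                         datecode_hour=now.strftime(" %H:00 UTC"),
#                         hour=now.hour)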