-
Notifications
You must be signed in to change notification settings - Fork 6
/
s07_zoom_meeting_generator.py
277 lines (240 loc) · 11.4 KB
/
s07_zoom_meeting_generator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
# Project: ISCA 2021 Script
# Filename: s07_zoom_meeting_generator.py
# Date: March 16, 2021
# Author: Bagus Hanindhito (hanindhito[at]bagus[dot]my[dot]id)
# Title: PC Meeting - Zoom Breakout Room Generator
# Description:
## This script generates the Zoom breakout room configuration for each paper based on the PC conflicts
#%% Import some libraries that are needed
import ast
import hashlib
import os
import re

import numpy as np
import pandas as pd
import tqdm
from fuzzywuzzy import process
#%% Define the input and output CSV filename
# ---- Files that are read ----
# Note: the sample data itself is unavailable.
paper_data_filename = 'sample-data/input/isca2021-paperdata.csv'
paper_authors_filename = 'sample-data/input/isca2021-authors.csv'
paper_pc_conflict_filename = 'sample-data/input/isca2021-pcconflicts.csv'
pc_member_zoom_info_filename = 'sample-data/input/isca2021-pczoom.csv'
# Read by this script even though it is stored under the output folder.
pcpc_conflict_info_filename = 'sample-data/output/isca2021-pcconflict-crosscheck.csv'

# ---- Files and folders that are written ----
# Each folder receives one CSV per paper; the "hashed" variants name the file
# after the paper hash instead of the paper ID.
zoom_csv_config_non_hash_folder = 'sample-data/output/zoom'
zoom_csv_config_hash_folder = 'sample-data/output/zoom_hashed'
conflict_csv_non_hash_folder = 'sample-data/output/conflict'
conflict_csv_hash_folder = 'sample-data/output/conflict_hashed'
# Single CSV summarising every paper.
paper_summary_filename = 'sample-data/output/isca2021-paper-summary.csv'
#%% Define constant
# NOTE(review): every address below is the same literal in this copy of the
# file, which looks like a redaction placeholder -- restore the real addresses
# before running, otherwise the per-person checks cannot tell people apart.

# Names of the two Zoom breakout rooms used in the pre-assignment CSV
conflict_room = 'Conflict Room'
discussion_room = 'Discussion Room'

# Zoom accounts that are referenced by name
lizy_discussion_account = '[email protected]'
lizy_conflict_account = '[email protected]'
sandhya_discussion_account = '[email protected]'
bagus_discussion_account = '[email protected]'
aman_discussion_account = '[email protected]'

# E-mail addresses under which the same people are known on HotCRP
lizy_hotcrp_email = '[email protected]'
sandhya_hotcrp_email = '[email protected]'
bagus_hotcrp_email = '[email protected]'
aman_hotcrp_email = '[email protected]'
#%% Load CSV to Pandas DF
# Load the Paper Authors
paper_authors_df = pd.read_csv(paper_authors_filename)
# Load the Paper Data
paper_data_df = pd.read_csv(paper_data_filename)
# Load the Paper PC Conflict
paper_pc_conflict_df = pd.read_csv(paper_pc_conflict_filename)
# Load the PC PC Conflict Info.
# The three conflict_only_* columns are stored as the text of a Python literal
# and are converted back into objects while reading. ast.literal_eval accepts
# literals only, so -- unlike eval() -- a crafted cell in the CSV cannot run
# arbitrary code.
pcpc_conflict_info_df = pd.read_csv(
    pcpc_conflict_info_filename,
    converters={
        'conflict_only_dblp_name': ast.literal_eval,
        'conflict_only_dblp_url': ast.literal_eval,
        'conflict_only_hotcrp': ast.literal_eval,
    },
)
# %% Process paper authors
# Build "<first> <last>" for every author row ...
paper_authors_df['full name'] = paper_authors_df['first'] + ' ' + paper_authors_df['last']
# ... then collect the names of each paper into one list, indexed by paper.
paper_authors_merge_df = paper_authors_df.groupby('paper').agg({'full name': list})
# %% Process paper_data
# Papers without tags get the '#NA' marker; the space-separated tag string is
# then turned into a list of tags.
tags_as_text = paper_data_df['Tags'].fillna('#NA')
paper_data_df['Tags'] = tags_as_text.apply(lambda tag_string: tag_string.split(' '))
# %% Process paper_pc_conflict
# Build "<first> <last>" for every conflict row ...
paper_pc_conflict_df['full name'] = paper_pc_conflict_df['first'] + ' ' + paper_pc_conflict_df['last']
# ... then gather, per paper, the list of conflicted names and the list of
# their e-mail addresses.
paper_pc_conflict_merge_df = paper_pc_conflict_df.groupby('paper').agg(
    {
        'full name': list,
        'email': list,
    }
)
# %% Create new dataframe for papers data
# Take an explicit copy: adding columns to a bare column subset of
# paper_data_df makes pandas emit SettingWithCopyWarning, because it cannot
# tell whether the parent frame was meant to be modified.
papers_df = paper_data_df[['ID', 'Title', 'Tags']].copy()
# Merge the authors (list of full names, looked up by paper ID)
papers_df['authors'] = papers_df['ID'].map(paper_authors_merge_df['full name'])
# Merge PC Conflict names and e-mails; papers with no conflict row get the
# string "#NA" instead of a list.
papers_df['pc conflict name'] = papers_df['ID'].map(paper_pc_conflict_merge_df['full name']).fillna("#NA")
papers_df['pc conflict email'] = papers_df['ID'].map(paper_pc_conflict_merge_df['email']).fillna("#NA")
# Generate Paper Hash based on Title and Paper ID: the first six hex digits of
# SHA-256 over "<ID>@<Title>". It is used as the file name of the hashed outputs.
papers_df['hash'] = papers_df.apply(
    lambda row: hashlib.sha256((str(row.ID) + '@' + row.Title).encode('utf-8')).hexdigest()[:6],
    axis=1,
)
papers_df['hash'] = papers_df['hash'].astype(str)
# %% Load PC Zoom Info
pc_member_zoom_info_df = pd.read_csv(pc_member_zoom_info_filename)
# A missing second Zoom address is replaced by the '#na' marker, which the
# per-paper loop checks for before adding a second entry for a member.
second_zoom_email = pc_member_zoom_info_df['Zoom email 2']
pc_member_zoom_info_df['Zoom email 2'] = second_zoom_email.fillna('#na')
# %% Merge PC Zoom Info and PC Conflict Info
# Join the two PC-member tables on their shared HotCRP e-mail column
# (pandas' default join type is used).
pcpc_merged_info_df = pc_member_zoom_info_df.merge(
    pcpc_conflict_info_df,
    on='hotcrp_email',
)
# %% Make Folder
# Create every output folder up front. exist_ok=True makes this idempotent and
# removes the window between a separate os.path.exists() check and the
# os.makedirs() call in which another process could create the folder.
for output_folder in (
    zoom_csv_config_non_hash_folder,
    zoom_csv_config_hash_folder,
    conflict_csv_non_hash_folder,
    conflict_csv_hash_folder,
):
    os.makedirs(output_folder, exist_ok=True)
# %% Iterate through each paper
def _room_entry(room_name, email):
    """Return one row of the Zoom breakout-room pre-assignment CSV."""
    return {
        "Pre-assign Room Name": room_name,
        "Email Address": email,
    }


def _zoom_emails(pc_member):
    """Return the Zoom address(es) of *pc_member*, first address first.

    The second address is included only when it differs from the first one
    and is not the '#na' marker used for "no second account".
    """
    first_email = pc_member['Zoom email 1']
    second_email = pc_member['Zoom email 2']
    if first_email != second_email and second_email != '#na':
        return [first_email, second_email]
    return [first_email]


# For every paper, write two CSVs (each under an ID-based and a hash-based
# file name): the Zoom pre-assignment of every account to the discussion or
# the conflict room, and the list of conflicted PC members.
for _, paper in tqdm.tqdm(papers_df.iterrows(), total=papers_df.shape[0]):
    paper_id = str(paper['ID'])

    # Papers without any declared conflict carry the string "#NA" instead of a
    # list. Normalise to a set so the membership tests below never turn into
    # substring tests against that string.
    declared_conflicts = paper['pc conflict email']
    if isinstance(declared_conflicts, list):
        conflict_emails = set(declared_conflicts)
    else:
        conflict_emails = set()

    participant_list = []
    conflict_list = []
    discussion_email_list = []

    # Loop through PC Member
    for _, pc_member in pcpc_merged_info_df.iterrows():
        hotcrp_email = pc_member['hotcrp_email']
        # Only the author-side (declared) conflicts are used. The PC-side
        # cross-check -- fuzzy-matching each author name against the member's
        # conflict_only_dblp_name / conflict_only_hotcrp lists with
        # process.extractOne and a score threshold of 95 -- is currently
        # disabled.
        if hotcrp_email in conflict_emails:
            room_name = conflict_room
            # Each conflicted member is listed once, however many Zoom
            # accounts they have.
            conflict_list.append(pc_member['Name'] + " (" + pc_member['Institution'] + ")")
        else:
            room_name = discussion_room
            discussion_email_list.append(hotcrp_email)
        # Register every Zoom account of the member in the chosen room.
        participant_list.extend(
            _room_entry(room_name, email) for email in _zoom_emails(pc_member)
        )

    # Post processing
    # Sanity check on the discussion room: report when lizy_hotcrp_email is not
    # among the non-conflicted members. If sandhya_hotcrp_email is there, that
    # is reported as the replacement; otherwise an alarm is printed.
    if lizy_hotcrp_email not in discussion_email_list:
        if sandhya_hotcrp_email in discussion_email_list:
            print("[" + paper_id + "] Lizy conflict is detected and Sandhya replaces Lizy\n")
        else:
            print("[" + paper_id + "] !!!! Something Has Gone Wrong !!!!\n")

    # Always add Bagus Discussion Account to Discussion Room
    participant_list.append(_room_entry(discussion_room, bagus_discussion_account))
    # lizy_conflict_account is deliberately not pre-assigned to any room here
    # (the assignment to the conflict room is disabled), so it stays in the
    # main room.
    # One more hard-coded account always joins the discussion room.
    # NOTE(review): the previous comment on this entry said it was Lizy's
    # conflict account going to the conflict room, which contradicts the code;
    # confirm whose address this is and move it to a named constant.
    participant_list.append(_room_entry(discussion_room, '[email protected]'))

    # Then, handle Aman Conflict
    # Does not need to be so precise -- depends only on the authors-side
    # since he will not be able to see the paper on HotCRP.
    # Both of his accounts go to the same room.
    if aman_hotcrp_email in conflict_emails:
        aman_room = conflict_room
    else:
        aman_room = discussion_room
    participant_list.append(_room_entry(aman_room, aman_discussion_account))
    participant_list.append(_room_entry(aman_room, '[email protected]'))

    # Write the data to the output
    participant_df = pd.DataFrame(participant_list)
    conflict_df = pd.DataFrame(conflict_list)
    id_file_name = paper_id + ".csv"
    hash_file_name = paper['hash'] + ".csv"
    participant_df.to_csv(os.path.join(zoom_csv_config_non_hash_folder, id_file_name), index=False)
    participant_df.to_csv(os.path.join(zoom_csv_config_hash_folder, hash_file_name), index=False)
    conflict_df.to_csv(os.path.join(conflict_csv_non_hash_folder, id_file_name), index=False, header=False)
    conflict_df.to_csv(os.path.join(conflict_csv_hash_folder, hash_file_name), index=False, header=False)
# %% Save Paper Summary
# The summary CSV uses 'Authors' as the header of the author-list column; the
# rename is applied to papers_df itself before it is written out.
summary_header_names = {'authors': 'Authors'}
papers_df.rename(columns=summary_header_names, inplace=True)
papers_df.to_csv(paper_summary_filename, index=False)