-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathgenerate_user_problem_dataset.py
89 lines (64 loc) · 2.66 KB
/
generate_user_problem_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import csv
import json
# Load the checkpoint JSON. The encoding is pinned to UTF-8 so that decoding
# does not silently depend on the platform's locale encoding.
with open('checkpoints/checkpoint_400.json', encoding='utf-8') as f:
    data = json.load(f)

# Per-user records; every later step of this script iterates this collection.
users = data['eligibleUsers']

# Output CSV paths: per-(user, problem) rows, per-user tag frequencies and
# per-user rating frequencies, respectively.
csv_file1 = 'data/igm_to_lgm/user_problem.csv'
csv_file2 = 'data/igm_to_lgm/user_tags.csv'
csv_file3 = "data/igm_to_lgm/user_ratings.csv"
# Flatten every user's problem list into the first CSV, one row per
# (user, problem) pair.
with open(csv_file1, 'w', newline='') as out:
    problem_writer = csv.writer(out)
    # Header naming the handle followed by the four exported problem fields.
    problem_writer.writerow(['user_handle', 'problem_id', 'timestamp', 'problem_rating', 'problem_tags'])
    for entry in users:
        owner = entry['handle']
        records = entry['problems']
        # Only the first four positions of each problem record are exported,
        # in the same order as the header columns that follow the handle.
        problem_writer.writerows(
            [owner, record[0], record[1], record[2], record[3]]
            for record in records
        )

print("user_problem file generated successfully!")
# Stamp each user's handle into both frequency mappings under the
# '0user_handle' key so that the handle is exported as a column alongside
# the frequency counts when those mappings are written out as CSV rows.
for entry in users:
    owner = entry['handle']
    for freq_key in ('tagFreq', 'ratingFreq'):
        entry[freq_key]['0user_handle'] = owner
# One tag-frequency mapping per user; each mapping becomes one CSV row.
tag_rows = [entry['tagFreq'] for entry in users]

# The header is the sorted union of every key that appears in any row.
tag_keys = set()
for freq in tag_rows:
    tag_keys.update(freq)
tag_columns = sorted(tag_keys)

with open(csv_file2, 'w', newline='') as tags_out:
    tags_writer = csv.DictWriter(tags_out, fieldnames=tag_columns)
    tags_writer.writeheader()
    # A key missing from a user's mapping is passed as None, which the csv
    # module writes as an empty field.
    tags_writer.writerows(
        {column: freq.get(column) for column in tag_columns}
        for freq in tag_rows
    )

print("user_tags file generated successfully!")
# One rating-frequency mapping per user; each mapping becomes one CSV row.
rating_rows = [entry['ratingFreq'] for entry in users]

# The header is the sorted union of every key that appears in any row.
# NOTE(review): keys are sorted with plain sorted(); if they are rating
# strings the order is lexicographic, not numeric — confirm this is intended.
rating_keys = set()
for freq in rating_rows:
    rating_keys.update(freq)
rating_columns = sorted(rating_keys)

with open(csv_file3, 'w', newline='') as ratings_out:
    ratings_writer = csv.DictWriter(ratings_out, fieldnames=rating_columns)
    ratings_writer.writeheader()
    # A key missing from a user's mapping is passed as None, which the csv
    # module writes as an empty field.
    ratings_writer.writerows(
        {column: freq.get(column) for column in rating_columns}
        for freq in rating_rows
    )

print("user_ratings file generated successfully!")