Skip to content

Commit

Permalink
Add update_categories script (#741)
Browse files Browse the repository at this point in the history
  • Loading branch information
yeganehkordi authored Mar 25, 2022
1 parent cfff66b commit 1042884
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ Or if you're comfortable with JSON files, here is what it would look like:
{
"Contributors": [""],
"Source": [""],
"URL": [""],
"Categories": [""],
"Reasoning": [""],
"Definition": [""],
Expand Down
1 change: 1 addition & 0 deletions src/test_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
# read all the tasks and make sure that they're following the right pattern
tasks_path = 'tasks/'

# TODO: add "URL" to expected_keys once the task files have been updated
expected_keys = [
"Definition",
"Input_language",
Expand Down
29 changes: 29 additions & 0 deletions src/update_category_assignment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/usr/bin/env python3
import json
import pandas as pd
from collections import OrderedDict

# Directory prefix for the task JSON files. It is joined to file names by plain
# string concatenation, so the trailing slash is required.
tasks_path = 'tasks/'

# Review spreadsheet loaded as a 2-D array; rows are indexed by position and
# columns by integer offset further down in this script.
meta = pd.read_csv(
    "Natural Instructions V2 Exps - tasks review 0323 (final).csv"
).values

# Order in which the top-level keys are written back to every task file.
key_order = (
    "Contributors",
    "Source",
    "URL",
    "Categories",
    "Reasoning",
    "Definition",
    "Input_language",
    "Output_language",
    "Instruction_language",
    "Domains",
    "Positive Examples",
    "Negative Examples",
    "Instances",
)

def ordered(d, key_order):
    """Return an ``OrderedDict`` holding ``d``'s entries in ``key_order`` order.

    Args:
        d: Mapping to reorder.
        key_order: Iterable of keys giving the desired output order.

    Returns:
        An ``OrderedDict`` with exactly the keys listed in ``key_order``.
        Keys of ``d`` that are not listed in ``key_order`` are left out.

    Raises:
        KeyError: If a key in ``key_order`` is not present in ``d``.
    """
    # A generator avoids building an intermediate list of pairs.
    return OrderedDict((key, d[key]) for key in key_order)

# Rewrite every task file named in the spreadsheet with its reviewed
# Categories, Source and URL, then normalise the top-level key order.
for record in meta:
    # Column 0 holds the task name, which is also the JSON file stem.
    file_path = tasks_path + record[0] + '.json'

    try:
        # First attempt uses the platform default encoding.
        with open(file_path, 'r') as f:
            data = json.load(f)
    except (UnicodeDecodeError, json.JSONDecodeError):
        # Only decoding/parsing failures trigger the UTF-8 retry; the first
        # handle is already closed here. Anything else (missing file,
        # Ctrl-C, ...) propagates instead of being swallowed.
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

    # Each value is stored as a single-element list.
    # NOTE(review): an empty CSV cell is read by pandas as NaN, which
    # json.dump would then write as the non-standard token `NaN` -- confirm
    # that the spreadsheet has no blanks in columns 2, 3 and 5.
    data['Categories'] = [record[5]]
    data['Source'] = [record[2]]
    data['URL'] = [record[3]]

    # Raises KeyError if the task file lacks any key listed in key_order.
    data = ordered(data, key_order)
    with open(file_path, 'w', encoding='utf-8') as outfile:
        json.dump(data, outfile, indent=4, ensure_ascii=False)

0 comments on commit 1042884

Please sign in to comment.