-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_util.py
65 lines (48 loc) · 1.62 KB
/
data_util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import os
import json
import csv
import time
import openai
# Take the API key from the OPENAI_API_KEY environment variable so the secret
# does not have to be pasted into this file. When the variable is unset this
# falls back to the empty string the script used before.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', '')
openai.api_key = OPENAI_API_KEY
def translate(J_sentence, *, model='gpt-3.5-turbo-16k-0613', max_tokens=500):
    """Translate a Japanese sentence into English via the OpenAI chat API.

    Args:
        J_sentence: The Japanese text to translate.
        model: Name of the chat model to query. Defaults to the model this
            script has always used.
        max_tokens: Upper bound on the length of the generated reply.

    Returns:
        The text content of the first choice in the API response, or an
        empty string when ``J_sentence`` is empty or only whitespace.
    """
    # There is nothing to translate in an empty string, so skip the API
    # round-trip instead of storing whatever the model answers to a blank
    # prompt.
    if isinstance(J_sentence, str) and not J_sentence.strip():
        return ''

    messages = [
        {
            'role': 'system',
            'content': 'You will be provided with a sentence in Japanese, and your task is to translate it into English.',
        },
        {
            'role': 'user',
            'content': J_sentence,
        },
    ]
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=0,
        max_tokens=max_tokens,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    return response["choices"][0]['message']['content']
def translate_data(file_name, delay=5):
    """Translate every scenario record in a JSON-lines file into English.

    Each non-blank line of ``file_name`` is parsed as one JSON object with
    the keys ``filename``, ``action``, ``utterance`` and ``description``.
    The last three are passed through ``translate``; ``filename`` is copied
    unchanged.

    Args:
        file_name: Path to the UTF-8 encoded JSON-lines scenario file.
        delay: Seconds to sleep after translating each record (presumably
            to stay under the API rate limit -- confirm before lowering).

    Returns:
        A list of dicts, one per input record, holding the original
        ``filename`` and the translated ``action``, ``utterance`` and
        ``description``.
    """
    with open(file_name, 'r', encoding='utf-8') as f:
        # Skip blank lines (e.g. extra newlines at the end of the file);
        # json.loads would otherwise raise on them.
        scenario_data = [json.loads(line) for line in f if line.strip()]

    en_scenario = []
    for sample in scenario_data:
        # Read all source fields before the first API call so a malformed
        # record fails before any request is made for it.
        action = sample['action']
        utterance = sample['utterance']
        description = sample['description']

        e_action = translate(action)
        e_utterance = translate(utterance)
        e_description = translate(description)
        time.sleep(delay)

        en_scenario.append({
            'filename': sample['filename'],
            'action': e_action,
            'utterance': e_utterance,
            'description': e_description,
        })
        # Running count of translated records, printed as progress output.
        print(len(en_scenario))
    return en_scenario
def main():
    """Translate the scenario file and save the English result.

    Runs ``translate_data`` on ``./data/scenario/scenario.json`` and writes
    the returned list to ``./data/scenario/scenario_en.json`` as a single
    JSON document.
    """
    source_path = './data/scenario/scenario.json'
    target_path = './data/scenario/scenario_en.json'

    translated_samples = translate_data(source_path)
    with open(target_path, 'w', encoding='utf-8') as out_file:
        out_file.write(json.dumps(translated_samples))


if __name__ == '__main__':
    main()