ma08 · derekmpham · Apr 5, 2022 · Apr 6, 2022 · Apr 10, 2022 · Apr 10, 2022
diff --git a/Completions/run_davinci.py b/Completions/run_davinci.py
@@ -4,15 +4,14 @@
 import openai
 import json
 import shutil
+import numpy as np
 
 openai.api_key = os.getenv("OPENAI_API_KEY")
 
-TEMPERATURE = 0.5 #T
-N_SOLUTIONS = 2 #k
 ENGINE = "code-davinci-002"
-MAX_TOKENS=4096
+MAX_TOKENS=4000
 
-def run_davinci(path, out_dir):
+def run_davinci(path, out_dir, n_itr):
     with open(path) as f:
         prompt = f.read()
 
@@ -21,62 +20,70 @@ def run_davinci(path, out_dir):
 
     print("--------------------------")
 
-    response = openai.Completion.create(
-        engine=ENGINE,
-        prompt=input_prompt,
-        temperature=TEMPERATURE,
-        max_tokens=MAX_TOKENS,
-        top_p=1,
-        frequency_penalty=0,
-        presence_penalty=0,
-        n=N_SOLUTIONS
-    )
-
-    print(response)
-
-    output = {"prompt":input_prompt, "solutions":[]}
-    solution_set = set()
-
-    for i in range(len(response["choices"])):
-    # for choice in response["choices"]:
-        choice = response["choices"][i]
-        print(i, choice)
-
-
-
-        finish_reason = choice["finish_reason"]
-        print(f"REASON {finish_reason}")
-        if(finish_reason == "stop"):
-            if("text" in choice):
-                solution_set.add(choice["text"])
-            shutil.copyfile(path, f"{out_dir}/question.txt")
-            prompt_file_folder = os.path.dirname(path)
-            try:
-                shutil.copyfile(f"{prompt_file_folder}/metadata.json", f"{out_dir}/metadata.json")
-                shutil.copyfile(f"{prompt_file_folder}/solutions.json", f"{out_dir}/solutions.json")
-                shutil.copyfile(f"{prompt_file_folder}/input_output.json", f"{out_dir}/input_output.json")
-            except Exception as e:
-                print(path, e)
-
-            # shutil.copyfile(path, f"{out_dir}/solutions.json")
-            # shutil.copyfile(path, f"{out_dir}/input_output.json")
-            # with open(f"{out_dir}/gen_code_out_{i}.py", "w") as fp:
-                # fp.write(choice["text"])
+    # n_itr arrives from sys.argv as a string, so convert it before looping;
+    # iterating the raw string would run once per character instead.
+    for itr in range(int(n_itr)):
+        # Sample this round's T from {0.0, ..., 0.9} and k from 1..99.
+        TEMPERATURE = np.random.randint(0, 10)/10
+        N_SOLUTIONS = np.random.randint(1, 100)
+
+        response = openai.Completion.create(
+            engine=ENGINE,
+            prompt=input_prompt,
+            temperature=TEMPERATURE,
+            max_tokens=MAX_TOKENS,
+            top_p=1,
+            frequency_penalty=0,
+            presence_penalty=0,
+            n=N_SOLUTIONS
+        )
+
+        print(response)
+
+        output = {"prompt":input_prompt, "solutions":[]}
+        solution_set = set()
+
+        for i, choice in enumerate(response["choices"]):
+            print(i, choice)
+
+            finish_reason = choice["finish_reason"]
+            print(f"REASON {finish_reason}")
+            # Keep only completions whose finish_reason is "stop"; the set
+            # drops duplicate texts.
+            if(finish_reason == "stop"):
+                if("text" in choice):
+                    solution_set.add(choice["text"])
+                # Copy the prompt and its sibling files into out_dir, skipping
+                # any file that is already there.
+                if not os.path.exists(f"{out_dir}/question.txt"):
+                    shutil.copyfile(path, f"{out_dir}/question.txt")
+                prompt_file_folder = os.path.dirname(path)
+                try:
+                    if not os.path.exists(f"{out_dir}/metadata.json"):
+                        shutil.copyfile(f"{prompt_file_folder}/metadata.json", f"{out_dir}/metadata.json")
+                    if not os.path.exists(f"{out_dir}/solutions.json"):
+                        shutil.copyfile(f"{prompt_file_folder}/solutions.json", f"{out_dir}/solutions.json")
+                    if not os.path.exists(f"{out_dir}/input_output.json"):
+                        shutil.copyfile(f"{prompt_file_folder}/input_output.json", f"{out_dir}/input_output.json")
+                except Exception as e:
+                    print(path, e)
+
+        output["solutions"].extend(solution_set)
+
+        # Encode the sampled T and k in the file name; an existing file with
+        # the same name is left untouched.
+        pref = str(TEMPERATURE) + 'T1_' + str(N_SOLUTIONS) + 'k1_'
+        if not os.path.exists(f"{out_dir}/{pref}codex_solutions.json"):
+            with open(f'{out_dir}/{pref}codex_solutions.json', 'w') as outfile:
+                json.dump(output["solutions"], outfile)
 
-    output["solutions"].extend(solution_set)
-
-
-
-    with open(f'{out_dir}/codex_solutions.json', 'w') as outfile:
-        json.dump(output["solutions"], outfile)
-
-
 
 
 if __name__ == "__main__":
     #Example: python3 ./test/0179/question.txt
     path = sys.argv[1] #test/sort-questions.txt_dir/4997/question.txt
     out_dir = sys.argv[2] #davinci_runs/test/sort-questions.txt_dir
+    n_itr = sys.argv[3] # number of times want to sample
 
     # split_parts = prompt_file_path.split('/')
     # num = split_parts[2]
@@ -87,5 +94,5 @@ def run_davinci(path, out_dir):
     # os.makedirs(out_dir, exist_ok=True)
 
     sys.stdout = open(f'{out_dir}/out.log', 'w')
-    run_davinci(path, out_dir)
-    sys.stdout.close()
+    run_davinci(path, out_dir, n_itr)
+    sys.stdout.close()
diff --git a/Edit_Insert/mass_run_edit.sh b/Edit_Insert/mass_run_edit.sh
@@ -2,7 +2,7 @@
 input_dir="davinci_runs/test/intro-questions.txt_dir"
 COUNTER=$(( 0 ))
 LIMIT=$(( 200 ))
-for question in $input_dir/*/codex_solutions.json; do
+for question in $input_dir/*/*codex_solutions.json; do
     echo $question
     python3 run_edit_module.py $question 2>&1 
     (( COUNTER++ ))

diff --git a/Edit_Insert/run_edit_module.py b/Edit_Insert/run_edit_module.py
@@ -6,6 +6,7 @@
 from datetime import datetime
 import random
 import time
+import numpy as np
 
 old_print = print
 
@@ -28,11 +29,11 @@ def set_api_key_rand():
 
 EDIT_ENGINE = "code-davinci-edit-001"
 
-TEMPERATURE = 0.3
-N_SOLUTIONS = 2
+# TEMPERATURE = 0.3
+# N_SOLUTIONS = 2
 
 # EDIT_OPERATIONS = ["fix spelling mistakes", "fix syntax error", "cleanup code"]
-EDIT_OPERATIONS = ["fix spelling mistakes", "fix syntax errors"]
+EDIT_OPERATIONS = ["fix spelling mistakes", "fix syntax errors"] # TODO: automate the choice of edit operations as well; the process still needs to be discussed in more detail.
 
 
 """
@@ -43,16 +44,16 @@ def set_api_key_rand():
 
 output_codes: list of strings containing the code after application of operation
 """
-def run_edit(input_code, operation):
+def run_edit(input_code, operation, temp, k):
 
     # set_api_key_rand()
 
     response = openai.Edit.create(
         engine= EDIT_ENGINE,
         input=input_code,
         instruction=operation,
-        temperature=TEMPERATURE,
-        n=N_SOLUTIONS
+        temperature=temp,
+        n=k
     )
     time.sleep(2)
 
@@ -91,7 +92,7 @@ def run_edit(input_code, operation):
 Should we save intermediary states?
 
 """
-def run_edit_multiple_op(input_code, operations):
+def run_edit_multiple_op(input_code, operations, temp, k):
 
     states = [input_code]
     num_operations = 0
@@ -103,7 +104,7 @@ def run_edit_multiple_op(input_code, operations):
 
         print(f"size on input set {len(current_input_set)}")
         for code in current_input_set:
-            gen_codes = run_edit(code, operation)
+            gen_codes = run_edit(code, operation, temp, k)
             print(operation, len(gen_codes), gen_codes)
             current_output_set.update(gen_codes)
 
@@ -134,36 +135,46 @@ def save_strings_to_py_file(solution_strings, folder_name="edit_sol_pys"):
 They are multiple outputs for a single input depending upon k
 The file has outputs for multiple prompts
 """
-def run(file_name,out_dir="."):
+def run(file_name,out_dir=".",n_itr=1):
     with open(file_name,"r") as input_fp:
         data = json.load(input_fp)
 
-        output = {"solutions":[]}
+        for itr in range(int(n_itr)):  # n_itr may arrive as a CLI string
+            # Every round samples its own edit temperature (0.0..0.9) and k (1..99).
+            TEMPERATURE = np.random.randint(0, 10)/10
+            N_SOLUTIONS = np.random.randint(1, 100)
+            # TODO: also sample the edit operations once their selection is automated.
 
-        total_output_set = set()
-        for solution in data:
-            outputs = run_edit_multiple_op(solution, EDIT_OPERATIONS)
-            total_output_set.update(outputs)
+            output = {"solutions":[]}
 
-        output["solutions"].extend(total_output_set)
+            total_output_set = set()
+            for solution in data:
+                outputs = run_edit_multiple_op(solution, EDIT_OPERATIONS, TEMPERATURE, N_SOLUTIONS)
+                total_output_set.update(outputs)
 
-        # json_output = json.dumps(output)
+            output["solutions"].extend(total_output_set)
 
+            # File naming format: <edit params><completion params>_codex_solutions.json
 
-        with open(f'{out_dir}/codex_edit_solutions.json', 'w') as outfile:
-            json.dump(output["solutions"], outfile)
-
-        save_strings_to_py_file(output["solutions"], f"{out_dir}/edit_sol_pys")
+            base_name = os.path.basename(file_name)  # file_name may include a directory part
+            pref = str(TEMPERATURE) + 'T2_' + str(N_SOLUTIONS) + 'k2_'
+            if not os.path.exists(f"{out_dir}/{pref}{base_name}"):
+                with open(f'{out_dir}/{pref}{base_name}', 'w') as outfile:
+                    json.dump(output["solutions"], outfile)
+                # splitext drops the ".json" extension before "_pys" is appended
+                fold_name = pref + os.path.splitext(base_name)[0] + "_pys"
+                save_strings_to_py_file(output["solutions"], f"{out_dir}/{fold_name}")
 
 
 
 
 
 if __name__ == "__main__":
 
-    #python3 run_edit_module.py example_output.json
+    #python3 run_edit_module.py example_output.json 100   (the round count is optional, default 1)
     input_file_name = sys.argv[1]
     out_dir = os.path.dirname(input_file_name)
+    n_itr = int(sys.argv[2]) if len(sys.argv) > 2 else 1  # optional round count; mass_run_edit.sh passes none
 
     #The following code is to save the example prompt and outputs
     """
@@ -181,12 +192,6 @@ def run(file_name,out_dir="."):
 
 
     sys.stdout = open(f'{out_dir}/edit_out.log', 'w')
-    run(input_file_name,out_dir=out_dir)
+    run(input_file_name,out_dir=out_dir,n_itr=n_itr)  # keyword form: a positional argument cannot follow out_dir=
     sys.stdout.close()
 
-
-
-
-
-
-