diff --git a/evals/ai_api_calls/get_open_ai_completion.py b/evals/ai_api_calls/get_open_ai_completion.py
index 0e706da71..1cee85f69 100644
--- a/evals/ai_api_calls/get_open_ai_completion.py
+++ b/evals/ai_api_calls/get_open_ai_completion.py
@@ -1,4 +1,5 @@
import os
+from typing import Any, Dict, Optional
from openai import OpenAI
def get_open_ai_completion(prompt: str):
@@ -9,5 +10,22 @@ def get_open_ai_completion(prompt: str):
messages=[{"role": "user", "content": prompt}],
temperature=0.0
)
+
+ response_content = response.choices[0].message.content
+ return get_code_block_from_message(response_content)
+
+
+
+def get_code_block_from_message(message: Optional[str]) -> str:
+    """
+    Return the first ```python fenced block of `message`; a message with no
+    such fence is returned unchanged, and a missing (None) message gives "".
+    """
+    print(f"Message: {message}")
+    # The API may return no content (None); treat that as an empty code block
+    if message is None:
+        return ""
+    if "```python" not in message:
+        return message
- return response.choices[0].message.content
\ No newline at end of file
+    return message.split('```python')[1].split('```')[0]
diff --git a/evals/main.py b/evals/main.py
index cb3da7836..ad2271e21 100644
--- a/evals/main.py
+++ b/evals/main.py
@@ -60,6 +60,7 @@
# Create the actual code script produced by the LLM
prompt = prompt_generator.get_prompt(test.user_input, test.notebook_state)
ai_generated_code = get_open_ai_completion(prompt)
+ print(f"AI generated code:\n{ai_generated_code}")
actual_code = current_cell_contents_script + "\n" + ai_generated_code
# So that we can compare the results of the two scripts, create global context for
diff --git a/evals/notebook_states.py b/evals/notebook_states.py
index 354b9dc49..095ef474f 100644
--- a/evals/notebook_states.py
+++ b/evals/notebook_states.py
@@ -83,8 +83,8 @@
'excel_transactions': pd.DataFrame({'Transaction ID': [12975, 16889, 57686, 53403, 42699], 'Share Quantity': [20, 25, 24, 22, 40]}),
'excel_transactions': pd.DataFrame({'Transaction ID': [12975, 16889, 57686, 53403, 42699], 'Share Quantity': [20, 25, 24, 22, 0]})},
cell_contents=["""import pandas as pd
-excel_transactions = pd.read_excel('evals/data/simple_recon/transactions_excel.csv')
-eagle_transactions = pd.read_excel('evals/data/simple_recon/transactions_eagle.csv')
+excel_transactions = pd.read_csv('evals/data/simple_recon/transactions_excel.csv')
+eagle_transactions = pd.read_csv('evals/data/simple_recon/transactions_eagle.csv')
""", '']
)
diff --git a/evals/prompts/__init__.py b/evals/prompts/__init__.py
index 507c2b7cd..ddee8a746 100644
--- a/evals/prompts/__init__.py
+++ b/evals/prompts/__init__.py
@@ -1,7 +1,10 @@
from evals.prompts.single_shot_prompt import single_shot_prompt_generator
from evals.prompts.multi_shot_prompt import multi_shot_prompt_generator
-
+from evals.prompts.production_prompt_v1 import production_prompt_v1_generator
+from evals.prompts.production_prompt_v2 import production_prompt_v2_generator
PROMPT_GENERATORS = [
- single_shot_prompt_generator,
- multi_shot_prompt_generator
-]
\ No newline at end of file
+ # single_shot_prompt_generator is still imported above but left out of this list
+ # multi_shot_prompt_generator is still imported above but left out of this list
+ production_prompt_v1_generator,
+ production_prompt_v2_generator
+]
diff --git a/evals/prompts/multi_shot_pandas_focussed_prompt.py b/evals/prompts/multi_shot_pandas_focussed_prompt.py
new file mode 100644
index 000000000..d17d89f2e
--- /dev/null
+++ b/evals/prompts/multi_shot_pandas_focussed_prompt.py
@@ -0,0 +1,97 @@
+from evals.eval_types import NotebookState, PromptGenerator
+
+__all__ = ['multi_shot_pandas_focussed_prompt']
+
+class _MultiShotPandasFocussedPrompt(PromptGenerator):
+    prompt_name = "multi_shot_pandas_focussed_prompt"
+
+    def get_prompt(self, user_input: str, notebook_state: NotebookState) -> str:
+        """Return a few-shot prompt: two worked pandas examples, then the notebook's variables, last cell and task."""
+        return f"""You are an expert python programmer writing a script in a Jupyter notebook. You are given a set of variables, existing code, and a task.
+
+Respond with the updated active code cell and a short explanation of the changes you made.
+
+When responding:
+- Do not use the word "I"
+- Do not recreate variables that already exist
+- Keep as much of the original code as possible
+
+
+
+Defined Variables:
+{{
+ 'sales_multiplier': 1.5,
+ 'sales_df': pd.DataFrame({{
+ 'transaction_date': ['2024-01-02', '2024-01-02', '2024-01-02', '2024-01-02', '2024-01-03'],
+ 'price_per_unit': [10, 9.99, 13.99, 21.00, 100],
+ 'units_sold': [1, 2, 1, 4, 5],
+ 'total_price': [10, 19.98, 13.99, 84.00, 500]
+ }})
+}}
+
+Code in the active code cell:
+```python
+import pandas as pd
+sales_df = pd.read_csv('./sales.csv')
+```
+
+Your task: convert the transaction_date column to datetime and then multiply the total_price column by the sales_multiplier.
+
+Output:
+
+```python
+import pandas as pd
+sales_df = pd.read_csv('./sales.csv')
+sales_df['transaction_date'] = pd.to_datetime(sales_df['transaction_date'])
+sales_df['total_price'] = sales_df['total_price'] * sales_multiplier
+```
+
+Converted the `transaction_date` column to datetime using the built-in pd.to_datetime function and multiplied the `total_price` column by the `sales_multiplier` variable.
+
+
+
+Defined Variables:
+{{
+ 'df': pd.DataFrame({{
+ 'id': ['id-49830', 'id-39301', 'id-85011', 'id-51892', 'id-99111'],
+ 'name': ['Tamir', 'Aaron', 'Grace', 'Nawaz', 'Julia'],
+ 'age': [29, 31, 26, 21, 30],
+ 'dob': ['1994-06-15', '1992-03-27', '1997-04-11', '2002-07-05', '1993-08-22'],
+ 'city': ['San Francisco', 'New York', 'Los Angeles', 'Chicago', 'Houston'],
+ 'state': ['CA', 'NY', 'CA', 'IL', 'TX'],
+ 'zip': ['94103', '10001', '90038', '60611', '77002'],
+ 'start_date': ['2024-01-01', '2024-01-01', '2024-01-01', '2024-01-01', '2024-01-01'],
+ 'department': ['Engineering', 'Sales', 'Marketing', 'Operations', 'Finance'],
+ 'salary': ['$100,000', '$50,000', '$60,000', '$55,000', '$70,000']
+ }})
+}}
+
+Code in the active code cell:
+```python
+
+```
+
+Your task: Calculate the weekly salary for each employee.
+
+Output:
+
+```python
+df['salary'] = df['salary'].str[1:].replace(',', '', regex=True).astype('float')
+df['weekly_salary'] = df['salary'] / 52
+```
+
+Remove the `$` and `,` from the `salary` in order to convert it to a float. Then, divide the salary by 52 to get the weekly salary.
+
+
+Defined Variables:
+{notebook_state.global_vars}
+
+Code in the active code cell:
+
+```python
+{notebook_state.cell_contents[-1] if len(notebook_state.cell_contents) > 0 else ""}
+```
+
+Your task: {user_input}"""
+
+multi_shot_pandas_focussed_prompt = _MultiShotPandasFocussedPrompt()
diff --git a/evals/prompts/multi_shot_prompt.py b/evals/prompts/multi_shot_prompt.py
index 30d68a135..9b41556d1 100644
--- a/evals/prompts/multi_shot_prompt.py
+++ b/evals/prompts/multi_shot_prompt.py
@@ -8,7 +8,7 @@ class _MultiShotPromptGenerator(PromptGenerator):
def get_prompt(self, user_input: str, notebook_state: NotebookState) -> str:
return f"""You are an expert python programmer. You are given a set of variables, existing code, and a task.
-Respond with the python code and nothing else.
+Respond with the python code that starts with ```python and ends with ```. Do not return anything else.
You have these variables:
diff --git a/evals/prompts/production_prompt_v1.py b/evals/prompts/production_prompt_v1.py
new file mode 100644
index 000000000..a32315a2c
--- /dev/null
+++ b/evals/prompts/production_prompt_v1.py
@@ -0,0 +1,61 @@
+from evals.eval_types import NotebookState, PromptGenerator
+
+__all__ = ['production_prompt_v1_generator']
+
+class _ProductionPromptV1(PromptGenerator):
+    prompt_name = "production_prompt_v1"
+
+    def get_prompt(self, user_input: str, notebook_state: NotebookState) -> str:
+        """Return the prompt: notebook variables, response rules, one worked example, then the last cell and the task."""
+        return f"""You have access to the following variables:
+
+{notebook_state.global_vars}
+
+Complete the task below. Decide what variables to use and what changes you need to make to the active code cell. Only return the full new active code cell and a concise explanation of the changes you made.
+
+
+
+Do not:
+- Use the word "I"
+- Include multiple approaches in your response
+- Recreate variables that already exist
+
+Do:
+- Use the variables that you have access to
+- Keep as much of the original code as possible
+- Ask for more context if you need it.
+
+
+
+
+
+Code in the active code cell:
+
+```python
+import pandas as pd
+loans_df = pd.read_csv('./loans.csv')
+```
+
+Your task: convert the issue_date column to datetime.
+
+Output:
+
+```python
+import pandas as pd
+loans_df = pd.read_csv('./loans.csv')
+loans_df['issue_date'] = pd.to_datetime(loans_df['issue_date'])
+```
+
+Use the pd.to_datetime function to convert the issue_date column to datetime.
+
+
+
+Code in the active code cell:
+
+```python
+{notebook_state.cell_contents[-1] if len(notebook_state.cell_contents) > 0 else ""}
+```
+
+Your task: {user_input}"""
+
+production_prompt_v1_generator = _ProductionPromptV1()
\ No newline at end of file
diff --git a/evals/prompts/production_prompt_v2.py b/evals/prompts/production_prompt_v2.py
new file mode 100644
index 000000000..4b7760223
--- /dev/null
+++ b/evals/prompts/production_prompt_v2.py
@@ -0,0 +1,64 @@
+from evals.eval_types import NotebookState, PromptGenerator
+
+__all__ = ['production_prompt_v2_generator']
+
+class _ProductionPromptV2(PromptGenerator):
+    prompt_name = "production_prompt_v2"
+
+    def get_prompt(self, user_input: str, notebook_state: NotebookState) -> str:
+        """Return a one-shot prompt: a worked pandas example, then the notebook's variables, last cell and task."""
+        return f"""You are an expert python programmer writing a script in a Jupyter notebook. You are given a set of variables, existing code, and a task.
+
+Respond with the updated active code cell and a short explanation of the changes you made.
+
+When responding:
+- Do not use the word "I"
+- Do not recreate variables that already exist
+- Keep as much of the original code as possible
+
+
+
+Defined Variables:
+{{
+ 'sales_multiplier': 1.5,
+ 'sales_df': pd.DataFrame({{
+ 'transaction_date': ['2024-01-02', '2024-01-02', '2024-01-02', '2024-01-02', '2024-01-03'],
+ 'price_per_unit': [10, 9.99, 13.99, 21.00, 100],
+ 'units_sold': [1, 2, 1, 4, 5],
+ 'total_price': [10, 19.98, 13.99, 84.00, 500]
+ }})
+}}
+
+Code in the active code cell:
+```python
+import pandas as pd
+sales_df = pd.read_csv('./sales.csv')
+```
+
+Your task: convert the transaction_date column to datetime and then multiply the total_price column by the sales_multiplier.
+
+Output:
+
+```python
+import pandas as pd
+sales_df = pd.read_csv('./sales.csv')
+sales_df['transaction_date'] = pd.to_datetime(sales_df['transaction_date'])
+sales_df['total_price'] = sales_df['total_price'] * sales_multiplier
+```
+
+Converted the `transaction_date` column to datetime using the built-in pd.to_datetime function and multiplied the `total_price` column by the `sales_multiplier` variable.
+
+
+
+Defined Variables:
+{notebook_state.global_vars}
+
+Code in the active code cell:
+
+```python
+{notebook_state.cell_contents[-1] if len(notebook_state.cell_contents) > 0 else ""}
+```
+
+Your task: {user_input}"""
+
+production_prompt_v2_generator = _ProductionPromptV2()
\ No newline at end of file
diff --git a/evals/prompts/single_shot_prompt.py b/evals/prompts/single_shot_prompt.py
index a8fbe451e..95c5b60af 100644
--- a/evals/prompts/single_shot_prompt.py
+++ b/evals/prompts/single_shot_prompt.py
@@ -8,7 +8,7 @@ class _SingleShotPromptGenerator(PromptGenerator):
def get_prompt(self, user_input: str, notebook_state: NotebookState) -> str:
return f"""You are an expert python programmer. You are given a set of variables, existing code, and a task.
-Respond with the python code and nothing else.
+Respond with the python code that starts with ```python and ends with ```. Do not return anything else.
You have these variables:
diff --git a/evals/utils.py b/evals/utils.py
index b7f687e25..1b898c4af 100644
--- a/evals/utils.py
+++ b/evals/utils.py
@@ -86,4 +86,4 @@ def are_globals_equal(globals1: Dict[str, Any], globals2: Dict[str, Any]) -> boo
if var_one != var_two:
return False
- return True
\ No newline at end of file
+ return True
diff --git a/mito-ai/src/Extensions/AiChat/PromptManager.tsx b/mito-ai/src/Extensions/AiChat/PromptManager.tsx
index 63156095c..0a0f28100 100644
--- a/mito-ai/src/Extensions/AiChat/PromptManager.tsx
+++ b/mito-ai/src/Extensions/AiChat/PromptManager.tsx
@@ -5,56 +5,63 @@ export function createBasicPrompt(
activeCellCode: string,
input: string
): string {
- return `You have access to the following variables:
+ const prompt = `You are an expert python programmer writing a script in a Jupyter notebook. You are given a set of variables, existing code, and a task.
-${variables?.map(variable => `${JSON.stringify(variable, null, 2)}\n`).join('')}
-
-Complete the task below. Decide what variables to use and what changes you need to make to the active code cell. Only return the full new active code cell and a concise explanation of the changes you made.
-
-
-
-Do not:
-- Use the word "I"
-- Include multiple approaches in your response
-- Recreate variables that already exist
+Respond with the updated active code cell and a short explanation of the changes you made.
-Do:
-- Use the variables that you have access to
+When responding:
+- Do not use the word "I"
+- Do not recreate variables that already exist
- Keep as much of the original code as possible
-- Ask for more context if you need it.
-
-
-Code in the active code cell:
+Defined Variables:
+{
+ 'sales_multiplier': 1.5,
+ 'sales_df': pd.DataFrame({
+ 'transaction_date': ['2024-01-02', '2024-01-02', '2024-01-02', '2024-01-02', '2024-01-03'],
+ 'price_per_unit': [10, 9.99, 13.99, 21.00, 100],
+ 'units_sold': [1, 2, 1, 4, 5],
+ 'total_price': [10, 19.98, 13.99, 84.00, 500]
+ })
+}
+Code in the active code cell:
\`\`\`python
import pandas as pd
-loans_df = pd.read_csv('./loans.csv')
+sales_df = pd.read_csv('./sales.csv')
\`\`\`
-Your task: convert the issue_date column to datetime.
+Your task: convert the transaction_date column to datetime and then multiply the total_price column by the sales_multiplier.
Output:
\`\`\`python
import pandas as pd
-loans_df = pd.read_csv('./loans.csv')
-loans_df['issue_date'] = pd.to_datetime(loans_df['issue_date'])
+sales_df = pd.read_csv('./sales.csv')
+sales_df['transaction_date'] = pd.to_datetime(sales_df['transaction_date'])
+sales_df['total_price'] = sales_df['total_price'] * sales_multiplier
\`\`\`
-Use the pd.to_datetime function to convert the issue_date column to datetime.
+Converted the \`transaction_date\` column to datetime using the built-in pd.to_datetime function and multiplied the \`total_price\` column by the \`sales_multiplier\` variable.
+Defined Variables:
+
+${variables?.map(variable => `${JSON.stringify(variable, null, 2)}\n`).join('')}
Code in the active code cell:
\`\`\`python
${activeCellCode}
\`\`\`
-Your task: ${input}`;
+Your task: ${input}`
+
+ console.log(prompt);
+
+ return prompt;
}
export function createErrorPrompt(activeCellCode: string, errorMessage: string): string {