Skip to content

Commit

Permalink
Merge pull request #1379 from mito-ds/improve-ai-chat-prompt
Browse files Browse the repository at this point in the history
Improve ai chat prompt
  • Loading branch information
aarondr77 authored Nov 26, 2024
2 parents c1724d0 + a7a0367 commit 8ad504c
Show file tree
Hide file tree
Showing 11 changed files with 284 additions and 33 deletions.
20 changes: 19 additions & 1 deletion evals/ai_api_calls/get_open_ai_completion.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
from typing import Any, Dict, Optional
from openai import OpenAI

def get_open_ai_completion(prompt: str):
Expand All @@ -9,5 +10,22 @@ def get_open_ai_completion(prompt: str):
messages=[{"role": "user", "content": prompt}],
temperature=0.0
)

response_content = response.choices[0].message.content
return get_code_block_from_message(response_content)



def get_code_block_from_message(message: str) -> str:
    """
    Extract the first code block from a message.

    A code block is the text between the first ```python fence and the next
    ``` fence. The text is returned exactly as it appears between the fences
    (surrounding newlines are not stripped). If the closing fence is missing,
    everything after the opening fence is returned.

    If ```python is not part of the message, the entire message is assumed to
    be the code block and is returned unchanged.
    """
    print(f"Message: {message}")

    opening_fence = "```python"

    # No opening fence: treat the whole message as code.
    if opening_fence not in message:
        return message

    # Keep only what follows the first opening fence, then cut at the next
    # fence of any kind (closing fence or the start of another block).
    _, _, after_opening = message.partition(opening_fence)
    code, _, _ = after_opening.partition("```")
    return code
1 change: 1 addition & 0 deletions evals/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
# Create the actual code script produced by the LLM
prompt = prompt_generator.get_prompt(test.user_input, test.notebook_state)
ai_generated_code = get_open_ai_completion(prompt)
print(f"AI generated code:\n{ai_generated_code}")
actual_code = current_cell_contents_script + "\n" + ai_generated_code

# So that we can compare the results of the two scripts, create global context for
Expand Down
4 changes: 2 additions & 2 deletions evals/notebook_states.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,8 @@
'excel_transactions': pd.DataFrame({'Transaction ID': [12975, 16889, 57686, 53403, 42699], 'Share Quantity': [20, 25, 24, 22, 40]}),
'excel_transactions': pd.DataFrame({'Transaction ID': [12975, 16889, 57686, 53403, 42699], 'Share Quantity': [20, 25, 24, 22, 0]})},
cell_contents=["""import pandas as pd
excel_transactions = pd.read_excel('evals/data/simple_recon/transactions_excel.csv')
eagle_transactions = pd.read_excel('evals/data/simple_recon/transactions_eagle.csv')
excel_transactions = pd.read_csv('evals/data/simple_recon/transactions_excel.csv')
eagle_transactions = pd.read_csv('evals/data/simple_recon/transactions_eagle.csv')
""", '']
)

Expand Down
11 changes: 7 additions & 4 deletions evals/prompts/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from evals.prompts.single_shot_prompt import single_shot_prompt_generator
from evals.prompts.multi_shot_prompt import multi_shot_prompt_generator

from evals.prompts.production_prompt_v1 import production_prompt_v1_generator
from evals.prompts.production_prompt_v2 import production_prompt_v2_generator
# Prompt generators that the eval run iterates over. The single-shot and
# multi-shot generators are still imported above but are left out of the list;
# uncomment them to include them in a run.
PROMPT_GENERATORS = [
    # single_shot_prompt_generator,
    # multi_shot_prompt_generator,
    production_prompt_v1_generator,
    production_prompt_v2_generator,
]
97 changes: 97 additions & 0 deletions evals/prompts/multi_shot_pandas_focussed_prompt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
from evals.eval_types import NotebookState, PromptGenerator

__all__ = ['multi_shot_pandas_focussed_prompt']

class _MultiShotPandasFocussedPrompt(PromptGenerator):
    """Prompt generator that shows the model two worked pandas examples."""

    prompt_name = "multi_shot_pandas_focussed_prompt"

    def get_prompt(self, user_input: str, notebook_state: NotebookState) -> str:
        """
        Build the prompt text for a single task.

        The prompt contains the instructions, two examples, the notebook's
        defined variables (`notebook_state.global_vars`), the contents of the
        last cell in `notebook_state.cell_contents` (or an empty string when
        there are no cells), and finally the task given in `user_input`.
        """
        # Only the last cell is presented as the "active" cell.
        active_cell = notebook_state.cell_contents[-1] if notebook_state.cell_contents else ""

        # NOTE: literal braces in the examples are doubled because this is an
        # f-string; the task is interpolated with plain {user_input} (no `$`).
        return f"""You are an expert python programmer writing a script in a Jupyter notebook. You are given a set of variables, existing code, and a task.
Respond with the updated active code cell and a short explanation of the changes you made.
When responding:
- Do not use the word "I"
- Do not recreate variables that already exist
- Keep as much of the original code as possible
<Example 1>
Defined Variables:
{{
'sales_multiplier': 1.5,
'sales_df': pd.DataFrame({{
'transaction_date': ['2024-01-02', '2024-01-02', '2024-01-02', '2024-01-02', '2024-01-03'],
'price_per_unit': [10, 9.99, 13.99, 21.00, 100],
'units_sold': [1, 2, 1, 4, 5],
'total_price': [10, 19.98, 13.99, 84.00, 500]
}})
}}
Code in the active code cell:
```python
import pandas as pd
sales_df = pd.read_csv('./sales.csv')
```
Your task: convert the transaction_date column to datetime and then multiply the total_price column by the sales_multiplier.
Output:
```python
import pandas as pd
sales_df = pd.read_csv('./sales.csv')
sales_df['transaction_date'] = pd.to_datetime(sales_df['transaction_date'])
sales_df['total_price'] = sales_df['total_price'] * sales_multiplier
```
Converted the `transaction_date` column to datetime using the built-in pd.to_datetime function and multiplied the `total_price` column by the `sales_multiplier` variable.
</Example 1>
<Example 2>
Defined Variables:
{{
'df': pd.DataFrame({{
'id': ['id-49830', 'id-39301', 'id-85011', 'id-51892', 'id-99111'],
'name': ['Tamir', 'Aaron', 'Grace', 'Nawaz', 'Julia'],
'age': [29, 31, 26, 21, 30],
'dob': ['1994-06-15', '1992-03-27', '1997-04-11', '2002-07-05', '1993-08-22'],
'city': ['San Francisco', 'New York', 'Los Angeles', 'Chicago', 'Houston'],
'state': ['CA', 'NY', 'CA', 'IL', 'TX'],
'zip': ['94103', '10001', '90038', '60611', '77002'],
'start_date': ['2024-01-01', '2024-01-01', '2024-01-01', '2024-01-01', '2024-01-01'],
'department': ['Engineering', 'Sales', 'Marketing', 'Operations', 'Finance'],
'salary': ['$100,000', '$50,000', '$60,000', '$55,000', '$70,000']
}})
}}
Code in the active code cell:
```python
```
Your task: Calculate the weekly salary for each employee.
Output:
```python
df['salary'] = df['salary'].str[1:].replace(',', '', regex=True).astype('float')
df['weekly_salary'] = df['salary'] / 52
```
Remove the `$` and `,` from the `salary` in order to convert it to a float. Then, divide the salary by 52 to get the weekly salary.
</Example 2>
Defined Variables:
{notebook_state.global_vars}
Code in the active code cell:
```python
{active_cell}
```
Your task: {user_input}"""


# Module-level singleton exported via __all__.
multi_shot_pandas_focussed_prompt = _MultiShotPandasFocussedPrompt()
2 changes: 1 addition & 1 deletion evals/prompts/multi_shot_prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ class _MultiShotPromptGenerator(PromptGenerator):
def get_prompt(self, user_input: str, notebook_state: NotebookState) -> str:
return f"""You are an expert python programmer. You are given a set of variables, existing code, and a task.
Respond with the python code and nothing else.
Respond with the python code that starts with ```python and ends with ```. Do not return anything else.
<Example 1>
You have these variables:
Expand Down
61 changes: 61 additions & 0 deletions evals/prompts/production_prompt_v1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from evals.eval_types import NotebookState, PromptGenerator

__all__ = ['production_prompt_v1_generator']

class _ProductionPromptV1(PromptGenerator):
    """Prompt generator for the first production prompt variant."""

    prompt_name = "production_prompt_v1"

    def get_prompt(self, user_input: str, notebook_state: NotebookState) -> str:
        """
        Build the prompt text for a single task.

        The prompt lists the notebook's variables
        (`notebook_state.global_vars`), the instructions and reminders, one
        example, the contents of the last cell in
        `notebook_state.cell_contents` (or an empty string when there are no
        cells), and finally the task given in `user_input`.
        """
        # Only the last cell is presented as the "active" cell.
        active_cell = notebook_state.cell_contents[-1] if notebook_state.cell_contents else ""

        # NOTE: the task is interpolated with plain {user_input}; a leading
        # `$` would be emitted literally by a Python f-string.
        return f"""You have access to the following variables:
{notebook_state.global_vars}
Complete the task below. Decide what variables to use and what changes you need to make to the active code cell. Only return the full new active code cell and a concise explanation of the changes you made.
<Reminders>
Do not:
- Use the word "I"
- Include multiple approaches in your response
- Recreate variables that already exist
Do:
- Use the variables that you have access to
- Keep as much of the original code as possible
- Ask for more context if you need it.
</Reminders>
<Example>
Code in the active code cell:
```python
import pandas as pd
loans_df = pd.read_csv('./loans.csv')
```
Your task: convert the issue_date column to datetime.
Output:
```python
import pandas as pd
loans_df = pd.read_csv('./loans.csv')
loans_df['issue_date'] = pd.to_datetime(loans_df['issue_date'])
```
Use the pd.to_datetime function to convert the issue_date column to datetime.
</Example>
Code in the active code cell:
```python
{active_cell}
```
Your task: {user_input}"""


# Module-level singleton exported via __all__.
production_prompt_v1_generator = _ProductionPromptV1()
64 changes: 64 additions & 0 deletions evals/prompts/production_prompt_v2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
from evals.eval_types import NotebookState, PromptGenerator

__all__ = ['production_prompt_v2_generator']

class _ProductionPromptV2(PromptGenerator):
    """Prompt generator for the second production prompt variant."""

    prompt_name = "production_prompt_v2"

    def get_prompt(self, user_input: str, notebook_state: NotebookState) -> str:
        """
        Build the prompt text for a single task.

        The prompt contains the instructions, one worked example, the
        notebook's defined variables (`notebook_state.global_vars`), the
        contents of the last cell in `notebook_state.cell_contents` (or an
        empty string when there are no cells), and finally the task given in
        `user_input`.
        """
        # Only the last cell is presented as the "active" cell.
        active_cell = notebook_state.cell_contents[-1] if notebook_state.cell_contents else ""

        # NOTE: literal braces in the example are doubled because this is an
        # f-string; the task is interpolated with plain {user_input} (no `$`).
        return f"""You are an expert python programmer writing a script in a Jupyter notebook. You are given a set of variables, existing code, and a task.
Respond with the updated active code cell and a short explanation of the changes you made.
When responding:
- Do not use the word "I"
- Do not recreate variables that already exist
- Keep as much of the original code as possible
<Example>
Defined Variables:
{{
'sales_multiplier': 1.5,
'sales_df': pd.DataFrame({{
'transaction_date': ['2024-01-02', '2024-01-02', '2024-01-02', '2024-01-02', '2024-01-03'],
'price_per_unit': [10, 9.99, 13.99, 21.00, 100],
'units_sold': [1, 2, 1, 4, 5],
'total_price': [10, 19.98, 13.99, 84.00, 500]
}})
}}
Code in the active code cell:
```python
import pandas as pd
sales_df = pd.read_csv('./sales.csv')
```
Your task: convert the transaction_date column to datetime and then multiply the total_price column by the sales_multiplier.
Output:
```python
import pandas as pd
sales_df = pd.read_csv('./sales.csv')
sales_df['transaction_date'] = pd.to_datetime(sales_df['transaction_date'])
sales_df['total_price'] = sales_df['total_price'] * sales_multiplier
```
Converted the `transaction_date` column to datetime using the built-in pd.to_datetime function and multiplied the `total_price` column by the `sales_multiplier` variable.
</Example>
Defined Variables:
{notebook_state.global_vars}
Code in the active code cell:
```python
{active_cell}
```
Your task: {user_input}"""


# Module-level singleton exported via __all__.
production_prompt_v2_generator = _ProductionPromptV2()
2 changes: 1 addition & 1 deletion evals/prompts/single_shot_prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ class _SingleShotPromptGenerator(PromptGenerator):
def get_prompt(self, user_input: str, notebook_state: NotebookState) -> str:
return f"""You are an expert python programmer. You are given a set of variables, existing code, and a task.
Respond with the python code and nothing else.
Respond with the python code that starts with ```python and ends with ```. Do not return anything else.
<Example>
You have these variables:
Expand Down
2 changes: 1 addition & 1 deletion evals/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,4 +86,4 @@ def are_globals_equal(globals1: Dict[str, Any], globals2: Dict[str, Any]) -> boo
if var_one != var_two:
return False

return True
return True
Loading

0 comments on commit 8ad504c

Please sign in to comment.