"""Multi-shot, pandas-focussed prompt generator for the evals."""

from evals.eval_types import NotebookState, PromptGenerator

__all__ = ['multi_shot_pandas_focussed_prompt']


class _MultiShotPandasFocussedPrompt(PromptGenerator):
    """Prompt generator that shows two worked pandas examples before the task.

    The generated prompt contains fixed instructions, two example
    (variables, active cell, task, output) groups, and finally the
    variables, active cell contents and task of the notebook under test.
    """

    prompt_name = "multi_shot_pandas_focussed_prompt"

    def get_prompt(self, user_input: str, notebook_state: NotebookState) -> str:
        """Build the full prompt text for one task.

        Args:
            user_input: The task description, inserted verbatim after
                ``Your task:`` at the end of the prompt.
            notebook_state: Supplies ``global_vars`` (rendered under
                "Defined Variables") and ``cell_contents``; only the last
                cell is shown as the active code cell.

        Returns:
            The prompt string.
        """
        # The active cell is the last one; fall back to an empty cell when the
        # notebook has no cells at all.
        cells = notebook_state.cell_contents
        active_cell = cells[-1] if cells else ""

        # NOTE: literal braces inside the f-string are doubled ({{ }}).
        # The first example defines `sales_multiplier` so that the variable
        # used by its task/output actually exists in "Defined Variables".
        # The final line uses `{user_input}` (no leading `$`): `$` is not part
        # of f-string interpolation and would leak into the prompt.
        return f"""You are an expert python programmer writing a script in a Jupyter notebook. You are given a set of variables, existing code, and a task.

Respond with the updated active code cell and a short explanation of the changes you made.

When responding:
- Do not use the word "I"
- Do not recreate variables that already exist
- Keep as much of the original code as possible



Defined Variables:
{{
    'sales_multiplier': 1.5,
    'sales_df': pd.DataFrame({{
        'transaction_date': ['2024-01-02', '2024-01-02', '2024-01-02', '2024-01-02', '2024-01-03'],
        'price_per_unit': [10, 9.99, 13.99, 21.00, 100],
        'units_sold': [1, 2, 1, 4, 5],
        'total_price': [10, 19.98, 13.99, 84.00, 500]
    }})
}}

Code in the active code cell:
```python
import pandas as pd
sales_df = pd.read_csv('./sales.csv')
```

Your task: convert the transaction_date column to datetime and then multiply the total_price column by the sales_multiplier.

Output:

```python
import pandas as pd
sales_df = pd.read_csv('./sales.csv')
sales_df['transaction_date'] = pd.to_datetime(sales_df['transaction_date'])
sales_df['total_price'] = sales_df['total_price'] * sales_multiplier
```

Converted the `transaction_date` column to datetime using the built-in pd.to_datetime function and multiplied the `total_price` column by the `sales_multiplier` variable.



Defined Variables:
{{
    'df': pd.DataFrame({{
        'id': ['id-49830', 'id-39301', 'id-85011', 'id-51892', 'id-99111'],
        'name': ['Tamir', 'Aaron', 'Grace', 'Nawaz', 'Julia'],
        'age': [29, 31, 26, 21, 30],
        'dob': ['1994-06-15', '1992-03-27', '1997-04-11', '2002-07-05', '1993-08-22'],
        'city': ['San Francisco', 'New York', 'Los Angeles', 'Chicago', 'Houston'],
        'state': ['CA', 'NY', 'CA', 'IL', 'TX'],
        'zip': ['94103', '10001', '90038', '60611', '77002'],
        'start_date': ['2024-01-01', '2024-01-01', '2024-01-01', '2024-01-01', '2024-01-01'],
        'department': ['Engineering', 'Sales', 'Marketing', 'Operations', 'Finance'],
        'salary': ['$100,000', '$50,000', '$60,000', '$55,000', '$70,000']
    }})
}}

Code in the active code cell:
```python

```

Your task: Calculate the weekly salary for each employee.

Output:

```python
df['salary'] = df['salary'].str[1:].replace(',', '', regex=True).astype('float')
df['weekly_salary'] = df['salary'] / 52
```

Remove the `$` and `,` from the `salary` in order to convert it to a float. Then, divide the salary by 52 to get the weekly salary.


Defined Variables:
{notebook_state.global_vars}

Code in the active code cell:

```python
{active_cell}
```

Your task: {user_input}"""


multi_shot_pandas_focussed_prompt = _MultiShotPandasFocussedPrompt()