Improve Helper Functions #23

Merged
merged 3 commits on Sep 2, 2023
8 changes: 6 additions & 2 deletions automation-api/lib/llms/iflytek/spark_api.py
@@ -100,7 +100,8 @@ class SparkClient:

     def __init__(self, appid: str, api_key: str, api_secret: str) -> None:
         self.appid = appid
-        self.ws_url = Ws_Param(appid, api_key, api_secret, self.gpt_url).create_url()
+        self.api_key = api_key
+        self.api_secret = api_secret

     def gen_parameters(
         self,
@@ -133,9 +134,12 @@ def gen_payload(self, content):
         return data

     def generate_text(self, content, **kwargs) -> Dict[str, Any]:
+        ws_url = Ws_Param(
+            self.appid, self.api_key, self.api_secret, self.gpt_url
+        ).create_url()
         data = self.gen_parameters(**kwargs)
         data.update(self.gen_payload(content))
-        res = get_reply(self.ws_url, data)
+        res = get_reply(ws_url, data)
         return res

     def chat(self):
31 changes: 23 additions & 8 deletions automation-api/lib/pilot/helpers.py
@@ -1,7 +1,7 @@
 import json
 import uuid
 from datetime import datetime
-from typing import Any, Dict, List, Tuple
+from typing import Any, Dict, List, Optional, Tuple

 import pandas as pd
 from langchain.chains import LLMChain
@@ -81,19 +81,29 @@ def class_objects_from_df(df: pd.DataFrame, cls: type) -> list:
     return [cls(**rec) for rec in df.to_dict(orient="records")]


-def get_questions(sheet: AiEvalData) -> List[QuestionAndOptions]:
-    questions = filter_included_rows(sheet.questions.data.df)
+def get_questions(
+    sheet: AiEvalData, include_all: bool = False, language: Optional[str] = None
+) -> List[QuestionAndOptions]:
+    if include_all:
+        questions = sheet.questions.data.df
+    else:
+        questions = filter_included_rows(sheet.questions.data.df)
+
+    if language is not None:
+        questions = questions.loc[questions["language"] == language]
+
     options = sheet.question_options.data.df
     qs = class_objects_from_df(questions, Question)

     res = []
     for q in qs:
         qid = q.question_id
+        lang = q.language
         qopts = [
             QuestionOption(**rec)
-            for rec in options.loc[options["question_id"] == qid].to_dict(
-                orient="records"
-            )
+            for rec in options.loc[
+                (options["question_id"] == qid) & (options["language"] == lang)
+            ].to_dict(orient="records")
         ]
         res.append((q, qopts))

@@ -198,8 +208,13 @@ def check_llm_eval_output(eval_output: str) -> str:
     return "failed"


-def get_prompt_variants(sheet: AiEvalData) -> List[PromptVariation]:
-    prompt_variations = filter_included_rows(sheet.prompt_variations.data.df)
+def get_prompt_variants(
+    sheet: AiEvalData, include_all: bool = False
+) -> List[PromptVariation]:
+    if include_all:
+        prompt_variations = sheet.prompt_variations.data.df
+    else:
+        prompt_variations = filter_included_rows(sheet.prompt_variations.data.df)
     res = class_objects_from_df(prompt_variations, PromptVariation)
     return res

72 changes: 69 additions & 3 deletions automation-api/notebooks/exploration-notebook.py
@@ -76,9 +76,6 @@
 # -


-
-
-
 # ## Read from AI Eval spreadsheet


@@ -116,3 +113,72 @@
ai_eval_data.gen_ai_models.data.df


# ## Test individual question and prompt
# Here is how you can ask one question to the LLM

# read the ai eval spreadsheet
ai_eval_data = read_ai_eval_data(ai_eval_spreadsheet)

# there are some helper functions to parse the spreadsheet
from lib.pilot.helpers import get_questions, get_prompt_variants, create_question_data_for_test

# load all questions and filter by language
qs = get_questions(ai_eval_data, include_all=True, language='en-US')

# define a function to load one question
def get_question(qid, qs):
if isinstance(qid, int):
qid = str(qid)
for q, opts in qs:
if q.question_id == qid:
return (q, opts)


q = get_question("10", qs)

q

# load all prompt variants
prompts = get_prompt_variants(ai_eval_data, include_all=True)
prompts

# define a function to load one prompt variant by its id
def get_prompt_variant(pid, ps):
for p in ps:
if pid == p.variation_id:
return p


pv = get_prompt_variant("simple", prompts)
pv

# PromptVariation objects have several fields:
# - pv.question_template: how to format the question.
#   Expects `question`, `option_a`, `option_b`, `option_c` as input.
# - pv.question_prompt_template: how to format the full prompt that is sent to the LLM.
#   Expects `question` as input (the question formatted with the template above).
# - pv.ai_prefix and pv.question_prefix are used for conversation memory: they are the
#   prefixes for the question messages and for the LLM's replies (see the sketch below).
# - pv.follow_up_answer_correctness_evaluation_prompt_template: the template used to
#   format a follow-up question that double-checks the answer.
#   Expects `question`, `option_a`, `option_b`, `option_c`, `text` as input.
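
# As a hypothetical illustration (not code from this PR) of how the two prefix
# fields could feed a LangChain conversation memory; the actual memory setup in
# lib.pilot may differ:
from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory(
    human_prefix=pv.question_prefix,  # prefix put before the question messages
    ai_prefix=pv.ai_prefix,  # prefix put before the LLM's replies
)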

# to run a model with a given prompt and question:

# format the question with the question template
qd = create_question_data_for_test(pv.question_template, q)  # returns a dict
print(qd['question'])

# get llm and run model
llm = get_openai_model('gpt-3.5-turbo', temperature=0.1)
answer = run_model(llm, pv.question_prompt_template, verbose=True, **qd)
print(answer)

# if the LLM is good at following instructions and produces answers in the format we want,
# we can check the answer with this function
from lib.pilot.helpers import simple_evaluation

simple_evaluation(q, answer)

# +
# otherwise, we can use another LLM to check if the answer is correct.
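
# A minimal sketch of that approach (an assumption, not code from this PR). It reuses
# get_openai_model and run_model from above together with the
# follow_up_answer_correctness_evaluation_prompt_template field, and assumes that qd
# also carries `option_a`, `option_b`, `option_c` and that check_llm_eval_output maps
# the evaluator's reply to a verdict string (the helpers diff above shows it can
# return "failed").
from lib.pilot.helpers import check_llm_eval_output

eval_llm = get_openai_model('gpt-3.5-turbo', temperature=0)
eval_output = run_model(
    eval_llm,
    pv.follow_up_answer_correctness_evaluation_prompt_template,
    verbose=True,
    text=answer,  # the answer produced by the first model above
    **qd,  # question, option_a, option_b, option_c
)
print(check_llm_eval_output(eval_output))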