diff --git a/automation-api/lib/llms/iflytek/spark_api.py b/automation-api/lib/llms/iflytek/spark_api.py
index ab51604..8811cc6 100644
--- a/automation-api/lib/llms/iflytek/spark_api.py
+++ b/automation-api/lib/llms/iflytek/spark_api.py
@@ -100,7 +100,8 @@ class SparkClient:
 
     def __init__(self, appid: str, api_key: str, api_secret: str) -> None:
         self.appid = appid
-        self.ws_url = Ws_Param(appid, api_key, api_secret, self.gpt_url).create_url()
+        self.api_key = api_key
+        self.api_secret = api_secret
 
     def gen_parameters(
         self,
@@ -133,9 +134,12 @@ def gen_payload(self, content):
         return data
 
     def generate_text(self, content, **kwargs) -> Dict[str, Any]:
+        ws_url = Ws_Param(
+            self.appid, self.api_key, self.api_secret, self.gpt_url
+        ).create_url()
         data = self.gen_parameters(**kwargs)
         data.update(self.gen_payload(content))
-        res = get_reply(self.ws_url, data)
+        res = get_reply(ws_url, data)
         return res
 
     def chat(self):
diff --git a/automation-api/lib/pilot/helpers.py b/automation-api/lib/pilot/helpers.py
index e7503fd..1465c98 100644
--- a/automation-api/lib/pilot/helpers.py
+++ b/automation-api/lib/pilot/helpers.py
@@ -1,7 +1,7 @@
 import json
 import uuid
 from datetime import datetime
-from typing import Any, Dict, List, Tuple
+from typing import Any, Dict, List, Optional, Tuple
 
 import pandas as pd
 from langchain.chains import LLMChain
@@ -81,19 +81,29 @@ def class_objects_from_df(df: pd.DataFrame, cls: type) -> list:
     return [cls(**rec) for rec in df.to_dict(orient="records")]
 
 
-def get_questions(sheet: AiEvalData) -> List[QuestionAndOptions]:
-    questions = filter_included_rows(sheet.questions.data.df)
+def get_questions(
+    sheet: AiEvalData, include_all: bool = False, language: Optional[str] = None
+) -> List[QuestionAndOptions]:
+    if include_all:
+        questions = sheet.questions.data.df
+    else:
+        questions = filter_included_rows(sheet.questions.data.df)
+
+    if language is not None:
+        questions = questions.loc[questions["language"] == language]
+
     options = sheet.question_options.data.df
     qs = class_objects_from_df(questions, Question)
 
     res = []
     for q in qs:
         qid = q.question_id
+        lang = q.language
         qopts = [
             QuestionOption(**rec)
-            for rec in options.loc[options["question_id"] == qid].to_dict(
-                orient="records"
-            )
+            for rec in options.loc[
+                (options["question_id"] == qid) & (options["language"] == lang)
+            ].to_dict(orient="records")
         ]
 
         res.append((q, qopts))
@@ -198,8 +208,13 @@ def check_llm_eval_output(eval_output: str) -> str:
     return "failed"
 
 
-def get_prompt_variants(sheet: AiEvalData) -> List[PromptVariation]:
-    prompt_variations = filter_included_rows(sheet.prompt_variations.data.df)
+def get_prompt_variants(
+    sheet: AiEvalData, include_all: bool = False
+) -> List[PromptVariation]:
+    if include_all:
+        prompt_variations = sheet.prompt_variations.data.df
+    else:
+        prompt_variations = filter_included_rows(sheet.prompt_variations.data.df)
     res = class_objects_from_df(prompt_variations, PromptVariation)
     return res
 
diff --git a/automation-api/notebooks/exploration-notebook.py b/automation-api/notebooks/exploration-notebook.py
index 003ab18..ce818e0 100644
--- a/automation-api/notebooks/exploration-notebook.py
+++ b/automation-api/notebooks/exploration-notebook.py
@@ -76,9 +76,6 @@
 # -
 
 
-
-
-
 
 # ## Read from AI Eval spreadsheet
 
@@ -116,3 +113,72 @@
 
 ai_eval_data.gen_ai_models.data.df
 
+# ## Test individual question and prompt
+# Here is how you can ask one question to llm
+
+# read the ai eval spreadsheet
+ai_eval_data = read_ai_eval_data(ai_eval_spreadsheet)
+
+# there are some helper functions to parse the spreadsheet
+from lib.pilot.helpers import get_questions, get_prompt_variants, create_question_data_for_test
+
+# load all questions and filter by language
+qs = get_questions(ai_eval_data, include_all=True, language='en-US')
+
+# define a function to load one question
+def get_question(qid, qs):
+    if isinstance(qid, int):
+        qid = str(qid)
+    for q, opts in qs:
+        if q.question_id == qid:
+            return (q, opts)
+
+
+q = get_question("10", qs)
+
+q
+
+# load all prompt variants
+prompts = get_prompt_variants(ai_eval_data, include_all=True)
+prompts
+
+# define a function to load one prompt
+def get_prompt_variant(pid, ps):
+    for p in ps:
+        if pid == p.variation_id:
+            return p
+
+
+pv = get_prompt_variant("simple", prompts)
+pv
+
+# There are many fields in PromptVariation objects:
+# - pv.question_template: how to format the question.
+#   Expects `question`, `option_a`, `option_b`, `option_c` as input.
+# - pv.question_prompt_template: how to format the prompt that is sent to the LLM.
+#   Expects `question` as input (which is formatted according to the above template).
+# - pv.ai_prefix and pv.question_prefix are for memory; they are the prefixes
+#   for question messages and LLM responses.
+# - pv.follow_up_answer_correctness_evaluation_prompt_template: the template to
+#   format a follow-up question to double-check the answer.
+#   Expects `question`, `option_a`, `option_b`, `option_c`, `text` as input.
+
+# to run a model with a given prompt and question:
+
+# format the question with the question template
+qd = create_question_data_for_test(pv.question_template, q)  # returns a dict
+print(qd['question'])
+
+# get llm and run model
+llm = get_openai_model('gpt-3.5-turbo', temperature=0.1)
+answer = run_model(llm, pv.question_prompt_template, verbose=True, **qd)
+print(answer)
+
+# if the llm is good at following instructions and produces answers in the format we want,
+# we can check the answer with this function
+from lib.pilot.helpers import simple_evaluation
+
+simple_evaluation(q, answer)
+
+# +
+# otherwise, we can use another LLM to check if the answer is correct.
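
A rough sketch of that last step, not part of the diff above: the follow-up template together with check_llm_eval_output from lib/pilot/helpers.py could drive the LLM-based check. It assumes the dict returned by create_question_data_for_test also carries the option_a/option_b/option_c keys the follow-up template expects (if it only holds `question`, those fields would have to be filled in by hand), and the choice of 'gpt-4' as the grader model is just an example.

# Sketch only; assumes `qd` contains the `question`, `option_a`, `option_b`,
# `option_c` keys that the follow-up evaluation template expects.
from lib.pilot.helpers import check_llm_eval_output

# use a second model as the grader (model name here is an arbitrary example)
eval_llm = get_openai_model('gpt-4', temperature=0)
eval_output = run_model(
    eval_llm,
    pv.follow_up_answer_correctness_evaluation_prompt_template,
    verbose=True,
    text=answer,  # the first model's answer fills the `text` slot of the template
    **qd,
)
# check_llm_eval_output parses the grader's reply; it falls back to "failed"
# when the reply cannot be interpreted
print(check_llm_eval_output(eval_output))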