diff --git a/ai_ta_backend/filtering_contexts.py b/ai_ta_backend/filtering_contexts.py
index 7c5985f2..b5bef90c 100644
--- a/ai_ta_backend/filtering_contexts.py
+++ b/ai_ta_backend/filtering_contexts.py
@@ -62,7 +62,7 @@ def run_context_filtering(contexts, user_query, max_time_before_return=45, max_c

 def filter_context(context, user_query, langsmith_prompt_obj):
   api_start_time = time.monotonic()
-  print("API start time: ", api_start_time)
+
   final_prompt = str(langsmith_prompt_obj.format(context=context['text'], user_query=user_query))
   try:
     #completion = run_anyscale(final_prompt)
@@ -76,7 +76,8 @@ def filter_context(context, user_query, langsmith_prompt_obj):
         max_tokens=250,
     )
     completion = ret["choices"][0]["message"]["content"]
-    print("API call time: ", (time.monotonic() - api_start_time))
+
+    print(f"⏰ Anyscale runtime: {(time.monotonic() - api_start_time):.2f} seconds")
     return {"completion": completion, "context": context}
   except Exception as e:
     print(f"Error: {e}")
@@ -95,7 +96,7 @@ def parse_result(result):

 #----------------------- OLD CODE BELOW ----------------------------------------------------------------------------#

-#@ray.remote
+# @ray.remote
 # class AsyncActor:
 #   def __init__(self):
 #     pass
@@ -107,10 +108,7 @@ def parse_result(result):
 #       # completion = run_model(final_prompt)
 #       #completion = run_replicate(final_prompt)
 #       completion = run_anyscale(final_prompt)
-#       #clean_text = context['text'].replace('\n', '')
-#       #print("Context: ", clean_text)
-#       #print("Completion: ", completion)
-
+
 #       return {"completion": completion, "context": context}
 #     except Exception as e:
 #       print(f"Error: {e}")
@@ -154,7 +152,7 @@ def parse_result(result):
 #   return output

 # def run_anyscale(prompt):
-
+#   api_start_time = time.monotonic()
 #   ret = openai.ChatCompletion.create(
 #     api_base = "https://api.endpoints.anyscale.com/v1",
 #     api_key=os.environ["ANYSCALE_ENDPOINT_TOKEN"],
@@ -166,12 +164,12 @@ def parse_result(result):
 #     temperature=0.3,
 #     max_tokens=250,
 #   )
-
+#   print(f"⏰ Anyscale runtime: {(time.monotonic() - api_start_time):.2f} seconds")
 #   return ret["choices"][0]["message"]["content"]


 # def parse_result(result):
-#   lines = result['completion'].split('\n')
+#   lines = result.split('\n')
 #   for line in lines:
 #     if 'Final answer' in line:
 #       return 'yes' in line.lower()
diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py
index 6d396245..c054411e 100644
--- a/ai_ta_backend/nomic_logging.py
+++ b/ai_ta_backend/nomic_logging.py
@@ -9,6 +9,7 @@
 from langchain.embeddings import OpenAIEmbeddings
 from nomic import AtlasProject, atlas

+OPENAI_API_TYPE = "azure"

 def log_convo_to_nomic(course_name: str, conversation) -> str:
   nomic.login(os.getenv('NOMIC_API_KEY')) # login during start of flask app
@@ -115,7 +116,7 @@ def log_convo_to_nomic(course_name: str, conversation) -> str:
       }]

       # create embeddings
-      embeddings_model = OpenAIEmbeddings(openai_api_type="azure") # type: ignore
+      embeddings_model = OpenAIEmbeddings(openai_api_type=OPENAI_API_TYPE) # type: ignore
       embeddings = embeddings_model.embed_documents(user_queries)

       # add embeddings to the project
@@ -279,7 +280,7 @@ def create_nomic_map(course_name: str, log_data: list):
     metadata.append(metadata_row)

   metadata = pd.DataFrame(metadata)
-  embeddings_model = OpenAIEmbeddings(openai_api_type="azure") # type: ignore
+  embeddings_model = OpenAIEmbeddings(openai_api_type=OPENAI_API_TYPE) # type: ignore
   embeddings = embeddings_model.embed_documents(user_queries)

   # create Atlas project
diff --git a/ai_ta_backend/vector_database.py b/ai_ta_backend/vector_database.py
index 89383d95..8c101007 100644
--- a/ai_ta_backend/vector_database.py
+++ b/ai_ta_backend/vector_database.py
@@ -43,9 +43,9 @@
 from ai_ta_backend.extreme_context_stuffing import OpenAIAPIProcessor
 from ai_ta_backend.utils_tokenization import count_tokens_and_cost
 from ai_ta_backend.parallel_context_processing import context_processing
+#from ai_ta_backend.filtering_contexts import run
 from ai_ta_backend.filtering_contexts import run_context_filtering
-

 MULTI_QUERY_PROMPT = hub.pull("langchain-ai/rag-fusion-query-generation")
 OPENAI_API_TYPE = "azure" # "openai" or "azure"
@@ -1158,6 +1158,10 @@ def getTopContextsWithMQR(self, search_query: str, course_name: str, token_limit
       print(f"⏰ Multi-query processing runtime: {(time.monotonic() - mq_start_time):.2f} seconds")

+      # filtered_docs = run_context_filtering(contexts=found_docs, user_query=search_query, max_time_before_return=45, max_concurrency=100)
+      # print(f"Number of docs after context filtering: {len(filtered_docs)}")
+      # exit()
+
       # 'context padding' // 'parent document retriever'
       final_docs = context_processing(found_docs, search_query, course_name)
       print(f"Number of final docs after context padding: {len(final_docs)}")
@@ -1167,6 +1171,8 @@ def getTopContextsWithMQR(self, search_query: str, course_name: str, token_limit
       token_counter, _ = count_tokens_and_cost(pre_prompt + '\n\nNow please respond to my query: ' + search_query) # type: ignore
       filtered_docs = run_context_filtering(contexts=final_docs, user_query=search_query, max_time_before_return=45, max_concurrency=100)
+      #filtered_docs = list(run(contexts=final_docs, user_query=search_query, max_time_before_return=45, max_concurrency=100))
+
       print(f"Number of docs after context filtering: {len(filtered_docs)}")
       if len(filtered_docs) > 0:
         final_docs_used = filtered_docs
       else:
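
For reference, a minimal sketch of the `time.monotonic()` timing pattern the new `⏰ Anyscale runtime` log lines in this diff rely on. The `timed_call` helper below is hypothetical (not part of the repo) and only illustrates why a monotonic clock is used for the elapsed-time delta:

```python
import time

def timed_call(label, fn, *args, **kwargs):
    # Hypothetical helper mirroring the pattern in the diff: time.monotonic()
    # never jumps backwards (unlike time.time(), which can shift with NTP or
    # manual clock changes), so the delta is a reliable runtime measurement.
    start = time.monotonic()
    result = fn(*args, **kwargs)
    print(f"⏰ {label} runtime: {(time.monotonic() - start):.2f} seconds")
    return result

# Assumed usage, analogous to the Anyscale call being timed in filter_context:
# completion = timed_call("Anyscale", openai.ChatCompletion.create, model=..., messages=...)
```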