added filtering to the retrieval pipeline

UIUC-Chatbot · Dec 6, 2023 · c55f365
1 parent 256002e
commit c55f365
Show file tree

Hide file tree

Showing 2 changed files with 6 additions and 6 deletions.
diff --git a/ai_ta_backend/filtering_contexts.py b/ai_ta_backend/filtering_contexts.py
@@ -33,7 +33,7 @@ def __init__(self):
     pass
 
   def filter_context(self, context, user_query, langsmith_prompt_obj):
-    final_prompt = str(langsmith_prompt_obj.format(context=context, user_query=user_query))
+    final_prompt = str(langsmith_prompt_obj.format(context=context['text'], user_query=user_query))
     print(f"-------\nfinal_prompt:\n{final_prompt}\n^^^^^^^^^^^^^")
     try: 
       # completion = run_model(final_prompt)
@@ -131,7 +131,7 @@ def run(contexts, user_query, max_tokens_to_return=3000, max_time_before_return=
   #exit()
 
   actor = AsyncActor.options(max_concurrency=max_concurrency).remote()
-  result_futures = [actor.filter_context.remote(c['text'], user_query, langsmith_prompt_obj) for c in contexts]
+  result_futures = [actor.filter_context.remote(c, user_query, langsmith_prompt_obj) for c in contexts]
   print("Num futures:", len(result_futures))
   #print("Result futures:", result_futures)
 

diff --git a/ai_ta_backend/vector_database.py b/ai_ta_backend/vector_database.py
@@ -1406,14 +1406,14 @@ def getTopContextsWithMQR(self, search_query: str, course_name: str, token_limit
       final_filtered_docs = list(run(contexts=valid_docs, user_query=search_query, max_time_before_return=45, max_concurrency=20))
 
       print("Length of final filtered docs: ", len(final_filtered_docs))
-      #print("FINAL FILTERED DOCS: ", final_filtered_docs)
+      print("FINAL FILTERED DOCS: ", final_filtered_docs)
 
-      print(f"Total tokens used: {token_counter} total docs: {len(found_docs)} num docs used: {len(valid_docs)}")
+      print(f"Total tokens used: {token_counter} total docs: {len(found_docs)} num docs used: {len(final_filtered_docs)}")
       print(f"Course: {course_name} ||| search_query: {search_query}")
       print(f"⏰ ^^ Runtime of getTopContextsWithMQR: {(time.monotonic() - start_time_overall):.2f} seconds")
-      if len(valid_docs) == 0:
+      if len(final_filtered_docs) == 0:
         return []
-      return self.format_for_json_mqr(valid_docs)
+      return self.format_for_json_mqr(final_filtered_docs)
     except Exception as e:
       # return full traceback to front end
       err: str = f"ERROR: In /getTopContextsWithMQR. Course: {course_name} ||| search_query: {search_query}\nTraceback: {traceback.format_exc()}❌❌ Error in {inspect.currentframe().f_code.co_name}:\n{e}"  # type: ignore