changed workers to 3
star-nox committed Dec 12, 2023
1 parent 0e57a97 commit 064e37b
Showing 3 changed files with 17 additions and 6 deletions.
ai_ta_backend/filtering_contexts.py (12 additions, 1 deletion)
@@ -34,18 +34,29 @@ def run_context_filtering(contexts, user_query, max_time_before_return=45, max_c
  start_time = time.monotonic()
  langsmith_prompt_obj = hub.pull("kastanday/filter-unrelated-contexts-zephyr")

  # passages = []
  # for docs in contexts:
  #   for doc in docs:
  #     print("doc: ", doc)
  #     exit()
  #     passages.append(doc)

  # print("Num jobs to run:", len(passages))

  # call filter contexts function
  with Manager() as manager:
    filtered_contexts = manager.list()
    partial_func1 = partial(filter_context, user_query=user_query, langsmith_prompt_obj=langsmith_prompt_obj)
    partial_func2 = partial(select_context, result=filtered_contexts)

-   with ProcessPoolExecutor(max_workers=30) as executor:
+   with ProcessPoolExecutor(max_workers=100) as executor:
      anyscale_responses = list(executor.map(partial_func1, contexts))
      if len(anyscale_responses) > 0:
        executor.map(partial_func2, anyscale_responses)
      else:
        print("LLM responses are empty.")

      executor.shutdown(wait=True)

    filtered_contexts = list(filtered_contexts)
  print(f"⏰ Context filtering runtime: {(time.monotonic() - start_time):.2f} seconds")
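For context on the hunk above: run_context_filtering fans the filter calls out over a process pool and collects the selected contexts in a Manager-backed list, and the max_workers=30 → 100 change simply widens that pool. Below is a minimal, self-contained sketch of the same Manager / ProcessPoolExecutor / functools.partial pattern; fake_filter and fake_select are hypothetical stand-ins for the repository's filter_context and select_context, so it illustrates the orchestration only, not the actual LLM filtering.

# Minimal sketch of the pattern above; not the repository's code.
# fake_filter / fake_select are hypothetical stand-ins for filter_context / select_context.
from concurrent.futures import ProcessPoolExecutor
from functools import partial
from multiprocessing import Manager


def fake_filter(context: str, user_query: str) -> str:
  # Stand-in for the LLM relevance check: keep the context only if it mentions the query.
  return context if user_query.lower() in context.lower() else ""


def fake_select(response: str, result) -> None:
  # Stand-in for select_context: append non-empty responses to the shared list.
  if response:
    result.append(response)


def run_filtering(contexts, user_query):
  with Manager() as manager:
    filtered = manager.list()  # proxy object; picklable, visible to all worker processes
    func1 = partial(fake_filter, user_query=user_query)
    func2 = partial(fake_select, result=filtered)

    with ProcessPoolExecutor(max_workers=4) as executor:
      responses = list(executor.map(func1, contexts))
      if len(responses) > 0:
        # Consuming the iterator surfaces any exceptions raised in the workers.
        list(executor.map(func2, responses))
      else:
        print("LLM responses are empty.")

    return list(filtered)  # copy out of the proxy before the Manager shuts down


if __name__ == "__main__":
  docs = ["GPUs speed up training", "cafeteria menu for Tuesday", "notes on GPU memory"]
  print(run_filtering(docs, "gpu"))

The Manager list matters because each worker runs in its own process, so appends to a plain Python list in the parent would never be seen; converting the proxy with list(...) before the Manager context exits, as the diff also does, copies the results out while the manager process is still alive.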
ai_ta_backend/vector_database.py (4 additions, 4 deletions)
@@ -1145,7 +1145,11 @@ def getTopContextsWithMQR(self, search_query: str, course_name: str, token_limit
batch_found_docs: list[list[Document]] = self.batch_vector_search(search_queries=generated_queries, course_name=course_name)

# filtered_docs = run_context_filtering(contexts=batch_found_docs, user_query=search_query, max_time_before_return=45, max_concurrency=100)
# print(f"Number of docs after context filtering: {len(filtered_docs)}")

# print(filtered_docs[0])
# exit()

found_docs = self.reciprocal_rank_fusion(batch_found_docs)
found_docs = [doc for doc, score in found_docs]
print(f"Number of docs found with multiple queries: {len(found_docs)}")
@@ -1154,10 +1158,6 @@

print(f"⏰ Multi-query processing runtime: {(time.monotonic() - mq_start_time):.2f} seconds")

# Context filtering
#filtered_docs = list(run(contexts=found_docs, user_query=search_query, max_time_before_return=45, max_concurrency=100))
#print(f"Number of docs after context filtering: {len(filtered_docs)}")

# 'context padding' // 'parent document retriever'
final_docs = context_processing(found_docs, search_query, course_name)
print(f"Number of final docs after context padding: {len(final_docs)}")
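The hunks above feed the per-query result lists from batch_vector_search into self.reciprocal_rank_fusion and then unpack (doc, score) pairs. That implementation is not part of this diff, so the sketch below only shows the standard reciprocal-rank-fusion formula, score(d) = Σ 1/(k + rank), with k commonly set to 60, over hashable placeholder documents; the repository's version may differ in its details.

# Generic reciprocal-rank-fusion sketch; not the repository's implementation.
# Each inner list is one generated query's ranking; a document's fused score sums
# 1/(k + rank) over every ranking it appears in, so documents retrieved by several
# of the generated queries float to the top.
from collections import defaultdict


def reciprocal_rank_fusion(ranked_lists, k: int = 60):
  scores = defaultdict(float)
  for ranking in ranked_lists:
    for rank, doc in enumerate(ranking, start=1):
      scores[doc] += 1.0 / (k + rank)
  # Highest fused score first, matching the (doc, score) pairs unpacked in the diff.
  return sorted(scores.items(), key=lambda pair: pair[1], reverse=True)


fused = reciprocal_rank_fusion([["a", "b", "c"], ["b", "c", "d"]])
print([doc for doc, score in fused])  # ['b', 'c', 'a', 'd']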
run.sh (1 addition, 1 deletion)
@@ -3,4 +3,4 @@
# Docs https://docs.gunicorn.org/en/stable/settings.html#workers

export PYTHONPATH=$PYTHONPATH:$(pwd)/ai_ta_backend
-exec gunicorn --workers=30 --threads=16 --worker-class=gthread ai_ta_backend.main:app --timeout 1800
+exec gunicorn --workers=3 --threads=16 --worker-class=gthread ai_ta_backend.main:app --timeout 1800
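This is the change the commit message refers to: 30 gthread workers drop to 3 while --threads=16 stays, so the app still serves up to 3 × 16 = 48 concurrent requests from far fewer processes. The gunicorn settings page linked in the script suggests roughly 2–4 × the number of cores (the design notes give (2 × cores) + 1 as a starting point); the snippet below is only an illustration of that arithmetic, not part of the commit.

# Illustration of the sizing arithmetic for the line above; not part of the commit.
# With --worker-class=gthread, concurrent capacity is roughly workers * threads.
import multiprocessing

cores = multiprocessing.cpu_count()
suggested_workers = (2 * cores) + 1  # common starting point from the gunicorn docs
threads_per_worker = 16              # matches --threads=16 in run.sh
print(f"cores={cores}, suggested workers={suggested_workers}, "
      f"capacity with 3 workers = {3 * threads_per_worker} concurrent requests")

Fewer worker processes also means fewer resident copies of the application in memory, which is presumably the motivation for the reduction.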
