From 064e37be307f7f84e02117ca79afa97505ec3360 Mon Sep 17 00:00:00 2001
From: star-nox <dabholkar.asmita@gmail.com>
Date: Mon, 11 Dec 2023 20:01:09 -0600
Subject: [PATCH] Reduce gunicorn workers from 30 to 3; raise context-filtering pool max_workers to 100

---
 ai_ta_backend/filtering_contexts.py | 13 ++++++++++++-
 ai_ta_backend/vector_database.py    |  8 ++++----
 run.sh                              |  2 +-
 3 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/ai_ta_backend/filtering_contexts.py b/ai_ta_backend/filtering_contexts.py
index 58683cdc..d194f017 100644
--- a/ai_ta_backend/filtering_contexts.py
+++ b/ai_ta_backend/filtering_contexts.py
@@ -34,18 +34,29 @@ def run_context_filtering(contexts, user_query, max_time_before_return=45, max_c
   start_time = time.monotonic()
   langsmith_prompt_obj = hub.pull("kastanday/filter-unrelated-contexts-zephyr")
   
+  # passages = []
+  # for docs in contexts:
+  #   for doc in docs:
+  #     print("doc: ", doc)
+  #     exit()
+  #     passages.append(doc)
+  
+  # print("Num jobs to run:", len(passages))
+  
   # call filter contexts function
   with Manager() as manager:
     filtered_contexts = manager.list()
     partial_func1 = partial(filter_context, user_query=user_query, langsmith_prompt_obj=langsmith_prompt_obj)
     partial_func2 = partial(select_context, result=filtered_contexts)
 
-    with ProcessPoolExecutor(max_workers=30) as executor:
+    with ProcessPoolExecutor(max_workers=100) as executor:
       anyscale_responses = list(executor.map(partial_func1, contexts))
       if len(anyscale_responses) > 0:
         executor.map(partial_func2, anyscale_responses)
       else:
         print("LLM responses are empty.")
+      
+      executor.shutdown(wait=True)
  
     filtered_contexts = list(filtered_contexts)
   print(f"⏰ Context filtering runtime: {(time.monotonic() - start_time):.2f} seconds")
diff --git a/ai_ta_backend/vector_database.py b/ai_ta_backend/vector_database.py
index a1052319..89383d95 100644
--- a/ai_ta_backend/vector_database.py
+++ b/ai_ta_backend/vector_database.py
@@ -1145,7 +1145,11 @@ def getTopContextsWithMQR(self, search_query: str, course_name: str, token_limit
       batch_found_docs: list[list[Document]] = self.batch_vector_search(search_queries=generated_queries, course_name=course_name)
       
       # filtered_docs = run_context_filtering(contexts=batch_found_docs, user_query=search_query, max_time_before_return=45, max_concurrency=100)
+      # print(f"Number of docs after context filtering: {len(filtered_docs)}")
+
+      # print(filtered_docs[0])
       # exit()
+
       found_docs = self.reciprocal_rank_fusion(batch_found_docs)
       found_docs = [doc for doc, score in found_docs]
       print(f"Number of docs found with multiple queries: {len(found_docs)}")
@@ -1154,10 +1158,6 @@ def getTopContextsWithMQR(self, search_query: str, course_name: str, token_limit
 
       print(f"⏰ Multi-query processing runtime: {(time.monotonic() - mq_start_time):.2f} seconds")
 
-      # Context filtering
-      #filtered_docs = list(run(contexts=found_docs, user_query=search_query, max_time_before_return=45, max_concurrency=100))
-      #print(f"Number of docs after context filtering: {len(filtered_docs)}")
-
       # 'context padding' // 'parent document retriever' 
       final_docs = context_processing(found_docs, search_query, course_name)
       print(f"Number of final docs after context padding: {len(final_docs)}")
diff --git a/run.sh b/run.sh
index aa6240b7..b925376d 100755
--- a/run.sh
+++ b/run.sh
@@ -3,4 +3,4 @@
 # Docs https://docs.gunicorn.org/en/stable/settings.html#workers
 
 export PYTHONPATH=$PYTHONPATH:$(pwd)/ai_ta_backend
-exec gunicorn --workers=30 --threads=16 --worker-class=gthread ai_ta_backend.main:app --timeout 1800
\ No newline at end of file
+exec gunicorn --workers=3 --threads=16 --worker-class=gthread ai_ta_backend.main:app --timeout 1800
\ No newline at end of file