diff --git a/ai_ta_backend/filtering_contexts.py b/ai_ta_backend/filtering_contexts.py index 58683cdc..d194f017 100644 --- a/ai_ta_backend/filtering_contexts.py +++ b/ai_ta_backend/filtering_contexts.py @@ -34,18 +34,29 @@ def run_context_filtering(contexts, user_query, max_time_before_return=45, max_c start_time = time.monotonic() langsmith_prompt_obj = hub.pull("kastanday/filter-unrelated-contexts-zephyr") + # passages = [] + # for docs in contexts: + # for doc in docs: + # print("doc: ", doc) + # exit() + # passages.append(doc) + + # print("Num jobs to run:", len(passages)) + # call filter contexts function with Manager() as manager: filtered_contexts = manager.list() partial_func1 = partial(filter_context, user_query=user_query, langsmith_prompt_obj=langsmith_prompt_obj) partial_func2 = partial(select_context, result=filtered_contexts) - with ProcessPoolExecutor(max_workers=30) as executor: + with ProcessPoolExecutor(max_workers=100) as executor: anyscale_responses = list(executor.map(partial_func1, contexts)) if len(anyscale_responses) > 0: executor.map(partial_func2, anyscale_responses) else: print("LLM responses are empty.") + + executor.shutdown(wait=True) filtered_contexts = list(filtered_contexts) print(f"⏰ Context filtering runtime: {(time.monotonic() - start_time):.2f} seconds") diff --git a/ai_ta_backend/vector_database.py b/ai_ta_backend/vector_database.py index a1052319..89383d95 100644 --- a/ai_ta_backend/vector_database.py +++ b/ai_ta_backend/vector_database.py @@ -1145,7 +1145,11 @@ def getTopContextsWithMQR(self, search_query: str, course_name: str, token_limit batch_found_docs: list[list[Document]] = self.batch_vector_search(search_queries=generated_queries, course_name=course_name) # filtered_docs = run_context_filtering(contexts=batch_found_docs, user_query=search_query, max_time_before_return=45, max_concurrency=100) + # print(f"Number of docs after context filtering: {len(filtered_docs)}") + + # print(filtered_docs[0]) # exit() + found_docs = self.reciprocal_rank_fusion(batch_found_docs) found_docs = [doc for doc, score in found_docs] print(f"Number of docs found with multiple queries: {len(found_docs)}") @@ -1154,10 +1158,6 @@ def getTopContextsWithMQR(self, search_query: str, course_name: str, token_limit print(f"⏰ Multi-query processing runtime: {(time.monotonic() - mq_start_time):.2f} seconds") - # Context filtering - #filtered_docs = list(run(contexts=found_docs, user_query=search_query, max_time_before_return=45, max_concurrency=100)) - #print(f"Number of docs after context filtering: {len(filtered_docs)}") - # 'context padding' // 'parent document retriever' final_docs = context_processing(found_docs, search_query, course_name) print(f"Number of final docs after context padding: {len(final_docs)}") diff --git a/run.sh b/run.sh index aa6240b7..b925376d 100755 --- a/run.sh +++ b/run.sh @@ -3,4 +3,4 @@ # Docs https://docs.gunicorn.org/en/stable/settings.html#workers export PYTHONPATH=$PYTHONPATH:$(pwd)/ai_ta_backend -exec gunicorn --workers=30 --threads=16 --worker-class=gthread ai_ta_backend.main:app --timeout 1800 \ No newline at end of file +exec gunicorn --workers=3 --threads=16 --worker-class=gthread ai_ta_backend.main:app --timeout 1800 \ No newline at end of file