diff --git a/ai_ta_backend/filtering_contexts.py b/ai_ta_backend/filtering_contexts.py
index 659e565e..58683cdc 100644
--- a/ai_ta_backend/filtering_contexts.py
+++ b/ai_ta_backend/filtering_contexts.py
@@ -40,7 +40,7 @@ def run_context_filtering(contexts, user_query, max_time_before_return=45, max_c
   partial_func1 = partial(filter_context, user_query=user_query, langsmith_prompt_obj=langsmith_prompt_obj)
   partial_func2 = partial(select_context, result=filtered_contexts)
 
-  with ProcessPoolExecutor(max_workers=50) as executor:
+  with ProcessPoolExecutor(max_workers=30) as executor:
     anyscale_responses = list(executor.map(partial_func1, contexts))
     if len(anyscale_responses) > 0:
       executor.map(partial_func2, anyscale_responses)
diff --git a/ai_ta_backend/vector_database.py b/ai_ta_backend/vector_database.py
index 521b4c13..a1052319 100644
--- a/ai_ta_backend/vector_database.py
+++ b/ai_ta_backend/vector_database.py
@@ -1143,7 +1143,9 @@ def getTopContextsWithMQR(self, search_query: str, course_name: str, token_limit
    print("generated_queries", generated_queries)
    batch_found_docs: list[list[Document]] = self.batch_vector_search(search_queries=generated_queries,
                                                                      course_name=course_name)
-
+
+    # filtered_docs = run_context_filtering(contexts=batch_found_docs, user_query=search_query, max_time_before_return=45, max_concurrency=100)
+    # exit()
    found_docs = self.reciprocal_rank_fusion(batch_found_docs)
    found_docs = [doc for doc, score in found_docs]
    print(f"Number of docs found with multiple queries: {len(found_docs)}")
diff --git a/run.sh b/run.sh
index b925376d..aa6240b7 100755
--- a/run.sh
+++ b/run.sh
@@ -3,4 +3,4 @@
 
 # Docs https://docs.gunicorn.org/en/stable/settings.html#workers
 export PYTHONPATH=$PYTHONPATH:$(pwd)/ai_ta_backend
-exec gunicorn --workers=3 --threads=16 --worker-class=gthread ai_ta_backend.main:app --timeout 1800
\ No newline at end of file
+exec gunicorn --workers=30 --threads=16 --worker-class=gthread ai_ta_backend.main:app --timeout 1800
\ No newline at end of file
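
Note on the first hunk: a minimal, self-contained sketch of the ProcessPoolExecutor + functools.partial fan-out pattern that max_workers tunes. The helper names and filtering logic below are hypothetical stand-ins, not the repo's actual filter_context/select_context code.

# Hypothetical sketch: max_workers caps how many worker processes run the
# per-context filter at once; executor.map preserves input order.
from concurrent.futures import ProcessPoolExecutor
from functools import partial


def score_context(context: str, user_query: str) -> tuple[str, bool]:
  # Stand-in for the real per-context relevance check (e.g. an LLM call).
  return context, user_query.lower() in context.lower()


def filter_contexts(contexts: list[str], user_query: str, max_workers: int = 30) -> list[str]:
  scorer = partial(score_context, user_query=user_query)
  with ProcessPoolExecutor(max_workers=max_workers) as executor:
    results = list(executor.map(scorer, contexts))
  return [ctx for ctx, keep in results if keep]


if __name__ == "__main__":
  docs = ["gunicorn worker tuning notes", "unrelated text", "worker pool sizing"]
  print(filter_contexts(docs, "worker"))

Setting max_workers below the number of contexts only queues the remainder, so lowering the pool from 50 to 30 bounds process count and memory overhead rather than changing which contexts get processed.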