From af30a812b1bbed8561402aa4dff7f5607fadd142 Mon Sep 17 00:00:00 2001 From: star-nox Date: Mon, 18 Mar 2024 17:22:25 -0500 Subject: [PATCH] updated and switched parent doc to custom process pool --- ai_ta_backend/executors/process_pool_executor.py | 7 +++++++ ai_ta_backend/utils/context_parent_doc_padding.py | 9 ++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/ai_ta_backend/executors/process_pool_executor.py b/ai_ta_backend/executors/process_pool_executor.py index 81b4860c..b981d613 100644 --- a/ai_ta_backend/executors/process_pool_executor.py +++ b/ai_ta_backend/executors/process_pool_executor.py @@ -29,3 +29,10 @@ def submit(self, fn, *args, **kwargs): def map(self, fn, *iterables, timeout=None, chunksize=1): return self.executor.map(fn, *iterables, timeout=timeout, chunksize=chunksize) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.executor.shutdown(wait=True) + diff --git a/ai_ta_backend/utils/context_parent_doc_padding.py b/ai_ta_backend/utils/context_parent_doc_padding.py index 892cddc9..ed1d4988 100644 --- a/ai_ta_backend/utils/context_parent_doc_padding.py +++ b/ai_ta_backend/utils/context_parent_doc_padding.py @@ -4,6 +4,7 @@ from functools import partial from multiprocessing import Manager from ai_ta_backend.database.sql import SQLDatabase +from ai_ta_backend.executors.process_pool_executor import ProcessPoolExecutorAdapter # DOCUMENTS_TABLE = os.environ['SUPABASE_DOCUMENTS_TABLE'] @@ -12,12 +13,14 @@ SQL_DB = SQLDatabase() + def context_parent_doc_padding(found_docs, search_query, course_name): """ Takes top N contexts acquired from QRANT similarity search and pads them """ print("inside main context padding") start_time = time.monotonic() + #executor = ProcessPoolExecutorAdapter(max_workers=10) with Manager() as manager: qdrant_contexts = manager.list() @@ -25,7 +28,11 @@ def context_parent_doc_padding(found_docs, search_query, course_name): partial_func1 = partial(qdrant_context_processing, course_name=course_name, result_contexts=qdrant_contexts) partial_func2 = partial(supabase_context_padding, course_name=course_name, result_docs=supabase_contexts) - with ProcessPoolExecutor() as executor: + # with ProcessPoolExecutor() as executor: + # executor.map(partial_func1, found_docs[5:]) + # executor.map(partial_func2, found_docs[:5]) + + with ProcessPoolExecutorAdapter() as executor: executor.map(partial_func1, found_docs[5:]) executor.map(partial_func2, found_docs[:5])