diff --git a/ai_ta_backend/filtering_contexts.py b/ai_ta_backend/filtering_contexts.py
index 7c5985f2..b5bef90c 100644
--- a/ai_ta_backend/filtering_contexts.py
+++ b/ai_ta_backend/filtering_contexts.py
@@ -62,7 +62,7 @@ def run_context_filtering(contexts, user_query, max_time_before_return=45, max_c

 def filter_context(context, user_query, langsmith_prompt_obj):
   api_start_time = time.monotonic()
-  print("API start time: ", api_start_time)
+
   final_prompt = str(langsmith_prompt_obj.format(context=context['text'], user_query=user_query))
   try:
     #completion = run_anyscale(final_prompt)
@@ -76,7 +76,8 @@ def filter_context(context, user_query, langsmith_prompt_obj):
         max_tokens=250,
     )
     completion = ret["choices"][0]["message"]["content"]
-    print("API call time: ", (time.monotonic() - api_start_time))
+
+    print(f"⏰ Anyscale runtime: {(time.monotonic() - api_start_time):.2f} seconds")
     return {"completion": completion, "context": context}
   except Exception as e:
     print(f"Error: {e}")
@@ -95,7 +96,7 @@ def parse_result(result):

 #----------------------- OLD CODE BELOW ----------------------------------------------------------------------------#

-#@ray.remote
+# @ray.remote
 # class AsyncActor:
 #   def __init__(self):
 #     pass
@@ -107,10 +108,7 @@ def parse_result(result):
 #       # completion = run_model(final_prompt)
 #       #completion = run_replicate(final_prompt)
 #       completion = run_anyscale(final_prompt)
-#       #clean_text = context['text'].replace('\n', '')
-#       #print("Context: ", clean_text)
-#       #print("Completion: ", completion)
-
+
 #       return {"completion": completion, "context": context}
 #     except Exception as e:
 #       print(f"Error: {e}")
@@ -154,7 +152,7 @@ def parse_result(result):
 #   return output

 # def run_anyscale(prompt):
-
+#   api_start_time = time.monotonic()
 #   ret = openai.ChatCompletion.create(
 #     api_base = "https://api.endpoints.anyscale.com/v1",
 #     api_key=os.environ["ANYSCALE_ENDPOINT_TOKEN"],
@@ -166,12 +164,12 @@ def parse_result(result):
 #     temperature=0.3,
 #     max_tokens=250,
 #   )
-
+#   print(f"⏰ Anyscale runtime: {(time.monotonic() - api_start_time):.2f} seconds")
 #   return ret["choices"][0]["message"]["content"]


 # def parse_result(result):
-#   lines = result['completion'].split('\n')
+#   lines = result.split('\n')
 #   for line in lines:
 #     if 'Final answer' in line:
 #       return 'yes' in line.lower()
diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py
index 6d396245..c054411e 100644
--- a/ai_ta_backend/nomic_logging.py
+++ b/ai_ta_backend/nomic_logging.py
@@ -9,6 +9,7 @@
 from langchain.embeddings import OpenAIEmbeddings
 from nomic import AtlasProject, atlas

+OPENAI_API_TYPE = "azure"

 def log_convo_to_nomic(course_name: str, conversation) -> str:
   nomic.login(os.getenv('NOMIC_API_KEY')) # login during start of flask app
@@ -115,7 +116,7 @@ def log_convo_to_nomic(course_name: str, conversation) -> str:
       }]

       # create embeddings
-      embeddings_model = OpenAIEmbeddings(openai_api_type="azure") # type: ignore
+      embeddings_model = OpenAIEmbeddings(openai_api_type=OPENAI_API_TYPE) # type: ignore
       embeddings = embeddings_model.embed_documents(user_queries)

       # add embeddings to the project
@@ -279,7 +280,7 @@ def create_nomic_map(course_name: str, log_data: list):
     metadata.append(metadata_row)

   metadata = pd.DataFrame(metadata)
-  embeddings_model = OpenAIEmbeddings(openai_api_type="azure") # type: ignore
+  embeddings_model = OpenAIEmbeddings(openai_api_type=OPENAI_API_TYPE) # type: ignore
   embeddings = embeddings_model.embed_documents(user_queries)

   # create Atlas project
diff --git a/ai_ta_backend/vector_database.py b/ai_ta_backend/vector_database.py
index 89383d95..8c101007 100644
--- a/ai_ta_backend/vector_database.py
+++ b/ai_ta_backend/vector_database.py
@@ -43,9 +43,9 @@
 from ai_ta_backend.extreme_context_stuffing import OpenAIAPIProcessor
 from ai_ta_backend.utils_tokenization import count_tokens_and_cost
 from ai_ta_backend.parallel_context_processing import context_processing
+#from ai_ta_backend.filtering_contexts import run
 from ai_ta_backend.filtering_contexts import run_context_filtering
-

 MULTI_QUERY_PROMPT = hub.pull("langchain-ai/rag-fusion-query-generation")
 OPENAI_API_TYPE = "azure" # "openai" or "azure"
@@ -1158,6 +1158,10 @@ def getTopContextsWithMQR(self, search_query: str, course_name: str, token_limit
       print(f"⏰ Multi-query processing runtime: {(time.monotonic() - mq_start_time):.2f} seconds")

+      # filtered_docs = run_context_filtering(contexts=found_docs, user_query=search_query, max_time_before_return=45, max_concurrency=100)
+      # print(f"Number of docs after context filtering: {len(filtered_docs)}")
+      # exit()
+
       # 'context padding' // 'parent document retriever'
       final_docs = context_processing(found_docs, search_query, course_name)
       print(f"Number of final docs after context padding: {len(final_docs)}")
@@ -1167,6 +1171,8 @@ def getTopContextsWithMQR(self, search_query: str, course_name: str, token_limit
       token_counter, _ = count_tokens_and_cost(pre_prompt + '\n\nNow please respond to my query: ' + search_query) # type: ignore
       filtered_docs = run_context_filtering(contexts=final_docs, user_query=search_query, max_time_before_return=45, max_concurrency=100)
+      #filtered_docs = list(run(contexts=final_docs, user_query=search_query, max_time_before_return=45, max_concurrency=100))
+
       print(f"Number of docs after context filtering: {len(filtered_docs)}")
       if len(filtered_docs) > 0:
         final_docs_used = filtered_docs
       else:
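
For reference, a minimal sketch of the `time.monotonic()` timing pattern the new `⏰ Anyscale runtime` log lines in this diff rely on. The `timed_call` helper below is hypothetical (not part of the repo) and only illustrates why a monotonic clock is used for the elapsed-time delta:

```python
import time

def timed_call(label, fn, *args, **kwargs):
    # Hypothetical helper mirroring the pattern in the diff: time.monotonic()
    # never jumps backwards (unlike time.time(), which can shift with NTP or
    # manual clock changes), so the delta is a reliable runtime measurement.
    start = time.monotonic()
    result = fn(*args, **kwargs)
    print(f"⏰ {label} runtime: {(time.monotonic() - start):.2f} seconds")
    return result

# Assumed usage, analogous to the Anyscale call being timed in filter_context:
# completion = timed_call("Anyscale", openai.ChatCompletion.create, model=..., messages=...)
```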