from flask import Flask, request, jsonify
from privateGPT import init
import time
import uuid

app = Flask(__name__)

# Populated in the __main__ block below before the server starts accepting
# requests; module-level None keeps the name defined for query().
qa = None


@app.route("/query", methods=["GET"])
def query():
    """Answer the question passed in the ``q`` query-string parameter.

    Responds with JSON ``{"query": ..., "answer": ...}``.  A missing or
    empty ``q`` short-circuits with the answer "Empty input" instead of
    invoking the model.
    """
    # Per-request id so interleaved log lines from concurrent requests
    # can be correlated.
    req_id = str(uuid.uuid4())
    print(f"Request {req_id} received")

    q = request.args.get("q")
    if not q:  # covers both None (param absent) and the empty string
        return jsonify(query=q, answer="Empty input")

    start = time.time()
    res = qa(q)
    answer = res['result']
    elapsed = time.time() - start
    print(f"Request {req_id} | Query: {q} | Answer: {answer} | Time: {elapsed}")
    return jsonify(query=q, answer=answer)


if __name__ == '__main__':
    qa = init()
    # waitress is a production WSGI server; the flask debug server is not
    # safe to expose on 0.0.0.0.
    from waitress import serve
    serve(app, host="0.0.0.0", port=5000)
Please check if it exists and is readable.") @@ -20,68 +18,34 @@ model_type = os.environ.get('MODEL_TYPE') model_path = os.environ.get('MODEL_PATH') model_n_ctx = os.environ.get('MODEL_N_CTX') -model_n_batch = int(os.environ.get('MODEL_N_BATCH',8)) -target_source_chunks = int(os.environ.get('TARGET_SOURCE_CHUNKS',4)) +model_n_batch = int(os.environ.get('MODEL_N_BATCH', 8)) +target_source_chunks = int(os.environ.get('TARGET_SOURCE_CHUNKS', 4)) from constants import CHROMA_SETTINGS -def main(): + +def init(): # Parse the command line arguments - args = parse_arguments() embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name) - chroma_client = chromadb.PersistentClient(settings=CHROMA_SETTINGS , path=persist_directory) - db = Chroma(persist_directory=persist_directory, embedding_function=embeddings, client_settings=CHROMA_SETTINGS, client=chroma_client) + chroma_client = chromadb.PersistentClient(settings=CHROMA_SETTINGS, path=persist_directory) + db = Chroma(persist_directory=persist_directory, embedding_function=embeddings, client_settings=CHROMA_SETTINGS, + client=chroma_client) retriever = db.as_retriever(search_kwargs={"k": target_source_chunks}) # activate/deactivate the streaming StdOut callback for LLMs - callbacks = [] if args.mute_stream else [StreamingStdOutCallbackHandler()] + callbacks = [StreamingStdOutCallbackHandler()] # Prepare the LLM match model_type: case "LlamaCpp": - llm = LlamaCpp(model_path=model_path, max_tokens=model_n_ctx, n_batch=model_n_batch, callbacks=callbacks, verbose=False) + llm = LlamaCpp(model_path=model_path, max_tokens=model_n_ctx, n_batch=model_n_batch, callbacks=callbacks, + verbose=False) case "GPT4All": - llm = GPT4All(model=model_path, max_tokens=model_n_ctx, backend='gptj', n_batch=model_n_batch, callbacks=callbacks, verbose=False) + llm = GPT4All(model=model_path, max_tokens=model_n_ctx, backend='gptj', n_batch=model_n_batch, + callbacks=callbacks, verbose=False) case _default: # raise exception if model_type 
is not supported - raise Exception(f"Model type {model_type} is not supported. Please choose one of the following: LlamaCpp, GPT4All") - - qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, return_source_documents= not args.hide_source) - # Interactive questions and answers - while True: - query = input("\nEnter a query: ") - if query == "exit": - break - if query.strip() == "": - continue - - # Get the answer from the chain - start = time.time() - res = qa(query) - answer, docs = res['result'], [] if args.hide_source else res['source_documents'] - end = time.time() - - # Print the result - print("\n\n> Question:") - print(query) - print(f"\n> Answer (took {round(end - start, 2)} s.):") - print(answer) - - # Print the relevant sources used for the answer - for document in docs: - print("\n> " + document.metadata["source"] + ":") - print(document.page_content) - -def parse_arguments(): - parser = argparse.ArgumentParser(description='privateGPT: Ask questions to your documents without an internet connection, ' - 'using the power of LLMs.') - parser.add_argument("--hide-source", "-S", action='store_true', - help='Use this flag to disable printing of source documents used for answers.') - - parser.add_argument("--mute-stream", "-M", - action='store_true', - help='Use this flag to disable the streaming StdOut callback for LLMs.') - - return parser.parse_args() + raise Exception( + f"Model type {model_type} is not supported. 
Please choose one of the following: LlamaCpp, GPT4All") + qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=False) -if __name__ == "__main__": - main() + return qa From f45a991803ac340fd4aaaa00813be64cfb1b2a54 Mon Sep 17 00:00:00 2001 From: aktoboy <111744450+aktoboy@users.noreply.github.com> Date: Sat, 30 Sep 2023 15:36:31 +0530 Subject: [PATCH 3/7] Update requirements.txt --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index f5906cf..2445bf1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,3 +12,4 @@ pandoc==2.3 pypandoc==1.11 tqdm==4.66.1 sentence_transformers==2.2.2 +flask==2.0.1 From e37672a445cc33031286ebae8df9fd4f61bc3d98 Mon Sep 17 00:00:00 2001 From: aktoboy <111744450+aktoboy@users.noreply.github.com> Date: Sat, 30 Sep 2023 16:12:26 +0530 Subject: [PATCH 4/7] Update server.py --- server.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server.py b/server.py index b1dbe83..6ee3c11 100644 --- a/server.py +++ b/server.py @@ -22,4 +22,5 @@ def query(): if __name__ == '__main__': qa = init() - app.run(debug=True) + from waitress import serve + serve(app, host="0.0.0.0", port=5000) From 831b7f60898768228478e4dceb2bc066ebaa4fed Mon Sep 17 00:00:00 2001 From: aktoboy <111744450+aktoboy@users.noreply.github.com> Date: Sat, 30 Sep 2023 16:12:56 +0530 Subject: [PATCH 5/7] Update requirements.txt --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 2445bf1..415278c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,3 +13,4 @@ pypandoc==1.11 tqdm==4.66.1 sentence_transformers==2.2.2 flask==2.0.1 +waitress==2.1.2 From f719dd39524a3302ad3f4915dd4884ba74232dfc Mon Sep 17 00:00:00 2001 From: aktoboy <111744450+aktoboy@users.noreply.github.com> Date: Sat, 30 Sep 2023 16:14:17 +0530 Subject: [PATCH 6/7] Update server.py --- server.py | 4 ++++ 1 file changed, 4 
@app.route("/health", methods=["GET"])
def health():
    """Liveness probe: always responds 200 with ``{"status": "OK"}``."""
    return jsonify(status="OK")