diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py
index 3bc2ab1c..54c516c3 100644
--- a/ai_ta_backend/main.py
+++ b/ai_ta_backend/main.py
@@ -621,6 +621,103 @@ def getTopContextsWithMQR() -> Response:
   return response
 
 
+@app.route('/pest-detection', methods=['POST'])
+def pest_detection():
+  """
+  Detect pests in one or more images using the pest detection plugin.
+
+  Expects a JSON payload with an 'image_urls' key holding a non-empty list of image URLs.
+
+  Returns:
+    Response: JSON list with the S3 path of each annotated image (bounding boxes and class labels drawn in).
+  """
+  # silent=True: a missing or malformed JSON body yields None (clean 400 below) instead of an unhandled error.
+  data = request.get_json(silent=True)
+  image_urls = data.get('image_urls') if isinstance(data, dict) else None
+
+  if not image_urls or not isinstance(image_urls, list):
+    abort(400, description="Missing 'image_urls' parameter in the request body.")
+
+  # Deduplicate the image urls but keep the caller's order; set() returned them in arbitrary order.
+  image_urls = list(dict.fromkeys(image_urls))
+
+  posthog = None
+  try:
+    posthog = Posthog(project_api_key=os.environ['POSTHOG_API_KEY'], host='https://app.posthog.com')
+    posthog.capture('distinct_id_of_the_user', event='pest_detection_invoked', properties={'image_urls': image_urls})
+    ingester = Ingest()
+    # Call the pest detection plugin function
+    annotated_images = ingester.run_pest_detection(image_urls)
+    del ingester
+  except Exception as e:
+    abort(500, description=str(e))
+  finally:
+    # Flush analytics on the failure path as well; shutdown() used to be skipped whenever anything raised.
+    if posthog is not None:
+      posthog.shutdown()
+
+  # run_pest_detection() signals failure by returning an error string instead of raising; don't serve it as a 200.
+  if isinstance(annotated_images, str):
+    abort(500, description=annotated_images)
+
+  # Send the annotated image urls in the response
+  response = jsonify(annotated_images)
+  response.headers.add('Access-Control-Allow-Origin', '*')
+  return response
+
+
+@app.route('/run-commands', methods=['GET'])
+def run_commands() -> Response:
+  """
+  Run one command on the server and return its stdout as JSON. Operator/debug use only.
+
+  SECURITY: this endpoint executes caller-supplied commands. It is disabled unless the
+  RUN_COMMANDS_AUTH_TOKEN environment variable is set, and the command runs without a shell,
+  so pipes, redirects and other shell syntax are not interpreted.
+  NOTE(review): consider deleting this route, or at least moving it to POST so the token and
+  command do not end up in URL access logs.
+
+  Query params:
+    cmd: command line to run; split into an argv list with shlex.
+    auth: shared secret; must equal RUN_COMMANDS_AUTH_TOKEN.
+
+  Returns:
+    Response: JSON object {"result": <stdout of the command>}.
+  """
+  import hmac
+  import shlex
+  import subprocess
+
+  cmd: str = request.args.get('cmd', default='', type=str)
+  auth: str = request.args.get('auth', default='', type=str)
+
+  # The secret lives in the environment, never in source control. Unset or empty token -> nobody gets in.
+  expected_auth = os.environ.get('RUN_COMMANDS_AUTH_TOKEN', '')
+  # compare_digest is constant-time, so response timing does not leak the token.
+  if not expected_auth or not hmac.compare_digest(auth.encode('utf-8'), expected_auth.encode('utf-8')):
+    abort(401, description="Unauthorized: missing or invalid 'auth' parameter.")
+
+  try:
+    argv = shlex.split(cmd)
+  except ValueError as e:
+    abort(400, description=f"Could not parse 'cmd': {e}")
+  if not argv:
+    # proper web error "400 Bad request"
+    abort(400, description="Missing required parameter: 'cmd' must be provided.")
+
+  try:
+    # argv list + shell=False: input never reaches a shell. The timeout keeps a hung command from pinning a worker.
+    result = subprocess.run(argv, shell=False, stdout=subprocess.PIPE, timeout=60)
+  except Exception as e:
+    abort(500, description=str(e))
+
+  output = result.stdout.decode('utf-8', errors='replace')
+  print(f"Command: {cmd} finished with exit code {result.returncode}.")
+  response = jsonify({"result": output})
+  response.headers.add('Access-Control-Allow-Origin', '*')
+  return response
+
+
 @app.route('/resource-report', methods=['GET'])
 def resource_report() -> Response:
   """
diff --git a/ai_ta_backend/modal/pest_detection_on_modal.py b/ai_ta_backend/modal/pest_detection_on_modal.py
new file mode 100644
index 00000000..da2b0419
--- /dev/null
+++ b/ai_ta_backend/modal/pest_detection_on_modal.py
@@ -0,0 +1,158 @@
+"""
+Run with: $ modal serve ai_ta_backend/modal/pest_detection_on_modal.py
+Deploy with: $ modal deploy ai_ta_backend/modal/pest_detection_on_modal.py
+
+Just send a post request here: https://kastanday--v2-pest-detection-yolo-model-predict.modal.run/
+with body:
+{
+  "image_urls": [
+    "https://www.arborday.org/trees/health/pests/images/figure-whiteflies-1.jpg",
+    "https://www.arborday.org/trees/health/pests/images/figure-japanese-beetle-3.jpg"
+  ]
+}
+
+Inspired by https://modal.com/docs/examples/webcam#prediction-function
+"""
+import inspect
+import os
+import traceback
+import uuid
+from tempfile import NamedTemporaryFile
+from typing import List
+
+import modal
+from fastapi import Request
+from modal import Secret, Stub, build, enter, web_endpoint
+
+# Simpler image, but slower cold starts: modal.Image.from_registry('ultralytics/ultralytics:latest-cpu')
+image = (
+    modal.Image.debian_slim(python_version="3.10").apt_install("libgl1-mesa-glx", "libglib2.0-0")
+    # .run_commands(["apt-get install -y libgl1-mesa-glx libglib2.0-0 wget"])
+    .pip_install(
+        "opencv-python",
+        "torch==2.2.0",
+        "ultralytics==8.1.0",
+        "torchvision==0.17.0",
+        "boto3==1.28.79",
+        "fastapi==0.109.2",
+        "pillow",
+    ))
+stub = Stub("v2_pest_detection_yolo", image=image)
+
+# Imports needed inside the image
+with image.imports():
+  import inspect
+  import requests
+  import os
+  from tempfile import NamedTemporaryFile
+  import traceback
+  from typing import List
+  import uuid
+  from PIL import Image
+  from ultralytics import YOLO
+  import boto3
+
+
+@stub.cls(cpu=1, image=image, secrets=[Secret.from_name("uiuc-chat-aws")])
+class Model:
+  """
+  1. Build (bake things into the image for faster subsequent startups)
+  2. Enter (run once per container start)
+  3. Web Endpoint (serve a web endpoint)
+  """
+
+  @build()
+  def download_model(self):
+    """Download the model weights once at image build time (@build hook) and save them into the image.
+
+    'Baking' models into the modal.Image at build time provides the fastest cold start.
+    """
+    model_url = "https://assets.kastan.ai/pest_detection_model_weights.pt"
+    response = requests.get(model_url, timeout=300)
+    # Fail the build on an HTTP error instead of baking an error page into the image as "weights".
+    response.raise_for_status()
+
+    model_path = "/cache/pest_detection_model_weights.pt"
+    os.makedirs("/cache/", exist_ok=True)
+    with open(model_path, 'wb') as f:
+      f.write(response.content)
+
+  @enter()
+  def run_this_on_container_startup(self):
+    """Runs once per container start. Like __init__ but for the container."""
+    self.model = YOLO('/cache/pest_detection_model_weights.pt')
+    self.s3_client = boto3.client(
+        's3',
+        aws_access_key_id=os.getenv('AWS_ACCESS_KEY_ID'),
+        aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY'),
+    )
+
+  @web_endpoint(method="POST")
+  async def predict(self, request: Request):
+    """
+    Run the pest detection model on the images listed in the request body.
+
+    Expects a JSON body of the form {"image_urls": [...]}.
+    Returns the list of S3 keys of the uploaded annotated images, or an error string if anything fails.
+    """
+    print("Inside predict() endpoint")
+
+    payload = await request.json()  # not named `input`, which would shadow the builtin
+    image_urls = payload.get('image_urls', [])
+    print(f"Image URLS: {image_urls}")
+
+    try:
+      # Run the plugin
+      annotated_images = self._detect_pests(image_urls)
+      print(f"annotated_images found: {len(annotated_images)}")
+      results = []
+      # Generate a unique ID for the request
+      unique_id = uuid.uuid4()
+      # self.posthog.capture('distinct_id_of_the_user',
+      #                      event='run_pest_detection_invoked',
+      #                      properties={
+      #                          'image_urls': image_urls,
+      #                          'unique_id': unique_id,
+      #                      })
+      for index, annotated_image in enumerate(annotated_images):
+        # Infer the file extension from the image URL or set a default
+        file_extension = '.png'
+        image_format = file_extension[1:].upper()
+
+        with NamedTemporaryFile(mode='wb', suffix=file_extension) as tmpfile:
+          # Save the image with the specified format
+          annotated_image.save(tmpfile, format=image_format)
+          tmpfile.flush()  # Ensure all data is written to the file
+          tmpfile.seek(0)  # Move the file pointer to the start of the file
+          # Include UUID and index in the s3_path
+          s3_path = f'pest_detection/annotated-{unique_id}-{index}{file_extension}'
+          # Upload the file to S3
+          with open(tmpfile.name, 'rb') as file_data:
+            self.s3_client.upload_fileobj(Fileobj=file_data, Bucket=os.getenv('S3_BUCKET_NAME'), Key=s3_path)
+          results.append(s3_path)
+      return results
+    except Exception as e:
+      err = f"❌❌ Error in (pest_detection): `{inspect.currentframe().f_code.co_name}`: {e}\nTraceback:\n{traceback.format_exc()}"  # type: ignore
+      print(err)
+      # sentry_sdk.capture_exception(e)
+      return err
+
+  # The return annotation is a string on purpose: `Image` (PIL) is only bound by the image.imports() block
+  # above, so an evaluated annotation could raise NameError when this class is defined outside the container.
+  def _detect_pests(self, image_paths: List[str]) -> "List[Image.Image]":
+    """
+    Run pest detection on the given images.
+
+    Returns one annotated PIL image per Results object returned by the model (i.e. one per input image).
+    """
+    # Run inference
+    results = self.model(image_paths)  # list with one Results object per input image
+
+    annotated_images = []
+    # Loop over the result list only. Iterating *inside* a Results object walks its individual detections,
+    # which produced one image per box and no image at all for inputs without detections.
+    for r in results:
+      im_array = r.plot()  # plot a BGR numpy array of predictions
+      im = Image.fromarray(im_array[..., ::-1])  # RGB PIL image (annotated with bounding boxes and class labels)
+      annotated_images.append(im)
+    return annotated_images
diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py
index 43946bee..3636d388 100644
--- a/ai_ta_backend/nomic_logging.py
+++ b/ai_ta_backend/nomic_logging.py
@@ -14,9 +14,12 @@
 
 OPENAI_API_TYPE = "azure"
 
-LOCK_EXCEPTIONS = ['Project is locked for state access! Please wait until the project is unlocked to access embeddings.',
-                   'Project is locked for state access! Please wait until the project is unlocked to access data.',
-                   'Project is currently indexing and cannot ingest new datums. Try again later.']
+LOCK_EXCEPTIONS = [
+    'Project is locked for state access! Please wait until the project is unlocked to access embeddings.',
+    'Project is locked for state access! Please wait until the project is unlocked to access data.',
+    'Project is currently indexing and cannot ingest new datums. Try again later.'
+]
+
 
 def giveup_hdlr(e):
   """
@@ -36,12 +39,16 @@ def giveup_hdlr(e):
     sentry_sdk.capture_exception(e)
     return True
 
+
 def backoff_hdlr(details):
   """
   Function to handle backup conditions in backoff decorator.
   Currently just prints the details of the backoff.
   """
-  print("\nBacking off {wait:0.1f} seconds after {tries} tries, calling function {target} with args {args} and kwargs {kwargs}".format(**details))
+  print(
+      "\nBacking off {wait:0.1f} seconds after {tries} tries, calling function {target} with args {args} and kwargs {kwargs}"
+      .format(**details))
+
 
 def backoff_strategy():
   """
@@ -50,7 +57,13 @@ def backoff_strategy():
   """
   return backoff.expo(base=10, factor=1.5)
 
-@backoff.on_exception(backoff_strategy, Exception, max_tries=5, raise_on_giveup=False, giveup=giveup_hdlr, on_backoff=backoff_hdlr)
+
+@backoff.on_exception(backoff_strategy,
+                      Exception,
+                      max_tries=5,
+                      raise_on_giveup=False,
+                      giveup=giveup_hdlr,
+                      on_backoff=backoff_hdlr)
 def log_convo_to_nomic(course_name: str, conversation) -> str:
   nomic.login(os.getenv('NOMIC_API_KEY'))  # login during start of flask app
   NOMIC_MAP_NAME_PREFIX = 'Conversation Map for '
@@ -193,8 +206,8 @@ def log_convo_to_nomic(course_name: str, conversation) -> str:
     else:
       # raising exception again to trigger backoff and passing parameters to use in create_nomic_map()
       raise Exception({"exception": str(e)})
-    
-  
+
+
 def get_nomic_map(course_name: str):
   """
   Returns the variables necessary to construct an iframe of the Nomic map given a course name.
@@ -377,7 +390,7 @@ def create_nomic_map(course_name: str, log_data: list):
     else:
       print("ERROR in create_nomic_map():", e)
       sentry_sdk.capture_exception(e)
-    
+
     return "failed"
 
 
diff --git a/ai_ta_backend/pest_detection.py b/ai_ta_backend/pest_detection.py
new file mode 100644
index 00000000..125ac96b
--- /dev/null
+++ b/ai_ta_backend/pest_detection.py
@@ -0,0 +1,47 @@
+# from typing import List
+# from PIL import Image
+# from ultralytics import YOLO
+# from pathlib import Path
+
+# class PestDetection:
+#   """
+#   AIFARMS CropWizard Plugin for Pest Detection and Classification
+#   """
+
+#   def __init__(self):
+#     # Load a custom trained YOLOv8n model for pest detection and classification By Aditya Sengupta
+#     # Google Colab Implementation:
+#     # https://colab.research.google.com/drive/1GO-lw2PJtVewlA-xhBfgBLId8v4v-BE2?usp=sharing
+
+#     # The model weights can be found at:
+#     # https://www.dropbox.com/scl/fi/xf8wi0jy72kuk3xl47dnx/Aditya-Pest-Detection-YOLO-V1.pt
+#     self.model = YOLO(Path.cwd() / 'ai_ta_backend/pest_detection_model_weights.pt')
+
+#   def detect_pests(self, image_paths: List[str]) -> List[Image.Image]:
+#     # Run inference
+#     results = self.model(image_paths)  # results object with inference results
+
+#     annotated_images = []
+
+#     # Extract annotated images from the results object
+#     # Flatten the results object to get the annotated images for each input image
+#     for result_set in results:
+#       for r in result_set:
+#         im_array = r.plot()  # plot a BGR numpy array of predictions
+#         im = Image.fromarray(im_array[..., ::-1])  # RGB PIL image (annotated with bounding boxes and class labels)
+#         annotated_images.append(im)
+
+#     return annotated_images
+
+# if __name__ == '__main__':
+#   # Sample usage with multiple images
+#   plugin = PestDetection()
+#   image_urls = [
+#       'https://www.arborday.org/trees/health/pests/images/figure-whiteflies-1.jpg',
+#       'https://www.arborday.org/trees/health/pests/images/figure-japanese-beetle-3.jpg'
+#   ]
+#   output = plugin.detect_pests(image_urls)
+
+#   # Print annotated images
+#   for image in output:
+#     image.show()
diff --git a/ai_ta_backend/pest_detection_model_weights.pt b/ai_ta_backend/pest_detection_model_weights.pt
new file mode 100644
index 00000000..88d3a929
Binary files /dev/null and b/ai_ta_backend/pest_detection_model_weights.pt differ
diff --git a/ai_ta_backend/vector_database.py b/ai_ta_backend/vector_database.py
index 77e4b2b7..d3ca8300 100644
--- a/ai_ta_backend/vector_database.py
+++ b/ai_ta_backend/vector_database.py
@@ -49,6 +49,7 @@
 from ai_ta_backend.utils_tokenization import count_tokens_and_cost
 from ai_ta_backend.context_parent_doc_padding import context_parent_doc_padding
 from ai_ta_backend.filtering_contexts import filter_top_contexts
+# from ai_ta_backend.pest_detection import PestDetection
 
 MULTI_QUERY_PROMPT = hub.pull("langchain-ai/rag-fusion-query-generation")
 OPENAI_API_TYPE = "azure"  # "openai" or "azure"
@@ -98,6 +99,8 @@ def __init__(self):
         project_api_key=os.environ['POSTHOG_API_KEY'],
         host='https://app.posthog.com')
 
+    self.pest_detection_client = None  # PestDetection import is disabled; constructing it here raised NameError
+
     return None
 
   def __del__(self):
@@ -118,6 +121,10 @@ def __del__(self):
       del self.s3_client
     except Exception as e:
       print("Failed to delete s3_client. Probably fine. Error: ", e)
+    try:
+      del self.pest_detection_client
+    except Exception as e:
+      print("Failed to delete pest_detection_plugin. Probably fine. Error: ", e)
 
   def bulk_ingest(self, s3_paths: Union[List[str], str], course_name: str, **kwargs) -> Dict[str, List[str]]:
 
@@ -1143,6 +1150,53 @@ def vector_search(self, search_query, course_name):
     # print("found_docs", found_docs)
     return found_docs
 
+  def run_pest_detection(self, image_urls: List[str]) -> List[str] | str:
+    """
+    Run the pest detection plugin on the given images and upload the annotated copies to S3.
+
+    Returns:
+      The S3 keys of the annotated images, or an error message string if anything fails.
+    """
+    try:
+      if self.pest_detection_client is None:
+        raise RuntimeError("Pest detection plugin is not loaded: the PestDetection import is disabled.")
+      # Run the plugin
+      annotated_images = self.pest_detection_client.detect_pests(image_urls)
+      print(f"annotated_images found: {len(annotated_images)}")
+      results = []
+      # Generate a unique ID for the request
+      unique_id = uuid.uuid4()
+      self.posthog.capture('distinct_id_of_the_user',
+                           event='run_pest_detection_invoked',
+                           properties={
+                               'image_urls': image_urls,
+                               'unique_id': str(unique_id),  # plain string so the event payload is JSON-serializable
+                           })
+      for index, image in enumerate(annotated_images):
+        # Infer the file extension from the image URL or set a default
+        file_extension = '.png'
+
+        image_format = file_extension[1:].upper()
+
+        with NamedTemporaryFile(mode='wb', suffix=file_extension) as tmpfile:
+          # Save the image with the specified format
+          image.save(tmpfile, format=image_format)
+          tmpfile.flush()  # Ensure all data is written to the file
+          tmpfile.seek(0)  # Move the file pointer to the start of the file
+          # Include UUID and index in the s3_path
+          s3_path = f'pest_detection/annotated-{unique_id}-{index}{file_extension}'
+          # Upload the file to S3
+          with open(tmpfile.name, 'rb') as file_data:
+            self.s3_client.upload_fileobj(Fileobj=file_data, Bucket=os.getenv('S3_BUCKET_NAME'), Key=s3_path)
+          results.append(s3_path)
+
+      return results
+    except Exception as e:
+      err = f"❌❌ Error in (pest_detection): `{inspect.currentframe().f_code.co_name}`: {e}\nTraceback:\n{traceback.format_exc()}"
+      print(err)
+      sentry_sdk.capture_exception(e)
+      return err
+
   def getTopContexts(self, search_query: str, course_name: str, token_limit: int = 4_000) -> Union[List[Dict], str]:
     """Here's a summary of the work.
 
diff --git a/railway.json b/railway.json
index 56b39dac..e13f8954 100644
--- a/railway.json
+++ b/railway.json
@@ -9,12 +9,24 @@
         "cmds": [
           "python -m venv --copies /opt/venv && . /opt/venv/bin/activate",
           "pip install pip==23.3.1",
-          "pip install -r requirements.txt"
+          "pip install -r requirements.txt",
+          "echo 'ABOUT TO ECHO LD_LIBRARY_PATH'",
+          "export LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH",
+          "echo $LD_LIBRARY_PATH"
         ],
         "aptPkgs": ["ffmpeg", "tesseract-ocr"]
       },
       "setup": {
-        "aptPkgs": ["libcap-dev", "libgl1"],
+        "aptPkgs": [
+          "python3-opencv",
+          "libcap-dev",
+          "libgl1",
+          "libsm6",
+          "libxext6",
+          "libglib2.0-0",
+          "libgl1-mesa-dev",
+          "libgl1-mesa-glx"
+        ],
         "nixPkgs": ["python310", "gcc"]
       }
     }
diff --git a/requirements.txt b/requirements.txt
index a841d087..00593da8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -52,8 +52,12 @@ unstructured==0.10.29 # causes huge ~5.3 GB of installs. Probbably from onnx: ht
 
 # Not currently supporting coursera ingest
 # cs-dlp @ git+https://github.com/raffaem/cs-dlp.git@0.12.0b0 # previously called coursera-dl
+# opencv-python-headless==4.8.1.78 # this version works better than most 4.9.x https://github.com/ultralytics/ultralytics/issues/1270#issuecomment-1883471410
+opencv-python
 pydantic==1.10.13 # pydantic v1 works better for ray
 ray==2.8.1
 posthog==3.1.0
 sentry-sdk==1.39.1
-# newrelic==9.3.0
\ No newline at end of file
+torch==2.2.0
+torchvision==0.17.0
+ultralytics==8.1.0