diff --git a/kserve_wrapper/Dockerfile b/kserve_wrapper/Dockerfile
new file mode 100644
index 0000000..0e8c2c9
--- /dev/null
+++ b/kserve_wrapper/Dockerfile
@@ -0,0 +1,14 @@
+FROM python:3.10
+
+RUN apt-get update && \
+    apt-get install ffmpeg libsm6 libxext6 -y
+
+WORKDIR /app
+
+COPY requirements.txt requirements.txt
+
+RUN pip install --no-cache-dir --upgrade pip && pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+
+ENTRYPOINT ["python"]
diff --git a/kserve_wrapper/build-docker.sh b/kserve_wrapper/build-docker.sh
new file mode 100755
index 0000000..f1ee172
--- /dev/null
+++ b/kserve_wrapper/build-docker.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+#
+TAG="v0.0.1.rc0"
+docker build -t footprintai/fed-multimodal-restcol-kserve:${TAG} -f Dockerfile .
+docker push footprintai/fed-multimodal-restcol-kserve:${TAG}
diff --git a/kserve_wrapper/main.py b/kserve_wrapper/main.py
new file mode 100644
index 0000000..8454c2f
--- /dev/null
+++ b/kserve_wrapper/main.py
@@ -0,0 +1,15 @@
+import argparse
+import os
+import kserve
+
+from multimodal import MultiModalModel
+
+DEFAULT_MODEL_NAME = os.getenv('DEFAULT_MODEL_NAME')
+parser = argparse.ArgumentParser(parents=[kserve.model_server.parser])
+parser.add_argument('--model_name', default=DEFAULT_MODEL_NAME)
+args, _ = parser.parse_known_args()
+
+if __name__ == "__main__":
+    model = MultiModalModel(name=args.model_name)
+    model.load()
+    kserve.ModelServer().start([model])
diff --git a/kserve_wrapper/multimodal.py b/kserve_wrapper/multimodal.py
new file mode 100644
index 0000000..326fe97
--- /dev/null
+++ b/kserve_wrapper/multimodal.py
@@ -0,0 +1,89 @@
+import cv2
+import numpy as np
+import kserve
+from typing import Dict
+
+# imread reads an image file and converts it from BGR to RGB
+def imread(filepath:str):
+    import cv2
+
+    im = cv2.imread(filepath,cv2.IMREAD_UNCHANGED)
+    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
+    return im
+
+# base64decode decodes a base64-encoded image string into an ndarray
+def base64decode(s:str):
+    import base64
+    import cv2
+    import numpy as np
+
+    jpg_original = base64.b64decode(s)
+    jpg_as_np = np.frombuffer(jpg_original, dtype=np.uint8)
+    im = cv2.imdecode(jpg_as_np, cv2.IMREAD_UNCHANGED)
+    return im
+
+def base64encode(im) -> str:
+    import base64
+    import cv2
+
+    im_encode = cv2.imencode('.jpg', im)[1]
+    # b64encode returns bytes; decode so the declared str return type holds
+    return base64.b64encode(im_encode).decode('utf-8')
+
+class MultiModalModel(kserve.Model):
+    def __init__(self, name: str):
+        super().__init__(name)
+        self.name = name
+
+    def load(self):
+        # TODO: load models
+        self.ready = True
+
+    def predict(self, request: Dict, headers: Dict[str, str] = None) -> Dict:
+        inputs = request["instances"]
+        # request is wrapped the following format
+        # {
+        #   "instances": [
+        #       {
+        #           "image_bytes": {
+        #               "b64": "",
+        #           },
+        #           "audio_bytes": {
+        #               "b64": "",
+        #           },
+        #           "text": ,
+        #           "key": "somekeys",
+        #       },
+        #   ],
+        # }
+        # and response is wrapped into the following
+        # {
+        #   "predictions: [
+        #       {
+        #           "predicted": {},
+        #           "key": "somekeys",
+        #           "type": "multimodal-detector",
+        #       },
+        #   ]
+        # }
+
+        im1 = base64decode(inputs[0]["image_bytes"]["b64"])
+        h, w, c = im1.shape
+        text = inputs[0]["text"]
+        # echo the caller-supplied correlation key back in the response
+        key = inputs[0]["key"]
+
+        return {
+            "predictions": [
+                {
+                    "predicted": {
+                        "image": {
+                            "h": h,
+                            "w": w,
+                            "c": c,
+                        },
+                        "text": text,
+                    },
+                    "key": key,
+                    "type": "multimodal-detector",
+                },
+            ]
+        }
diff --git a/kserve_wrapper/requirements.txt b/kserve_wrapper/requirements.txt
new file mode 100644
index 0000000..af1d030
--- /dev/null
+++ b/kserve_wrapper/requirements.txt
@@ -0,0 +1,5 @@
+kserve==v0.10.1
+opencv-python==4.8.0.74
+opencv-python-headless==4.8.0.74
+opencv-contrib-python
+numpy==1.24.4