FootprintAI · hsinatfootprintai · Oct 30, 2024 · Oct 30, 2024 · Oct 30, 2024
diff --git a/kserve_wrapper/Dockerfile b/kserve_wrapper/Dockerfile
@@ -0,0 +1,14 @@
+FROM python:3.10
+
+# System packages; the apt package lists are removed to keep this layer small.
+RUN apt-get update && \
+    apt-get install ffmpeg libsm6 libxext6 -y && \
+    rm -rf /var/lib/apt/lists/*
+WORKDIR /app
+
+# Copy requirements first so the dependency layer is reused when only code changes.
+COPY requirements.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade pip && pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+ENTRYPOINT ["python"]
diff --git a/kserve_wrapper/build-docker.sh b/kserve_wrapper/build-docker.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+set -euo pipefail  # abort before pushing if the build fails
+TAG="v0.0.1.rc0"
+docker build -t "footprintai/fed-multimodal-restcol-kserve:${TAG}" -f Dockerfile .
+docker push "footprintai/fed-multimodal-restcol-kserve:${TAG}"
diff --git a/kserve_wrapper/main.py b/kserve_wrapper/main.py
@@ -0,0 +1,15 @@
+import argparse
+import os
+import kserve
+
+from multimodal import MultiModalModel
+
+DEFAULT_MODEL_NAME = os.environ.get('DEFAULT_MODEL_NAME')  # None when unset
+parser = argparse.ArgumentParser(parents=[kserve.model_server.parser])
+parser.add_argument('--model_name', default=DEFAULT_MODEL_NAME)
+args, _ = parser.parse_known_args()  # flags this parser does not know are discarded
+
+if __name__ == "__main__":
+    served_model = MultiModalModel(name=args.model_name)
+    served_model.load()  # marks the model ready before it is handed to the server
+    kserve.ModelServer().start([served_model])
diff --git a/kserve_wrapper/multimodal.py b/kserve_wrapper/multimodal.py
@@ -0,0 +1,87 @@
+import cv2
+import numpy as np
+import kserve
+from typing import Dict
+
+def imread(filepath: str):
+    """Read the image at *filepath* and return it converted from BGR to RGB."""
+    image = cv2.imread(filepath, cv2.IMREAD_UNCHANGED)
+    if image is None:
+        # cv2.imread returns None (it does not raise) when the file is unreadable.
+        raise OSError(f"cannot read image: {filepath!r}")
+    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+
+def base64decode(s: str):
+    """Decode a base64 string into an image array using ``cv2.imdecode``.
+
+    The raw bytes are decoded with ``cv2.IMREAD_UNCHANGED``.
+    """
+    import base64
+
+    encoded = np.frombuffer(base64.b64decode(s), dtype=np.uint8)
+    return cv2.imdecode(encoded, cv2.IMREAD_UNCHANGED)
+
+def base64encode(im) -> str:
+    """Encode image *im* as JPEG and return it as a base64 ASCII string."""
+    import base64
+    jpeg_buffer = cv2.imencode('.jpg', im)[1]
+    # b64encode returns bytes; decode so the result matches the ``str`` annotation.
+    return base64.b64encode(jpeg_buffer).decode("ascii")
+
+class MultiModalModel(kserve.Model):
+    """KServe model that reports the decoded image shape and echoes the text."""
+
+    def __init__(self, name: str):
+        super().__init__(name)
+        self.name = name
+
+    def load(self):
+        """Flag the model as ready; no model weights are loaded yet."""
+        # TODO: load models
+        self.ready = True
+
+    def predict(self, request: Dict, headers: Dict[str, str] = None) -> Dict:
+        """Describe the first instance of *request*; other instances are ignored.
+
+        The request is wrapped in the following format::
+
+            {
+              "instances": [
+                {
+                  "image_bytes": {"b64": "<b64-encoded>"},
+                  "audio_bytes": {"b64": "<b64-encoded>"},
+                  "text": <string>,
+                  "key": "somekeys",
+                },
+              ],
+            }
+
+        and the response is wrapped into the following::
+
+            {
+              "predictions": [
+                {"predicted": {...}, "key": "somekeys", "type": "multimodal-detector"},
+              ]
+            }
+
+        Raises ``KeyError`` for a missing ``image_bytes``/``text`` field and
+        ``ValueError`` when the image payload cannot be decoded.
+        """
+        instance = request["instances"][0]
+        image = base64decode(instance["image_bytes"]["b64"])
+        if image is None:
+            # cv2.imdecode returns None instead of raising on undecodable data.
+            raise ValueError("image_bytes.b64 is not a decodable image")
+        # Single-channel images decode to a 2-D array, so pad the shape with c=1.
+        h, w, c = (*image.shape, 1)[:3]
+
+        prediction = {
+            "predicted": {
+                "image": {"h": h, "w": w, "c": c},
+                "text": instance["text"],
+            },
+            # Echo the caller's key; the original referenced an undefined name.
+            "key": instance.get("key"),
+            "type": "multimodal-detector",
+        }
+        return {"predictions": [prediction]}
diff --git a/kserve_wrapper/requirements.txt b/kserve_wrapper/requirements.txt
@@ -0,0 +1,5 @@
+kserve==v0.10.1
+# Install exactly one OpenCV wheel: opencv-python, -headless and -contrib all
+# ship the same ``cv2`` package and overwrite each other when combined.
+opencv-contrib-python==4.8.0.74
+numpy==1.24.4