Skip to content

Commit

Permalink
Revert "[FS-182355]: Implement logging and use v2"
Browse files Browse the repository at this point in the history
  • Loading branch information
kartik-ganesh committed Sep 10, 2024
1 parent 52e4739 commit 6671c03
Show file tree
Hide file tree
Showing 7 changed files with 90 additions and 485 deletions.
4 changes: 0 additions & 4 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,6 @@ __pycache__/
# C extensions
*.so

# PyCharm
.idea
models/

# Distribution / packaging
.Python
build/
Expand Down
87 changes: 70 additions & 17 deletions libs/infinity_emb/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
ARG BASE_IMAGE=nvidia/cuda:12.1.0-base-ubuntu22.04
# Use the Python base image
FROM $BASE_IMAGE AS base
FROM nvidia/cuda:12.1.1-base-ubuntu22.04 AS base

ENV PYTHONUNBUFFERED=1 \
\
Expand All @@ -16,27 +15,20 @@ ENV PYTHONUNBUFFERED=1 \
POETRY_NO_INTERACTION=1 \
EXTRAS="all" \
PYTHON="python3.11"

RUN apt-get update && apt-get install build-essential python3-dev $PYTHON-venv $PYTHON curl -y

# Set the working directory for the app
RUN apt-get update && apt-get install build-essential python3-dev $PYTHON-venv $PYTHON curl -y
WORKDIR /app

FROM base as builder

# Set the working directory for the app
# Define the version of Poetry to install (default is 1.7.1)
# Define the directory to install Poetry to (default is /opt/poetry)
ARG POETRY_VERSION=1.7.1
ARG POETRY_HOME=/opt/poetry

# Create a Python virtual environment for Poetry and install it
RUN curl -sSL https://install.python-poetry.org | POETRY_HOME=$POETRY_HOME POETRY_VERSION=$POETRY_VERSION $PYTHON -

ENV PATH=$POETRY_HOME/bin:$PATH

# Test if Poetry is installed in the expected path
RUN echo "Poetry version:" && poetry --version

# Copy the rest of the app source code (this layer will be invalidated and rebuilt whenever the source code changes)
COPY poetry.lock poetry.toml pyproject.toml README.md /app/
# Install dependencies only
Expand All @@ -47,14 +39,75 @@ RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}" --without li
# remove cache
RUN poetry cache clear pypi --all

# Use a multi-stage build -> production version
FROM base AS production
FROM builder AS testing
# Install lint and test dependencies on top of the runtime deps from `builder`.
RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}"
# Lint / static checks — fail the build early on style or type errors.
RUN poetry run ruff .
RUN poetry run black --check .
RUN poetry run mypy .
# pytest
COPY tests tests
# TARGETPLATFORM is a BuildKit-provided build arg; it MUST be redeclared in the
# stage that uses it, otherwise it expands to the empty string and the
# linux/amd64 branch below can never be taken.
ARG TARGETPLATFORM
# run end to end tests because of duration of build in github ci.
# Run tests/end_to_end on TARGETPLATFORM x86_64 otherwise run tests/end_to_end_gpu
# poetry run python -m pytest tests/end_to_end -x
RUN if [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \
    poetry run python -m pytest tests/end_to_end -x ; \
    else \
    poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \
    fi
# Marker file copied by later stages to force this stage to execute.
RUN echo "all tests passed" > "test_results.txt"


# Use a multi-stage build -> production version, with download
FROM base AS tested-builder
COPY --from=builder /app /app
COPY /models /models
COPY environment_config.sh ./environment_config.sh
# force testing stage to run
COPY --from=testing /app/test_results.txt /app/test_results.txt
ENV HF_HOME=/app/.cache/huggingface
ENV PATH=/app/.venv/bin:$PATH
# do nothing
RUN echo "copied all files"


ENV SENTENCE_TRANSFORMERS_HOME=/app/.cache/torch
# Export with tensorrt, not recommended.
# docker buildx build --target=production-tensorrt -f Dockerfile .
FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 AS production-tensorrt
ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=off \
    PYTHON="python3.11"
RUN apt-get update && apt-get install python3-dev python3-pip $PYTHON build-essential curl -y
COPY --from=builder /app /app
# force testing stage to run
COPY --from=testing /app/test_results.txt /app/test_results.txt
ENV HF_HOME=/app/.cache/torch
ENV PATH=/app/.venv/bin:$PATH
RUN pip install --no-cache-dir "onnxruntime-gpu==1.17.0" "tensorrt==8.6.*"
# Dockerfile ENV expansion uses ${VAR}; the previous `$(PYTHON)` is shell
# command-substitution syntax and was kept literally in the resulting paths,
# so the tensorrt libs/bin dirs were never actually on the search paths.
# Also use the key=value ENV form (the space-separated form is deprecated).
ENV LD_LIBRARY_PATH=/app/.venv/lib/${PYTHON}/site-packages/tensorrt:/usr/lib/x86_64-linux-gnu:/app/.venv/lib/${PYTHON}/site-packages/tensorrt_libs:${LD_LIBRARY_PATH}
ENV PATH=/app/.venv/lib/${PYTHON}/site-packages/tensorrt/bin:${PATH}
ENTRYPOINT ["infinity_emb"]


# Use a multi-stage build -> production version, with download
# docker buildx build --target=production-with-download \
# --build-arg MODEL_NAME=BAAI/bge-small-en-v1.5 --build-arg ENGINE=torch -f Dockerfile -t infinity-BAAI-small .
FROM tested-builder AS production-with-download
# collect model name and engine from build args
ARG MODEL_NAME
# Fail the build fast with a clear message when a required build arg is missing.
RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi
ARG ENGINE
RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi
# Optional extra pip packages to bake into the image; skipped when unset.
ARG EXTRA_PACKAGES
RUN if [ -n "${EXTRA_PACKAGES}" ]; then python -m pip install --no-cache-dir ${EXTRA_PACKAGES} ; fi
# Pre-download the model weights into the image's HF cache at build time.
# will exit with 3 if model is downloaded # TODO: better exit code
RUN infinity_emb v2 --model-id $MODEL_NAME --engine $ENGINE --preload-only || [ $? -eq 3 ]
ENTRYPOINT ["infinity_emb"]

ENTRYPOINT ["/bin/bash" , "-c", "source ./environment_config.sh "]
# flash attention fa2
FROM tested-builder AS production-with-fa2
# The wheel's cp tag must match the venv interpreter (PYTHON="python3.11" in the
# base stage): the previous cp310-cp310 wheel cannot be installed into a 3.11
# environment — pip rejects it as incompatible. Use the cp311 build of the same
# release instead.
RUN python -m pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.3cxx11abiFalse-cp311-cp311-linux_x86_64.whl
ENTRYPOINT ["infinity_emb"]

# Use a multi-stage build -> production version
# Default runtime target: the tested virtualenv without build-only tooling.
FROM tested-builder AS production
ENTRYPOINT ["infinity_emb"]
5 changes: 0 additions & 5 deletions libs/infinity_emb/environment_config.sh

This file was deleted.

11 changes: 3 additions & 8 deletions libs/infinity_emb/infinity_emb/infinity_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,7 @@
RerankInput,
ReRankResult,
)
from infinity_emb.log_handler import (
UVICORN_LOG_LEVELS,
logger,
StructuredLoggingMiddleware,
)
from infinity_emb.log_handler import UVICORN_LOG_LEVELS, logger
from infinity_emb.primitives import (
Device,
Dtype,
Expand Down Expand Up @@ -133,7 +129,6 @@ async def validate_token(

instrumentator = Instrumentator().instrument(app)
app.add_exception_handler(errors.OpenAIException, errors.openai_exception_handler)
app.add_middleware(StructuredLoggingMiddleware)

@app.get("/health", operation_id="health", response_class=responses.ORJSONResponse)
async def _health() -> dict[str, float]:
Expand Down Expand Up @@ -225,13 +220,13 @@ async def _embeddings(data: OpenAIEmbeddingInput):
if isinstance(data.input, str):
data.input = [data.input]

logger.info("[📝] Received request with %s inputs ", len(data.input))
logger.debug("[📝] Received request with %s inputs ", len(data.input))
start = time.perf_counter()

embedding, usage = await engine.embed(sentences=data.input)

duration = (time.perf_counter() - start) * 1000
logger.info("[✅] Done in %s ms", duration)
logger.debug("[✅] Done in %s ms", duration)

return OpenAIEmbeddingResult.to_embeddings_response(
embeddings=embedding,
Expand Down
Loading

0 comments on commit 6671c03

Please sign in to comment.