-
Notifications
You must be signed in to change notification settings - Fork 10k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
22 changed files
with
601 additions
and
585 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
# Global build arguments. They are declared before the first FROM so that they can be | ||
# referenced in the FROM lines of the stages below. | ||
# Ubuntu release used in the nvidia/cuda image tags. | ||
ARG UBUNTU_VERSION=22.04 | ||
# This needs to generally match the container host's environment. | ||
ARG CUDA_VERSION=12.6.0 | ||
# Target the CUDA build image ("devel" variant, used as the base of the build stage). | ||
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} | ||
|
||
# CUDA "runtime" variant image, used as the base of the full, light and server stages. | ||
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} | ||
|
||
# Build stage: compiles the project with CUDA support inside the CUDA "devel" image. | ||
FROM ${BASE_CUDA_DEV_CONTAINER} AS build | ||
|
||
# Optional CUDA architecture list. When left at "default", no | ||
# -DCMAKE_CUDA_ARCHITECTURES flag is passed to CMake at all. | ||
ARG CUDA_DOCKER_ARCH=default | ||
|
||
# Toolchain and libraries needed to configure and compile the project. | ||
RUN apt-get update \ | ||
    && apt-get install -y \ | ||
        build-essential \ | ||
        cmake \ | ||
        git \ | ||
        libcurl4-openssl-dev \ | ||
        libgomp1 \ | ||
        python3 \ | ||
        python3-pip | ||
|
||
WORKDIR /app | ||
|
||
COPY . . | ||
|
||
# Configure, build, then copy the resulting binaries from build/bin into /app. | ||
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \ | ||
        export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \ | ||
    fi \ | ||
    && cmake -B build \ | ||
        -DGGML_NATIVE=OFF \ | ||
        -DGGML_CUDA=ON \ | ||
        -DLLAMA_CURL=ON \ | ||
        ${CMAKE_ARGS} \ | ||
        -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined \ | ||
        . \ | ||
    && cmake --build build --config Release -j$(nproc) \ | ||
    && cp build/bin/* . | ||
|
||
# Gather every shared object produced by the build into /app/lib so the | ||
# runtime stages can copy them from one place. | ||
RUN mkdir -p /app/lib \ | ||
    && find build -name "*.so" -exec cp {} /app/lib \; | ||
|
||
|
||
# Full image: the whole /app tree from the build stage plus the Python tooling | ||
# required by requirements.txt; started through .devops/tools.sh. | ||
FROM ${BASE_CUDA_RUN_CONTAINER} AS full | ||
|
||
COPY --from=build /app /app | ||
|
||
WORKDIR /app | ||
|
||
# Install OS packages and Python requirements, then clean up in the same layer. | ||
# apt-get is used throughout because the plain "apt" front end is not meant for | ||
# scripts. pip runs with --no-cache-dir so its download cache never lands in the | ||
# layer (the /var/cache cleanup below does not cover pip's cache directory). | ||
RUN apt-get update \ | ||
    && apt-get install -y \ | ||
        python3 \ | ||
        python3-pip \ | ||
        git \ | ||
        libgomp1 \ | ||
    && pip install --no-cache-dir --upgrade pip setuptools wheel \ | ||
    && pip install --no-cache-dir -r requirements.txt \ | ||
    && apt-get autoremove -y \ | ||
    && apt-get clean \ | ||
    && rm -rf /tmp/* /var/tmp/* \ | ||
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ | ||
    && find /var/cache -type f -delete | ||
|
||
ENTRYPOINT ["/app/.devops/tools.sh"] | ||
|
||
# Light image: only the llama-cli binary and the shared libraries it was built with. | ||
FROM ${BASE_CUDA_RUN_CONTAINER} AS light | ||
|
||
# Runtime libraries only. The build stage configures with -DLLAMA_CURL=ON, so the | ||
# binary needs libcurl's shared library at runtime; install it explicitly instead | ||
# of relying on the base image to provide it. apt-get is used rather than "apt" | ||
# because the latter is not meant for scripts. | ||
RUN apt-get update \ | ||
    && apt-get install -y \ | ||
        libcurl4 \ | ||
        libgomp1 \ | ||
    && apt-get autoremove -y \ | ||
    && apt-get clean \ | ||
    && rm -rf /tmp/* /var/tmp/* \ | ||
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ | ||
    && find /var/cache -type f -delete | ||
|
||
# Shared objects are placed next to the executable in /app. | ||
COPY --from=build /app/lib/ /app/ | ||
COPY --from=build /app/build/bin/llama-cli /app/ | ||
|
||
WORKDIR /app | ||
|
||
ENTRYPOINT [ "/app/llama-cli" ] | ||
|
||
# Server image: only the llama-server binary and the shared libraries it was built with. | ||
FROM ${BASE_CUDA_RUN_CONTAINER} AS server | ||
|
||
# Runtime libraries plus curl, which the HEALTHCHECK below invokes. | ||
# apt-get is used rather than "apt" because the latter is not meant for scripts. | ||
RUN apt-get update \ | ||
    && apt-get install -y \ | ||
        curl \ | ||
        libgomp1 \ | ||
    && apt-get autoremove -y \ | ||
    && apt-get clean \ | ||
    && rm -rf /tmp/* /var/tmp/* \ | ||
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ | ||
    && find /var/cache -type f -delete | ||
|
||
# Shared objects are placed next to the executable in /app. | ||
COPY --from=build /app/lib/ /app/ | ||
COPY --from=build /app/build/bin/llama-server /app/ | ||
|
||
WORKDIR /app | ||
|
||
# Must be set to 0.0.0.0 so it can listen to requests from host machine | ||
ENV LLAMA_ARG_HOST=0.0.0.0 | ||
|
||
# Probe the server's /health endpoint on port 8080 from inside the container. | ||
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] | ||
|
||
ENTRYPOINT [ "/app/llama-server" ] |
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
# Tag of the intel/oneapi-basekit image used as the base of every stage in this file. | ||
ARG ONEAPI_VERSION=2025.0.0-0-devel-ubuntu22.04 | ||
|
||
## Build image: compiles the project with the SYCL backend using icx/icpx. | ||
|
||
FROM intel/oneapi-basekit:${ONEAPI_VERSION} AS build | ||
|
||
# Set to ON to add -DGGML_SYCL_F16=ON to the CMake configuration. | ||
ARG GGML_SYCL_F16=OFF | ||
RUN apt-get update \ | ||
    && apt-get install -y \ | ||
        git \ | ||
        libcurl4-openssl-dev | ||
|
||
WORKDIR /app | ||
|
||
COPY . . | ||
|
||
# Configure, build, then copy the resulting binaries from build/bin into /app. | ||
RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \ | ||
        echo "GGML_SYCL_F16 is set" \ | ||
        && export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \ | ||
    fi \ | ||
    && echo "Building with dynamic libs" \ | ||
    && cmake -B build \ | ||
        -DGGML_NATIVE=OFF \ | ||
        -DGGML_SYCL=ON \ | ||
        -DCMAKE_C_COMPILER=icx \ | ||
        -DCMAKE_CXX_COMPILER=icpx \ | ||
        -DLLAMA_CURL=ON \ | ||
        ${OPT_SYCL_F16} \ | ||
    && cmake --build build --config Release -j$(nproc) \ | ||
    && cp build/bin/* . | ||
|
||
# Gather every shared object produced by the build into /app/lib so the | ||
# runtime stages can copy them from one place. | ||
RUN mkdir -p /app/lib \ | ||
    && find build -name "*.so" -exec cp {} /app/lib \; | ||
|
||
# Full image: the whole /app tree from the build stage plus the Python tooling | ||
# required by requirements.txt; started through .devops/tools.sh. | ||
# The AS keyword is uppercase to match the casing of FROM and the other stages. | ||
FROM intel/oneapi-basekit:$ONEAPI_VERSION AS full | ||
|
||
COPY --from=build /app /app | ||
|
||
WORKDIR /app | ||
|
||
# Install OS packages and Python requirements, then clean up in the same layer. | ||
# apt-get is used throughout because the plain "apt" front end is not meant for | ||
# scripts. pip runs with --no-cache-dir so its download cache never lands in the | ||
# layer (the /var/cache cleanup below does not cover pip's cache directory). | ||
RUN apt-get update \ | ||
    && apt-get install -y \ | ||
        python3 \ | ||
        python3-pip \ | ||
        git \ | ||
        libgomp1 \ | ||
    && pip install --no-cache-dir --upgrade pip setuptools wheel \ | ||
    && pip install --no-cache-dir -r requirements.txt \ | ||
    && apt-get autoremove -y \ | ||
    && apt-get clean \ | ||
    && rm -rf /tmp/* /var/tmp/* \ | ||
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ | ||
    && find /var/cache -type f -delete | ||
|
||
ENTRYPOINT ["/app/.devops/tools.sh"] | ||
|
||
# Light image: only the llama-cli binary and the shared libraries it was built with. | ||
FROM intel/oneapi-basekit:$ONEAPI_VERSION AS light | ||
|
||
# Runtime libraries only. The build stage configures with -DLLAMA_CURL=ON, so the | ||
# binary needs libcurl's shared library at runtime; install it explicitly instead | ||
# of relying on the base image to provide it. apt-get is used rather than "apt" | ||
# because the latter is not meant for scripts. | ||
RUN apt-get update \ | ||
    && apt-get install -y \ | ||
        libcurl4 \ | ||
        libgomp1 \ | ||
    && apt-get autoremove -y \ | ||
    && apt-get clean \ | ||
    && rm -rf /tmp/* /var/tmp/* \ | ||
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ | ||
    && find /var/cache -type f -delete | ||
|
||
# Shared objects are placed next to the executable in /app. | ||
COPY --from=build /app/lib/ /app/ | ||
COPY --from=build /app/build/bin/llama-cli /app/ | ||
|
||
WORKDIR /app | ||
|
||
ENTRYPOINT [ "/app/llama-cli" ] | ||
|
||
|
||
# Server image: only the llama-server binary and the shared libraries it was built with. | ||
FROM intel/oneapi-basekit:$ONEAPI_VERSION AS server | ||
|
||
# Runtime libraries plus curl, which the HEALTHCHECK below invokes. | ||
# apt-get is used rather than "apt" because the latter is not meant for scripts. | ||
RUN apt-get update \ | ||
    && apt-get install -y \ | ||
        curl \ | ||
        libgomp1 \ | ||
    && apt-get autoremove -y \ | ||
    && apt-get clean \ | ||
    && rm -rf /tmp/* /var/tmp/* \ | ||
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ | ||
    && find /var/cache -type f -delete | ||
|
||
# Shared objects are placed next to the executable in /app. | ||
COPY --from=build /app/lib/ /app/ | ||
COPY --from=build /app/build/bin/llama-server /app/ | ||
|
||
WORKDIR /app | ||
|
||
# Must be set to 0.0.0.0 so it can listen to requests from host machine | ||
ENV LLAMA_ARG_HOST=0.0.0.0 | ||
|
||
# Probe the server's /health endpoint on port 8080 from inside the container. | ||
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] | ||
|
||
ENTRYPOINT [ "/app/llama-server" ] |
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.