Merge branch 'master' into support_glm_edge_model
piDack authored Dec 19, 2024
2 parents bc93d2a + cd920d0 commit f91cf62
Showing 212 changed files with 25,244 additions and 5,934 deletions.
31 changes: 22 additions & 9 deletions .devops/full.Dockerfile
@@ -3,23 +3,36 @@ ARG UBUNTU_VERSION=22.04
FROM ubuntu:$UBUNTU_VERSION AS build

RUN apt-get update && \
apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1
apt-get install -y build-essential git cmake libcurl4-openssl-dev

WORKDIR /app

COPY . .

COPY requirements.txt requirements.txt
COPY requirements requirements
RUN cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \
cmake --build build -j $(nproc) && \
mkdir -p /app/lib && \
find build -name "*.so" -exec cp {} /app/lib/ \;

RUN pip install --upgrade pip setuptools wheel \
&& pip install -r requirements.txt
FROM ubuntu:$UBUNTU_VERSION as runtime

WORKDIR /app

COPY . .
RUN apt-get update && \
apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1

ENV LLAMA_CURL=1
COPY requirements.txt /app/requirements.txt
COPY requirements /app/requirements
COPY .devops/tools.sh /app/tools.sh

RUN pip install --upgrade pip setuptools wheel && \
pip install -r /app/requirements.txt

RUN make -j$(nproc)
COPY --from=build /app/build/bin/ /app/
COPY --from=build /app/lib/ /app/
COPY --from=build /app/convert_hf_to_gguf.py /app/
COPY --from=build /app/gguf-py /app/gguf-py

ENV LC_ALL=C.utf8

ENTRYPOINT ["/app/.devops/tools.sh"]
ENTRYPOINT ["/app/tools.sh"]
16 changes: 11 additions & 5 deletions .devops/llama-cli.Dockerfile
@@ -3,21 +3,27 @@ ARG UBUNTU_VERSION=22.04
FROM ubuntu:$UBUNTU_VERSION AS build

RUN apt-get update && \
apt-get install -y build-essential git
apt-get install -y build-essential git cmake libcurl4-openssl-dev

WORKDIR /app

COPY . .

RUN make -j$(nproc) llama-cli
RUN cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \
cmake --build build -j $(nproc) && \
mkdir -p /app/lib && \
find build -name "*.so" -exec cp {} /app/lib/ \;

FROM ubuntu:$UBUNTU_VERSION AS runtime

WORKDIR /app

RUN apt-get update && \
apt-get install -y libgomp1
apt-get install -y libcurl4-openssl-dev libgomp1 curl

COPY --from=build /app/llama-cli /llama-cli
COPY --from=build /app/build/bin/llama-cli /app/
COPY --from=build /app/lib/ /app/

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/llama-cli" ]
ENTRYPOINT [ "/app/llama-cli" ]
22 changes: 7 additions & 15 deletions .devops/llama-server.Dockerfile
@@ -9,33 +9,25 @@ WORKDIR /app

COPY . .


RUN \
# Build multiple versions of the CPU backend
scripts/build-cpu.sh avx -DGGML_AVX=ON -DGGML_AVX2=OFF && \
scripts/build-cpu.sh avx2 -DGGML_AVX=ON -DGGML_AVX2=ON && \
scripts/build-cpu.sh avx512 -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_AVX512=ON && \
scripts/build-cpu.sh amx -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_AVX512=ON -DGGML_AVX_VNNI=ON -DGGML_AVX512_VNNI=ON -DGGML_AMX_TILE=ON -DGGML_AMX_INT8=ON && \
# Build llama-server
cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \
cmake --build build --target llama-server -j $(nproc) && \
# Copy the built libraries to /app/lib
RUN cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \
cmake --build build -j $(nproc) && \
mkdir -p /app/lib && \
mv libggml-cpu* /app/lib/ && \
find build -name "*.so" -exec cp {} /app/lib/ \;

FROM ubuntu:$UBUNTU_VERSION AS runtime

WORKDIR /app

RUN apt-get update && \
apt-get install -y libcurl4-openssl-dev libgomp1 curl

COPY --from=build /app/build/bin/llama-server /llama-server
COPY --from=build /app/lib/ /
COPY --from=build /app/build/bin/llama-server /app/
COPY --from=build /app/lib/ /app/

ENV LC_ALL=C.utf8
# Must be set to 0.0.0.0 so it can listen to requests from host machine
ENV LLAMA_ARG_HOST=0.0.0.0

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/llama-server" ]
ENTRYPOINT [ "/app/llama-server" ]
3 changes: 2 additions & 1 deletion .devops/nix/package.nix
@@ -31,6 +31,7 @@
# Increases the runtime closure size by ~700M
useMpi ? false,
useRocm ? config.rocmSupport,
rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets,
enableCurl ? true,
useVulkan ? false,
llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
@@ -188,7 +189,7 @@ effectiveStdenv.mkDerivation (finalAttrs: {
]
++ optionals useRocm [
(cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
(cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets))
(cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmGpuTargets)
]
++ optionals useMetalKit [
(lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
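
Exposing rocmGpuTargets as a package argument lets downstream consumers narrow the GPU target list without rewriting the whole CMake flag. A hypothetical override from a flake consumer, assuming the package is exposed as packages.<system>.default and instantiated with callPackage (so .override is available):

nix build --impure --expr \
  '((builtins.getFlake (toString ./.)).packages.x86_64-linux.default).override { useRocm = true; rocmGpuTargets = "gfx1100"; }'
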
10 changes: 5 additions & 5 deletions .devops/tools.sh
@@ -8,23 +8,23 @@ arg1="$1"
shift

if [[ "$arg1" == '--convert' || "$arg1" == '-c' ]]; then
python3 ./convert_hf_to_gguf.py "$@"
exec python3 ./convert_hf_to_gguf.py "$@"
elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
./llama-quantize "$@"
exec ./llama-quantize "$@"
elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
./llama-cli "$@"
exec ./llama-cli "$@"
elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
echo "Converting PTH to GGML..."
for i in `ls $1/$2/ggml-model-f16.bin*`; do
if [ -f "${i/f16/q4_0}" ]; then
echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
else
echo "Converting PTH to GGML: $i into ${i/f16/q4_0}..."
./llama-quantize "$i" "${i/f16/q4_0}" q4_0
exec ./llama-quantize "$i" "${i/f16/q4_0}" q4_0
fi
done
elif [[ "$arg1" == '--server' || "$arg1" == '-s' ]]; then
./llama-server "$@"
exec ./llama-server "$@"
else
echo "Unknown command: $arg1"
echo "Available commands: "
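
The switch to exec changes the process layout rather than the tools themselves: the selected binary replaces the wrapper shell, which matters for signal delivery and exit codes when tools.sh runs as PID 1 in a container. A minimal illustration, not part of the diff:

# Without exec: the shell stays alive as PID 1 and llama-server runs as its child,
# so a docker stop (SIGTERM) hits the shell rather than the server.
./llama-server "$@"

# With exec: llama-server takes over the shell's process, receives signals
# directly, and its exit status becomes the container's exit status.
exec ./llama-server "$@"
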
141 changes: 93 additions & 48 deletions .github/workflows/build.yml
@@ -317,7 +317,7 @@ jobs:
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
sudo apt-get update -y
sudo apt-get install -y build-essential vulkan-sdk
sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk
- name: Build
id: cmake_build
@@ -327,6 +327,12 @@
cmake -DGGML_VULKAN=ON ..
cmake --build . --config Release -j $(nproc)
- name: Test
id: cmake_test
run: |
cd build
ctest -L main --verbose --timeout 900
ubuntu-22-cmake-hip:
runs-on: ubuntu-22.04
container: rocm/dev-ubuntu-22.04:6.0.2
@@ -552,35 +558,44 @@ jobs:
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
# TODO: tmp disabled. see for possible re-enable:
# https://github.com/ggerganov/llama.cpp/pull/10525
# macOS-latest-swift:
# runs-on: macos-latest
#
# strategy:
# matrix:
# destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS']
#
# steps:
# - name: Clone
# id: checkout
# uses: actions/checkout@v4
#
# - name: Dependencies
# id: depends
# continue-on-error: true
# run: |
# brew update
#
# - name: xcodebuild for swift package
# id: xcodebuild
# run: |
# xcodebuild -scheme llama -destination "${{ matrix.destination }}"
#
# - name: Build Swift Example
# id: make_build_swift_example
# run: |
# make swift
macOS-latest-swift:
runs-on: macos-latest

strategy:
matrix:
destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS']

steps:
- name: Clone
id: checkout
uses: actions/checkout@v4

- name: Dependencies
id: depends
continue-on-error: true
run: |
brew update
- name: Build llama.cpp with CMake
id: cmake_build
run: |
sysctl -a
mkdir build
cd build
cmake -G Xcode .. \
-DGGML_METAL_USE_BF16=ON \
-DGGML_METAL_EMBED_LIBRARY=ON \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TESTS=OFF \
-DLLAMA_BUILD_SERVER=OFF \
-DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
sudo cmake --install . --config Release
- name: xcodebuild for swift package
id: xcodebuild
run: |
xcodebuild -scheme llama-Package -destination "${{ matrix.destination }}"
windows-msys2:
runs-on: windows-latest
@@ -653,6 +668,8 @@ jobs:
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
- build: 'msvc-arm64'
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
- build: 'llvm-arm64-opencl-adreno'
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON'

steps:
- name: Clone
@@ -694,6 +711,28 @@ jobs:
run: |
choco install ninja
- name: Install OpenCL Headers and Libs
id: install_opencl
if: ${{ matrix.build == 'llvm-arm64-opencl-adreno' }}
run: |
git clone https://github.com/KhronosGroup/OpenCL-Headers
cd OpenCL-Headers
mkdir build && cd build
cmake .. `
-DBUILD_TESTING=OFF `
-DOPENCL_HEADERS_BUILD_TESTING=OFF `
-DOPENCL_HEADERS_BUILD_CXX_TESTS=OFF `
-DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release"
cmake --build . --target install
git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader
cd OpenCL-ICD-Loader
mkdir build-arm64-release && cd build-arm64-release
cmake .. `
-A arm64 `
-DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" `
-DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release"
cmake --build . --target install --config release
- name: Build
id: cmake_build
run: |
@@ -723,7 +762,7 @@ jobs:
- name: Test
id: cmake_test
# not all machines have native AVX-512
if: ${{ matrix.build != 'msvc-arm64' && matrix.build != 'llvm-arm64' && matrix.build != 'kompute-x64' && matrix.build != 'vulkan-x64' && (matrix.build != 'avx512-x64' || env.HAS_AVX512F == '1') }}
if: ${{ matrix.build != 'msvc-arm64' && matrix.build != 'llvm-arm64' && matrix.build != 'llvm-arm64-opencl-adreno' && matrix.build != 'kompute-x64' && matrix.build != 'vulkan-x64' && (matrix.build != 'avx512-x64' || env.HAS_AVX512F == '1') }}
run: |
cd build
ctest -L main -C Release --verbose --timeout 900
@@ -1104,6 +1143,29 @@ jobs:
- name: Checkout code
uses: actions/checkout@v4

- name: Build
id: cmake_build
run: |
sysctl -a
mkdir build
cd build
cmake -G Xcode .. \
-DGGML_METAL_USE_BF16=ON \
-DGGML_METAL_EMBED_LIBRARY=ON \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TESTS=OFF \
-DLLAMA_BUILD_SERVER=OFF \
-DCMAKE_SYSTEM_NAME=iOS \
-DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
sudo cmake --install . --config Release
- name: xcodebuild for swift package
id: xcodebuild
run: |
xcodebuild -scheme llama-Package -destination 'generic/platform=iOS'
- name: Build Xcode project
run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build

@@ -1131,23 +1193,6 @@ jobs:
./gradlew build --no-daemon
# freeBSD-latest:
# runs-on: macos-12
# steps:
# - name: Clone
# uses: actions/checkout@v4
#
# - name: Build
# uses: cross-platform-actions/[email protected]
# with:
# operating_system: freebsd
# version: '13.2'
# hypervisor: 'qemu'
# run: |
# sudo pkg update
# sudo pkg install -y gmake automake autoconf pkgconf llvm15 openblas
# gmake CC=/usr/local/bin/clang15 CXX=/usr/local/bin/clang++15 -j `sysctl -n hw.ncpu`

release:
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}

2 changes: 1 addition & 1 deletion .github/workflows/server.yml
@@ -79,7 +79,7 @@ jobs:
# Setup nodejs (to be used for verifying bundled index.html)
- uses: actions/setup-node@v4
with:
node-version: 22
node-version: '22.11.0'

- name: Verify bundled index.html
id: verify_server_index_html
8 changes: 3 additions & 5 deletions CMakeLists.txt
@@ -46,11 +46,9 @@ if (WIN32)
add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
endif()

if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
add_compile_options("$<$<COMPILE_LANGUAGE:C>:/source-charset:utf-8>")
add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/source-charset:utf-8>")
add_compile_options("$<$<COMPILE_LANGUAGE:C>:/execution-charset:utf-8>")
add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/execution-charset:utf-8>")
if (MSVC)
add_compile_options("$<$<COMPILE_LANGUAGE:C>:/utf-8>")
add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/utf-8>")
endif()

#
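
The collapsed options rely on /utf-8 being MSVC's documented shorthand for setting both the source and execution character sets to UTF-8. Note also that if (MSVC) is broader than the previous compiler-ID string check: CMake sets MSVC for any compiler that emulates the cl command line (such as clang-cl), not only genuine MSVC. Illustrative equivalence (the file name is an arbitrary example):

# These two invocations are equivalent per the MSVC documentation:
cl /source-charset:utf-8 /execution-charset:utf-8 /c example.cpp
cl /utf-8 /c example.cpp
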
(The remaining changed files are not shown in this view.)
