From b7f90e3cad116adcd5c4e1ba2cd9cc5a7ba2b3f4 Mon Sep 17 00:00:00 2001
From: Dustin Franklin
Date: Wed, 13 Sep 2023 15:42:58 -0400
Subject: [PATCH] added exllamav2

---
 .github/workflows/exllama-v2_jp51.yml | 52 +++++++++++++++++++++++++++
 packages/llm/exllama/Dockerfile.v2    | 36 +++++++++++++++++++
 packages/llm/exllama/config.py        |  8 +++++
 packages/llm/exllama/test_v2.sh       |  5 +++
 4 files changed, 101 insertions(+)
 create mode 100644 .github/workflows/exllama-v2_jp51.yml
 create mode 100644 packages/llm/exllama/Dockerfile.v2
 create mode 100644 packages/llm/exllama/test_v2.sh

diff --git a/.github/workflows/exllama-v2_jp51.yml b/.github/workflows/exllama-v2_jp51.yml
new file mode 100644
index 000000000..2e16b637e
--- /dev/null
+++ b/.github/workflows/exllama-v2_jp51.yml
@@ -0,0 +1,52 @@
+name: "exllama-v2_jp51"
+run-name: "Build exllama:v2 (JetPack 5.1)"
+on:
+  workflow_dispatch: {}
+  push:
+    branches:
+      - 'dev'
+    paths:
+      - '.github/workflows/exllama-v2_jp51.yml'
+      - 'packages/llm/exllama/*'
+      - '!packages/llm/exllama/README.md'
+      - '!packages/llm/exllama/docs.md'
+      - 'packages/build-essential/*'
+      - '!packages/build-essential/README.md'
+      - '!packages/build-essential/docs.md'
+      - 'packages/python/*'
+      - '!packages/python/README.md'
+      - '!packages/python/docs.md'
+      - 'packages/numpy/*'
+      - '!packages/numpy/README.md'
+      - '!packages/numpy/docs.md'
+      - 'packages/cmake/cmake_pip/*'
+      - '!packages/cmake/cmake_pip/README.md'
+      - '!packages/cmake/cmake_pip/docs.md'
+      - 'packages/onnx/*'
+      - '!packages/onnx/README.md'
+      - '!packages/onnx/docs.md'
+      - 'packages/pytorch/*'
+      - '!packages/pytorch/README.md'
+      - '!packages/pytorch/docs.md'
+      - 'packages/llm/huggingface_hub/*'
+      - '!packages/llm/huggingface_hub/README.md'
+      - '!packages/llm/huggingface_hub/docs.md'
+jobs:
+  exllama-v2_jp51:
+    runs-on: [self-hosted, jetson, jp51]
+    steps:
+      - run: |
+          cat /etc/nv_tegra_release
+      - name: "Checkout ${{ github.repository }} SHA=${{ github.sha }}"
+        run: |
+          echo "$RUNNER_WORKSPACE"
+          cd $RUNNER_WORKSPACE
+          git config --global user.email "dustinf@nvidia.com"
+          git config --global user.name "Dustin Franklin"
+          git clone $GITHUB_SERVER_URL/$GITHUB_REPOSITORY || echo 'repo already cloned or another error encountered'
+          cd jetson-containers
+          git fetch origin
+          git checkout $GITHUB_SHA
+          git status
+          ls -a
+      - run: ./build.sh --name=runner/ --push=dustynv exllama:v2
\ No newline at end of file
diff --git a/packages/llm/exllama/Dockerfile.v2 b/packages/llm/exllama/Dockerfile.v2
new file mode 100644
index 000000000..cf8444fd3
--- /dev/null
+++ b/packages/llm/exllama/Dockerfile.v2
@@ -0,0 +1,36 @@
+#
+# Dockerfile for exllama_v2 (see config.py for package configuration)
+#
+ARG BASE_IMAGE
+FROM ${BASE_IMAGE}
+
+ARG TORCH_CUDA_ARCH_LIST
+
+ARG EXLLAMA2_REPO=turboderp/exllamav2
+ARG EXLLAMA2_BRANCH=master
+
+WORKDIR /opt
+
+ADD https://api.github.com/repos/${EXLLAMA2_REPO}/git/refs/heads/${EXLLAMA2_BRANCH} /tmp/exllama2_version.json
+
+RUN git clone --branch=${EXLLAMA2_BRANCH} --depth=1 https://github.com/${EXLLAMA2_REPO} exllamav2
+
+RUN sed 's|torch.*|torch|g' -i exllamav2/requirements.txt && \
+    sed 's|torch.*"|torch"|g' -i exllamav2/setup.py && \
+    cat exllamav2/requirements.txt && \
+    cat exllamav2/setup.py
+
+# build the wheel
+RUN cd exllamav2 && \
+    python3 setup.py --verbose bdist_wheel && \
+    cp dist/exllamav2*.whl /opt
+
+RUN pip3 install --no-cache-dir --verbose /opt/exllamav2*.whl
+
+WORKDIR /
+
+# this will build cuda_ext.py to ~/.cache/torch_extensions/
+RUN cd /opt/exllamav2 && python3 test_inference.py --help
+
+# make sure it loads
+RUN pip3 show exllamav2 && python3 -c 'import exllamav2'
diff --git a/packages/llm/exllama/config.py b/packages/llm/exllama/config.py
index 1b574520c..d60001a31 100644
--- a/packages/llm/exllama/config.py
+++ b/packages/llm/exllama/config.py
@@ -4,3 +4,11 @@
 package['build_args'] = {
     'TORCH_CUDA_ARCH_LIST': ';'.join([f'{x/10:.1f}' for x in CUDA_ARCHITECTURES])
 }
+
+exllama_v2 = package.copy()
+
+exllama_v2['name'] = 'exllama:v2'
+exllama_v2['dockerfile'] = 'Dockerfile.v2'
+exllama_v2['test'] = 'test_v2.sh'
+
+package = [package, exllama_v2]
\ No newline at end of file
diff --git a/packages/llm/exllama/test_v2.sh b/packages/llm/exllama/test_v2.sh
new file mode 100644
index 000000000..ef33208cd
--- /dev/null
+++ b/packages/llm/exllama/test_v2.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+
+cd /opt/exllamav2
+
+python3 test_inference.py --help
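
Reviewer note, not part of the patch: a minimal sketch of how the new exllama:v2 package could be built and tested locally, outside the self-hosted CI runner above. The repository URL and the bare ./build.sh invocation (without the --name/--push flags the workflow uses) are assumptions, not taken from this patch:

    # clone the jetson-containers build system (URL assumed) and build the new package;
    # build.sh resolves 'exllama:v2' from packages/llm/exllama/config.py, builds
    # Dockerfile.v2, and is expected to run test_v2.sh inside the resulting image
    git clone https://github.com/dusty-nv/jetson-containers
    cd jetson-containers
    ./build.sh exllama:v2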