Skip to content

Commit

Permalink
added exllamav2
Browse files Browse the repository at this point in the history
  • Loading branch information
dusty-nv committed Sep 13, 2023
1 parent b4756cb commit b7f90e3
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 0 deletions.
52 changes: 52 additions & 0 deletions .github/workflows/exllama-v2_jp51.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# CI workflow: builds the exllama:v2 container on a self-hosted Jetson runner
# labelled for JetPack 5.1, then pushes the result via build.sh.
name: "exllama-v2_jp51"
run-name: "Build exllama:v2 (JetPack 5.1)"
on:
  workflow_dispatch: {}
  push:
    branches:
      - 'dev'
    # Trigger only when this workflow or one of the listed package
    # directories changes; the '!' patterns exclude README.md / docs.md so
    # documentation-only edits do not start a build.
    paths:
      - '.github/workflows/exllama-v2_jp51.yml'
      - 'packages/llm/exllama/*'
      - '!packages/llm/exllama/README.md'
      - '!packages/llm/exllama/docs.md'
      - 'packages/build-essential/*'
      - '!packages/build-essential/README.md'
      - '!packages/build-essential/docs.md'
      - 'packages/python/*'
      - '!packages/python/README.md'
      - '!packages/python/docs.md'
      - 'packages/numpy/*'
      - '!packages/numpy/README.md'
      - '!packages/numpy/docs.md'
      - 'packages/cmake/cmake_pip/*'
      - '!packages/cmake/cmake_pip/README.md'
      - '!packages/cmake/cmake_pip/docs.md'
      - 'packages/onnx/*'
      - '!packages/onnx/README.md'
      - '!packages/onnx/docs.md'
      - 'packages/pytorch/*'
      - '!packages/pytorch/README.md'
      - '!packages/pytorch/docs.md'
      - 'packages/llm/huggingface_hub/*'
      - '!packages/llm/huggingface_hub/README.md'
      - '!packages/llm/huggingface_hub/docs.md'
jobs:
  exllama-v2_jp51:
    runs-on: [self-hosted, jetson, jp51]
    steps:
      # Print the L4T release file of the runner to the job log.
      - run: |
          cat /etc/nv_tegra_release
      # Manual checkout: clone into the runner workspace (tolerating an
      # existing clone), then pin the working tree to the triggering commit.
      # NOTE(review): the user.email value below reads like a redaction
      # placeholder rather than a real address -- confirm before relying on it.
      - name: "Checkout ${{ github.repository }} SHA=${{ github.sha }}"
        run: |
          echo "$RUNNER_WORKSPACE"
          cd "$RUNNER_WORKSPACE"
          git config --global user.email "[email protected]"
          git config --global user.name "Dustin Franklin"
          git clone "$GITHUB_SERVER_URL/$GITHUB_REPOSITORY" || echo 'repo already cloned or another error encountered'
          # git names the clone directory after the last URL component, so
          # derive it from the repository slug instead of hard-coding it.
          cd "${GITHUB_REPOSITORY##*/}"
          git fetch origin
          git checkout "$GITHUB_SHA"
          git status
          ls -a
      - run: ./build.sh --name=runner/ --push=dustynv exllama:v2
36 changes: 36 additions & 0 deletions packages/llm/exllama/Dockerfile.v2
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#
# Dockerfile for exllama_v2 (see config.py for package configuration)
#
# Steps: clone exllamav2, rewrite its torch requirement strings, build and
# install the wheel, run the test script once, then import-check the package.
#
ARG BASE_IMAGE
FROM ${BASE_IMAGE}

# build arg populated from build_args in config.py
ARG TORCH_CUDA_ARCH_LIST

# upstream repository and branch to clone
ARG EXLLAMA2_REPO=turboderp/exllamav2
ARG EXLLAMA2_BRANCH=master

WORKDIR /opt

# download the branch's ref metadata from the GitHub API
# NOTE(review): presumably here so a new upstream commit invalidates the
# build cache for the layers below -- confirm
ADD https://api.github.com/repos/${EXLLAMA2_REPO}/git/refs/heads/${EXLLAMA2_BRANCH} /tmp/exllama2_version.json

# shallow clone of the selected branch into /opt/exllamav2
RUN git clone --depth=1 --branch=${EXLLAMA2_BRANCH} https://github.com/${EXLLAMA2_REPO} exllamav2

# strip whatever follows "torch" on matching lines (presumably version
# pins -- confirm), then print both files to the build log
RUN sed -i 's|torch.*|torch|g' exllamav2/requirements.txt && \
    sed -i 's|torch.*"|torch"|g' exllamav2/setup.py && \
    cat exllamav2/requirements.txt exllamav2/setup.py

# build the wheel and keep a copy in /opt
RUN cd exllamav2 && \
    python3 setup.py --verbose bdist_wheel && \
    cp dist/exllamav2*.whl /opt

# install the wheel that was just built
RUN pip3 install --no-cache-dir --verbose /opt/exllamav2*.whl

WORKDIR /

# this will build cuda_ext.py to ~/.cache/torch_extensions/
RUN cd /opt/exllamav2 && \
    python3 test_inference.py --help

# make sure it loads
RUN pip3 show exllamav2 && \
    python3 -c "import exllamav2"
8 changes: 8 additions & 0 deletions packages/llm/exllama/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,11 @@
# Build args for the package: each CUDA_ARCHITECTURES entry is divided by 10,
# formatted to one decimal place, and the results are joined with ';'.
package['build_args'] = {
    'TORCH_CUDA_ARCH_LIST': ';'.join(f'{arch / 10:.1f}' for arch in CUDA_ARCHITECTURES)
}

# Second variant of the package: a copy of the base entry with its name,
# Dockerfile and test script overridden.
# NOTE(review): assuming `package` is a plain dict, .copy() is shallow, so
# both entries share the same 'build_args' dict object.
exllama_v2 = package.copy()
exllama_v2.update(
    name='exllama:v2',
    dockerfile='Dockerfile.v2',
    test='test_v2.sh',
)

# Rebind `package` to a list holding the base entry and the v2 variant.
package = [package, exllama_v2]
5 changes: 5 additions & 0 deletions packages/llm/exllama/test_v2.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/usr/bin/env bash
# Smoke test for the exllama:v2 package: runs test_inference.py --help from
# the /opt/exllamav2 checkout.

# Stop if the checkout is missing, instead of running python3 from whatever
# directory the test happened to start in.
cd /opt/exllamav2 || exit 1

python3 test_inference.py --help

0 comments on commit b7f90e3

Please sign in to comment.