llama.cpp: add scripts for building and running
danbev committed Aug 21, 2024
1 parent 2c58695 commit 485dbb7
Showing 14 changed files with 96 additions and 0 deletions.
2 changes: 2 additions & 0 deletions fundamentals/llama.cpp/scripts/build-configure.sh
@@ -0,0 +1,2 @@
cmake -S . -B build -DLLAMA_CURL=ON
cmake --build build
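Note: -DLLAMA_CURL=ON links the tools against libcurl so that models can be fetched over HTTP, which means the libcurl development headers have to be available at configure time. A minimal sketch for Debian/Ubuntu (the package name is an assumption about that distro family):
sudo apt-get install libcurl4-openssl-dev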
2 changes: 2 additions & 0 deletions fundamentals/llama.cpp/scripts/build-cuda.sh
@@ -0,0 +1,2 @@
cmake -S . -B build -DGGML_CUDA=On -DCMAKE_BUILD_TYPE=Debug
cmake --build build
2 changes: 2 additions & 0 deletions fundamentals/llama.cpp/scripts/build-debug.sh
@@ -0,0 +1,2 @@
cmake -S . -B build -DCMAKE_BUILD_TYPE=Debug
cmake --build build
2 changes: 2 additions & 0 deletions fundamentals/llama.cpp/scripts/build-kompute.sh
@@ -0,0 +1,2 @@
cmake -S . -B build -DGGML_KOMPUTE=On
cmake --build build
5 changes: 5 additions & 0 deletions fundamentals/llama.cpp/scripts/build-rocm.sh
@@ -0,0 +1,5 @@
#cmake -B ./build -S . -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DGGML_HIPBLAS=ON -DCMAKE_C_COMPILER=/opt/rocm-6.1.2/llvm/bin/clang -DCMAKE_CXX_COMPILER=/opt/rocm-6.1.2/llvm/bin/clang++ -DAMDGPU_TARGETS='gfx1030;gfx1100;gfx1101;gfx1102'

cmake -B ./build -S . -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DGGML_HIPBLAS=ON -DCMAKE_C_COMPILER=/opt/rocm-6.1.2/llvm/bin/clang -DCMAKE_CXX_COMPILER=/opt/rocm-6.1.2/llvm/bin/clang++ -DAMDGPU_TARGETS='gfx1030'

cmake --build ./build --config Release -- -j8
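The active ROCm configuration above targets only gfx1030, while the commented-out variant would build for several RDNA targets at once. To check which gfx architecture the local GPU actually reports before editing AMDGPU_TARGETS, a quick sketch (assuming ROCm's rocminfo tool is installed):
rocminfo | grep gfx   # the GPU agent section lists a name such as gfx1030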
2 changes: 2 additions & 0 deletions fundamentals/llama.cpp/scripts/build-vulkan.sh
@@ -0,0 +1,2 @@
cmake -S . -B build -DGGML_VULKAN=On
cmake --build build
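All of the build scripts follow the same pattern: a CMake configure step into build/ with a backend-specific flag, followed by cmake --build. A hedged sketch of the overall workflow, assuming the scripts are invoked from the directory that holds llama.cpp's top-level CMakeLists.txt:
rm -rf build                  # start from a clean build tree when switching backends
bash scripts/build-debug.sh   # configure and do the initial build
cmake --build build -j8       # incremental, parallel rebuilds after source changes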
11 changes: 11 additions & 0 deletions fundamentals/llama.cpp/scripts/convert-gemma2.sh
@@ -0,0 +1,11 @@
#!/bin/bash
source venv/bin/activate

model_name=gemma-2-9b-it
model_dir=~/.cache/huggingface/hub/models--google--${model_name}/snapshots/93be03fbe3787f19bf03a4b1d3d75d36cb1f6ace

#python convert-hf-to-gguf.py $model_dir --outfile=models/${model_name}.gguf --outtype f16

python convert-hf-to-gguf.py $model_dir --outfile=models/${model_name}.gguf --outtype f16 --split-max-tensors 100

deactivate
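convert-gemma2.sh assumes a Python virtual environment named venv that already has the conversion dependencies installed. A minimal sketch for creating it, assuming llama.cpp's requirements.txt covers what convert-hf-to-gguf.py needs:
python3 -m venv venv
source venv/bin/activate
pip install -r requirements.txt   # torch, transformers, gguf, etc.
deactivate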
3 changes: 3 additions & 0 deletions fundamentals/llama.cpp/scripts/inspect-model.sh
@@ -0,0 +1,3 @@
source venv/bin/activate
gguf-py/scripts/gguf_dump.py "$1"
deactivate
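Example invocation (the model path is only an illustration; any local .gguf file works):
bash scripts/inspect-model.sh models/gemma-2-9b-it.gguf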
4 changes: 4 additions & 0 deletions fundamentals/llama.cpp/scripts/run-embeddings.sh
@@ -0,0 +1,4 @@
#./llama-embedding -m models/llama-2-7b-chat.Q4_K_M.gguf --no-warmup --pooling mean -p "What is LoRA?"
#gdb --args ./llama-embedding -m models/llama-2-7b-chat.Q4_K_M.gguf --no-warmup --pooling mean -p "What is LoRA?"
#gdb --args ./llama-embedding -m models/llama-2-7b-chat.Q4_K_M.gguf --no-warmup --pooling last -p "What is LoRA?"
gdb --args ./llama-embedding -m models/llama-2-7b-chat.Q4_K_M.gguf --no-warmup --pooling cls -p "What is LoRA?"
41 changes: 41 additions & 0 deletions fundamentals/llama.cpp/scripts/run-main.sh
@@ -0,0 +1,41 @@
#./llama-cli -m models/tinyllama-1.1b-1t-openorca.Q2_K.gguf --prompt 'The answer to 1 + 1 is' -n 5 --verbose-prompt
#gdb --args ./llama-cli -m models/tinyllama-1.1b-1t-openorca.Q2_K.gguf --prompt 'The answer to 1 + 1 is' -n 5 --verbose-prompt


## Run with session file
#gdb --args ./llama-cli -m models/tinyllama-1.1b-1t-openorca.Q2_K.gguf --prompt 'The answer to 1 + 1 is' -n 5 --verbose-prompt --prompt-cache main-session.txt
#./llama-cli -m models/tinyllama-1.1b-1t-openorca.Q2_K.gguf --prompt 'The answer to 1 + 1 is' -n 5 --verbose-prompt --prompt-cache main-session.txt
#gdb --args ./llama-cli -m models/tinyllama-1.1b-1t-openorca.Q2_K.gguf --prompt 'Hello world' -n 3 --verbose-prompt --prompt-cache main-session.txt --dump-kv-cache


#gdb --args ./llama-cli -m models/tinyllama-1.1b-1t-openorca.Q2_K.gguf --prompt 'Hello world' -n 3 --verbose-prompt --dump-kv-cache

#gdb --args ./llama-cli -m /home/danbev/.cache/lm-studio/models/TheBloke/phi-2-GGUF/phi-2.Q3_K_M.gguf --prompt 'The answer to 1 + 1 is' -n 5 --verbose-prompt --temp 0
#./llama-cli -m /home/danbev/.cache/lm-studio/models/TheBloke/phi-2-GGUF/phi-2.Q3_K_M.gguf --prompt 'The answer to 1 + 1 is' -n 5 --verbose-prompt --temp 0 --top-p 0.950 --min-p 0.05 --repeat-penalty 1.1 --typical 1


#./llama-cli -m /home/danbev/.cache/lm-studio/models/TheBloke/phi-2-GGUF/phi-2.Q3_K_M.gguf --color -i -r "User:" -f prompts/chat-with-bob.txt

#./llama-cli -m models/gemma-2-9b.gguf -p "<start_of_turn>user
#What is LoRA?<end_of_turn>
#<start_of_turn>model"

#./llama-cli -m models/gemma-2-9b.gguf -ngl 15 -p "Hi"
#./llama-cli -m ~/.cache/lm-studio/models/bartowski/gemma-2-9b-it-GGUF/gemma-2-9b-it-Q4_K_S.gguf -ngl 15 -p "<start_of_turn>user\nWhat is LoRA?<end_of_turn>\n<start_of_turn>model"
#./llama-cli -m models/gemma-2-9b-it.gguf -ngl 15 -p "<start_of_turn>user\nWhat is LoRA?<end_of_turn>\n<start_of_turn>model"
#./llama-cli -m models/gemma-2-9b-it.gguf -ngl 15 -p "What is LoRA?"
#./llama-cli -m models/gemma-2-9b-it.gguf -ngl 15 -p "Dan loves icecream"
#./llama-cli -m models/gemma-2-9b-it.gguf -dkvc -ngl 15 -p "Dan loves icecream"
#gdb --args ./llama-cli -m models/gemma-2-9b-it.gguf --grp-attn-n 2 --grp-attn-w 4 -p "Dan loves icecream"


#gdb --args ./llama-cli -m models/llama-2-7b.Q4_0.gguf --no-warmup --rope-scaling yarn --rope-freq-scale 1 --yarn-ext-factor 1.0 -ngl 10 -p "What is LoRA?" -n 10
#./llama-cli -m models/llama-2-7b.Q4_0.gguf --no-warmup --rope-scaling yarn --rope-freq-scale 1 --yarn-ext-factor 1.0 -ngl 10 -p "What is LoRA?" -n 10
# Testing Self-Extend
#./llama-cli -m models/llama-2-7b.Q4_0.gguf -ngl 10 --grp-attn-n 4 --grp-attn-w 32 -f pg1184.txt -c 16384 --temp 0
# llama-2-7b.Q4_0.gguf was trained on a 4096 context size
#./llama-cli -m models/llama-2-7b.Q4_0.gguf -ngl 10 -f self-extend.txt -c 8192 --temp 0 -n 256 --grp-attn-n 4 --grp-attn-w 256
#./llama-cli -m models/llama-2-7b.Q4_0.gguf -ngl 10 -f self-extend.txt -c 8192 --temp 0 -n 256
#./llama-cli -m models/llama-2-7b.Q4_0.gguf -ngl 10 -f self-extend.txt -c 8192 --temp 0 -n 256

./llama-cli -m models/mamba-gpt-7b-q4_0.gguf --no-warmup -ngl 10 -p "What is LoRA?" -n 10
1 change: 1 addition & 0 deletions fundamentals/llama.cpp/scripts/run-passkey.sh
@@ -0,0 +1 @@
make -j && ./llama-passkey -m ./models/llama-2-7b.Q4_0.gguf --junk 250
12 changes: 12 additions & 0 deletions fundamentals/llama.cpp/scripts/run-self-extend.sh
@@ -0,0 +1,12 @@
# Testing Self-Extend
#./llama-cli -m models/llama-2-7b.Q4_0.gguf -ngl 10 --grp-attn-n 4 --grp-attn-w 32 -f pg1184.txt -c 16384 --temp 0
# llama-2-7b.Q4_0.gguf was trained on a 4096 context size
#./llama-cli -m models/llama-2-7b.Q4_0.gguf -ngl 10 -f self-extend.txt -c 8192 --temp 0 -n 100 --grp-attn-n 4 --grp-attn-w 32
#gdb --args ./llama-cli -m /home/danbev/.cache/lm-studio/models/TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF/tinyllama-1.1b-1t-openorca.Q2_K.gguf -ngl 10 -f self-extend.txt -c 4096 --temp 1 -n 100 --grp-attn-n 4 --grp-attn-w 512
#gdb --args ./llama-cli -m /home/danbev/.cache/lm-studio/models/TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF/tinyllama-1.1b-1t-openorca.Q2_K.gguf -ngl 10 -f self-extend.txt -c 8000 --temp 1 -n 200 --grp-attn-n 4 --grp-attn-w 128
#gdb --args ./llama-cli -m models/llama-2-7b.Q4_0.gguf -ngl 10 -f self-extend.txt -c 8000 --temp 1 -n 200 --grp-attn-n 2 --grp-attn-w 2048
#./llama-cli -m models/llama-2-7b.Q4_0.gguf -ngl 10 -f self-extend.txt -c 8000 --temp 1 -n 200 --grp-attn-n 128 --grp-attn-w 2048
gdb --args ./llama-cli -m models/llama-2-7b.Q4_0.gguf -ngl 10 -f self-extend.txt -c 8000 --temp 1 --grp-attn-n 2 --grp-attn-w 2048
#./llama-cli -m /home/danbev/.cache/lm-studio/models/TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF/tinyllama-1.1b-1t-openorca.Q2_K.gguf -ngl 10 -f self-extend.txt -c 5000 --temp 1 -n 100 --grp-attn-n 4 --grp-attn-w 32
#./llama-cli -m models/llama-2-7b.Q4_0.gguf -ngl 10 -f self-extend.txt -c 8192 --temp 0 -n 256
#./llama-cli -m models/llama-2-7b.Q4_0.gguf -ngl 10 -f self-extend.txt -c 8192 --temp 0 -n 256
3 changes: 3 additions & 0 deletions fundamentals/llama.cpp/scripts/run-tests.sh
@@ -0,0 +1,3 @@
#!/bin/bash
echo "Running tests target..."
cmake --build build --target test --verbose
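Building the test target drives CTest under the hood, so the same tests can also be run directly, which makes failing output easier to read. An equivalent invocation, assuming CMake 3.20+ for --test-dir:
ctest --test-dir build --output-on-failure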
6 changes: 6 additions & 0 deletions fundamentals/llama.cpp/scripts/run-tokenize.sh
@@ -0,0 +1,6 @@
#./llama-tokenize -m models/llama-2-7b.Q4_0.gguf -f self-extend.txt --show-count

#./llama-tokenize -m /home/danbev/.cache/lm-studio/models/TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF/tinyllama-1.1b-1t-openorca.Q2_K.gguf -f /home/danbev/work/lmstudio/llmster/electron/vendor/llm-engine/gtest/data/self-extend-test.txt --show-count

./llama-tokenize -m /home/danbev/.cache/lm-studio/models/TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF/tinyllama-1.1b-1t-openorca.Q2_K.gguf -f self-extend.txt --show-count
#./llama-tokenize -m models/llama-2-7b.Q4_0.gguf -f self-extend.txt --show-count
