diff --git a/fundamentals/llama.cpp/scripts/build-configure.sh b/fundamentals/llama.cpp/scripts/build-configure.sh
new file mode 100755
index 00000000..b9e93b51
--- /dev/null
+++ b/fundamentals/llama.cpp/scripts/build-configure.sh
@@ -0,0 +1,2 @@
+cmake -S . -B build -DLLAMA_CURL=ON
+cmake --build build
diff --git a/fundamentals/llama.cpp/scripts/build-cuda.sh b/fundamentals/llama.cpp/scripts/build-cuda.sh
new file mode 100755
index 00000000..a8215049
--- /dev/null
+++ b/fundamentals/llama.cpp/scripts/build-cuda.sh
@@ -0,0 +1,2 @@
+cmake -S . -B build -DGGML_CUDA=On -DCMAKE_BUILD_TYPE=Debug
+cmake --build build
diff --git a/fundamentals/llama.cpp/scripts/build-debug.sh b/fundamentals/llama.cpp/scripts/build-debug.sh
new file mode 100755
index 00000000..97e5287c
--- /dev/null
+++ b/fundamentals/llama.cpp/scripts/build-debug.sh
@@ -0,0 +1,2 @@
+cmake -S . -B build -DCMAKE_BUILD_TYPE=Debug
+cmake --build build
diff --git a/fundamentals/llama.cpp/scripts/build-kompute.sh b/fundamentals/llama.cpp/scripts/build-kompute.sh
new file mode 100755
index 00000000..b3e07aae
--- /dev/null
+++ b/fundamentals/llama.cpp/scripts/build-kompute.sh
@@ -0,0 +1,2 @@
+cmake -S . -B build -DGGML_KOMPUTE=On
+cmake --build build
diff --git a/fundamentals/llama.cpp/scripts/build-rocm.sh b/fundamentals/llama.cpp/scripts/build-rocm.sh
new file mode 100755
index 00000000..5da9f78c
--- /dev/null
+++ b/fundamentals/llama.cpp/scripts/build-rocm.sh
@@ -0,0 +1,5 @@
+#cmake -B ./build -S . -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DGGML_HIPBLAS=ON -DCMAKE_C_COMPILER=/opt/rocm-6.1.2/llvm/bin/clang -DCMAKE_CXX_COMPILER=/opt/rocm-6.1.2/llvm/bin/clang++ -DAMDGPU_TARGETS='gfx1030;gfx1100;gfx1101;gfx1102'
+
+cmake -B ./build -S . -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DGGML_HIPBLAS=ON -DCMAKE_C_COMPILER=/opt/rocm-6.1.2/llvm/bin/clang -DCMAKE_CXX_COMPILER=/opt/rocm-6.1.2/llvm/bin/clang++ -DAMDGPU_TARGETS='gfx1030'
+
+cmake --build ./build --config Release -- -j8
diff --git a/fundamentals/llama.cpp/scripts/build-vulkan.sh b/fundamentals/llama.cpp/scripts/build-vulkan.sh
new file mode 100755
index 00000000..41f626df
--- /dev/null
+++ b/fundamentals/llama.cpp/scripts/build-vulkan.sh
@@ -0,0 +1,2 @@
+cmake -S . -B build -DGGML_VULKAN=On
+cmake --build build
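Note (not part of the diff): all of the build scripts above configure into the same build directory, and CMake caches -D options in CMakeCache.txt, so a previously enabled backend stays on unless the tree is cleaned. A minimal sketch of switching backends, reusing only flags that already appear in the scripts:

# wipe the cached configuration before enabling a different backend
rm -rf build
cmake -S . -B build -DGGML_VULKAN=On -DCMAKE_BUILD_TYPE=Debug
cmake --build build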
diff --git a/fundamentals/llama.cpp/scripts/convert-gemma2.sh b/fundamentals/llama.cpp/scripts/convert-gemma2.sh
new file mode 100755
index 00000000..57113133
--- /dev/null
+++ b/fundamentals/llama.cpp/scripts/convert-gemma2.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+source venv/bin/activate
+
+model_name=gemma-2-9b-it
+model_dir=~/.cache/huggingface/hub/models--google--${model_name}/snapshots/93be03fbe3787f19bf03a4b1d3d75d36cb1f6ace
+
+#python convert-hf-to-gguf.py $model_dir --outfile=models/${model_name}.gguf --outtype f16
+
+python convert-hf-to-gguf.py $model_dir --outfile=models/${model_name}.gguf --outtype f16 --split-max-tensors 100
+
+deactivate
diff --git a/fundamentals/llama.cpp/scripts/inspect-model.sh b/fundamentals/llama.cpp/scripts/inspect-model.sh
new file mode 100755
index 00000000..52cd256d
--- /dev/null
+++ b/fundamentals/llama.cpp/scripts/inspect-model.sh
@@ -0,0 +1,3 @@
+source venv/bin/activate
+gguf-py/scripts/gguf_dump.py $1
+deactivate
diff --git a/fundamentals/llama.cpp/scripts/run-embeddings.sh b/fundamentals/llama.cpp/scripts/run-embeddings.sh
new file mode 100755
index 00000000..60d3509d
--- /dev/null
+++ b/fundamentals/llama.cpp/scripts/run-embeddings.sh
@@ -0,0 +1,4 @@
+#./llama-embedding -m models/llama-2-7b-chat.Q4_K_M.gguf --no-warmup --pooling mean -p "What is LoRA?"
+#gdb --args ./llama-embedding -m models/llama-2-7b-chat.Q4_K_M.gguf --no-warmup --pooling mean -p "What is LoRA?"
+#gdb --args ./llama-embedding -m models/llama-2-7b-chat.Q4_K_M.gguf --no-warmup --pooling last -p "What is LoRA?"
+gdb --args ./llama-embedding -m models/llama-2-7b-chat.Q4_K_M.gguf --no-warmup --pooling cls -p "What is LoRA?"
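Note (not part of the diff): convert-gemma2.sh writes models/gemma-2-9b-it.gguf and inspect-model.sh takes the model path as its first argument, so the two can be chained to verify a conversion. A typical follow-up, assuming both are invoked from the directory that contains venv/ and models/ (the scripts/ prefix is illustrative):

# dump the GGUF metadata of the freshly converted model
scripts/inspect-model.sh models/gemma-2-9b-it.gguf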
diff --git a/fundamentals/llama.cpp/scripts/run-main.sh b/fundamentals/llama.cpp/scripts/run-main.sh
new file mode 100755
index 00000000..96ea7bd7
--- /dev/null
+++ b/fundamentals/llama.cpp/scripts/run-main.sh
@@ -0,0 +1,41 @@
+#./llama-cli -m models/tinyllama-1.1b-1t-openorca.Q2_K.gguf --prompt 'The answer to 1 + 1 is' -n 5 --verbose-prompt
+#gdb --args ./llama-cli -m models/tinyllama-1.1b-1t-openorca.Q2_K.gguf --prompt 'The answer to 1 + 1 is' -n 5 --verbose-prompt
+
+
+## Run with session file
+#gdb --args ./llama-cli -m models/tinyllama-1.1b-1t-openorca.Q2_K.gguf --prompt 'The answer to 1 + 1 is' -n 5 --verbose-prompt --prompt-cache main-session.txt
+#./llama-cli -m models/tinyllama-1.1b-1t-openorca.Q2_K.gguf --prompt 'The answer to 1 + 1 is' -n 5 --verbose-prompt --prompt-cache main-session.txt
+#gdb --args ./llama-cli -m models/tinyllama-1.1b-1t-openorca.Q2_K.gguf --prompt 'Hello world' -n 3 --verbose-prompt --prompt-cache main-session.txt --dump-kv-cache
+
+
+#gdb --args ./llama-cli -m models/tinyllama-1.1b-1t-openorca.Q2_K.gguf --prompt 'Hello world' -n 3 --verbose-prompt --dump-kv-cache
+
+#gdb --args ./llama-cli -m /home/danbev/.cache/lm-studio/models/TheBloke/phi-2-GGUF/phi-2.Q3_K_M.gguf --prompt 'The answer to 1 + 1 is' -n 5 --verbose-prompt --temp 0
+#./llama-cli -m /home/danbev/.cache/lm-studio/models/TheBloke/phi-2-GGUF/phi-2.Q3_K_M.gguf --prompt 'The answer to 1 + 1 is' -n 5 --verbose-prompt --temp 0 --top-p 0.950 --min-p 0.05 --repeat-penalty 1.1 --typical 1
+
+
+#./llama-cli -m /home/danbev/.cache/lm-studio/models/TheBloke/phi-2-GGUF/phi-2.Q3_K_M.gguf --color -i -r "User:" -f prompts/chat-with-bob.txt
+
+#./llama-cli -m models/gemma-2-9b.gguf -p "<start_of_turn>user
+#What is LoRA?<end_of_turn>
+#<start_of_turn>model"
+
+#./llama-cli -m models/gemma-2-9b.gguf -ngl 15 -p "Hi"
+#./llama-cli -m ~/.cache/lm-studio/models/bartowski/gemma-2-9b-it-GGUF/gemma-2-9b-it-Q4_K_S.gguf -ngl 15 -p "<start_of_turn>user\nWhat is LoRA?<end_of_turn>\n<start_of_turn>model"
+#./llama-cli -m models/gemma-2-9b-it.gguf -ngl 15 -p "<start_of_turn>user\nWhat is LoRA?<end_of_turn>\n<start_of_turn>model"
+#./llama-cli -m models/gemma-2-9b-it.gguf -ngl 15 -p "What is LoRA?"
+#./llama-cli -m models/gemma-2-9b-it.gguf -ngl 15 -p "Dan loves icecream"
+#./llama-cli -m models/gemma-2-9b-it.gguf -dkvc -ngl 15 -p "Dan loves icecream"
+#gdb --args ./llama-cli -m models/gemma-2-9b-it.gguf --grp-attn-n 2 --grp-attn-w 4 -p "Dan loves icecream"
+
+
+#gdb --args ./llama-cli -m models/llama-2-7b.Q4_0.gguf --no-warmup --rope-scaling yarn --rope-freq-scale 1 --yarn-ext-factor 1.0 -ngl 10 -p "What is LoRA?" -n 10
+#./llama-cli -m models/llama-2-7b.Q4_0.gguf --no-warmup --rope-scaling yarn --rope-freq-scale 1 --yarn-ext-factor 1.0 -ngl 10 -p "What is LoRA?" -n 10
+# Testing Self-Extend
+#./llama-cli -m models/llama-2-7b.Q4_0.gguf -ngl 10 --grp-attn-n 4 --grp-attn-w 32 -f pg1184.txt -c 16384 --temp 0
+# llama-2-7b.Q4_0.gguf was trained on a 4096 context size
+#./llama-cli -m models/llama-2-7b.Q4_0.gguf -ngl 10 -f self-extend.txt -c 8192 --temp 0 -n 256 --grp-attn-n 4 --grp-attn-w 256
+#./llama-cli -m models/llama-2-7b.Q4_0.gguf -ngl 10 -f self-extend.txt -c 8192 --temp 0 -n 256
+#./llama-cli -m models/llama-2-7b.Q4_0.gguf -ngl 10 -f self-extend.txt -c 8192 --temp 0 -n 256
+
+./llama-cli -m models/mamba-gpt-7b-q4_0.gguf --no-warmup -ngl 10 -p "What is LoRA?" -n 10
diff --git a/fundamentals/llama.cpp/scripts/run-passkey.sh b/fundamentals/llama.cpp/scripts/run-passkey.sh
new file mode 100755
index 00000000..b8ab3b25
--- /dev/null
+++ b/fundamentals/llama.cpp/scripts/run-passkey.sh
@@ -0,0 +1 @@
+make -j && ./llama-passkey -m ./models/llama-2-7b.Q4_0.gguf --junk 250
diff --git a/fundamentals/llama.cpp/scripts/run-self-extend.sh b/fundamentals/llama.cpp/scripts/run-self-extend.sh
new file mode 100755
index 00000000..e88b27dd
--- /dev/null
+++ b/fundamentals/llama.cpp/scripts/run-self-extend.sh
@@ -0,0 +1,12 @@
+# Testing Self-Extend
+#./llama-cli -m models/llama-2-7b.Q4_0.gguf -ngl 10 --grp-attn-n 4 --grp-attn-w 32 -f pg1184.txt -c 16384 --temp 0
+# llama-2-7b.Q4_0.gguf was trained on a 4096 context size
+#./llama-cli -m models/llama-2-7b.Q4_0.gguf -ngl 10 -f self-extend.txt -c 8192 --temp 0 -n 100 --grp-attn-n 4 --grp-attn-w 32
+#gdb --args ./llama-cli -m /home/danbev/.cache/lm-studio/models/TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF/tinyllama-1.1b-1t-openorca.Q2_K.gguf -ngl 10 -f self-extend.txt -c 4096 --temp 1 -n 100 --grp-attn-n 4 --grp-attn-w 512
+#gdb --args ./llama-cli -m /home/danbev/.cache/lm-studio/models/TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF/tinyllama-1.1b-1t-openorca.Q2_K.gguf -ngl 10 -f self-extend.txt -c 8000 --temp 1 -n 200 --grp-attn-n 4 --grp-attn-w 128
+#gdb --args ./llama-cli -m models/llama-2-7b.Q4_0.gguf -ngl 10 -f self-extend.txt -c 8000 --temp 1 -n 200 --grp-attn-n 2 --grp-attn-w 2048
+#./llama-cli -m models/llama-2-7b.Q4_0.gguf -ngl 10 -f self-extend.txt -c 8000 --temp 1 -n 200 --grp-attn-n 128 --grp-attn-w 2048
+gdb --args ./llama-cli -m models/llama-2-7b.Q4_0.gguf -ngl 10 -f self-extend.txt -c 8000 --temp 1 --grp-attn-n 2 --grp-attn-w 2048
+#./llama-cli -m /home/danbev/.cache/lm-studio/models/TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF/tinyllama-1.1b-1t-openorca.Q2_K.gguf -ngl 10 -f self-extend.txt -c 5000 --temp 1 -n 100 --grp-attn-n 4 --grp-attn-w 32
+#./llama-cli -m models/llama-2-7b.Q4_0.gguf -ngl 10 -f self-extend.txt -c 8192 --temp 0 -n 256
+#./llama-cli -m models/llama-2-7b.Q4_0.gguf -ngl 10 -f self-extend.txt -c 8192 --temp 0 -n 256
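Note (not part of the diff): the active command in run-self-extend.sh asks for -c 8000 from a model whose training context is 4096 (see the comment in the script), relying on --grp-attn-n 2 to roughly double the usable context (2 * 4096 = 8192 >= 8000) with --grp-attn-w 2048 as the group-attention width. A small illustrative check of that arithmetic, not something the scripts themselves do:

n_ctx_train=4096   # training context of llama-2-7b.Q4_0.gguf
grp_attn_n=2       # self-extend group factor used above
requested_ctx=8000 # value passed via -c
if [ "$requested_ctx" -gt $((n_ctx_train * grp_attn_n)) ]; then
    echo "requested context exceeds n_ctx_train * grp-attn-n; increase --grp-attn-n"
fi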
diff --git a/fundamentals/llama.cpp/scripts/run-tests.sh b/fundamentals/llama.cpp/scripts/run-tests.sh
new file mode 100755
index 00000000..3781e8c7
--- /dev/null
+++ b/fundamentals/llama.cpp/scripts/run-tests.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+echo "Running tests target..."
+cmake --build build --target test --verbose
diff --git a/fundamentals/llama.cpp/scripts/run-tokenize.sh b/fundamentals/llama.cpp/scripts/run-tokenize.sh
new file mode 100755
index 00000000..eabb9e68
--- /dev/null
+++ b/fundamentals/llama.cpp/scripts/run-tokenize.sh
@@ -0,0 +1,6 @@
+#./llama-tokenize -m models/llama-2-7b.Q4_0.gguf -f self-extend.txt --show-count
+
+#./llama-tokenize -m /home/danbev/.cache/lm-studio/models/TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF/tinyllama-1.1b-1t-openorca.Q2_K.gguf -f /home/danbev/work/lmstudio/llmster/electron/vendor/llm-engine/gtest/data/self-extend-test.txt --show-count
+
+./llama-tokenize -m /home/danbev/.cache/lm-studio/models/TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF/tinyllama-1.1b-1t-openorca.Q2_K.gguf -f self-extend.txt --show-count
+#./llama-tokenize -m models/llama-2-7b.Q4_0.gguf -f self-extend.txt --show-count
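Note (not part of the diff): --show-count makes llama-tokenize report how many tokens the input file occupies, which is a quick way to confirm that self-extend.txt really exceeds the 4096-token training context before running the self-extend experiments above. For example, with the model and flags already used in run-self-extend.sh:

# print the token count of the long prompt used for the self-extend runs
./llama-tokenize -m models/llama-2-7b.Q4_0.gguf -f self-extend.txt --show-count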