diff --git a/launch.sh b/launch.sh
index 236d816..a2ea306 100755
--- a/launch.sh
+++ b/launch.sh
@@ -4,6 +4,15 @@
 cd ragnarok
 python3 -m venv venv
 source venv/bin/activate
+# The default llama-cpp-python build does not support CUDA, so we have to provide CMAKE_ARGS.
+export LLAMA_CUBLAS=1
+CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip3 install llama-cpp-python==0.2.26
+
+# You can change the visible device values here to specify which GPU(s) you want RAGnarok to use (or not use).
+# For this example I'm only allowing RAGnarok to use GPU device 0.
+# See https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars for more details.
+export CUDA_VISIBLE_DEVICES=0
+
 pip3 install -r requirements.txt
 
 # kick off the main app
diff --git a/ragnarok/RAGnarok_Settings.py b/ragnarok/RAGnarok_Settings.py
index 05aa72d..20e79bb 100755
--- a/ragnarok/RAGnarok_Settings.py
+++ b/ragnarok/RAGnarok_Settings.py
@@ -91,14 +91,14 @@ def wait_for_nemesis(nemesis_url, nemesis_user, nemesis_password, wait_timeout =
 default_index = 0
 if "llm_model" in cookies:
     if "neural-chat" in cookies["llm_model"].lower():
-        default_index = 0
+        default_index = 0
     elif "openchat" in cookies["llm_model"].lower():
         default_index = 1
     elif "starling" in cookies["llm_model"].lower():
         default_index = 2
 llm_model = st.selectbox(
     label='LLM model to use',
-    options=('Intel/neural-chat-7b-v3-3', 'openchat-3.5-0106', 'Starling-LM-7B-alpha'),
+    options=('Intel/neural-chat-7b-v3-3', 'openchat-3.5-1210', 'Starling-LM-7B-alpha'),
     help="The core LLM to use for chat over retrieved document snippets.",
     index=default_index
 )
diff --git a/ragnarok/pages/1_RAGnarok_Chat.py b/ragnarok/pages/1_RAGnarok_Chat.py
index 62b45ca..3b1d176 100644
--- a/ragnarok/pages/1_RAGnarok_Chat.py
+++ b/ragnarok/pages/1_RAGnarok_Chat.py
@@ -102,8 +102,8 @@ def get_reranker(reranking_model, device):
 try:
     if cookies["llm_model"] == "Intel/neural-chat-7b-v3-3":
         llm_model_path = hf_hub_download("TheBloke/neural-chat-7B-v3-3-GGUF", filename="neural-chat-7b-v3-3.Q5_K_M.gguf", local_files_only=True)
-    elif cookies["llm_model"] == "openchat-3.5-0106":
-        llm_model_path = hf_hub_download("TheBloke/openchat-3.5-0106-GGUF", filename="openchat-3.5-0106.Q5_K_M.gguf", local_files_only=True)
+    elif cookies["llm_model"] == "openchat-3.5-1210":
+        llm_model_path = hf_hub_download("TheBloke/openchat-3.5-1210-GGUF", filename="openchat-3.5-1210.Q8_0.gguf", local_files_only=True)
     elif cookies["llm_model"] == "Starling-LM-7B-alpha":
         llm_model_path = hf_hub_download("TheBloke/Starling-LM-7B-alpha-GGUF", filename="starling-lm-7b-alpha.Q5_K_M.gguf", local_files_only=True)
     else:
@@ -113,8 +113,8 @@ def get_reranker(reranking_model, device):
     with st.spinner("Downloading LLM model (this will take some time)..."):
         if cookies["llm_model"] == "Intel/neural-chat-7b-v3-3":
             llm_model_path = hf_hub_download("TheBloke/neural-chat-7B-v3-3-GGUF", filename="neural-chat-7b-v3-3.Q5_K_M.gguf")
-        elif cookies["llm_model"] == "openchat-3.5-0106":
-            llm_model_path = hf_hub_download("TheBloke/openchat-3.5-0106-GGUF", filename="openchat-3.5-0106.Q5_K_M.gguf")
+        elif cookies["llm_model"] == "openchat-3.5-1210":
+            llm_model_path = hf_hub_download("TheBloke/openchat-3.5-1210-GGUF", filename="openchat-3.5-1210.Q8_0.gguf")
         elif cookies["llm_model"] == "Starling-LM-7B-alpha":
             llm_model_path = hf_hub_download("TheBloke/Starling-LM-7B-alpha-GGUF", filename="starling-lm-7b-alpha.Q5_K_M.gguf")
         else:
diff --git a/ragnarok/pages/2_Free_Chat.py b/ragnarok/pages/2_Free_Chat.py
index 4f2eced..643b6e5 100644
--- a/ragnarok/pages/2_Free_Chat.py
+++ b/ragnarok/pages/2_Free_Chat.py
@@ -52,8 +52,8 @@ def get_llm(llm_model_path, n_gpu_layers):
 try:
     if cookies["llm_model"] == "Intel/neural-chat-7b-v3-3":
         llm_model_path = hf_hub_download("TheBloke/neural-chat-7B-v3-3-GGUF", filename="neural-chat-7b-v3-3.Q5_K_M.gguf", local_files_only=True)
-    elif cookies["llm_model"] == "openchat-3.5-0106":
-        llm_model_path = hf_hub_download("TheBloke/openchat-3.5-0106-GGUF", filename="openchat-3.5-0106.Q5_K_M.gguf", local_files_only=True)
+    elif cookies["llm_model"] == "openchat-3.5-1210":
+        llm_model_path = hf_hub_download("TheBloke/openchat-3.5-1210-GGUF", filename="openchat-3.5-1210.Q8_0.gguf", local_files_only=True)
     elif cookies["llm_model"] == "Starling-LM-7B-alpha":
         llm_model_path = hf_hub_download("TheBloke/Starling-LM-7B-alpha-GGUF", filename="starling-lm-7b-alpha.Q5_K_M.gguf", local_files_only=True)
     else:
@@ -63,8 +63,8 @@ def get_llm(llm_model_path, n_gpu_layers):
     with st.spinner("Downloading LLM model (this will take some time)..."):
         if cookies["llm_model"] == "Intel/neural-chat-7b-v3-3":
             llm_model_path = hf_hub_download("TheBloke/neural-chat-7B-v3-3-GGUF", filename="neural-chat-7b-v3-3.Q5_K_M.gguf")
-        elif cookies["llm_model"] == "openchat-3.5-0106":
-            llm_model_path = hf_hub_download("TheBloke/openchat-3.5-0106-GGUF", filename="openchat-3.5-0106.Q5_K_M.gguf")
+        elif cookies["llm_model"] == "openchat-3.5-1210":
+            llm_model_path = hf_hub_download("TheBloke/openchat-3.5-1210-GGUF", filename="openchat-3.5-1210.Q8_0.gguf")
         elif cookies["llm_model"] == "Starling-LM-7B-alpha":
             llm_model_path = hf_hub_download("TheBloke/Starling-LM-7B-alpha-GGUF", filename="starling-lm-7b-alpha.Q5_K_M.gguf")
         else:
diff --git a/ragnarok/requirements.txt b/ragnarok/requirements.txt
index 019221c..c36bed7 100644
--- a/ragnarok/requirements.txt
+++ b/ragnarok/requirements.txt
@@ -3,7 +3,6 @@ sentence-transformers==2.2.2
 huggingface-hub==0.20.3
 langchain==0.1.2
 langchain-community==0.0.14
-llama-cpp-python==0.2.26
 streamlit==1.30.0
 streamlit-extras==0.3.6
-streamlit-cookies-manager==0.2.0
\ No newline at end of file
+streamlit-cookies-manager==0.2.0
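
For reference, the cache-then-download flow both chat pages implement, followed by a GPU-offloaded load, looks roughly like the sketch below. This is a minimal illustration, not the exact RAGnarok code: the repo and filename match the diff, but the `n_gpu_layers` value and the final prompt call are assumptions for demonstration.

```python
# Minimal sketch of the download-then-load path the pages above implement.
# Repo/filename come from the diff; n_gpu_layers=-1 is an illustrative choice.
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

REPO = "TheBloke/openchat-3.5-1210-GGUF"
FILENAME = "openchat-3.5-1210.Q8_0.gguf"

try:
    # First check whether the GGUF file is already in the local Hugging Face cache.
    llm_model_path = hf_hub_download(REPO, filename=FILENAME, local_files_only=True)
except Exception:
    # Not cached yet: download it (this is the st.spinner branch in the pages).
    llm_model_path = hf_hub_download(REPO, filename=FILENAME)

# n_gpu_layers=-1 offloads all layers to the GPU; this only helps when
# llama-cpp-python was built with CUDA, which is what the CMAKE_ARGS line
# in launch.sh arranges. CUDA_VISIBLE_DEVICES controls which GPU is used.
llm = Llama(model_path=llm_model_path, n_gpu_layers=-1)
print(llm("Hello, RAGnarok!", max_tokens=32)["choices"][0]["text"])
```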