diff --git a/launch.sh b/launch.sh
index 236d816..a2ea306 100755
--- a/launch.sh
+++ b/launch.sh
@@ -4,6 +4,15 @@
 cd ragnarok
 python3 -m venv venv
 source venv/bin/activate
+# The default llama-cpp-python build does not support CUDA, so we have to provide CMAKE_ARGS.
+export LLAMA_CUBLAS=1
+CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip3 install llama-cpp-python==0.2.26
+
+# You can change the visible device values here to specify which GPU(s) you want RAGnarok to use (or not use).
+# For this example I'm only allowing RAGnarok to use GPU device 0.
+# See https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars for more details.
+export CUDA_VISIBLE_DEVICES=0
+
 pip3 install -r requirements.txt
 
 # kick off the main app
diff --git a/ragnarok/RAGnarok_Settings.py b/ragnarok/RAGnarok_Settings.py
index 05aa72d..20e79bb 100755
--- a/ragnarok/RAGnarok_Settings.py
+++ b/ragnarok/RAGnarok_Settings.py
@@ -91,14 +91,14 @@ def wait_for_nemesis(nemesis_url, nemesis_user, nemesis_password, wait_timeout =
 default_index = 0
 if "llm_model" in cookies:
     if "neural-chat" in cookies["llm_model"].lower():
-        default_index = 0
+        default_index = 0
     elif "openchat" in cookies["llm_model"].lower():
         default_index = 1
     elif "starling" in cookies["llm_model"].lower():
         default_index = 2
 llm_model = st.selectbox(
     label='LLM model to use',
-    options=('Intel/neural-chat-7b-v3-3', 'openchat-3.5-0106', 'Starling-LM-7B-alpha'),
+    options=('Intel/neural-chat-7b-v3-3', 'openchat-3.5-1210', 'Starling-LM-7B-alpha'),
     help="The core LLM to use for chat over retrieved document snippets.",
     index=default_index
 )
diff --git a/ragnarok/pages/1_RAGnarok_Chat.py b/ragnarok/pages/1_RAGnarok_Chat.py
index 62b45ca..3b1d176 100644
--- a/ragnarok/pages/1_RAGnarok_Chat.py
+++ b/ragnarok/pages/1_RAGnarok_Chat.py
@@ -102,8 +102,8 @@ def get_reranker(reranking_model, device):
 try:
     if cookies["llm_model"] == "Intel/neural-chat-7b-v3-3":
         llm_model_path = hf_hub_download("TheBloke/neural-chat-7B-v3-3-GGUF", filename="neural-chat-7b-v3-3.Q5_K_M.gguf", local_files_only=True)
-    elif cookies["llm_model"] == "openchat-3.5-0106":
-        llm_model_path = hf_hub_download("TheBloke/openchat-3.5-0106-GGUF", filename="openchat-3.5-0106.Q5_K_M.gguf", local_files_only=True)
+    elif cookies["llm_model"] == "openchat-3.5-1210":
+        llm_model_path = hf_hub_download("TheBloke/openchat-3.5-1210-GGUF", filename="openchat-3.5-1210.Q8_0.gguf", local_files_only=True)
     elif cookies["llm_model"] == "Starling-LM-7B-alpha":
         llm_model_path = hf_hub_download("TheBloke/Starling-LM-7B-alpha-GGUF", filename="starling-lm-7b-alpha.Q5_K_M.gguf", local_files_only=True)
     else:
@@ -113,8 +113,8 @@ def get_reranker(reranking_model, device):
     with st.spinner("Downloading LLM model (this will take some time)..."):
         if cookies["llm_model"] == "Intel/neural-chat-7b-v3-3":
             llm_model_path = hf_hub_download("TheBloke/neural-chat-7B-v3-3-GGUF", filename="neural-chat-7b-v3-3.Q5_K_M.gguf")
-        elif cookies["llm_model"] == "openchat-3.5-0106":
-            llm_model_path = hf_hub_download("TheBloke/openchat-3.5-0106-GGUF", filename="openchat-3.5-0106.Q5_K_M.gguf")
+        elif cookies["llm_model"] == "openchat-3.5-1210":
+            llm_model_path = hf_hub_download("TheBloke/openchat-3.5-1210-GGUF", filename="openchat-3.5-1210.Q8_0.gguf")
         elif cookies["llm_model"] == "Starling-LM-7B-alpha":
             llm_model_path = hf_hub_download("TheBloke/Starling-LM-7B-alpha-GGUF", filename="starling-lm-7b-alpha.Q5_K_M.gguf")
         else:
diff --git a/ragnarok/pages/2_Free_Chat.py b/ragnarok/pages/2_Free_Chat.py
index 4f2eced..643b6e5 100644
--- a/ragnarok/pages/2_Free_Chat.py
+++ b/ragnarok/pages/2_Free_Chat.py
@@ -52,8 +52,8 @@ def get_llm(llm_model_path, n_gpu_layers):
 try:
     if cookies["llm_model"] == "Intel/neural-chat-7b-v3-3":
         llm_model_path = hf_hub_download("TheBloke/neural-chat-7B-v3-3-GGUF", filename="neural-chat-7b-v3-3.Q5_K_M.gguf", local_files_only=True)
-    elif cookies["llm_model"] == "openchat-3.5-0106":
-        llm_model_path = hf_hub_download("TheBloke/openchat-3.5-0106-GGUF", filename="openchat-3.5-0106.Q5_K_M.gguf", local_files_only=True)
+    elif cookies["llm_model"] == "openchat-3.5-1210":
+        llm_model_path = hf_hub_download("TheBloke/openchat-3.5-1210-GGUF", filename="openchat-3.5-1210.Q8_0.gguf", local_files_only=True)
     elif cookies["llm_model"] == "Starling-LM-7B-alpha":
         llm_model_path = hf_hub_download("TheBloke/Starling-LM-7B-alpha-GGUF", filename="starling-lm-7b-alpha.Q5_K_M.gguf", local_files_only=True)
     else:
@@ -63,8 +63,8 @@ def get_llm(llm_model_path, n_gpu_layers):
     with st.spinner("Downloading LLM model (this will take some time)..."):
         if cookies["llm_model"] == "Intel/neural-chat-7b-v3-3":
             llm_model_path = hf_hub_download("TheBloke/neural-chat-7B-v3-3-GGUF", filename="neural-chat-7b-v3-3.Q5_K_M.gguf")
-        elif cookies["llm_model"] == "openchat-3.5-0106":
-            llm_model_path = hf_hub_download("TheBloke/openchat-3.5-0106-GGUF", filename="openchat-3.5-0106.Q5_K_M.gguf")
+        elif cookies["llm_model"] == "openchat-3.5-1210":
+            llm_model_path = hf_hub_download("TheBloke/openchat-3.5-1210-GGUF", filename="openchat-3.5-1210.Q8_0.gguf")
         elif cookies["llm_model"] == "Starling-LM-7B-alpha":
             llm_model_path = hf_hub_download("TheBloke/Starling-LM-7B-alpha-GGUF", filename="starling-lm-7b-alpha.Q5_K_M.gguf")
         else:
diff --git a/ragnarok/requirements.txt b/ragnarok/requirements.txt
index 019221c..c36bed7 100644
--- a/ragnarok/requirements.txt
+++ b/ragnarok/requirements.txt
@@ -3,7 +3,6 @@ sentence-transformers==2.2.2
 huggingface-hub==0.20.3
 langchain==0.1.2
 langchain-community==0.0.14
-llama-cpp-python==0.2.26
 streamlit==1.30.0
 streamlit-extras==0.3.6
-streamlit-cookies-manager==0.2.0
\ No newline at end of file
+streamlit-cookies-manager==0.2.0
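
For reference, the cache-then-download flow both chat pages implement, followed by a GPU-offloaded load, looks roughly like the sketch below. This is a minimal illustration, not the exact RAGnarok code: the repo and filename match the diff, but the `n_gpu_layers` value and the final prompt call are assumptions for demonstration.

```python
# Minimal sketch of the download-then-load path the pages above implement.
# Repo/filename come from the diff; n_gpu_layers=-1 is an illustrative choice.
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

REPO = "TheBloke/openchat-3.5-1210-GGUF"
FILENAME = "openchat-3.5-1210.Q8_0.gguf"

try:
    # First check whether the GGUF file is already in the local Hugging Face cache.
    llm_model_path = hf_hub_download(REPO, filename=FILENAME, local_files_only=True)
except Exception:
    # Not cached yet: download it (this is the st.spinner branch in the pages).
    llm_model_path = hf_hub_download(REPO, filename=FILENAME)

# n_gpu_layers=-1 offloads all layers to the GPU; this only helps when
# llama-cpp-python was built with CUDA, which is what the CMAKE_ARGS line
# in launch.sh arranges. CUDA_VISIBLE_DEVICES controls which GPU is used.
llm = Llama(model_path=llm_model_path, n_gpu_layers=-1)
print(llm("Hello, RAGnarok!", max_tokens=32)["choices"][0]["text"])
```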