Add -e llm.gpu boolean config variable
radare committed May 9, 2024
1 parent 9caaea7 commit 66e72b0
Showing 2 changed files with 18 additions and 16 deletions.
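
In short: the commit adds a boolean config variable, llm.gpu (default "true"), that decides whether llama.cpp offloads model layers to the GPU. A new gpulayers() helper in r2ai/models.py translates the string value into llama.cpp's n_gpu_layers argument: -1 offloads every layer to the GPU, 0 keeps inference on the CPU. Judging by the commit title, the variable is meant to be set with the usual -e syntax (e.g. -e llm.gpu=false); that syntax itself is not shown in this diff. Below is a minimal standalone sketch of the mapping, reusing the names from the diff; DummyAI and the model path are placeholders for illustration, not part of the commit.

    # Minimal sketch: how the llm.gpu string maps to llama.cpp's n_gpu_layers.
    # Assumes only that config values are stored as strings in an .env dict,
    # as the diff below does. Requires the llama-cpp-python package.
    import llama_cpp

    def gpulayers(ai):
        # "true" -> -1 (offload all layers to the GPU); anything else -> 0 (CPU only)
        if ai.env.get("llm.gpu") == "true":
            return -1
        return 0

    class DummyAI:
        # Hypothetical stand-in for the Interpreter instance; only .env is read here.
        def __init__(self, use_gpu="true"):
            self.env = {"llm.gpu": use_gpu}

    llm = llama_cpp.Llama(model_path="/path/to/model.gguf",           # placeholder path
                          n_gpu_layers=gpulayers(DummyAI("false")),   # force a CPU-only run
                          n_ctx=8096)                                 # matches the llm.window default
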
7 changes: 4 additions & 3 deletions r2ai/interpreter.py
@@ -518,6 +518,7 @@ def __init__(self):
         self.system_message = ""
         self.env["debug"] = "false"
         self.env["llm.model"] = self.model ## TODO: dup. must get rid of self.model
+        self.env["llm.gpu"] = "true"
         self.env["llm.window"] = "8096" # "4096" # context_window
         self.env["llm.maxtokens"] = "4096" # "1750"
         self.env["llm.maxmsglen"] = "8096" # "1750"
@@ -623,7 +624,7 @@ def keywords_ai(self, text):
         words = []
         mmname = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
         ctxwindow = int(self.env["llm.window"])
-        mm = new_get_hf_llm(mmname, False, ctxwindow)
+        mm = new_get_hf_llm(self, mmname, False, ctxwindow)
         msg = f"Considering the sentence \"{text}\" as input, Take the KEYWORDS or combination of TWO words from the given text and respond ONLY a comma separated list of the most relevant words. DO NOT introduce your response, ONLY show the words"
         msg = f"Take \"{text}\" as input, and extract the keywords and combination of keywords to make a search online, the output must be a comma separated list" #Take the KEYWORDS or combination of TWO words from the given text and respond ONLY a comma separated list of the most relevant words. DO NOT introduce your response, ONLY show the words"
         response = mm(msg, stream=False, temperature=0.1, stop="</s>", max_tokens=1750)
@@ -682,7 +683,7 @@ def chat(self, message=None):
             try:
                 ctxwindow = int(self.env["llm.window"])
                 debug_mode = False # maybe true when debuglevel=2 ?
-                self.llama_instance = new_get_hf_llm(self.model, debug_mode, ctxwindow)
+                self.llama_instance = new_get_hf_llm(self, self.model, debug_mode, ctxwindow)
                 if self.llama_instance == None:
                     builtins.print("Cannot find the model")
                     return
@@ -767,7 +768,7 @@ def trimsource_ai(self, msg):
         mmname = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
         mmname = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
         ctxwindow = int(self.env["llm.window"])
-        self.mistral = new_get_hf_llm(mmname, False, ctxwindow)
+        self.mistral = new_get_hf_llm(self, mmname, False, ctxwindow)
         # q = f"Rewrite this code into shorter pseudocode (less than 500 tokens). keep the comments and essential logic:\n```\n{msg}\n```\n"
         q = f"Rewrite this code into shorter pseudocode (less than 200 tokens). keep the relevant comments and essential logic:\n```\n{msg}\n```\n"
         response = self.mistral(q, stream=False, temperature=0.1, stop="</s>", max_tokens=4096)
27 changes: 14 additions & 13 deletions r2ai/models.py
@@ -122,8 +122,15 @@ def models():
 -m Undi95/UtopiaXL-13B-GGUF
 """

-def get_hf_llm(repo_id, debug_mode, context_window):
-    n_gpu_layers = -1
+def gpulayers(ai):
+    if "llm.gpu" in ai.env:
+        if ai.env["llm.gpu"] == "true":
+            print("[r2ai] Using GPU")
+            return -1
+    print("[r2ai] Using CPU")
+    return 0
+
+def get_hf_llm(ai, repo_id, debug_mode, context_window):
     usermodels = None
     try:
         try:
@@ -138,7 +145,7 @@ def get_hf_llm(repo_id, debug_mode, context_window):
         if usermodels is not None and repo_id in usermodels:
             model_path = usermodels[repo_id]
             # print(f"[r2ai] Using {r2ai_model_json} {model_path}")
-            return llama_cpp.Llama(model_path=model_path, n_gpu_layers=n_gpu_layers, verbose=debug_mode, n_ctx=context_window)
+            return llama_cpp.Llama(model_path=model_path, n_gpu_layers=gpulayers(ai), verbose=debug_mode, n_ctx=context_window)
     except:
         traceback.print_exc()
     print(f"Select {repo_id} model. See -M and -m flags", file=sys.stderr)
@@ -188,11 +195,6 @@ def get_hf_llm(repo_id, debug_mode, context_window):
            print("No model selected")
            return
        answers = inquirer.prompt([inquirer.List("default", message="Use this model by default? ~/.r2ai.model", choices=["Yes", "No"])])
-       # Third stage: GPU confirm
-       #if confirm_action("Use GPU? (Large models might crash on GPU, but will run more quickly)"):
-       ## n_gpu_layers = -1
-       # else:
-       #     n_gpu_layers = 0

        # Get user data directory
        user_data_dir = appdirs.user_data_dir("r2ai")
@@ -348,7 +350,7 @@ def supports_metal():
        json.dump(usermodels, fd)
        fd.close()
        print("Saved")
-    return llama_cpp.Llama(model_path=model_path, n_gpu_layers=n_gpu_layers, verbose=debug_mode, n_ctx=context_window)
+    return llama_cpp.Llama(model_path=model_path, n_gpu_layers=gpulayers(ai), verbose=debug_mode, n_ctx=context_window)

 def set_default_model(repo_id):
     usermodels = {"default": repo_id}
@@ -474,13 +476,12 @@ def enough_disk_space(size, path) -> bool:

     return False

-def new_get_hf_llm(repo_id, debug_mode, context_window):
+def new_get_hf_llm(ai, repo_id, debug_mode, context_window):
     if repo_id.startswith("openai:") or repo_id.startswith("anthropic:") or repo_id.startswith("groq:") or repo_id.startswith("google:"):
         return repo_id
     if not os.path.exists(repo_id):
-        return get_hf_llm(repo_id, debug_mode, context_window)
+        return get_hf_llm(ai, repo_id, debug_mode, context_window)
     # print(f"LOADING FILE: {repo_id}")
-    n_gpu_layers = -1 # = 0 to use cpu
     user_data_dir = appdirs.user_data_dir("Open Interpreter")
     default_path = os.path.join(user_data_dir, "models")

@@ -555,4 +556,4 @@ def supports_metal():
     # Initialize and return Code-Llama
     if not os.path.isfile(model_path):
         print("Model is not a file")
-    return llama_cpp.Llama(model_path=model_path, n_gpu_layers=n_gpu_layers, verbose=debug_mode, n_ctx=context_window)
+    return llama_cpp.Llama(model_path=model_path, n_gpu_layers=gpulayers(ai), verbose=debug_mode, n_ctx=context_window)

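For code that calls into r2ai/models.py, the visible API change is an extra leading argument: get_hf_llm() and new_get_hf_llm() now receive the interpreter object first so they can read env["llm.gpu"] (inside r2ai the Interpreter simply passes self, as the interpreter.py hunks above show). A hedged call-site sketch; the import path and DummyAI are assumptions for illustration:

    # Assumes the module is importable as r2ai.models and that any object
    # exposing an .env dict of string values is accepted in place of the
    # Interpreter. On first use get_hf_llm() may interactively prompt to
    # select and download a model file.
    from r2ai.models import new_get_hf_llm

    class DummyAI:
        def __init__(self):
            # mirrors the defaults set in Interpreter.__init__ in the diff above
            self.env = {"llm.gpu": "true", "llm.window": "8096"}

    ai = DummyAI()
    ctxwindow = int(ai.env["llm.window"])
    # old shape: new_get_hf_llm(repo_id, debug_mode, ctxwindow)
    # new shape: the ai object goes first
    llm = new_get_hf_llm(ai, "TheBloke/Mistral-7B-Instruct-v0.2-GGUF", False, ctxwindow)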