Codestral tts #1302

Closed · wants to merge 6 commits
100 changes: 100 additions & 0 deletions interpreter/terminal_interface/profiles/defaults/codestral_voice.py
@@ -0,0 +1,100 @@
"""
This is an Open Interpreter profile. It configures Open Interpreter to run `codestral` using Ollama.

Images sent to the model will be described with `moondream`.
"""

import threading

from dimits import Dimits

from interpreter import interpreter

interpreter.system_message = """You are an AI assistant that writes markdown code snippets to answer the user's request. You speak very concisely and quickly, you say nothing irrelevant to the user's request. For example:

User: Open the chrome app.
Assistant: On it.
```python
import webbrowser
webbrowser.open('https://chrome.google.com')
```
User: The code you ran produced no output. Was this expected, or are we finished?
Assistant: No further action is required; the provided snippet opens Chrome.
User: How large are all the files on my desktop combined?
Assistant: I will sum up the file sizes of every file on your desktop.
```python
import os
import string
from pathlib import Path

# Get the user's home directory in a cross-platform way
home_dir = Path.home()

# Define the path to the desktop
desktop_dir = home_dir / 'Desktop'

# Initialize a variable to store the total size
total_size = 0

# Loop through all files on the desktop
for file in desktop_dir.iterdir():
# Add the file size to the total
total_size += file.stat().st_size

# Print the total size
print(f"The total size of all files on the desktop is {total_size} bytes.")
```
User: I executed that code. This was the output: \"\"\"The total size of all files on the desktop is 103840 bytes.\"\"\"\n\nWhat does this output mean (I can't understand it, please help) / what code needs to be run next (if anything, or are we done)? I can't replace any placeholders.
Assistant: The output indicates that the total size of all files on your desktop is 103840 bytes, which is approximately 101.4 KB or 0.1 MB. We are finished.

NEVER use placeholders. Always specify exact paths, and determine the desktop, documents, current working directory, and other common folders in a cross-platform way.

Now, your turn:"""

# Message templates
interpreter.code_output_template = '''I executed that code. This was the output: """{content}"""\n\nWhat does this output mean (I can't understand it, please help) / what code needs to be run next (if anything, or are we done)? I can't replace any placeholders.'''
interpreter.empty_code_output_template = "The code above was executed on my machine. It produced no text output. What's next (if anything, or are we done?)"
interpreter.code_output_sender = "user"

# LLM settings
interpreter.llm.model = "openai/codestral"
interpreter.llm.api_base = "http://localhost:1234/v1"
interpreter.llm.api_key = "x"
interpreter.llm.supports_functions = False
interpreter.llm.execution_instructions = False
interpreter.llm.max_tokens = 1000
interpreter.llm.context_window = 7000
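
# Note (assumption, not stated in this PR): http://localhost:1234/v1 is the
# default address of LM Studio's OpenAI-compatible server. If codestral is
# served with Ollama instead, its OpenAI-compatible endpoint is usually
# http://localhost:11434/v1.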

# Computer settings
interpreter.computer.import_computer_api = False

# Misc settings
interpreter.auto_run = False
interpreter.offline = True

# Final message
interpreter.display_message(
"> Model set to `codestral`\n\n**Open Interpreter** will require approval before running code.\n\nUse `interpreter -y` to bypass this.\n\nPress `CTRL-C` to exit.\n"
)

# Initialize Dimits with the desired voice model
dt = Dimits('en_US-amy-medium', verbose=False)


# Speak text in a separate thread so playback never blocks the chat stream
def text_to_speech_non_blocking(text):
    dt.text_2_speech(text, engine="aplay")

# Run the interpreter, speaking each completed assistant message aloud
for chunk in interpreter.chat(display=True, stream=True):
    if chunk["type"] == "message":
        if "end" in chunk:
            text = interpreter.messages[-1]["content"].strip()

            # Convert text to audio in a non-blocking way
            tts_thread = threading.Thread(
                target=text_to_speech_non_blocking, args=(text,)
            )
            tts_thread.start()
    if chunk["type"] == "confirmation":
        print(chunk)
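
For reviewers who want to try the playback path in isolation: a minimal sketch of the same non-blocking Dimits pattern the profile uses above, assuming the `dimits` package, the `en_US-amy-medium` Piper voice, and the `aplay` utility are available.

```python
import threading

from dimits import Dimits

dt = Dimits("en_US-amy-medium", verbose=False)


def speak(text):
    # text_2_speech synthesizes the text and plays it with the aplay engine,
    # exactly as in the profile; the thread keeps playback off the main thread.
    threading.Thread(
        target=dt.text_2_speech, args=(text,), kwargs={"engine": "aplay"}
    ).start()


speak("Model set to codestral.")
```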

115 changes: 115 additions & 0 deletions interpreter/terminal_interface/utils/live_transcribe.py
@@ -0,0 +1,115 @@
import threading
import time

import numpy as np
import pyaudio
import whisper

class LiveTranscriber:
    def __init__(self, model_name="base", rate=16000, chunk=1024, buffer_seconds=5, silence_threshold=500, silence_duration=3):
        self.model = whisper.load_model(model_name)
        self.rate = rate
        self.chunk = chunk
        self.buffer_seconds = buffer_seconds
        self.silence_threshold = silence_threshold  # currently unused
        self.silence_duration = silence_duration  # currently unused
        self.buffer = []
        self.pause_event = threading.Event()
        self.stop_event = threading.Event()
        self.buffer_lock = threading.Lock()
        self.p = pyaudio.PyAudio()
        self.stream = self.p.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=self.rate,
            input=True,
            frames_per_buffer=self.chunk,
        )
        self.recording_thread = threading.Thread(target=self.record_audio)
        self.transcribing_thread = threading.Thread(target=self.transcribe_audio)
        self.transcription_generator = self._transcription_generator()

    def start(self):
        self.recording_thread.start()
        self.transcribing_thread.start()

    def stop(self):
        self.stop_event.set()
        self.recording_thread.join()
        self.transcribing_thread.join()
        self.stream.stop_stream()
        self.stream.close()
        self.p.terminate()
        print("Stopped successfully.")

    def record_audio(self):
        print("Recording...")
        try:
            while not self.stop_event.is_set():
                if self.pause_event.is_set():
                    time.sleep(0.1)
                    continue

                data = self.stream.read(self.chunk, exception_on_overflow=False)
                max_chunks = int(self.rate / self.chunk * self.buffer_seconds)
                with self.buffer_lock:
                    self.buffer.append(data)
                    # Keep only the most recent buffer_seconds of audio
                    if len(self.buffer) > max_chunks:
                        self.buffer = self.buffer[-max_chunks:]
        except Exception as e:
            print(f"Recording error: {e}")
        finally:
            self.stop_event.set()

    def transcribe_audio(self):
        # Thread worker: a generator function can't run as a thread target,
        # so this consumes the shared generator and prints each transcription.
        try:
            for transcription in self.transcription_generator:
                print("Transcription:", transcription)
        except Exception as e:
            print(f"Transcription error: {e}")
        finally:
            self.stop_event.set()

    def _transcription_generator(self):
        while not self.stop_event.is_set():
            if self.pause_event.is_set():
                time.sleep(0.1)
                continue

            time.sleep(self.buffer_seconds)
            audio_data = None
            with self.buffer_lock:
                if self.buffer:
                    # Scale int16 samples to the float32 range Whisper expects
                    audio_data = np.frombuffer(b"".join(self.buffer), dtype=np.int16).astype(np.float32) / 32768.0
                    self.buffer = []

            if audio_data is not None and len(audio_data) > 0:
                result = self.model.transcribe(audio_data)
                if result["text"].strip():
                    yield result["text"]

    def toggle_pause_resume(self):
        if self.pause_event.is_set():
            print("Resuming transcription.")
            self.pause_event.clear()
        else:
            print("Pausing transcription.")
            self.pause_event.set()

    def pause(self):
        print("Pausing transcription.")
        self.pause_event.set()

    def resume(self):
        print("Resuming transcription.")
        self.pause_event.clear()

if __name__ == "__main__":
live_transcriber = LiveTranscriber()
live_transcriber.start()

for text in live_transcriber._transcription_generator():
print(text)

# Manual pause and resume control
#live_transcriber.manual_pause_resume() # Toggle pause/resume
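
A brief driver sketch for the pause/resume controls above (hypothetical reviewer code, not part of this PR; it assumes a working microphone plus the `pyaudio`, `numpy`, and `whisper` dependencies):

```python
import time

from interpreter.terminal_interface.utils.live_transcribe import LiveTranscriber

live = LiveTranscriber(model_name="base", buffer_seconds=5)
live.start()

time.sleep(15)  # transcribe roughly three 5-second buffers
live.pause()    # recording and transcription both go idle
time.sleep(5)
live.resume()

live.stop()     # joins both threads and closes the PyAudio stream
```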
