Codestral tts #1302

Closed · wants to merge 6 commits
100 changes: 100 additions & 0 deletions interpreter/terminal_interface/profiles/defaults/codestral_voice.py
@@ -0,0 +1,100 @@
"""
This is an Open Interpreter profile. It configures Open Interpreter to run `codestral` using Ollama.

Images sent to the model will be described with `moondream`.
"""

import threading

from dimits import Dimits

from interpreter import interpreter

interpreter.system_message = """You are an AI assistant that writes markdown code snippets to answer the user's request. You speak very concisely and quickly, you say nothing irrelevant to the user's request. For example:

User: Open the chrome app.
Assistant: On it.
```python
import webbrowser
webbrowser.open('https://chrome.google.com')
```
User: The code you ran produced no output. Was this expected, or are we finished?
Assistant: No further action is required; the provided snippet opens Chrome.
User: How large are all the files on my desktop combined?
Assistant: I will sum up the file sizes of every file on your desktop.
```python
import os
import string
from pathlib import Path

# Get the user's home directory in a cross-platform way
home_dir = Path.home()

# Define the path to the desktop
desktop_dir = home_dir / 'Desktop'

# Initialize a variable to store the total size
total_size = 0

# Loop through all files on the desktop
for file in desktop_dir.iterdir():
# Add the file size to the total
total_size += file.stat().st_size

# Print the total size
print(f"The total size of all files on the desktop is {total_size} bytes.")
```
User: I executed that code. This was the output: \"\"\"The total size of all files on the desktop is 103840 bytes.\"\"\"\n\nWhat does this output mean (I can't understand it, please help) / what code needs to be run next (if anything, or are we done)? I can't replace any placeholders.
Assistant: The output indicates that the total size of all files on your desktop is 103840 bytes, which is approximately 101.4 KB or 0.1 MB. We are finished.

NEVER use placeholders. Always specify exact paths, and determine the desktop, documents, current working directory, and other common folders in a cross-platform way.

Now, your turn:"""

# Message templates
interpreter.code_output_template = '''I executed that code. This was the output: """{content}"""\n\nWhat does this output mean (I can't understand it, please help) / what code needs to be run next (if anything, or are we done)? I can't replace any placeholders.'''
interpreter.empty_code_output_template = "The code above was executed on my machine. It produced no text output. What's next (if anything, or are we done?)"
interpreter.code_output_sender = "user"

# LLM settings
interpreter.llm.model = "openai/codestral"
interpreter.llm.api_base = "http://localhost:1234/v1"
interpreter.llm.api_key = "x"
interpreter.llm.supports_functions = False
interpreter.llm.execution_instructions = False
interpreter.llm.max_tokens = 1000
interpreter.llm.context_window = 7000
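
# Note (assumption, not stated in this PR): http://localhost:1234/v1 is the
# default address of LM Studio's OpenAI-compatible server. If codestral is
# served with Ollama instead, its OpenAI-compatible endpoint is usually
# http://localhost:11434/v1.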

# Computer settings
interpreter.computer.import_computer_api = False

# Misc settings
interpreter.auto_run = False
interpreter.offline = True

# Final message
interpreter.display_message(
"> Model set to `codestral`\n\n**Open Interpreter** will require approval before running code.\n\nUse `interpreter -y` to bypass this.\n\nPress `CTRL-C` to exit.\n"
)

# Initialize Dimits with the desired voice model
dt = Dimits('en_US-amy-medium', verbose=False)


# Speak text in a separate thread so playback never blocks the chat stream
def text_to_speech_non_blocking(text):
    dt.text_2_speech(text, engine="aplay")

# Run the interpreter, speaking each completed assistant message aloud
for chunk in interpreter.chat(display=True, stream=True):
    if chunk["type"] == "message":
        if "end" in chunk:
            text = interpreter.messages[-1]["content"].strip()

            # Convert text to audio in a non-blocking way
            tts_thread = threading.Thread(
                target=text_to_speech_non_blocking, args=(text,)
            )
            tts_thread.start()
    if chunk["type"] == "confirmation":
        print(chunk)
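
For reviewers who want to try the playback path in isolation: a minimal sketch of the same non-blocking Dimits pattern the profile uses above, assuming the `dimits` package, the `en_US-amy-medium` Piper voice, and the `aplay` utility are available.

```python
import threading

from dimits import Dimits

dt = Dimits("en_US-amy-medium", verbose=False)


def speak(text):
    # text_2_speech synthesizes the text and plays it with the aplay engine,
    # exactly as in the profile; the thread keeps playback off the main thread.
    threading.Thread(
        target=dt.text_2_speech, args=(text,), kwargs={"engine": "aplay"}
    ).start()


speak("Model set to codestral.")
```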

115 changes: 115 additions & 0 deletions interpreter/terminal_interface/utils/live_transcribe.py
@@ -0,0 +1,115 @@
import threading
import time

import numpy as np
import pyaudio
import whisper

class LiveTranscriber:
    def __init__(self, model_name="base", rate=16000, chunk=1024, buffer_seconds=5, silence_threshold=500, silence_duration=3):
        self.model = whisper.load_model(model_name)
        self.rate = rate
        self.chunk = chunk
        self.buffer_seconds = buffer_seconds
        self.silence_threshold = silence_threshold  # currently unused
        self.silence_duration = silence_duration  # currently unused
        self.buffer = []
        self.pause_event = threading.Event()
        self.stop_event = threading.Event()
        self.buffer_lock = threading.Lock()
        self.p = pyaudio.PyAudio()
        self.stream = self.p.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=self.rate,
            input=True,
            frames_per_buffer=self.chunk,
        )
        self.recording_thread = threading.Thread(target=self.record_audio)
        self.transcribing_thread = threading.Thread(target=self.transcribe_audio)
        self.transcription_generator = self._transcription_generator()

    def start(self):
        self.recording_thread.start()
        self.transcribing_thread.start()

    def stop(self):
        self.stop_event.set()
        self.recording_thread.join()
        self.transcribing_thread.join()
        self.stream.stop_stream()
        self.stream.close()
        self.p.terminate()
        print("Stopped successfully.")

    def record_audio(self):
        print("Recording...")
        try:
            while not self.stop_event.is_set():
                if self.pause_event.is_set():
                    time.sleep(0.1)
                    continue

                data = self.stream.read(self.chunk, exception_on_overflow=False)
                max_chunks = int(self.rate / self.chunk * self.buffer_seconds)
                with self.buffer_lock:
                    self.buffer.append(data)
                    # Keep only the most recent buffer_seconds of audio
                    if len(self.buffer) > max_chunks:
                        self.buffer = self.buffer[-max_chunks:]
        except Exception as e:
            print(f"Recording error: {e}")
        finally:
            self.stop_event.set()

    def transcribe_audio(self):
        # Thread worker: a generator function can't run as a thread target,
        # so this consumes the shared generator and prints each transcription.
        try:
            for transcription in self.transcription_generator:
                print("Transcription:", transcription)
        except Exception as e:
            print(f"Transcription error: {e}")
        finally:
            self.stop_event.set()

    def _transcription_generator(self):
        while not self.stop_event.is_set():
            if self.pause_event.is_set():
                time.sleep(0.1)
                continue

            time.sleep(self.buffer_seconds)
            audio_data = None
            with self.buffer_lock:
                if self.buffer:
                    # Scale int16 samples to the float32 range Whisper expects
                    audio_data = np.frombuffer(b"".join(self.buffer), dtype=np.int16).astype(np.float32) / 32768.0
                    self.buffer = []

            if audio_data is not None and len(audio_data) > 0:
                result = self.model.transcribe(audio_data)
                if result["text"].strip():
                    yield result["text"]

    def toggle_pause_resume(self):
        if self.pause_event.is_set():
            print("Resuming transcription.")
            self.pause_event.clear()
        else:
            print("Pausing transcription.")
            self.pause_event.set()

    def pause(self):
        print("Pausing transcription.")
        self.pause_event.set()

    def resume(self):
        print("Resuming transcription.")
        self.pause_event.clear()

if __name__ == "__main__":
live_transcriber = LiveTranscriber()
live_transcriber.start()

for text in live_transcriber._transcription_generator():
print(text)

# Manual pause and resume control
#live_transcriber.manual_pause_resume() # Toggle pause/resume
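
A brief driver sketch for the pause/resume controls above (hypothetical reviewer code, not part of this PR; it assumes a working microphone plus the `pyaudio`, `numpy`, and `whisper` dependencies):

```python
import time

from interpreter.terminal_interface.utils.live_transcribe import LiveTranscriber

live = LiveTranscriber(model_name="base", buffer_seconds=5)
live.start()

time.sleep(15)  # transcribe roughly three 5-second buffers
live.pause()    # recording and transcription both go idle
time.sleep(5)
live.resume()

live.stop()     # joins both threads and closes the PyAudio stream
```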
