Chat: support input from microphone
rmackay9 committed Dec 7, 2023
1 parent 7451c7f · commit bc06bff
Showing 2 changed files with 81 additions and 3 deletions.
MAVProxy/modules/mavproxy_chat/chat_voice_to_text.py (new file, 67 additions, 0 deletions)
@@ -0,0 +1,67 @@
'''
AI Chat Module voice-to-text class
Randy Mackay, December 2023
'''

import time
import pyaudio  # install with "sudo apt-get install python3-pyaudio"
import wave     # part of the Python standard library, no separate install required
from openai import OpenAI

class chat_voice_to_text():
    def __init__(self):
        # create connection object
        self.client = OpenAI()

    # record audio from microphone
    # returns filename of recording or None if failed
    def record_audio(self):
        # Initialize PyAudio
        p = pyaudio.PyAudio()

        # Open stream
        try:
            stream = p.open(format=pyaudio.paInt16, channels=1, rate=44100, input=True, frames_per_buffer=1024)
            print("recording audio: opened stream")
        except Exception:
            print("recording audio: failed to open stream")
            p.terminate()
            return None

        # calculate time recording should stop (5 seconds from now)
        curr_time = time.time()
        time_stop = curr_time + 5

        # record until specified time
        frames = []
        while curr_time < time_stop:
            data = stream.read(1024)
            frames.append(data)
            curr_time = time.time()
            print("recording audio: reading t:" + str(curr_time))
        print("recording audio: data collection complete")

        # Stop and close the stream (sample width is retrieved before PyAudio is terminated)
        stream.stop_stream()
        stream.close()
        sample_width = p.get_sample_size(pyaudio.paInt16)
        p.terminate()

        # Save audio file
        wf = wave.open("recording.wav", "wb")
        wf.setnchannels(1)
        wf.setsampwidth(sample_width)
        wf.setframerate(44100)
        wf.writeframes(b''.join(frames))
        wf.close()
        return "recording.wav"

    # convert audio to text
    # returns transcribed text on success or None if failed
    def convert_audio_to_text(self, audio_filename):
        # Process with Whisper, reusing the client created in __init__
        with open(audio_filename, "rb") as audio_file:
            transcript = self.client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                response_format="text")
        # with response_format="text" the API returns the transcript as a plain string
        return transcript
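
For reference, a minimal sketch of exercising the new class outside of MAVProxy is shown below. It is not part of this commit and assumes the OPENAI_API_KEY environment variable is set and a working microphone is available; the five-second window and "recording.wav" filename are hard-coded in the class above.

# sketch only: drive the new voice-to-text class directly (assumes OPENAI_API_KEY is set)
from MAVProxy.modules.mavproxy_chat import chat_voice_to_text

vtt = chat_voice_to_text.chat_voice_to_text()
filename = vtt.record_audio()  # records roughly 5 seconds to "recording.wav"
if filename is None:
    print("recording failed")
else:
    print("transcript:", vtt.convert_audio_to_text(filename))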
MAVProxy/modules/mavproxy_chat/chat_window.py (14 additions, 3 deletions)
@@ -10,7 +10,7 @@
 from MAVProxy.modules.lib.mp_settings import MPSetting
 from MAVProxy.modules.lib.wx_loader import wx
 from MAVProxy.modules.lib import mp_util
-from MAVProxy.modules.mavproxy_chat import chat_openai
+from MAVProxy.modules.mavproxy_chat import chat_openai, chat_voice_to_text
 from threading import Thread
 from pymavlink import mavutil
 import time
@@ -60,6 +60,9 @@ def __init__(self, mpstate):
         # create chat_openai object
         self.chat_openai = chat_openai.chat_openai(self.mpstate)
 
+        # create chat_voice_to_text object
+        self.chat_voice_to_text = chat_voice_to_text.chat_voice_to_text()
+
         # run chat window in a separate thread
         self.thread = Thread(target=self.idle_task)
         self.thread.start()
@@ -94,8 +97,16 @@ def apikey_set_button_click(self, event):
 
     # record button clicked
     def record_button_click(self, event):
-        print("Record button not yet supported")
-        self.text_reply.SetValue("Record button not yet supported")
+        rec_filename = self.chat_voice_to_text.record_audio()
+        if rec_filename is None:
+            self.text_input.SetValue("Recording failed")
+        else:
+            self.text_input.SetValue("Recorded to " + rec_filename)
+            text = self.chat_voice_to_text.convert_audio_to_text(rec_filename)
+            if text is None:
+                self.text_input.SetValue("Audio to text conversion failed")
+            else:
+                self.text_input.SetValue(text)
 
     # send button clicked
     def send_button_click(self, event):
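The Record button that triggers record_button_click is created elsewhere in chat_window.py and is unchanged by this commit. For readers unfamiliar with wxPython, the event wiring typically looks roughly like the standalone sketch below; the class, label and variable names are assumptions for illustration, not code from the repository.

# hypothetical sketch of a wx button bound to a record handler; names are illustrative only
import wx

class DemoFrame(wx.Frame):
    def __init__(self):
        super().__init__(None, title="record demo")
        button = wx.Button(self, label="Rec")
        button.Bind(wx.EVT_BUTTON, self.record_button_click)

    def record_button_click(self, event):
        # chat_window.py calls chat_voice_to_text from its handler at this point
        print("record button clicked")

app = wx.App()
DemoFrame().Show()
app.MainLoop()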
