forked from ArduPilot/MAVProxy
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
81 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
''' | ||
AI Chat Module voice-to-text class | ||
Randy Mackay, December 2023 | ||
''' | ||
|
||
import time | ||
import pyaudio # install using, "sudo apt-get install python3-pyaudio" | ||
import wave # part of the Python standard library, no separate install needed | ||
from openai import OpenAI | ||
|
||
class chat_voice_to_text():
    '''
    Record a short clip from the default microphone and transcribe it
    using OpenAI's Whisper speech-to-text endpoint.
    '''

    # capture settings; shared by the PyAudio input stream and the WAV header
    # so that the two can never disagree
    SAMPLE_RATE = 44100
    NUM_CHANNELS = 1
    FRAMES_PER_BUFFER = 1024
    RECORD_SECONDS = 5
    RECORDING_FILENAME = "recording.wav"

    def __init__(self):
        # create connection object
        self.client = OpenAI()

    # record audio from microphone
    # returns filename of recording or None if failed
    def record_audio(self):
        '''
        Record RECORD_SECONDS of 16-bit mono audio and save it as a WAV file.

        Returns the filename of the recording, or None if the input stream
        could not be opened, reading from it failed, or the file could not
        be written.
        '''
        # Initialize PyAudio
        p = pyaudio.PyAudio()
        try:
            # Open stream
            try:
                stream = p.open(format=pyaudio.paInt16,
                                channels=self.NUM_CHANNELS,
                                rate=self.SAMPLE_RATE,
                                input=True,
                                frames_per_buffer=self.FRAMES_PER_BUFFER)
                print("recording audio: opened stream")
            except Exception:
                # best-effort: any failure to open the device means "no recording"
                print("recording audio: failed to open stream")
                return None

            # query the sample width while the PyAudio instance is still alive
            # (avoids creating a second, never-terminated PyAudio instance)
            sample_width = p.get_sample_size(pyaudio.paInt16)

            frames = []
            try:
                # calculate time recording should stop
                curr_time = time.time()
                time_stop = curr_time + self.RECORD_SECONDS

                # record until specified time
                while curr_time < time_stop:
                    frames.append(stream.read(self.FRAMES_PER_BUFFER))
                    curr_time = time.time()
                    print("recording audio: reading t:" + str(curr_time))
                print("recording audio: data collection complete")
            except OSError as err:
                print("recording audio: failed to read stream: " + str(err))
                return None
            finally:
                # Stop and close the stream even if reading failed
                stream.stop_stream()
                stream.close()
        finally:
            # always release the PortAudio session, including on early return
            p.terminate()

        # Save audio file
        try:
            with wave.open(self.RECORDING_FILENAME, "wb") as wf:
                wf.setnchannels(self.NUM_CHANNELS)
                wf.setsampwidth(sample_width)
                wf.setframerate(self.SAMPLE_RATE)
                wf.writeframes(b''.join(frames))
        except (OSError, wave.Error) as err:
            print("recording audio: failed to save file: " + str(err))
            return None
        return self.RECORDING_FILENAME

    # convert audio to text
    # returns transcribed text on success or None if failed
    def convert_audio_to_text(self, audio_filename):
        '''
        Transcribe an audio file with the "whisper-1" model.

        audio_filename: path of the audio file to send (e.g. the value
        returned by record_audio).  None or an empty string is treated as
        a failed recording.

        Returns the transcribed text, or None if the file could not be
        opened or the transcription request failed.
        '''
        if not audio_filename:
            return None

        try:
            audio_file = open(audio_filename, "rb")
        except OSError as err:
            print("convert audio: failed to open file: " + str(err))
            return None

        # Process with Whisper; the with-block guarantees the file is closed
        with audio_file:
            try:
                # reuse the client created in __init__ instead of building
                # a new connection object on every call
                transcript = self.client.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio_file,
                    response_format="text")
            except Exception as err:
                # boundary with the remote service: report and signal failure
                print("convert audio: transcription failed: " + str(err))
                return None

        # response_format="text" yields a plain string; fall back to the
        # .text attribute in case an object-style response is returned
        return getattr(transcript, "text", transcript)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters