diff --git a/MAVProxy/modules/mavproxy_chat/__init__.py b/MAVProxy/modules/mavproxy_chat/__init__.py
new file mode 100644
index 0000000000..22472c77eb
--- /dev/null
+++ b/MAVProxy/modules/mavproxy_chat/__init__.py
@@ -0,0 +1,88 @@
+'''
+AI Chat Module
+Randy Mackay, December 2023
+
+This module allows MAVProxy to interface with OpenAI Assistants
+'''
+
+from MAVProxy.modules.lib import mp_module
+from MAVProxy.modules.lib import mp_settings
+from MAVProxy.modules.lib.mp_settings import MPSetting
+from MAVProxy.modules.lib import mp_util
+from MAVProxy.modules.lib.wx_loader import wx
+from MAVProxy.modules.mavproxy_chat import chat_window
+from threading import Thread
+from pymavlink import mavutil
+import time
+
+if mp_util.has_wxpython:
+    from MAVProxy.modules.lib.mp_menu import MPMenuItem
+    from MAVProxy.modules.lib.mp_menu import MPMenuSubMenu
+
+class chat(mp_module.MPModule):
+    def __init__(self, mpstate):
+
+        # call parent class
+        super(chat, self).__init__(mpstate, "chat", "OpenAI chat support")
+
+        # register module and commands
+        self.add_command('chat', self.cmd_chat, "chat module",
+                         ["set", "show", "status",
+                          ])
+
+        # add user settable settings
+        self.chat_settings = mp_settings.MPSettings(
+            [('verbose', bool, False),
+             ])
+
+        # keep reference to mpstate
+        self.mpstate = mpstate
+
+        # init mavlink message counters
+        self.packets_mytarget = 0
+        self.packets_othertarget = 0
+
+        # add menu to map and console
+        if mp_util.has_wxpython:
+            menu = MPMenuSubMenu('Chat',
+                                 items=[
+                                     MPMenuItem('Show', 'show', 'show chat input window'),
+                                 ])
+            map = self.module('map')
+            if map is not None:
+                map.add_menu(menu)
+            console = self.module('console')
+            if console is not None:
+                console.add_menu(menu)
+
+        # create chat window
+        self.chat_window = chat_window.chat_window(self.mpstate)
+
+    # show help on command line options
+    def usage(self):
+        return "Usage: chat <set|show|status>"
+
+    # control behaviour of the module
+    def cmd_chat(self, args):
+        if len(args) == 0:
+            print(self.usage())
+        elif args[0] == "show":
+            self.show()
+        else:
+            print(self.usage())
+
+    # show chat input window
+    def show(self):
+        self.chat_window.show()
+
+    # handle incoming mavlink packets
+    def mavlink_packet(self, m):
+        if m.get_type() == 'GLOBAL_POSITION_INT':
+            if self.settings.target_system == 0 or self.settings.target_system == m.get_srcSystem():
+                self.packets_mytarget += 1
+            else:
+                self.packets_othertarget += 1
+
+# initialise module
+def init(mpstate):
+    return chat(mpstate)
diff --git a/MAVProxy/modules/mavproxy_chat/chat_openai.py b/MAVProxy/modules/mavproxy_chat/chat_openai.py
new file mode 100644
index 0000000000..9678b1f713
--- /dev/null
+++ b/MAVProxy/modules/mavproxy_chat/chat_openai.py
@@ -0,0 +1,350 @@
+'''
+AI Chat Module OpenAI interface
+Randy Mackay, December 2023
+
+OpenAI Assistant API: https://platform.openai.com/docs/api-reference/assistants
+OpenAI Assistant Playground: https://platform.openai.com/playground
+'''
+
+from MAVProxy.modules.lib import mp_module
+from MAVProxy.modules.lib import mp_settings
+from MAVProxy.modules.lib.mp_settings import MPSetting
+from MAVProxy.modules.lib.wx_loader import wx
+from MAVProxy.modules.lib import mp_util
+from threading import Thread
+from pymavlink import mavutil
+import time
+from datetime import datetime
+import json
+from openai import OpenAI
+
+class chat_openai():
+    def __init__(self, mpstate):
+        # keep reference to mpstate
+        self.mpstate = mpstate
+
+        # initialise OpenAI connection
+        self.client = None
+        self.assistant = None
+
+        # default assistant id
+        self.assistant_id = "asst_XqQN1m1JXtIROLqkjcuNYzZ0"
+
+    # check connection to OpenAI assistant and connect if necessary
+    # returns True if connection is good, False if not
+    def check_connection(self):
+        # create connection object
+        if self.client is None:
+            try:
+                self.client = OpenAI()
+            except:
+                print("chat: failed to connect to OpenAI")
+                return False
+
+        # check connection again just to be sure
+        if self.client is None:
+            print("chat: failed to connect to OpenAI")
+            return False
+
+        # get assistant id
+        if self.assistant is None:
+            self.assistant = self.client.beta.assistants.retrieve(self.assistant_id)
+            if self.assistant is None:
+                print("chat: failed to connect to OpenAI assistant")
+                return False
+
+        # if we got this far the connection must be good
+        return True
+
+    # set the OpenAI API key
+    def set_api_key(self, api_key_str):
+        self.client = OpenAI(api_key=api_key_str)
+
+    # send text to assistant
+    def send_to_assistant(self, text):
+        # debug
+        print("chat: sending text to assistant: " + text)
+
+        # check connection
+        if not self.check_connection():
+            return "chat: failed to connect to OpenAI"
+
+        # create new thread
+        self.assistant_thread = self.client.beta.threads.create(
+            messages=[{
+                "role": "user",
+                "content": text,
+            }])
+        if self.assistant_thread is None:
+            return "chat: failed to create assistant thread"
+
+        # create a run
+        self.run = self.client.beta.threads.runs.create(
+            thread_id=self.assistant_thread.id,
+            assistant_id=self.assistant.id
+        )
+        if self.run is None:
+            return "chat: failed to create run"
+
+        # wait for run to complete
+        run_done = False
+        while not run_done:
+            # wait briefly before polling the run's status
+            time.sleep(0.1)
+
+            # retrieve the run
+            latest_run = self.client.beta.threads.runs.retrieve(
+                thread_id=self.assistant_thread.id,
+                run_id=self.run.id
+            )
+
+            # print run status to console
+            print("chat: run status:" + latest_run.status)
+
+            # check run status
+            if latest_run.status in ["queued", "in_progress", "cancelling"]:
+                run_done = False
+            elif latest_run.status in ["cancelled", "failed", "completed", "expired"]:
+                run_done = True
+            elif latest_run.status in ["requires_action"]:
+                self.handle_function_call(latest_run)
+                run_done = False
+            else:
+                print("chat: unrecognised run status: " + latest_run.status)
+                run_done = True
+
+        print("chat: run done!")
+        print(latest_run)
+
+        # retrieve the thread
+        latest_thread = self.client.beta.threads.retrieve(self.assistant_thread.id)
+        if latest_thread is None:
+            return "chat: failed to retrieve thread"
+
+        # retrieve messages on the thread
+        latest_messages = self.client.beta.threads.messages.list(self.assistant_thread.id)
+        if latest_messages is None:
+            return "chat: failed to retrieve messages"
+        print("chat: latest messages:")
+        print(latest_messages)
+
+        # concatenate all messages into a single reply, skipping the first which is our question
+        reply = ""
+        need_newline = False
+        for message in reversed(latest_messages.data[:-1]):
+            print(message.content[0].text.value)
+            if need_newline:
+                reply = reply + "\n"
+            reply = reply + message.content[0].text.value
+            need_newline = True
+
+        if reply is None or reply == "":
+            return "chat: failed to retrieve latest reply"
+        return reply
+
+    # handle function call request from assistant
+    # on success this returns the text response that should be sent to the assistant, returns None on failure
+    def handle_function_call(self, run):
+
+        # sanity check required action (this should never happen)
+        if run.required_action is None:
+            print("chat::handle_function_call: assistant function call empty")
function call empty") + return None + + # check format + if run.required_action.submit_tool_outputs is None: + print("chat::handle_function_call: submit tools outputs empty") + return None + + # display details if more than one function call + tool_num = 0 + for tool_call in run.required_action.submit_tool_outputs.tool_calls: + print("chat::handle_function_call: call:" +str(tool_num) + " fn:" + tool_call.function.name + " tcid:" + str(tool_call.id)) + tool_num = tool_num + 1 + + tool_outputs = [] + tool_num = 0 + for tool_call in run.required_action.submit_tool_outputs.tool_calls: + # init output to None + output = "invalid function call" + recognised_function = False + + # get current date and time + if tool_call.function.name == "get_current_datetime": + recognised_function = True + output = self.get_formatted_date() + + # get vehicle type + if tool_call.function.name == "get_vehicle_type": + recognised_function = True + output = json.dumps(self.get_vehicle_type()) + + # get vehicle state including armed, mode + if tool_call.function.name == "get_vehicle_state": + recognised_function = True + output = json.dumps(self.get_vehicle_state()) + + # get vehicle location and yaw + if tool_call.function.name == "get_vehicle_location_and_yaw": + recognised_function = True + output = json.dumps(self.get_vehicle_location_and_yaw()) + + # send mavlink command_int + if tool_call.function.name == "send_mavlink_command_int": + recognised_function = True + print("chat: send_mavlink_command_int received:") + try: + arguments = json.loads(tool_call.function.arguments) + print(arguments) + output = self.send_mavlink_command_int(arguments) + except: + print("chat::handle_function_call: failed to parse arguments") + + # send mavlink set_position_target_global_int + if tool_call.function.name == "send_mavlink_set_position_target_global_int": + recognised_function = True + print("chat: send_mavlink_set_position_target_global_int received:") + try: + arguments = json.loads(tool_call.function.arguments) + print(arguments) + output = self.send_mavlink_set_position_target_global_int(arguments) + except: + print("chat: send_mavlink_set_position_target_global_int: failed to parse arguments") + + if not recognised_function: + print("chat: handle_function_call: unrecognised function call: " + tool_call.function.name) + output = "unrecognised function call: " + tool_call.function.name + + # debug of reply + print("chat::handle_function_call: replying to call:" +str(tool_num) + " fn:" + tool_call.function.name + " tcid:" + str(tool_call.id)) + tool_num = tool_num + 1 + + # append output to list of outputs + tool_outputs.append({"tool_call_id": tool_call.id, "output": output}) + + # send function replies to assistant + run_reply = self.client.beta.threads.runs.submit_tool_outputs( + thread_id=run.thread_id, + run_id=run.id, + tool_outputs=tool_outputs + ) + + # get the current date and time in the format, Saturday, June 24, 2023 6:14:14 PM + def get_formatted_date(self): + return datetime.now().strftime("%A, %B %d, %Y %I:%M:%S %p") + + # get vehicle vehicle type (e.g. 
"Copter", "Plane", "Rover", "Boat", etc) + def get_vehicle_type(self): + # get vehicle type from latest HEARTBEAT message + hearbeat_msg = self.mpstate.master().messages.get('HEARTBEAT', None) + vehicle_type_str = "unknown" + if hearbeat_msg is not None: + if hearbeat_msg.type in [mavutil.mavlink.MAV_TYPE_FIXED_WING, + mavutil.mavlink.MAV_TYPE_VTOL_DUOROTOR, + mavutil.mavlink.MAV_TYPE_VTOL_QUADROTOR, + mavutil.mavlink.MAV_TYPE_VTOL_TILTROTOR]: + vehicle_type_str = 'Plane' + if hearbeat_msg.type == mavutil.mavlink.MAV_TYPE_GROUND_ROVER: + vehicle_type_str = 'Rover' + if hearbeat_msg.type == mavutil.mavlink.MAV_TYPE_SURFACE_BOAT: + vehicle_type_str = 'Boat' + if hearbeat_msg.type == mavutil.mavlink.MAV_TYPE_SUBMARINE: + vehicle_type_str = 'Sub' + if hearbeat_msg.type in [mavutil.mavlink.MAV_TYPE_QUADROTOR, + mavutil.mavlink.MAV_TYPE_COAXIAL, + mavutil.mavlink.MAV_TYPE_HEXAROTOR, + mavutil.mavlink.MAV_TYPE_OCTOROTOR, + mavutil.mavlink.MAV_TYPE_TRICOPTER, + mavutil.mavlink.MAV_TYPE_DODECAROTOR]: + vehicle_type_str = "Copter" + if hearbeat_msg.type == mavutil.mavlink.MAV_TYPE_HELICOPTER: + vehicle_type_str = "Heli" + if hearbeat_msg.type == mavutil.mavlink.MAV_TYPE_ANTENNA_TRACKER: + vehicle_type_str = "Tracker" + if hearbeat_msg.type == mavutil.mavlink.MAV_TYPE_AIRSHIP: + vehicle_type_str = "Blimp" + return { + "vehicle_type": vehicle_type_str + } + + # get vehicle state including armed, mode + def get_vehicle_state(self): + # get mode from latest HEARTBEAT message + hearbeat_msg = self.mpstate.master().messages.get('HEARTBEAT', None) + if hearbeat_msg is None: + mode_number = 0 + print ("chat: get_vehicle_state: vehicle mode is unknown") + else: + mode_number = hearbeat_msg.custom_mode + return { + "armed": (self.mpstate.master().motors_armed() > 0), + "mode": mode_number + } + + # return the vehicle's location and yaw + def get_vehicle_location_and_yaw(self): + lat_deg = 0 + lon_deg = 0 + alt_amsl_m = 0 + alt_rel_m = 0 + yaw_deg = 0 + gpi = self.mpstate.master().messages.get('GLOBAL_POSITION_INT', None) + if gpi: + lat_deg = gpi.lat * 1e-7, + lon_deg = gpi.lon * 1e-7, + alt_amsl_m = gpi.alt * 1e-3, + alt_rel_m = gpi.relative_alt * 1e-3 + yaw_deg = gpi.hdg * 1e-2 + location = { + "latitude": lat_deg, + "longitude": lon_deg, + "altitude_amsl": alt_amsl_m, + "altitude_above_home": alt_rel_m, + "yaw" : yaw_deg + } + return location + + # send a mavlink command_int message to the vehicle + def send_mavlink_command_int(self, arguments): + print("chat: send_mavlink_command_int: arguments:" + str(arguments)) + target_system = arguments.get("target_system", 1) + target_component = arguments.get("target_component", 1) + frame = arguments.get("frame", 0) + if ("command" not in arguments): + return "command_int not sent. 
command field required" + command = arguments.get("command", 0) + current = arguments.get("current", 0) + autocontinue = arguments.get("autocontinue", 0) + param1 = arguments.get("param1", 0) + param2 = arguments.get("param2", 0) + param3 = arguments.get("param3", 0) + param4 = arguments.get("param4", 0) + x = arguments.get("x", 0) + y = arguments.get("y", 0) + z = arguments.get("z", 0) + self.mpstate.master().mav.command_int_send(target_system, target_component, frame, command, current, autocontinue, param1, param2, param3, param4, x, y, z) + return "command_int sent" + + # send a mavlink send_mavlink_set_position_target_global_int message to the vehicle + def send_mavlink_set_position_target_global_int(self, arguments): + if arguments is None: + return "send_mavlink_set_position_target_global_int: arguments is None" + print("chat: send_mavlink_set_position_target_global_int: arguments:" + str(arguments)) + time_boot_ms = arguments.get("time_boot_ms", 0) + target_system = arguments.get("target_system", 1) + target_component = arguments.get("target_component", 1) + coordinate_frame = arguments.get("coordinate_frame", 5) + type_mask = arguments.get("type_mask", 0) + lat_int = arguments.get("lat_int", 0) + lon_int = arguments.get("lon_int", 0) + alt = arguments.get("alt", 0) + vx = arguments.get("vx", 0) + vy = arguments.get("vy", 0) + vz = arguments.get("vz", 0) + afx = arguments.get("afx", 0) + afy = arguments.get("afy", 0) + afz = arguments.get("afz", 0) + yaw = arguments.get("yaw", 0) + yaw_rate = arguments.get("yaw_rate", 0) + self.mpstate.master().mav.set_position_target_global_int_send(time_boot_ms, target_system, target_component, coordinate_frame, type_mask, lat_int, lon_int, alt, vx, vy, vz, afx, afy, afz, yaw, yaw_rate) + return "set_position_target_global_int sent" diff --git a/MAVProxy/modules/mavproxy_chat/chat_voice_to_text.py b/MAVProxy/modules/mavproxy_chat/chat_voice_to_text.py new file mode 100644 index 0000000000..8fa77a3cc8 --- /dev/null +++ b/MAVProxy/modules/mavproxy_chat/chat_voice_to_text.py @@ -0,0 +1,85 @@ +''' +AI Chat Module voice-to-text class +Randy Mackay, December 2023 +''' + +import time +import pyaudio # install using, "sudo apt-get install python3-pyaudio" +import wave # install with "pip3 install wave" +from openai import OpenAI + +class chat_voice_to_text(): + def __init__(self): + # initialise OpenAI connection + self.client = None + self.assistant = None + + # check connection to OpenAI assistant and connect if necessary + # returns True if connection is good, False if not + def check_connection(self): + # create connection object + if self.client is None: + try: + self.client = OpenAI() + except: + print("chat: failed to connect to OpenAI") + return False + + # return True if connected + return self.client is not None + + # record audio from microphone + # returns filename of recording or None if failed + def record_audio(self): + # Initialize PyAudio + p = pyaudio.PyAudio() + + # Open stream + try: + stream = p.open(format=pyaudio.paInt16, channels=1, rate=44100, input=True, frames_per_buffer=1024) + print("recording audio: opened stream") + except: + print("recording audio: failed to open stream") + return None + + # calculate time recording should stop + curr_time = time.time() + time_stop = curr_time + 5 + + # record until specified time + frames = [] + while curr_time < time_stop: + data = stream.read(1024) + frames.append(data) + curr_time = time.time() + print("recording audio: reading t:" + str(curr_time)) + print("recording audio: data 
collection complete") + + # Stop and close the stream + stream.stop_stream() + stream.close() + p.terminate() + + # Save audio file + wf = wave.open("recording.wav", "wb") + wf.setnchannels(1) + wf.setsampwidth(pyaudio.PyAudio().get_sample_size(pyaudio.paInt16)) + wf.setframerate(44100) + wf.writeframes(b''.join(frames)) + wf.close() + return "recording.wav" + + # convert audio to text + # returns transcribed text on success or None if failed + def convert_audio_to_text(self, audio_filename): + # check connection + if not self.check_connection(): + return None + + # Process with Whisper + audio_file = open(audio_filename, "rb") + transcript = self.client.audio.transcriptions.create( + model="whisper-1", + file=audio_file, + response_format="text") + return transcript diff --git a/MAVProxy/modules/mavproxy_chat/chat_window.py b/MAVProxy/modules/mavproxy_chat/chat_window.py new file mode 100644 index 0000000000..cc1ba0a7b3 --- /dev/null +++ b/MAVProxy/modules/mavproxy_chat/chat_window.py @@ -0,0 +1,133 @@ +''' +AI Chat Module Window +Randy Mackay, December 2023 + +Chat window for input and output of AI chat +''' + +from MAVProxy.modules.lib import mp_module +from MAVProxy.modules.lib import mp_settings +from MAVProxy.modules.lib.mp_settings import MPSetting +from MAVProxy.modules.lib.wx_loader import wx +from MAVProxy.modules.lib import mp_util +from MAVProxy.modules.mavproxy_chat import chat_openai, chat_voice_to_text +from threading import Thread +from pymavlink import mavutil +import time + +class chat_window(): + def __init__(self, mpstate): + # keep reference to mpstate + self.mpstate = mpstate + + self.app = wx.App() + self.frame = wx.Frame(None, title="Chat", size=(650, 200)) + + # add menu + self.menu = wx.Menu() + self.menu.Append(1, "Set API Key", "Set OpenAI API Key") + self.menu_bar = wx.MenuBar() + self.menu_bar.Append(self.menu, "Menu") + self.frame.SetMenuBar(self.menu_bar) + self.frame.Bind(wx.EVT_MENU, self.menu_set_api_key_show, id=1) + + # add api key input window + self.apikey_frame = wx.Frame(None, title="Input OpenAI API Key", size=(560, 50)) + self.apikey_text_input = wx.TextCtrl(self.apikey_frame, id=-1, value="Input API Key here and press enter", pos=(10, 10), size=(450, -1), style = wx.TE_PROCESS_ENTER) + self.apikey_set_button = wx.Button(self.apikey_frame, id=-1, label="Set", pos=(470, 10), size=(75, 25)) + self.apikey_frame.Bind(wx.EVT_BUTTON, self.apikey_set_button_click, self.apikey_set_button) + self.apikey_frame.Bind(wx.EVT_TEXT_ENTER, self.apikey_set_button_click, self.apikey_text_input) + + # add a record button + self.record_button = wx.Button(self.frame, id=-1, label="Rec", pos=(20, 20), size=(75, 25)) + self.frame.Bind(wx.EVT_BUTTON, self.record_button_click, self.record_button) + + # add an input text box + self.text_input = wx.TextCtrl(self.frame, id=-1, value="", pos=(100, 20), size=(450, -1), style = wx.TE_PROCESS_ENTER) + self.frame.Bind(wx.EVT_TEXT_ENTER, self.text_input_change, self.text_input) + + # add a send button + self.send_button = wx.Button(self.frame, id=-1, label="Send", pos=(555, 20), size=(75, 25)) + self.frame.Bind(wx.EVT_BUTTON, self.send_button_click, self.send_button) + + # add a reply box and read-only text box + self.reply_box = wx.StaticBox(self.frame, id=-1, label="Reply", pos=(20, 60), size=(610, 110)) + self.text_reply = wx.TextCtrl(self.reply_box, id=-1, pos=(5, 5), size=(600, 80), style=wx.TE_READONLY | wx.TE_MULTILINE | wx.TE_RICH2) + + # show frame + self.frame.Show() + + # create chat_openai object + self.chat_openai = 
+
+        # create chat_voice_to_text object
+        self.chat_voice_to_text = chat_voice_to_text.chat_voice_to_text()
+
+        # run chat window in a separate thread
+        self.thread = Thread(target=self.idle_task)
+        self.thread.start()
+
+    # main loop for the wx application, run in a separate thread
+    def idle_task(self):
+        # update chat window
+        self.app.MainLoop()
+
+    # show the chat window
+    def show(self):
+        self.frame.Show()
+
+    # hide the chat window
+    def hide(self):
+        self.frame.Hide()
+
+    # close the chat window
+    # this is called when the module is unloaded
+    def close(self):
+        self.frame.Close()
+
+    # menu set API key handling.  Shows the API key input window
+    def menu_set_api_key_show(self, event):
+        self.apikey_frame.Show()
+
+    # set API key set button clicked
+    def apikey_set_button_click(self, event):
+        self.chat_openai.set_api_key(self.apikey_text_input.GetValue())
+        self.apikey_frame.Hide()
+
+    # record button clicked
+    def record_button_click(self, event):
+        # record audio
+        rec_filename = self.chat_voice_to_text.record_audio()
+        if rec_filename is None:
+            self.text_input.SetValue("chat: recording failed")
+            return
+        self.text_input.SetValue("chat: recorded to " + rec_filename)
+
+        # convert audio to text
+        text = self.chat_voice_to_text.convert_audio_to_text(rec_filename)
+        if text is None:
+            self.text_input.SetValue("chat: audio to text conversion failed")
+            return
+        self.text_input.SetValue(text)
+
+        # send text to assistant
+        print("chat: sending:" + text)
+        self.text_reply.SetValue("processing..")
+        reply = self.chat_openai.send_to_assistant(text)
+        self.text_reply.SetValue(reply)
+
+    # send button clicked
+    def send_button_click(self, event):
+        send_text = self.text_input.GetValue()
+        print("chat: sending:" + send_text)
+        self.text_reply.SetValue("processing..")
+        reply = self.chat_openai.send_to_assistant(send_text)
+        self.text_reply.SetValue(reply)
+
+    # handle text input
+    def text_input_change(self, event):
+        send_text = self.text_input.GetValue()
+        print("chat: sending:" + send_text)
+        self.text_reply.SetValue("processing..")
+        reply = self.chat_openai.send_to_assistant(send_text)
+        self.text_reply.SetValue(reply)
diff --git a/setup.py b/setup.py
index 0814c3ab0c..71f476cfd9 100755
--- a/setup.py
+++ b/setup.py
@@ -87,6 +87,7 @@ def package_files(directory):
               'MAVProxy.modules.mavproxy_optitrack',
               'MAVProxy.modules.mavproxy_nokov',
               'MAVProxy.modules.mavproxy_SIYI',
+              'MAVProxy.modules.mavproxy_chat',
               'MAVProxy.modules.lib',
               'MAVProxy.modules.lib.ANUGA',
               'MAVProxy.modules.lib.MacOS',