diff --git a/audio.wav b/audio.wav new file mode 100644 index 0000000..58bf3be Binary files /dev/null and b/audio.wav differ diff --git a/docs/SIP.rst b/docs/SIP.rst index b0fb78f..270bd8d 100644 --- a/docs/SIP.rst +++ b/docs/SIP.rst @@ -68,7 +68,7 @@ SIPClient The SIPClient class is used to communicate with the PBX/VoIP server. It is responsible for registering with the server, and receiving phone calls. -*class* SIP.\ **SIPClient**\ (server: str, port: int, username: str, password: str, myIP="0.0.0.0", myPort=5060, callCallback: Optional[Callable[[SIPMessage], None]] = None) +*class* SIP.\ **SIPClient**\ (server: str, port: int, username: str, password: str, myIP="0.0.0.0", myPort=5060, callCallback: Optional[Callable[[SIPMessage], None]] = None, auth_username: Optional[str] = None) The *server* argument is your PBX/VoIP server's IP. The *port* argument is your PBX/VoIP server's port. @@ -83,6 +83,8 @@ The SIPClient class is used to communicate with the PBX/VoIP server. It is resp The *callCallback* argument is the callback function for :ref:`VoIPPhone`. VoIPPhone will process the SIP request, and perform the appropriate actions. + The *auth_username* argument is the optional username for proxy authentication, represented as a string. It is used in place of *username* when the server responds with 407 Proxy Authentication Required, and defaults to None. + **recv**\ () -> None This method is called by SIPClient.start() and is responsible for receiving and parsing through SIP requests. **This should not be called by the** :term:`user`. diff --git a/docs/VoIP.rst b/docs/VoIP.rst index f8cf0ce..1d91bad 100644 --- a/docs/VoIP.rst +++ b/docs/VoIP.rst @@ -146,7 +146,7 @@ VoIPPhone The VoIPPhone class is used to manage the :ref:`SIPClient` class and create :ref:`VoIPCall`'s when there is an incoming call. It then passes the VoIPCall as the argument in the callback. 
-*class* VoIP.\ **VoIPPhone**\ (server: str, port: int, username: str, password: str, callCallback: Optional[Callable] = None, myIP: Optional[str] = None, sipPort=5060, rtpPortLow=10000, rtpPortHigh=20000) +*class* VoIP.\ **VoIPPhone**\ (server: str, port: int, username: str, password: str, callCallback: Optional[Callable] = None, myIP: Optional[str] = None, sipPort=5060, rtpPortLow=10000, rtpPortHigh=20000, auth_username: Optional[str] = None) The *server* argument is your PBX/VoIP server's IP, represented as a string. The *port* argument is your PBX/VoIP server's port, represented as an integer. @@ -162,6 +162,8 @@ The VoIPPhone class is used to manage the :ref:`SIPClient` class and create :ref The *sipPort* argument is the port SIP will bind to to receive SIP requests. The default for this protocol is port 5060, but any port can be used. The *rtpPortLow* and *rtpPortHigh* arguments are used to generate random ports to use for audio transfer. Per RFC 4566 Sections `5.7 `_ and `5.14 `_, it can take multiple ports to fully communicate with other :term:`clients`, as such a large range is recommended. If an invalid range is given, a :ref:`InvalidStateError` will be thrown. + + The *auth_username* argument is the optional username for proxy authentication, represented as a string. It is passed on to the :ref:`SIPClient`, which uses it in place of *username* when the server responds with 407 Proxy Authentication Required, and defaults to None. **callback**\ (request: :ref:`SIPMessage`) -> None This method is called by the :ref:`SIPClient` when an INVITE or BYE request is received. This function then creates a :ref:`VoIPCall` or terminates it respectively. When a VoIPCall is created, it will then pass it to the *callCallback* function as an argument. If *callCallback* is set to None, this function replies as BUSY. **This function should not be called by the** :term:`user`. 
diff --git a/pyVoIP/SIP.py b/pyVoIP/SIP.py index d3c33cc..1bf6f02 100644 --- a/pyVoIP/SIP.py +++ b/pyVoIP/SIP.py @@ -347,6 +347,7 @@ def __init__(self, data: bytes): self.headers: Dict[str, Any] = {"Via": []} self.body: Dict[str, Any] = {} self.authentication: Dict[str, str] = {} + self.proxy_authentication: Dict[str, str] = {} self.raw = data self.auth_match = re.compile(r'(\w+)=("[^",]+"|[^ \t,]+)') self.parse(data) @@ -460,14 +461,21 @@ def parse_header(self, header: str, data: str) -> None: self.headers[header] = data.split(", ") elif header == "Content-Length": self.headers[header] = int(data) - elif header == "WWW-Authenticate" or header == "Authorization": + elif ( + header == "WWW-Authenticate" + or header == "Authorization" + or header == "Proxy-Authenticate" + ): data = data.replace("Digest ", "") row_data = self.auth_match.findall(data) header_data = {} for var, data in row_data: header_data[var] = data.strip('"') self.headers[header] = header_data - self.authentication = header_data + if header == "Proxy-Authenticate": + self.proxy_authentication = header_data + else: + self.authentication = header_data else: self.headers[header] = data @@ -813,12 +821,14 @@ def __init__( myPort=5060, callCallback: Optional[Callable[[SIPMessage], None]] = None, fatalCallback: Optional[Callable[..., None]] = None, + auth_username: Optional[str] = None, ): self.NSD = False self.server = server self.port = port self.myIP = myIP self.username = username + self.auth_username = auth_username self.password = password self.phone = phone @@ -1075,8 +1085,16 @@ def genAuthorization(self, request: SIPMessage) -> bytes: return self.gen_authorization(request) def gen_authorization(self, request: SIPMessage) -> bytes: - realm = request.authentication["realm"] - HA1 = self.username + ":" + realm + ":" + self.password + if request.status == SIPStatus(407): + nonce = request.proxy_authentication["nonce"] + realm = request.proxy_authentication["realm"] + user = self.auth_username + else: 
+ nonce = request.authentication["nonce"] + realm = request.authentication["realm"] + user = self.username + + HA1 = user + ":" + realm + ":" + self.password HA1 = hashlib.md5(HA1.encode("utf8")).hexdigest() HA2 = ( "" @@ -1086,7 +1104,6 @@ def gen_authorization(self, request: SIPMessage) -> bytes: + ";transport=UDP" ) HA2 = hashlib.md5(HA2.encode("utf8")).hexdigest() - nonce = request.authentication["nonce"] response = (HA1 + ":" + nonce + ":" + HA2).encode("utf8") response = hashlib.md5(response).hexdigest().encode("utf8") @@ -1217,8 +1234,14 @@ def genRegister(self, request: SIPMessage, deregister=False) -> str: def gen_register(self, request: SIPMessage, deregister=False) -> str: response = str(self.gen_authorization(request), "utf8") - nonce = request.authentication["nonce"] - realm = request.authentication["realm"] + if request.status == SIPStatus(407): + nonce = request.proxy_authentication["nonce"] + realm = request.proxy_authentication["realm"] + user = self.auth_username + else: + nonce = request.authentication["nonce"] + realm = request.authentication["realm"] + user = self.username regRequest = f"REGISTER sip:{self.server} SIP/2.0\r\n" regRequest += ( @@ -1251,8 +1274,10 @@ def gen_register(self, request: SIPMessage, deregister=False) -> str: "Expires: " + f"{self.default_expires if not deregister else 0}\r\n" ) + if request.status == SIPStatus(407): + regRequest += "Proxy-" regRequest += ( - f'Authorization: Digest username="{self.username}",' + f'Authorization: Digest username="{user}",' + f'realm="{realm}",nonce="{nonce}",' + f'uri="sip:{self.server};transport=UDP",' + f'response="{response}",algorithm=MD5\r\n' @@ -1613,6 +1638,7 @@ def invite( while ( response.status != SIPStatus(401) + and response.status != SIPStatus(407) and response.status != SIPStatus(100) and response.status != SIPStatus(180) ) or response.headers["Call-ID"] != call_id: @@ -1629,11 +1655,22 @@ def invite( ack = self.gen_ack(response) self.out.sendto(ack.encode("utf8"), 
(self.server, self.port)) debug("Acknowledged") + authhash = self.gen_authorization(response) - nonce = response.authentication["nonce"] - realm = response.authentication["realm"] - auth = ( - f'Authorization: Digest username="{self.username}",realm=' + auth = "" + + if response.status == SIPStatus(407): + nonce = response.proxy_authentication["nonce"] + realm = response.proxy_authentication["realm"] + user = self.auth_username + auth += "Proxy-" + else: + nonce = response.authentication["nonce"] + realm = response.authentication["realm"] + user = self.username + + auth += ( + f'Authorization: Digest username="{user}",realm=' + f'"{realm}",nonce="{nonce}",uri="sip:{self.server};' + f'transport=UDP",response="{str(authhash, "utf8")}",' + "algorithm=MD5\r\n" @@ -1693,9 +1730,13 @@ def __deregister(self) -> bool: response = SIPMessage(resp) response = self.trying_timeout_check(response) - if response.status == SIPStatus(401): - # Unauthorized, likely due to being password protected. + if response.status == SIPStatus(401) or response.status == SIPStatus( + 407 + ): + # 401 Unauthorized, likely due to being password protected. + # 407 Proxy Authentication Required regRequest = self.gen_register(response, deregister=True) + self.out.sendto( regRequest.encode("utf8"), (self.server, self.port) ) @@ -1703,7 +1744,9 @@ def __deregister(self) -> bool: if ready[0]: resp = self.s.recv(8192) response = SIPMessage(resp) - if response.status == SIPStatus(401): + if response.status == SIPStatus( + 401 + ) or response.status == SIPStatus(407): # At this point, it's reasonable to assume that # this is caused by invalid credentials. debug("Unauthorized") @@ -1797,7 +1840,9 @@ def __register(self) -> bool: # with new urn:uuid or reply with expire 0 self._handle_bad_request() - if response.status == SIPStatus(401): + if response.status == SIPStatus(401) or response.status == SIPStatus( + 407 + ): # Unauthorized, likely due to being password protected. 
regRequest = self.gen_register(response) self.out.sendto( @@ -1808,7 +1853,9 @@ def __register(self) -> bool: resp = self.s.recv(8192) response = SIPMessage(resp) response = self.trying_timeout_check(response) - if response.status == SIPStatus(401): + if response.status == SIPStatus( + 401 + ) or response.status == SIPStatus(407): # At this point, it's reasonable to assume that # this is caused by invalid credentials. debug("=" * 50) @@ -1837,11 +1884,6 @@ def __register(self) -> bool: else: raise TimeoutError("Registering on SIP Server timed out") - if response.status == SIPStatus(407): - # Proxy Authentication Required - # TODO: implement - debug("Proxy auth required") - # TODO: This must be done more reliable if response.status not in [ SIPStatus(400), diff --git a/pyVoIP/VoIP/VoIP.py b/pyVoIP/VoIP/VoIP.py index fe88472..8880816 100644 --- a/pyVoIP/VoIP/VoIP.py +++ b/pyVoIP/VoIP/VoIP.py @@ -479,6 +479,7 @@ def __init__( sipPort=5060, rtpPortLow=10000, rtpPortHigh=20000, + auth_username: str = None, ): if rtpPortLow > rtpPortHigh: raise InvalidRangeError("'rtpPortHigh' must be >= 'rtpPortLow'") @@ -495,6 +496,7 @@ def __init__( self.port = port self.myIP = myIP self.username = username + self.auth_username = auth_username self.password = password self.callCallback = callCallback self._status = PhoneStatus.INACTIVE @@ -517,6 +519,7 @@ def __init__( myPort=sipPort, callCallback=self.callback, fatalCallback=self.fatal, + auth_username=self.auth_username, ) def callback(self, request: SIP.SIPMessage) -> None: diff --git a/pyVoIP/audio.wav b/pyVoIP/audio.wav new file mode 100644 index 0000000..58bf3be Binary files /dev/null and b/pyVoIP/audio.wav differ diff --git a/pyVoIP/audio_handler.py b/pyVoIP/audio_handler.py new file mode 100644 index 0000000..5989ab4 --- /dev/null +++ b/pyVoIP/audio_handler.py @@ -0,0 +1,216 @@ +from asyncio import InvalidStateError +import logging +import time +import wave +import os +import io +import numpy as np +from collections import 
deque +from pyVoIP.VoIP import CallState +from pyVoIP.speech_processing import text_to_speech +from pyVoIP.openai_utils import transcribe_with_whisper, get_openai_gpt_response, extract_lead_info +from pydub import AudioSegment +import threading +# import webrtcvad + +# vad = webrtcvad.Vad() +# vad.set_mode(2) # Try 0–3 (0 = lenient, 3 = strict). Start with 2 as a balanced mode. + +# Setup logging format +logging.basicConfig(level=logging.INFO, format='[%(asctime)s] %(message)s', datefmt='%H:%M:%S') + +conversation_history = "" +conversation_context = [] + +is_playing = False +silence_threshold = 1.0 +max_silence_threshold = 10.0 +last_speech_time = time.time() +buffer_audio = b"" + +energy_levels = deque(maxlen=20) +DYNAMIC_NOISE_MARGIN = 0.9 + +def update_playing_status(duration): + global is_playing + time.sleep(duration) + is_playing = False + +# def save_audio_to_wav(audio_data, filename): +# try: +# logging.info(f"Start saving audio to {filename}") +# with wave.open(filename, 'wb') as wf: +# wf.setnchannels(1) +# wf.setsampwidth(1) +# wf.setframerate(8000) +# wf.writeframes(audio_data) + +# padding_duration = 2000 # 2s +# original = AudioSegment.from_wav(filename) +# padded = original + AudioSegment.silent(duration=padding_duration) +# padded.export(filename, format="wav") +# logging.info(f"Audio saved successfully to {filename}") +# except Exception as e: +# logging.error(f"Error saving audio: {e}") + + +def get_wav_bytes(audio_data, sample_width=2, frame_rate=8000, channels=1): + wav_io = io.BytesIO() + with wave.open(wav_io, 'wb') as wf: + wf.setnchannels(channels) + wf.setsampwidth(sample_width) + wf.setframerate(frame_rate) + wf.writeframes(audio_data) + wav_io.seek(0) + return wav_io + + +def threaded_play_audio(call, audio_data): + global is_playing + is_playing = True + try: + logging.info("Playback started (threaded)") + playback_index = 0 + while call.state == CallState.ANSWERED and playback_index < len(audio_data): + chunk = 
audio_data[playback_index:playback_index+160] + call.write_audio(chunk) + playback_index += 160 + time.sleep(0.02) + logging.info("Playback finished") + except Exception as e: + logging.error(f"Playback error: {e}") + finally: + is_playing = False + +def is_speech(audio_bytes): + audio_array = np.frombuffer(audio_bytes, dtype=np.int16) + energy = np.mean(audio_array ** 2) if len(audio_array) > 0 else 0 + energy_levels.append(energy) + if len(energy_levels) == energy_levels.maxlen: + baseline = np.median(energy_levels) + threshold = baseline * DYNAMIC_NOISE_MARGIN + return energy > threshold + return False + +# def vad_is_speech(audio_bytes, sample_rate=8000): +# # WebRTC VAD requires mono, 16-bit PCM, and 10/20/30ms frames +# # You must make sure `audio_bytes` is 16-bit PCM +# try: +# return vad.is_speech(audio_bytes, sample_rate) +# except Exception as e: +# logging.warning(f"VAD error: {e}") +# return False + + +def log_interaction(session_id, user_text, bot_response): + log_dir = f"./output/{session_id}" + os.makedirs(log_dir, exist_ok=True) + with open(f"{log_dir}/conversation.txt", "a") as f: + f.write(f"User: {user_text}\n") + f.write(f"Bot: {bot_response}\n\n") + +def answer(call): + global is_playing, buffer_audio, last_speech_time, conversation_history, conversation_context + + try: + logging.info("Call answered") + call.answer() + + session_id = time.strftime("%Y%m%d_%H%M%S") + chunk_index = 1 + idle_count = 0 + voice_detected = False + # silence_start_time = None + conversation_context.clear() + conversation_history = "" + + greeting = "Hi! This is UniBot from UniConnect. I'm here to tell you about a special loan offer. Would you like to hear more?" 
+ # play_audio(call, text_to_speech(greeting)) + threading.Thread(target=threaded_play_audio, args=(call, text_to_speech(greeting))).start() + last_speech_time = time.time() + + while call.state == CallState.ANSWERED: + try: + incoming_audio = call.read_audio(length=160, blocking=True) + buffer_audio += incoming_audio + + if is_speech(incoming_audio): + #if vad_is_speech(incoming_audio): + + # logging.info("Voice detected") + if not voice_detected: + logging.info("Speech started") + # silence_start_time = None + voice_detected = True + last_speech_time = time.time() + # else: + # logging.info("Silence detected") + + if time.time() - last_speech_time > silence_threshold and voice_detected and not is_playing: + silence_duration = time.time() - last_speech_time + logging.info(f"Silence detected for {silence_duration:.2f}s. Beginning processing") + + # output_dir = "./output" + # if not os.path.exists(output_dir): + # os.makedirs(output_dir) + + #chunk_filename = f"./output/speech_chunk_{session_id}_{chunk_index}.wav" + #save_audio_to_wav(buffer_audio, chunk_filename) + audio_wav=get_wav_bytes(buffer_audio) + + logging.info("Transcription started") + text = transcribe_with_whisper(audio_wav) + logging.info("Transcription completed") + + if text: + logging.info(f"User said: {text}") + conversation_history += f"User: {text}\n" + conversation_context.append({"role": "user", "content": text}) + + logging.info("Generating bot response") + response = get_openai_gpt_response(text, conversation_context) + logging.info(f"Bot: {response}") + conversation_context.append({"role": "assistant", "content": response}) + conversation_history += f"Bot: {response}\n" + + log_interaction(session_id, text, response) + + # play_audio(call, text_to_speech(response)) + threading.Thread(target=threaded_play_audio, args=(call, text_to_speech(response))).start() + last_speech_time = time.time() + buffer_audio = b"" + voice_detected = False + chunk_index += 1 + + if any(word in text.lower() for 
word in ["loan", "apply", "yes", "need", "interested", "buy", "want", "help"]): + lead_json = extract_lead_info(conversation_context) + logging.info(f"Lead info: {lead_json}") + else: + idle_count += 1 + logging.info(f"Could not understand. Idle count: {idle_count}") + if idle_count > 3: + # play_audio(call, text_to_speech("Sorry, I didn't get that. Could you say that again?")) + threading.Thread(target=threaded_play_audio, args=(call, text_to_speech("Sorry, I didn't get that. Could you say that again?"))).start() + last_speech_time = time.time() + idle_count = 0 + + elif time.time() - last_speech_time > max_silence_threshold and idle_count > 0: + logging.info("Extended silence detected. Sending wakeup message") + last_speech_time = time.time() + idle_count = 0 + buffer_audio = b"" + voice_detected = False + # play_audio(call, text_to_speech("Still there? If you're interested in a loan or need help, just say so!")) + threading.Thread(target=threaded_play_audio, args=(call, text_to_speech("Still there? If you're interested in a loan or need help, just say so!"))).start() + last_speech_time = time.time() + + except Exception as e: + logging.error(f"Call stream error: {e}") + + call.hangup() + logging.info("Call ended") + + except Exception as e: + logging.error(f"Fatal error: {e}") + call.hangup() + diff --git a/pyVoIP/openai_utils.py b/pyVoIP/openai_utils.py new file mode 100644 index 0000000..f3911bb --- /dev/null +++ b/pyVoIP/openai_utils.py @@ -0,0 +1,113 @@ +import openai +import logging +from pyVoIP.config import OPENAI_KEY +openai.api_key = OPENAI_KEY # or use os.getenv("OPENAI_API_KEY") + +# Loan-focused system prompt +SYSTEM_PROMPT = """ +You are UniBot, a helpful and friendly call center agent for a company offering a loan product. + +Your goals are: +1. Correct any misheard or mis-transcribed words in the user's message. 
For example: + - "load", "lone", "loaf", or "loam" → "loan" + - "apple" → "apply" + - "interested in a lone" → "interested in a loan" + Silently fix these before responding. +2. Only talk about the loan product. If the user says something completely unrelated or confusing, politely steer the conversation back to the loan offer. +3. Engage the user in a friendly, conversational tone. +4. If the user says "yes" or shows interest, clearly explain the loan offer. +5. Gather their name and phone number politely if they are interested. +6. If the user is hesitant, gently encourage them by explaining a benefit (e.g., low interest rate). +7. Keep responses short and to the point. + +IMPORTANT: +- Do not answer questions or respond to topics outside the loan offer. +- Do not generate random or irrelevant replies. +- Stay focused on promoting the loan and helping the customer with it. +""" + + +# Common mis-transcriptions that we want to fix +COMMON_CORRECTIONS = { + "load": "loan", + "lone": "loan", + "loam": "loan", + "loaf": "loan", +} + +def transcribe_with_whisper(audio_file_path): + with open(audio_file_path, "rb") as audio_file: + transcript = openai.audio.transcriptions.create( + model="whisper-1", + file=audio_file, + response_format="text", + language="en" + ) + return transcript.strip() + +def correct_transcription(text: str) -> str: + words = text.split() + corrected_words = [COMMON_CORRECTIONS.get(word.lower(), word) for word in words] + return " ".join(corrected_words) + +def get_openai_gpt_response(user_text: str, history: list) -> str: + corrected_text = correct_transcription(user_text) + + messages = [{"role": "system", "content": SYSTEM_PROMPT}] + messages.extend(history) + messages.append({"role": "user", "content": corrected_text}) + + # logging.info("Conversation so far: %s", messages) + + try: + response = openai.chat.completions.create( + model="gpt-4", + messages=messages, + temperature=0.7, + max_tokens=200, + ) + bot_message = 
response.choices[0].message.content + # logging.info("Bot response: %s", bot_message) + return bot_message + except Exception as e: + logging.error("OpenAI API error: %s", str(e)) + return "I'm sorry, something went wrong." + + +def extract_lead_info(conversation: list) -> dict: + """ + After a few turns, extract the lead details from the full conversation. + Returns a dict with name, phone, and interest level. + """ + try: + prompt = """ +You are an AI assistant helping to capture lead details from a customer interaction. +From the following conversation, extract: +- Name (if given) +- Phone number (if given) +- Whether the user is interested in the loan + +Respond with a JSON object like: +{ "name": ..., "phone": ..., "interest": ... } + +Conversation: +""" + "\n".join( + [f"{msg['role'].capitalize()}: {msg['content']}" for msg in conversation] + ) + + response = openai.chat.completions.create( + model="gpt-4", + messages=[ + {"role": "system", "content": "You are an assistant that extracts structured lead data from call transcripts."}, + {"role": "user", "content": prompt}, + ], + temperature=0, + max_tokens=150, + ) + + extracted = response.choices[0].message.content + # logging.info("Extracted lead: %s", extracted) + return eval(extracted) # If you're concerned about `eval`, use `json.loads` after validating format + except Exception as e: + logging.error("Failed to extract lead info: %s", str(e)) + return {"name": None, "phone": None, "interest": None} diff --git a/pyVoIP/speech_processing.py b/pyVoIP/speech_processing.py new file mode 100644 index 0000000..572cecd --- /dev/null +++ b/pyVoIP/speech_processing.py @@ -0,0 +1,73 @@ +import logging +import speech_recognition as sr +from gtts import gTTS +import io +import os +import wave +from pydub import AudioSegment + +os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "./voic-bot-demo-080f5ddb64e6.json" + +def speech_to_text(wav_file_path): + """Converts incoming raw audio to text.""" + recognizer = 
sr.Recognizer() + # Load the WAV file + with sr.AudioFile(wav_file_path) as source: + audio = recognizer.record(source) # Record the audio from the file + + try: + # Try to recognize speech in the audio using Google's speech recognition API + text = recognizer.recognize_google(audio) + logging.info(f"Recognized Speech: {text}") + return text + except sr.UnknownValueError: + logging.warning("Speech Recognition could not understand the audio.") + return "" + except sr.RequestError as e: + logging.error(f"Speech Recognition API error: {e}") + return "" + +def text_to_speech(text): + """Converts text response to speech audio.""" + tts = gTTS(text=text, lang="en") + audio_stream = io.BytesIO() + tts.write_to_fp(audio_stream) + audio_stream.seek(0) + + return convert_mp3_to_wav(audio_stream) + +def convert_mp3_to_wav(mp3_stream): + """Converts MP3 audio to WAV format.""" + import pydub + audio = pydub.AudioSegment.from_file(mp3_stream, format="mp3") + audio = audio.set_channels(1).set_frame_rate(8000).set_sample_width(1) + wav_stream = io.BytesIO() + audio.export(wav_stream, format="wav") + wav_stream.seek(0) + + return wav_stream.read() + + +# def trim_silence(raw_audio_bytes, silence_thresh=-40, min_silence_len=200): +# from pydub import AudioSegment +# from pydub.silence import detect_nonsilent + +# audio_segment = AudioSegment( +# data=raw_audio_bytes, +# sample_width=2, +# frame_rate=8000, +# channels=1 +# ) + +# nonsilent_ranges = detect_nonsilent(audio_segment, +# min_silence_len=min_silence_len, +# silence_thresh=silence_thresh) + +# if not nonsilent_ranges: +# return raw_audio_bytes + +# start, end = nonsilent_ranges[0][0], nonsilent_ranges[-1][1] +# trimmed = audio_segment[start:end] +# return trimmed.raw_data + + diff --git a/test-bot-audio.py b/test-bot-audio.py new file mode 100644 index 0000000..396ce60 --- /dev/null +++ b/test-bot-audio.py @@ -0,0 +1,95 @@ +import logging +from pyVoIP.VoIP import VoIPPhone, InvalidStateError, CallState +import time +import 
wave +import socket +import pyVoIP + +# Configure logging +logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s') + +pyVoIP.DEBUG = True + +# Your SIP credentials +SIP_EXTENSION = '2082' +SIP_USERNAME = 'EpheF2GMjn' +SIP_PASSWORD = 'eFQvv0LnZl' +SIP_SERVER = '15.207.103.137' +SIP_PORT = 5060 + +# Your local and public IPs +def get_local_ip(): + s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + try: + s.connect(("8.8.8.8", 80)) + return s.getsockname()[0] + except Exception: + return "127.0.0.1" + finally: + s.close() + +def get_free_port(start_port, end_port): + for port in range(start_port, end_port + 1): + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + # Set socket options to avoid address already in use error + s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + try: + s.bind(('0.0.0.0', port)) + # If binding succeeds, return the available port + return port + except socket.error: + # If the port is already in use, try the next one + continue + raise Exception("No free port found in the given range.") + +def answer(call): + try: + f = wave.open('audio.wav', 'rb') + frames = f.getnframes() + data = f.readframes(frames) + f.close() + + call.answer() + call.write_audio(data) # This writes the audio data to the transmit buffer, this must be bytes. + + stop = time.time() + (frames / 8000) # frames/8000 is the length of the audio in seconds. 8000 is the hertz of PCMU. 
+ + while time.time() <= stop and call.state == CallState.ANSWERED: + time.sleep(0.1) + call.hangup() + + except InvalidStateError as e: + logging.warning(f"Invalid state error encountered: {e}") + except Exception as e: + logging.error(f"Unexpected error in answer function: {e}") + finally: + if call.state != CallState.ENDED: + logging.info("Hanging up the call explicitly.") + call.hangup() + +def start_sip_phone(): + try: + logging.info("Initializing VoIPPhone...") + phone = VoIPPhone( + SIP_SERVER, SIP_PORT, SIP_EXTENSION, SIP_PASSWORD, + myIP=get_local_ip(), sipPort=get_free_port(5061,5070), auth_username=SIP_USERNAME, callCallback=answer + ) + phone.start() + logging.info("Phone started successfully.") + return phone + except Exception as e: + logging.critical(f"Fatal error: {e}") + return None + + +# Instantiate and start the VoIP phone +if __name__ == "__main__": + phone = start_sip_phone() + + if phone: + input("Press enter to disable the phone...") + phone.stop() + time.sleep(3) + logging.info("Phone stopped successfully.") + + diff --git a/test-bot.py b/test-bot.py new file mode 100644 index 0000000..2dd1680 --- /dev/null +++ b/test-bot.py @@ -0,0 +1,66 @@ +import logging +from pyVoIP.VoIP import VoIPPhone, InvalidStateError, CallState +import time +import wave +import socket +import pyVoIP +from pyVoIP.audio_handler import answer +from pyVoIP.config import SIP_SERVER, SIP_PORT, SIP_EXTENSION, SIP_PASSWORD, SIP_USERNAME + +# Configure logging +logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s') + +pyVoIP.DEBUG = True + +# Your local and public IPs +def get_local_ip(): + s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + try: + s.connect(("8.8.8.8", 80)) + return s.getsockname()[0] + except Exception: + return "127.0.0.1" + finally: + s.close() + +def get_free_port(start_port, end_port): + for port in range(start_port, end_port + 1): + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + # Set socket 
options to avoid address already in use error + s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + try: + s.bind(('0.0.0.0', port)) + # If binding succeeds, return the available port + return port + except socket.error: + # If the port is already in use, try the next one + continue + raise Exception("No free port found in the given range.") + + +def start_sip_phone(): + try: + logging.info("Initializing VoIPPhone...") + phone = VoIPPhone( + SIP_SERVER, SIP_PORT, SIP_EXTENSION, SIP_PASSWORD, + myIP=get_local_ip(), sipPort=get_free_port(5061,5070), auth_username=SIP_USERNAME, callCallback=answer + ) + phone.start() + logging.info("Phone started successfully.") + return phone + except Exception as e: + logging.critical(f"Fatal error: {e}") + return None + + +# Instantiate and start the VoIP phone +if __name__ == "__main__": + phone = start_sip_phone() + + if phone: + input("Press enter to disable the phone...") + phone.stop() + time.sleep(3) + logging.info("Phone stopped successfully.") + +