-
Notifications
You must be signed in to change notification settings - Fork 233
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
openai whisper model integration (#160)
- Loading branch information
Showing
10 changed files
with
1,466 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
# openai whisper model integration | ||
|
||
Whisper is a pre-trained model for automatic speech recognition (ASR) and speech translation. Trained on 680k hours of labelled data, Whisper models demonstrate a strong ability to generalise to many datasets and domains without the need for fine-tuning. | ||
|
||
## Requirements | ||
|
||
- Python (v3.10+ recommended) | ||
- Poetry (A Python packaging and dependency management tool) | ||
|
||
## Setup | ||
|
||
1. For the demo to work, you need a Hugging Face API token:
|
||
1. Visit [HuggingFace](https://huggingface.co/). | ||
2. Sign up or log in. | ||
3. Navigate to `Profile -> Settings -> Access Tokens`. | ||
4. Copy an existing token or create a new one. | ||
|
||
2. **Install Dependencies** | ||
|
||
```bash | ||
poetry install | ||
``` | ||
|
||
3. **Running The Agent Script** | ||
|
||
Open a terminal, go to `./openai-whisper-large-v2/src`, and
run the commands below to set the environment variable and start the agent.
|
||
```bash | ||
export HUGGING_FACE_ACCESS_TOKEN="{Your HuggingFaceAPI Token}" | ||
poetry run python agent.py | ||
``` | ||
|
||
Check the log for the "Adding Agent to Bureau" line and copy the agent address printed after it.
|
||
4. **Running The User Script** | ||
|
||
Open a second terminal, go to `./openai-whisper-large-v2/src`, and
run the commands below to set the environment variable and start the client.
|
||
```bash | ||
export WHISPER_AGENT_ADDRESS="{ agent address from last step }" | ||
poetry run python user.py | ||
``` | ||
|
||
After running the command, a request is sent to the agent every 30 seconds until it succeeds.
|
||
You can change the `RECORDING_FILE` variable (in `src/agents/whisper_user.py`) to transcribe your own audio file.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
[tool.poetry] | ||
name = "openai-whisper-large-v2" | ||
version = "0.1.0" | ||
description = "Openai whisper model integration with fetch.ai micro-Agents" | ||
authors = ["Abhi"] | ||
|
||
[tool.poetry.dependencies] | ||
python = ">=3.10,<3.12" | ||
uagents = "*" | ||
requests = "^2.31.0" |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
from uagents import Bureau | ||
|
||
from agents.whisper_agent import agent | ||
|
||
|
||
if __name__ == "__main__":
    # Bureau configured with port 8000 and the matching local submit endpoint.
    bureau = Bureau(port=8000, endpoint="http://127.0.0.1:8000/submit")

    # Print the address before registering the agent; the README tells users
    # to copy it from this log line.
    print(f"Adding Agent to Bureau: {agent.address}")

    bureau.add(agent)
    bureau.run()
76 changes: 76 additions & 0 deletions
76
integrations/openai-whisper-large-v2/src/agents/whisper_agent.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
from uagents import Agent, Context, Protocol | ||
from messages.whisper_basic import AudioTranscriptRequest, AudioTranscriptResponse, Error | ||
from uagents.setup import fund_agent_if_low | ||
import os | ||
import requests | ||
import base64 | ||
|
||
# Read the Hugging Face token from the environment. The fallback value is the
# variable's own name, which serves as the "not set" sentinel checked below.
HUGGING_FACE_ACCESS_TOKEN = os.environ.get(
    "HUGGING_FACE_ACCESS_TOKEN", "HUGGING_FACE_ACCESS_TOKEN"
)

# Fail fast at import time when the token was never exported.
if HUGGING_FACE_ACCESS_TOKEN == "HUGGING_FACE_ACCESS_TOKEN":
    raise Exception(
        "You need to provide an HUGGING_FACE_ACCESS_TOKEN, by exporting env. Follow README for more details")

# Hosted inference endpoint for the whisper-large-v2 model.
WHISPER_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v2"

# Only the bearer-token authorization header is sent with each request.
HEADERS = {"Authorization": f"Bearer {HUGGING_FACE_ACCESS_TOKEN}"}

# The agent is seeded with the access token and exposed on local port 8000.
agent = Agent(
    name="whisper_agent",
    seed=HUGGING_FACE_ACCESS_TOKEN,
    port=8000,
    endpoint=["http://127.0.0.1:8000/submit"],
)

# Top up the agent's wallet when its balance is low.
fund_agent_if_low(agent.wallet.address())
|
||
|
||
async def get_audio_transcript(ctx: Context, sender: str, audiodata: str, timeout: float = 120.0):
    """Transcribe base64-encoded audio with the Whisper endpoint and reply to ``sender``.

    Args:
        ctx: Agent context used to send the reply message.
        sender: Address of the agent that requested the transcript.
        audiodata: Audio file contents as an ASCII base64 string.
        timeout: Seconds to wait for the inference endpoint before giving up.

    Returns:
        None. On success an ``AudioTranscriptResponse`` is sent to ``sender``;
        any failure (invalid input, non-200 response, timeout, ...) is sent
        back as an ``Error`` message instead of being raised.
    """
    try:
        # The payload travels as ASCII base64 text; turn it back into raw bytes.
        audio_bytes = base64.b64decode(audiodata.encode("ascii"))

        # Without a timeout, requests waits indefinitely, so a stalled
        # connection would hang this handler forever.
        # NOTE(review): requests.post is synchronous, so this coroutine does
        # not yield while the HTTP call is in flight.
        response = requests.post(
            WHISPER_URL, headers=HEADERS, data=audio_bytes, timeout=timeout
        )

        # Anything other than 200 is reported back as an error message.
        if response.status_code != 200:
            try:
                detail = response.json().get("error")
            except ValueError:
                # The error body was not JSON; fall back to the raw text so
                # the user still sees what the server returned.
                detail = response.text
            await ctx.send(sender, Error(error=f"Error: {detail}"))
            return

        # Send the transcribed text back to the requester.
        await ctx.send(sender, AudioTranscriptResponse(transcript=response.json().get("text")))
        return

    # Boundary handler: report any unexpected failure to the requester
    # rather than letting it propagate out of the message handler.
    except Exception as ex:
        await ctx.send(sender, Error(error=f"Exception detail: {ex}"))
        return
|
||
# Protocol that carries the transcription message handler of this agent.
whisper_agent = Protocol(version="0.1.0", name="WhisperModelAgent")


@whisper_agent.on_message(model=AudioTranscriptRequest, replies={AudioTranscriptResponse, Error})
async def handle_request(ctx: Context, sender: str, request: AudioTranscriptRequest):
    """Log an incoming transcription request and hand it to ``get_audio_transcript``.

    The reply (transcript or error) is sent to ``sender`` by the helper.
    """
    ctx.logger.info(f"Got request from {sender}")

    encoded_audio = request.audio_data
    await get_audio_transcript(ctx, sender, encoded_audio)
|
||
|
||
# Include the protocol with the agent, publish_manifest will make the protocol details available on Agentverse.
agent.include(whisper_agent, publish_manifest=True)

# Entry point when this module is executed directly.
# `whisper_agent` is a Protocol (a container of handlers) and has no run();
# the Agent instance is the object that must be started.
if __name__ == "__main__":
    agent.run()
67 changes: 67 additions & 0 deletions
67
integrations/openai-whisper-large-v2/src/agents/whisper_user.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
from uagents import Agent, Context, Protocol | ||
from messages.whisper_basic import AudioTranscriptRequest, AudioTranscriptResponse, Error | ||
from uagents.setup import fund_agent_if_low | ||
import base64 | ||
import os | ||
|
||
# Audio file sent for transcription (relative path).
RECORDING_FILE = "sample-recording/sample.flac"

# Address of the Whisper agent, read from the environment. The fallback value
# is the variable's own name, which serves as the "not set" sentinel below.
WHISPER_AGENT_ADDRESS = os.environ.get(
    "WHISPER_AGENT_ADDRESS", "WHISPER_AGENT_ADDRESS"
)

# Fail fast at import time when the address was never exported.
if WHISPER_AGENT_ADDRESS == "WHISPER_AGENT_ADDRESS":
    raise Exception(
        "You need to provide an WHISPER_AGENT_ADDRESS, by exporting env. Follow README for more details")

# User agent exposed on local port 8001.
user = Agent(
    name="whisper_user",
    port=8001,
    endpoint=["http://127.0.0.1:8001/submit"],
)

# Top up the agent's wallet when its balance is low.
fund_agent_if_low(user.wallet.address())
|
||
|
||
@user.on_event("startup")
async def initialize_storage(ctx: Context):
    """Reset the success flag at startup so the interval task begins sending requests."""
    ctx.storage.set("AudioTranscriptSuccessful", False)
|
||
|
||
# Protocol that carries the user-side interval task and reply handlers.
whisper_user = Protocol(version="0.1.0", name="WhisperModelUser")


@whisper_user.on_interval(period=30, messages=AudioTranscriptRequest)
async def transcript(ctx: Context):
    """Send the recording to the Whisper agent unless a transcript already arrived.

    Registered with a 30-second interval. Each run reads RECORDING_FILE,
    base64-encodes it, and sends it as an ``AudioTranscriptRequest`` to
    WHISPER_AGENT_ADDRESS.
    """
    # Nothing left to do once a transcript has been received.
    if ctx.storage.get("AudioTranscriptSuccessful"):
        return

    # Read the raw audio and encode it as ASCII base64 text for transport.
    with open(RECORDING_FILE, "rb") as recording:
        encoded_audio = base64.b64encode(recording.read()).decode("ascii")

    request = AudioTranscriptRequest(audio_data=encoded_audio)
    await ctx.send(WHISPER_AGENT_ADDRESS, request)
|
||
|
||
@whisper_user.on_message(model=AudioTranscriptResponse)
async def handle_data(ctx: Context, sender: str, audioTranscript: AudioTranscriptResponse):
    """Log the received transcript and set the success flag read by the interval task."""
    transcript_text = audioTranscript.transcript
    ctx.logger.info(f"audio transcript => {transcript_text}")

    # The interval task checks this flag before sending another request.
    ctx.storage.set("AudioTranscriptSuccessful", True)
|
||
|
||
@whisper_user.on_message(model=Error)
async def handle_error(ctx: Context, sender: str, error: Error):
    """Log an error message received from the Whisper agent.

    The success flag is left untouched, so the interval task keeps retrying.
    """
    ctx.logger.info(f"Got error from uagent: {error}")
|
||
# Include the protocol with the agent, publish_manifest will make the protocol details available on Agentverse.
user.include(whisper_user, publish_manifest=True)

# Entry point when this module is executed directly.
# `whisper_user` is a Protocol (a container of handlers) and has no run();
# the Agent instance is the object that must be started.
if __name__ == "__main__":
    user.run()
13 changes: 13 additions & 0 deletions
13
integrations/openai-whisper-large-v2/src/messages/whisper_basic.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
from uagents import Model | ||
|
||
|
||
# Request message carrying the audio to be transcribed.
# NOTE(review): documented with `#` comments rather than a class docstring on
# purpose - a docstring may end up in the model schema that uagents uses to
# identify the message type; confirm before adding one.
class AudioTranscriptRequest(Model):
    # Audio file contents as a base64-encoded ASCII string.
    audio_data: str
|
||
|
||
# Reply message sent when a transcription request could not be fulfilled.
# NOTE(review): kept as `#` comments rather than a class docstring - a
# docstring may alter the model schema uagents derives; confirm before adding.
class Error(Model):
    # Human-readable description of what went wrong.
    error: str
|
||
|
||
# Reply message carrying the result of a successful transcription.
# NOTE(review): kept as `#` comments rather than a class docstring - a
# docstring may alter the model schema uagents derives; confirm before adding.
class AudioTranscriptResponse(Model):
    # Transcript text for the submitted audio.
    transcript: str
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
from uagents import Bureau | ||
from agents.whisper_user import user | ||
|
||
if __name__ == "__main__":
    # Bureau configured with port 8001 and the matching local submit endpoint.
    bureau = Bureau(port=8001, endpoint="http://127.0.0.1:8001/submit")

    # Print the user agent's address before registering it.
    print(f"Adding user agent to Bureau: {user.address}")

    bureau.add(user)
    bureau.run()