Skip to content

Commit

Permalink
openai whisper model integration (#160)
Browse files Browse the repository at this point in the history
  • Loading branch information
abhi141 authored Sep 20, 2023
1 parent 877e49d commit 268af59
Show file tree
Hide file tree
Showing 10 changed files with 1,466 additions and 0 deletions.
49 changes: 49 additions & 0 deletions integrations/openai-whisper-large-v2/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# openai whisper model integration

Whisper is a pre-trained model for automatic speech recognition (ASR) and speech translation. Trained on 680k hours of labelled data, Whisper models demonstrate a strong ability to generalise to many datasets and domains without the need for fine-tuning.

## Requirements

- Python (v3.10+ recommended)
- Poetry (A Python packaging and dependency management tool)

## Setup

1. For the demo to work, you need to get HuggingFaceAPI Token:

1. Visit [HuggingFace](https://huggingface.co/).
2. Sign up or log in.
3. Navigate to `Profile -> Settings -> Access Tokens`.
4. Copy an existing token or create a new one.

2. **Install Dependencies**

```bash
poetry install
```

3. **Running The Agent Script**

Open a terminal, change into the `./openai-whisper-large-v2/src` directory, and
run the commands below to load the environment variable and start the agent.

```bash
export HUGGING_FACE_ACCESS_TOKEN="{Your HuggingFaceAPI Token}"
poetry run python agent.py
```

Check the log for the "Adding Agent to Bureau" line and copy the agent address it prints.

4. **Running The User Script**

Open a terminal, change into the `./openai-whisper-large-v2/src` directory, and
run the commands below to load the environment variable and start the client.

```bash
export WHISPER_AGENT_ADDRESS="{ agent address from last step }"
poetry run python user.py
```

After running the command, a request is sent to the agent every 30 seconds until it succeeds.

You can change the RECORDING_FILE variable to transcribe your own audio file.
1,233 changes: 1,233 additions & 0 deletions integrations/openai-whisper-large-v2/poetry.lock

Large diffs are not rendered by default.

10 changes: 10 additions & 0 deletions integrations/openai-whisper-large-v2/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[tool.poetry]
name = "openai-whisper-large-v2"
version = "0.1.0"
description = "Openai whisper model integration with fetch.ai micro-Agents"
authors = ["Abhi"]

[tool.poetry.dependencies]
python = ">=3.10,<3.12"
uagents = "*"
requests = "^2.31.0"
Empty file.
10 changes: 10 additions & 0 deletions integrations/openai-whisper-large-v2/src/agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from uagents import Bureau

from agents.whisper_agent import agent


if __name__ == "__main__":
    # Host the whisper agent inside a Bureau serving on port 8000.
    bureau = Bureau(endpoint="http://127.0.0.1:8000/submit", port=8000)
    bureau.add(agent)
    print(f"Adding Agent to Bureau: {agent.address}")
    bureau.run()
76 changes: 76 additions & 0 deletions integrations/openai-whisper-large-v2/src/agents/whisper_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
from uagents import Agent, Context, Protocol
from messages.whisper_basic import AudioTranscriptRequest, AudioTranscriptResponse, Error
from uagents.setup import fund_agent_if_low
import os
import requests
import base64

# Get the HUGGING_FACE_ACCESS_TOKEN from environment variable or default to a placeholder string if not found.
HUGGING_FACE_ACCESS_TOKEN = os.getenv(
    "HUGGING_FACE_ACCESS_TOKEN", "HUGGING_FACE_ACCESS_TOKEN")

# The default value doubles as a sentinel: if it is still set, the env var was never exported.
if HUGGING_FACE_ACCESS_TOKEN == "HUGGING_FACE_ACCESS_TOKEN":
    raise Exception(
        "You need to provide an HUGGING_FACE_ACCESS_TOKEN, by exporting env. Follow README for more details")

# Hugging Face hosted-inference endpoint for the openai/whisper-large-v2 ASR model.
WHISPER_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v2"

# Define headers for HTTP request, including content type and authorization details
HEADERS = {
    "Authorization": f"Bearer {HUGGING_FACE_ACCESS_TOKEN}"
}

# Create an agent with predefined properties.
# NOTE(review): the API token is used as the agent seed, so the agent address is
# derived from a secret and will change whenever the token rotates — confirm intended.
agent = Agent(
    name="whisper_agent",
    seed=HUGGING_FACE_ACCESS_TOKEN,
    port=8000,
    endpoint=["http://127.0.0.1:8000/submit"],
)

# Ensure the agent has enough funds
fund_agent_if_low(agent.wallet.address())


async def get_audio_transcript(ctx: Context, sender: str, audiodata: str):
    """Decode base64 audio, submit it to the hosted whisper model, and reply to *sender*.

    Sends an AudioTranscriptResponse on success, or an Error message when the HTTP
    call fails or any exception is raised.
    """
    try:
        # The payload arrives base64-encoded; recover the raw audio bytes.
        audio_bytes = base64.b64decode(audiodata.encode("ascii"))

        # POST the raw audio to the Hugging Face inference endpoint.
        # The timeout prevents a hung request from blocking the agent indefinitely;
        # on expiry requests raises, which is reported to the sender below.
        # NOTE(review): requests.post is a blocking call inside an async handler —
        # consider an async HTTP client if throughput matters.
        response = requests.post(
            WHISPER_URL, headers=HEADERS, data=audio_bytes, timeout=60)

        # Non-200: forward the service's error message to the sender.
        if response.status_code != 200:
            await ctx.send(sender, Error(error=f"Error: {response.json().get('error')}"))
            return

        # Success: forward the transcript text to the sender.
        await ctx.send(sender, AudioTranscriptResponse(transcript=response.json().get('text')))
        return

    # Any other failure (bad base64, network error, timeout) is reported to the
    # sender rather than crashing the agent.
    except Exception as ex:
        await ctx.send(sender, Error(error=f"Exception detail: {ex}"))
        return

# Create an instance of Protocol with a label "WhisperModelAgent"
whisper_agent = Protocol(name="WhisperModelAgent", version="0.1.0")


@whisper_agent.on_message(model=AudioTranscriptRequest, replies={AudioTranscriptResponse, Error})
async def handle_request(ctx: Context, sender: str, request: AudioTranscriptRequest):
    """Handle an incoming transcription request and reply with a transcript or Error."""
    # Log the request details
    ctx.logger.info(f"Got request from {sender}")

    await get_audio_transcript(ctx, sender, request.audio_data)


# Include the protocol with the agent, publish_manifest will make the protocol details available on Agentverse.
agent.include(whisper_agent, publish_manifest=True)

# Define the main entry point of the application.
# Bug fix: the original called whisper_agent.run(), but Protocol objects have no
# run() method — the Agent itself must be run.
if __name__ == "__main__":
    agent.run()
67 changes: 67 additions & 0 deletions integrations/openai-whisper-large-v2/src/agents/whisper_user.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
from uagents import Agent, Context, Protocol
from messages.whisper_basic import AudioTranscriptRequest, AudioTranscriptResponse, Error
from uagents.setup import fund_agent_if_low
import base64
import os

# Audio file sent for transcription; change this path to use your own recording.
RECORDING_FILE = "sample-recording/sample.flac"

# Address of the whisper agent, taken from the environment
# (the default value doubles as a sentinel for "not exported").
WHISPER_AGENT_ADDRESS = os.getenv(
    "WHISPER_AGENT_ADDRESS", "WHISPER_AGENT_ADDRESS")

if WHISPER_AGENT_ADDRESS == "WHISPER_AGENT_ADDRESS":
    raise Exception(
        "You need to provide an WHISPER_AGENT_ADDRESS, by exporting env. Follow README for more details")

# Define user agent with specified parameters
user = Agent(
    name="whisper_user",
    port=8001,
    endpoint=["http://127.0.0.1:8001/submit"],
)

# Check and top up the agent's fund if low
fund_agent_if_low(user.wallet.address())


@user.on_event("startup")
async def initialize_storage(ctx: Context):
    """On startup, mark the transcription as not yet completed so the
    interval task keeps sending requests until one succeeds."""
    ctx.storage.set("AudioTranscriptSuccessful", False)


# Create an instance of Protocol with a label "WhisperModelUser"
whisper_user = Protocol(name="WhisperModelUser", version="0.1.0")


# Runs every 30 seconds: reads RECORDING_FILE, base64-encodes it, and sends a
# transcription request to the whisper agent until a transcript has been received.
@whisper_user.on_interval(period=30, messages=AudioTranscriptRequest)
async def transcript(ctx: Context):
    # Nothing to do once a transcript has already been received.
    if ctx.storage.get("AudioTranscriptSuccessful"):
        return

    # Read the recording in binary mode and encode it as base64 text.
    with open(RECORDING_FILE, "rb") as recording:
        encoded_audio = base64.b64encode(recording.read()).decode('ascii')

    # Send the encoded audio to the whisper agent for transcription.
    await ctx.send(WHISPER_AGENT_ADDRESS, AudioTranscriptRequest(audio_data=encoded_audio))


@whisper_user.on_message(model=AudioTranscriptResponse)
async def handle_data(ctx: Context, sender: str, audioTranscript: AudioTranscriptResponse):
    """Log the received transcript and flag success so the interval task stops resending."""
    ctx.logger.info(f"audio transcript => {audioTranscript.transcript}")
    ctx.storage.set("AudioTranscriptSuccessful", True)


@whisper_user.on_message(model=Error)
async def handle_error(ctx: Context, sender: str, error: Error):
    """Log an error reported by the whisper agent; the success flag stays False,
    so the interval task will retry on its next tick."""
    ctx.logger.info(f"Got error from uagent: {error}")

# Include the protocol with the agent, publish_manifest will make the protocol details available on Agentverse.
user.include(whisper_user, publish_manifest=True)

# Initiate the audio transcription task.
# Bug fix: the original called whisper_user.run(), but Protocol objects have no
# run() method — the user Agent itself must be run.
if __name__ == "__main__":
    user.run()
13 changes: 13 additions & 0 deletions integrations/openai-whisper-large-v2/src/messages/whisper_basic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from uagents import Model


# Message schemas exchanged between the whisper user agent and the whisper agent.
class AudioTranscriptRequest(Model):
    # base64-encoded bytes of the audio recording to transcribe
    audio_data: str


class Error(Model):
    # human-readable error description returned when transcription fails
    error: str


class AudioTranscriptResponse(Model):
    # transcript text produced by the whisper model
    transcript: str
Binary file not shown.
8 changes: 8 additions & 0 deletions integrations/openai-whisper-large-v2/src/user.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from uagents import Bureau
from agents.whisper_user import user

if __name__ == "__main__":
    # Host the user agent inside its own Bureau serving on port 8001.
    bureau = Bureau(endpoint="http://127.0.0.1:8001/submit", port=8001)
    bureau.add(user)
    print(f"Adding user agent to Bureau: {user.address}")
    bureau.run()

0 comments on commit 268af59

Please sign in to comment.