Merged

46 commits
6c14008
v2a services
MSCetin37 Oct 9, 2024
1b28def
add a2t - llm
MSCetin37 Oct 15, 2024
8295cd6
update whisper serve
MSCetin37 Oct 16, 2024
6a44c5e
updates
MSCetin37 Oct 17, 2024
80ac6a5
add data service
MSCetin37 Oct 17, 2024
89e76c7
gateway
MSCetin37 Oct 18, 2024
0ff5083
clean gateway & orchestrator
MSCetin37 Oct 18, 2024
22a6516
updates
MSCetin37 Oct 18, 2024
89723ed
updates
MSCetin37 Oct 21, 2024
4907fc1
adding functional tests
MSCetin37 Oct 22, 2024
6cff4b2
updates
MSCetin37 Oct 23, 2024
afbbbde
updates
MSCetin37 Oct 23, 2024
b765da5
updates read me file
MSCetin37 Oct 25, 2024
f84cdcb
name changes
MSCetin37 Oct 28, 2024
f4f7d55
update readme file
MSCetin37 Oct 28, 2024
a4cb22d
update readme file
MSCetin37 Oct 28, 2024
9a9346e
update readme file
MSCetin37 Oct 28, 2024
01e0a4c
update readme file
MSCetin37 Oct 28, 2024
cce4a61
update readme file
MSCetin37 Oct 28, 2024
37112c0
update max token option
MSCetin37 Oct 31, 2024
d428d10
update the test files
MSCetin37 Nov 2, 2024
f2440ed
readme updates
MSCetin37 Nov 7, 2024
5b2b6cf
readme updates
MSCetin37 Nov 7, 2024
d6ee02f
merge sync
MSCetin37 Nov 7, 2024
a2a8c86
clean code
MSCetin37 Nov 7, 2024
f402ba3
update dataprep-compose-cd.yaml file
MSCetin37 Nov 8, 2024
6e10b4f
merge and sync
MSCetin37 Nov 8, 2024
ab61f95
merge and sync
MSCetin37 Nov 8, 2024
ff3ef0e
merge and sync gateway
MSCetin37 Nov 8, 2024
e8cd092
adding the copyright header
MSCetin37 Nov 8, 2024
bba8404
update the end of file char
MSCetin37 Nov 8, 2024
51d2784
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 8, 2024
6a87761
Merge branch 'main' into docsum
MSCetin37 Nov 8, 2024
c314778
update gateway
MSCetin37 Nov 9, 2024
c3c2d5f
update gateway-docsum
MSCetin37 Nov 9, 2024
a836be0
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 9, 2024
d54f99a
fix test files
MSCetin37 Nov 11, 2024
f1d1b06
fix test files
MSCetin37 Nov 11, 2024
e274a33
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 11, 2024
cfeecca
Merge branch 'main' into docsum
ashahba Nov 12, 2024
0af301f
merge & sync
MSCetin37 Nov 12, 2024
0e344b7
merge & sync
MSCetin37 Nov 12, 2024
374f3a1
Update dataprep-compose.yaml
MSCetin37 Nov 12, 2024
aca165f
update test
MSCetin37 Nov 12, 2024
eb11648
update dataprep-compose
MSCetin37 Nov 13, 2024
4e12321
Merge branch 'main' into docsum
ZePan110 Nov 13, 2024
12 changes: 12 additions & 0 deletions .github/workflows/docker/compose/dataprep-compose.yaml
@@ -51,3 +51,15 @@ services:
build:
dockerfile: comps/dataprep/neo4j/llama_index/Dockerfile
image: ${REGISTRY:-opea}/dataprep-neo4j-llamaindex:${TAG:-latest}
dataprep-multimedia2text:
build:
dockerfile: comps/dataprep/multimedia2text/Dockerfile
image: ${REGISTRY:-opea}/dataprep-multimedia2text:${TAG:-latest}
dataprep-video2audio:
build:
dockerfile: comps/dataprep/multimedia2text/video2audio/Dockerfile
image: ${REGISTRY:-opea}/dataprep-video2audio:${TAG:-latest}
dataprep-audio2text:
build:
dockerfile: comps/dataprep/multimedia2text/audio2text/Dockerfile
image: ${REGISTRY:-opea}/dataprep-audio2text:${TAG:-latest}
2 changes: 2 additions & 0 deletions comps/__init__.py
@@ -36,6 +36,8 @@
ScoreDoc,
PIIRequestDoc,
PIIResponseDoc,
Audio2text,
DocSumDoc,
)

# Constants
18 changes: 14 additions & 4 deletions comps/asr/whisper/dependency/whisper_model.py
@@ -14,7 +14,14 @@
class WhisperModel:
"""Convert audio to text."""

def __init__(self, model_name_or_path="openai/whisper-small", language="english", device="cpu", hpu_max_len=8192):
def __init__(
self,
model_name_or_path="openai/whisper-small",
language="english",
device="cpu",
hpu_max_len=8192,
return_timestamps=False,
):
if device == "hpu":
# Explicitly link HPU with Torch
from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi
@@ -31,6 +38,7 @@ def __init__(self, model_name_or_path="openai/whisper-small", language="english"

self.language = language
self.hpu_max_len = hpu_max_len
self.return_timestamps = return_timestamps

if device == "hpu":
self._warmup_whisper_hpu_graph("https://github.com/Spycsh/assets/raw/main/ljspeech_60s_audio.wav")
@@ -104,7 +112,7 @@ def _warmup_whisper_hpu_graph(self, url):
)
),
language=self.language,
return_timestamps=True,
return_timestamps=self.return_timestamps,
)

def audio2text(self, audio_path):
@@ -167,7 +175,7 @@ def audio2text(self, audio_path):
)
),
language=self.language,
return_timestamps=True,
return_timestamps=self.return_timestamps,
)
# pylint: disable=E1101
result = self.processor.tokenizer.batch_decode(predicted_ids, skip_special_tokens=True, normalize=True)[0]
@@ -180,7 +188,9 @@


if __name__ == "__main__":
asr = WhisperModel(model_name_or_path="openai/whisper-small", language="english", device="cpu")
asr = WhisperModel(
model_name_or_path="openai/whisper-small", language="english", device="cpu", return_timestamps=True
)

# Test multilanguage asr
asr.language = "chinese"
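
The change above threads a return_timestamps flag through WhisperModel instead of hard-coding return_timestamps=True in the generate calls. As a rough sketch of what that flag controls, using the plain transformers ASR pipeline rather than this repo's wrapper (the model name and the sample.wav path are placeholders, not taken from this PR):

    from transformers import pipeline

    asr = pipeline("automatic-speech-recognition", model="openai/whisper-small")

    # Without timestamps the result is just {"text": "..."}.
    plain = asr("sample.wav", return_timestamps=False)

    # With timestamps the result also carries per-chunk (start, end) pairs under "chunks".
    timed = asr("sample.wav", return_timestamps=True)

    print(plain["text"])
    print(timed["chunks"])
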
9 changes: 8 additions & 1 deletion comps/asr/whisper/dependency/whisper_server.py
@@ -39,6 +39,7 @@ async def audio_to_text(request: Request):

audio = AudioSegment.from_file(file_name)
audio = audio.set_frame_rate(16000)

audio.export(f"{file_name}", format="wav")
try:
asr_result = asr.audio2text(file_name)
@@ -57,8 +58,14 @@
parser.add_argument("--model_name_or_path", type=str, default="openai/whisper-small")
parser.add_argument("--language", type=str, default="english")
parser.add_argument("--device", type=str, default="cpu")
parser.add_argument("--return_timestamps", type=str, default=True)

args = parser.parse_args()
asr = WhisperModel(model_name_or_path=args.model_name_or_path, language=args.language, device=args.device)
asr = WhisperModel(
model_name_or_path=args.model_name_or_path,
language=args.language,
device=args.device,
return_timestamps=args.return_timestamps,
)

uvicorn.run(app, host=args.host, port=args.port)
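
One caveat in the server wiring above: --return_timestamps is declared with type=str, so a command-line value such as "false" reaches WhisperModel as a non-empty, truthy string. If strict boolean behaviour is wanted, a converter along these lines would do it (a sketch only; str2bool is a hypothetical helper, not part of this PR):

    import argparse

    def str2bool(value) -> bool:
        # Accept real booleans as-is and map common textual forms to bool,
        # so "--return_timestamps false" does not end up truthy.
        if isinstance(value, bool):
            return value
        return str(value).strip().lower() in ("1", "true", "yes", "y")

    parser = argparse.ArgumentParser()
    parser.add_argument("--return_timestamps", type=str2bool, default=True)

    print(parser.parse_args(["--return_timestamps", "false"]).return_timestamps)  # False
    print(parser.parse_args([]).return_timestamps)                                # True
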
40 changes: 13 additions & 27 deletions comps/cores/mega/gateway.py
@@ -17,10 +17,11 @@
ChatCompletionResponse,
ChatCompletionResponseChoice,
ChatMessage,
DocSumChatCompletionRequest,
EmbeddingRequest,
UsageInfo,
)
from ..proto.docarray import LLMParams, LLMParamsDoc, RerankedDoc, RerankerParms, RetrieverParms, TextDoc
from ..proto.docarray import DocSumDoc, LLMParams, LLMParamsDoc, RerankedDoc, RerankerParms, RetrieverParms, TextDoc
from .constants import MegaServiceEndpoint, ServiceRoleType, ServiceType
from .micro_service import MicroService

@@ -409,34 +410,20 @@ async def handle_request(self, request: Request):
class DocSumGateway(Gateway):
def __init__(self, megaservice, host="0.0.0.0", port=8888):
super().__init__(
megaservice, host, port, str(MegaServiceEndpoint.DOC_SUMMARY), ChatCompletionRequest, ChatCompletionResponse
megaservice,
host,
port,
str(MegaServiceEndpoint.DOC_SUMMARY),
input_datatype=DocSumChatCompletionRequest,
output_datatype=ChatCompletionResponse,
)

async def handle_request(self, request: Request, files: List[UploadFile] = File(default=None)):
data = await request.form()
async def handle_request(self, request: Request):
data = await request.json()
stream_opt = data.get("stream", True)
chat_request = ChatCompletionRequest.parse_obj(data)
file_summaries = []
if files:
for file in files:
file_path = f"/tmp/{file.filename}"

import aiofiles

async with aiofiles.open(file_path, "wb") as f:
await f.write(await file.read())
docs = read_text_from_file(file, file_path)
os.remove(file_path)
if isinstance(docs, list):
file_summaries.extend(docs)
else:
file_summaries.append(docs)

if file_summaries:
prompt = self._handle_message(chat_request.messages) + "\n".join(file_summaries)
else:
prompt = self._handle_message(chat_request.messages)
chat_request = ChatCompletionRequest.model_validate(data)

prompt = self._handle_message(chat_request.messages)
parameters = LLMParams(
max_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024,
top_k=chat_request.top_k if chat_request.top_k else 10,
@@ -446,10 +433,9 @@ async def handle_request(self, request: Request, files: List[UploadFile] = File(
presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0,
repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03,
streaming=stream_opt,
language=chat_request.language if chat_request.language else "auto",
)
result_dict, runtime_graph = await self.megaservice.schedule(
initial_inputs={"query": prompt}, llm_parameters=parameters
initial_inputs={data["type"]: prompt}, llm_parameters=parameters
)
for node, response in result_dict.items():
# Here it suppose the last microservice in the megaservice is LLM.
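
With the form/file-upload path removed, DocSumGateway now expects a JSON body whose type field names the modality the prompt is bound to when the megaservice is scheduled (initial_inputs={data["type"]: prompt}). A minimal client sketch; the host, port, endpoint path, and payload values are illustrative assumptions, not taken from this diff:

    import requests

    payload = {
        "type": "text",  # becomes the key of the megaservice's initial_inputs
        "messages": "Summarize the attached quarterly report in three sentences.",
        "max_tokens": 512,
        "stream": False,
    }

    # Assumes the gateway runs locally on its default port 8888 and is mounted
    # at the DOC_SUMMARY endpoint (e.g. /v1/docsum); adjust for your deployment.
    resp = requests.post("http://localhost:8888/v1/docsum", json=payload)
    print(resp.json())
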
8 changes: 8 additions & 0 deletions comps/cores/proto/api_protocol.py
@@ -269,6 +269,14 @@ class ChatCompletionRequest(BaseModel):
request_type: Literal["chat"] = "chat"


class DocSumChatCompletionRequest(BaseModel):
llm_params: Optional[ChatCompletionRequest] = None
text: Optional[str] = None
audio: Optional[str] = None
video: Optional[str] = None
type: Optional[str] = None


class AudioChatCompletionRequest(BaseModel):
audio: str
messages: Optional[
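
A short sketch of how the new request type might be populated on the client side: only the modality being summarized is set, and type names it (the field values here are placeholders):

    # Summarize an audio clip; the base64 payload is illustrative.
    audio_request = DocSumChatCompletionRequest(
        type="audio",
        audio="<base64-encoded wav bytes>",
    )
    print(audio_request.model_dump(exclude_none=True))
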
10 changes: 10 additions & 0 deletions comps/cores/proto/docarray.py
@@ -20,6 +20,10 @@ class TextDoc(BaseDoc, TopologyInfo):
text: str = None


class Audio2text(BaseDoc, TopologyInfo):
query: str = None


class FactualityDoc(BaseDoc):
reference: str
text: str
@@ -74,6 +78,12 @@ class Base64ByteStrDoc(BaseDoc):
byte_str: str


class DocSumDoc(BaseDoc):
text: Optional[str] = None
audio: Optional[str] = None
video: Optional[str] = None


class DocPath(BaseDoc):
path: str
chunk_size: int = 1500
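
DocSumDoc plays the matching role on the pipeline side: an input document where, typically, only one of the optional text/audio/video fields carries data. A tiny illustration with placeholder content:

    # Only the modality being processed is populated; the others stay None.
    doc = DocSumDoc(video="<base64-encoded mp4 bytes>")
    print(doc.video is not None, doc.audio, doc.text)  # True None None
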
30 changes: 30 additions & 0 deletions comps/dataprep/multimedia2text/Dockerfile
@@ -0,0 +1,30 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Use the official Python 3.11 slim image as the base image
FROM python:3.11-slim

# Set environment variables
ENV LANG=C.UTF-8

# Install necessary packages and clean up to reduce image size
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
build-essential \
libgl1-mesa-glx \
libjemalloc-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

# Create a directory for the user and set it as the working directory
WORKDIR /home/user

# Copy the application code and requirements file to the container
COPY comps /home/user/comps
COPY requirements.txt /home/user/requirements.txt
COPY ./comps/dataprep/multimedia2text/multimedia2text.py /home/user/multimedia2text.py

# Install Python dependencies
RUN python -m pip install --no-cache-dir -r requirements.txt

# Define the entry point for the container
ENTRYPOINT ["python", "multimedia2text.py"]