Skip to content

Commit 7212dd2

Browse files
FEAT(mcp): add TTS MCP Support (opea-project#1693)
* Add TTS MCP support * [pre-commit.ci] Auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci * Pass the unit test by bypassing the volume mapping --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent b2eb525 commit 7212dd2

File tree

6 files changed

+117
-4
lines changed

6 files changed

+117
-4
lines changed

comps/third_parties/gpt-sovits/deployment/docker_compose/compose.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ services:
1313
http_proxy: ${http_proxy}
1414
https_proxy: ${https_proxy}
1515
llm_download: ${llm_download:-True}
16-
volumes:
17-
- ./pretrained_models/:/home/user/GPT-SoVITS/GPT_SoVITS/pretrained_models/
16+
# volumes:
17+
# - ./pretrained_models/:/home/user/GPT-SoVITS/GPT_SoVITS/pretrained_models/
1818
restart: unless-stopped
1919
healthcheck:
2020
test: ["CMD", "curl", "-f", "http://localhost:9880/health"]

comps/tts/deployment/docker_compose/compose.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,13 @@ services:
1515
environment:
1616
TTS_ENDPOINT: ${TTS_ENDPOINT}
1717
TTS_COMPONENT_NAME: ${TTS_COMPONENT_NAME:-OPEA_SPEECHT5_TTS}
18+
ENABLE_MCP: ${ENABLE_MCP:-False}
1819
tts-speecht5:
1920
extends: tts
2021
container_name: tts-speecht5-service
2122
environment:
2223
TTS_COMPONENT_NAME: ${TTS_COMPONENT_NAME:-OPEA_SPEECHT5_TTS}
24+
ENABLE_MCP: ${ENABLE_MCP:-False}
2325
depends_on:
2426
speecht5-service:
2527
condition: service_healthy
@@ -28,6 +30,7 @@ services:
2830
container_name: tts-speecht5-gaudi-service
2931
environment:
3032
TTS_COMPONENT_NAME: ${TTS_COMPONENT_NAME:-OPEA_SPEECHT5_TTS}
33+
ENABLE_MCP: ${ENABLE_MCP:-False}
3134
depends_on:
3235
speecht5-gaudi-service:
3336
condition: service_healthy
@@ -36,6 +39,7 @@ services:
3639
container_name: tts-gpt-sovits-service
3740
environment:
3841
TTS_COMPONENT_NAME: ${TTS_COMPONENT_NAME:-OPEA_GPTSOVITS_TTS}
42+
ENABLE_MCP: ${ENABLE_MCP:-False}
3943
depends_on:
4044
gpt-sovits-service:
4145
condition: service_healthy

comps/tts/src/opea_tts_microservice.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
# Copyright (C) 2024 Intel Corporation
22
# SPDX-License-Identifier: Apache-2.0
33

4+
import base64
45
import os
56
import time
67

8+
import requests
79
from fastapi.responses import StreamingResponse
810
from integrations.gptsovits import OpeaGptsovitsTts
911
from integrations.speecht5 import OpeaSpeecht5Tts
@@ -17,12 +19,15 @@
1719
register_statistics,
1820
statistics_dict,
1921
)
22+
from comps.cores.mega.constants import MCPFuncType
2023
from comps.cores.proto.api_protocol import AudioSpeechRequest
2124

2225
logger = CustomLogger("opea_tts_microservice")
2326
logflag = os.getenv("LOGFLAG", False)
2427

2528
tts_component_name = os.getenv("TTS_COMPONENT_NAME", "OPEA_SPEECHT5_TTS")
29+
enable_mcp = os.getenv("ENABLE_MCP", "").strip().lower() in {"true", "1", "yes"}
30+
2631
# Initialize OpeaComponentLoader
2732
loader = OpeaComponentLoader(tts_component_name, description=f"OPEA TTS Component: {tts_component_name}")
2833

@@ -41,6 +46,9 @@ async def stream_forwarder(response):
4146
port=9088,
4247
input_datatype=AudioSpeechRequest,
4348
output_datatype=StreamingResponse,
49+
enable_mcp=enable_mcp,
50+
mcp_func_type=MCPFuncType.TOOL,
51+
description="Convert text to audio.",
4452
)
4553
@register_statistics(names=["opea_service@tts"])
4654
async def text_to_speech(request: AudioSpeechRequest) -> StreamingResponse:
@@ -51,11 +59,17 @@ async def text_to_speech(request: AudioSpeechRequest) -> StreamingResponse:
5159

5260
try:
5361
# Use the loader to invoke the component
54-
tts_response = await loader.invoke(request)
62+
tts_response: requests.models.Response = await loader.invoke(request)
5563
if logflag:
5664
logger.info(tts_response)
5765
statistics_dict["opea_service@tts"].append_latency(time.time() - start, None)
58-
return StreamingResponse(stream_forwarder(tts_response))
66+
if enable_mcp:
67+
# return the base64 string
68+
audio_base64 = base64.b64encode(tts_response.content).decode("utf-8")
69+
70+
return {"audio_str": audio_base64}
71+
else:
72+
return StreamingResponse(stream_forwarder(tts_response))
5973

6074
except Exception as e:
6175
logger.error(f"Error during tts invocation: {e}")

comps/tts/src/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
aiohttp
22
docarray[full]
33
fastapi
4+
mcp
45
opentelemetry-api
56
opentelemetry-exporter-otlp
67
opentelemetry-sdk

tests/tts/test_tts_speecht5_mcp.sh

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
#!/bin/bash
2+
# Copyright (C) 2025 Intel Corporation
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
set -x
6+
7+
WORKPATH=$(dirname "$PWD")
8+
ip_address=$(hostname -I | awk '{print $1}')
9+
export TAG=comps
10+
export SPEECHT5_PORT=11806
11+
export TTS_PORT=11807
12+
export ENABLE_MCP=True
13+
14+
15+
function build_docker_images() {
    # Build the SpeechT5 backend image and the TTS microservice image from the
    # repository root, aborting the whole script on the first failure.
    #
    # Reads:  WORKPATH, TAG, http_proxy, https_proxy
    # Effect: changes the working directory to $WORKPATH.

    # A failed cd would otherwise run the builds against the wrong context.
    cd "$WORKPATH" || exit 1
    pwd

    # Each entry is "<image name> <Dockerfile path relative to WORKPATH>".
    local spec image dockerfile
    for spec in \
        "opea/speecht5 comps/third_parties/speecht5/src/Dockerfile" \
        "opea/tts comps/tts/src/Dockerfile"; do
        image=${spec%% *}
        dockerfile=${spec#* }

        if ! docker build --no-cache \
            --build-arg https_proxy="$https_proxy" \
            --build-arg http_proxy="$http_proxy" \
            -t "${image}:${TAG}" \
            -f "$dockerfile" .; then
            echo "${image} built fail"
            exit 1
        fi
        echo "${image} built successful"
    done
}
33+
34+
function start_service() {
    # Start the SpeechT5 backend and the TTS microservice with docker compose.
    #
    # Reads:   WORKPATH, ip_address, SPEECHT5_PORT
    # Exports: TTS_ENDPOINT, TTS_COMPONENT_NAME (consumed by the compose file)

    # NOTE(review): presumably unset so requests to the local services are not
    # routed through the proxy -- confirm.
    unset http_proxy
    export TTS_ENDPOINT="http://${ip_address}:${SPEECHT5_PORT}"
    export TTS_COMPONENT_NAME=OPEA_SPEECHT5_TTS

    # Anchor the compose file to WORKPATH so this function does not depend on
    # an earlier step having changed the working directory.
    docker compose \
        -f "${WORKPATH}/comps/tts/deployment/docker_compose/compose.yaml" \
        up -d speecht5-service tts-speecht5

    # Fixed grace period before validation starts.
    sleep 15
}
42+
43+
function validate_microservice() {
    # Run the MCP client check against the TTS service. On failure, dump the
    # logs of both containers and abort the script with status 1.
    #
    # Reads: WORKPATH, ip_address, TTS_PORT

    # The validation script needs the MCP client package on the host.
    pip install mcp

    if ! python3 "${WORKPATH}/tests/utils/validate_svc_with_mcp.py" \
        "$ip_address" "$TTS_PORT" "tts"; then
        docker logs speecht5-service
        docker logs tts-speecht5-service
        exit 1
    fi
}
53+
54+
function stop_docker() {
    # Stop the containers used by this test. `docker ps -a` lists containers
    # in any state, and `xargs -r` skips `docker stop` when nothing matched.
    docker ps -a \
        --filter "name=speecht5-service" \
        --filter "name=tts-speecht5-service" \
        --format "{{.Names}}" |
        xargs -r docker stop
}
57+
58+
function main() {
    # End-to-end flow: clean up, build, start, validate, clean up again.

    # Stop containers left over from an earlier run.
    stop_docker

    build_docker_images
    start_service

    validate_microservice

    # Tear down, then prune; "y" answers the prune confirmation prompt.
    stop_docker
    echo y | docker system prune
}

main

tests/utils/validate_svc_with_mcp.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,28 @@ async def validate_svc(ip_address, service_port, service_type):
3838
else:
3939
print(f"Result wrong. Received was {result_content}")
4040
exit(1)
41+
elif service_type == "tts":
42+
input_dict = {"request": {"input": "Hi there, welcome to OPEA."}}
43+
tool_result = await session.call_tool(
44+
"text_to_speech",
45+
input_dict,
46+
)
47+
result_content = tool_result.content
48+
# Check result
49+
audio_str = json.loads(result_content[0].text).get("audio_str", "")
50+
if audio_str.startswith("Ukl"): # "Ukl" indicates likely WAV header
51+
audio_data = base64.b64decode(audio_str)
52+
with open("output.wav", "wb") as f:
53+
f.write(audio_data)
54+
with open("output.wav", "rb") as f:
55+
header = f.read(4)
56+
if header == b"RIFF":
57+
print("Result correct.")
58+
else:
59+
print(f"Invalid WAV file: starts with {header}")
60+
else:
61+
print(f"Result wrong. Received was {result_content}")
62+
exit(1)
4163
else:
4264
print(f"Unknown service type: {service_type}")
4365
exit(1)

0 commit comments

Comments
 (0)