uv venv --python 3.12
source .venv/bin/activate
uv pip install voicehub
VoiceHub provides a simple, unified interface for working with various Text-to-Speech (TTS) models. Below are examples showing how to use different supported TTS models with the same consistent approach.
from voicehub.automodel import AutoInferenceModel
model = AutoInferenceModel.from_pretrained(
model_type="orpheustts", # or "dia", "vui", "llasa", "llasa_voice_clone", "chatterbox"
model_path="canopylabs/orpheus-3b-0.1-ft",
device="cuda",
)
output = model(
text="Hey, here is some random stuff, the text the less likely the model can cope!",
voice="tara",
output_file="output.wav",
)
from voicehub.automodel import AutoInferenceModel
model = AutoInferenceModel.from_pretrained(
model_type="dia", # or "orpheustts", "vui", "llasa", "llasa_voice_clone", "chatterbox"
model_path="dia/dia-100m-base.pt",
device="cuda",
)
output = model(
text="Hey, here is some random stuff, the text the less likely the model can cope!",
output_file="output.wav",
)
from voicehub.automodel import AutoInferenceModel
model = AutoInferenceModel.from_pretrained(
model_type="vui", # or "orpheustts", "dia", "llasa", "llasa_voice_clone", "chatterbox"
model_path="fluxions/vui",
device="cuda",
)
output = model(
text="Hey, here is some random stuff, the text the less likely the model can cope!",
output_file="output.wav",
)
from voicehub.automodel import AutoInferenceModel
model = AutoInferenceModel.from_pretrained(
model_type="llasa", # or "orpheustts", "dia", "vui", "llasa_voice_clone", "chatterbox"
model_path="HKUSTAudio/Llasa-1B-Multilingual",
device="cuda",
)
output = model(
text="Hey, here is some random stuff, the text the less likely the model can cope!",
output_file="output.wav",
)
from voicehub.automodel import AutoInferenceModel
model = AutoInferenceModel.from_pretrained(
model_type="llasa_voice_clone", # or "orpheustts", "dia", "vui", "llasa", "chatterbox"
model_path="HKUSTAudio/Llasa-1B-Multilingual",
device="cuda",
)
output = model(
text="Hey, here is some random stuff, the text the less likely the model can cope!",
output_file="output.wav",
)
from voicehub.automodel import AutoInferenceModel
model = AutoInferenceModel.from_pretrained(
model_type="chatterbox", # or "orpheustts", "dia", "vui", "llasa", "llasa_voice_clone"
model_path="ResembleAI/chatterbox",
device="cuda",
)
output = model(
text="Hey, here is some random stuff, the text the less likely the model can cope!",
output_file="output.wav",
)
uv pip install pre-commit
pre-commit install
pre-commit run --all-files