Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ jobs:
pip install pyinstaller
pip install -r backend/requirements.txt
pip install --no-deps chatterbox-tts
pip install --no-deps hume-tada
- name: Install MLX dependencies (Apple Silicon only)
if: matrix.backend == 'mlx'
Expand Down Expand Up @@ -188,6 +189,7 @@ jobs:
pip install pyinstaller
pip install -r backend/requirements.txt
pip install --no-deps chatterbox-tts
pip install --no-deps hume-tada
- name: Install PyTorch with CUDA 12.6
run: |
Expand Down
2 changes: 2 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ RUN pip install --no-cache-dir --upgrade pip

# Install the backend's Python requirements under the /install prefix.
COPY backend/requirements.txt .
RUN pip install --no-cache-dir --prefix=/install -r requirements.txt
# chatterbox-tts and hume-tada are installed with --no-deps, so pip skips the
# dependencies those packages declare.
# NOTE(review): presumably their runtime deps are already covered by
# requirements.txt and --no-deps avoids version conflicts — confirm.
RUN pip install --no-cache-dir --prefix=/install --no-deps chatterbox-tts
RUN pip install --no-cache-dir --prefix=/install --no-deps hume-tada
# Qwen3-TTS is installed straight from its Git repository, with dependencies.
RUN pip install --no-cache-dir --prefix=/install \
git+https://github.com/QwenLM/Qwen3-TTS.git

Expand Down
18 changes: 18 additions & 0 deletions app/src/components/Generation/EngineModelSelector.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,24 @@ const ENGINE_OPTIONS = [
{ value: 'luxtts', label: 'LuxTTS' },
{ value: 'chatterbox', label: 'Chatterbox' },
{ value: 'chatterbox_turbo', label: 'Chatterbox Turbo' },
{ value: 'tada:1B', label: 'TADA 1B' },
{ value: 'tada:3B', label: 'TADA 3B Multilingual' },
] as const;

/**
 * Short blurb for each engine, keyed by the bare engine id.
 *
 * Keys are engine ids (e.g. `tada`), not the select values used in
 * ENGINE_OPTIONS (e.g. `tada:1B`), so both TADA sizes share a single entry.
 */
const ENGINE_DESCRIPTIONS: Record<string, string> = {
qwen: 'Multi-language, two sizes',
luxtts: 'Fast, English-focused',
chatterbox: '23 languages, incl. Hebrew',
chatterbox_turbo: 'English, [laugh] [cough] tags',
tada: 'HumeAI, 700s+ coherent audio',
};

/**
 * Engines that only support English and should force language to 'en' on select.
 *
 * `tada` is intentionally absent: only its 1B size is English-only, and that
 * case is handled per model size in handleEngineChange.
 */
const ENGLISH_ONLY_ENGINES = new Set(['luxtts', 'chatterbox_turbo']);

/**
 * Build the dropdown value that represents an engine / model-size pair.
 *
 * Engines that ship several sizes are encoded as `<engine>:<size>`; when no
 * size is supplied, qwen falls back to `1.7B` and tada to `1B`. Every other
 * engine is represented by its own id and the size is ignored.
 *
 * @param engine - Engine id from the form.
 * @param modelSize - Selected model size, if any.
 * @returns The value to use for the engine select control.
 */
function getSelectValue(engine: string, modelSize?: string): string {
  const fallbackSize = engine === 'qwen' ? '1.7B' : engine === 'tada' ? '1B' : undefined;

  // Engines without size variants map straight to their id.
  if (fallbackSize === undefined) {
    return engine;
  }

  // `||` (not `??`) on purpose: an empty-string size also takes the fallback.
  const size = modelSize || fallbackSize;
  return `${engine}:${size}`;
}

Expand All @@ -48,6 +52,20 @@ function handleEngineChange(form: UseFormReturn<GenerationFormValues>, value: st
if (!available.some((l) => l.value === currentLang)) {
form.setValue('language', available[0]?.value ?? 'en');
}
} else if (value.startsWith('tada:')) {
const [, modelSize] = value.split(':');
form.setValue('engine', 'tada');
form.setValue('modelSize', modelSize as '1B' | '3B');
// TADA 1B is English-only; 3B is multilingual
if (modelSize === '1B') {
form.setValue('language', 'en');
} else {
const currentLang = form.getValues('language');
const available = getLanguageOptionsForEngine('tada');
if (!available.some((l) => l.value === currentLang)) {
form.setValue('language', available[0]?.value ?? 'en');
}
}
} else {
form.setValue('engine', value as GenerationFormValues['engine']);
form.setValue('modelSize', undefined as unknown as '1.7B' | '0.6B');
Expand Down
7 changes: 6 additions & 1 deletion app/src/components/ServerSettings/ModelManagement.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ const MODEL_DESCRIPTIONS: Record<string, string> = {
'Production-grade open source TTS by Resemble AI. Supports 23 languages with voice cloning and emotion exaggeration control.',
'chatterbox-turbo':
'Streamlined 350M parameter TTS by Resemble AI. High-quality English speech with less compute and VRAM than larger models.',
'tada-1b':
'HumeAI TADA 1B — English speech-language model built on Llama 3.2 1B. Generates 700s+ of coherent audio with synchronized text-acoustic alignment.',
'tada-3b-ml':
'HumeAI TADA 3B Multilingual — built on Llama 3.2 3B. Supports 10 languages with high-fidelity voice cloning via text-acoustic dual alignment.',
'whisper-base':
'Smallest Whisper model (74M parameters). Fast transcription with moderate accuracy.',
'whisper-small':
Expand Down Expand Up @@ -391,7 +395,8 @@ export function ModelManagement() {
(m) =>
m.model_name.startsWith('qwen-tts') ||
m.model_name.startsWith('luxtts') ||
m.model_name.startsWith('chatterbox'),
m.model_name.startsWith('chatterbox') ||
m.model_name.startsWith('tada'),
) ?? [];
const whisperModels = modelStatus?.models.filter((m) => m.model_name.startsWith('whisper')) ?? [];

Expand Down
4 changes: 2 additions & 2 deletions app/src/lib/api/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ export interface GenerationRequest {
text: string;
language: LanguageCode;
seed?: number;
model_size?: '1.7B' | '0.6B';
engine?: 'qwen' | 'luxtts' | 'chatterbox' | 'chatterbox_turbo';
model_size?: '1.7B' | '0.6B' | '1B' | '3B';
engine?: 'qwen' | 'luxtts' | 'chatterbox' | 'chatterbox_turbo' | 'tada';
instruct?: string;
max_chunk_chars?: number;
crossfade_ms?: number;
Expand Down
1 change: 1 addition & 0 deletions app/src/lib/constants/languages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ export const ENGINE_LANGUAGES: Record<string, readonly LanguageCode[]> = {
'zh',
],
chatterbox_turbo: ['en'],
tada: ['en', 'ar', 'zh', 'de', 'es', 'fr', 'it', 'ja', 'pl', 'pt'],
} as const;

/** Helper: get language options for a given engine. */
Expand Down
24 changes: 15 additions & 9 deletions app/src/lib/hooks/useGenerationForm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ const generationSchema = z.object({
text: z.string().min(1, '').max(50000),
language: z.enum(LANGUAGE_CODES as [LanguageCode, ...LanguageCode[]]),
seed: z.number().int().optional(),
modelSize: z.enum(['1.7B', '0.6B']).optional(),
modelSize: z.enum(['1.7B', '0.6B', '1B', '3B']).optional(),
instruct: z.string().max(500).optional(),
engine: z.enum(['qwen', 'luxtts', 'chatterbox', 'chatterbox_turbo']).optional(),
engine: z.enum(['qwen', 'luxtts', 'chatterbox', 'chatterbox_turbo', 'tada']).optional(),
});

export type GenerationFormValues = z.infer<typeof generationSchema>;
Expand Down Expand Up @@ -79,17 +79,23 @@ export function useGenerationForm(options: UseGenerationFormOptions = {}) {
? 'chatterbox-tts'
: engine === 'chatterbox_turbo'
? 'chatterbox-turbo'
: `qwen-tts-${data.modelSize}`;
: engine === 'tada'
? `tada-${(data.modelSize || '1B').toLowerCase()}`
: `qwen-tts-${data.modelSize}`;
const displayName =
engine === 'luxtts'
? 'LuxTTS'
: engine === 'chatterbox'
? 'Chatterbox TTS'
: engine === 'chatterbox_turbo'
? 'Chatterbox Turbo'
: data.modelSize === '1.7B'
? 'Qwen TTS 1.7B'
: 'Qwen TTS 0.6B';
: engine === 'tada'
? data.modelSize === '3B'
? 'TADA 3B Multilingual'
: 'TADA 1B'
: data.modelSize === '1.7B'
? 'Qwen TTS 1.7B'
: 'Qwen TTS 0.6B';

// Check if model needs downloading
try {
Expand All @@ -104,17 +110,17 @@ export function useGenerationForm(options: UseGenerationFormOptions = {}) {
console.error('Failed to check model status:', error);
}

const isQwen = engine === 'qwen';
const hasModelSizes = engine === 'qwen' || engine === 'tada';
const effectsChain = options.getEffectsChain?.();
// This now returns immediately with status="generating"
const result = await generation.mutateAsync({
profile_id: selectedProfileId,
text: data.text,
language: data.language,
seed: data.seed,
model_size: isQwen ? data.modelSize : undefined,
model_size: hasModelSizes ? data.modelSize : undefined,
engine,
instruct: isQwen ? data.instruct || undefined : undefined,
instruct: engine === 'qwen' ? data.instruct || undefined : undefined,
max_chunk_chars: maxChunkChars,
crossfade_ms: crossfadeMs,
normalize: normalizeAudio,
Expand Down
29 changes: 27 additions & 2 deletions backend/backends/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ def is_loaded(self) -> bool:
"luxtts": "LuxTTS",
"chatterbox": "Chatterbox TTS",
"chatterbox_turbo": "Chatterbox Turbo",
"tada": "TADA",
}


Expand Down Expand Up @@ -259,6 +260,24 @@ def _get_non_qwen_tts_configs() -> list[ModelConfig]:
needs_trim=True,
languages=["en"],
),
ModelConfig(
model_name="tada-1b",
display_name="TADA 1B (English)",
engine="tada",
hf_repo_id="HumeAI/tada-1b",
model_size="1B",
size_mb=4000,
languages=["en"],
),
ModelConfig(
model_name="tada-3b-ml",
display_name="TADA 3B Multilingual",
engine="tada",
hf_repo_id="HumeAI/tada-3b-ml",
model_size="3B",
size_mb=8000,
languages=["en", "ar", "zh", "de", "es", "fr", "it", "ja", "pl", "pt"],
),
]


Expand Down Expand Up @@ -339,10 +358,12 @@ def engine_has_model_sizes(engine: str) -> bool:


async def load_engine_model(engine: str, model_size: str = "default") -> None:
    """Load a model for the given engine, handling engines with multiple model sizes.

    Args:
        engine: Engine id used to look up the backend via
            ``get_tts_backend_for_engine``.
        model_size: Size forwarded to the backend for the "qwen" and "tada"
            engines; ignored for every other engine.
    """
    backend = get_tts_backend_for_engine(engine)
    if engine == "qwen":
        await backend.load_model_async(model_size)
    elif engine == "tada":
        # NOTE(review): unlike qwen, this awaits load_model rather than
        # load_model_async — confirm the TADA backend's load_model is a
        # coroutine that accepts a model size.
        await backend.load_model(model_size)
    else:
        await backend.load_model()

Expand All @@ -358,7 +379,7 @@ async def ensure_model_cached_or_raise(engine: str, model_size: str = "default")
cfg = c
break

if engine == "qwen":
if engine in ("qwen", "tada"):
if not backend._is_model_cached(model_size):
raise HTTPException(
status_code=400,
Expand Down Expand Up @@ -490,6 +511,10 @@ def get_tts_backend_for_engine(engine: str) -> TTSBackend:
from .chatterbox_turbo_backend import ChatterboxTurboTTSBackend

backend = ChatterboxTurboTTSBackend()
elif engine == "tada":
from .hume_backend import HumeTadaBackend

backend = HumeTadaBackend()
else:
raise ValueError(f"Unknown TTS engine: {engine}. Supported: {list(TTS_ENGINES.keys())}")

Expand Down
Loading