Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added
- **Makefile** - Comprehensive development workflow automation with commands for setup, development, building, testing, and code quality checks
- Includes Python version detection and compatibility warnings
- Self-documenting help system with `make help`
- Colored output for better readability
- **48kHz speech tokenizer opt-in setting** - UI toggle in the Models tab (Advanced Settings) to enable the higher-quality 48kHz speech tokenizer on the PyTorch backend. Disabled by default; changes take effect from the next generation. Settings are persisted via `GET /settings` and `PATCH /settings` API endpoints.
Expand Down
45 changes: 45 additions & 0 deletions app/src/components/ServerSettings/ModelManagement.tsx
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query';
import { Download, Loader2, Trash2 } from 'lucide-react';
import { Checkbox } from '@/components/ui/checkbox';
import { useCallback, useState } from 'react';
import {
AlertDialog,
Expand All @@ -24,6 +25,24 @@ export function ModelManagement() {
const [downloadingModel, setDownloadingModel] = useState<string | null>(null);
const [downloadingDisplayName, setDownloadingDisplayName] = useState<string | null>(null);

const { data: appSettings } = useQuery({
queryKey: ['appSettings'],
queryFn: () => apiClient.getSettings(),
});

const settingsMutation = useMutation({
mutationFn: (data: { use_48k_speech_tokenizer: boolean }) => apiClient.updateSettings(data),
onSuccess: (updated) => {
queryClient.setQueryData(['appSettings'], updated);
toast({
title: 'Setting updated',
description: updated.use_48k_speech_tokenizer
? '48kHz speech tokenizer enabled. It will apply from your next generation.'
: '48kHz speech tokenizer disabled. It will apply from your next generation.',
});
},
});

const { data: modelStatus, isLoading } = useQuery({
queryKey: ['modelStatus'],
queryFn: async () => {
Expand Down Expand Up @@ -215,6 +234,32 @@ export function ModelManagement() {

</div>
) : null}

<div className="mt-6 pt-6 border-t">
<h3 className="text-sm font-semibold mb-3 text-muted-foreground">Advanced Settings</h3>
<div className="flex items-start space-x-3">
<Checkbox
id="use48kTokenizer"
checked={appSettings?.use_48k_speech_tokenizer ?? false}
onCheckedChange={(checked: boolean) => {
settingsMutation.mutate({ use_48k_speech_tokenizer: checked });
}}
disabled={settingsMutation.isPending}
/>
<div className="space-y-1">
<label
htmlFor="use48kTokenizer"
className="text-sm font-medium leading-none peer-disabled:cursor-not-allowed peer-disabled:opacity-70 cursor-pointer"
>
Use 48kHz speech tokenizer (experimental)
</label>
<p className="text-sm text-muted-foreground">
Enables a higher quality 48kHz speech tokenizer. Changes take
effect from your next generation.
</p>
</div>
</div>
</div>
</CardContent>

{/* Delete Confirmation Dialog */}
Expand Down
13 changes: 13 additions & 0 deletions app/src/lib/api/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import type {
StoryItemMove,
StoryItemTrim,
StoryItemSplit,
AppSettings,
} from './types';

class ApiClient {
Expand Down Expand Up @@ -495,6 +496,18 @@ class ApiClient {
});
}

// App Settings
async getSettings(): Promise<AppSettings> {
return this.request<AppSettings>('/settings');
}

async updateSettings(data: Partial<AppSettings>): Promise<AppSettings> {
return this.request<AppSettings>('/settings', {
method: 'PATCH',
body: JSON.stringify(data),
});
}

async exportStoryAudio(storyId: string): Promise<Blob> {
const url = `${this.getBaseUrl()}/stories/${storyId}/export-audio`;
const response = await fetch(url);
Expand Down
4 changes: 4 additions & 0 deletions app/src/lib/api/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -202,3 +202,7 @@ export interface StoryItemTrim {
export interface StoryItemSplit {
split_time_ms: number;
}

/**
 * Application-level settings persisted by the backend
 * (read via `GET /settings`, partially updated via `PATCH /settings`).
 */
export interface AppSettings {
  // Opt-in for the higher-quality 48kHz speech tokenizer on the PyTorch
  // backend; applied on the next model load / generation.
  use_48k_speech_tokenizer: boolean;
}
26 changes: 26 additions & 0 deletions backend/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,32 @@ Manually load TTS model.
#### `POST /models/unload`
Unload TTS model to free memory.

### Settings

#### `GET /settings`
Get current application settings.

**Response:**
```json
{
"use_48k_speech_tokenizer": false
}
```

#### `PATCH /settings`
Partially update application settings. Only provided fields are updated.

**Request:**
```json
{
"use_48k_speech_tokenizer": true
}
```

**Response:** Updated settings object (same shape as `GET /settings`).

Settings are persisted to `data/settings.json` and survive server restarts.

## Database Schema

### profiles
Expand Down
10 changes: 10 additions & 0 deletions backend/backends/pytorch_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from pathlib import Path

from . import TTSBackend, STTBackend
from .. import config
from ..models import AppSettings
from ..utils.cache import get_cache_key, get_cached_voice_prompt, cache_voice_prompt
from ..utils.audio import normalize_audio, load_audio
from ..utils.progress import get_progress_manager
Expand Down Expand Up @@ -195,6 +197,14 @@ def _load_model_sync(self, model_size: str):
device_map=self.device,
torch_dtype=torch.bfloat16,
)

# optionally replace speech tokenizer with 48kHz version for better audio quality
settings = AppSettings(**config.load_app_settings())
if settings.use_48k_speech_tokenizer:
from qwen_tts import Qwen3TTSTokenizer
self.model.model.speech_tokenizer = Qwen3TTSTokenizer.from_pretrained(
"takuma104/Qwen3-TTS-Tokenizer-12Hz-48kHz"
)
finally:
# Exit the patch context
tracker_context.__exit__(None, None, None)
Expand Down
20 changes: 20 additions & 0 deletions backend/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
Handles data directory configuration for production bundling.
"""

import json
import os
from pathlib import Path

Expand Down Expand Up @@ -66,3 +67,22 @@ def get_models_dir() -> Path:
path = _data_dir / "models"
path.mkdir(parents=True, exist_ok=True)
return path

def get_settings_path() -> Path:
    """Return the filesystem path of the persisted app settings JSON file."""
    settings_file = _data_dir / "settings.json"
    return settings_file

def load_app_settings() -> dict:
    """Load app settings from the JSON settings file.

    Returns:
        The parsed settings dict. Falls back to an empty dict when the file
        is missing, unreadable, contains invalid JSON, or holds a non-object
        top-level value (e.g. a bare list), so callers can always safely do
        ``AppSettings(**load_app_settings())``.
    """
    path = get_settings_path()
    if not path.exists():
        return {}
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # Deliberate best-effort: a corrupt or unreadable settings file
        # means "use defaults" rather than failing startup.
        # (json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.)
        return {}
    # Valid JSON is not necessarily an object; only a mapping is usable here.
    return data if isinstance(data, dict) else {}

def save_app_settings(data: dict) -> None:
    """Persist app settings to the JSON settings file (pretty-printed)."""
    serialized = json.dumps(data, indent=2)
    get_settings_path().write_text(serialized, encoding="utf-8")
17 changes: 17 additions & 0 deletions backend/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -1764,6 +1764,23 @@ def _get_gpu_status() -> str:
return "None (CPU only)"


@app.get("/settings", response_model=models.AppSettings)
async def get_settings():
    """Return the current application settings (defaults fill unset fields)."""
    stored = config.load_app_settings()
    return models.AppSettings(**stored)


@app.patch("/settings", response_model=models.AppSettings)
async def update_settings(update: models.AppSettingsUpdate):
    """Partially update application settings.

    Fields left as ``None`` in the request keep their current values; the
    merged result is persisted and returned in full.
    """
    merged = models.AppSettings(**config.load_app_settings()).model_dump()
    changes = update.model_dump(exclude_none=True)
    merged.update(changes)
    config.save_app_settings(merged)
    return models.AppSettings(**merged)


@app.on_event("startup")
async def startup_event():
"""Run on application startup."""
Expand Down
10 changes: 10 additions & 0 deletions backend/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,3 +299,13 @@ class StoryItemTrim(BaseModel):
class StoryItemSplit(BaseModel):
"""Request model for splitting a story item."""
split_time_ms: int = Field(..., ge=0) # Time within the clip to split at (relative to clip start)


class AppSettings(BaseModel):
    """Application settings, persisted as JSON via the config helpers."""
    # Opt-in: use the 48kHz speech tokenizer on the PyTorch backend,
    # swapped in at model-load time. Off by default.
    use_48k_speech_tokenizer: bool = False


class AppSettingsUpdate(BaseModel):
    """Partial update payload for application settings.

    Every field is optional; ``None`` means "leave unchanged" — the PATCH
    handler drops ``None`` values (``exclude_none``) before merging.
    """
    use_48k_speech_tokenizer: Optional[bool] = None
3 changes: 3 additions & 0 deletions data/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"use_48k_speech_tokenizer": false
}