Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added
- **Makefile** - Comprehensive development workflow automation with commands for setup, development, building, testing, and code quality checks
- Includes Python version detection and compatibility warnings
- Self-documenting help system with `make help`
- Colored output for better readability
- **48kHz speech tokenizer opt-in setting** - UI toggle in the Models tab (Advanced Settings) to enable the higher-quality 48kHz speech tokenizer on the PyTorch backend. Disabled by default; changes take effect from the next generation. Settings are persisted via `GET /settings` and `PATCH /settings` API endpoints.
Expand Down
45 changes: 45 additions & 0 deletions app/src/components/ServerSettings/ModelManagement.tsx
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query';
import { Download, Loader2, Trash2 } from 'lucide-react';
import { Checkbox } from '@/components/ui/checkbox';
import { useCallback, useState } from 'react';
import {
AlertDialog,
Expand All @@ -24,6 +25,24 @@ export function ModelManagement() {
const [downloadingModel, setDownloadingModel] = useState<string | null>(null);
const [downloadingDisplayName, setDownloadingDisplayName] = useState<string | null>(null);

const { data: appSettings } = useQuery({
queryKey: ['appSettings'],
queryFn: () => apiClient.getSettings(),
});

const settingsMutation = useMutation({
mutationFn: (data: { use_48k_speech_tokenizer: boolean }) => apiClient.updateSettings(data),
onSuccess: (updated) => {
queryClient.setQueryData(['appSettings'], updated);
toast({
title: 'Setting updated',
description: updated.use_48k_speech_tokenizer
? '48kHz speech tokenizer enabled. It will apply from your next generation.'
: '48kHz speech tokenizer disabled. It will apply from your next generation.',
});
},
});

const { data: modelStatus, isLoading } = useQuery({
queryKey: ['modelStatus'],
queryFn: async () => {
Expand Down Expand Up @@ -215,6 +234,32 @@ export function ModelManagement() {

</div>
) : null}

<div className="mt-6 pt-6 border-t">
<h3 className="text-sm font-semibold mb-3 text-muted-foreground">Advanced Settings</h3>
<div className="flex items-start space-x-3">
<Checkbox
id="use48kTokenizer"
checked={appSettings?.use_48k_speech_tokenizer ?? false}
onCheckedChange={(checked: boolean) => {
settingsMutation.mutate({ use_48k_speech_tokenizer: checked });
}}
disabled={settingsMutation.isPending}
/>
<div className="space-y-1">
<label
htmlFor="use48kTokenizer"
className="text-sm font-medium leading-none peer-disabled:cursor-not-allowed peer-disabled:opacity-70 cursor-pointer"
>
Use 48kHz speech tokenizer (experimental)
</label>
<p className="text-sm text-muted-foreground">
Enables a higher quality 48kHz speech tokenizer. Changes take
effect from your next generation.
</p>
</div>
</div>
</div>
</CardContent>

{/* Delete Confirmation Dialog */}
Expand Down
13 changes: 13 additions & 0 deletions app/src/lib/api/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import type {
StoryItemMove,
StoryItemTrim,
StoryItemSplit,
AppSettings,
} from './types';

class ApiClient {
Expand Down Expand Up @@ -495,6 +496,18 @@ class ApiClient {
});
}

// App Settings
async getSettings(): Promise<AppSettings> {
return this.request<AppSettings>('/settings');
}

async updateSettings(data: Partial<AppSettings>): Promise<AppSettings> {
return this.request<AppSettings>('/settings', {
method: 'PATCH',
body: JSON.stringify(data),
});
}

async exportStoryAudio(storyId: string): Promise<Blob> {
const url = `${this.getBaseUrl()}/stories/${storyId}/export-audio`;
const response = await fetch(url);
Expand Down
4 changes: 4 additions & 0 deletions app/src/lib/api/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -202,3 +202,7 @@ export interface StoryItemTrim {
export interface StoryItemSplit {
split_time_ms: number;
}

/**
 * Application-level settings persisted by the backend
 * (read via `GET /settings`, partially updated via `PATCH /settings`).
 */
export interface AppSettings {
  // Opt-in for the higher-quality 48kHz speech tokenizer on the PyTorch
  // backend; applied on the next model load / generation.
  use_48k_speech_tokenizer: boolean;
}
26 changes: 26 additions & 0 deletions backend/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,32 @@ Manually load TTS model.
#### `POST /models/unload`
Unload TTS model to free memory.

### Settings

#### `GET /settings`
Get current application settings.

**Response:**
```json
{
"use_48k_speech_tokenizer": false
}
```

#### `PATCH /settings`
Partially update application settings. Only provided fields are updated.

**Request:**
```json
{
"use_48k_speech_tokenizer": true
}
```

**Response:** Updated settings object (same shape as `GET /settings`).

Settings are persisted to `data/settings.json` and survive server restarts.

## Database Schema

### profiles
Expand Down
10 changes: 10 additions & 0 deletions backend/backends/pytorch_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from pathlib import Path

from . import TTSBackend, STTBackend
from .. import config
from ..models import AppSettings
from ..utils.cache import get_cache_key, get_cached_voice_prompt, cache_voice_prompt
from ..utils.audio import normalize_audio, load_audio
from ..utils.progress import get_progress_manager
Expand Down Expand Up @@ -195,6 +197,14 @@ def _load_model_sync(self, model_size: str):
device_map=self.device,
torch_dtype=torch.bfloat16,
)

# optionally replace speech tokenizer with 48kHz version for better audio quality
settings = AppSettings(**config.load_app_settings())
if settings.use_48k_speech_tokenizer:
from qwen_tts import Qwen3TTSTokenizer
self.model.model.speech_tokenizer = Qwen3TTSTokenizer.from_pretrained(
"takuma104/Qwen3-TTS-Tokenizer-12Hz-48kHz"
)
finally:
# Exit the patch context
tracker_context.__exit__(None, None, None)
Expand Down
20 changes: 20 additions & 0 deletions backend/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
Handles data directory configuration for production bundling.
"""

import json
import os
from pathlib import Path

Expand Down Expand Up @@ -66,3 +67,22 @@ def get_models_dir() -> Path:
path = _data_dir / "models"
path.mkdir(parents=True, exist_ok=True)
return path

def get_settings_path() -> Path:
    """Return the filesystem path of the persisted app settings JSON file."""
    settings_file = _data_dir / "settings.json"
    return settings_file

def load_app_settings() -> dict:
    """Load app settings from the JSON settings file.

    Returns:
        The parsed settings dict. Falls back to an empty dict when the file
        is missing, unreadable, contains invalid JSON, or holds a non-object
        top-level value (e.g. a bare list), so callers can always safely do
        ``AppSettings(**load_app_settings())``.
    """
    path = get_settings_path()
    if not path.exists():
        return {}
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # Deliberate best-effort: a corrupt or unreadable settings file
        # means "use defaults" rather than failing startup.
        # (json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.)
        return {}
    # Valid JSON is not necessarily an object; only a mapping is usable here.
    return data if isinstance(data, dict) else {}

def save_app_settings(data: dict) -> None:
    """Persist app settings to the JSON settings file (pretty-printed)."""
    serialized = json.dumps(data, indent=2)
    get_settings_path().write_text(serialized, encoding="utf-8")
17 changes: 17 additions & 0 deletions backend/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -1764,6 +1764,23 @@ def _get_gpu_status() -> str:
return "None (CPU only)"


@app.get("/settings", response_model=models.AppSettings)
async def get_settings():
    """Return the current application settings (defaults fill unset fields)."""
    stored = config.load_app_settings()
    return models.AppSettings(**stored)


@app.patch("/settings", response_model=models.AppSettings)
async def update_settings(update: models.AppSettingsUpdate):
    """Partially update application settings.

    Fields left as ``None`` in the request keep their current values; the
    merged result is persisted and returned in full.
    """
    merged = models.AppSettings(**config.load_app_settings()).model_dump()
    changes = update.model_dump(exclude_none=True)
    merged.update(changes)
    config.save_app_settings(merged)
    return models.AppSettings(**merged)


@app.on_event("startup")
async def startup_event():
"""Run on application startup."""
Expand Down
10 changes: 10 additions & 0 deletions backend/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,3 +299,13 @@ class StoryItemTrim(BaseModel):
class StoryItemSplit(BaseModel):
"""Request model for splitting a story item."""
split_time_ms: int = Field(..., ge=0) # Time within the clip to split at (relative to clip start)


class AppSettings(BaseModel):
    """Application settings, persisted as JSON via the config helpers."""
    # Opt-in: use the 48kHz speech tokenizer on the PyTorch backend,
    # swapped in at model-load time. Off by default.
    use_48k_speech_tokenizer: bool = False


class AppSettingsUpdate(BaseModel):
    """Partial update payload for application settings.

    Every field is optional; ``None`` means "leave unchanged" — the PATCH
    handler drops ``None`` values (``exclude_none``) before merging.
    """
    use_48k_speech_tokenizer: Optional[bool] = None
3 changes: 3 additions & 0 deletions data/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"use_48k_speech_tokenizer": false
}