diff --git a/App.tsx b/App.tsx index ac8cee153..d4a43a109 100644 --- a/App.tsx +++ b/App.tsx @@ -14,6 +14,14 @@ import { useTheme } from './src/theme'; import { hardwareService, modelManager, authService, ragService, remoteServerManager } from './src/services'; import logger from './src/utils/logger'; import { useAppStore, useAuthStore, useRemoteServerStore } from './src/stores'; +import { useTTSStore } from './src/stores/ttsStore'; +import { initExecutorch } from 'react-native-executorch'; +import { BareResourceFetcher } from 'react-native-executorch-bare-resource-fetcher'; +import { EngineBridge } from './src/components/EngineBridge'; + +// Initialise executorch resource fetcher once at module load time. +// This must run before any useTextToSpeech hook is mounted. +initExecutorch({ resourceFetcher: BareResourceFetcher }); import { LockScreen } from './src/screens'; import { useAppState } from './src/hooks/useAppState'; @@ -191,6 +199,10 @@ function App() { // Initialize RAG database tables ragService.ensureReady().catch((err) => logger.error('Failed to initialize RAG service on startup', err)); + // Initialize TTS engine from persisted settings and sync download state + const ttsState = useTTSStore.getState(); + ttsState.setEngine(ttsState.settings.engineId).catch(() => {}); + // Show the UI immediately setIsInitializing(false); @@ -235,6 +247,7 @@ function App() { + 'ready' as const), + on: jest.fn(() => jest.fn()), + off: jest.fn(), + once: jest.fn(() => jest.fn()), + isSupported: jest.fn(() => true), + initialize: jest.fn().mockResolvedValue(undefined), + release: jest.fn().mockResolvedValue(undefined), + destroy: jest.fn().mockResolvedValue(undefined), + getRequiredAssets: jest.fn(() => [ + { id: 'backbone', label: 'Voice Model', url: 'https://example.com/bb.gguf', sizeBytes: 454 * 1024 * 1024, filename: 'bb.gguf' }, + { id: 'vocoder', label: 'Decoder', url: 'https://example.com/voc.gguf', sizeBytes: 73 * 1024 * 1024, filename: 'voc.gguf' }, + ]), + 
checkAssetStatus: jest.fn().mockResolvedValue([ + { asset: { id: 'backbone', label: 'Voice Model', url: '', sizeBytes: 454 * 1024 * 1024, filename: 'bb.gguf' }, status: 'downloaded', progress: 1 }, + { asset: { id: 'vocoder', label: 'Decoder', url: '', sizeBytes: 73 * 1024 * 1024, filename: 'voc.gguf' }, status: 'downloaded', progress: 1 }, + ]), + downloadAssets: jest.fn().mockResolvedValue(undefined), + deleteAssets: jest.fn().mockResolvedValue(undefined), + getOverallDownloadProgress: jest.fn(() => 1), + isFullyDownloaded: jest.fn(() => true), + getBridgeComponent: jest.fn(() => null), + getVoices: jest.fn(() => [{ id: '0', label: 'Default', metadata: {} }]), + getActiveVoice: jest.fn(() => ({ id: '0', label: 'Default', metadata: {} })), + setVoice: jest.fn().mockResolvedValue(undefined), + speak: jest.fn().mockResolvedValue(undefined), + generateAndSave: jest.fn().mockResolvedValue({ + filePath: '/cache/c1/m1.pcm', + durationSeconds: 1.5, + waveformData: new Array(200).fill(0.2), + }), + playFromFile: jest.fn().mockResolvedValue(undefined), + stop: jest.fn(), + pause: jest.fn(), + resume: jest.fn(), +}; + +jest.mock('../../../src/engine', () => ({ + ttsRegistry: { + register: jest.fn(), + has: jest.fn(() => true), + getEngine: jest.fn(() => mockEngine), + setActiveEngine: jest.fn().mockResolvedValue(mockEngine), + getActiveEngine: jest.fn(() => mockEngine), + getActiveEngineId: jest.fn(() => 'mock-tts'), + getRegisteredIds: jest.fn(() => ['mock-tts']), + }, + OuteTTSEngine: class {}, +})); + +jest.mock('../../../src/utils/logger', () => ({ + __esModule: true, + default: { log: jest.fn(), error: jest.fn(), warn: jest.fn() }, +})); + +import { useTTSStore } from '../../../src/stores/ttsStore'; + +const getState = () => useTTSStore.getState(); + +const resetStore = () => { + useTTSStore.setState({ + phase: 'ready', + currentMessageId: null, + currentAmplitude: 0, + playbackElapsed: 0, + playSessionId: 0, + error: null, + isReady: true, + isDownloading: false, + 
isLoading: false, + isSpeaking: false, + isPaused: false, + isGeneratingAudio: false, + assets: [], + overallDownloadProgress: 1, + voices: [{ id: '0', label: 'Default', metadata: {} }], + activeVoiceId: '0', + audioCacheSizeMB: 0, + settings: { + interfaceMode: 'chat', + enabled: true, + autoPlay: false, + speed: 1.0, + engineId: 'mock-tts', + voiceByEngine: {}, + }, + }); +}; + +describe('TTS integration', () => { + beforeEach(() => { + resetStore(); + jest.clearAllMocks(); + }); + + // ── Chat Mode full flow ─────────────────────────────────────────────── + + describe('Chat Mode: speak → stop', () => { + it('completes the full Chat Mode flow', async () => { + // Speak + const speakPromise = getState().speak('hello', 'msg1'); + expect(getState().currentMessageId).toBe('msg1'); + + await speakPromise; + expect(mockEngine.speak).toHaveBeenCalledWith('hello', expect.objectContaining({ + speed: 1.0, + messageId: 'msg1', + })); + expect(getState().currentMessageId).toBeNull(); + + // Stop mid-speech + mockEngine.speak.mockImplementation( + () => new Promise((resolve) => setTimeout(resolve, 1000)), + ); + getState().speak('second', 'msg2'); + getState().stop(); + expect(mockEngine.stop).toHaveBeenCalled(); + }); + }); + + // ── Audio Mode full flow ────────────────────────────────────────────── + + describe('Audio Mode: generateAndSave → playMessage → stop', () => { + beforeEach(() => { + useTTSStore.setState({ + settings: { ...getState().settings, interfaceMode: 'audio' }, + }); + }); + + it('completes the full Audio Mode flow', async () => { + // GenerateAndSave + const result = await getState().generateAndSave('hello audio', 'conv1', 'msg1'); + + expect(result.path).toBe('/cache/c1/m1.pcm'); + expect(result.waveformData).toHaveLength(200); + expect(result.durationSeconds).toBe(1.5); + + // PlayMessage + const playPromise = getState().playMessage('msg1', '/cache/c1/m1.pcm'); + expect(getState().currentMessageId).toBe('msg1'); + + await playPromise; + + // 
StopPlayback + getState().stopPlayback(); + expect(mockEngine.stop).toHaveBeenCalled(); + }); + }); + + // ── Mode switching ──────────────────────────────────────────────────── + + describe('mode switching', () => { + it('switching interfaceMode to audio takes effect', () => { + expect(getState().settings.interfaceMode).toBe('chat'); + getState().updateSettings({ interfaceMode: 'audio' }); + expect(getState().settings.interfaceMode).toBe('audio'); + }); + + it('switching back to chat mode works', () => { + getState().updateSettings({ interfaceMode: 'audio' }); + getState().updateSettings({ interfaceMode: 'chat' }); + expect(getState().settings.interfaceMode).toBe('chat'); + }); + }); + + // ── Engine-agnostic speak ───────────────────────────────────────────── + + describe('auto-play', () => { + it('speak delegates to engine when autoPlay and engine ready', async () => { + useTTSStore.setState({ + settings: { ...getState().settings, autoPlay: true }, + }); + + await getState().speak('AI response', 'last-msg'); + + expect(mockEngine.speak).toHaveBeenCalledWith('AI response', expect.objectContaining({ + messageId: 'last-msg', + })); + }); + }); +}); diff --git a/__tests__/rntl/components/ChatInput.test.tsx b/__tests__/rntl/components/ChatInput.test.tsx index 617430abf..303297d39 100644 --- a/__tests__/rntl/components/ChatInput.test.tsx +++ b/__tests__/rntl/components/ChatInput.test.tsx @@ -51,10 +51,20 @@ jest.mock('../../../src/services/documentService', () => ({ // Mock the stores const mockUseWhisperStore = jest.fn(); const mockUseAppStore = jest.fn(); +const mockUseTTSStore = jest.fn(() => ({ + settings: { interfaceMode: 'chat', enabled: false, speed: 1.0 }, + isBackboneDownloaded: false, + isVocoderDownloaded: false, + isModelLoaded: false, + loadModels: jest.fn(), + unloadModels: jest.fn(), + updateSettings: jest.fn(), +})); jest.mock('../../../src/stores', () => ({ useWhisperStore: () => mockUseWhisperStore(), useAppStore: () => mockUseAppStore(), + 
useTTSStore: () => mockUseTTSStore(), })); // Mock the whisper hook diff --git a/__tests__/rntl/components/GenerationSettingsModal.test.tsx b/__tests__/rntl/components/GenerationSettingsModal.test.tsx index a9ef46471..ed7272b1e 100644 --- a/__tests__/rntl/components/GenerationSettingsModal.test.tsx +++ b/__tests__/rntl/components/GenerationSettingsModal.test.tsx @@ -859,13 +859,13 @@ describe('GenerationSettingsModal', () => { }); it('calls handleSliderComplete on text generation slider (no-op)', () => { - const { getByText, getAllByTestId } = render( + const { getByText, queryAllByTestId } = render( , ); fireEvent.press(getByText('TEXT GENERATION')); - const sliders = getAllByTestId('slider'); + const sliders = queryAllByTestId('slider'); // onSlidingComplete is a no-op but should not throw if (sliders.length > 0 && sliders[0].props.onSlidingComplete) { expect(() => sliders[0].props.onSlidingComplete(0.5)).not.toThrow(); @@ -873,13 +873,13 @@ describe('GenerationSettingsModal', () => { }); it('calls handleSliderChange on text slider value change', () => { - const { getByText, getAllByTestId } = render( + const { getByText, queryAllByTestId } = render( , ); fireEvent.press(getByText('TEXT GENERATION')); - const sliders = getAllByTestId('slider'); + const sliders = queryAllByTestId('slider'); if (sliders.length > 0 && sliders[0].props.onValueChange) { sliders[0].props.onValueChange(0.5); expect(mockUpdateSettings).toHaveBeenCalled(); @@ -1070,17 +1070,16 @@ describe('GenerationSettingsModal', () => { expect(mockUpdateSettings).toHaveBeenCalledWith({ enableGpu: true, cacheType: 'f16' }); }); - it('calls updateSettings with gpuLayers value from GPU layers slider', () => { + it('calls updateSettings with gpuLayers value from GPU layers stepper', () => { mockStoreValues.settings = { ...defaultSettings, enableGpu: true, gpuLayers: 6, flashAttn: false }; const { getByText, getByTestId } = render(); fireEvent.press(getByText('TEXT GENERATION')); 
fireEvent.press(getByTestId('modal-text-advanced-toggle')); mockUpdateSettings.mockClear(); - const slider = getByTestId('gpu-layers-slider'); - slider.props.onSlidingComplete(12); + fireEvent.press(getByTestId('gpu-layers-stepper-increment')); - expect(mockUpdateSettings).toHaveBeenCalledWith({ gpuLayers: 12 }); + expect(mockUpdateSettings).toHaveBeenCalledWith({ gpuLayers: 7 }); }); }); }); diff --git a/__tests__/rntl/components/VoiceRecordButton.test.tsx b/__tests__/rntl/components/VoiceRecordButton.test.tsx index b92c45a30..84899278b 100644 --- a/__tests__/rntl/components/VoiceRecordButton.test.tsx +++ b/__tests__/rntl/components/VoiceRecordButton.test.tsx @@ -87,16 +87,17 @@ describe('VoiceRecordButton', () => { }); it('shows recording indicator when isRecording is true', () => { - const { getByText } = render( + const { toJSON } = render( ); - // When recording, "Slide to cancel" text appears in the cancel hint - expect(getByText('Slide to cancel')).toBeTruthy(); + // In audio mode (default, !asSendButton), recording shows a stop icon (square) + const treeStr = JSON.stringify(toJSON()); + expect(treeStr).toContain('square'); }); it('shows transcribing state when isTranscribing is true', () => { - const { getByText } = render( + const { toJSON } = render( { /> ); - // Transcribing state shows "Transcribing..." 
text - expect(getByText('Transcribing...')).toBeTruthy(); + // Transcribing state renders a spinning indicator (no text in audio mode) + expect(toJSON()).toBeTruthy(); }); - it('shows partial result text when provided', () => { + it('shows partial result text when provided in chat mode (asSendButton)', () => { const { getByText } = render( @@ -166,7 +168,7 @@ describe('VoiceRecordButton', () => { expect(toJSON()).toBeTruthy(); }); - it('taps unavailable button and triggers alert with error message', () => { + it('taps unavailable button and triggers download prompt alert', () => { const { UNSAFE_getAllByType } = render( { fireEvent.press(touchables[0]); expect(mockShowAlert).toHaveBeenCalledWith( - 'Voice Input Unavailable', - expect.stringContaining('Microphone permission denied'), + 'Download Voice Model', + expect.stringContaining('Download Whisper Small'), expect.any(Array) ); }); - it('taps unavailable button with default error when no error prop', () => { + it('taps unavailable button shows download prompt with size', () => { const { UNSAFE_getAllByType } = render( { fireEvent.press(touchables[0]); expect(mockShowAlert).toHaveBeenCalledWith( - 'Voice Input Unavailable', - expect.stringContaining('No transcription model downloaded'), + 'Download Voice Model', + expect.stringContaining('466 MB'), expect.any(Array) ); }); - it('alert message includes instructions for downloading model', () => { + it('alert message includes Download and Cancel buttons', () => { const { UNSAFE_getAllByType } = render( { fireEvent.press(touchables[0]); expect(mockShowAlert).toHaveBeenCalledWith( - 'Voice Input Unavailable', - expect.stringContaining('Download a Whisper model'), - expect.any(Array) + 'Download Voice Model', + expect.any(String), + expect.arrayContaining([ + expect.objectContaining({ text: 'Cancel' }), + expect.objectContaining({ text: 'Download' }), + ]) ); }); }); @@ -400,11 +405,13 @@ describe('VoiceRecordButton', () => { }); it('does not show cancel hint when not 
recording', () => { - const { queryByText } = render( + const { toJSON } = render( ); - expect(queryByText('Slide to cancel')).toBeNull(); + // Audio mode (default) uses tap-to-toggle, no slide-to-cancel + const treeStr = JSON.stringify(toJSON()); + expect(treeStr).not.toContain('Slide to cancel'); }); it('does not show partial result when partialResult is empty', () => { @@ -418,12 +425,12 @@ describe('VoiceRecordButton', () => { // partialResult is empty, so the partial result container should not render const treeStr = JSON.stringify(toJSON()); - // The cancel hint should still show - expect(treeStr).toContain('Slide to cancel'); + // Audio mode uses tap-to-toggle with a stop icon + expect(treeStr).toContain('square'); }); it('shows recording UI elements but not transcribing when recording', () => { - const { getByText, queryByText } = render( + const { toJSON, queryByText } = render( { // When isRecording is true AND isTranscribing is true, // the component shows recording UI (not transcribing state) - expect(getByText('Slide to cancel')).toBeTruthy(); + const treeStr = JSON.stringify(toJSON()); + expect(treeStr).toContain('square'); expect(queryByText('Transcribing...')).toBeNull(); }); @@ -446,7 +454,7 @@ describe('VoiceRecordButton', () => { }); it('prioritizes model loading state over recording', () => { - const { getByText, queryByText } = render( + const { getByText, toJSON } = render( { ); expect(getByText('Loading...')).toBeTruthy(); - expect(queryByText('Slide to cancel')).toBeNull(); + // Recording UI should not render when loading + const treeStr = JSON.stringify(toJSON()); + expect(treeStr).not.toContain('square'); }); it('prioritizes model loading state over transcribing', () => { - const { getByText, queryByText } = render( + const { getByText, toJSON } = render( { ); expect(getByText('Loading...')).toBeTruthy(); - expect(queryByText('Transcribing...')).toBeNull(); + // Transcribing state should not render when loading + 
expect(toJSON()).toBeTruthy(); }); }); }); diff --git a/__tests__/rntl/screens/DownloadManagerScreen.test.tsx b/__tests__/rntl/screens/DownloadManagerScreen.test.tsx index 2a976dfd2..255ab5e6a 100644 --- a/__tests__/rntl/screens/DownloadManagerScreen.test.tsx +++ b/__tests__/rntl/screens/DownloadManagerScreen.test.tsx @@ -212,20 +212,23 @@ describe('DownloadManagerScreen', () => { }); it('shows empty state when no downloads', () => { - const { getByText } = render(); - expect(getByText('No active downloads')).toBeTruthy(); + const { getByText, queryByText } = render(); + // Active Downloads section is hidden when there are no active items + expect(queryByText('Active Downloads')).toBeNull(); expect(getByText('No models downloaded yet')).toBeTruthy(); }); it('shows section headers for active and completed', () => { - const { getByText } = render(); - expect(getByText('Active Downloads')).toBeTruthy(); + const { getByText, queryByText } = render(); + // Active Downloads section is hidden when empty + expect(queryByText('Active Downloads')).toBeNull(); + // Downloaded Models section is always shown expect(getByText('Downloaded Models')).toBeTruthy(); }); it('shows empty subtext when no models downloaded', () => { const { getByText } = render(); - expect(getByText('Go to the Models tab to browse and download models')).toBeTruthy(); + expect(getByText('No models downloaded yet')).toBeTruthy(); }); it('renders completed text model with details', () => { @@ -305,11 +308,12 @@ describe('DownloadManagerScreen', () => { expect(getByText(/Total storage used/)).toBeTruthy(); }); - it('shows count badges for active and completed sections', () => { + it('shows count badge for completed section', () => { setupSingleModelState(); const { getByText } = render(); - expect(getByText('0')).toBeTruthy(); + // Active section is hidden when empty (no "0" badge) + // Completed section shows count of 1 expect(getByText('1')).toBeTruthy(); }); @@ -344,7 +348,8 @@ 
describe('DownloadManagerScreen', () => { const { UNSAFE_getAllByType } = render(); const touchables = UNSAFE_getAllByType(TouchableOpacity); - const cancelButtons = touchables.filter((_: any, i: number) => i > 0); + // Skip back button (1) + filter chips (6) = 7 touchables before content + const cancelButtons = touchables.filter((_: any, i: number) => i > 6); if (cancelButtons.length > 0) { fireEvent.press(cancelButtons[0]); } @@ -820,8 +825,8 @@ describe('DownloadManagerScreen', () => { const { UNSAFE_getAllByType, getByTestId } = render(); const touchables = UNSAFE_getAllByType(TouchableOpacity); - // Press the cancel button (second touchable after back button) - const cancelButtons = touchables.filter((_: any, i: number) => i > 0); + // Skip back button (1) + filter chips (6) = 7 touchables before content + const cancelButtons = touchables.filter((_: any, i: number) => i > 6); fireEvent.press(cancelButtons[0]); // Press "Yes" to confirm @@ -852,7 +857,8 @@ describe('DownloadManagerScreen', () => { const { UNSAFE_getAllByType, getByTestId } = render(); const touchables = UNSAFE_getAllByType(TouchableOpacity); - const cancelButtons = touchables.filter((_: any, i: number) => i > 0); + // Skip back button (1) + filter chips (6) = 7 touchables before content + const cancelButtons = touchables.filter((_: any, i: number) => i > 6); fireEvent.press(cancelButtons[0]); await act(async () => { @@ -880,7 +886,8 @@ describe('DownloadManagerScreen', () => { const { UNSAFE_getAllByType, getByTestId } = render(); const touchables = UNSAFE_getAllByType(TouchableOpacity); - const cancelButtons = touchables.filter((_: any, i: number) => i > 0); + // Skip back button (1) + filter chips (6) = 7 touchables before content + const cancelButtons = touchables.filter((_: any, i: number) => i > 6); fireEvent.press(cancelButtons[0]); await act(async () => { @@ -1029,7 +1036,8 @@ describe('DownloadManagerScreen', () => { // Find the cancel button for the RNFS download (which has no 
downloadId) const touchables = result.UNSAFE_getAllByType(TouchableOpacity); - const cancelButtons = touchables.filter((_: any, i: number) => i > 0); + // Skip back button (1) + filter chips (6) = 7 touchables before content + const cancelButtons = touchables.filter((_: any, i: number) => i > 6); if (cancelButtons.length > 0) { fireEvent.press(cancelButtons[0]); @@ -1367,8 +1375,8 @@ describe('DownloadManagerScreen', () => { // Find and press cancel button on the active download const touchables = result.UNSAFE_getAllByType(TouchableOpacity); - // Find cancel buttons (skip back button) - const cancelButtons = touchables.filter((_: any, i: number) => i > 0); + // Skip back button (1) + filter chips (6) = 7 touchables before content + const cancelButtons = touchables.filter((_: any, i: number) => i > 6); if (cancelButtons.length > 0) { fireEvent.press(cancelButtons[0]); diff --git a/__tests__/rntl/screens/ModelSettingsScreen.test.tsx b/__tests__/rntl/screens/ModelSettingsScreen.test.tsx index 026ba7b17..455b376bf 100644 --- a/__tests__/rntl/screens/ModelSettingsScreen.test.tsx +++ b/__tests__/rntl/screens/ModelSettingsScreen.test.tsx @@ -644,14 +644,13 @@ describe('ModelSettingsScreen', () => { expect(useAppStore.getState().settings.enableGpu).toBe(true); }); - it('updates gpuLayers when GPU Layers slider completes', () => { + it('updates gpuLayers when GPU Layers stepper is incremented', () => { useAppStore.getState().updateSettings({ enableGpu: true, flashAttn: false, gpuLayers: 6 }); const { getByTestId } = renderWithSections('text'); - const slider = getByTestId('gpu-layers-slider'); - fireEvent(slider, 'slidingComplete', 12); + fireEvent.press(getByTestId('gpu-layers-stepper-increment')); - expect(useAppStore.getState().settings.gpuLayers).toBe(12); + expect(useAppStore.getState().settings.gpuLayers).toBe(7); }); }); }); diff --git a/__tests__/rntl/screens/VoiceSettingsScreen.test.tsx b/__tests__/rntl/screens/VoiceSettingsScreen.test.tsx index 
a055a2ad1..7d459bde7 100644 --- a/__tests__/rntl/screens/VoiceSettingsScreen.test.tsx +++ b/__tests__/rntl/screens/VoiceSettingsScreen.test.tsx @@ -3,15 +3,15 @@ * * Tests for the voice settings screen including: * - Title display - * - Description text about Whisper - * - Download options when no model + * - Privacy note text + * - English and Multilingual model sections * - Back button navigation - * - Downloaded model state (name, status badge, remove button) + * - Active model state (name, badge, remove button) * - Download progress display * - Model download trigger * - Remove model confirmation alert * - Error display and clear - * - Privacy card display + * - Search bar * * Priority: P1 (High) */ @@ -82,6 +82,7 @@ jest.mock('../../../src/components/Button', () => ({ })); const mockDownloadModel = jest.fn(); +const mockDownloadFromUrl = jest.fn(); const mockDeleteModel = jest.fn(); const mockClearError = jest.fn(); @@ -90,6 +91,7 @@ let mockWhisperStoreValues: any = { isDownloading: false, downloadProgress: 0, downloadModel: mockDownloadModel, + downloadFromUrl: mockDownloadFromUrl, deleteModel: mockDeleteModel, error: null, clearError: mockClearError, @@ -101,13 +103,24 @@ jest.mock('../../../src/stores', () => ({ jest.mock('../../../src/services', () => ({ WHISPER_MODELS: [ - { id: 'tiny', name: 'Whisper Tiny', size: '75', description: 'Fastest, lower accuracy' }, - { id: 'base', name: 'Whisper Base', size: '141', description: 'Good accuracy' }, - { id: 'small', name: 'Whisper Small', size: '461', description: 'Better accuracy' }, - { id: 'medium', name: 'Whisper Medium', size: '1500', description: 'Best accuracy' }, + { id: 'tiny.en', name: 'Tiny', size: 75, lang: 'en', description: 'Fastest, English only' }, + { id: 'base.en', name: 'Base', size: 142, lang: 'en', description: 'Better accuracy, English only' }, + { id: 'small.en', name: 'Small', size: 466, lang: 'en', description: 'High accuracy, English only' }, + { id: 'medium.en', name: 'Medium', size: 
1500, lang: 'en', description: 'Near human-level, English only' }, + { id: 'tiny', name: 'Tiny', size: 75, lang: 'multi', description: 'Fastest, 99 languages' }, + { id: 'base', name: 'Base', size: 142, lang: 'multi', description: 'Better accuracy, 99 languages' }, + { id: 'small', name: 'Small', size: 466, lang: 'multi', description: 'High accuracy, 99 languages' }, + { id: 'medium', name: 'Medium', size: 1500, lang: 'multi', description: 'Near human-level, 99 languages' }, ], })); +jest.mock('../../../src/services/huggingface', () => ({ + huggingFaceService: { + searchWhisperRepos: jest.fn().mockResolvedValue([]), + getWhisperFiles: jest.fn().mockResolvedValue([]), + }, +})); + import { VoiceSettingsScreen } from '../../../src/screens/VoiceSettingsScreen'; const mockGoBack = jest.fn(); @@ -134,6 +147,7 @@ describe('VoiceSettingsScreen', () => { isDownloading: false, downloadProgress: 0, downloadModel: mockDownloadModel, + downloadFromUrl: mockDownloadFromUrl, deleteModel: mockDeleteModel, error: null, clearError: mockClearError, @@ -149,19 +163,16 @@ describe('VoiceSettingsScreen', () => { expect(getByText('Voice Transcription')).toBeTruthy(); }); - it('shows description text about Whisper', () => { + it('shows privacy note about on-device transcription', () => { const { getByText } = render(); expect( - getByText(/Download a Whisper model to enable on-device voice input/), + getByText(/All transcription runs on-device/), ).toBeTruthy(); }); - it('shows privacy card', () => { - const { getByText } = render(); - expect(getByText('Privacy First')).toBeTruthy(); - expect( - getByText(/Voice transcription happens entirely on your device/), - ).toBeTruthy(); + it('shows search bar', () => { + const { getByPlaceholderText } = render(); + expect(getByPlaceholderText('Search models or HuggingFace...')).toBeTruthy(); }); it('back button calls goBack', () => { @@ -178,48 +189,46 @@ describe('VoiceSettingsScreen', () => { // No Model Downloaded - Download Options // 
============================================================================ describe('download options (no model)', () => { - it('shows download options when no model is downloaded', () => { + it('shows English model section', () => { const { getByText } = render(); - expect(getByText('Whisper Tiny')).toBeTruthy(); - expect(getByText('Whisper Base')).toBeTruthy(); - expect(getByText('Whisper Small')).toBeTruthy(); + expect(getByText('ENGLISH ONLY')).toBeTruthy(); }); - it('shows only first 3 models (slice(0, 3))', () => { - const { queryByText } = render(); - // 4th model (medium) should NOT be shown due to .slice(0, 3) - expect(queryByText('Whisper Medium')).toBeNull(); + it('shows Multilingual model section', () => { + const { getByText } = render(); + expect(getByText(/MULTILINGUAL/)).toBeTruthy(); }); - it('shows "Select a model to download" label', () => { - const { getByText } = render(); - expect(getByText('Select a model to download:')).toBeTruthy(); + it('shows model names in English section', () => { + const { getAllByText } = render(); + // "Tiny" appears in both English and Multilingual sections + expect(getAllByText('Tiny').length).toBeGreaterThanOrEqual(1); }); - it('shows model size for each option', () => { - const { getByText } = render(); - expect(getByText('75 MB')).toBeTruthy(); - expect(getByText('141 MB')).toBeTruthy(); - expect(getByText('461 MB')).toBeTruthy(); + it('shows model size for options', () => { + const { getAllByText } = render(); + // Sizes appear in both English and Multilingual sections + expect(getAllByText('75 MB').length).toBeGreaterThanOrEqual(1); + expect(getAllByText('142 MB').length).toBeGreaterThanOrEqual(1); + expect(getAllByText('466 MB').length).toBeGreaterThanOrEqual(1); }); - it('shows model description for each option', () => { + it('shows model description for options', () => { const { getByText } = render(); - expect(getByText('Fastest, lower accuracy')).toBeTruthy(); - expect(getByText('Good 
accuracy')).toBeTruthy(); - expect(getByText('Better accuracy')).toBeTruthy(); + expect(getByText('Fastest, English only')).toBeTruthy(); + expect(getByText('Better accuracy, English only')).toBeTruthy(); }); it('calls downloadModel when a model option is pressed', () => { - const { getByText } = render(); - fireEvent.press(getByText('Whisper Base')); - expect(mockDownloadModel).toHaveBeenCalledWith('base'); + const { getByTestId } = render(); + fireEvent.press(getByTestId('model-download-base.en')); + expect(mockDownloadModel).toHaveBeenCalledWith('base.en'); }); it('calls downloadModel with correct id for tiny model', () => { - const { getByText } = render(); - fireEvent.press(getByText('Whisper Tiny')); - expect(mockDownloadModel).toHaveBeenCalledWith('tiny'); + const { getByTestId } = render(); + fireEvent.press(getByTestId('model-download-tiny.en')); + expect(mockDownloadModel).toHaveBeenCalledWith('tiny.en'); }); }); @@ -230,28 +239,28 @@ describe('VoiceSettingsScreen', () => { beforeEach(() => { mockWhisperStoreValues = { ...mockWhisperStoreValues, - downloadedModelId: 'base', + downloadedModelId: 'base.en', }; }); - it('shows downloaded model name', () => { + it('shows active model section label', () => { const { getByText } = render(); - expect(getByText('Whisper Base')).toBeTruthy(); + expect(getByText('ACTIVE MODEL')).toBeTruthy(); }); - it('shows "Downloaded" status badge', () => { + it('shows downloaded model name with language', () => { const { getByText } = render(); - expect(getByText('Downloaded')).toBeTruthy(); + expect(getByText(/Base — English/)).toBeTruthy(); }); - it('shows "Remove Model" button', () => { + it('shows "Active" status badge', () => { const { getByText } = render(); - expect(getByText('Remove Model')).toBeTruthy(); + expect(getByText('Active')).toBeTruthy(); }); - it('does not show download options when model is downloaded', () => { - const { queryByText } = render(); - expect(queryByText('Select a model to 
download:')).toBeNull(); + it('shows "Remove" button', () => { + const { getByText } = render(); + expect(getByText('Remove')).toBeTruthy(); }); it('shows model id as fallback when model not found in WHISPER_MODELS', () => { @@ -263,11 +272,11 @@ describe('VoiceSettingsScreen', () => { expect(getByText('unknown-model')).toBeTruthy(); }); - it('pressing Remove Model shows confirmation alert', () => { + it('pressing Remove shows confirmation alert', () => { const { getByText } = render(); - fireEvent.press(getByText('Remove Model')); + fireEvent.press(getByText('Remove')); expect(mockShowAlert).toHaveBeenCalledWith( - 'Remove Whisper Model', + 'Remove Voice Model', 'This will disable voice input until you download a model again.', expect.arrayContaining([ expect.objectContaining({ text: 'Cancel', style: 'cancel' }), @@ -294,11 +303,6 @@ describe('VoiceSettingsScreen', () => { expect(getByText('Downloading... 45%')).toBeTruthy(); }); - it('does not show download options during download', () => { - const { queryByText } = render(); - expect(queryByText('Select a model to download:')).toBeNull(); - }); - it('shows 0% at start of download', () => { mockWhisperStoreValues = { ...mockWhisperStoreValues, @@ -334,13 +338,13 @@ describe('VoiceSettingsScreen', () => { // Error State // ============================================================================ describe('error state', () => { - it('shows error message when whisperError is set', () => { + it('shows error message with tap to dismiss when whisperError is set', () => { mockWhisperStoreValues = { ...mockWhisperStoreValues, error: 'Download failed: network error', }; const { getByText } = render(); - expect(getByText('Download failed: network error')).toBeTruthy(); + expect(getByText('Download failed: network error (tap to dismiss)')).toBeTruthy(); }); it('calls clearError when error is tapped', () => { @@ -349,7 +353,7 @@ describe('VoiceSettingsScreen', () => { error: 'Download failed', }; const { getByText } = 
render(); - fireEvent.press(getByText('Download failed')); + fireEvent.press(getByText('Download failed (tap to dismiss)')); expect(mockClearError).toHaveBeenCalled(); }); diff --git a/__tests__/unit/hooks/useKeyboardAwarePopover.test.ts b/__tests__/unit/hooks/useKeyboardAwarePopover.test.ts index 0e37e3e3b..727880ba5 100644 --- a/__tests__/unit/hooks/useKeyboardAwarePopover.test.ts +++ b/__tests__/unit/hooks/useKeyboardAwarePopover.test.ts @@ -126,12 +126,12 @@ describe('useKeyboardAwarePopover', () => { expect(mockKeyboardDismiss).not.toHaveBeenCalled(); }); - it('measures trigger position with custom offsetX', () => { + it('measures trigger position from button coords', () => { const mockMeasureInWindow = jest.fn((callback) => { callback(10, 100, 50, 30); }); - const { result } = renderHook(() => useKeyboardAwarePopover(20)); + const { result } = renderHook(() => useKeyboardAwarePopover()); // Set up mock ref (result.current.triggerRef as any).current = { @@ -143,9 +143,9 @@ describe('useKeyboardAwarePopover', () => { }); expect(mockMeasureInWindow).toHaveBeenCalled(); - // anchor.y = screenH - y = 800 - 100 = 700 - // anchor.x = offsetX = 20 - expect(result.current.anchor).toEqual({ y: 700, x: 20 }); + // anchor.y = screenH - btnY = 800 - 100 = 700 + // anchor.x = screenW - (btnX + btnW) = 400 - (10 + 50) = 340 + expect(result.current.anchor).toEqual({ y: 700, x: 340 }); }); it('handles missing measureInWindow gracefully', () => { @@ -175,7 +175,8 @@ describe('useKeyboardAwarePopover', () => { }); // y = screenH - (undefined ?? 
0) = 800 - 0 = 800 - expect(result.current.anchor).toEqual({ y: 800, x: 12 }); // SPACING.md = 12 + // x = screenW - (btnX + btnW) = 400 - (10 + 50) = 340 + expect(result.current.anchor).toEqual({ y: 800, x: 340 }); }); }); @@ -361,8 +362,8 @@ describe('useKeyboardAwarePopover', () => { }); }); - describe('offsetX parameter', () => { - it('uses default SPACING.md when offsetX not provided', () => { + describe('button position measurement', () => { + it('computes anchorX as right-edge distance from screen right', () => { const mockMeasureInWindow = jest.fn((callback) => { callback(10, 100, 50, 30); }); @@ -377,16 +378,16 @@ describe('useKeyboardAwarePopover', () => { result.current.show(); }); - // SPACING.md = 12 - expect(result.current.anchor.x).toBe(12); + // screenW=400, btnX=10, btnW=50 → x = 400 - (10+50) = 340 + expect(result.current.anchor.x).toBe(340); }); - it('uses custom offsetX when provided', () => { + it('computes anchorY as distance from button top to screen bottom', () => { const mockMeasureInWindow = jest.fn((callback) => { callback(10, 100, 50, 30); }); - const { result } = renderHook(() => useKeyboardAwarePopover(50)); + const { result } = renderHook(() => useKeyboardAwarePopover()); (result.current.triggerRef as any).current = { measureInWindow: mockMeasureInWindow, @@ -396,7 +397,8 @@ describe('useKeyboardAwarePopover', () => { result.current.show(); }); - expect(result.current.anchor.x).toBe(50); + // screenH=800, btnY=100 → y = 800 - 100 = 700 + expect(result.current.anchor.y).toBe(700); }); }); }); \ No newline at end of file diff --git a/__tests__/unit/services/ttsService.test.ts b/__tests__/unit/services/ttsService.test.ts new file mode 100644 index 000000000..4e46d45b9 --- /dev/null +++ b/__tests__/unit/services/ttsService.test.ts @@ -0,0 +1,302 @@ +/** + * TTS Service Unit Tests + * + * Tests for backbone/vocoder download, model lifecycle, audio generation, + * file persistence, and playback control. 
+ * Priority: P1 - Core TTS functionality. + */ + +jest.mock('llama.rn', () => ({ + initLlama: jest.fn(), +})); + +jest.mock('react-native-fs', () => ({ + DocumentDirectoryPath: '/mock/docs', + exists: jest.fn(), + mkdir: jest.fn(), + unlink: jest.fn(), + downloadFile: jest.fn(), + writeFile: jest.fn(), + readFile: jest.fn(), + stat: jest.fn(), + readDir: jest.fn(), +})); + +jest.mock('react-native-audio-api', () => ({ + AudioContext: jest.fn().mockImplementation(() => ({ + createBuffer: jest.fn().mockReturnValue({ copyToChannel: jest.fn() }), + createBufferSource: jest.fn().mockReturnValue({ + connect: jest.fn(), + start: jest.fn(), + stop: jest.fn(), + playbackRate: { value: 1.0 }, + onended: null, + buffer: null, + }), + destination: {}, + close: jest.fn(), + })), +})); + +jest.mock('../../../src/utils/logger', () => ({ + __esModule: true, + default: { log: jest.fn(), error: jest.fn(), warn: jest.fn() }, +})); + +import RNFS from 'react-native-fs'; +import { initLlama } from 'llama.rn'; +import { ttsService } from '../../../src/services/ttsService'; +import { TTS_BACKBONE_MODEL } from '../../../src/constants/ttsModels'; + +const mockRNFS = RNFS as jest.Mocked<typeof RNFS>; +const mockInitLlama = initLlama as jest.Mock; + +const makeMockContext = (vocoderEnabled = true) => ({ + initVocoder: jest.fn().mockResolvedValue(undefined), + isVocoderEnabled: jest.fn().mockResolvedValue(vocoderEnabled), + releaseVocoder: jest.fn().mockResolvedValue(undefined), + release: jest.fn().mockResolvedValue(undefined), + getFormattedAudioCompletion: jest.fn().mockResolvedValue({ prompt: 'p', grammar: 'g' }), + getAudioCompletionGuideTokens: jest.fn().mockResolvedValue([1, 2, 3]), + completion: jest.fn().mockResolvedValue({ audio_tokens: [10, 20, 30] }), + decodeAudioTokens: jest.fn().mockResolvedValue(new Array(2400).fill(0.1)), +}); + +describe('ttsService', () => { + beforeEach(() => { + jest.clearAllMocks(); + // Reset internal state between tests + (ttsService as any).context = null; + 
(ttsService as any).isVocoderReady = false; + (ttsService as any).isSpeakingFlag = false; + (ttsService as any).contextLoadPromise = Promise.resolve(); + }); + + // ─── Paths ──────────────────────────────────────────────────────────────── + + describe('paths', () => { + it('backbone path uses tts-models directory', () => { + expect(ttsService.getBackbonePath()).toBe( + `/mock/docs/tts-models/${TTS_BACKBONE_MODEL.backboneFile}`, + ); + }); + + it('vocoder path uses tts-models directory', () => { + expect(ttsService.getVocoderPath()).toBe( + `/mock/docs/tts-models/${TTS_BACKBONE_MODEL.vocoderFile}`, + ); + }); + + it('audio file path scoped to conversationId and messageId', () => { + expect(ttsService.getAudioFilePath('conv1', 'msg1')).toBe( + '/mock/docs/audio-cache/conv1/msg1.pcm', + ); + }); + }); + + // ─── Download ──────────────────────────────────────────────────────────── + + describe('downloadBackbone', () => { + it('returns existing path without downloading if already present', async () => { + mockRNFS.exists.mockResolvedValueOnce(true) // ensureDir + .mockResolvedValueOnce(true); // file exists + const path = await ttsService.downloadBackbone(); + expect(mockRNFS.downloadFile).not.toHaveBeenCalled(); + expect(path).toBe(ttsService.getBackbonePath()); + }); + + it('downloads and returns path on success', async () => { + mockRNFS.exists.mockResolvedValueOnce(false) // dir missing + .mockResolvedValueOnce(false); // file missing + mockRNFS.mkdir.mockResolvedValueOnce(undefined); + mockRNFS.downloadFile.mockReturnValue({ jobId: 1, promise: Promise.resolve({ statusCode: 200, jobId: 1, bytesWritten: 0 }) }); + + const onProgress = jest.fn(); + const path = await ttsService.downloadBackbone(onProgress); + + expect(mockRNFS.downloadFile).toHaveBeenCalledWith( + expect.objectContaining({ fromUrl: TTS_BACKBONE_MODEL.backboneUrl }), + ); + expect(path).toBe(ttsService.getBackbonePath()); + }); + + it('throws and removes partial file on non-200 response', async () => 
{ + mockRNFS.exists.mockResolvedValue(false); + mockRNFS.mkdir.mockResolvedValueOnce(undefined); + mockRNFS.downloadFile.mockReturnValue({ jobId: 1, promise: Promise.resolve({ statusCode: 404, jobId: 1, bytesWritten: 0 }) }); + mockRNFS.unlink.mockResolvedValue(undefined); + + await expect(ttsService.downloadBackbone()).rejects.toThrow('HTTP 404'); + expect(mockRNFS.unlink).toHaveBeenCalled(); + }); + }); + + describe('downloadVocoder', () => { + it('downloads vocoder to correct path', async () => { + mockRNFS.exists.mockResolvedValue(false); + mockRNFS.mkdir.mockResolvedValueOnce(undefined); + mockRNFS.downloadFile.mockReturnValue({ jobId: 1, promise: Promise.resolve({ statusCode: 200, jobId: 1, bytesWritten: 0 }) }); + + const path = await ttsService.downloadVocoder(); + expect(mockRNFS.downloadFile).toHaveBeenCalledWith( + expect.objectContaining({ fromUrl: TTS_BACKBONE_MODEL.vocoderUrl }), + ); + expect(path).toBe(ttsService.getVocoderPath()); + }); + }); + + // ─── Model Lifecycle ───────────────────────────────────────────────────── + + describe('loadModels', () => { + it('calls initLlama with backbone path then initVocoder', async () => { + const ctx = makeMockContext(); + mockInitLlama.mockResolvedValue(ctx); + + await ttsService.loadModels(); + + expect(mockInitLlama).toHaveBeenCalledWith( + expect.objectContaining({ model: ttsService.getBackbonePath() }), + ); + expect(ctx.initVocoder).toHaveBeenCalledWith( + expect.objectContaining({ path: ttsService.getVocoderPath() }), + ); + }); + + it('throws if isVocoderEnabled returns false', async () => { + const ctx = makeMockContext(false); + mockInitLlama.mockResolvedValue(ctx); + + await expect(ttsService.loadModels()).rejects.toThrow('Vocoder failed to initialize'); + }); + + it('is idempotent — does not double-init if already loaded', async () => { + const ctx = makeMockContext(); + mockInitLlama.mockResolvedValue(ctx); + + await ttsService.loadModels(); + await ttsService.loadModels(); + + 
expect(mockInitLlama).toHaveBeenCalledTimes(1); + }); + }); + + describe('unloadModels', () => { + it('calls releaseVocoder and release', async () => { + const ctx = makeMockContext(); + mockInitLlama.mockResolvedValue(ctx); + await ttsService.loadModels(); + + await ttsService.unloadModels(); + + expect(ctx.releaseVocoder).toHaveBeenCalled(); + expect(ctx.release).toHaveBeenCalled(); + expect(ttsService.isLoaded()).toBe(false); + }); + }); + + // ─── Generation ────────────────────────────────────────────────────────── + + describe('generate', () => { + it('calls completion pipeline in correct order and returns GeneratedAudio', async () => { + const ctx = makeMockContext(); + mockInitLlama.mockResolvedValue(ctx); + await ttsService.loadModels(); + + const audio = await ttsService.generate('hello world'); + + expect(ctx.getFormattedAudioCompletion).toHaveBeenCalled(); + expect(ctx.getAudioCompletionGuideTokens).toHaveBeenCalledWith('hello world'); + expect(ctx.completion).toHaveBeenCalled(); + expect(ctx.decodeAudioTokens).toHaveBeenCalled(); + + expect(audio.samples).toBeInstanceOf(Float32Array); + expect(audio.waveformData).toHaveLength(200); + expect(audio.durationSeconds).toBeGreaterThan(0); + expect(audio.sampleRate).toBe(TTS_BACKBONE_MODEL.sampleRate); + }); + + it('throws if models not loaded', async () => { + await expect(ttsService.generate('test')).rejects.toThrow('TTS models not loaded'); + }); + }); + + describe('saveToFile', () => { + it('writes base64-encoded PCM to correct path', async () => { + mockRNFS.exists.mockResolvedValue(false); + mockRNFS.mkdir.mockResolvedValueOnce(undefined); + mockRNFS.writeFile.mockResolvedValueOnce(undefined); + + const audio = { + samples: new Float32Array([0.1, 0.2, 0.3]), + durationSeconds: 0.01, + sampleRate: 24000, + waveformData: new Array(200).fill(0.1), + }; + + const path = await ttsService.saveToFile(audio, 'conv1', 'msg1'); + + expect(path).toBe('/mock/docs/audio-cache/conv1/msg1.pcm'); + 
expect(mockRNFS.writeFile).toHaveBeenCalledWith( + '/mock/docs/audio-cache/conv1/msg1.pcm', + expect.any(String), + 'base64', + ); + }); + }); + + // ─── Stop ──────────────────────────────────────────────────────────────── + + describe('stop', () => { + it('sets isSpeakingFlag to false', () => { + (ttsService as any).isSpeakingFlag = true; + ttsService.stop(); + expect(ttsService.isSpeaking()).toBe(false); + }); + + it('calls stop on currentSource', () => { + const mockSource = { stop: jest.fn() }; + (ttsService as any).currentSource = mockSource; + ttsService.stop(); + expect(mockSource.stop).toHaveBeenCalled(); + }); + }); + + // ─── Cache ──────────────────────────────────────────────────────────────── + + describe('getAudioCacheSizeMB', () => { + it('returns 0 if cache directory does not exist', async () => { + mockRNFS.exists.mockResolvedValueOnce(false); + const size = await ttsService.getAudioCacheSizeMB(); + expect(size).toBe(0); + }); + + it('returns size in MB by summing individual file sizes', async () => { + mockRNFS.exists.mockResolvedValueOnce(true); + // readDir(cacheRoot) → one conversation directory + (mockRNFS as any).readDir + .mockResolvedValueOnce([{ isDirectory: () => true, path: '/mock/docs/audio-cache/conv1' }]) + // readDir(conv1) → two .pcm files, each 2.5 MB + .mockResolvedValueOnce([ + { isDirectory: () => false, size: 2.5 * 1024 * 1024 }, + { isDirectory: () => false, size: 2.5 * 1024 * 1024 }, + ]); + const size = await ttsService.getAudioCacheSizeMB(); + expect(size).toBeCloseTo(5); + }); + }); + + describe('clearAudioCache', () => { + it('unlinks the cache root if it exists', async () => { + mockRNFS.exists.mockResolvedValueOnce(true); + mockRNFS.unlink.mockResolvedValueOnce(undefined); + await ttsService.clearAudioCache(); + expect(mockRNFS.unlink).toHaveBeenCalledWith('/mock/docs/audio-cache'); + }); + + it('does nothing if cache does not exist', async () => { + mockRNFS.exists.mockResolvedValueOnce(false); + await 
ttsService.clearAudioCache(); + expect(mockRNFS.unlink).not.toHaveBeenCalled(); + }); + }); +}); diff --git a/__tests__/unit/stores/ttsStore.test.ts b/__tests__/unit/stores/ttsStore.test.ts new file mode 100644 index 000000000..e3ae0164c --- /dev/null +++ b/__tests__/unit/stores/ttsStore.test.ts @@ -0,0 +1,225 @@ +/** + * TTS Store Unit Tests + * + * Tests for the engine-agnostic TTS store. + * The store delegates to the active TTSEngine via the registry. + */ + +// Mock the engine module — we control the registry and engine instances +const mockEngine = { + id: 'mock-tts', + displayName: 'Mock TTS', + capabilities: { + streaming: false, + voiceCloning: false, + pauseResume: true, + generateAndSave: true, + peakRamMB: 100, + }, + getPhase: jest.fn(() => 'ready' as const), + on: jest.fn(() => jest.fn()), // returns unsub + off: jest.fn(), + once: jest.fn(() => jest.fn()), + isSupported: jest.fn(() => true), + initialize: jest.fn().mockResolvedValue(undefined), + release: jest.fn().mockResolvedValue(undefined), + destroy: jest.fn().mockResolvedValue(undefined), + getRequiredAssets: jest.fn(() => []), + checkAssetStatus: jest.fn().mockResolvedValue([]), + downloadAssets: jest.fn().mockResolvedValue(undefined), + deleteAssets: jest.fn().mockResolvedValue(undefined), + getOverallDownloadProgress: jest.fn(() => 1), + isFullyDownloaded: jest.fn(() => true), + getBridgeComponent: jest.fn(() => null), + getVoices: jest.fn(() => [{ id: 'default', label: 'Default', metadata: {} }]), + getActiveVoice: jest.fn(() => ({ id: 'default', label: 'Default', metadata: {} })), + setVoice: jest.fn().mockResolvedValue(undefined), + speak: jest.fn().mockResolvedValue(undefined), + generateAndSave: jest.fn().mockResolvedValue({ + filePath: '/cache/c1/m1.pcm', + durationSeconds: 2.5, + waveformData: new Array(200).fill(0.1), + }), + playFromFile: jest.fn().mockResolvedValue(undefined), + stop: jest.fn(), + pause: jest.fn(), + resume: jest.fn(), +}; + +jest.mock('../../../src/engine', () => 
({ + ttsRegistry: { + register: jest.fn(), + has: jest.fn(() => true), + getEngine: jest.fn(() => mockEngine), + setActiveEngine: jest.fn().mockResolvedValue(mockEngine), + getActiveEngine: jest.fn(() => mockEngine), + getActiveEngineId: jest.fn(() => 'mock-tts'), + getRegisteredIds: jest.fn(() => ['mock-tts']), + }, + OuteTTSEngine: class {}, +})); + +jest.mock('../../../src/utils/logger', () => ({ + __esModule: true, + default: { log: jest.fn(), error: jest.fn(), warn: jest.fn() }, +})); + +import { useTTSStore } from '../../../src/stores/ttsStore'; + +const getState = () => useTTSStore.getState(); + +const resetState = () => { + useTTSStore.setState({ + phase: 'ready', + currentMessageId: null, + currentAmplitude: 0, + playbackElapsed: 0, + playSessionId: 0, + error: null, + isReady: true, + isDownloading: false, + isLoading: false, + isSpeaking: false, + isPaused: false, + isGeneratingAudio: false, + assets: [], + overallDownloadProgress: 1, + voices: [{ id: 'default', label: 'Default', metadata: {} }], + activeVoiceId: 'default', + audioCacheSizeMB: 0, + settings: { + interfaceMode: 'chat', + enabled: true, + autoPlay: false, + speed: 1.0, + engineId: 'mock-tts', + voiceByEngine: {}, + }, + }); +}; + +describe('ttsStore', () => { + beforeEach(() => { + resetState(); + jest.clearAllMocks(); + }); + + // ── Speak ────────────────────────────────────────────────────────────── + + describe('speak', () => { + it('delegates to engine.speak with correct options', async () => { + await getState().speak('hello', 'msg1'); + + expect(mockEngine.speak).toHaveBeenCalledWith('hello', expect.objectContaining({ + speed: 1.0, + messageId: 'msg1', + })); + }); + + it('toggles off when same message is already speaking', async () => { + useTTSStore.setState({ isSpeaking: true, currentMessageId: 'msg1' }); + + await getState().speak('hello', 'msg1'); + + expect(mockEngine.stop).toHaveBeenCalled(); + expect(mockEngine.speak).not.toHaveBeenCalled(); + }); + + it('does nothing when 
TTS is disabled', async () => { + useTTSStore.setState({ settings: { ...getState().settings, enabled: false } }); + + await getState().speak('hello', 'msg1'); + + expect(mockEngine.speak).not.toHaveBeenCalled(); + }); + + it('clears currentMessageId after completion', async () => { + await getState().speak('hello', 'msg1'); + + expect(getState().currentMessageId).toBeNull(); + }); + }); + + // ── Stop / Pause / Resume ───────────────────────────────────────────── + + describe('stop', () => { + it('delegates to engine.stop and clears state', () => { + useTTSStore.setState({ currentMessageId: 'msg1' }); + getState().stop(); + + expect(mockEngine.stop).toHaveBeenCalled(); + expect(getState().currentMessageId).toBeNull(); + }); + }); + + describe('pause/resume', () => { + it('delegates to engine', () => { + getState().pause(); + expect(mockEngine.pause).toHaveBeenCalled(); + + getState().resume(); + expect(mockEngine.resume).toHaveBeenCalled(); + }); + }); + + // ── Generate and Save ───────────────────────────────────────────────── + + describe('generateAndSave', () => { + it('delegates to engine and returns result', async () => { + const result = await getState().generateAndSave('hello', 'conv1', 'msg1'); + + expect(mockEngine.generateAndSave).toHaveBeenCalledWith('hello', 'conv1', 'msg1', expect.any(Object)); + expect(result.path).toBe('/cache/c1/m1.pcm'); + expect(result.waveformData).toHaveLength(200); + expect(result.durationSeconds).toBe(2.5); + }); + }); + + // ── Play Message ────────────────────────────────────────────────────── + + describe('playMessage', () => { + it('delegates to engine.playFromFile', async () => { + await getState().playMessage('msg1', '/cache/conv1/msg1.pcm'); + + expect(mockEngine.playFromFile).toHaveBeenCalledWith('/cache/conv1/msg1.pcm', expect.objectContaining({ + speed: 1.0, + startOffset: 0, + messageId: 'msg1', + })); + }); + + it('stops if same message is already playing', async () => { + useTTSStore.setState({ isSpeaking: true, 
currentMessageId: 'msg1' }); + + await getState().playMessage('msg1', '/cache/conv1/msg1.pcm'); + + expect(mockEngine.stop).toHaveBeenCalled(); + expect(mockEngine.playFromFile).not.toHaveBeenCalled(); + }); + }); + + // ── Settings ────────────────────────────────────────────────────────── + + describe('updateSettings', () => { + it('merges partial settings', () => { + getState().updateSettings({ speed: 1.5, autoPlay: true }); + const { settings } = getState(); + expect(settings.speed).toBe(1.5); + expect(settings.autoPlay).toBe(true); + expect(settings.enabled).toBe(true); + }); + + it('can switch interfaceMode', () => { + getState().updateSettings({ interfaceMode: 'audio' }); + expect(getState().settings.interfaceMode).toBe('audio'); + }); + }); + + describe('clearError', () => { + it('clears the error field', () => { + useTTSStore.setState({ error: 'something went wrong' }); + getState().clearError(); + expect(getState().error).toBeNull(); + }); + }); +}); diff --git a/__tests__/unit/utils/messageContent.test.ts b/__tests__/unit/utils/messageContent.test.ts index b35b0181e..5f79afefb 100644 --- a/__tests__/unit/utils/messageContent.test.ts +++ b/__tests__/unit/utils/messageContent.test.ts @@ -118,8 +118,8 @@ describe('stripControlTokens', () => { expect(stripControlTokens('<|im_start|>assistant\n<|im_end|>')).toBe(''); }); - it('preserves whitespace in content', () => { - expect(stripControlTokens(' Hello World ')).toBe(' Hello World '); + it('trims leading/trailing whitespace in content', () => { + expect(stripControlTokens(' Hello World ')).toBe('Hello World'); }); it('preserves HTML-like tags that are not control tokens', () => { diff --git a/android/build.gradle b/android/build.gradle index dad99b022..984e5bed6 100644 --- a/android/build.gradle +++ b/android/build.gradle @@ -19,3 +19,4 @@ buildscript { } apply plugin: "com.facebook.react.rootproject" + diff --git a/docs/PERSONAS_IMPLEMENTATION_PLAN.md b/docs/PERSONAS_IMPLEMENTATION_PLAN.md index 
93ccdd5db..dd1225fcb 100644 --- a/docs/PERSONAS_IMPLEMENTATION_PLAN.md +++ b/docs/PERSONAS_IMPLEMENTATION_PLAN.md @@ -31,7 +31,8 @@ export type Capability = | 'voice' // STT + TTS | 'vision' // image understanding | 'image-gen' // image generation - | 'rag'; // knowledge base search + | 'rag' // knowledge base search (user-uploaded documents) + | 'memory-rag'; // cross-conversation RAG — past messages indexed and retrieved export type SkillTriggerEvent = | 'message_received' // new message in connected app @@ -109,8 +110,9 @@ export interface Persona { capabilities: Capability[]; // What this persona knows - knowledgeBaseIds: string[]; // attached RAG knowledge bases (use projectId as KB id) - memoryFacts: PersonaMemoryFact[]; // persistent learned facts + knowledgeBaseIds: string[]; // attached RAG knowledge bases (user-uploaded documents) + conversationMemoryEnabled: boolean; // true = all past conversations for this persona are embedded + searchable + memoryFacts: PersonaMemoryFact[]; // persistent learned facts (LLM-extracted, concise) // What this persona does automatically skills: Skill[]; @@ -227,8 +229,9 @@ export const DEFAULT_PERSONAS: Omit[] = [ systemPrompt: 'You are Jarvis, a capable and concise personal assistant. You help with anything — questions, tasks, planning, thinking. You are direct, warm, and never verbose unless asked.', icon: 'cpu', accentColor: '#6366F1', - capabilities: ['text', 'voice', 'vision'], + capabilities: ['text', 'voice', 'vision', 'memory-rag'], knowledgeBaseIds: [], + conversationMemoryEnabled: true, // Jarvis indexes all past conversations — gives it cross-chat intelligence memoryFacts: [], skills: [], integrationIds: [], @@ -418,6 +421,113 @@ export function buildMemoryContext(facts: PersonaMemoryFact[]): string { } ``` +### conversationRagService.ts (new — cross-conversation memory) + +This is what makes Jarvis actually intelligent across sessions. 
Rather than relying only on extracted `memoryFacts` (brief summaries) or the current context window, Jarvis embeds every conversation message into a per-persona vector store. When a new message arrives, relevant past exchanges are retrieved and injected as context — so Jarvis remembers "we discussed your onboarding last Tuesday" without you having to repeat it. + +**How it's different from document KB:** + +| | Document KB (`knowledgeBaseIds`) | Conversation RAG (`conversationMemoryEnabled`) | +|---|---|---| +| Source | User-uploaded PDFs, notes | Past conversation messages | +| Indexed when | User uploads a file | After each assistant response | +| Retrieved by | User explicitly asking about docs | Automatically on every message | +| Scoped to | Attached knowledge bases | All conversations for this persona | + +```typescript +// src/services/conversationRagService.ts + +/** + * Indexes completed conversation messages into the persona's vector store. + * Called after each assistant turn completes (streaming done). + * + * Each chunk stored = ~4–6 messages grouped by semantic coherence, not + * arbitrary token windows. This preserves conversational context. + */ +export async function indexConversationTurn( + personaId: string, + conversationId: string, + messages: Message[], // recent messages to embed (typically last 4–6) +): Promise<void> { + const chunks = chunkMessagesForEmbedding(messages); + for (const chunk of chunks) { + const embedding = await embeddingService.embed(chunk.text); + await vectorStore.upsert({ + id: `${conversationId}:${chunk.startIndex}`, + embedding, + metadata: { + personaId, + conversationId, + timestamp: chunk.timestamp, + preview: chunk.text.slice(0, 120), + }, + }); + } +} + +/** + * Retrieves the most relevant past conversation context for the current message. + * Returns plain text ready to inject into the system prompt. 
+ */ +export async function retrieveRelevantHistory( + personaId: string, + currentMessage: string, + topK = 3, +): Promise<string> { + const queryEmbedding = await embeddingService.embed(currentMessage); + const results = await vectorStore.search({ + embedding: queryEmbedding, + filter: { personaId }, + topK, + minScore: 0.72, // only inject if meaningfully relevant + }); + + if (results.length === 0) return ''; + + const snippets = results.map(r => + `[${formatRelativeDate(r.metadata.timestamp)}]\n${r.metadata.preview}` + ); + return `\n\nRelevant context from past conversations:\n${snippets.join('\n\n---\n\n')}`; +} + +/** + * Groups messages into semantically coherent chunks for embedding. + * Avoids splitting a user question from its assistant answer. + */ +function chunkMessagesForEmbedding(messages: Message[]): EmbeddingChunk[] { + // Pair each user message with its following assistant response + // Output: chunks of ~300–400 tokens each +} +``` + +**System prompt injection** (in `llm.ts` or wherever the prompt is assembled): + +```typescript +// When conversationMemoryEnabled is true for the active persona: +if (persona.conversationMemoryEnabled) { + const history = await conversationRagService.retrieveRelevantHistory( + persona.id, + latestUserMessage, + ); + systemPrompt += history; +} +``` + +**Indexing trigger** (after streaming completes, in chatStore or the streaming callback): + +```typescript +// After assistant response is done streaming: +if (persona.conversationMemoryEnabled) { + conversationRagService.indexConversationTurn( + persona.id, + conversationId, + recentMessages.slice(-6), + ).catch(() => {}); // fire-and-forget, non-blocking +} +``` + +**Storage:** Uses the existing `ragService` vector store, namespaced by `personaId`. No new storage layer needed — just a new indexing source. + --- ## Screens @@ -926,6 +1036,11 @@ export interface Message { 18. Memory injection into system prompt 19. `PersonaMemoryScreen` 20. 
Memory bar in chat (new fact notification) +21. `conversationRagService.ts` — cross-conversation RAG for `memory-rag` capability + - Index each conversation turn after streaming completes (fire-and-forget) + - Retrieve relevant history and inject into system prompt before each LLM call + - Jarvis has `conversationMemoryEnabled: true` by default; other personas opt in via PersonaEditScreen + - Reuses existing `ragService` vector store, namespaced by `personaId` ### Phase 5 — Integrations in Chat (tool calls) 21. Wire integration tool registry entries diff --git a/docs/TTS_ENGINE_INTERFACE.md b/docs/TTS_ENGINE_INTERFACE.md new file mode 100644 index 000000000..a8ddd5f1f --- /dev/null +++ b/docs/TTS_ENGINE_INTERFACE.md @@ -0,0 +1,154 @@ +# TTS Engine Interface + +## Overview + +The TTS subsystem uses a pluggable engine interface that decouples the app from any specific TTS implementation. Engines are registered at startup, the user picks one in settings, and the store delegates all operations through the active engine. + +The interface is designed as the first concrete implementation of a broader **On-Device Engine** pattern that will generalize to STT, Vision, and LLM modalities. 
+ +## Architecture + +``` +src/engine/ + types.ts # OnDeviceEngine base + TTSEngine interface + OnDeviceEngineEmitter.ts # Zero-dep typed event emitter + EngineRegistry.ts # Generic registry (TTS, STT, Vision, LLM) + index.ts # Barrel + singleton ttsRegistry + + tts/engines/ + kokoro/ # Kokoro TTS via react-native-executorch + KokoroEngine.ts # TTSEngine implementation + KokoroTTSBridge.tsx # React component bridge (wraps useTextToSpeech hook) + voices.ts # 8 voice definitions + outetts/ # OuteTTS 0.3 via llama.rn + OuteTTSEngine.ts # TTSEngine implementation + models.ts # GGUF asset definitions + qwen3/ # Qwen3-TTS 0.6B (stub) + Qwen3TTSEngine.ts # Asset management ready, inference TODO + models.ts # Talker + predictor + codec asset definitions +``` + +## How It Works + +### Engine Lifecycle + +``` +register → getEngine → setActiveEngine → initialize → speak/stop/pause → release +``` + +1. **Registration** — engines register factories at import time in `engine/index.ts` +2. **Activation** — `ttsRegistry.setActiveEngine('kokoro')` creates the instance and releases the previous engine +3. **Initialization** — imperative engines (OuteTTS) load models via `initialize()`. Hook-based engines (Kokoro) initialize when the bridge component mounts. +4. **Usage** — `engine.speak(text, options)` is the universal entry point +5. **Teardown** — `engine.release()` frees models; `engine.destroy()` also deletes downloaded files + +### Event System + +Every engine emits typed events. 
The store subscribes once and syncs state: + +- `phaseChange` — idle/downloading/loading/ready/processing/paused/error +- `audioChunk` — streaming PCM data (Kokoro) +- `audioComplete` — full audio buffer (OuteTTS) +- `downloadProgress` — per-asset download progress +- `amplitudeChange` — RMS amplitude for waveform visualization +- `voiceChanged` — active voice updated +- `error` — recoverable/non-recoverable errors + +### Store Delegation + +The Zustand store (`ttsStore.ts`) is a thin proxy: + +```typescript +speak: async (text, messageId) => { + const engine = ttsRegistry.getActiveEngine(); + if (!engine || !get().settings.enabled) return; + await engine.speak(text, { speed: get().settings.speed, messageId }); +} +``` + +No engine-specific branching. The store exposes derived booleans (`isReady`, `isSpeaking`, `isPaused`) computed from the engine's phase for backward compatibility with UI components. + +### React Bridge Pattern + +Some engines (Kokoro) depend on React hooks. These engines return a React component from `getBridgeComponent()`. The `` component (mounted in `App.tsx`) renders it: + +``` +App.tsx → → engine.getBridgeComponent() → +``` + +The bridge mounts the hook, then pushes an imperative handle into the engine instance. Fully imperative engines (OuteTTS, Qwen3) return `null` — no bridge needed. + +## Registered Engines + +| Engine | ID | Size | Streaming | Voice Cloning | Status | +|--------|-----|------|-----------|---------------|--------| +| Kokoro TTS | `kokoro` | 82 MB | Yes | No | Production | +| OuteTTS 0.3 | `outetts` | 530 MB | No | Yes | Production | +| Qwen3-TTS 0.6B | `qwen3-tts` | ~650 MB | No | Yes | Stub (not registered) | + +## Adding a New Engine + +1. Create `src/engine/tts/engines//` with: + - `models.ts` — `ModelAsset[]` definitions (URLs, sizes, filenames) + - `Engine.ts` — class extending `OnDeviceEngineEmitter` implementing `TTSEngine` + - `index.ts` — barrel exports + +2. 
Implement the interface: + - `getRequiredAssets()` — what to download + - `initialize()` — load models into memory + - `speak()` — text in, audio out + - `getVoices()` / `setVoice()` — voice management + - `stop()` / `pause()` / `resume()` — playback control + - `getBridgeComponent()` — return `null` for imperative engines + +3. Register in `src/engine/index.ts`: + ```typescript + import { MyEngine } from './tts/engines/myengine'; + ttsRegistry.register('myengine', () => new MyEngine()); + ``` + +4. It appears in the engine picker on the TTS Settings screen automatically. + +## Multimodal Future + +The `OnDeviceEngine` base interface generalizes beyond TTS: + +``` +OnDeviceEngine # lifecycle, assets, events, capabilities + ├── TTSEngine # text → audio (Kokoro, OuteTTS, Qwen3) + ├── STTEngine (future) # audio → text (whisper.rn) + ├── VisionEngine (future) # image → structured (CoreML) + └── LLMEngine (future) # text → text (llama.rn) +``` + +Each modality shares: lifecycle management, model asset download/delete, typed event system, capability declaration, platform checks, and the React bridge pattern. + +The `EngineRegistry` is generic — `new EngineRegistry()` works identically. + +The orchestration layer above would wire engines together: +- **Listen** (STT) → **Think** (LLM) → **Speak** (TTS) +- **See** (Vision) feeds context to **Think** + +## Qwen3-TTS Integration Path + +The stub is ready at `src/engine/tts/engines/qwen3/`. Asset management, download, and lifecycle are implemented. The remaining work is the inference pipeline in `speak()`: + +1. Load talker GGUF + predictor GGUF via `llama.rn` (two contexts) +2. Load codec decoder ONNX via `onnxruntime-react-native` +3. Talker generates first-codebook tokens at 12Hz +4. Predictor fills codebooks 2-16 +5. Codec decodes token grid to PCM Float32 at 24kHz + +Reference: [LunaVox](https://github.com/wkwong/lunavox) has a working desktop implementation of this pipeline. 
+ +## Settings Migration + +The store handles migration from the pre-engine-interface format automatically via `onRehydrateStorage`. Old fields (`voiceId`, `kokoroVoiceId`) are migrated to `voiceByEngine` map on first load. + +## Key Files + +- `src/engine/types.ts` — all interfaces +- `src/engine/index.ts` — registry + engine registration +- `src/stores/ttsStore.ts` — store (delegates to active engine) +- `src/components/EngineBridge.tsx` — renders bridge for hook-based engines +- `src/screens/TTSSettingsScreen/index.tsx` — engine picker UI diff --git a/docs/TTS_IMPLEMENTATION_PLAN.md b/docs/TTS_IMPLEMENTATION_PLAN.md index 19b6942c5..41f548f40 100644 --- a/docs/TTS_IMPLEMENTATION_PLAN.md +++ b/docs/TTS_IMPLEMENTATION_PLAN.md @@ -2,1075 +2,275 @@ ## Product Vision -Two first-class interface modes, switchable from Settings: +Two first-class interface modes, switchable from Chat Settings or TTS Settings: | Mode | Primary output | TTS role | Text | |---|---|---|---| | **Chat Mode** | Text bubbles | Add-on — play button per message | Default visible | -| **Audio Mode** | Waveform bubbles | Core — auto-generated at completion | Hidden by default, expandable | +| **Audio Mode** | Waveform bubbles (both sides) | Core — auto-generated at completion | Hidden by default, expandable | -**Audio Mode is the target product experience.** Messages feel like voice note exchanges — not a chat app that also speaks. The user has full per-message audio controls: scrub to position, adjust playback speed, change voice/tone. Text is always available as a "Show transcript" expand. +**Audio Mode is the target product experience.** Both the user's voice recordings AND the AI's responses appear as waveform audio bubbles — a full voice-note conversation. No text is shown by default; transcript is always accessible via "Show transcript" expand. -Chat Mode is the fallback for devices that can't run TTS models, or users who prefer it. 
+- User voice recordings: right-aligned audio bubbles (recorded WAV, played back locally) +- AI responses: left-aligned audio bubbles (OuteTTS-generated, with 40-bar waveform visualization) + +Chat Mode is the fallback for devices that can't run TTS models, or users who prefer text. --- ## Decision Log -### Engine -**OuteTTS 0.3 (500M) + WavTokenizer** via `llama.rn`. +### Engine (updated) + +**Two-tier TTS architecture:** + +| Tier | Engine | Use case | Speed | Size | +|---|---|---|---|---| +| **Tier 1 — Speak (Chat Mode)** | Kokoro via `react-native-executorch` | On-demand speak button, long-press Speak action | ~1s (streaming) | ~100MB | +| **Tier 2 — Generate+Save (Audio Mode)** | OuteTTS 0.3 + WavTokenizer via `llama.rn` | Auto-generate waveform bubble after streaming | ~30–120s | ~527MB | + +**Why two tiers:** +- Kokoro via ExecuTorch is fast enough for interactive use (streaming starts < 1s) but outputs raw PCM chunks — no way to write to disk for waveform scrubbing without custom buffering +- OuteTTS via llama.rn generates the full audio up front, returns `Float32Array` + waveform data + duration in one call — ideal for the saved-file + waveform visualisation pattern Audio Mode requires +- OuteTTS is NOT suitable for the speak button (too slow, ~30–120s per sentence) +- Kokoro is NOT currently available as a GGUF via llama.cpp (feature request opened Jan 2025, closed stale Oct 2025, never merged) + +**Previous decision (superseded):** +OuteTTS only via llama.rn for both modes. Superseded because ~1 minute to speak a single sentence is not acceptable for interactive use. + +### Platform constraint -- OuteTTS 1.0 (Qwen3 0.6B) is blocked: the DAC vocoder has no GGUF, and llama.cpp PR#12794 is an open draft. The backbone exists on HuggingFace but the decoder is not implemented upstream. -- OuteTTS 0.3 with WavTokenizer is the **only fully working path** through llama.rn today (confirmed via TTSScreen.tsx in mybigday/llama.rn example app). 
-- Upgrade to OuteTTS 1.0 will be a model swap with no architecture change once PR#12794 and llama.rn PR#300 land. +`react-native-executorch` requires **Android 13 (API 33)** minimum and **iOS 17** minimum. + +Current app `minSdkVersion` is **24 (Android 7)**. + +**Resolution:** Kokoro speak is available only on Android 13+ / iOS 17+. On older devices, the speak button falls back to OuteTTS (slow but functional). This is detected at runtime — no code path is dead, just slower on older OS. + +`minSdkVersion` stays at 24. No breaking change for existing users. ### Playback -**react-native-audio-api** (Software Mansion). Implements the Web Audio API spec for React Native. `decodeAudioTokens()` returns `number[]` (Float32 PCM at 24kHz mono) which feeds directly into an `AudioBuffer`. +**react-native-audio-api** (Software Mansion, already installed). Implements the Web Audio API spec for React Native. Both Kokoro (streaming `Float32Array` chunks) and OuteTTS (full `Float32Array`) pipe through the same `AudioContext → AudioBufferSourceNode` path at 24kHz mono. ### Audio Persistence (Audio Mode only) -In Audio Mode, generated PCM is written to disk as a WAV file per message so scrubbing works without re-generating. Files live at: +In Audio Mode, generated PCM is written to disk as a raw PCM file per message so scrubbing works without re-generating. Files live at: ``` -${RNFS.DocumentDirectoryPath}/audio-cache/{conversationId}/{messageId}.wav +${RNFS.DocumentDirectoryPath}/audio-cache/{conversationId}/{messageId}.pcm ``` Cache eviction strategy: - Keep the last 50 messages worth of audio per conversation - User can wipe audio cache from Settings ("Clear audio cache — X MB") -- Estimated size: ~1–4 MB per message (24kHz mono, varies by length) +- Estimated size: ~1–4 MB per message (24kHz mono Float32, varies by length) -In Chat Mode, audio is generated on demand, played, then discarded (no disk write). 
+In Chat Mode, audio is generated (via Kokoro) on demand, played, then discarded (no disk write). ### Voice Selection -OuteTTS 0.3 supports multiple speaker profiles. Expose as a voice picker in TTSSettingsScreen. Store selected voice ID in `ttsStore` settings (persisted). Default: speaker 0 (natural female). +- **Kokoro voices (Chat Mode speak):** 8 built-in voices (US/GB English, male/female). Stored as `kokoroVoiceId` in `ttsStore` settings. Default: `af_heart`. +- **OuteTTS voices (Audio Mode waveform):** Single profile (`speaker 0`) — OuteTTS 0.3 multi-speaker not confirmed working via llama.rn. Will expand when OuteTTS 1.0 lands. ### Device Gate -Require **flagship tier (8GB+ RAM)**. The memory stack: -``` -LLM (3B Q4) ~2.0 GB -Whisper base ~150 MB -OuteTTS backbone ~454 MB -WavTokenizer ~ 73 MB -OS + app ~2.0 GB -───────────────────────── -Total: ~4.7 GB → fits 8GB devices, tight on 6GB -``` -Show a warning (not a hard block) for 6–8GB devices. Hard block below 6GB. If device is blocked, Audio Mode is unavailable — app defaults to Chat Mode and hides the Audio Mode option. - ---- - -## Model Files - -| Role | HuggingFace Repo | File | Size | -|---|---|---|---| -| TTS Backbone | `OuteAI/OuteTTS-0.3-500M-GGUF` | `OuteTTS-0.3-500M-Q4_K_M.gguf` | 454 MB | -| Vocoder | `ggml-org/WavTokenizer` | `WavTokenizer-Large-75-Q5_1.gguf` | 73 MB | +Show a warning (not a hard block) for 6–8GB devices. Hard block below 6GB for Audio Mode (OuteTTS only). Kokoro speak has no RAM gate. 
-Direct download URLs (HuggingFace resolve): +Memory stack (worst case — both models loaded simultaneously): ``` -https://huggingface.co/OuteAI/OuteTTS-0.3-500M-GGUF/resolve/main/OuteTTS-0.3-500M-Q4_K_M.gguf -https://huggingface.co/ggml-org/WavTokenizer/resolve/main/WavTokenizer-Large-75-Q5_1.gguf +LLM (3B Q4) ~2.0 GB +Whisper base ~150 MB +OuteTTS backbone ~454 MB +WavTokenizer ~ 73 MB +Kokoro (XNNPACK .pte) ~100 MB ← new +OS + app ~2.0 GB +────────────────────────────── +Total: ~4.8 GB → fits 8GB devices ``` -Storage directories: -``` -${RNFS.DocumentDirectoryPath}/tts-models/ ← model weights -${RNFS.DocumentDirectoryPath}/audio-cache/ ← per-message WAV files (Audio Mode only) -``` +Kokoro and OuteTTS are never loaded simultaneously — Kokoro handles Chat Mode speak (OuteTTS not loaded), OuteTTS handles Audio Mode generation (Kokoro not involved). --- -## New Package - -```bash -npm install react-native-audio-api -``` - -iOS: run `pod install` after. -Android: auto-linked. - ---- - -## Interface Mode Setting - -### Where it lives -`ttsStore` settings object gains: +## Model Files -```typescript -export type InterfaceMode = 'chat' | 'audio'; - -export interface TTSSettings { - interfaceMode: InterfaceMode; // default: 'chat' until TTS models downloaded, then user can switch - enabled: boolean; - autoPlay: boolean; // Chat Mode only — auto-speak after completion - speed: number; // 0.5–2.0, default 1.0 - voiceId: string; // OuteTTS speaker profile, default '0' -} -``` +### Tier 1 — Kokoro (react-native-executorch) -### Mode switching rules -- If TTS models not downloaded → `interfaceMode` locked to `'chat'` -- If device RAM < 6GB → `interfaceMode` locked to `'chat'`, Audio Mode option hidden -- Switching mode takes effect immediately for new messages; existing messages render in whatever mode they were generated in (Chat Mode messages have no audio file, Audio Mode messages have one) -- A banner appears at the top of the chat on first switch: "Audio mode on — 
responses will play as voice notes." +Downloaded automatically by `react-native-executorch` to its internal cache (`react-native-executorch/` in document directory). No manual download management needed. ---- +| File | Source | Size (approx) | +|---|---|---| +| `duration_predictor.pte` | HuggingFace: `software-mansion/react-native-executorch-kokoro` | ~10 MB | +| `synthesizer.pte` | same | ~80 MB | +| Voice `.bin` files (per voice) | same repo | ~3–5 MB each | +| Phonemizer data (tagger + lexicon) | same repo | ~5 MB | -## Audio Mode: Message Bubble +Total cold download: ~100–120 MB. Subsequent launches use cached files. -### Layout (replaces text bubble for assistant messages) +### Tier 2 — OuteTTS (llama.rn, audio mode only) -``` -┌─────────────────────────────────────────────┐ -│ [avatar] ●━━━━━━━━━━━━━━━━━━━ 0:42 1x │ -│ [waveform visualization] │ -│ [Show transcript ▾] │ -└─────────────────────────────────────────────┘ -``` - -- **Waveform bar** — static amplitude visualization drawn from PCM data at generation time (no real-time animation needed, just a static shape like WhatsApp) -- **Scrubber** — draggable progress indicator -- **Timestamp** — elapsed / total duration -- **Speed chip** — tappable, cycles 0.5x → 1x → 1.5x → 2x -- **Show transcript** — expands inline to full text, collapses again - -User messages (voice input via Whisper) show the same bubble layout but with the transcript as primary since we have no TTS for user messages. +| Role | HuggingFace Repo | File | Size | +|---|---|---|---| +| TTS Backbone | `OuteAI/OuteTTS-0.3-500M-GGUF` | `OuteTTS-0.3-500M-Q4_K_M.gguf` | 454 MB | +| Vocoder | `ggml-org/WavTokenizer` | `WavTokenizer-Large-75-Q5_1.gguf` | 73 MB | -### Per-message controls (long press → action sheet) -- Change voice (re-generates audio with new speaker profile, overwrites cached file) -- Regenerate audio -- Copy text -- Delete message +Stored at: `${RNFS.DocumentDirectoryPath}/tts-models/` --- -## Files to Create - -### 1. 
`src/constants/ttsModels.ts` +## New Packages -```typescript -export const TTS_BACKBONE_MODEL = { - id: 'outetts-0.3-500m-q4', - name: 'OuteTTS 0.3', - backboneFile: 'OuteTTS-0.3-500M-Q4_K_M.gguf', - backboneUrl: 'https://huggingface.co/OuteAI/OuteTTS-0.3-500M-GGUF/resolve/main/OuteTTS-0.3-500M-Q4_K_M.gguf', - backboneSizeMB: 454, - vocoderFile: 'WavTokenizer-Large-75-Q5_1.gguf', - vocoderUrl: 'https://huggingface.co/ggml-org/WavTokenizer/resolve/main/WavTokenizer-Large-75-Q5_1.gguf', - vocoderSizeMB: 73, - sampleRate: 24000, - description: 'Natural-sounding on-device speech. Requires ~530 MB storage.', -}; - -export const TTS_SPEAKER_PROFILES = [ - { id: '0', label: 'Default' }, - // Add more as OuteTTS 0.3 speaker profiles are confirmed -]; - -export const TTS_MIN_RAM_GB = 6; // warn below 8, hard block below 6 -export const TTS_BLOCK_RAM_GB = 6; // hard block -export const TTS_WARN_RAM_GB = 8; // show warning card -export const AUDIO_CACHE_MAX_MESSAGES = 50; // per conversation +```bash +npm install react-native-executorch +npm install react-native-executorch-bare-resource-fetcher +npm install @dr.pogodin/react-native-fs @kesha-antonov/react-native-background-downloader ``` ---- - -### 2. `src/services/ttsService.ts` - -Mirror `whisperService.ts` pattern exactly. 
- -```typescript -import { initLlama, LlamaContext } from 'llama.rn'; -import RNFS from 'react-native-fs'; -import { AudioContext } from 'react-native-audio-api'; -import logger from '../utils/logger'; -import { TTS_BACKBONE_MODEL } from '../constants/ttsModels'; - -export interface TTSOptions { - speed?: number; // 0.5–2.0, default 1.0 - voiceId?: string; // speaker profile id, default '0' -} - -export interface GeneratedAudio { - samples: Float32Array; - durationSeconds: number; - sampleRate: number; - /** Amplitude envelope (downsampled to ~200 points) for waveform visualization */ - waveformData: number[]; -} - -class TTSService { - private context: LlamaContext | null = null; - private isVocoderReady: boolean = false; - private isSpeakingFlag: boolean = false; - private audioCtx: AudioContext | null = null; - private currentSource: AudioBufferSourceNode | null = null; - private contextLoadPromise: Promise = Promise.resolve(); - - // ─── Directories & Paths ──────────────────────────────────────────────── - - getModelsDir(): string { - return `${RNFS.DocumentDirectoryPath}/tts-models`; - } - - getAudioCacheDir(conversationId: string): string { - return `${RNFS.DocumentDirectoryPath}/audio-cache/${conversationId}`; - } - - getAudioFilePath(conversationId: string, messageId: string): string { - return `${this.getAudioCacheDir(conversationId)}/${messageId}.wav`; - } - - async ensureModelsDirExists(): Promise { - const dir = this.getModelsDir(); - if (!await RNFS.exists(dir)) await RNFS.mkdir(dir); - } - - async ensureAudioCacheDirExists(conversationId: string): Promise { - const dir = this.getAudioCacheDir(conversationId); - if (!await RNFS.exists(dir)) await RNFS.mkdir(dir); - } - - getBackbonePath(): string { - return `${this.getModelsDir()}/${TTS_BACKBONE_MODEL.backboneFile}`; - } - - getVocoderPath(): string { - return `${this.getModelsDir()}/${TTS_BACKBONE_MODEL.vocoderFile}`; - } - - async isBackboneDownloaded(): Promise { - return 
RNFS.exists(this.getBackbonePath()); - } - - async isVocoderDownloaded(): Promise { - return RNFS.exists(this.getVocoderPath()); - } - - async areBothModelsDownloaded(): Promise { - return (await this.isBackboneDownloaded()) && (await this.isVocoderDownloaded()); - } - - async isAudioCached(conversationId: string, messageId: string): Promise { - return RNFS.exists(this.getAudioFilePath(conversationId, messageId)); - } - - async getAudioCacheSizeMB(): Promise { - const cacheRoot = `${RNFS.DocumentDirectoryPath}/audio-cache`; - if (!await RNFS.exists(cacheRoot)) return 0; - const stat = await RNFS.stat(cacheRoot); - return stat.size / (1024 * 1024); - } - - async clearAudioCache(): Promise { - const cacheRoot = `${RNFS.DocumentDirectoryPath}/audio-cache`; - if (await RNFS.exists(cacheRoot)) await RNFS.unlink(cacheRoot); - } - - // ─── Download ──────────────────────────────────────────────────────────── - - async downloadBackbone(onProgress?: (p: number) => void): Promise { - await this.ensureModelsDirExists(); - const dest = this.getBackbonePath(); - if (await RNFS.exists(dest)) return dest; - const dl = RNFS.downloadFile({ - fromUrl: TTS_BACKBONE_MODEL.backboneUrl, - toFile: dest, - progressDivider: 1, - progress: (res) => onProgress?.(res.bytesWritten / res.contentLength), - }); - const result = await dl.promise; - if (result.statusCode !== 200) { - await RNFS.unlink(dest).catch(() => {}); - throw new Error(`Backbone download failed: HTTP ${result.statusCode}`); - } - return dest; - } - - async downloadVocoder(onProgress?: (p: number) => void): Promise { - await this.ensureModelsDirExists(); - const dest = this.getVocoderPath(); - if (await RNFS.exists(dest)) return dest; - const dl = RNFS.downloadFile({ - fromUrl: TTS_BACKBONE_MODEL.vocoderUrl, - toFile: dest, - progressDivider: 1, - progress: (res) => onProgress?.(res.bytesWritten / res.contentLength), - }); - const result = await dl.promise; - if (result.statusCode !== 200) { - await RNFS.unlink(dest).catch(() 
=> {}); - throw new Error(`Vocoder download failed: HTTP ${result.statusCode}`); - } - return dest; - } - - async deleteModels(): Promise { - await this.unloadModels(); - const bp = this.getBackbonePath(); - const vp = this.getVocoderPath(); - if (await RNFS.exists(bp)) await RNFS.unlink(bp); - if (await RNFS.exists(vp)) await RNFS.unlink(vp); - } - - // ─── Model Lifecycle ───────────────────────────────────────────────────── - - async loadModels(): Promise { - if (this.context && this.isVocoderReady) return; - - this.contextLoadPromise = this.contextLoadPromise.then(async () => { - if (this.context && this.isVocoderReady) return; - - logger.log('[TTS] Loading backbone...'); - this.context = await initLlama({ - model: this.getBackbonePath(), - n_ctx: 8192, - n_threads: 4, - }); - - logger.log('[TTS] Loading vocoder...'); - await this.context.initVocoder({ - path: this.getVocoderPath(), - n_batch: 4096, - }); +iOS: `pod install` after. - this.isVocoderReady = await this.context.isVocoderEnabled(); - if (!this.isVocoderReady) { - throw new Error('Vocoder failed to initialize — check model files.'); - } - - logger.log('[TTS] Ready.'); - }); - - return this.contextLoadPromise; - } - - async unloadModels(): Promise { - this.stop(); - if (this.context) { - await this.context.releaseVocoder().catch(() => {}); - await this.context.release().catch(() => {}); - this.context = null; - } - this.isVocoderReady = false; - this.audioCtx?.close().catch(() => {}); - this.audioCtx = null; - } - - isLoaded(): boolean { - return this.context !== null && this.isVocoderReady; - } - - // ─── Audio Generation ──────────────────────────────────────────────────── - - /** - * Generate PCM audio for `text`. Does NOT play it. - * Returns samples + metadata needed for waveform rendering and playback. 
- */ - async generate(text: string, options: TTSOptions = {}): Promise { - if (!this.context || !this.isVocoderReady) { - throw new Error('TTS models not loaded.'); - } - - const speakerId = options.voiceId ?? '0'; - const { prompt, grammar } = await this.context.getFormattedAudioCompletion( - speakerId === '0' ? null : speakerId, - text, - ); - const guideTokens = await this.context.getAudioCompletionGuideTokens(text); - - const result = await this.context.completion({ - prompt, - grammar, - guide_tokens: guideTokens, - n_predict: 4096, - temperature: 0.7, - top_p: 0.9, - stop: ['<|im_end|>'], - }); - - const pcmArray = await this.context.decodeAudioTokens(result.audio_tokens); - const samples = new Float32Array(pcmArray); - const sampleRate = TTS_BACKBONE_MODEL.sampleRate; - const durationSeconds = samples.length / sampleRate; - const waveformData = this.downsampleForWaveform(samples, 200); - - return { samples, durationSeconds, sampleRate, waveformData }; - } - - /** - * Write PCM samples to a WAV file on disk. - * Used in Audio Mode to persist audio per message. - */ - async saveToFile(audio: GeneratedAudio, conversationId: string, messageId: string): Promise { - await this.ensureAudioCacheDirExists(conversationId); - const path = this.getAudioFilePath(conversationId, messageId); - const wavBuffer = this.encodeWAV(audio.samples, audio.sampleRate); - await RNFS.writeFile(path, wavBuffer, 'base64'); - return path; - } - - /** - * Generate + save in one step (Audio Mode convenience). 
- */ - async generateAndSave( - text: string, - conversationId: string, - messageId: string, - options: TTSOptions = {}, - ): Promise<{ path: string; audio: GeneratedAudio }> { - const audio = await this.generate(text, options); - const path = await this.saveToFile(audio, conversationId, messageId); - return { path, audio }; - } - - // ─── Playback ──────────────────────────────────────────────────────────── - - async playFromSamples(samples: Float32Array, speed: number = 1.0, startOffset: number = 0): Promise { - const sampleRate = TTS_BACKBONE_MODEL.sampleRate; - - this.audioCtx?.close().catch(() => {}); - this.audioCtx = new AudioContext({ sampleRate }); - - const buffer = this.audioCtx.createBuffer(1, samples.length, sampleRate); - buffer.copyToChannel(samples, 0); - - const source = this.audioCtx.createBufferSource(); - source.buffer = buffer; - source.playbackRate.value = speed; - source.connect(this.audioCtx.destination); - - this.currentSource = source; - this.isSpeakingFlag = true; - - return new Promise((resolve) => { - source.onended = () => { - this.currentSource = null; - this.isSpeakingFlag = false; - resolve(); - }; - source.start(0, startOffset); - }); - } - - async playFromFile(filePath: string, speed: number = 1.0, startOffset: number = 0): Promise { - const base64 = await RNFS.readFile(filePath, 'base64'); - const samples = this.decodeWAV(base64); - return this.playFromSamples(samples, speed, startOffset); - } - - /** - * Chat Mode convenience: generate + play + discard (no disk write). - */ - async speak(text: string, options: TTSOptions = {}): Promise { - if (this.isSpeakingFlag) this.stop(); - const audio = await this.generate(text, options); - if (!this.isSpeakingFlag) { // may have been stopped during generation - await this.playFromSamples(audio.samples, options.speed ?? 
1.0); - } - } - - stop(): void { - this.isSpeakingFlag = false; - try { - this.currentSource?.stop(); - } catch { - // already stopped - } - this.currentSource = null; - } - - isSpeaking(): boolean { - return this.isSpeakingFlag; - } - - // ─── Utilities ─────────────────────────────────────────────────────────── - - private downsampleForWaveform(samples: Float32Array, points: number): number[] { - const blockSize = Math.floor(samples.length / points); - const result: number[] = []; - for (let i = 0; i < points; i++) { - let sum = 0; - for (let j = 0; j < blockSize; j++) { - sum += Math.abs(samples[i * blockSize + j]); - } - result.push(sum / blockSize); - } - return result; - } - - private encodeWAV(samples: Float32Array, sampleRate: number): string { - // Standard 16-bit PCM WAV encoding → base64 - // Implementation: write RIFF header + PCM data - const buffer = new ArrayBuffer(44 + samples.length * 2); - const view = new DataView(buffer); - const writeString = (offset: number, s: string) => { - for (let i = 0; i < s.length; i++) view.setUint8(offset + i, s.charCodeAt(i)); - }; - writeString(0, 'RIFF'); - view.setUint32(4, 36 + samples.length * 2, true); - writeString(8, 'WAVE'); - writeString(12, 'fmt '); - view.setUint32(16, 16, true); - view.setUint16(20, 1, true); - view.setUint16(22, 1, true); - view.setUint32(24, sampleRate, true); - view.setUint32(28, sampleRate * 2, true); - view.setUint16(32, 2, true); - view.setUint16(34, 16, true); - writeString(36, 'data'); - view.setUint32(40, samples.length * 2, true); - for (let i = 0; i < samples.length; i++) { - view.setInt16(44 + i * 2, Math.max(-32768, Math.min(32767, samples[i] * 32768)), true); - } - return Buffer.from(buffer).toString('base64'); - } - - private decodeWAV(base64: string): Float32Array { - const buffer = Buffer.from(base64, 'base64'); - const view = new DataView(buffer.buffer); - const sampleCount = (buffer.length - 44) / 2; - const samples = new Float32Array(sampleCount); - for (let i = 0; i 
< sampleCount; i++) { - samples[i] = view.getInt16(44 + i * 2, true) / 32768; - } - return samples; - } -} - -export const ttsService = new TTSService(); -``` +**Note:** `react-native-executorch-bare-resource-fetcher` requires its own RNFS fork (`@dr.pogodin/react-native-fs`) alongside the existing `react-native-fs`. Both can coexist. --- -### 3. `src/stores/ttsStore.ts` +## Architecture -Mirror `whisperStore.ts` pattern, using Zustand with `persist`. +### Initialization (`App.tsx`) ```typescript -import { create } from 'zustand'; -import { persist, createJSONStorage } from 'zustand/middleware'; -import AsyncStorage from '@react-native-async-storage/async-storage'; -import { ttsService } from '../services/ttsService'; -import logger from '../utils/logger'; - -export type InterfaceMode = 'chat' | 'audio'; - -export interface TTSSettings { - interfaceMode: InterfaceMode; - enabled: boolean; - autoPlay: boolean; // Chat Mode only - speed: number; // 0.5–2.0 - voiceId: string; // OuteTTS speaker profile -} +import { initExecutorch } from 'react-native-executorch'; +import { BareResourceFetcher } from 'react-native-executorch-bare-resource-fetcher'; -export interface TTSState { - // Download state - isBackboneDownloaded: boolean; - isVocoderDownloaded: boolean; - isDownloadingBackbone: boolean; - isDownloadingVocoder: boolean; - backboneDownloadProgress: number; - vocoderDownloadProgress: number; - - // Model lifecycle - isModelLoading: boolean; - isModelLoaded: boolean; - - // Playback - isSpeaking: boolean; - currentMessageId: string | null; - playbackPosition: number; // seconds, for scrubber - - // Cache - audioCacheSizeMB: number; - - // Settings (persisted) - settings: TTSSettings; - - error: string | null; - - // Actions - checkDownloadStatus: () => Promise; - downloadModels: () => Promise; - deleteModels: () => Promise; - loadModels: () => Promise; - unloadModels: () => Promise; - - // Chat Mode - speak: (text: string, messageId: string) => Promise; - stop: () 
=> void; - - // Audio Mode - generateAndSave: (text: string, conversationId: string, messageId: string) => Promise<{ path: string; waveformData: number[]; durationSeconds: number }>; - playMessage: (messageId: string, filePath: string, startOffset?: number) => Promise; - stopPlayback: () => void; - - // Cache management - refreshCacheSize: () => Promise; - clearAudioCache: () => Promise; - - updateSettings: (patch: Partial) => void; - clearError: () => void; -} - -export const useTTSStore = create()( - persist( - (set, get) => ({ - isBackboneDownloaded: false, - isVocoderDownloaded: false, - isDownloadingBackbone: false, - isDownloadingVocoder: false, - backboneDownloadProgress: 0, - vocoderDownloadProgress: 0, - isModelLoading: false, - isModelLoaded: false, - isSpeaking: false, - currentMessageId: null, - playbackPosition: 0, - audioCacheSizeMB: 0, - settings: { - interfaceMode: 'chat', - enabled: true, - autoPlay: false, - speed: 1.0, - voiceId: '0', - }, - error: null, - - checkDownloadStatus: async () => { - const [backbone, vocoder] = await Promise.all([ - ttsService.isBackboneDownloaded(), - ttsService.isVocoderDownloaded(), - ]); - set({ isBackboneDownloaded: backbone, isVocoderDownloaded: vocoder }); - }, - - downloadModels: async () => { - set({ error: null }); - try { - set({ isDownloadingBackbone: true, backboneDownloadProgress: 0 }); - await ttsService.downloadBackbone((p) => set({ backboneDownloadProgress: p })); - set({ isDownloadingBackbone: false, isBackboneDownloaded: true }); - - set({ isDownloadingVocoder: true, vocoderDownloadProgress: 0 }); - await ttsService.downloadVocoder((p) => set({ vocoderDownloadProgress: p })); - set({ isDownloadingVocoder: false, isVocoderDownloaded: true }); - } catch (err) { - const msg = err instanceof Error ? 
err.message : 'Download failed'; - logger.error('[TTS Store] Download error:', msg); - set({ isDownloadingBackbone: false, isDownloadingVocoder: false, error: msg }); - } - }, - - deleteModels: async () => { - await ttsService.deleteModels(); - set({ isBackboneDownloaded: false, isVocoderDownloaded: false, isModelLoaded: false }); - }, - - loadModels: async () => { - if (get().isModelLoaded || get().isModelLoading) return; - set({ isModelLoading: true, error: null }); - try { - await ttsService.loadModels(); - set({ isModelLoaded: true }); - } catch (err) { - const msg = err instanceof Error ? err.message : 'Failed to load TTS models'; - logger.error('[TTS Store] Load error:', msg); - set({ error: msg }); - } finally { - set({ isModelLoading: false }); - } - }, - - unloadModels: async () => { - await ttsService.unloadModels(); - set({ isModelLoaded: false, isSpeaking: false, currentMessageId: null }); - }, - - // ── Chat Mode ────────────────────────────────────────────────────────── - - speak: async (text: string, messageId: string) => { - const { isModelLoaded, settings } = get(); - if (!settings.enabled) return; - if (!isModelLoaded) return; - - if (get().currentMessageId === messageId && get().isSpeaking) { - get().stop(); - return; - } - - ttsService.stop(); - set({ isSpeaking: true, currentMessageId: messageId, error: null }); - - try { - await ttsService.speak(text, { speed: settings.speed, voiceId: settings.voiceId }); - } catch (err) { - const msg = err instanceof Error ? 
err.message : 'Speech failed'; - logger.error('[TTS Store] Speak error:', msg); - set({ error: msg }); - } finally { - set({ isSpeaking: false, currentMessageId: null }); - } - }, - - stop: () => { - ttsService.stop(); - set({ isSpeaking: false, currentMessageId: null }); - }, - - // ── Audio Mode ───────────────────────────────────────────────────────── - - generateAndSave: async (text: string, conversationId: string, messageId: string) => { - const { settings } = get(); - const { path, audio } = await ttsService.generateAndSave( - text, - conversationId, - messageId, - { voiceId: settings.voiceId }, - ); - await get().refreshCacheSize(); - return { path, waveformData: audio.waveformData, durationSeconds: audio.durationSeconds }; - }, - - playMessage: async (messageId: string, filePath: string, startOffset: number = 0) => { - const { settings } = get(); - - if (get().currentMessageId === messageId && get().isSpeaking) { - get().stopPlayback(); - return; - } - - ttsService.stop(); - set({ isSpeaking: true, currentMessageId: messageId, playbackPosition: startOffset }); - - try { - await ttsService.playFromFile(filePath, settings.speed, startOffset); - } catch (err) { - const msg = err instanceof Error ? 
err.message : 'Playback failed'; - logger.error('[TTS Store] Playback error:', msg); - set({ error: msg }); - } finally { - set({ isSpeaking: false, currentMessageId: null, playbackPosition: 0 }); - } - }, - - stopPlayback: () => { - ttsService.stop(); - set({ isSpeaking: false, currentMessageId: null, playbackPosition: 0 }); - }, - - // ── Cache ────────────────────────────────────────────────────────────── - - refreshCacheSize: async () => { - const mb = await ttsService.getAudioCacheSizeMB(); - set({ audioCacheSizeMB: mb }); - }, - - clearAudioCache: async () => { - await ttsService.clearAudioCache(); - set({ audioCacheSizeMB: 0 }); - }, - - updateSettings: (patch) => { - set((state) => ({ settings: { ...state.settings, ...patch } })); - }, - - clearError: () => set({ error: null }), - }), - { - name: 'tts-store', - storage: createJSONStorage(() => AsyncStorage), - partialize: (state) => ({ settings: state.settings }), - } - ) -); +// Called once at startup, before any model hook is used +initExecutorch({ resourceFetcher: BareResourceFetcher }); ``` ---- +### KokoroTTSManager component -### 4. `src/hooks/useTTS.ts` +`react-native-executorch`'s `useTextToSpeech` is a React hook — it must live in a component. A `KokoroTTSManager` component mounts near the root, holds the hook instance, and exposes its methods via a module-level ref (`kokoroRef`). 
-```typescript -import { useEffect, useCallback } from 'react'; -import { useTTSStore } from '../stores/ttsStore'; -import { hardwareService } from '../services/hardware'; -import { TTS_BLOCK_RAM_GB, TTS_WARN_RAM_GB } from '../constants/ttsModels'; - -export function useTTS() { - const store = useTTSStore(); - - useEffect(() => { - store.checkDownloadStatus(); - }, []); - - const canRunOnDevice = useCallback(async (): Promise<{ allowed: boolean; warning: boolean }> => { - const ramGB = await hardwareService.getTotalMemoryGB(); - return { - allowed: ramGB >= TTS_BLOCK_RAM_GB, - warning: ramGB < TTS_WARN_RAM_GB, - }; - }, []); - - const speakMessage = useCallback( - (text: string, messageId: string) => { - if (!store.isModelLoaded && store.isBackboneDownloaded && store.isVocoderDownloaded) { - store.loadModels().then(() => store.speak(text, messageId)); - return; - } - store.speak(text, messageId); - }, - [store] - ); - - return { - ...store, - speakMessage, - canRunOnDevice, - areBothDownloaded: store.isBackboneDownloaded && store.isVocoderDownloaded, - isDownloading: store.isDownloadingBackbone || store.isDownloadingVocoder, - overallDownloadProgress: - store.backboneDownloadProgress * 0.86 + store.vocoderDownloadProgress * 0.14, - isAudioMode: store.settings.interfaceMode === 'audio', - isChatMode: store.settings.interfaceMode === 'chat', - }; -} ``` - ---- - -### 5. `src/components/AudioMessageBubble/index.tsx` *(Audio Mode only)* - -Replaces `ChatMessage` assistant bubble when `interfaceMode === 'audio'`. 
- -```typescript -interface AudioMessageBubbleProps { - messageId: string; - conversationId: string; - audioPath: string; // path to WAV on disk - waveformData: number[]; // 200-point amplitude array - durationSeconds: number; - isGenerating?: boolean; // true while TTS is still running -} +App +└── KokoroTTSManager ← mounts useTextToSpeech, wires to kokoroRef + └── AppNavigator + └── ChatScreen + └── TTSButton ← calls kokoroRef.stream(text, callbacks) ``` -**Layout:** -- Static waveform bar (200 rect bars, amplitude-scaled, filled up to scrubber position) -- Draggable scrubber thumb -- `MM:SS` elapsed / total -- Speed chip (cycles 0.5x → 1x → 1.5x → 2x, persists to store) -- "Show transcript" collapse/expand -- Long press → action sheet (Change voice, Regenerate, Copy text, Delete) - ---- - -### 6. `src/components/TTSButton/index.tsx` *(Chat Mode only)* - -Play/stop button that appears on each assistant message bubble. Unchanged from original plan — only rendered when `interfaceMode === 'chat'`. +### Speak flow (Chat Mode — Kokoro, fast) -```typescript -// Don't render in Audio Mode or if TTS disabled/not downloaded -if (settings.interfaceMode === 'audio' || !settings.enabled || !areBothDownloaded) return null; ``` - ---- - -### 7. `src/screens/TTSSettingsScreen/index.tsx` - -Accessible from SettingsScreen → "Text to Speech" row. - -**Sections:** -1. **Header** — back button + "Text to Speech" title -2. **Interface Mode card** — segmented control: `Chat` / `Audio` - - If device RAM < `TTS_BLOCK_RAM_GB`: Audio option is greyed out with "Requires 6GB+ RAM" - - If RAM is between block and warn thresholds: yellow warning under the control -3. **Master toggle card** — enable/disable TTS (Chat Mode only — in Audio Mode, TTS is always on) -4. **Model download card** — download status for both files with separate progress bars; "Download (527 MB)" / "Remove" buttons -5. **Voice card** (shown when downloaded) — voice picker from `TTS_SPEAKER_PROFILES` -6. 
**Playback card** (shown when downloaded) — Speed slider (0.5–2.0x), Auto-play toggle (Chat Mode only) -7. **Audio cache card** (Audio Mode only) — "Audio cache: X MB" + "Clear cache" button -8. **Device compatibility card** — RAM check with status -9. **Privacy card** — "All speech generated on your device. Nothing is sent to any server." - ---- - -### 8. `src/stores/index.ts` - -Add: -```typescript -export { useTTSStore } from './ttsStore'; +TTSButton tap + → kokoroRef.stream({ text, onNext: playChunk, onBegin, onEnd }) + → AudioContext buffers played as Float32Array chunks arrive + → Streaming: audio starts < 1s after tap ``` -### 9. `src/services/index.ts` +### Voice input flow (Audio Mode — user side) -Add: -```typescript -export { ttsService } from './ttsService'; ``` - -### 10. `src/navigation/types.ts` - -Add `TTSSettings: undefined` to `RootStackParamList`. - -### 11. `src/navigation/AppNavigator.tsx` - -```tsx - +User taps mic button + → audioRecorderService.startRecording() — records WAV to disk + → User releases mic + → audioRecorderService.stopRecording() → { path, durationSeconds } + → whisperService.transcribeFile(path) — file-based STT + → onAutoSend(transcript, { uri: path, format: 'wav', durationSeconds }) + → ChatInput builds MediaAttachment { type: 'audio', uri, durationSeconds } + → onSend(transcript, [audioAttachment]) — content = transcript, attachment = WAV + → MessageRenderer: user message with audio attachment → right-aligned AudioMessageBubble + → LLM receives transcript as text input (standard text generation) ``` -### 12. `src/screens/index.ts` - -Export `TTSSettingsScreen` and `AudioMessageBubble`. +For models that natively support audio input (e.g. Qwen2-Audio): WAV is passed directly as `input_audio` to the model — Whisper is bypassed entirely. -### 13. 
`src/screens/SettingsScreen.tsx` +### Generate+Save flow (Audio Mode — AI side) -Add nav row pointing to `TTSSettings` (after the Voice row): -```tsx - navigation.navigate('TTSSettings')}> - - Text to Speech - - ``` - -### 14. `src/components/ChatMessage/index.tsx` - -Mode-branch the assistant message render path: - -```tsx -import { AudioMessageBubble } from '../AudioMessageBubble'; -import { TTSButton } from '../TTSButton'; - -// In assistant message render: -const { settings } = useTTSStore(); - -if (settings.interfaceMode === 'audio' && message.audioPath) { - return ( - - ); -} - -// Chat Mode: existing text bubble + TTSButton +Streaming LLM response ends + → triggerAudioModeGeneration(conversationId, messageId, content) + (reads fresh message from useChatStore.getState() — not stale closure) + → ttsService.generateAndSave(text, ctx, options) + → OuteTTS runs inference → Float32Array + waveformData + duration + → Write PCM to disk → update message { audioPath, waveformData, audioDurationSeconds } + → MessageRenderer shows left-aligned AudioMessageBubble ``` -This requires adding `audioPath`, `waveformData`, `audioDurationSeconds`, and `isGeneratingAudio` fields to the message model. +--- -### 15. Message model update (`src/types/` or wherever `Message` is defined) +## ttsStore additions ```typescript -export interface Message { - // ... existing fields ... 
- audioPath?: string; // Audio Mode: path to WAV on disk - waveformData?: number[]; // Audio Mode: 200-point amplitude envelope - audioDurationSeconds?: number; // Audio Mode: total duration - isGeneratingAudio?: boolean; // true while TTS is running for this message -} +// Kokoro state +kokoroReady: boolean; // useTextToSpeech.isReady +kokoroDownloadProgress: number; // 0–1, during initial model download +kokoroVoiceId: KokoroVoiceId; // persisted setting + +// Actions +setKokoroReady: (ready: boolean, progress: number) => void; +kokoroSpeak: (text: string, messageId: string) => void; // delegates to kokoroRef +kokoroStop: () => void; ``` -### 16. Chat completion flow - -**Chat Mode (autoPlay):** unchanged from original plan — call `speak()` after streaming completes when `autoPlay: true`. - -**Audio Mode:** after streaming completes, immediately trigger `generateAndSave()` and update the message record with the returned `audioPath`, `waveformData`, `durationSeconds`. Set `isGeneratingAudio: true` on the message while generation runs so the bubble shows a loading state. 
- +The existing `speak()` action becomes: ```typescript -// After streaming completes, if Audio Mode: -if (settings.interfaceMode === 'audio') { - updateMessage(lastMessage.id, { isGeneratingAudio: true }); - const { path, waveformData, durationSeconds } = await ttsStore.generateAndSave( - stripControlTokens(lastMessage.content), - conversationId, - lastMessage.id, - ); - updateMessage(lastMessage.id, { - audioPath: path, - waveformData, - audioDurationSeconds: durationSeconds, - isGeneratingAudio: false, - }); +speak: (text, messageId) => { + if (kokoroReady) { + kokoroSpeak(text, messageId); // fast path + } else { + // OuteTTS fallback (slow, Android <13 or first launch before Kokoro loads) + outeTTSSpeak(text, messageId); + } } ``` --- -## Tests to Write - -### `__tests__/unit/services/ttsService.test.ts` -- `generate` calls `getFormattedAudioCompletion`, `getAudioCompletionGuideTokens`, `completion`, `decodeAudioTokens` in order -- `generate` returns correct `durationSeconds` and 200-point `waveformData` -- `saveToFile` writes a valid WAV file to the correct path -- `generateAndSave` calls both and returns path + audio -- `playFromFile` reads WAV, decodes, and calls `playFromSamples` -- `stop` sets `isSpeakingFlag` to false and calls `currentSource.stop()` -- `encodeWAV` / `decodeWAV` round-trip preserves samples (within 16-bit quantization error) -- `getAudioCacheSizeMB` returns correct value -- `clearAudioCache` removes the cache directory - -### `__tests__/unit/stores/ttsStore.test.ts` -- `generateAndSave` sets correct waveformData and calls `refreshCacheSize` -- `playMessage` sets `isSpeaking: true`, then `false` after completion -- `playMessage` on same messageId while playing → calls `stopPlayback` -- `updateSettings` merges partial settings correctly -- Settings persisted: `interfaceMode`, `speed`, `voiceId`, `enabled` survive re-hydration - -### `__tests__/integration/tts.test.ts` -- **Chat Mode full flow:** download → load → speak → stop -- **Audio 
Mode full flow:** download → load → generateAndSave → playMessage → stop -- **Auto-play:** Chat Mode with `autoPlay: true`, streaming completes → `speak` called -- **Audio Mode post-completion:** streaming completes → `generateAndSave` called → message updated with `audioPath` -- **Mode switch:** switching `interfaceMode` from `'chat'` to `'audio'` takes effect for next message +## Kokoro Voice IDs ---- - -## Implementation Order - -1. `src/constants/ttsModels.ts` -2. `src/services/ttsService.ts` (with WAV encode/decode + `generate`/`generateAndSave`/`playFromFile`) -3. `src/stores/ttsStore.ts` (with Audio Mode actions) -4. `src/hooks/useTTS.ts` -5. `src/stores/index.ts` — add export -6. `src/services/index.ts` — add export -7. `src/navigation/types.ts` — add route -8. Message model — add `audioPath`, `waveformData`, `audioDurationSeconds`, `isGeneratingAudio` -9. `src/components/AudioMessageBubble/index.tsx` -10. `src/components/TTSButton/index.tsx` (Chat Mode only, unchanged) -11. `src/screens/TTSSettingsScreen/index.tsx` (with Interface Mode section) -12. `src/screens/index.ts` — add exports -13. `src/navigation/AppNavigator.tsx` — add screen -14. `src/screens/SettingsScreen.tsx` — add nav row -15. `src/components/ChatMessage/index.tsx` — mode-branch render -16. Wire Audio Mode generation into chat completion flow -17. Write all tests -18. 
`npm install react-native-audio-api` + `pod install` +| ID | Label | Accent | Gender | +|---|---|---|---| +| `af_heart` | Heart | US English | Female | +| `af_river` | River | US English | Female | +| `af_sarah` | Sarah | US English | Female | +| `am_adam` | Adam | US English | Male | +| `am_michael` | Michael | US English | Male | +| `am_santa` | Santa | US English | Male | +| `bf_emma` | Emma | British English | Female | +| `bm_daniel` | Daniel | British English | Male | --- -## Memory Safety +## Files to Create / Modify -Before calling `loadModels()`, check available memory: +### New files +- `src/components/KokoroTTSManager.tsx` — mounts the hook, exposes via ref +- `src/constants/kokoroModels.ts` — voice/model constants mirroring executorch exports -```typescript -const available = await hardwareService.getAvailableMemoryGB(); -if (available < 1.0) { - throw new Error('Not enough free memory. Try closing image generation first.'); -} -``` +### Modified files +- `App.tsx` — add `initExecutorch()` call + mount `` +- `src/stores/ttsStore.ts` — add Kokoro state + `kokoroVoiceId` setting +- `src/services/ttsService.ts` — no change to OuteTTS path +- `src/components/TTSButton/index.tsx` — use Kokoro speak when available +- `src/screens/TTSSettingsScreen/index.tsx` — add voice picker (8 Kokoro voices) -This check belongs in `useTTSStore.loadModels()` before calling `ttsService.loadModels()`. +### android/build.gradle +- Bump `minSdkVersion` for executorch: **leave at 24**, guard Kokoro at runtime via `Platform.Version >= 33` --- -## Future: Upgrade to OuteTTS 1.0 - -When llama.cpp PR#12794 (DAC decoder) merges and llama.rn PR#300 (codec.cpp integration) ships: - -1. Add `TTS_BACKBONE_MODEL_V2` to `ttsModels.ts` (backbone + DAC vocoder GGUF) -2. `ttsService.ts` API is unchanged — model-agnostic -3. 
Store gets a `modelVersion` setting; 0.3 and 1.0 can coexist on disk +## Status + +| Task | Status | +|---|---| +| OuteTTS speak (Chat Mode) | ✅ Implemented (slow, functional) | +| OuteTTS generate+save (Audio Mode — AI side) | ✅ Implemented | +| Stale-closure bug fix (reads fresh store state) | ✅ Fixed | +| TTSButton + Speak long-press action | ✅ Implemented | +| Generation vs playback state (spinner) | ✅ Implemented | +| 300-char text truncation | ✅ Implemented | +| checkDownloadStatus on app start | ✅ Implemented | +| User voice recording → audio bubble (Audio Mode) | ✅ Implemented | +| Auto-send on voice stop in Audio Mode | ✅ Implemented | +| User audio bubble right-aligned | ✅ Implemented | +| TTS section in Chat Settings modal | ✅ Implemented | +| Chat Settings modal: TTS Settings deep link | ✅ Implemented | +| Multimodal audio input (bypass Whisper for audio-capable models) | ✅ Implemented | +| Kokoro via react-native-executorch | 🔲 Not started | +| KokoroTTSManager component | 🔲 Not started | +| Voice picker in TTSSettingsScreen | 🔲 Not started | +| Kokoro → OuteTTS fallback for Android <13 | 🔲 Not started | diff --git a/ios/Podfile.lock b/ios/Podfile.lock index a076829d0..3f58a70ef 100644 --- a/ios/Podfile.lock +++ b/ios/Podfile.lock @@ -2797,6 +2797,121 @@ PODS: - React-perflogger (= 0.83.1) - React-utils (= 0.83.1) - SocketRocket + - RNAudioAPI (0.11.7): + - boost + - DoubleConversion + - fast_float + - fmt + - glog + - hermes-engine + - RCT-Folly + - RCT-Folly/Fabric + - RCTRequired + - RCTTypeSafety + - React-Core + - React-debug + - React-Fabric + - React-featureflags + - React-graphics + - React-ImageManager + - React-jsi + - React-NativeModulesApple + - React-RCTFabric + - React-renderercss + - React-rendererdebug + - React-utils + - ReactCodegen + - ReactCommon/turbomodule/bridging + - ReactCommon/turbomodule/core + - RNAudioAPI/audioapi (= 0.11.7) + - SocketRocket + - Yoga + - RNAudioAPI/audioapi (0.11.7): + - boost + - DoubleConversion + - 
fast_float + - fmt + - glog + - hermes-engine + - RCT-Folly + - RCT-Folly/Fabric + - RCTRequired + - RCTTypeSafety + - React-Core + - React-debug + - React-Fabric + - React-featureflags + - React-graphics + - React-ImageManager + - React-jsi + - React-NativeModulesApple + - React-RCTFabric + - React-renderercss + - React-rendererdebug + - React-utils + - ReactCodegen + - ReactCommon/turbomodule/bridging + - ReactCommon/turbomodule/core + - RNAudioAPI/audioapi/audioapi_dsp (= 0.11.7) + - RNAudioAPI/audioapi/ios (= 0.11.7) + - SocketRocket + - Yoga + - RNAudioAPI/audioapi/audioapi_dsp (0.11.7): + - boost + - DoubleConversion + - fast_float + - fmt + - glog + - hermes-engine + - RCT-Folly + - RCT-Folly/Fabric + - RCTRequired + - RCTTypeSafety + - React-Core + - React-debug + - React-Fabric + - React-featureflags + - React-graphics + - React-ImageManager + - React-jsi + - React-NativeModulesApple + - React-RCTFabric + - React-renderercss + - React-rendererdebug + - React-utils + - ReactCodegen + - ReactCommon/turbomodule/bridging + - ReactCommon/turbomodule/core + - SocketRocket + - Yoga + - RNAudioAPI/audioapi/ios (0.11.7): + - boost + - DoubleConversion + - fast_float + - fmt + - glog + - hermes-engine + - RCT-Folly + - RCT-Folly/Fabric + - RCTRequired + - RCTTypeSafety + - React-Core + - React-debug + - React-Fabric + - React-featureflags + - React-graphics + - React-ImageManager + - React-jsi + - React-NativeModulesApple + - React-RCTFabric + - React-renderercss + - React-rendererdebug + - React-utils + - ReactCodegen + - ReactCommon/turbomodule/bridging + - ReactCommon/turbomodule/core + - SocketRocket + - Yoga - RNCAsyncStorage (2.2.0): - boost - DoubleConversion @@ -3368,6 +3483,7 @@ DEPENDENCIES: - ReactAppDependencyProvider (from `build/generated/ios/ReactAppDependencyProvider`) - ReactCodegen (from `build/generated/ios/ReactCodegen`) - ReactCommon/turbomodule/core (from `../node_modules/react-native/ReactCommon`) + - RNAudioAPI (from 
`../node_modules/react-native-audio-api`) - "RNCAsyncStorage (from `../node_modules/@react-native-async-storage/async-storage`)" - RNDeviceInfo (from `../node_modules/react-native-device-info`) - RNFS (from `../node_modules/react-native-fs`) @@ -3566,6 +3682,8 @@ EXTERNAL SOURCES: :path: build/generated/ios/ReactCodegen ReactCommon: :path: "../node_modules/react-native/ReactCommon" + RNAudioAPI: + :path: "../node_modules/react-native-audio-api" RNCAsyncStorage: :path: "../node_modules/@react-native-async-storage/async-storage" RNDeviceInfo: @@ -3684,6 +3802,7 @@ SPEC CHECKSUMS: ReactAppDependencyProvider: 0eb286cc274abb059ee601b862ebddac2e681d01 ReactCodegen: 3d48510bcef445f6403c0004047d4d9cbb915435 ReactCommon: ac934cb340aee91282ecd6f273a26d24d4c55cae + RNAudioAPI: 106257d5f3713bb667d6d74ebb3105c9cf5d60db RNCAsyncStorage: 29f0230e1a25f36c20b05f65e2eb8958d6526e82 RNDeviceInfo: 36d7f232bfe7c9b5c494cb7793230424ed32c388 RNFS: 89de7d7f4c0f6bafa05343c578f61118c8282ed8 diff --git a/jest.setup.ts b/jest.setup.ts index 15d0f8cb2..af694a3d2 100644 --- a/jest.setup.ts +++ b/jest.setup.ts @@ -149,6 +149,61 @@ jest.mock('whisper.rn', () => ({ }, }), { virtual: true }); +// react-native-audio-api mock +jest.mock('react-native-audio-api', () => ({ + AudioContext: jest.fn().mockImplementation(() => ({ + createBuffer: jest.fn().mockReturnValue({ copyToChannel: jest.fn() }), + createBufferSource: jest.fn().mockReturnValue({ + connect: jest.fn(), + start: jest.fn(), + stop: jest.fn(), + playbackRate: { value: 1.0 }, + onEnded: null, + buffer: null, + }), + destination: {}, + close: jest.fn(), + })), + AudioRecorder: jest.fn().mockImplementation(() => ({ + enableFileOutput: jest.fn().mockReturnValue({ status: 'success', path: '/mock/audio/input.wav' }), + start: jest.fn().mockReturnValue({ status: 'success', path: '/mock/audio/input.wav' }), + stop: jest.fn().mockReturnValue({ status: 'success', path: '/mock/audio/input.wav', size: 1024, duration: 1.0 }), + pause: jest.fn(), + 
resume: jest.fn(), + isRecording: jest.fn().mockReturnValue(false), + isPaused: jest.fn().mockReturnValue(false), + })), + FileFormat: { Wav: 0, Caf: 1, M4A: 2, Flac: 3 }, + FileDirectory: { Document: 0, Cache: 1 }, +}), { virtual: true }); + +// @react-native-community/slider mock +jest.mock('@react-native-community/slider', () => { + const { View } = require('react-native'); + return { __esModule: true, default: View }; +}); + +// react-native-executorch mock +const mockVoiceConfig = { id: 'mock_voice' }; +jest.mock('react-native-executorch', () => ({ + useTextToSpeech: jest.fn(() => ({ + isReady: true, + downloadProgress: 1, + error: null, + stream: jest.fn(() => Promise.resolve()), + streamStop: jest.fn(), + })), + KOKORO_MEDIUM: 'kokoro-medium', + KOKORO_VOICE_AF_HEART: mockVoiceConfig, + KOKORO_VOICE_AF_RIVER: mockVoiceConfig, + KOKORO_VOICE_AF_SARAH: mockVoiceConfig, + KOKORO_VOICE_AM_ADAM: mockVoiceConfig, + KOKORO_VOICE_AM_MICHAEL: mockVoiceConfig, + KOKORO_VOICE_AM_SANTA: mockVoiceConfig, + KOKORO_VOICE_BF_EMMA: mockVoiceConfig, + KOKORO_VOICE_BM_DANIEL: mockVoiceConfig, +})); + // react-native-fs mock jest.mock('react-native-fs', () => ({ DocumentDirectoryPath: '/mock/documents', diff --git a/package-lock.json b/package-lock.json index 9353548fc..1d6a7f40d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,7 +9,9 @@ "version": "0.0.86", "hasInstallScript": true, "dependencies": { + "@dr.pogodin/react-native-fs": "^2.38.1", "@gorhom/bottom-sheet": "^5.2.8", + "@kesha-antonov/react-native-background-downloader": "^4.5.4", "@op-engineering/op-sqlite": "^15.2.5", "@react-native-async-storage/async-storage": "^2.2.0", "@react-native-community/blur": "^4.4.1", @@ -31,7 +33,10 @@ "patch-package": "^8.0.1", "react": "19.2.0", "react-native": "0.83.1", + "react-native-audio-api": "^0.11.7", "react-native-device-info": "^15.0.1", + "react-native-executorch": "^0.8.1", + "react-native-executorch-bare-resource-fetcher": "^0.8.0", "react-native-fs": 
"^2.20.0", "react-native-gesture-handler": "^2.30.0", "react-native-haptic-feedback": "^2.3.3", @@ -2113,6 +2118,51 @@ "devOptional": true, "license": "MIT" }, + "node_modules/@dr.pogodin/react-native-fs": { + "version": "2.38.1", + "resolved": "https://registry.npmjs.org/@dr.pogodin/react-native-fs/-/react-native-fs-2.38.1.tgz", + "integrity": "sha512-H5uxbEy61as7m5p4dNhv4a/huO8g9r4weu0FM/UjlgRd1PSYqpZaJBi2nhDGums/N+MrK8IZFOHVV5ukHWX8UQ==", + "license": "MIT", + "workspaces": [ + "example" + ], + "dependencies": { + "buffer": "^6.0.3", + "http-status-codes": "^2.3.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/birdofpreyru" + }, + "peerDependencies": { + "react": "*", + "react-native": "*" + } + }, + "node_modules/@dr.pogodin/react-native-fs/node_modules/buffer": { + "version": "6.0.3", + "resolved": "https://registry.npmjs.org/buffer/-/buffer-6.0.3.tgz", + "integrity": "sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", + "dependencies": { + "base64-js": "^1.3.1", + "ieee754": "^1.2.1" + } + }, "node_modules/@egjs/hammerjs": { "version": "2.0.17", "resolved": "https://registry.npmjs.org/@egjs/hammerjs/-/hammerjs-2.0.17.tgz", @@ -2559,6 +2609,15 @@ "@hapi/hoek": "^9.0.0" } }, + "node_modules/@huggingface/jinja": { + "version": "0.5.6", + "resolved": "https://registry.npmjs.org/@huggingface/jinja/-/jinja-0.5.6.tgz", + "integrity": "sha512-MyMWyLnjqo+KRJYSH7oWNbsOn5onuIvfXYPcc0WOGxU0eHUV7oAYUoQTl2BMdu7ml+ea/bu11UM+EshbeHwtIA==", + "license": "MIT", + "engines": { + "node": ">=18" + } + }, "node_modules/@humanwhocodes/config-array": { "version": "0.13.0", "resolved": 
"https://registry.npmjs.org/@humanwhocodes/config-array/-/config-array-0.13.0.tgz", @@ -3110,6 +3169,15 @@ "@jridgewell/sourcemap-codec": "^1.4.14" } }, + "node_modules/@kesha-antonov/react-native-background-downloader": { + "version": "4.5.4", + "resolved": "https://registry.npmjs.org/@kesha-antonov/react-native-background-downloader/-/react-native-background-downloader-4.5.4.tgz", + "integrity": "sha512-WH9n7Sy8MebWiVZqZYpvP4q2sJeOIiNLrbHB64ue/YYsXnWtdJ3iMQowv/QEmU2Cw9biI1d2k8LFHKV9oACLsw==", + "license": "Apache-2.0", + "peerDependencies": { + "react-native": ">=0.57.0" + } + }, "node_modules/@motionone/animation": { "version": "10.18.0", "resolved": "https://registry.npmjs.org/@motionone/animation/-/animation-10.18.0.tgz", @@ -8090,6 +8158,12 @@ "node": ">= 0.8" } }, + "node_modules/http-status-codes": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/http-status-codes/-/http-status-codes-2.3.0.tgz", + "integrity": "sha512-RJ8XvFvpPM/Dmc5SV+dC4y5PCeOhT3x1Hq0NU3rjGeg5a/CqlhZ7uudknPwZFz4aeAXDcbAyaeP7GAo9lvngtA==", + "license": "MIT" + }, "node_modules/https-proxy-agent": { "version": "7.0.6", "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz", @@ -8146,7 +8220,6 @@ "version": "1.2.1", "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", "integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==", - "devOptional": true, "funding": [ { "type": "github", @@ -9609,6 +9682,24 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/jsonrepair": { + "version": "3.13.3", + "resolved": "https://registry.npmjs.org/jsonrepair/-/jsonrepair-3.13.3.tgz", + "integrity": "sha512-BTznj0owIt2CBAH/LTo7+1I5pMvl1e1033LRl/HUowlZmJOIhzC0zbX5bxMngLkfT4WnzPP26QnW5wMr2g9tsQ==", + "license": "ISC", + "bin": { + "jsonrepair": "bin/cli.js" + } + }, + "node_modules/jsonschema": { + "version": "1.5.0", + "resolved": 
"https://registry.npmjs.org/jsonschema/-/jsonschema-1.5.0.tgz", + "integrity": "sha512-K+A9hhqbn0f3pJX17Q/7H6yQfD/5OXgdrR5UE12gMXCiN9D5Xq2o5mddV2QEcX/bjla99ASsAAQUyMCCRWAEhw==", + "license": "MIT", + "engines": { + "node": "*" + } + }, "node_modules/jsx-ast-utils": { "version": "3.3.5", "resolved": "https://registry.npmjs.org/jsx-ast-utils/-/jsx-ast-utils-3.3.5.tgz", @@ -11862,6 +11953,15 @@ "node": ">=8.0" } }, + "node_modules/pngjs": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/pngjs/-/pngjs-7.0.0.tgz", + "integrity": "sha512-LKWqWJRhstyYo9pGvgor/ivk2w94eSjE3RGVuzLGlr3NmD8bf7RcYGze1mNdEHRP6TRP6rMuDHk5t44hnTRyow==", + "license": "MIT", + "engines": { + "node": ">=14.19.0" + } + }, "node_modules/popmotion": { "version": "11.0.3", "resolved": "https://registry.npmjs.org/popmotion/-/popmotion-11.0.3.tgz", @@ -12220,6 +12320,34 @@ } } }, + "node_modules/react-native-audio-api": { + "version": "0.11.7", + "resolved": "https://registry.npmjs.org/react-native-audio-api/-/react-native-audio-api-0.11.7.tgz", + "integrity": "sha512-2oIoP77Tn2nlouRVfEC3bAsuSyKU6xhGNkSnVXTLLQQZslEDoYX2cN9pVRZoWOqhFrLT8q4IZI9HaFgYL13L1A==", + "license": "MIT", + "dependencies": { + "semver": "^7.7.3" + }, + "bin": { + "setup-rn-audio-api-web": "scripts/setup-rn-audio-api-web.js" + }, + "peerDependencies": { + "react": "*", + "react-native": "*" + } + }, + "node_modules/react-native-audio-api/node_modules/semver": { + "version": "7.7.4", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz", + "integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==", + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, "node_modules/react-native-device-info": { "version": "15.0.1", "resolved": "https://registry.npmjs.org/react-native-device-info/-/react-native-device-info-15.0.1.tgz", @@ -12229,6 +12357,38 @@ "react-native": "*" } }, + 
"node_modules/react-native-executorch": { + "version": "0.8.1", + "resolved": "https://registry.npmjs.org/react-native-executorch/-/react-native-executorch-0.8.1.tgz", + "integrity": "sha512-DEVWs+Ki7p1C8mEgsHiabZizO/kDM0zELlJ+JFCfNCb2RrraMUXBTZIARWHPUbxpG17nqFswIZmwjUoNK5V36g==", + "license": "MIT", + "workspaces": [ + "example" + ], + "dependencies": { + "@huggingface/jinja": "^0.5.0", + "jsonrepair": "^3.12.0", + "jsonschema": "^1.5.0", + "pngjs": "^7.0.0", + "zod": "^4.3.6" + }, + "peerDependencies": { + "react": "*", + "react-native": "*" + } + }, + "node_modules/react-native-executorch-bare-resource-fetcher": { + "version": "0.8.0", + "resolved": "https://registry.npmjs.org/react-native-executorch-bare-resource-fetcher/-/react-native-executorch-bare-resource-fetcher-0.8.0.tgz", + "integrity": "sha512-PzSzK31qnKmwW06+JCbpQML24u3XiqYcWKQG0Y1cwPmkOqz0VppI0ZOeCZh03/03SMyuvwwEgteJtgO0uSP8sg==", + "license": "MIT", + "peerDependencies": { + "@dr.pogodin/react-native-fs": "^2.0.0", + "@kesha-antonov/react-native-background-downloader": "^4.0.0", + "react-native": "*", + "react-native-executorch": "*" + } + }, "node_modules/react-native-fit-image": { "version": "1.5.5", "resolved": "https://registry.npmjs.org/react-native-fit-image/-/react-native-fit-image-1.5.5.tgz", @@ -14716,7 +14876,6 @@ "version": "4.3.6", "resolved": "https://registry.npmjs.org/zod/-/zod-4.3.6.tgz", "integrity": "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==", - "dev": true, "license": "MIT", "funding": { "url": "https://github.com/sponsors/colinhacks" diff --git a/package.json b/package.json index 0650d784a..54ceb6a25 100644 --- a/package.json +++ b/package.json @@ -20,7 +20,9 @@ "postinstall": "patch-package" }, "dependencies": { + "@dr.pogodin/react-native-fs": "^2.38.1", "@gorhom/bottom-sheet": "^5.2.8", + "@kesha-antonov/react-native-background-downloader": "^4.5.4", "@op-engineering/op-sqlite": "^15.2.5", 
"@react-native-async-storage/async-storage": "^2.2.0", "@react-native-community/blur": "^4.4.1", @@ -42,7 +44,10 @@ "patch-package": "^8.0.1", "react": "19.2.0", "react-native": "0.83.1", + "react-native-audio-api": "^0.11.7", "react-native-device-info": "^15.0.1", + "react-native-executorch": "^0.8.1", + "react-native-executorch-bare-resource-fetcher": "^0.8.0", "react-native-fs": "^2.20.0", "react-native-gesture-handler": "^2.30.0", "react-native-haptic-feedback": "^2.3.3", diff --git a/src/components/AudioMessageBubble/PlaybackControls.tsx b/src/components/AudioMessageBubble/PlaybackControls.tsx new file mode 100644 index 000000000..8477b595e --- /dev/null +++ b/src/components/AudioMessageBubble/PlaybackControls.tsx @@ -0,0 +1,264 @@ +import React, { useState, useCallback, useEffect, useRef } from 'react'; +import { + View, + Text, + TouchableOpacity, + ActivityIndicator, +} from 'react-native'; +import { ScrollView } from 'react-native-gesture-handler'; +import Slider from '@react-native-community/slider'; +import { stripMarkdownForSpeech } from '../../utils/messageContent'; +import { MarkdownText } from '../MarkdownText'; +import Icon from 'react-native-vector-icons/Feather'; +import { useTTSStore } from '../../stores/ttsStore'; +import type { ThemeColors } from '../../theme'; + +const SPEED_STEPS: number[] = [0.5, 0.8, 0.9, 1.0, 1.1, 1.2, 1.5, 2.0]; + +function formatDuration(seconds: number): string { + const m = Math.floor(seconds / 60); + const s = Math.floor(seconds % 60); + return `${m}:${s.toString().padStart(2, '0')}`; +} + +interface PlaybackState { + isThisPlaying: boolean; + isThisPaused: boolean; + isThisAudible: boolean; + isThisLoading: boolean; +} + +/** Derives playback state for a given messageId from TTS store selectors */ +export function usePlaybackState(messageId: string): PlaybackState { + const isSpeaking = useTTSStore((s) => s.isSpeaking); + const isPaused = useTTSStore((s) => s.isPaused); + const isAudioPlaying = useTTSStore((s) => 
s.isSpeaking); + const currentMessageId = useTTSStore((s) => s.currentMessageId); + + const isThisPlaying = isSpeaking && currentMessageId === messageId && !isPaused; + const isThisPaused = isSpeaking && currentMessageId === messageId && isPaused; + const isThisAudible = isAudioPlaying && currentMessageId === messageId && !isPaused; + const isThisLoading = isThisPlaying && !isThisAudible; + + return { isThisPlaying, isThisPaused, isThisAudible, isThisLoading }; +} + +/** Hook for wall-clock elapsed timer */ +export function useElapsedTimer( + playback: { isThisAudible: boolean; isThisPaused: boolean }, + seekOffsetRef: React.MutableRefObject, +) { + const { isThisAudible, isThisPaused } = playback; + // playSessionId is a monotonic counter that increments on every new play — + // guarantees the effect re-runs even if boolean deps appear unchanged. + const playSessionId = useTTSStore((s) => s.playSessionId); + const [localElapsed, setLocalElapsed] = useState(0); + const startTimeRef = useRef(0); + const pausedAtRef = useRef(0); + + useEffect(() => { + console.log('[Timer] effect: isThisAudible=', isThisAudible, 'isThisPaused=', isThisPaused, 'playSessionId=', playSessionId); + if (!isThisAudible && !isThisPaused) { + if (seekOffsetRef.current === 0) { + setLocalElapsed(0); + pausedAtRef.current = 0; + } + console.log('[Timer] not audible, not paused — resetting'); + return; + } + if (isThisPaused) { + pausedAtRef.current = localElapsed; + console.log('[Timer] paused at', localElapsed); + return; + } + const offset = seekOffsetRef.current || pausedAtRef.current; + seekOffsetRef.current = 0; + startTimeRef.current = Date.now() - offset * 1000; + console.log('[Timer] STARTING interval, offset=', offset); + const id = setInterval(() => { + setLocalElapsed((Date.now() - startTimeRef.current) / 1000); + }, 50); + return () => { console.log('[Timer] CLEARING interval'); clearInterval(id); }; + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [isThisAudible, 
isThisPaused, playSessionId]); + + return { localElapsed, setLocalElapsed }; +} + +/** Play/pause button with loading states */ +export const PlayButton: React.FC<{ + isLoading: boolean; + isThisLoading: boolean; + isThisPlaying: boolean; + onPlayPause: () => void; + colors: ThemeColors; + styles: any; +}> = ({ isLoading, isThisLoading, isThisPlaying, onPlayPause, colors, styles }) => { + if (isLoading) { + return ( + + + + ); + } + if (isThisLoading) { + return ( + + + + ); + } + return ( + + + + ); +}; + +/** Speed cycle chip */ +export const SpeedChip: React.FC<{ + styles: any; +}> = ({ styles }) => { + const speed = useTTSStore((s) => s.settings.speed); + const updateSettings = useTTSStore((s) => s.updateSettings); + + const handleSpeedCycle = useCallback(() => { + let idx = SPEED_STEPS.indexOf(speed); + if (idx < 0) { + idx = SPEED_STEPS.findIndex((s) => s > speed) - 1; + if (idx < 0) idx = 0; + } + const next = (idx + 1) % SPEED_STEPS.length; + updateSettings({ speed: SPEED_STEPS[next] }); + }, [speed, updateSettings]); + + return ( + + {speed}x + + ); +}; + +/** Duration display */ +export const DurationText: React.FC<{ + isLoading: boolean; + totalDuration: number; + styles: any; +}> = ({ isLoading, totalDuration, styles }) => ( + + {isLoading ? 
'—' : formatDuration(totalDuration)} + +); + +/** Seekable progress bar using native Slider component */ +export const SeekBar: React.FC<{ + displayProgress: number; + colors: ThemeColors; + styles: any; + onSeek: (fraction: number) => void; +}> = ({ displayProgress, colors, styles, onSeek }) => { + const [isSeeking, setIsSeeking] = useState(false); + const [seekValue, setSeekValue] = useState(0); + + return ( + { setIsSeeking(true); setSeekValue(val); }} + onValueChange={(val) => { if (isSeeking) setSeekValue(val); }} + onSlidingComplete={(val) => { setIsSeeking(false); onSeek(val); }} + /> + ); +}; + +/** Transcript toggle and content */ +export const TranscriptToggle: React.FC<{ + transcript?: string; + colors: ThemeColors; + styles: any; + isOpen: boolean; + onToggle: (v: boolean) => void; +}> = ({ transcript, colors, styles, isOpen, onToggle }) => { + if (!transcript) return null; + + return ( + onToggle(!isOpen)} + style={styles.transcriptToggle} + > + + {isOpen ? 'Hide transcript' : 'Show transcript'} + + + + ); +}; + +export const TranscriptContent: React.FC<{ + transcript: string; + styles: any; +}> = ({ transcript, styles }) => ( + + + {transcript} + + +); + +/** Hook for seek logic */ +interface SeekHandlerParams { + transcript: string | undefined; + audioPath: string; + messageId: string; + totalDurationRef: React.MutableRefObject; + seekOffsetRef: React.MutableRefObject; + setLocalElapsed: (v: number) => void; + setIsSeeking: (v: boolean) => void; +} + +export function useSeekHandler({ + transcript, audioPath, messageId, + totalDurationRef, seekOffsetRef, setLocalElapsed, setIsSeeking, +}: SeekHandlerParams) { + const stop = useTTSStore((s) => s.stop); + const speak = useTTSStore((s) => s.speak); + + return useCallback((fraction: number) => { + if (!transcript || audioPath) return; + const text = stripMarkdownForSpeech(transcript); + const charOffset = Math.floor(fraction * text.length); + const seekPoint = text.lastIndexOf('. 
', charOffset) + 2 || charOffset; + const remaining = text.slice(seekPoint).trim(); + console.log(`[AudioBubble] seeking to ${Math.round(fraction * 100)}%`, 'charOffset:', charOffset, 'remaining:', remaining.length, 'chars'); + if (!remaining) return; + const seekSeconds = Math.floor(fraction * totalDurationRef.current); + seekOffsetRef.current = seekSeconds; + setLocalElapsed(seekSeconds); + setIsSeeking(true); + stop(); + setTimeout(() => { + speak(remaining, messageId).finally(() => setIsSeeking(false)); + }, 200); + }, [transcript, audioPath, stop, speak, messageId, totalDurationRef, seekOffsetRef, setLocalElapsed, setIsSeeking]); +} diff --git a/src/components/AudioMessageBubble/index.tsx b/src/components/AudioMessageBubble/index.tsx new file mode 100644 index 000000000..c18cfa6c2 --- /dev/null +++ b/src/components/AudioMessageBubble/index.tsx @@ -0,0 +1,390 @@ +import React, { useState, useCallback, useEffect, useRef, useMemo } from 'react'; +import { + View, + Text, + TouchableOpacity, + StyleSheet, + Animated, +} from 'react-native'; +import { stripMarkdownForSpeech } from '../../utils/messageContent'; +import { useTheme, useThemedStyles } from '../../theme'; +import { useTTSStore } from '../../stores/ttsStore'; +import { triggerHaptic } from '../../utils/haptics'; +import { TYPOGRAPHY, SPACING } from '../../constants'; +import type { ThemeColors, ThemeShadows } from '../../theme'; +import { ActionMenuSheet } from '../ChatMessage/components/ActionMenuSheet'; +import { createStyles as createChatStyles } from '../ChatMessage/styles'; +import { + usePlaybackState, + useElapsedTimer, + useSeekHandler, + PlayButton, + SpeedChip, + DurationText, + SeekBar, + TranscriptToggle, + TranscriptContent, +} from './PlaybackControls'; + +const WAVEFORM_BARS = 48; + +interface AudioMessageBubbleProps { + messageId: string; + audioPath: string; + waveformData: number[]; + durationSeconds: number; + transcript?: string; + isUser?: boolean; + isLoading?: boolean; + 
_reasoningContent?: string; + onCopy?: (content: string) => void; + onRetry?: () => void; + onEdit?: (newContent: string) => void; +} + +function subsample(data: number[], count: number): number[] { + if (data.length === 0) { + return Array.from({ length: count }, (_, i) => 0.25 + 0.25 * Math.sin((i / count) * Math.PI * 4)); + } + const step = data.length / count; + const result: number[] = []; + for (let i = 0; i < count; i++) { + result.push(data[Math.floor(i * step)] ?? 0.1); + } + return result; +} + +function normalize(data: number[]): number[] { + const max = Math.max(...data, 0.001); + return data.map((v) => v / max); +} + +/** WhatsApp-style waveform — bars tint as the playhead passes over them. + * Played bars are full color, unplayed bars are muted. */ +const WaveformBars: React.FC<{ + data: number[]; + colors: ThemeColors; + /** 0–1 playback progress — bars behind the playhead are tinted */ + progress?: number; +}> = ({ data, colors, progress = 0 }) => { + const bars = useMemo(() => normalize(subsample(data, WAVEFORM_BARS)), [data]); + + return ( + + {bars.map((shape, i) => { + const played = progress > 0 && (i / bars.length) < progress; + return ( + + ); + })} + + ); +}; + +const barStyles = StyleSheet.create({ + container: { + flex: 1, + flexDirection: 'row', + alignItems: 'center', + gap: 1.5, + height: 40, + overflow: 'hidden', + }, + bar: { + flex: 1, + borderRadius: 2, + }, +}); + +/** Three pulsing dots shown while the LLM is generating */ +const ThinkingDots: React.FC<{ colors: ThemeColors }> = ({ colors }) => { + const dots = useRef([new Animated.Value(0.3), new Animated.Value(0.3), new Animated.Value(0.3)]).current; + + useEffect(() => { + const anims = dots.map((v, i) => + Animated.loop( + Animated.sequence([ + Animated.delay(i * 150), + Animated.timing(v, { toValue: 1, duration: 300, useNativeDriver: false }), + Animated.timing(v, { toValue: 0.3, duration: 300, useNativeDriver: false }), + ]), + ), + ); + anims.forEach((a) => a.start()); + 
return () => anims.forEach((a) => a.stop()); + }, [dots]); + + return ( + + {dots.map((v, i) => ( + + ))} + + ); +}; + +const dotStyles = StyleSheet.create({ + container: { + flex: 1, + flexDirection: 'row', + alignItems: 'center', + gap: 6, + paddingHorizontal: 4, + height: 32, + }, + dot: { + width: 7, + height: 7, + borderRadius: 4, + }, +}); + +export const AudioMessageBubble: React.FC = ({ + messageId, + audioPath, + waveformData, + durationSeconds, + transcript, + isUser = false, + isLoading = false, + _reasoningContent, + onCopy, + onRetry, + onEdit, +}) => { + const { colors } = useTheme(); + const styles = useThemedStyles(createStyles); + const chatStyles = useThemedStyles(createChatStyles); + const [showActionMenu, setShowActionMenu] = useState(false); + const speed = useTTSStore((s) => s.settings.speed); + const playMessage = useTTSStore((s) => s.playMessage); + const speak = useTTSStore((s) => s.speak); + + const { isThisPlaying, isThisPaused, isThisAudible, isThisLoading } = usePlaybackState(messageId); + const currentMessageId = useTTSStore((s) => s.currentMessageId); + + useEffect(() => { + console.log('[AudioBubble] state: messageId=', messageId, 'currentMessageId=', currentMessageId, 'isThisAudible=', isThisAudible, 'isThisPlaying=', isThisPlaying); + }, [messageId, currentMessageId, isThisAudible, isThisPlaying]); + const [showTranscript, setShowTranscript] = useState(false); + const [isSeeking, setIsSeeking] = useState(false); + const seekOffsetRef = useRef(0); + const { localElapsed, setLocalElapsed } = useElapsedTimer({ isThisAudible, isThisPaused }, seekOffsetRef); + + const handlePlayPause = useCallback(() => { + const { pause, resume } = useTTSStore.getState(); + if (isThisPaused) { resume(); return; } + if (isThisPlaying) { pause(); return; } + if (audioPath) { + playMessage(messageId, audioPath); + } else { + const text = stripMarkdownForSpeech(transcript ?? 
''); + speak(text, messageId); + } + }, [isThisPlaying, isThisPaused, playMessage, speak, messageId, audioPath, transcript]); + + const totalDurationRef = useRef(0); + const totalDuration = useMemo(() => { + if (!audioPath && transcript) { + const wordCount = transcript.trim().split(/\s+/).filter(Boolean).length; + return Math.max(1, wordCount / (2.5 * speed)); + } + return durationSeconds; + }, [audioPath, transcript, speed, durationSeconds]); + totalDurationRef.current = totalDuration; + + const handleSeek = useSeekHandler({ + transcript, audioPath, messageId, + totalDurationRef, seekOffsetRef, setLocalElapsed, setIsSeeking, + }); + + const isThisActive = ((isThisPlaying || isThisPaused) && currentMessageId === messageId) || isSeeking; + const progress = isThisActive ? Math.min(1, localElapsed / Math.max(1, totalDuration)) : 0; + + // Waveform + seekbar overlay — seekbar sits on top of the waveform, centered vertically + const waveformWithSeek = ( + + {isLoading && !isUser + ? + : } + {!isLoading && ( + + + + )} + + ); + + const handleLongPress = useCallback(() => { + if (isLoading) return; + triggerHaptic('impactMedium'); + setShowActionMenu(true); + }, [isLoading]); + + const showActions = !!(onCopy || onRetry || onEdit); + + return ( + + + + + {waveformWithSeek} + + + + + + + + {showActions && !isLoading && ( + { triggerHaptic('impactLight'); setShowActionMenu(true); }}> + ••• + + )} + + + + + {showTranscript && transcript ? ( + + ) : null} + + setShowActionMenu(false)} + isUser={isUser} + canEdit={isUser && !!onEdit} + canRetry={!!onRetry} + canGenerateImage={false} + canSpeak={false} + styles={chatStyles} + onCopy={() => { onCopy?.(transcript ?? 
''); setShowActionMenu(false); }} + onEdit={() => setShowActionMenu(false)} + onRetry={() => { onRetry?.(); setShowActionMenu(false); }} + onGenerateImage={() => setShowActionMenu(false)} + onSpeak={() => setShowActionMenu(false)} + /> + + ); +}; + +const createStyles = (colors: ThemeColors, _shadows: ThemeShadows) => ({ + bubble: { + backgroundColor: colors.surface, + borderRadius: 12, + borderWidth: 1, + borderColor: colors.border, + padding: SPACING.md, + width: '88%' as const, + alignSelf: 'flex-start' as const, + gap: SPACING.sm, + overflow: 'hidden' as const, + }, + bubbleUser: { + alignSelf: 'flex-end' as const, + backgroundColor: `${colors.primary}18`, + borderColor: `${colors.primary}40`, + }, + playRow: { + flexDirection: 'row' as const, + alignItems: 'center' as const, + gap: SPACING.xs, + }, + metaRow: { + flexDirection: 'row' as const, + alignItems: 'center' as const, + justifyContent: 'space-between' as const, + }, + metaRight: { + flexDirection: 'row' as const, + alignItems: 'center' as const, + gap: SPACING.sm, + }, + playButton: { + width: 28, + height: 28, + borderRadius: 14, + backgroundColor: `${colors.primary}20`, + alignItems: 'center' as const, + justifyContent: 'center' as const, + }, + playButtonDisabled: { + opacity: 0.35, + }, + duration: { + ...TYPOGRAPHY.meta, + color: colors.textMuted, + minWidth: 32, + textAlign: 'right' as const, + }, + speedChip: { + backgroundColor: colors.surfaceLight, + borderRadius: 10, + paddingHorizontal: SPACING.sm, + paddingVertical: SPACING.xs, + borderWidth: 1, + borderColor: colors.border, + }, + speedText: { + ...TYPOGRAPHY.metaSmall, + color: colors.textSecondary, + }, + waveformSeekContainer: { + flex: 1, + position: 'relative' as const, + marginLeft: SPACING.sm, + }, + seekOverlay: { + position: 'absolute' as const, + top: 0, + left: -16, + right: -16, + bottom: 0, + justifyContent: 'center' as const, + }, + seekSlider: { + height: 40, + }, + transcriptToggle: { + flexDirection: 'row' as const, + 
alignItems: 'center' as const, + gap: SPACING.xs, + }, + transcriptToggleText: { + ...TYPOGRAPHY.meta, + color: colors.textMuted, + }, + transcriptContent: { + paddingTop: SPACING.xs, + }, + transcriptScroll: { + maxHeight: 120, + }, + transcriptText: { + ...TYPOGRAPHY.bodySmall, + lineHeight: 20, + }, + actionHint: { + padding: 4, + }, + actionHintText: { + ...TYPOGRAPHY.bodySmall, + color: colors.textMuted, + letterSpacing: 1, + }, +}); diff --git a/src/components/ChatInput/Attachments.tsx b/src/components/ChatInput/Attachments.tsx index bdf90cdfe..b96e3b53b 100644 --- a/src/components/ChatInput/Attachments.tsx +++ b/src/components/ChatInput/Attachments.tsx @@ -101,9 +101,21 @@ export function useAttachments(setAlertState: (state: AlertState) => void) { } }; + const addAudioAttachment = (uri: string, audioFormat: 'wav' | 'mp3', audioDurationSeconds?: number) => { + const attachment: MediaAttachment = { + id: nextAttachmentId(), + type: 'audio', + uri, + audioFormat, + audioDurationSeconds, + fileName: uri.split('/').pop(), + }; + setAttachments(prev => [...prev, attachment]); + }; + const clearAttachments = () => setAttachments([]); - return { attachments, removeAttachment, clearAttachments, handlePickImage, handlePickDocument }; + return { attachments, removeAttachment, clearAttachments, handlePickImage, handlePickDocument, addAudioAttachment }; } // ─── AttachmentPreview component ───────────────────────────────────────────── @@ -135,6 +147,11 @@ export const AttachmentPreview: React.FC = ({ attachment source={{ uri: attachment.uri }} style={styles.attachmentImage} /> + ) : attachment.type === 'audio' ? 
( + + + Voice + ) : ( diff --git a/src/components/ChatInput/AudioModeLayout.tsx b/src/components/ChatInput/AudioModeLayout.tsx new file mode 100644 index 000000000..1cd957cb6 --- /dev/null +++ b/src/components/ChatInput/AudioModeLayout.tsx @@ -0,0 +1,239 @@ +import React from 'react'; +import { View, TouchableOpacity, Text, ActivityIndicator } from 'react-native'; +import Icon from 'react-native-vector-icons/Feather'; +import { useTheme } from '../../theme'; +import { ImageModeState, MediaAttachment } from '../../types'; +import { VoiceRecordButton } from '../VoiceRecordButton'; +import { triggerHaptic } from '../../utils/haptics'; +import { CustomAlert, hideAlert, AlertState } from '../CustomAlert'; +import { QueueRow } from './Toolbar'; +import { AttachmentPreview } from './Attachments'; +import { AttachPickerPopover, VoicePickerPopover, QuickSettingsPopover } from './Popovers'; +import { useTTSStore } from '../../stores/ttsStore'; +import type { TTSVoice } from '../../engine'; + +interface AudioModeLayoutProps { + styles: any; + disabled?: boolean; + isGenerating?: boolean; + imageMode: ImageModeState; + imageModelLoaded: boolean; + supportsThinking: boolean; + supportsToolCalling: boolean; + enabledToolCount: number; + thinkingEnabled: boolean; + currentVoice: TTSVoice; + // Attachments + attachments: MediaAttachment[]; + onRemoveAttachment: (id: string) => void; + // Queue + queueCount: number; + queuedTexts: string[]; + onClearQueue?: () => void; + // Voice recording + isRecording: boolean; + voiceAvailable: boolean; + isModelLoading: boolean; + isTranscribing: boolean; + partialResult: string; + error: string | null; + onStartRecording: () => void; + onStopRecording: () => void; + onCancelRecording: () => void; + // Handlers + onStop?: () => void; + onImageModeToggle: () => void; + onThinkingToggle: () => void; + onToolsPress?: () => void; + onVisionPress: () => void; + onPickDocument: () => void; + // Popovers + attachPicker: any; + voicePicker: any; + 
quickSettings: any; + supportsVision: boolean; + // Alert + alertState: AlertState; + setAlertState: (s: AlertState) => void; +} + +export const AudioModeLayout: React.FC = ({ + styles, + disabled, + isGenerating, + imageMode, + imageModelLoaded, + supportsThinking, + supportsToolCalling, + enabledToolCount, + thinkingEnabled, + currentVoice, + attachments, + onRemoveAttachment, + queueCount, + queuedTexts, + onClearQueue, + isRecording, + voiceAvailable, + isModelLoading, + isTranscribing, + partialResult, + error, + onStartRecording, + onStopRecording, + onCancelRecording, + onStop, + onImageModeToggle, + onThinkingToggle, + onToolsPress, + onVisionPress, + onPickDocument, + attachPicker, + voicePicker, + quickSettings, + supportsVision, + alertState, + setAlertState, +}) => { + const { colors } = useTheme(); + const isChangingVoice = false; // Voice change state is handled by the engine internally + + const handleStop = () => { + if (onStop && isGenerating) { + triggerHaptic('impactLight'); + onStop(); + } + }; + + const audioStopButton = isGenerating && onStop ? ( + + + + ) : null; + + return ( + + + + + attachPicker.show()} + disabled={disabled} + hitSlop={{ top: 4, bottom: 4, left: 8, right: 8 }} + > + + + { + triggerHaptic('impactLight'); + useTTSStore.getState().updateSettings({ interfaceMode: 'chat' }); + }} + hitSlop={{ top: 4, bottom: 4, left: 8, right: 8 }} + > + + + + + + {supportsThinking && ( + + + + )} + { triggerHaptic('impactLight'); onToolsPress?.(); }} + disabled={disabled || !supportsToolCalling} + hitSlop={{ top: 4, bottom: 4, left: 8, right: 8 }} + > + 0 ? colors.primary : !supportsToolCalling ? colors.textMuted : colors.textSecondary} /> + + voicePicker.show()} + hitSlop={{ top: 4, bottom: 4, left: 8, right: 8 }} + > + {isChangingVoice + ? + : } + {currentVoice.label} + + + {isGenerating && onStop ? 
( + audioStopButton + ) : ( + + )} + + + + + + setAlertState(hideAlert())} + /> + + ); +}; diff --git a/src/components/ChatInput/Popovers.tsx b/src/components/ChatInput/Popovers.tsx index 52a61b694..53fc42f77 100644 --- a/src/components/ChatInput/Popovers.tsx +++ b/src/components/ChatInput/Popovers.tsx @@ -1,11 +1,15 @@ import React from 'react'; import { View, TouchableOpacity, Text, StyleSheet, Modal, TouchableWithoutFeedback } from 'react-native'; import Icon from 'react-native-vector-icons/Feather'; +import { useNavigation } from '@react-navigation/native'; import { useTheme } from '../../theme'; import { ImageModeState } from '../../types'; -import { useAppStore } from '../../stores'; +import { useAppStore, useTTSStore } from '../../stores'; import { triggerHaptic } from '../../utils/haptics'; -import { FONTS } from '../../constants'; +import { FONTS, TYPOGRAPHY } from '../../constants'; +import type { TTSVoice } from '../../engine'; +import type { NativeStackNavigationProp } from '@react-navigation/native-stack'; +import type { RootStackParamList } from '../../navigation/types'; // ─── Shared Styles ────────────────────────────────────────────────────────── @@ -100,11 +104,28 @@ export const QuickSettingsPopover: React.FC = ({ }) => { const { colors } = useTheme(); const { settings, updateSettings } = useAppStore(); + const { settings: ttsSettings, isReady: ttsReady, updateSettings: updateTTSSettings, initializeEngine } = useTTSStore(); + const navigation = useNavigation>(); if (!visible) return null; const imgBadge = getImageModeBadge(imageMode, colors); const tools = getToolsStyle(supportsToolCalling, enabledToolCount, colors); + const ttsMode = ttsSettings.interfaceMode; + const ttsBadge = !ttsReady + ? { label: 'N/A', bg: colors.textMuted } + : ttsMode === 'audio' + ? 
{ label: 'Audio', bg: colors.primary } + : { label: 'Chat', bg: `${colors.textMuted}80` }; + + const handleTTSToggle = () => { + triggerHaptic('impactLight'); + if (!ttsReady) { onClose(); navigation.navigate('TTSSettings'); return; } + onClose(); + const next = ttsMode === 'audio' ? 'chat' : 'audio'; + updateTTSSettings({ interfaceMode: next }); + if (next === 'audio') initializeEngine(); + }; return ( @@ -150,6 +171,18 @@ export const QuickSettingsPopover: React.FC = ({ )} + + + Voice + + {ttsBadge.label} + + + = ({ ); }; + +// ─── Voice Picker Popover ────────────────────────────────────────────────── + +interface VoicePickerPopoverProps { + visible: boolean; + onClose: () => void; + anchorY: number; + anchorX: number; +} + +export const VoicePickerPopover: React.FC = ({ + visible, onClose, anchorY, anchorX, +}) => { + const { colors } = useTheme(); + const { voices, activeVoiceId, isSpeaking, stop, setVoice } = useTTSStore(); + + if (!visible) return null; + + const handleSelect = (voice: TTSVoice) => { + triggerHaptic('impactLight'); + if (isSpeaking) { stop(); } + setVoice(voice.id); + onClose(); + }; + + return ( + + + + + + {voices.map((voice) => { + const isActive = voice.id === activeVoiceId; + return ( + handleSelect(voice)} + > + + + + {voice.label} + + + {voice.metadata.persona || ''} + + + {isActive && } + + ); + })} + + + + + + ); +}; + +const voicePickerStyles = StyleSheet.create({ + popover: { + minWidth: 200, + }, + labelCol: { + flex: 1, + }, + accent: { + ...TYPOGRAPHY.meta, + marginTop: 1, + }, +}); diff --git a/src/components/ChatInput/Voice.ts b/src/components/ChatInput/Voice.ts index 1cc66a19e..616b6bcaf 100644 --- a/src/components/ChatInput/Voice.ts +++ b/src/components/ChatInput/Voice.ts @@ -1,35 +1,195 @@ -import { useEffect, useRef } from 'react'; +import { useEffect, useRef, useState } from 'react'; import { useWhisperTranscription } from '../../hooks/useWhisperTranscription'; -import { useWhisperStore } from '../../stores'; +import { 
useWhisperStore, useChatStore } from '../../stores'; +import { useTTSStore } from '../../stores/ttsStore'; +import { llmService } from '../../services/llm'; +import { audioRecorderService } from '../../services/audioRecorderService'; +import { whisperService } from '../../services/whisperService'; +import logger from '../../utils/logger'; interface UseVoiceInputParams { conversationId?: string | null; onTranscript: (text: string) => void; + onAudioAttachment?: (uri: string, format: 'wav' | 'mp3', durationSeconds?: number) => void; + /** Called in Audio Mode to auto-send. Includes audio info so caller can build attachment atomically. */ + onAutoSend?: (text: string, audio: { uri: string; format: 'wav' | 'mp3'; durationSeconds: number }) => void; } -export function useVoiceInput({ conversationId, onTranscript }: UseVoiceInputParams) { +export function useVoiceInput({ conversationId, onTranscript, onAudioAttachment, onAutoSend }: UseVoiceInputParams) { const recordingConversationIdRef = useRef(null); const onTranscriptRef = useRef(onTranscript); onTranscriptRef.current = onTranscript; + const onAudioAttachmentRef = useRef(onAudioAttachment); + onAudioAttachmentRef.current = onAudioAttachment; + const onAutoSendRef = useRef(onAutoSend); + onAutoSendRef.current = onAutoSend; const { downloadedModelId } = useWhisperStore(); + const [isDirectRecording, setIsDirectRecording] = useState(false); + const [isAudioModeRecording, setIsAudioModeRecording] = useState(false); + const [isTranscribingFile, setIsTranscribingFile] = useState(false); + const [directError, setDirectError] = useState(null); const { - isRecording, + isRecording: isWhisperRecording, isModelLoading, - isTranscribing, + isTranscribing: isWhisperTranscribing, partialResult, finalResult, - error, - startRecording: startRecordingBase, - stopRecording, + error: whisperError, + startRecording: startWhisperRecording, + stopRecording: stopWhisperRecording, clearResult, } = useWhisperTranscription(); - const 
voiceAvailable = !!downloadedModelId; + const supportsDirectAudio = (): boolean => { + const support = llmService.getMultimodalSupport(); + return Boolean(support?.audio) && audioRecorderService.supportsDirectAudioInput(); + }; + + const isInAudioInterfaceMode = (): boolean => + useTTSStore.getState().settings.interfaceMode === 'audio'; + + // Use file-based transcription path when: Audio Mode + Whisper available + not direct audio model + const shouldUseFilePath = (): boolean => + isInAudioInterfaceMode() && !!downloadedModelId && !supportsDirectAudio(); + + const isTranscribing = isWhisperTranscribing || isTranscribingFile; + const isRecording = isDirectRecording || isAudioModeRecording || isWhisperRecording; + const error = directError ?? whisperError; + + // voiceAvailable: direct audio OR whisper downloaded + const voiceAvailable = supportsDirectAudio() || !!downloadedModelId; const startRecording = async () => { recordingConversationIdRef.current = conversationId || null; - await startRecordingBase(); + setDirectError(null); + // Stop any TTS playback before recording — mic and speaker shouldn't overlap + const tts = useTTSStore.getState(); + if (tts.isSpeaking) { tts.stop(); } + + if (supportsDirectAudio()) { + try { + setIsDirectRecording(true); + await audioRecorderService.startRecording(); + } catch (err) { + setIsDirectRecording(false); + const msg = err instanceof Error ? err.message : 'Recording failed'; + logger.error('[Voice] Direct audio recording error:', err); + setDirectError(msg); + } + return; + } + + if (shouldUseFilePath()) { + try { + setIsAudioModeRecording(true); + await audioRecorderService.startRecording(); + } catch (err) { + setIsAudioModeRecording(false); + const msg = err instanceof Error ? 
err.message : 'Recording failed'; + logger.error('[Voice] Audio mode recording error:', err); + setDirectError(msg); + } + return; + } + + await startWhisperRecording(); + }; + + const stopRecording = async () => { + if (isDirectRecording) { + try { + const { path, durationSeconds } = await audioRecorderService.stopRecording(); + setIsDirectRecording(false); + if (!recordingConversationIdRef.current || recordingConversationIdRef.current === conversationId) { + const format = audioRecorderService.getFormat(); + // In Audio Mode, auto-send directly — no transcription needed for multimodal models + if (onAutoSendRef.current && isInAudioInterfaceMode()) { + onAutoSendRef.current('', { uri: path, format, durationSeconds }); + + // Parallel transcription: send audio to model immediately, transcribe in background + // so the voice bubble gets a transcript for display/playback review + if (downloadedModelId) { + const convId = conversationId; + whisperService.transcribeFile(path).then(text => { + if (!text?.trim() || !convId) return; + const conv = useChatStore.getState().conversations.find(c => c.id === convId); + const msg = conv?.messages.find(m => + m.role === 'user' && m.attachments?.some(a => a.uri === path), + ); + if (msg) { + useChatStore.getState().updateMessageContent(convId, msg.id, text.trim()); + } + }).catch(err => logger.error('[Voice] Background transcription error:', err)); + } + } else { + onAudioAttachmentRef.current?.(path, format, durationSeconds); + } + } + recordingConversationIdRef.current = null; + } catch (err) { + setIsDirectRecording(false); + logger.error('[Voice] Failed to stop direct recording:', err); + } + return; + } + + if (isAudioModeRecording) { + try { + const { path, durationSeconds } = await audioRecorderService.stopRecording(); + setIsAudioModeRecording(false); + if (recordingConversationIdRef.current && recordingConversationIdRef.current !== conversationId) { + recordingConversationIdRef.current = null; + return; + } + 
setIsTranscribingFile(true); + let text = ''; + try { + text = await whisperService.transcribeFile(path); + } catch (transcribeErr) { + logger.error('[Voice] File transcription error:', transcribeErr); + } + setIsTranscribingFile(false); + recordingConversationIdRef.current = null; + if (text.trim()) { + if (onAutoSendRef.current) { + onAutoSendRef.current(text.trim(), { uri: path, format: 'wav', durationSeconds }); + } else { + onAudioAttachmentRef.current?.(path, 'wav', durationSeconds); + onTranscriptRef.current(text.trim()); + } + } else { + // Transcription returned nothing — clip too short or too quiet + setDirectError("Couldn't hear that — try again"); + setTimeout(() => setDirectError(null), 3000); + } + } catch (err) { + setIsAudioModeRecording(false); + setIsTranscribingFile(false); + logger.error('[Voice] Failed to stop audio mode recording:', err); + } + return; + } + + await stopWhisperRecording(); + }; + + const cancelRecording = () => { + if (isDirectRecording) { + audioRecorderService.cancelRecording(); + setIsDirectRecording(false); + recordingConversationIdRef.current = null; + return; + } + if (isAudioModeRecording) { + audioRecorderService.cancelRecording(); + setIsAudioModeRecording(false); + recordingConversationIdRef.current = null; + return; + } + stopWhisperRecording(); + clearResult(); + recordingConversationIdRef.current = null; }; useEffect(() => { @@ -49,5 +209,20 @@ export function useVoiceInput({ conversationId, onTranscript }: UseVoiceInputPar } }, [finalResult, clearResult, conversationId]); - return { isRecording, isModelLoading, isTranscribing, partialResult, error, voiceAvailable, startRecording, stopRecording, clearResult }; + return { + isRecording, + isModelLoading, + isTranscribing, + partialResult, + error, + voiceAvailable, + startRecording, + stopRecording, + cancelRecording, + clearResult, + /** True when model accepts audio directly (no Whisper needed) */ + isDirectAudioMode: supportsDirectAudio(), + /** True when 
recording in Audio Mode for file-based transcription */ + isAudioModeRecording, + }; } diff --git a/src/components/ChatInput/index.tsx b/src/components/ChatInput/index.tsx index 1ebbb496e..7368cfb9f 100644 --- a/src/components/ChatInput/index.tsx +++ b/src/components/ChatInput/index.tsx @@ -1,4 +1,4 @@ -import React, { useState, useRef, useEffect } from 'react'; +import React, { useState, useRef, useEffect, useMemo } from 'react'; import { View, TextInput, TouchableOpacity, Animated, StyleSheet } from 'react-native'; import Icon from 'react-native-vector-icons/Feather'; import { useTheme, useThemedStyles } from '../../theme'; @@ -13,6 +13,10 @@ import { AttachmentPreview, useAttachments } from './Attachments'; import { useVoiceInput } from './Voice'; import { QuickSettingsPopover, AttachPickerPopover } from './Popovers'; import { useKeyboardAwarePopover } from './useKeyboardAwarePopover'; +import { useTTSStore } from '../../stores/ttsStore'; +import { useAppStore } from '../../stores'; +import type { TTSVoice } from '../../engine'; +import { AudioModeLayout } from './AudioModeLayout'; interface ChatInputProps { onSend: (message: string, attachments?: MediaAttachment[], imageMode?: ImageModeState) => void; @@ -33,7 +37,6 @@ interface ChatInputProps { supportsToolCalling?: boolean; supportsThinking?: boolean; onRepairVision?: () => void; - /** When set, mounts a single AttachStep for that index. Only one at a time to avoid waypoint dots. 
*/ activeSpotlight?: number | null; } @@ -69,7 +72,9 @@ export const ChatInput: React.FC = ({ const [alertState, setAlertState] = useState(initialAlertState); const quickSettings = useKeyboardAwarePopover(); const attachPicker = useKeyboardAwarePopover(); + const voicePicker = useKeyboardAwarePopover(); const inputRef = useRef(null); + const attachmentsRef = useRef([]); const hasText = message.length > 0; const iconsAnim = useRef(new Animated.Value(0)).current; @@ -81,9 +86,18 @@ export const ChatInput: React.FC = ({ }).start(); }, [hasText, iconsAnim]); - const { attachments, removeAttachment, clearAttachments, handlePickImage, handlePickDocument } = useAttachments(setAlertState); + const { attachments, removeAttachment, clearAttachments, handlePickImage, handlePickDocument, addAudioAttachment } = useAttachments(setAlertState); + attachmentsRef.current = attachments; + const ttsInterfaceMode = useTTSStore((s) => s.settings.interfaceMode); + const activeVoiceId = useTTSStore((s) => s.activeVoiceId); + const voices = useTTSStore((s) => s.voices); + const isAudioMode = ttsInterfaceMode === 'audio'; + const currentVoice: TTSVoice = useMemo( + () => voices.find((v) => v.id === activeVoiceId) ?? voices[0] ?? { id: 'default', label: 'Default', metadata: {} }, + [activeVoiceId, voices], + ); - const { isRecording, isModelLoading, isTranscribing, partialResult, error, voiceAvailable, startRecording, stopRecording, clearResult } = useVoiceInput({ + const { isRecording, isModelLoading, isTranscribing, partialResult, error, voiceAvailable, startRecording, stopRecording, cancelRecording } = useVoiceInput({ conversationId, onTranscript: (text) => { setMessage(prev => { @@ -91,8 +105,33 @@ export const ChatInput: React.FC = ({ return prefix + text; }); }, + onAudioAttachment: (uri, format, durationSeconds) => { + addAudioAttachment(uri, format, durationSeconds); + }, + onAutoSend: isAudioMode ? 
(text, audio) => { + const audioAttachment: MediaAttachment = { + id: `audio-${Date.now()}`, + type: 'audio', + uri: audio.uri, + audioFormat: audio.format, + audioDurationSeconds: audio.durationSeconds, + fileName: audio.uri.split('/').pop(), + }; + triggerHaptic('impactMedium'); + const all = [...attachmentsRef.current, audioAttachment]; + onSend(text, all, imageMode); + clearAttachments(); + } : undefined, }); + const { settings: appSettings, updateSettings: updateAppSettings } = useAppStore(); + const thinkingEnabled = appSettings.thinkingEnabled; + + const handleThinkingToggle = () => { + triggerHaptic('impactLight'); + updateAppSettings({ thinkingEnabled: !thinkingEnabled }); + }; + const canSend = (message.trim().length > 0 || attachments.length > 0) && !disabled; const handleSend = () => { @@ -137,9 +176,49 @@ export const ChatInput: React.FC = ({ } }; - const handleQuickSettingsPress = () => quickSettings.show(); - - const handleAttachPress = () => attachPicker.show(); + // ─── Audio mode: simplified mic-only layout ───────────────────────────────── + if (isAudioMode) { + return ( + + ); + } const actionButton = canSend ? 
( = ({ disabled={disabled} onStartRecording={startRecording} onStopRecording={stopRecording} - onCancelRecording={() => { stopRecording(); clearResult(); }} + onCancelRecording={cancelRecording} asSendButton /> ); - const content = ( + return ( = ({ onClearQueue={onClearQueue} /> - {/* Pill: text input + right icons */} = ({ blurOnSubmit={false} returnKeyType="default" /> - {/* Icons collapse when user starts typing, reappear when input is empty */} = ({ overflow: 'hidden' as const, }]} > - {/* Attach button — opens picker for image or document */} attachPicker.show()} disabled={disabled} hitSlop={{ top: 4, bottom: 4, left: 4, right: 4 }} > - + - - {/* Quick settings button */} + {supportsThinking && ( + + + + )} quickSettings.show()} disabled={disabled} hitSlop={{ top: 4, bottom: 4, left: 4, right: 4 }} > - - {/* Circular action button — conditionally wrapped with AttachStep */} {activeSpotlight === 12 ? ( {actionButton} ) : actionButton} @@ -253,7 +332,6 @@ export const ChatInput: React.FC = ({ onPhoto={handleVisionPress} onDocument={handlePickDocument} /> - = ({ enabledToolCount={enabledToolCount} onToolsPress={onToolsPress} /> - = ({ /> ); - - return content; }; const spotlightStyles = StyleSheet.create({ centered: { alignSelf: 'center' }, }); - diff --git a/src/components/ChatInput/styles.ts b/src/components/ChatInput/styles.ts index a9f8df69c..7aab9a884 100644 --- a/src/components/ChatInput/styles.ts +++ b/src/components/ChatInput/styles.ts @@ -1,5 +1,5 @@ import type { ThemeColors, ThemeShadows } from '../../theme'; -import { FONTS } from '../../constants'; +import { FONTS, TYPOGRAPHY, SPACING } from '../../constants'; import { Platform } from 'react-native'; export const PILL_ICON_SIZE = 32; @@ -208,4 +208,25 @@ export const createStyles = (colors: ThemeColors, _shadows: ThemeShadows) => ({ fontWeight: '500' as const, color: colors.primary, }, + // Audio mode layout + audioModeRow: { + flexDirection: 'row' as const, + alignItems: 'center' as const, + 
justifyContent: 'center' as const, + gap: SPACING.md, + paddingVertical: SPACING.xs, + }, + // Voice cycle button — shows icon + voice name + audioVoiceButton: { + flexDirection: 'row' as const, + alignItems: 'center' as const, + gap: 4, + paddingHorizontal: SPACING.sm, + height: 32, + borderRadius: 16, + }, + audioVoiceLabel: { + ...TYPOGRAPHY.meta, + color: colors.textSecondary, + }, }); diff --git a/src/components/ChatInput/useKeyboardAwarePopover.ts b/src/components/ChatInput/useKeyboardAwarePopover.ts index 13cdfaa4c..dc4f0b7b8 100644 --- a/src/components/ChatInput/useKeyboardAwarePopover.ts +++ b/src/components/ChatInput/useKeyboardAwarePopover.ts @@ -1,13 +1,15 @@ import { useRef, useEffect, useState, useCallback } from 'react'; import { Keyboard, Dimensions, Platform, StatusBar, TouchableOpacity } from 'react-native'; -import { SPACING } from '../../constants'; /** * Hook that manages keyboard-aware popover positioning. * When the keyboard is visible, dismisses it and waits for `keyboardDidHide` * before measuring position to ensure correct coordinates. + * + * anchorY → distance from screen bottom to trigger top (popover sits above trigger) + * anchorX → distance from screen right to trigger right edge (popover right-aligns with trigger) */ -export function useKeyboardAwarePopover(offsetX: number = SPACING.md) { +export function useKeyboardAwarePopover() { const [anchor, setAnchor] = useState({ y: 0, x: 0 }); const [visible, setVisible] = useState(false); const triggerRef = useRef>(null); @@ -27,13 +29,15 @@ export function useKeyboardAwarePopover(offsetX: number = SPACING.md) { const show = useCallback(() => { const measureAndShow = () => { - triggerRef.current?.measureInWindow?.((...args: number[]) => { - const screenH = Dimensions.get('window').height; - // On Android, measureInWindow Y includes the status bar but - // Dimensions.get('window').height may not — subtract the offset - // so the popover sits snugly above the trigger button. 
+ triggerRef.current?.measureInWindow?.((btnX: number, btnY: number, btnW: number) => { + const { height: screenH, width: screenW } = Dimensions.get('window'); + // On Android, measureInWindow Y includes the status bar height. const statusBarOffset = Platform.OS === 'android' ? (StatusBar.currentHeight ?? 0) : 0; - setAnchor({ y: screenH - (args[1] ?? 0) - statusBarOffset, x: offsetX }); + // bottom: how far the popover bottom sits above the screen bottom (= above the trigger) + const y = screenH - (btnY ?? 0) - statusBarOffset; + // right: align popover's right edge with the trigger button's right edge + const x = screenW - ((btnX ?? 0) + (btnW ?? 0)); + setAnchor({ y, x }); }); setVisible(true); }; @@ -54,7 +58,7 @@ export function useKeyboardAwarePopover(offsetX: number = SPACING.md) { } else { measureAndShow(); } - }, [offsetX]); + }, []); const hide = useCallback(() => setVisible(false), []); diff --git a/src/components/ChatMessage/components/ActionMenuSheet.tsx b/src/components/ChatMessage/components/ActionMenuSheet.tsx index 1f380fe2d..802bc5db1 100644 --- a/src/components/ChatMessage/components/ActionMenuSheet.tsx +++ b/src/components/ChatMessage/components/ActionMenuSheet.tsx @@ -12,11 +12,13 @@ interface ActionMenuSheetProps { canEdit: boolean; canRetry: boolean; canGenerateImage: boolean; + canSpeak: boolean; styles: any; onCopy: () => void; onEdit: () => void; onRetry: () => void; onGenerateImage: () => void; + onSpeak: () => void; } export function ActionMenuSheet({ @@ -26,11 +28,13 @@ export function ActionMenuSheet({ canEdit, canRetry, canGenerateImage, + canSpeak, styles, onCopy, onEdit, onRetry, onGenerateImage, + onSpeak, }: ActionMenuSheetProps) { const { colors } = useTheme(); @@ -89,6 +93,18 @@ export function ActionMenuSheet({ Generate Image )} + + {!isUser && canSpeak && ( + + + Speak + + )} ); diff --git a/src/components/ChatMessage/components/MessageAttachments.tsx b/src/components/ChatMessage/components/MessageAttachments.tsx index 
adead2c98..b798a2fcd 100644 --- a/src/components/ChatMessage/components/MessageAttachments.tsx +++ b/src/components/ChatMessage/components/MessageAttachments.tsx @@ -78,7 +78,22 @@ export function MessageAttachments({ return ( {attachments.map((attachment, index) => - attachment.type === 'document' ? ( + attachment.type === 'audio' ? ( + + + + Voice message + + + ) : attachment.type === 'document' ? ( ); } + // No content but may have thinking — render ThinkingBlock alone (audio mode above-bubble use case) + if (parsedContent.thinking) { + return ( + + ); + } return null; } diff --git a/src/components/ChatMessage/index.tsx b/src/components/ChatMessage/index.tsx index d80310b7d..f8c6e83f5 100644 --- a/src/components/ChatMessage/index.tsx +++ b/src/components/ChatMessage/index.tsx @@ -1,6 +1,7 @@ import React, { useState } from 'react'; import { View, Text, TouchableOpacity, Clipboard } from 'react-native'; import { useTheme, useThemedStyles } from '../../theme'; +import { useTTSStore } from '../../stores/ttsStore'; import Icon from 'react-native-vector-icons/Feather'; import { stripControlTokens } from '../../utils/messageContent'; import { CustomAlert, showAlert, hideAlert, AlertState, initialAlertState } from '../CustomAlert'; @@ -133,14 +134,16 @@ type MetaRowProps = { isStreaming?: boolean; showActions: boolean; onMenuOpen: () => void; + metaExtra?: React.ReactNode; }; -const MessageMetaRow: React.FC = ({ message, styles, isStreaming, showActions, onMenuOpen }) => ( +const MessageMetaRow: React.FC = ({ message, styles, isStreaming, showActions, onMenuOpen, metaExtra }) => ( {formatTime(message.timestamp)} {message.generationTimeMs != null && message.role === 'assistant' && ( {formatDuration(message.generationTimeMs)} )} + {metaExtra} {showActions && !isStreaming && ( ••• @@ -157,7 +160,9 @@ const ToolCallWithThinking: React.FC<{ return ( {!!tc?.thinking && ( - + + + )} {hasText && ( @@ -179,11 +184,17 @@ export const ChatMessage: React.FC = ({ onGenerateImage, 
showActions = true, canGenerateImage = false, + canSpeak: canSpeakProp = false, + onSpeak: onSpeakProp, showGenerationDetails = false, animateEntry = false, + metaExtra, }) => { const { colors } = useTheme(); const styles = useThemedStyles(createStyles); + const ttsCanSpeak = useTTSStore( + s => s.settings.enabled && s.isReady, + ); const [showActionMenu, setShowActionMenu] = useState(false); const [isEditing, setIsEditing] = useState(false); const [editedContent, setEditedContent] = useState(message.content); @@ -242,6 +253,17 @@ export const ChatMessage: React.FC = ({ setShowActionMenu(false); }; + const canSpeak = !isUser && !isStreaming && (canSpeakProp || ttsCanSpeak); + + const handleSpeak = () => { + setShowActionMenu(false); + if (onSpeakProp) { + onSpeakProp(); + return; + } + useTTSStore.getState().speak(displayContent, message.id); + }; + if (message.isSystemInfo) { return setAlertState(hideAlert())} />; @@ -291,6 +313,7 @@ export const ChatMessage: React.FC = ({ isStreaming={isStreaming} showActions={showActions} onMenuOpen={() => setShowActionMenu(true)} + metaExtra={metaExtra} /> {showGenerationDetails && !isUser && message.generationMeta && ( @@ -310,11 +333,13 @@ export const ChatMessage: React.FC = ({ canEdit={!!onEdit} canRetry={!!onRetry} canGenerateImage={canGenerateImage && !!onGenerateImage} + canSpeak={canSpeak} styles={styles} onCopy={handleCopy} onEdit={handleEdit} onRetry={handleRetry} onGenerateImage={handleGenerateImage} + onSpeak={handleSpeak} /> ({ overflow: 'hidden' as const, width: '100%' as const, }, + /** Constrains the ThinkingBlock when rendered outside a message bubble (e.g. 
ToolCallWithThinking) */ + thinkingBlockWrapper: { + width: '88%' as const, + alignSelf: 'flex-start' as const, + }, thinkingHeader: { flexDirection: 'row' as const, alignItems: 'flex-start' as const, diff --git a/src/components/ChatMessage/types.ts b/src/components/ChatMessage/types.ts index f93ef8ec2..becd367aa 100644 --- a/src/components/ChatMessage/types.ts +++ b/src/components/ChatMessage/types.ts @@ -10,8 +10,12 @@ export interface ChatMessageProps { onGenerateImage?: (prompt: string) => void; showActions?: boolean; canGenerateImage?: boolean; + canSpeak?: boolean; + onSpeak?: () => void; showGenerationDetails?: boolean; animateEntry?: boolean; + /** Extra element rendered at the end of the meta row (e.g. TTSButton) */ + metaExtra?: React.ReactNode; } export interface ParsedContent { diff --git a/src/components/EngineBridge.tsx b/src/components/EngineBridge.tsx new file mode 100644 index 000000000..a877b0113 --- /dev/null +++ b/src/components/EngineBridge.tsx @@ -0,0 +1,37 @@ +/** + * EngineBridge + * + * Renders the React bridge component for the currently active TTS engine + * (if it needs one). Mount once at the app root. + * + * Engines that are fully imperative (OuteTTS, Qwen3-TTS) return null + * from getBridgeComponent() and this renders nothing. + * + * Hook-based engines (Kokoro) return a component that mounts their + * React hooks and registers imperative handles with the engine instance. + * + * Platform gating: if the engine declares platformRequirements and the + * device doesn't meet them, the bridge is not rendered (prevents crashes + * from mounting native hooks on unsupported OS versions). 
+ */ +import React, { useMemo } from 'react'; +import { useTTSStore } from '../stores/ttsStore'; +import { ttsRegistry } from '../engine'; + +export const EngineBridge: React.FC = () => { + const engineId = useTTSStore(s => s.settings.engineId); + + const BridgeComponent = useMemo(() => { + if (!ttsRegistry.has(engineId)) return null; + try { + const engine = ttsRegistry.getEngine(engineId); + if (!engine.isSupported()) return null; + return engine.getBridgeComponent(); + } catch { + return null; + } + }, [engineId]); + + if (!BridgeComponent) return null; + return ; +}; diff --git a/src/components/GenerationSettingsModal/ImageQualitySliders.tsx b/src/components/GenerationSettingsModal/ImageQualitySliders.tsx index f1e0544dc..2feac93a0 100644 --- a/src/components/GenerationSettingsModal/ImageQualitySliders.tsx +++ b/src/components/GenerationSettingsModal/ImageQualitySliders.tsx @@ -1,6 +1,6 @@ import React from 'react'; import { View, Text, Switch, Platform, TouchableOpacity } from 'react-native'; -import Slider from '@react-native-community/slider'; +import { NumericStepper } from '../NumericStepper'; import { useTheme, useThemedStyles } from '../../theme'; import { useAppStore } from '../../stores'; import { useClearGpuCache } from '../../hooks/useImageGenerationSettings'; @@ -24,70 +24,38 @@ const ClearGPUCacheButton: React.FC = () => { ); }; -/** Basic sliders: Image Steps + Image Size */ +/** Basic controls: Image Steps + Image Size */ export const ImageQualityBasicSliders: React.FC = () => { - const { colors } = useTheme(); const styles = useThemedStyles(createStyles); const { settings, updateSettings } = useAppStore(); return ( <> - - Image Steps - {settings.imageSteps || 8} - - - 4-8 steps for speed, 20-50 for quality - - Image Steps + 4-8 steps for speed, 20-50 for quality + updateSettings({ imageSteps: value })} - minimumTrackTintColor={colors.primary} - maximumTrackTintColor={colors.surfaceLight} - thumbTintColor={colors.primary} + min={4} max={50} 
step={1} + onChange={(value) => updateSettings({ imageSteps: value })} /> - - 4 - 50 - - - Image Size - - {settings.imageWidth ?? 256}x{settings.imageHeight ?? 256} - - - - Output resolution (smaller = faster, larger = more detail) - - Image Size + Output resolution (smaller = faster, larger = more detail) + updateSettings({ imageWidth: value, imageHeight: value })} - minimumTrackTintColor={colors.primary} - maximumTrackTintColor={colors.surfaceLight} - thumbTintColor={colors.primary} + min={128} max={512} step={64} + formatValue={(v) => `${v}x${v}`} + onChange={(value) => updateSettings({ imageWidth: value, imageHeight: value })} /> - - 128 - 512 - ); }; -/** Advanced sliders: Guidance Scale, Image Threads, GPU Acceleration */ +/** Advanced controls: Guidance Scale, Image Threads, GPU Acceleration */ export const ImageQualityAdvancedSliders: React.FC = () => { const { colors } = useTheme(); const styles = useThemedStyles(createStyles); @@ -96,53 +64,23 @@ export const ImageQualityAdvancedSliders: React.FC = () => { return ( <> - - Guidance Scale - {(settings.imageGuidanceScale || 7.5).toFixed(1)} - - - Higher = follows prompt more strictly (5-15 range) - - Guidance Scale + Higher = follows prompt more strictly (5-15 range) + updateSettings({ imageGuidanceScale: value })} - minimumTrackTintColor={colors.primary} - maximumTrackTintColor={colors.surfaceLight} - thumbTintColor={colors.primary} + min={1} max={20} step={0.5} decimals={1} + onChange={(value) => updateSettings({ imageGuidanceScale: value })} /> - - 1 - 20 - - - Image Threads - {settings.imageThreads ?? 4} - - - CPU threads used for image generation. Takes effect next time the image model loads. - - Image Threads + CPU threads used for image generation. Takes effect next time the image model loads. 
+ updateSettings({ imageThreads: value })} - minimumTrackTintColor={colors.primary} - maximumTrackTintColor={colors.surfaceLight} - thumbTintColor={colors.primary} + min={1} max={8} step={1} + onChange={(value) => updateSettings({ imageThreads: value })} /> - - 1 - 8 - {Platform.OS === 'android' && ( @@ -157,7 +95,7 @@ export const ImageQualityAdvancedSliders: React.FC = () => { /> - Use GPU for faster image generation. First run may be slower while optimizing for your device. For best performance, use NPU models on supported Snapdragon devices. + Use GPU for faster image generation. First run may be slower while optimizing for your device. {(settings.imageUseOpenCL ?? true) && } diff --git a/src/components/GenerationSettingsModal/TTSSection.tsx b/src/components/GenerationSettingsModal/TTSSection.tsx new file mode 100644 index 000000000..cf4f384cf --- /dev/null +++ b/src/components/GenerationSettingsModal/TTSSection.tsx @@ -0,0 +1,227 @@ +import React from 'react'; +import { View, Text, Switch, TouchableOpacity, ActivityIndicator } from 'react-native'; +import Icon from 'react-native-vector-icons/Feather'; +import { NumericStepper } from '../NumericStepper'; +import { useTheme, useThemedStyles } from '../../theme'; +import type { ThemeColors, ThemeShadows } from '../../theme'; +import { SPACING } from '../../constants'; +import { useTTSStore } from '../../stores/ttsStore'; +import { createStyles as createModalStyles } from './styles'; + +const createLocalStyles = (colors: ThemeColors, _shadows: ThemeShadows) => ({ + modeChipDisabled: { opacity: 0.4 as const }, + linkButton: { + alignSelf: 'flex-start' as const, + paddingHorizontal: SPACING.md, + paddingVertical: SPACING.sm, + borderRadius: 8, + borderWidth: 1, + borderColor: colors.border, + marginTop: SPACING.sm, + }, + linkButtonRow: { flexDirection: 'row' as const, alignItems: 'center' as const, gap: SPACING.xs }, + flex1: { flex: 1 }, + toggleRow: { + flexDirection: 'row' as const, + alignItems: 'center' as 
const, + justifyContent: 'space-between' as const, + marginBottom: SPACING.lg, + }, + toggleInfo: { flex: 1 }, + noBottomMargin: { marginBottom: 0 }, + divider: { height: 1, backgroundColor: colors.border, marginBottom: SPACING.lg }, + voiceRow: { + flexDirection: 'row' as const, + alignItems: 'center' as const, + justifyContent: 'space-between' as const, + paddingVertical: SPACING.sm, + }, + voiceRowBorder: { borderTopWidth: 1, borderTopColor: colors.border }, + voiceInfo: { flex: 1 }, + voiceName: { fontSize: 13, color: colors.text }, + voiceMeta: { fontSize: 11, color: colors.textMuted, marginTop: 2 }, + voiceSectionHeader: { + flexDirection: 'row' as const, + alignItems: 'center' as const, + justifyContent: 'space-between' as const, + marginBottom: SPACING.sm, + }, + voiceSectionLabel: { fontSize: 11, color: colors.textMuted, textTransform: 'uppercase' as const, letterSpacing: 0.3 }, + downloadRow: { flexDirection: 'row' as const, alignItems: 'center' as const, gap: SPACING.sm, marginBottom: SPACING.md }, + downloadText: { fontSize: 12, color: colors.textSecondary, flex: 1 }, +}); + +// ── Mode Picker ────────────────────────────────────────────────────────────── + +const ModePicker: React.FC<{ audioAvailable: boolean }> = ({ audioAvailable }) => { + const modal = useThemedStyles(createModalStyles); + const local = useThemedStyles(createLocalStyles); + const { settings, updateSettings, initializeEngine } = useTTSStore(); + const mode = settings.interfaceMode; + + const handleModeChange = (next: 'chat' | 'audio') => { + if (next === 'audio' && !audioAvailable) return; + updateSettings({ interfaceMode: next }); + if (next === 'audio') initializeEngine(); + }; + + return ( + + + Interface Mode + + {mode === 'audio' + ? 
'Audio Mode — responses rendered as voice notes' + : 'Chat Mode — play button added to text messages'} + + + + {(['chat', 'audio'] as const).map((m) => { + const active = mode === m; + const disabled = m === 'audio' && !audioAvailable; + return ( + handleModeChange(m)} + disabled={disabled} + > + + {m === 'chat' ? 'Chat' : 'Audio'} + + + ); + })} + + + ); +}; + +// ── Voice Picker ───────────────────────────────────────────────────────────── + +const VoicePicker: React.FC = () => { + const { colors } = useTheme(); + const local = useThemedStyles(createLocalStyles); + const { voices, activeVoiceId, isReady, isDownloading, overallDownloadProgress, setVoice } = useTTSStore(); + + return ( + + + Voice + {isDownloading && ( + {Math.round(overallDownloadProgress * 100)}% + )} + {!isReady && !isDownloading && ( + + )} + {isReady && ( + + )} + + + {voices.map((voice, i) => { + const active = voice.id === activeVoiceId; + return ( + 0 && local.voiceRowBorder]} + onPress={() => setVoice(voice.id)} + > + + {voice.label} + + {voice.metadata.accent ? `${voice.metadata.accent} · ` : ''} + {voice.metadata.gender || ''} + + + {active && } + + ); + })} + + + + ); +}; + +// ── Main TTS Section ───────────────────────────────────────────────────────── + +interface TTSSectionProps { + onNavigateToTTSSettings?: () => void; +} + +export const TTSSection: React.FC = ({ onNavigateToTTSSettings }) => { + const { colors } = useTheme(); + const modal = useThemedStyles(createModalStyles); + const local = useThemedStyles(createLocalStyles); + const { settings, updateSettings, isReady } = useTTSStore(); + + const trackColor = { false: colors.surfaceLight, true: `${colors.primary}80` }; + const isChatMode = settings.interfaceMode === 'chat'; + + if (!isReady) { + return ( + + + No voice models downloaded. Go to TTS Settings to download them. 
+ + {onNavigateToTTSSettings && ( + + + + TTS Settings + + + )} + + ); + } + + return ( + + + + {isChatMode && ( + + + Enable TTS + Show play buttons on assistant messages + + updateSettings({ enabled: v })} + trackColor={trackColor} + thumbColor={settings.enabled ? colors.primary : colors.textMuted} + /> + + )} + + + + + Speed + `${v.toFixed(1)}x`} + onChange={(v) => updateSettings({ speed: v })} + /> + + + {isChatMode && ( + + + Auto-play + Speak AI responses automatically + + updateSettings({ autoPlay: v })} + trackColor={trackColor} + thumbColor={settings.autoPlay ? colors.primary : colors.textMuted} + /> + + )} + + ); +}; diff --git a/src/components/GenerationSettingsModal/TextGenerationAdvanced.tsx b/src/components/GenerationSettingsModal/TextGenerationAdvanced.tsx index 0b017e571..3d44a9998 100644 --- a/src/components/GenerationSettingsModal/TextGenerationAdvanced.tsx +++ b/src/components/GenerationSettingsModal/TextGenerationAdvanced.tsx @@ -1,7 +1,7 @@ import React from 'react'; import { View, Text, TouchableOpacity } from 'react-native'; -import Slider from '@react-native-community/slider'; -import { useTheme, useThemedStyles } from '../../theme'; +import { NumericStepper } from '../NumericStepper'; +import { useThemedStyles } from '../../theme'; import { useAppStore } from '../../stores'; import { CacheType } from '../../types'; import { @@ -15,7 +15,6 @@ import { createStyles } from './styles'; // ─── GPU Acceleration ───────────────────────────────────────────────────────── export const GpuAccelerationToggle: React.FC = () => { - const { colors } = useTheme(); const styles = useThemedStyles(createStyles); const { settings, updateSettings } = useAppStore(); const { gpuLayersEffective, handleGpuToggle } = useTextGenerationAdvanced(); @@ -51,24 +50,15 @@ export const GpuAccelerationToggle: React.FC = () => { {settings.enableGpu && ( - - GPU Layers - {gpuLayersEffective} - + GPU Layers Layers offloaded to GPU. 
Higher = faster but may crash on low-VRAM devices. Requires model reload. - updateSettings({ gpuLayers: value })} - minimumTrackTintColor={colors.primary} - maximumTrackTintColor={colors.surfaceLight} - thumbTintColor={colors.primary} + min={1} max={GPU_LAYERS_MAX} step={1} + onChange={(value) => updateSettings({ gpuLayers: value })} /> )} @@ -199,56 +189,34 @@ export const ModelLoadingStrategyToggle: React.FC = () => { // ─── CPU Threads & Batch Size ──────────────────────────────────────────────── export const CpuThreadsSlider: React.FC = () => { - const { colors } = useTheme(); const styles = useThemedStyles(createStyles); const { settings, updateSettings } = useAppStore(); - const value = settings.nThreads ?? 6; return ( - - CPU Threads - {value} - + CPU Threads Parallel threads for inference - updateSettings({ nThreads: v })} - minimumTrackTintColor={colors.primary} - maximumTrackTintColor={colors.surfaceLight} - thumbTintColor={colors.primary} + updateSettings({ nThreads: v })} /> ); }; export const BatchSizeSlider: React.FC = () => { - const { colors } = useTheme(); const styles = useThemedStyles(createStyles); const { settings, updateSettings } = useAppStore(); - const value = settings.nBatch ?? 
512; return ( - - Batch Size - {value} - + Batch Size Tokens processed per batch - updateSettings({ nBatch: v })} - minimumTrackTintColor={colors.primary} - maximumTrackTintColor={colors.surfaceLight} - thumbTintColor={colors.primary} + updateSettings({ nBatch: v })} /> ); diff --git a/src/components/GenerationSettingsModal/TextGenerationSection.tsx b/src/components/GenerationSettingsModal/TextGenerationSection.tsx index 18ed0c031..9ef8070da 100644 --- a/src/components/GenerationSettingsModal/TextGenerationSection.tsx +++ b/src/components/GenerationSettingsModal/TextGenerationSection.tsx @@ -1,6 +1,6 @@ import React, { useState } from 'react'; import { View, Text, TouchableOpacity, Platform } from 'react-native'; -import Slider from '@react-native-community/slider'; +import { NumericStepper } from '../NumericStepper'; import { AdvancedToggle } from '../AdvancedToggle'; import { useTheme, useThemedStyles } from '../../theme'; import { useAppStore } from '../../stores'; @@ -103,35 +103,23 @@ const SettingSlider: React.FC = ({ config }) => { const rawValue = (settings as Record)[config.key]; const value = (rawValue ?? DEFAULT_SETTINGS[config.key]) as number; const warningText = config.warning?.(value) ?? null; + const decimals = config.step < 1 ? 
2 : 0; return ( - - {config.label} - {config.format(value)} - + {config.label} {config.description && ( {config.description} )} {warningText && ( {warningText} )} - updateSettings({ [config.key]: v })} - onSlidingComplete={() => {}} - minimumTrackTintColor={colors.primary} - maximumTrackTintColor={colors.surfaceLight} - thumbTintColor={colors.primary} + min={config.min} max={config.max} step={config.step} decimals={decimals} + formatValue={config.format} + onChange={(v) => updateSettings({ [config.key]: v })} /> - - {config.format(config.min)} - {config.format(config.max)} - ); }; diff --git a/src/components/GenerationSettingsModal/index.tsx b/src/components/GenerationSettingsModal/index.tsx index b23a3b74a..fa54ea964 100644 --- a/src/components/GenerationSettingsModal/index.tsx +++ b/src/components/GenerationSettingsModal/index.tsx @@ -9,6 +9,7 @@ import { createStyles } from './styles'; import { ConversationActionsSection } from './ConversationActionsSection'; import { ImageGenerationSection } from './ImageGenerationSection'; import { TextGenerationSection } from './TextGenerationSection'; +import { TTSSection } from './TTSSection'; const DEFAULT_SETTINGS = { temperature: 0.7, @@ -26,6 +27,7 @@ interface GenerationSettingsModalProps { onOpenProject?: () => void; onOpenGallery?: () => void; onDeleteConversation?: () => void; + onOpenTTSSettings?: () => void; conversationImageCount?: number; activeProjectName?: string | null; isRemote?: boolean; @@ -37,6 +39,7 @@ export const GenerationSettingsModal: React.FC = ( onOpenProject, onOpenGallery, onDeleteConversation, + onOpenTTSSettings, conversationImageCount = 0, activeProjectName, isRemote, @@ -48,6 +51,7 @@ export const GenerationSettingsModal: React.FC = ( const [performanceStats, setPerformanceStats] = useState(llmService.getPerformanceStats()); const [imageSettingsOpen, setImageSettingsOpen] = useState(false); const [textSettingsOpen, setTextSettingsOpen] = useState(false); + const [ttsSettingsOpen, 
setTtsSettingsOpen] = useState(false); useEffect(() => { if (visible) { @@ -144,6 +148,23 @@ export const GenerationSettingsModal: React.FC = ( )} + {/* TTS SETTINGS */} + setTtsSettingsOpen(!ttsSettingsOpen)} + activeOpacity={0.7} + > + TEXT TO SPEECH + + + {ttsSettingsOpen && ( + + )} + Reset to Defaults diff --git a/src/components/MarkdownText.tsx b/src/components/MarkdownText.tsx index 78d6c9ae1..233a606a3 100644 --- a/src/components/MarkdownText.tsx +++ b/src/components/MarkdownText.tsx @@ -1,5 +1,5 @@ import React, { useCallback, useMemo } from 'react'; -import { Linking, Pressable, Text, StyleSheet } from 'react-native'; +import { Linking, Text } from 'react-native'; import Markdown from '@ronradtke/react-native-markdown-display'; import { useTheme } from '../theme'; import type { ThemeColors } from '../theme'; @@ -14,21 +14,17 @@ export function preprocessMarkdown(text: string): string { return text.replaceAll(/(\d)\*(?=\d)/g, String.raw`$1\*`); } -const linkWrapperStyles = StyleSheet.create({ - pressable: { flexShrink: 1, paddingBottom: 6 }, -}); - -/** Custom link rule that constrains the Pressable wrapper width */ +/** Custom link rule — renders as inline Text so it wraps correctly inside list items */ function createLinkRule(onPress: (url: string) => void) { - return (node: any, renderChildren: any, _parent: any) => ( - ( + onPress(node.attributes?.href ?? 
'')} > - {renderChildren} - + {children} + ); } diff --git a/src/components/NumericStepper.tsx b/src/components/NumericStepper.tsx new file mode 100644 index 000000000..342cc6694 --- /dev/null +++ b/src/components/NumericStepper.tsx @@ -0,0 +1,105 @@ +import React from 'react'; +import { View, Text, TouchableOpacity, StyleSheet } from 'react-native'; +import Icon from 'react-native-vector-icons/Feather'; +import { useTheme } from '../theme'; +import { TYPOGRAPHY, SPACING } from '../constants'; + +interface NumericStepperProps { + value: number; + min: number; + max: number; + step: number; + decimals?: number; + onChange: (value: number) => void; + formatValue?: (value: number) => string; + testID?: string; +} + +export const NumericStepper: React.FC = ({ + value, + min, + max, + step, + decimals = 0, + onChange, + formatValue, + testID, +}) => { + const { colors } = useTheme(); + + const round = (v: number) => Math.round(v / step) * step; + + const decrement = () => { + const next = round(value - step); + if (next >= min) onChange(parseFloat(next.toFixed(decimals))); + }; + + const increment = () => { + const next = round(value + step); + if (next <= max) onChange(parseFloat(next.toFixed(decimals))); + }; + + const display = formatValue ? 
formatValue(value) : value.toFixed(decimals); + const canDecrement = value > min; + const canIncrement = value < max; + + return ( + + + + + + + {display} + + + + + + + ); +}; + +const styles = StyleSheet.create({ + row: { + flexDirection: 'row', + alignItems: 'center', + justifyContent: 'center', + gap: SPACING.sm, + marginTop: SPACING.sm, + }, + button: { + width: 32, + height: 32, + borderRadius: 8, + borderWidth: 1, + alignItems: 'center', + justifyContent: 'center', + }, + buttonDisabled: { + opacity: 0.35, + }, + value: { + ...TYPOGRAPHY.body, + fontWeight: '400', + minWidth: 72, + textAlign: 'center', + paddingHorizontal: SPACING.sm, + paddingVertical: SPACING.xs, + borderRadius: 8, + borderWidth: 1, + overflow: 'hidden', + }, +}); diff --git a/src/components/TTSButton/index.tsx b/src/components/TTSButton/index.tsx new file mode 100644 index 000000000..38335e0b7 --- /dev/null +++ b/src/components/TTSButton/index.tsx @@ -0,0 +1,96 @@ +import React, { useEffect } from 'react'; +import { TouchableOpacity, ActivityIndicator, StyleSheet } from 'react-native'; +import Animated, { + useSharedValue, + useAnimatedStyle, + withRepeat, + withSequence, + withTiming, +} from 'react-native-reanimated'; +import Icon from 'react-native-vector-icons/Feather'; +import { useTheme } from '../../theme'; +import { useTTSStore } from '../../stores/ttsStore'; +import { SPACING } from '../../constants'; + +interface TTSButtonProps { + text: string; + messageId: string; +} + +export const TTSButton: React.FC = ({ text, messageId }) => { + const { colors } = useTheme(); + const { + speak, + stop, + isSpeaking, + isGeneratingAudio, + isLoading, + isReady, + currentMessageId, + settings, + } = useTTSStore(); + + const isThisMessage = currentMessageId === messageId; + const isThisMessageGenerating = isGeneratingAudio && isThisMessage; + const isThisMessageSpeaking = isSpeaking && !isGeneratingAudio && isThisMessage; + + const opacity = useSharedValue(1); + useEffect(() => { + if 
(isThisMessageSpeaking) { + opacity.value = withRepeat( + withSequence( + withTiming(0.4, { duration: 600 }), + withTiming(1, { duration: 600 }), + ), + -1, + false, + ); + } else { + opacity.value = withTiming(1, { duration: 200 }); + } + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [isThisMessageSpeaking]); + + const animatedStyle = useAnimatedStyle(() => ({ opacity: opacity.value })); + + // Don't render if TTS disabled or engine not ready + if (!settings.enabled || !isReady) { + return null; + } + + // Show spinner while loading or generating audio tokens + if ((isLoading && isThisMessage) || isThisMessageGenerating) { + return ; + } + + const handlePress = () => { + if (isThisMessageSpeaking || isThisMessageGenerating) { + stop(); + return; + } + speak(text, messageId); + }; + + return ( + + + + + + ); +}; + +const styles = StyleSheet.create({ + button: { + padding: SPACING.xs, + }, +}); diff --git a/src/components/VoiceRecordButton/index.tsx b/src/components/VoiceRecordButton/index.tsx index bd1cca737..6844c05f6 100644 --- a/src/components/VoiceRecordButton/index.tsx +++ b/src/components/VoiceRecordButton/index.tsx @@ -9,6 +9,7 @@ import { PanResponderGestureState, Vibration, } from 'react-native'; +import Icon from 'react-native-vector-icons/Feather'; import ReanimatedAnimated, { useSharedValue, useAnimatedStyle, @@ -16,15 +17,16 @@ import ReanimatedAnimated, { withTiming, Easing, } from 'react-native-reanimated'; -import { useNavigation } from '@react-navigation/native'; -import { NativeStackNavigationProp } from '@react-navigation/native-stack'; import { useThemedStyles } from '../../theme'; import { CustomAlert, showAlert, hideAlert, AlertState, initialAlertState } from '../CustomAlert'; import { createStyles } from './styles'; import { LoadingState, TranscribingState, UnavailableButton, ButtonIcon } from './states'; -import { RootStackParamList } from '../../navigation/types'; +import { useWhisperStore } from '../../stores'; import logger 
from '../../utils/logger'; +const DOWNLOAD_MODEL_ID = 'small.en'; +const DOWNLOAD_MODEL_SIZE_MB = 466; + interface VoiceRecordButtonProps { isRecording: boolean; isAvailable: boolean; @@ -95,7 +97,7 @@ export const VoiceRecordButton: React.FC = ({ isModelLoading, isTranscribing, partialResult, - error, + error: _error, disabled, onStartRecording, onStopRecording, @@ -103,7 +105,7 @@ export const VoiceRecordButton: React.FC = ({ asSendButton = false, }) => { const styles = useThemedStyles(createStyles); - const navigation = useNavigation>(); + const { downloadModel, isDownloading, downloadProgress } = useWhisperStore(); const pulseAnim = useRef(new Animated.Value(1)).current; const loadingAnim = useRef(new Animated.Value(0)).current; @@ -125,6 +127,7 @@ export const VoiceRecordButton: React.FC = ({ rippleOpacity.value = 0; } + // eslint-disable-next-line react-hooks/exhaustive-deps }, [isRecording]); const rippleStyle = useAnimatedStyle(() => ({ @@ -161,15 +164,20 @@ export const VoiceRecordButton: React.FC = ({ const panResponder = useRef(buildPanResponder({ isDraggingToCancel, cancelOffsetX, callbacksRef })).current; const handleUnavailableTap = () => { - const errorDetail = error || 'No transcription model downloaded'; + if (isDownloading) { return; } setAlertState(showAlert( - 'Voice Input Unavailable', - `${errorDetail}\n\nDownload a Whisper model to enable on-device voice input.`, + 'Download Voice Model', + `Download Whisper Small to enable voice input? 
(${DOWNLOAD_MODEL_SIZE_MB} MB)`, [ - { text: 'Cancel' }, + { text: 'Cancel', style: 'cancel' }, { - text: 'Go to Voice Settings', - onPress: () => navigation.navigate('VoiceSettings'), + text: 'Download', + onPress: () => { + setAlertState(hideAlert()); + downloadModel(DOWNLOAD_MODEL_ID).catch((err) => { + logger.error('[VoiceRecordButton] Download failed:', err); + }); + }, }, ], )); @@ -206,8 +214,8 @@ export const VoiceRecordButton: React.FC = ({ if (!isAvailable) { return ( - - + + {alert} @@ -221,6 +229,42 @@ export const VoiceRecordButton: React.FC = ({ disabled && styles.buttonDisabled, ]; + // ── Audio mode: tap-to-toggle (tap to start, tap to stop & send) ─────────── + if (!asSendButton) { + const handleToggle = () => { + if (disabled) return; + Vibration.vibrate(50); + if (isRecording) { + onStopRecording(); + } else { + onStartRecording(); + } + }; + + return ( + + {isRecording && } + + + + {isRecording + ? + : } + + + + {alert} + + ); + } + + // ── Chat mode: hold-to-record with slide-to-cancel ───────────────────────── return ( {isRecording && ( diff --git a/src/components/VoiceRecordButton/states.tsx b/src/components/VoiceRecordButton/states.tsx index d0ba1ab22..889a820c7 100644 --- a/src/components/VoiceRecordButton/states.tsx +++ b/src/components/VoiceRecordButton/states.tsx @@ -43,7 +43,6 @@ export const TranscribingState: React.FC = ({ asSendButt {asSendButton ? 
: } - {!asSendButton && Transcribing...} ); }; @@ -52,16 +51,30 @@ export const TranscribingState: React.FC = ({ asSendButt interface UnavailableButtonProps { asSendButton: boolean; + /** 0–1 while downloading, undefined when idle */ + downloadProgress?: number; } -export const UnavailableButton: React.FC = ({ asSendButton }) => { +export const UnavailableButton: React.FC = ({ asSendButton, downloadProgress }) => { const { colors } = useTheme(); const styles = useThemedStyles(createStyles); + const isDownloading = downloadProgress !== undefined; + + if (asSendButton) { + return ( + + + + ); + } return ( - - {asSendButton ? ( - + + {isDownloading ? ( + <> + + {Math.round(downloadProgress * 100)}% + ) : ( <> diff --git a/src/constants/kokoroModels.ts b/src/constants/kokoroModels.ts new file mode 100644 index 000000000..333555f2c --- /dev/null +++ b/src/constants/kokoroModels.ts @@ -0,0 +1,24 @@ +/** + * @deprecated — Use imports from 'src/engine' instead. + * This file re-exports for backward compatibility with any remaining consumers. 
+ */ +export { + KOKORO_VOICES, + DEFAULT_KOKORO_VOICE_ID, + getKokoroVoiceConfig, +} from '../engine/tts/engines/kokoro/voices'; +export type { KokoroVoiceId } from '../engine/tts/engines/kokoro/voices'; +export { KOKORO_MEDIUM } from 'react-native-executorch'; + +import { Platform } from 'react-native'; + +/** @deprecated — Use engine.isSupported() instead */ +export function isExecutorchSupported(): boolean { + if (Platform.OS === 'android') { + return (Platform.Version as number) >= 26; + } + if (Platform.OS === 'ios') { + return parseInt(Platform.Version as string, 10) >= 17; + } + return false; +} diff --git a/src/constants/ttsModels.ts b/src/constants/ttsModels.ts new file mode 100644 index 000000000..f93dfe856 --- /dev/null +++ b/src/constants/ttsModels.ts @@ -0,0 +1,25 @@ +export const TTS_BACKBONE_MODEL = { + id: 'outetts-0.3-500m-q4', + name: 'OuteTTS 0.3', + backboneFile: 'OuteTTS-0.3-500M-Q4_K_M.gguf', + backboneUrl: + 'https://huggingface.co/OuteAI/OuteTTS-0.3-500M-GGUF/resolve/main/OuteTTS-0.3-500M-Q4_K_M.gguf', + backboneSizeMB: 454, + vocoderFile: 'WavTokenizer-Large-75-Q5_1.gguf', + vocoderUrl: + 'https://huggingface.co/ggml-org/WavTokenizer/resolve/main/WavTokenizer-Large-75-Q5_1.gguf', + vocoderSizeMB: 73, + sampleRate: 24000, + description: 'Natural-sounding on-device speech. Requires ~530 MB storage.', +}; + +export const TTS_SPEAKER_PROFILES = [ + { id: '0', label: 'Default' }, +]; + +/** Warn user if device RAM is below this threshold */ +export const TTS_WARN_RAM_GB = 8; +/** Hard-block TTS on devices below this threshold */ +export const TTS_BLOCK_RAM_GB = 6; +/** Max cached audio messages per conversation before eviction */ +export const AUDIO_CACHE_MAX_MESSAGES = 50; diff --git a/src/engine/EngineRegistry.ts b/src/engine/EngineRegistry.ts new file mode 100644 index 000000000..78d5711f3 --- /dev/null +++ b/src/engine/EngineRegistry.ts @@ -0,0 +1,116 @@ +/** + * Generic engine registry. 
+ * + * Works for any modality — TTS, STT, Vision, LLM. + * Engines register a factory; the registry lazily instantiates and + * manages the active engine lifecycle. + * + * Usage: + * const ttsRegistry = new EngineRegistry(); + * ttsRegistry.register('kokoro', () => new KokoroEngine()); + * await ttsRegistry.setActiveEngine('kokoro'); + */ +import type { OnDeviceEngine, BaseEngineEvents } from './types'; + +export type EngineFactory = () => T; + +interface Stoppable { stop(): void; } +function hasStop(obj: unknown): obj is Stoppable { + return typeof obj === 'object' && obj !== null && 'stop' in obj && typeof (obj as Stoppable).stop === 'function'; +} + +export class EngineRegistry< + T extends OnDeviceEngine, +> { + private _factories = new Map>(); + private _instances = new Map(); + private _activeId: string | null = null; + + /** Register an engine factory. Call once per engine at module load time. */ + register(id: string, factory: EngineFactory): void { + this._factories.set(id, factory); + } + + /** Unregister an engine. Releases instance if it exists. */ + async unregister(id: string): Promise { + const instance = this._instances.get(id); + if (instance) { + if (hasStop(instance)) instance.stop(); + await instance.release(); + this._instances.delete(id); + } + this._factories.delete(id); + if (this._activeId === id) { + this._activeId = null; + } + } + + /** All registered engine IDs */ + getRegisteredIds(): string[] { + return Array.from(this._factories.keys()); + } + + /** Check if an engine ID is registered */ + has(id: string): boolean { + return this._factories.has(id); + } + + /** Get or lazily create a singleton engine instance */ + getEngine(id: string): T { + let engine = this._instances.get(id); + if (!engine) { + const factory = this._factories.get(id); + if (!factory) { + throw new Error(`Engine '${id}' is not registered.`); + } + engine = factory(); + this._instances.set(id, engine); + } + return engine; + } + + /** + * Set the active engine. 
Stops and releases the previous one. + * Returns the newly active engine instance. + */ + async setActiveEngine(id: string): Promise { + if (this._activeId && this._activeId !== id) { + const prev = this._instances.get(this._activeId); + if (prev) { + try { + if (hasStop(prev)) prev.stop(); + await prev.release(); + } catch { + // Best-effort cleanup + } + } + } + this._activeId = id; + return this.getEngine(id); + } + + /** Currently active engine (null if none set) */ + getActiveEngine(): T | null { + if (!this._activeId) return null; + return this._instances.get(this._activeId) ?? null; + } + + /** Currently active engine ID (null if none set) */ + getActiveEngineId(): string | null { + return this._activeId; + } + + /** Release all engine instances */ + async releaseAll(): Promise { + for (const [, engine] of this._instances) { + try { + if (hasStop(engine)) engine.stop(); + await engine.release(); + } catch { + // Best-effort + } + } + this._instances.clear(); + this._activeId = null; + } +} diff --git a/src/engine/OnDeviceEngineEmitter.ts b/src/engine/OnDeviceEngineEmitter.ts new file mode 100644 index 000000000..b61bd6a27 --- /dev/null +++ b/src/engine/OnDeviceEngineEmitter.ts @@ -0,0 +1,71 @@ +/** + * Minimal typed event emitter for on-device engines. + * + * Engines extend this to get on/off/once/emit for free. + * Zero dependencies — no Node EventEmitter, no third-party lib. 
+ */ + +type Listener = (...args: any[]) => void; + +export class OnDeviceEngineEmitter< + TEvents extends Record = Record, +> { + private _listeners = new Map>(); + + on(event: K, listener: TEvents[K]): () => void { + const key = event as string; + if (!this._listeners.has(key)) { + this._listeners.set(key, new Set()); + } + this._listeners.get(key)!.add(listener as Listener); + return () => this.off(event, listener); + } + + off(event: K, listener: TEvents[K]): void { + this._listeners.get(event as string)?.delete(listener as Listener); + } + + once(event: K, listener: TEvents[K]): () => void { + const wrapper = ((...args: any[]) => { + this.off(event, wrapper as TEvents[K]); + (listener as Listener)(...args); + }) as TEvents[K]; + return this.on(event, wrapper); + } + + protected emit( + event: K, + ...args: Parameters + ): void { + const listeners = this._listeners.get(event as string); + if (!listeners) return; + for (const fn of listeners) { + try { + fn(...args); + } catch { + // Swallow event handler errors to prevent cascading failures + } + } + } + + /** Remove all listeners, optionally for a specific event */ + protected removeAllListeners(event?: keyof TEvents): void { + if (event) { + this._listeners.delete(event as string); + } else { + this._listeners.clear(); + } + } + + /** Current listener count, optionally for a specific event */ + protected listenerCount(event?: keyof TEvents): number { + if (event) { + return this._listeners.get(event as string)?.size ?? 0; + } + let count = 0; + for (const set of this._listeners.values()) { + count += set.size; + } + return count; + } +} diff --git a/src/engine/index.ts b/src/engine/index.ts new file mode 100644 index 000000000..e20172d27 --- /dev/null +++ b/src/engine/index.ts @@ -0,0 +1,52 @@ +/** + * On-Device Engine SDK + * + * Public API surface. Everything exported here is part of the SDK contract. 
+ */ + +// ── Types ───────────────────────────────────────────────────────────────── +export type { + // Base + EnginePhase, + ModelAsset, + ModelAssetStatus, + ModelAssetState, + EngineCapabilities, + BaseEngineEvents, + OnDeviceEngine, + // TTS + TTSVoice, + TTSEngineCapabilities, + TTSSpeakOptions, + TTSGenerateResult, + TTSEngineEvents, + TTSEngine, +} from './types'; + +// ── Classes ─────────────────────────────────────────────────────────────── +export { OnDeviceEngineEmitter } from './OnDeviceEngineEmitter'; +export { EngineRegistry } from './EngineRegistry'; +export type { EngineFactory } from './EngineRegistry'; + +// ── TTS Engines ────────────────────────────────────────────────────────── +export { KokoroEngine } from './tts/engines/kokoro'; +export { OuteTTSEngine } from './tts/engines/outetts'; +export { Qwen3TTSEngine } from './tts/engines/qwen3'; + +// Re-export Kokoro voice types for settings UI +export { KOKORO_VOICES, DEFAULT_KOKORO_VOICE_ID } from './tts/engines/kokoro'; +export type { KokoroVoiceId } from './tts/engines/kokoro'; + +// ── TTS Registry (singleton) ────────────────────────────────────────────── +import { EngineRegistry } from './EngineRegistry'; +import type { TTSEngine } from './types'; +import { KokoroEngine } from './tts/engines/kokoro'; +import { OuteTTSEngine } from './tts/engines/outetts'; +export const ttsRegistry = new EngineRegistry(); + +// Register built-in TTS engines +ttsRegistry.register('kokoro', () => new KokoroEngine()); +ttsRegistry.register('outetts', () => new OuteTTSEngine()); +// Qwen3-TTS stub — uncomment when inference pipeline is implemented: +// import { Qwen3TTSEngine } from './tts/engines/qwen3'; +// ttsRegistry.register('qwen3-tts', () => new Qwen3TTSEngine()); diff --git a/src/engine/tts/engines/kokoro/KokoroEngine.ts b/src/engine/tts/engines/kokoro/KokoroEngine.ts new file mode 100644 index 000000000..fa345454d --- /dev/null +++ b/src/engine/tts/engines/kokoro/KokoroEngine.ts @@ -0,0 +1,300 @@ +/** 
+ * KokoroEngine — TTSEngine implementation for Kokoro TTS via ExecuTorch. + * + * Wraps react-native-executorch's useTextToSpeech hook through a bridge + * component pattern. The bridge registers an imperative handle; the engine + * exposes the standard TTSEngine API. + */ +import { Platform } from 'react-native'; +import { OnDeviceEngineEmitter } from '../../../OnDeviceEngineEmitter'; +import type { + EnginePhase, + TTSEngine, + TTSEngineCapabilities, + TTSEngineEvents, + TTSSpeakOptions, + TTSGenerateResult, + TTSVoice, + ModelAsset, + ModelAssetState, +} from '../../../types'; +import { + KOKORO_VOICES, + DEFAULT_KOKORO_VOICE_ID, + getKokoroTTSVoices, +} from './voices'; +import type { KokoroVoiceId } from './voices'; +import { createKokoroTTSBridge } from './KokoroTTSBridge'; +import logger from '../../../../utils/logger'; + +/** Bridge interface: the React component pushes these into the engine */ +export interface KokoroBridgeHandle { + speak: (text: string, speed: number) => Promise; + stop: (instant?: boolean) => void; + pause: () => void; + resume: () => void; + setKeepAlive: (keepAlive: boolean) => void; +} + +export class KokoroEngine + extends OnDeviceEngineEmitter + implements TTSEngine +{ + readonly id = 'kokoro'; + readonly displayName = 'Kokoro TTS'; + readonly capabilities: TTSEngineCapabilities = { + streaming: true, + voiceCloning: false, + pauseResume: true, + generateAndSave: false, + platformRequirements: { + android: { minSdkVersion: 26 }, + ios: { minVersion: 17 }, + }, + peakRamMB: 82, + }; + + private _phase: EnginePhase = 'idle'; + private _bridge: KokoroBridgeHandle | null = null; + private _activeVoiceId: KokoroVoiceId = DEFAULT_KOKORO_VOICE_ID; + private _downloadProgress = 0; + private _currentMessageId: string | null = null; + private _playSessionId = 0; + private _BridgeComponent: React.ComponentType; + + constructor() { + super(); + this._BridgeComponent = createKokoroTTSBridge(this); + } + + // ── State 
─────────────────────────────────────────────────────────────── + + getPhase(): EnginePhase { + return this._phase; + } + + private _setPhase(phase: EnginePhase): void { + if (phase === this._phase) return; + const prev = this._phase; + this._phase = phase; + this.emit('phaseChange', phase, prev); + } + + // ── Bridge callbacks (called by KokoroTTSBridge) ──────────────────────── + + /** @internal Called by bridge when hook becomes ready or is torn down */ + _setBridge(handle: KokoroBridgeHandle | null, voiceId: KokoroVoiceId): void { + this._bridge = handle; + if (handle) { + this._activeVoiceId = voiceId; + this._setPhase('ready'); + logger.log('[KokoroEngine] Bridge registered, voice:', voiceId); + } else { + this._setPhase(this._downloadProgress > 0 && this._downloadProgress < 1 ? 'downloading' : 'idle'); + } + } + + /** @internal Called by bridge to sync download progress */ + _setDownloadProgress(progress: number): void { + this._downloadProgress = progress; + if (progress > 0 && progress < 1 && this._phase === 'idle') { + this._setPhase('downloading'); + } + this.emit('downloadProgress', { + assetId: 'kokoro-medium', + progress, + bytesWritten: 0, + totalBytes: 0, + }); + } + + /** @internal Called by bridge on each audio chunk */ + _onAudioChunk(data: { + samples: Float32Array; + sampleRate: number; + chunkIndex: number; + isFinal: boolean; + }): void { + this.emit('audioChunk', data); + } + + /** @internal Called by bridge on runtime error */ + _onBridgeError(message: string): void { + this._bridge = null; + this._setPhase('error'); + this.emit('error', { code: 'KOKORO_RUNTIME', message, recoverable: false }); + } + + // ── Lifecycle ─────────────────────────────────────────────────────────── + + isSupported(): boolean { + if (Platform.OS === 'android') { + return (Platform.Version as number) >= 26; + } + if (Platform.OS === 'ios') { + return parseInt(Platform.Version as string, 10) >= 17; + } + return false; + } + + async initialize(): Promise { + // 
No-op: Kokoro initializes when the bridge component mounts. + // The bridge calls _setBridge() which transitions to 'ready'. + } + + async release(): Promise { + this._bridge?.stop(true); + this._bridge = null; + this._currentMessageId = null; + this._setPhase('idle'); + } + + async destroy(): Promise { + await this.release(); + // Kokoro models are managed by executorch's internal cache + } + + // ── Assets ────────────────────────────────────────────────────────────── + + getRequiredAssets(): ModelAsset[] { + return [ + { + id: 'kokoro-medium', + label: 'Kokoro Medium', + url: '', // Managed internally by react-native-executorch + sizeBytes: 82 * 1024 * 1024, + filename: 'kokoro-medium', + }, + ]; + } + + async checkAssetStatus(): Promise { + const isReady = this._phase === 'ready'; + return [ + { + asset: this.getRequiredAssets()[0], + status: isReady ? 'downloaded' : this._downloadProgress > 0 ? 'downloading' : 'not-downloaded', + progress: isReady ? 1 : this._downloadProgress, + }, + ]; + } + + async downloadAssets(): Promise { + // Handled by react-native-executorch when the hook mounts + } + + async deleteAssets(): Promise { + await this.release(); + // Would need executorch API to clear its internal cache + } + + getOverallDownloadProgress(): number { + return this._phase === 'ready' ? 1 : this._downloadProgress; + } + + isFullyDownloaded(): boolean { + return this._phase === 'ready' || this._downloadProgress >= 1; + } + + // ── Voices ────────────────────────────────────────────────────────────── + + getVoices(): TTSVoice[] { + return getKokoroTTSVoices(); + } + + getActiveVoice(): TTSVoice | null { + return this.getVoices().find(v => v.id === this._activeVoiceId) ?? 
null; + } + + async setVoice(voiceId: string): Promise { + const valid = KOKORO_VOICES.find(v => v.id === voiceId); + if (!valid) { + throw new Error(`Unknown Kokoro voice: ${voiceId}`); + } + this._activeVoiceId = voiceId as KokoroVoiceId; + // Emit voiceChanged — the bridge component listens and does key-based remount + this.emit('voiceChanged', voiceId); + } + + // ── Speech ────────────────────────────────────────────────────────────── + + async speak(text: string, options?: TTSSpeakOptions): Promise { + if (!this._bridge) { + throw new Error('Kokoro bridge not mounted. Is the device supported?'); + } + + const speed = options?.speed ?? 1.0; + const messageId = options?.messageId ?? null; + + this._currentMessageId = messageId; + const sessionId = ++this._playSessionId; + this._setPhase('processing'); + + this._bridge.setKeepAlive(false); + + // Retry loop — executorch may still be busy from a previous stream + const MAX_RETRIES = 10; + for (let attempt = 0; attempt < MAX_RETRIES; attempt++) { + try { + logger.log('[KokoroEngine] speak attempt', attempt + 1); + await this._bridge.speak(text, speed); + break; + } catch (err: unknown) { + const errCode = (err as { code?: number })?.code; + if (errCode === 104 && attempt < MAX_RETRIES - 1) { + logger.log('[KokoroEngine] executorch busy, retrying in 200ms'); + await new Promise((r) => setTimeout(r, 200)); + continue; + } + this.emit('error', { + code: 'KOKORO_SPEAK', + message: err instanceof Error ? err.message : 'Speech failed', + recoverable: true, + }); + throw err; + } + } + + // Only clear state if this speak call still owns playback + if (this._playSessionId === sessionId) { + this._currentMessageId = null; + this._setPhase('ready'); + } + } + + async generateAndSave(): Promise { + throw new Error('Kokoro does not support generateAndSave. 
Use an engine with generateAndSave capability.'); + } + + async playFromFile(): Promise { + throw new Error('Kokoro does not support file playback.'); + } + + stop(): void { + this._bridge?.stop(true); + this._currentMessageId = null; + if (this._phase === 'processing' || this._phase === 'paused') { + this._setPhase(this._bridge ? 'ready' : 'idle'); + } + } + + pause(): void { + this._bridge?.pause(); + if (this._phase === 'processing') { + this._setPhase('paused'); + } + } + + resume(): void { + this._bridge?.resume(); + if (this._phase === 'paused') { + this._setPhase('processing'); + } + } + + // ── React Bridge ──────────────────────────────────────────────────────── + + getBridgeComponent(): React.ComponentType | null { + return this._BridgeComponent; + } +} diff --git a/src/engine/tts/engines/kokoro/KokoroTTSBridge.tsx b/src/engine/tts/engines/kokoro/KokoroTTSBridge.tsx new file mode 100644 index 000000000..0f29f6a55 --- /dev/null +++ b/src/engine/tts/engines/kokoro/KokoroTTSBridge.tsx @@ -0,0 +1,185 @@ +/** + * KokoroTTSBridge + * + * React component that mounts the react-native-executorch useTextToSpeech + * hook and registers imperative methods with the KokoroEngine instance. + * + * This replaces the old KokoroTTSManager. The key difference: instead of + * exposing module-level refs, it pushes its handle into the engine instance + * via engine._setBridge(). The engine owns the public API. + * + * Mount exactly once, near the root (via ), only on + * supported platforms. 
+ */ +import React, { useEffect, useRef } from 'react'; +import { useTextToSpeech } from 'react-native-executorch'; +import { AudioContext } from 'react-native-audio-api'; +import { KOKORO_MEDIUM } from 'react-native-executorch'; +import { getKokoroVoiceConfig } from './voices'; +import type { KokoroVoiceId } from './voices'; +import type { KokoroEngine, KokoroBridgeHandle } from './KokoroEngine'; +import logger from '../../../../utils/logger'; + +// ─── Inner component — holds the hook for a single voice ──────────────────── + +const KokoroTTSInner: React.FC<{ + voiceId: KokoroVoiceId; + engine: KokoroEngine; +}> = ({ voiceId, engine }) => { + const audioCtxRef = useRef(null); + const pendingResolvers = useRef void>>(new Set()); + const skipSuspendOnEnd = useRef(false); + + const tts = useTextToSpeech({ + model: KOKORO_MEDIUM, + voice: getKokoroVoiceConfig(voiceId), + }); + + // Sync readiness + download progress into the engine + useEffect(() => { + logger.log('[KokoroBridge] isReady=', tts.isReady, 'downloadProgress=', tts.downloadProgress); + engine._setDownloadProgress(tts.downloadProgress); + if (tts.isReady) { + // Register the bridge handle so the engine can call speak/stop/etc. 
+ const handle: KokoroBridgeHandle = { + speak: async (text: string, speed: number) => { + if (!audioCtxRef.current || audioCtxRef.current.state === 'closed') { + audioCtxRef.current = new AudioContext({ sampleRate: 24000 }); + } else if (audioCtxRef.current.state === 'suspended') { + await audioCtxRef.current.resume().catch(() => {}); + } + const ctx = audioCtxRef.current; + let chunkIndex = 0; + + try { + await tts.stream({ + text, + speed, + onNext: (chunk: Float32Array) => + new Promise((resolve) => { + pendingResolvers.current.add(resolve); + const done = () => { + pendingResolvers.current.delete(resolve); + resolve(); + }; + + // Emit audioChunk event so listeners can react + engine._onAudioChunk({ samples: chunk, sampleRate: 24000, chunkIndex, isFinal: false }); + chunkIndex++; + + const buffer = ctx.createBuffer(1, chunk.length, 24000); + buffer.copyToChannel(chunk, 0); + const source = ctx.createBufferSource(); + source.buffer = buffer; + source.playbackRate.value = speed; + source.connect(ctx.destination); + source.onEnded = done; + source.start(); + }), + onEnd: async () => { + // Emit final chunk marker + engine._onAudioChunk({ samples: new Float32Array(0), sampleRate: 24000, chunkIndex, isFinal: true }); + if (!skipSuspendOnEnd.current) { + await ctx.suspend().catch(() => {}); + } + }, + }); + } catch (err) { + logger.error('[KokoroBridge] stream error:', err); + throw err; + } + }, + + stop: (instant = true) => { + pendingResolvers.current.forEach((r) => r()); + pendingResolvers.current.clear(); + tts.streamStop(instant); + audioCtxRef.current?.close().catch(() => {}); + audioCtxRef.current = null; + }, + + pause: () => { + audioCtxRef.current?.suspend().catch(() => {}); + }, + + resume: () => { + audioCtxRef.current?.resume().catch(() => {}); + }, + + setKeepAlive: (keepAlive: boolean) => { + skipSuspendOnEnd.current = keepAlive; + }, + }; + + engine._setBridge(handle, voiceId); + } + }, [tts.isReady, tts.downloadProgress, voiceId, engine, tts]); + + 
useEffect(() => { + if (tts.error) { + logger.warn('[KokoroBridge] Runtime error:', tts.error); + engine._onBridgeError(String(tts.error)); + } + }, [tts.error, engine]); + + // Clean up on unmount + useEffect(() => { + return () => { + logger.log('[KokoroBridge] Inner unmounting'); + engine._setBridge(null, voiceId); + }; + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []); + + return null; +}; + +// ─── Outer component — manages voice switching via key-based remount ──────── + +export function createKokoroTTSBridge(engine: KokoroEngine): React.FC { + return function KokoroTTSBridgeOuter() { + const [activeVoiceId, setActiveVoiceId] = React.useState( + (engine.getActiveVoice()?.id as KokoroVoiceId) ?? 'af_heart', + ); + const cooldownRef = useRef | null>(null); + const lastStreamEndRef = useRef(0); + + // Listen for voice changes from the engine + useEffect(() => { + const unsub = engine.on('voiceChanged', (voiceId) => { + const newVoice = voiceId as KokoroVoiceId; + if (newVoice === activeVoiceId) return; + + // Cooldown before remount to let executorch clean up + const elapsed = Date.now() - lastStreamEndRef.current; + const waitMs = Math.max(100, 2000 - elapsed); + + logger.log('[KokoroBridge] Voice change cooldown:', waitMs, 'ms'); + engine._setDownloadProgress(0); // Show loader during switch + + if (cooldownRef.current) clearTimeout(cooldownRef.current); + cooldownRef.current = setTimeout(() => { + setActiveVoiceId(newVoice); + cooldownRef.current = null; + }, waitMs); + }); + + return () => { + unsub(); + if (cooldownRef.current) clearTimeout(cooldownRef.current); + }; + }, [activeVoiceId]); + + // Track stream end time for cooldown calculation + useEffect(() => { + const unsub = engine.on('phaseChange', (phase, prev) => { + if (prev === 'processing' && (phase === 'ready' || phase === 'idle')) { + lastStreamEndRef.current = Date.now(); + } + }); + return unsub; + }, []); + + return ; + }; +} diff --git a/src/engine/tts/engines/kokoro/index.ts 
b/src/engine/tts/engines/kokoro/index.ts new file mode 100644 index 000000000..9ae77834d --- /dev/null +++ b/src/engine/tts/engines/kokoro/index.ts @@ -0,0 +1,4 @@ +export { KokoroEngine } from './KokoroEngine'; +export type { KokoroBridgeHandle } from './KokoroEngine'; +export { KOKORO_VOICES, DEFAULT_KOKORO_VOICE_ID, getKokoroVoiceConfig, getKokoroTTSVoices } from './voices'; +export type { KokoroVoiceId, KokoroVoiceEntry } from './voices'; diff --git a/src/engine/tts/engines/kokoro/voices.ts b/src/engine/tts/engines/kokoro/voices.ts new file mode 100644 index 000000000..67395658e --- /dev/null +++ b/src/engine/tts/engines/kokoro/voices.ts @@ -0,0 +1,69 @@ +/** + * Kokoro voice definitions. + * + * Moved from constants/kokoroModels.ts into the engine boundary. + * The VoiceConfig imports come from react-native-executorch; the + * TTSVoice wrappers are engine-agnostic. + */ +import { + KOKORO_VOICE_AF_HEART, + KOKORO_VOICE_AF_RIVER, + KOKORO_VOICE_AF_SARAH, + KOKORO_VOICE_AM_ADAM, + KOKORO_VOICE_AM_MICHAEL, + KOKORO_VOICE_AM_SANTA, + KOKORO_VOICE_BF_EMMA, + KOKORO_VOICE_BM_DANIEL, +} from 'react-native-executorch'; +import type { VoiceConfig } from 'react-native-executorch'; +import type { TTSVoice } from '../../../types'; + +export type KokoroVoiceId = + | 'af_heart' + | 'af_river' + | 'af_sarah' + | 'am_adam' + | 'am_michael' + | 'am_santa' + | 'bf_emma' + | 'bm_daniel'; + +export interface KokoroVoiceEntry { + id: KokoroVoiceId; + label: string; + persona: string; + accent: string; + gender: 'Female' | 'Male'; + defaultSpeed: number; + config: VoiceConfig; +} + +export const KOKORO_VOICES: KokoroVoiceEntry[] = [ + { id: 'af_heart', label: 'Warm', persona: 'Friendly and approachable', accent: 'US', gender: 'Female', defaultSpeed: 1.0, config: KOKORO_VOICE_AF_HEART }, + { id: 'af_river', label: 'Calm', persona: 'Relaxed and soothing', accent: 'US', gender: 'Female', defaultSpeed: 0.9, config: KOKORO_VOICE_AF_RIVER }, + { id: 'af_sarah', label: 'Clear', persona: 
'Crisp and professional', accent: 'US', gender: 'Female', defaultSpeed: 1.0, config: KOKORO_VOICE_AF_SARAH }, + { id: 'am_adam', label: 'Steady', persona: 'Composed and reliable', accent: 'US', gender: 'Male', defaultSpeed: 1.0, config: KOKORO_VOICE_AM_ADAM }, + { id: 'am_michael', label: 'Bold', persona: 'Confident and direct', accent: 'US', gender: 'Male', defaultSpeed: 1.1, config: KOKORO_VOICE_AM_MICHAEL }, + { id: 'am_santa', label: 'Cheerful', persona: 'Upbeat and energetic', accent: 'US', gender: 'Male', defaultSpeed: 1.2, config: KOKORO_VOICE_AM_SANTA }, + { id: 'bf_emma', label: 'Gentle', persona: 'Soft and thoughtful', accent: 'British', gender: 'Female', defaultSpeed: 0.9, config: KOKORO_VOICE_BF_EMMA }, + { id: 'bm_daniel', label: 'Refined', persona: 'Polished and articulate', accent: 'British', gender: 'Male', defaultSpeed: 1.0, config: KOKORO_VOICE_BM_DANIEL }, +]; + +export const DEFAULT_KOKORO_VOICE_ID: KokoroVoiceId = 'af_heart'; + +export function getKokoroVoiceConfig(id: KokoroVoiceId): VoiceConfig { + return KOKORO_VOICES.find(v => v.id === id)?.config ?? KOKORO_VOICE_AF_HEART; +} + +/** Convert internal voice entries to engine-agnostic TTSVoice objects */ +export function getKokoroTTSVoices(): TTSVoice[] { + return KOKORO_VOICES.map(v => ({ + id: v.id, + label: v.label, + metadata: { + accent: v.accent, + gender: v.gender, + persona: v.persona, + }, + })); +} diff --git a/src/engine/tts/engines/outetts/OuteTTSEngine.ts b/src/engine/tts/engines/outetts/OuteTTSEngine.ts new file mode 100644 index 000000000..c494deb36 --- /dev/null +++ b/src/engine/tts/engines/outetts/OuteTTSEngine.ts @@ -0,0 +1,557 @@ +/* eslint-disable max-lines */ +/** + * OuteTTSEngine — TTSEngine implementation for OuteTTS via llama.rn. + * + * Absorbs the logic from services/ttsService.ts into the engine interface. + * Fully imperative — no React bridge needed. 
+ */ +import { initLlama } from 'llama.rn'; +import type { LlamaContext } from 'llama.rn'; +import RNFS from 'react-native-fs'; +import { AudioContext, AudioBufferSourceNode } from 'react-native-audio-api'; +import { OnDeviceEngineEmitter } from '../../../OnDeviceEngineEmitter'; +import type { + EnginePhase, + TTSEngine, + TTSEngineCapabilities, + TTSEngineEvents, + TTSSpeakOptions, + TTSGenerateResult, + TTSVoice, + ModelAsset, + ModelAssetState, +} from '../../../types'; +import { OUTETTS_ASSETS, OUTETTS_BACKBONE, OUTETTS_VOCODER, OUTETTS_SAMPLE_RATE } from './models'; +import logger from '../../../../utils/logger'; + +export class OuteTTSEngine + extends OnDeviceEngineEmitter + implements TTSEngine +{ + readonly id = 'outetts'; + readonly displayName = 'OuteTTS 0.3'; + readonly capabilities: TTSEngineCapabilities = { + streaming: false, + voiceCloning: true, + pauseResume: true, + generateAndSave: true, + peakRamMB: 530, + }; + + private _phase: EnginePhase = 'idle'; + private _context: LlamaContext | null = null; + private _isVocoderReady = false; + private _contextLoadPromise: Promise = Promise.resolve(); + private _audioCtx: AudioContext | null = null; + private _currentSource: AudioBufferSourceNode | null = null; + private _isSpeakingFlag = false; + private _currentMessageId: string | null = null; + private _playSessionId = 0; + private _assetStates: ModelAssetState[] = []; + + constructor() { + super(); + this._assetStates = OUTETTS_ASSETS.map(asset => ({ + asset, + status: 'not-downloaded' as const, + progress: 0, + })); + } + + // ── State ─────────────────────────────────────────────────────────────── + + getPhase(): EnginePhase { + return this._phase; + } + + private _setPhase(phase: EnginePhase): void { + if (phase === this._phase) return; + const prev = this._phase; + this._phase = phase; + this.emit('phaseChange', phase, prev); + } + + // ── Paths ─────────────────────────────────────────────────────────────── + + private _getModelsDir(): string { + 
return `${RNFS.DocumentDirectoryPath}/tts-models`; + } + + private _getAssetPath(asset: ModelAsset): string { + return `${this._getModelsDir()}/${asset.filename}`; + } + + private _getAudioCacheDir(conversationId: string): string { + return `${RNFS.DocumentDirectoryPath}/audio-cache/${conversationId}`; + } + + private _getAudioFilePath(conversationId: string, messageId: string): string { + return `${this._getAudioCacheDir(conversationId)}/${messageId}.pcm`; + } + + private async _ensureDir(dir: string): Promise { + if (!(await RNFS.exists(dir))) { + await RNFS.mkdir(dir); + } + } + + // ── Lifecycle ─────────────────────────────────────────────────────────── + + isSupported(): boolean { + return true; // OuteTTS runs on all platforms via llama.rn + } + + async initialize(): Promise { + if (this._context && this._isVocoderReady) return; + if (this._phase === 'loading') return this._contextLoadPromise; + + this._setPhase('loading'); + + this._contextLoadPromise = this._contextLoadPromise.then(async () => { + if (this._context && this._isVocoderReady) return; + + logger.log('[OuteTTSEngine] Loading backbone...'); + this._context = await initLlama({ + model: this._getAssetPath(OUTETTS_BACKBONE), + n_ctx: 8192, + n_threads: 4, + }); + + logger.log('[OuteTTSEngine] Loading vocoder...'); + await this._context.initVocoder({ + path: this._getAssetPath(OUTETTS_VOCODER), + n_batch: 4096, + }); + this._isVocoderReady = await this._context.isVocoderEnabled(); + + if (!this._isVocoderReady) { + throw new Error('Vocoder failed to initialize.'); + } + logger.log('[OuteTTSEngine] Ready.'); + }); + + try { + await this._contextLoadPromise; + this._setPhase('ready'); + } catch (err) { + const msg = err instanceof Error ? 
err.message : 'Failed to load OuteTTS'; + this._setPhase('error'); + this.emit('error', { code: 'OUTETTS_LOAD', message: msg, recoverable: true }); + throw err; + } + } + + async release(): Promise { + this.stop(); + if (this._context) { + await this._context.releaseVocoder().catch(() => {}); + await this._context.release().catch(() => {}); + this._context = null; + } + this._isVocoderReady = false; + this._audioCtx?.close().catch(() => {}); + this._audioCtx = null; + this._setPhase('idle'); + } + + async destroy(): Promise { + await this.release(); + await this.deleteAssets(); + } + + // ── Assets ────────────────────────────────────────────────────────────── + + getRequiredAssets(): ModelAsset[] { + return OUTETTS_ASSETS; + } + + async checkAssetStatus(): Promise { + const states: ModelAssetState[] = []; + for (const asset of OUTETTS_ASSETS) { + const path = this._getAssetPath(asset); + const exists = await RNFS.exists(path); + states.push({ + asset, + status: exists ? 'downloaded' : 'not-downloaded', + progress: exists ? 1 : 0, + localPath: exists ? path : undefined, + }); + } + this._assetStates = states; + return states; + } + + async downloadAssets(assetIds?: string[]): Promise { + await this._ensureDir(this._getModelsDir()); + const toDownload = assetIds + ? 
OUTETTS_ASSETS.filter(a => assetIds.includes(a.id)) + : OUTETTS_ASSETS; + + this._setPhase('downloading'); + + for (const asset of toDownload) { + const dest = this._getAssetPath(asset); + if (await RNFS.exists(dest)) { + this._updateAssetState(asset.id, { status: 'downloaded', progress: 1, localPath: dest }); + continue; + } + + this._updateAssetState(asset.id, { status: 'downloading', progress: 0 }); + + const dl = RNFS.downloadFile({ + fromUrl: asset.url, + toFile: dest, + progressDivider: 1, + progress: (res) => { + const p = res.bytesWritten / res.contentLength; + this._updateAssetState(asset.id, { status: 'downloading', progress: p }); + this.emit('downloadProgress', { + assetId: asset.id, + progress: p, + bytesWritten: res.bytesWritten, + totalBytes: res.contentLength, + }); + }, + }); + + const result = await dl.promise; + if (result.statusCode !== 200) { + await RNFS.unlink(dest).catch(() => {}); + this._updateAssetState(asset.id, { status: 'error', progress: 0, error: `HTTP ${result.statusCode}` }); + throw new Error(`Download failed for ${asset.label}: HTTP ${result.statusCode}`); + } + this._updateAssetState(asset.id, { status: 'downloaded', progress: 1, localPath: dest }); + } + + // Stay in downloading until all done, then move to idle (not ready — need initialize()) + if (this.isFullyDownloaded()) { + this._setPhase('idle'); + } + } + + async deleteAssets(assetIds?: string[]): Promise { + await this.release(); + const toDelete = assetIds + ? 
OUTETTS_ASSETS.filter(a => assetIds.includes(a.id)) + : OUTETTS_ASSETS; + + for (const asset of toDelete) { + const path = this._getAssetPath(asset); + if (await RNFS.exists(path)) { + await RNFS.unlink(path); + } + this._updateAssetState(asset.id, { status: 'not-downloaded', progress: 0 }); + } + } + + getOverallDownloadProgress(): number { + const totalSize = OUTETTS_ASSETS.reduce((sum, a) => sum + a.sizeBytes, 0); + let weightedProgress = 0; + for (const state of this._assetStates) { + weightedProgress += state.progress * (state.asset.sizeBytes / totalSize); + } + return weightedProgress; + } + + isFullyDownloaded(): boolean { + return this._assetStates.every(s => s.status === 'downloaded'); + } + + private _updateAssetState( + assetId: string, + patch: Pick & { localPath?: string; error?: string }, + ): void { + const idx = this._assetStates.findIndex(s => s.asset.id === assetId); + if (idx >= 0) { + this._assetStates[idx] = { ...this._assetStates[idx], ...patch }; + } + } + + // ── Voices ────────────────────────────────────────────────────────────── + + getVoices(): TTSVoice[] { + return [ + { + id: '0', + label: 'Default', + metadata: { gender: 'Neutral' }, + }, + ]; + } + + getActiveVoice(): TTSVoice | null { + return this.getVoices()[0]; + } + + async setVoice(voiceId: string): Promise { + // OuteTTS only has one built-in voice; voice cloning uses referenceAudioPath + this.emit('voiceChanged', voiceId); + } + + // ── Audio Generation ──────────────────────────────────────────────────── + + private async _generate(text: string): Promise<{ + samples: Float32Array; + durationSeconds: number; + sampleRate: number; + waveformData: number[]; + }> { + if (!this._context || !this._isVocoderReady) { + throw new Error('OuteTTS models not loaded.'); + } + + const { prompt, grammar } = await this._context.getFormattedAudioCompletion( + null, // default speaker + text, + ); + const guideTokens = (await this._context.getAudioCompletionGuideTokens(text)) ?? 
[]; + const result = await this._context.completion({ + prompt, + grammar, + guide_tokens: guideTokens, + n_predict: 4096, + temperature: 0.7, + top_p: 0.9, + stop: ['<|im_end|>'], + }); + + const pcmArray = await this._context.decodeAudioTokens(result.audio_tokens ?? []); + const samples = new Float32Array(pcmArray); + const sampleRate = OUTETTS_SAMPLE_RATE; + + return { + samples, + durationSeconds: samples.length / sampleRate, + sampleRate, + waveformData: this._buildWaveformData(samples, 200), + }; + } + + // ── Speech ────────────────────────────────────────────────────────────── + + async speak(text: string, options?: TTSSpeakOptions): Promise { + if (!this._context || !this._isVocoderReady) { + throw new Error('OuteTTS models not loaded. Call initialize() first.'); + } + + const speed = options?.speed ?? 1.0; + const messageId = options?.messageId ?? null; + + this.stop(); + this._currentMessageId = messageId; + const sessionId = ++this._playSessionId; + this._isSpeakingFlag = true; + this._setPhase('processing'); + + try { + // Truncate to keep generation time reasonable (~300 chars ~ 20-30s on device) + const truncated = text.length > 300 ? `${text.slice(0, 297)}...` : text; + const audio = await this._generate(truncated); + + // Abort if stop() was called or another speak() started during generation + if (!this._isSpeakingFlag || this._playSessionId !== sessionId) return; + + this.emit('audioComplete', audio); + await this._playFromSamples(audio.samples, speed); + } catch (err) { + const msg = err instanceof Error ? 
err.message : 'Speech failed'; + this.emit('error', { code: 'OUTETTS_SPEAK', message: msg, recoverable: true }); + throw err; + } finally { + if (this._playSessionId === sessionId) { + this._currentMessageId = null; + this._isSpeakingFlag = false; + this._setPhase('ready'); + } + } + } + + // eslint-disable-next-line max-params + async generateAndSave( + text: string, + conversationId: string, + messageId: string, + _options?: TTSSpeakOptions, + ): Promise { + if (!this._context || !this._isVocoderReady) { + throw new Error('OuteTTS models not loaded. Call initialize() first.'); + } + + const audio = await this._generate(text); + this.emit('audioComplete', audio); + + // Save to file + await this._ensureDir(this._getAudioCacheDir(conversationId)); + const filePath = this._getAudioFilePath(conversationId, messageId); + const base64 = this._float32ToBase64(audio.samples); + await RNFS.writeFile(filePath, base64, 'base64'); + + return { + filePath, + durationSeconds: audio.durationSeconds, + waveformData: audio.waveformData, + }; + } + + async playFromFile( + filePath: string, + options?: { speed?: number; startOffset?: number; messageId?: string }, + ): Promise { + const speed = options?.speed ?? 1.0; + const startOffset = options?.startOffset ?? 0; + const messageId = options?.messageId ?? null; + + this.stop(); + this._currentMessageId = messageId; + const sessionId = ++this._playSessionId; + this._isSpeakingFlag = true; + this._setPhase('processing'); + + try { + this._audioCtx?.close().catch(() => {}); + this._audioCtx = new AudioContext(); + const src = filePath.startsWith('file://') ? 
filePath : `file://${filePath}`; + const buffer = await this._audioCtx.decodeAudioData(src as unknown as ArrayBuffer); + + // Abort if stop() was called during decode + if (this._playSessionId !== sessionId) return; + + const source = this._audioCtx.createBufferSource(); + source.buffer = buffer; + source.playbackRate.value = speed; + source.connect(this._audioCtx.destination); + this._currentSource = source; + + await new Promise((resolve) => { + source.onEnded = () => { + this._currentSource = null; + resolve(); + }; + source.start(0, startOffset); + }); + } catch (err) { + const msg = err instanceof Error ? err.message : 'Playback failed'; + this.emit('error', { code: 'OUTETTS_PLAYBACK', message: msg, recoverable: true }); + throw err; + } finally { + if (this._playSessionId === sessionId) { + this._currentMessageId = null; + this._isSpeakingFlag = false; + this._setPhase('ready'); + } + } + } + + stop(): void { + this._isSpeakingFlag = false; + try { this._currentSource?.stop(); } catch { /* already stopped */ } + this._currentSource = null; + this._currentMessageId = null; + if (this._phase === 'processing' || this._phase === 'paused') { + this._setPhase(this._context ? 
'ready' : 'idle'); + } + } + + pause(): void { + this._audioCtx?.suspend().catch(() => {}); + if (this._phase === 'processing') { + this._setPhase('paused'); + } + } + + resume(): void { + this._audioCtx?.resume().catch(() => {}); + if (this._phase === 'paused') { + this._setPhase('processing'); + } + } + + // ── React Bridge ──────────────────────────────────────────────────────── + + getBridgeComponent(): React.ComponentType | null { + return null; // Fully imperative + } + + // ── Audio Cache (app-level convenience) ───────────────────────────────── + + async getAudioCacheSizeMB(): Promise { + const cacheRoot = `${RNFS.DocumentDirectoryPath}/audio-cache`; + if (!(await RNFS.exists(cacheRoot))) return 0; + let totalBytes = 0; + const convDirs = await RNFS.readDir(cacheRoot); + for (const convDir of convDirs) { + if (convDir.isDirectory()) { + const files = await RNFS.readDir(convDir.path); + for (const file of files) { totalBytes += Number(file.size); } + } + } + return totalBytes / (1024 * 1024); + } + + async clearAudioCache(): Promise { + const cacheRoot = `${RNFS.DocumentDirectoryPath}/audio-cache`; + if (await RNFS.exists(cacheRoot)) { + await RNFS.unlink(cacheRoot); + } + } + + async isAudioCached(conversationId: string, messageId: string): Promise { + return RNFS.exists(this._getAudioFilePath(conversationId, messageId)); + } + + // ── Utilities ─────────────────────────────────────────────────────────── + + private async _playFromSamples(samples: Float32Array, speed: number): Promise { + this._audioCtx?.close().catch(() => {}); + this._audioCtx = new AudioContext({ sampleRate: OUTETTS_SAMPLE_RATE }); + const buffer = this._audioCtx.createBuffer(1, samples.length, OUTETTS_SAMPLE_RATE); + buffer.copyToChannel(samples, 0); + const source = this._audioCtx.createBufferSource(); + source.buffer = buffer; + source.playbackRate.value = speed; + source.connect(this._audioCtx.destination); + this._currentSource = source; + + await new Promise((resolve, reject) => { 
+ // Guard against hanging promise if onEnded never fires + const timeout = setTimeout(() => { + this._currentSource = null; + resolve(); + }, (samples.length / OUTETTS_SAMPLE_RATE / speed) * 1000 + 5000); // estimated duration + 5s buffer + + source.onEnded = () => { + clearTimeout(timeout); + this._currentSource = null; + resolve(); + }; + try { + source.start(); + } catch (err) { + clearTimeout(timeout); + reject(err); + } + }); + } + + private _buildWaveformData(samples: Float32Array, points: number): number[] { + const blockSize = Math.floor(samples.length / points); + const result: number[] = []; + for (let i = 0; i < points; i++) { + let sum = 0; + for (let j = 0; j < blockSize; j++) { + sum += Math.abs(samples[i * blockSize + j] ?? 0); + } + result.push(blockSize > 0 ? sum / blockSize : 0); + } + return result; + } + + private _float32ToBase64(samples: Float32Array): string { + const uint8 = new Uint8Array(samples.buffer); + let binary = ''; + for (let i = 0; i < uint8.length; i++) { + binary += String.fromCharCode(uint8[i]); + } + return btoa(binary); + } +} diff --git a/src/engine/tts/engines/outetts/index.ts b/src/engine/tts/engines/outetts/index.ts new file mode 100644 index 000000000..2347e6784 --- /dev/null +++ b/src/engine/tts/engines/outetts/index.ts @@ -0,0 +1,2 @@ +export { OuteTTSEngine } from './OuteTTSEngine'; +export { OUTETTS_ASSETS, OUTETTS_BACKBONE, OUTETTS_VOCODER, OUTETTS_SAMPLE_RATE } from './models'; diff --git a/src/engine/tts/engines/outetts/models.ts b/src/engine/tts/engines/outetts/models.ts new file mode 100644 index 000000000..ee712bb98 --- /dev/null +++ b/src/engine/tts/engines/outetts/models.ts @@ -0,0 +1,26 @@ +/** + * OuteTTS model definitions. + * + * Moved from constants/ttsModels.ts into the engine boundary. 
+ */ +import type { ModelAsset } from '../../../types'; + +export const OUTETTS_BACKBONE: ModelAsset = { + id: 'backbone', + label: 'Voice Model', + url: 'https://huggingface.co/OuteAI/OuteTTS-0.3-500M-GGUF/resolve/main/OuteTTS-0.3-500M-Q4_K_M.gguf', + sizeBytes: 454 * 1024 * 1024, + filename: 'OuteTTS-0.3-500M-Q4_K_M.gguf', +}; + +export const OUTETTS_VOCODER: ModelAsset = { + id: 'vocoder', + label: 'Audio Decoder', + url: 'https://huggingface.co/ggml-org/WavTokenizer/resolve/main/WavTokenizer-Large-75-Q5_1.gguf', + sizeBytes: 73 * 1024 * 1024, + filename: 'WavTokenizer-Large-75-Q5_1.gguf', +}; + +export const OUTETTS_ASSETS: ModelAsset[] = [OUTETTS_BACKBONE, OUTETTS_VOCODER]; + +export const OUTETTS_SAMPLE_RATE = 24000; diff --git a/src/engine/tts/engines/qwen3/Qwen3TTSEngine.ts b/src/engine/tts/engines/qwen3/Qwen3TTSEngine.ts new file mode 100644 index 000000000..e1d9c1606 --- /dev/null +++ b/src/engine/tts/engines/qwen3/Qwen3TTSEngine.ts @@ -0,0 +1,357 @@ +/** + * Qwen3TTSEngine — TTSEngine stub for Qwen3-TTS. + * + * Multi-model pipeline: + * 1. Talker (0.6B LLM, GGUF) — generates speech token sequences from text + * 2. Predictor (GGUF) — fills parallel codebook tracks (16 codebooks) + * 3. Codec decoder (ONNX) — converts token grid to PCM audio waveform + * + * The talker and predictor run via llama.rn (GGUF). + * The codec decoder runs via ONNX Runtime (onnxruntime-react-native). + * + * 12Hz frame rate = dramatically fewer tokens per second of audio than + * OuteTTS (75Hz) or most other TTS models. This makes on-device inference + * much more feasible. + * + * STATUS: Stub — asset management and lifecycle are wired up; the actual + * inference pipeline is TODO pending integration testing. 
+ */ +import RNFS from 'react-native-fs'; +import { OnDeviceEngineEmitter } from '../../../OnDeviceEngineEmitter'; +import type { + EnginePhase, + TTSEngine, + TTSEngineCapabilities, + TTSEngineEvents, + TTSSpeakOptions, + TTSGenerateResult, + TTSVoice, + ModelAsset, + ModelAssetState, +} from '../../../types'; +import { QWEN3_TTS_ASSETS } from './models'; +import logger from '../../../../utils/logger'; + +export class Qwen3TTSEngine + extends OnDeviceEngineEmitter + implements TTSEngine +{ + readonly id = 'qwen3-tts'; + readonly displayName = 'Qwen3 TTS (0.6B)'; + readonly capabilities: TTSEngineCapabilities = { + streaming: false, // Generate-then-play (streaming planned for v2) + voiceCloning: true, + pauseResume: true, + generateAndSave: true, + platformRequirements: { + android: { minSdkVersion: 26 }, + ios: { minVersion: 15 }, + }, + peakRamMB: 600, + }; + + private _phase: EnginePhase = 'idle'; + private _assetStates: ModelAssetState[] = []; + + // TODO: llama.rn contexts for talker + predictor + // private _talkerContext: LlamaContext | null = null; + // private _predictorContext: LlamaContext | null = null; + // TODO: ONNX Runtime session for codec decoder + // private _codecSession: InferenceSession | null = null; + + constructor() { + super(); + this._assetStates = QWEN3_TTS_ASSETS.map(asset => ({ + asset, + status: 'not-downloaded' as const, + progress: 0, + })); + } + + // ── State ─────────────────────────────────────────────────────────────── + + getPhase(): EnginePhase { + return this._phase; + } + + private _setPhase(phase: EnginePhase): void { + if (phase === this._phase) return; + const prev = this._phase; + this._phase = phase; + this.emit('phaseChange', phase, prev); + } + + // ── Paths ─────────────────────────────────────────────────────────────── + + private _getModelsDir(): string { + return `${RNFS.DocumentDirectoryPath}/tts-models/qwen3`; + } + + private _getAssetPath(asset: ModelAsset): string { + return 
`${this._getModelsDir()}/${asset.filename}`; + } + + private async _ensureDir(dir: string): Promise { + if (!(await RNFS.exists(dir))) { + await RNFS.mkdir(dir); + } + } + + // ── Lifecycle ─────────────────────────────────────────────────────────── + + isSupported(): boolean { + // TODO: Runtime platform version check + return true; + } + + async initialize(): Promise { + if (!this.isFullyDownloaded()) { + throw new Error('Qwen3-TTS models not downloaded.'); + } + + this._setPhase('loading'); + + try { + // TODO: Load all three models + // + // const talkerPath = this._getAssetPath(QWEN3_TTS_TALKER); + // const predictorPath = this._getAssetPath(QWEN3_TTS_PREDICTOR); + // const codecPath = this._getAssetPath(QWEN3_TTS_CODEC); + // + // this._talkerContext = await initLlama({ + // model: talkerPath, + // n_ctx: 4096, + // n_threads: 4, + // }); + // + // this._predictorContext = await initLlama({ + // model: predictorPath, + // n_ctx: 2048, + // n_threads: 4, + // }); + // + // this._codecSession = await InferenceSession.create(codecPath); + + logger.log('[Qwen3TTSEngine] Models loaded (stub).'); + this._setPhase('ready'); + } catch (err) { + const msg = err instanceof Error ? 
err.message : 'Failed to load Qwen3-TTS'; + this._setPhase('error'); + this.emit('error', { code: 'QWEN3_LOAD', message: msg, recoverable: true }); + throw err; + } + } + + async release(): Promise { + // TODO: Release llama.rn contexts and ONNX session + // this._talkerContext?.release(); + // this._predictorContext?.release(); + // this._codecSession?.release(); + this._setPhase('idle'); + } + + async destroy(): Promise { + await this.release(); + await this.deleteAssets(); + } + + // ── Assets ────────────────────────────────────────────────────────────── + + getRequiredAssets(): ModelAsset[] { + return QWEN3_TTS_ASSETS; + } + + async checkAssetStatus(): Promise { + await this._ensureDir(this._getModelsDir()); + const states: ModelAssetState[] = []; + for (const asset of QWEN3_TTS_ASSETS) { + const path = this._getAssetPath(asset); + const exists = await RNFS.exists(path); + states.push({ + asset, + status: exists ? 'downloaded' : 'not-downloaded', + progress: exists ? 1 : 0, + localPath: exists ? path : undefined, + }); + } + this._assetStates = states; + return states; + } + + async downloadAssets(assetIds?: string[]): Promise { + await this._ensureDir(this._getModelsDir()); + const toDownload = assetIds + ? 
QWEN3_TTS_ASSETS.filter(a => assetIds.includes(a.id)) + : QWEN3_TTS_ASSETS; + + this._setPhase('downloading'); + + for (const asset of toDownload) { + const dest = this._getAssetPath(asset); + if (await RNFS.exists(dest)) { + this._updateAssetState(asset.id, { status: 'downloaded', progress: 1, localPath: dest }); + continue; + } + + this._updateAssetState(asset.id, { status: 'downloading', progress: 0 }); + + const dl = RNFS.downloadFile({ + fromUrl: asset.url, + toFile: dest, + progressDivider: 1, + progress: (res) => { + const p = res.bytesWritten / res.contentLength; + this._updateAssetState(asset.id, { status: 'downloading', progress: p }); + this.emit('downloadProgress', { + assetId: asset.id, + progress: p, + bytesWritten: res.bytesWritten, + totalBytes: res.contentLength, + }); + }, + }); + + const result = await dl.promise; + if (result.statusCode !== 200) { + await RNFS.unlink(dest).catch(() => {}); + this._updateAssetState(asset.id, { status: 'error', progress: 0, error: `HTTP ${result.statusCode}` }); + throw new Error(`Download failed for ${asset.label}: HTTP ${result.statusCode}`); + } + this._updateAssetState(asset.id, { status: 'downloaded', progress: 1, localPath: dest }); + } + + if (this.isFullyDownloaded()) { + this._setPhase('idle'); + } + } + + async deleteAssets(assetIds?: string[]): Promise { + await this.release(); + const toDelete = assetIds + ? 
QWEN3_TTS_ASSETS.filter(a => assetIds.includes(a.id)) + : QWEN3_TTS_ASSETS; + + for (const asset of toDelete) { + const path = this._getAssetPath(asset); + if (await RNFS.exists(path)) { + await RNFS.unlink(path); + } + this._updateAssetState(asset.id, { status: 'not-downloaded', progress: 0 }); + } + } + + getOverallDownloadProgress(): number { + const totalSize = QWEN3_TTS_ASSETS.reduce((sum, a) => sum + a.sizeBytes, 0); + let weightedProgress = 0; + for (const state of this._assetStates) { + weightedProgress += state.progress * (state.asset.sizeBytes / totalSize); + } + return weightedProgress; + } + + isFullyDownloaded(): boolean { + return this._assetStates.every(s => s.status === 'downloaded'); + } + + private _updateAssetState( + assetId: string, + patch: Pick & { localPath?: string; error?: string }, + ): void { + const idx = this._assetStates.findIndex(s => s.asset.id === assetId); + if (idx >= 0) { + this._assetStates[idx] = { ...this._assetStates[idx], ...patch }; + } + } + + // ── Voices ────────────────────────────────────────────────────────────── + + getVoices(): TTSVoice[] { + // TODO: Qwen3-TTS CustomVoice variant has 9 built-in voices. + // For now expose a default. Voice cloning via referenceAudioPath. + return [ + { id: 'default', label: 'Default', metadata: { language: 'multilingual' } }, + ]; + } + + getActiveVoice(): TTSVoice | null { + return this.getVoices()[0]; + } + + async setVoice(voiceId: string): Promise { + this.emit('voiceChanged', voiceId); + } + + // ── Speech ────────────────────────────────────────────────────────────── + + async speak(_text: string, _options?: TTSSpeakOptions): Promise { + // TODO: Implement the three-stage pipeline: + // + // 1. Talker inference (llama.rn): + // - Format prompt with text + voice tokens + // - Run autoregressive generation to produce first-codebook tokens + // - 12Hz frame rate = ~12 tokens per second of audio + // + // 2. 
Predictor inference (llama.rn): + // - Take first-codebook tokens from talker + // - Predict remaining 15 codebook tracks in parallel + // - Output: 16-codebook token grid + // + // 3. Codec decoding (ONNX Runtime): + // - Take 16-codebook token grid + // - Decode to PCM Float32 audio at 24kHz + // - Emit audioComplete event + // + // 4. Play the resulting audio via AudioContext + + throw new Error( + 'Qwen3-TTS inference pipeline not yet implemented. ' + + 'Asset management and lifecycle are ready — the inference integration is TODO.', + ); + } + + // eslint-disable-next-line max-params + async generateAndSave( + _text: string, + _conversationId: string, + _messageId: string, + _options?: TTSSpeakOptions, + ): Promise { + // TODO: Same pipeline as speak(), but save to file instead of playing + throw new Error('Qwen3-TTS generateAndSave not yet implemented.'); + } + + async playFromFile( + _filePath: string, + _options?: { speed?: number; startOffset?: number; messageId?: string }, + ): Promise { + // TODO: Standard AudioContext file playback (same as OuteTTS) + throw new Error('Qwen3-TTS playFromFile not yet implemented.'); + } + + stop(): void { + // TODO: Abort any in-flight inference + stop audio playback + if (this._phase === 'processing' || this._phase === 'paused') { + this._setPhase('ready'); + } + } + + pause(): void { + // TODO: Suspend AudioContext + if (this._phase === 'processing') { + this._setPhase('paused'); + } + } + + resume(): void { + // TODO: Resume AudioContext + if (this._phase === 'paused') { + this._setPhase('processing'); + } + } + + // ── React Bridge ──────────────────────────────────────────────────────── + + getBridgeComponent(): React.ComponentType | null { + return null; // Fully imperative via llama.rn + ONNX Runtime + } +} diff --git a/src/engine/tts/engines/qwen3/index.ts b/src/engine/tts/engines/qwen3/index.ts new file mode 100644 index 000000000..8a4122dd8 --- /dev/null +++ b/src/engine/tts/engines/qwen3/index.ts @@ -0,0 +1,2 @@ 
+export { Qwen3TTSEngine } from './Qwen3TTSEngine'; +export { QWEN3_TTS_ASSETS, QWEN3_TTS_TALKER, QWEN3_TTS_PREDICTOR, QWEN3_TTS_CODEC, QWEN3_TTS_SAMPLE_RATE } from './models'; diff --git a/src/engine/tts/engines/qwen3/models.ts b/src/engine/tts/engines/qwen3/models.ts new file mode 100644 index 000000000..bd66d8218 --- /dev/null +++ b/src/engine/tts/engines/qwen3/models.ts @@ -0,0 +1,41 @@ +/** + * Qwen3-TTS model asset definitions. + * + * Three-model pipeline: Talker (LLM) + Predictor + Codec decoder. + * GGUF conversions via LunaVox project. + * + * TODO: Verify exact URLs and file sizes once we commit to a quant level. + */ +import type { ModelAsset } from '../../../types'; + +export const QWEN3_TTS_TALKER: ModelAsset = { + id: 'talker', + label: 'Talker Model (0.6B)', + url: 'https://huggingface.co/wkwong/Lunavox-Qwen3-TTS-GGUF/resolve/main/base_small/qwen3_tts_talker.q5_k.gguf', + sizeBytes: 450 * 1024 * 1024, // ~450MB Q5_K estimate + filename: 'qwen3-tts-talker-q5k.gguf', +}; + +export const QWEN3_TTS_PREDICTOR: ModelAsset = { + id: 'predictor', + label: 'Predictor Model', + url: 'https://huggingface.co/wkwong/Lunavox-Qwen3-TTS-GGUF/resolve/main/base_small/qwen3_tts_predictor.q8_0.gguf', + sizeBytes: 150 * 1024 * 1024, // ~150MB Q8 estimate + filename: 'qwen3-tts-predictor-q8.gguf', +}; + +export const QWEN3_TTS_CODEC: ModelAsset = { + id: 'codec', + label: 'Audio Codec', + url: 'https://huggingface.co/wkwong/Lunavox-Qwen3-TTS-GGUF/resolve/main/base_small/qwen3_tts_decoder.fp16.onnx', + sizeBytes: 50 * 1024 * 1024, // ~50MB estimate + filename: 'qwen3-tts-decoder-fp16.onnx', +}; + +export const QWEN3_TTS_ASSETS: ModelAsset[] = [ + QWEN3_TTS_TALKER, + QWEN3_TTS_PREDICTOR, + QWEN3_TTS_CODEC, +]; + +export const QWEN3_TTS_SAMPLE_RATE = 24000; diff --git a/src/engine/types.ts b/src/engine/types.ts new file mode 100644 index 000000000..5ee592536 --- /dev/null +++ b/src/engine/types.ts @@ -0,0 +1,344 @@ +/** + * On-Device Engine Types + * + * Base interfaces for 
multimodal on-device AI engines. + * TTS is the first concrete implementation; STT, Vision, and LLM + * engines will inherit the same base pattern. + * + * Designed for mobile — optimized for llama.rn, llama.cpp, ONNX Runtime, + * and ExecuTorch runtimes. + */ +import type React from 'react'; + +// ─── Engine Phase ─────────────────────────────────────────────────────────── + +/** Unified lifecycle phase for any on-device engine */ +export type EnginePhase = + | 'idle' // Not loaded, not doing anything + | 'downloading' // One or more assets downloading + | 'loading' // Models being loaded into memory + | 'ready' // Models loaded, ready to process + | 'processing' // Actively running inference or playback + | 'paused' // Processing suspended (resumable) + | 'error'; // Something went wrong + +// ─── Model Assets ─────────────────────────────────────────────────────────── + +export type ModelAssetStatus = 'not-downloaded' | 'downloading' | 'downloaded' | 'error'; + +/** Describes a single downloadable model file (GGUF, ONNX, .pte, .bin, etc.) 
*/ +export interface ModelAsset { + /** Engine-scoped unique ID (e.g., 'backbone', 'vocoder', 'talker') */ + id: string; + /** Human-readable label for UI */ + label: string; + /** Remote URL to download from (e.g., HuggingFace) */ + url: string; + /** Expected file size in bytes */ + sizeBytes: number; + /** Local filename (engine decides the directory) */ + filename: string; +} + +/** Runtime state of a single model asset */ +export interface ModelAssetState { + asset: ModelAsset; + status: ModelAssetStatus; + /** Download progress 0–1 */ + progress: number; + /** Absolute local file path once downloaded */ + localPath?: string; + /** Error message if status === 'error' */ + error?: string; +} + +// ─── Engine Capabilities ──────────────────────────────────────────────────── + +export interface EngineCapabilities { + /** Supports streaming output (chunks emitted during processing) */ + streaming: boolean; + /** Minimum OS requirements — engine enforces at runtime */ + platformRequirements?: { + android?: { minSdkVersion: number }; + ios?: { minVersion: number }; + }; + /** Approximate peak RAM usage in MB during inference */ + peakRamMB: number; +} + +// ─── Base Event Map ───────────────────────────────────────────────────────── + +/** Events shared by all engine modalities */ +export interface BaseEngineEvents { + [key: string]: (...args: any[]) => void; + /** Fired on every lifecycle phase transition */ + phaseChange: (phase: EnginePhase, previousPhase: EnginePhase) => void; + /** Fired on download progress for any asset */ + downloadProgress: (data: { + assetId: string; + progress: number; + bytesWritten: number; + totalBytes: number; + }) => void; + /** Fired on any error */ + error: (data: { + code: string; + message: string; + recoverable: boolean; + }) => void; +} + +// ─── Base Engine Interface ────────────────────────────────────────────────── + +/** + * Base interface for all on-device AI engines. 
+ * + * Every modality (TTS, STT, Vision, LLM) extends this with modality-specific + * methods and events. The shared surface covers lifecycle, asset management, + * and the typed event system. + * + * @typeParam TEvents — union of base + modality-specific events + */ +export interface OnDeviceEngine< + TEvents extends BaseEngineEvents = BaseEngineEvents, +> { + /** Unique engine identifier (e.g., 'kokoro', 'outetts', 'qwen3-tts') */ + readonly id: string; + /** Human-readable display name */ + readonly displayName: string; + /** Static capabilities — does not change at runtime */ + readonly capabilities: EngineCapabilities; + + // ── State ─────────────────────────────────────────────────────────────── + + /** Current lifecycle phase */ + getPhase(): EnginePhase; + + // ── Events ────────────────────────────────────────────────────────────── + + /** Subscribe to an event. Returns an unsubscribe function. */ + on( + event: K, + listener: TEvents[K], + ): () => void; + + /** Unsubscribe a specific listener */ + off( + event: K, + listener: TEvents[K], + ): void; + + /** Subscribe to an event once — auto-unsubscribes after first fire */ + once( + event: K, + listener: TEvents[K], + ): () => void; + + // ── Lifecycle ─────────────────────────────────────────────────────────── + + /** Runtime platform compatibility check */ + isSupported(): boolean; + + /** + * Load models into memory. For hook-based engines this may be a no-op + * (initialization happens via the React bridge component). + * + * Phase transition: idle → loading → ready + */ + initialize(): Promise; + + /** + * Release models and resources. Engine returns to 'idle' but retains + * downloaded assets on disk. + * + * Phase transition: any → idle + */ + release(): Promise; + + /** + * Full teardown — release models AND delete downloaded assets. 
+ * + * Phase transition: any → idle (assets cleared) + */ + destroy(): Promise; + + // ── Asset Management ──────────────────────────────────────────────────── + + /** List of model files this engine requires */ + getRequiredAssets(): ModelAsset[]; + + /** Check which assets exist on disk. Updates internal state + emits events. */ + checkAssetStatus(): Promise; + + /** + * Download required assets. Emits `downloadProgress` per asset. + * @param assetIds — optional subset; omit to download all missing + */ + downloadAssets(assetIds?: string[]): Promise; + + /** + * Delete downloaded assets from disk. Releases models first if loaded. + * @param assetIds — optional subset; omit to delete all + */ + deleteAssets(assetIds?: string[]): Promise; + + /** Aggregate download progress across all assets (0–1), weighted by size */ + getOverallDownloadProgress(): number; + + /** True if every required asset exists on disk */ + isFullyDownloaded(): boolean; + + // ── React Bridge ──────────────────────────────────────────────────────── + + /** + * If the engine requires a React component mounted in the tree (e.g., + * wrapping a React hook), return it here. The app renders it near the + * root via . Return null for fully imperative engines. + */ + getBridgeComponent(): React.ComponentType | null; +} + +// ─── TTS-Specific Types ───────────────────────────────────────────────────── + +export interface TTSVoice { + /** Engine-scoped unique ID (e.g., 'af_heart', 'default', 'zh-female-1') */ + id: string; + /** Human-readable label */ + label: string; + /** Freeform metadata — accent, gender, persona, language, etc. 
*/ + metadata: Record; + /** True if this voice supports cloning from reference audio */ + isCloneable?: boolean; +} + +export interface TTSEngineCapabilities extends EngineCapabilities { + /** Supports zero-shot voice cloning from reference audio */ + voiceCloning: boolean; + /** Supports pause/resume during playback */ + pauseResume: boolean; + /** Supports generate-and-save-to-file (Audio Mode) */ + generateAndSave: boolean; +} + +export interface TTSSpeakOptions { + /** Playback speed multiplier (0.5–2.0) */ + speed?: number; + /** Voice ID override (uses active voice if omitted) */ + voiceId?: string; + /** Message ID for ownership tracking */ + messageId?: string; + /** Path to reference audio for voice cloning engines */ + referenceAudioPath?: string; + /** Abort signal for cancellation */ + signal?: AbortSignal; +} + +export interface TTSGenerateResult { + /** Absolute path to saved audio file */ + filePath: string; + /** Audio duration in seconds */ + durationSeconds: number; + /** Downsampled amplitude envelope (~200 points) for waveform UI */ + waveformData: number[]; +} + +/** TTS-specific events (extends base events) */ +export interface TTSEngineEvents extends BaseEngineEvents { + /** Streaming audio chunk (for engines that support streaming) */ + audioChunk: (data: { + samples: Float32Array; + sampleRate: number; + chunkIndex: number; + /** True if this is the last chunk in the current utterance */ + isFinal: boolean; + }) => void; + + /** Full audio generation complete (for non-streaming engines) */ + audioComplete: (data: { + samples: Float32Array; + sampleRate: number; + durationSeconds: number; + waveformData: number[]; + }) => void; + + /** RMS amplitude update for waveform visualization */ + amplitudeChange: (amplitude: number) => void; + + /** Playback elapsed time tick */ + playbackTick: (elapsedSeconds: number) => void; + + /** Active voice changed */ + voiceChanged: (voiceId: string) => void; +} + +// ─── TTS Engine Interface 
─────────────────────────────────────────────────── + +/** + * The TTS engine interface. Every TTS implementation (Kokoro, OuteTTS, + * Qwen3-TTS, etc.) implements this. The store delegates to the active + * engine without knowing which one it is. + */ +export interface TTSEngine extends OnDeviceEngine { + readonly capabilities: TTSEngineCapabilities; + + // ── Voices ────────────────────────────────────────────────────────────── + + /** All voices this engine supports */ + getVoices(): TTSVoice[]; + + /** Currently active voice (null if none set) */ + getActiveVoice(): TTSVoice | null; + + /** + * Set the active voice. Some engines require a reload/remount to change + * voices — this method handles that transparently. Emits `voiceChanged` + * when the voice is actually active. + */ + setVoice(voiceId: string): Promise; + + // ── Speech ────────────────────────────────────────────────────────────── + + /** + * Speak text aloud (Chat Mode primary method). + * + * Streaming engines emit `audioChunk` during playback. + * Non-streaming engines emit `audioComplete` after generation, then play. + * + * Resolves when playback finishes or is stopped. + * Phase transition: ready → processing → ready + */ + speak(text: string, options?: TTSSpeakOptions): Promise; + + /** + * Generate audio and save to file (Audio Mode primary method). + * Check `capabilities.generateAndSave` before calling. + */ + generateAndSave( + text: string, + conversationId: string, + messageId: string, + options?: TTSSpeakOptions, + ): Promise; + + /** + * Play a previously saved audio file. + * Used by Audio Mode to replay cached messages. 
+ */ + playFromFile( + filePath: string, + options?: { + speed?: number; + startOffset?: number; + messageId?: string; + }, + ): Promise; + + /** Stop all speech/playback immediately */ + stop(): void; + + /** Pause current playback (requires capabilities.pauseResume) */ + pause(): void; + + /** Resume paused playback */ + resume(): void; +} diff --git a/src/hooks/useTTS.ts b/src/hooks/useTTS.ts new file mode 100644 index 000000000..e0cec108d --- /dev/null +++ b/src/hooks/useTTS.ts @@ -0,0 +1,39 @@ +import { useEffect, useCallback } from 'react'; +import { useTTSStore } from '../stores/ttsStore'; +import { hardwareService } from '../services/hardware'; +import { TTS_WARN_RAM_GB, TTS_BLOCK_RAM_GB } from '../constants/ttsModels'; + +export function useTTS() { + const store = useTTSStore(); + + useEffect(() => { + store.checkDownloadStatus(); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []); + + const canRunOnDevice = useCallback((): { allowed: boolean; warning: boolean } => { + const ramGB = hardwareService.getTotalMemoryGB(); + return { + allowed: ramGB >= TTS_BLOCK_RAM_GB, + warning: ramGB < TTS_WARN_RAM_GB, + }; + }, []); + + const speakMessage = useCallback( + (text: string, messageId: string) => { + store.speak(text, messageId); + }, + // eslint-disable-next-line react-hooks/exhaustive-deps + [store.isReady], + ); + + return { + ...store, + speakMessage, + canRunOnDevice, + isDownloading: store.isDownloading, + overallDownloadProgress: store.overallDownloadProgress, + isAudioMode: store.settings.interfaceMode === 'audio', + isChatMode: store.settings.interfaceMode === 'chat', + }; +} diff --git a/src/navigation/AppNavigator.tsx b/src/navigation/AppNavigator.tsx index 1d15b73a0..517357a23 100644 --- a/src/navigation/AppNavigator.tsx +++ b/src/navigation/AppNavigator.tsx @@ -32,6 +32,7 @@ import { DownloadManagerScreen, ModelSettingsScreen, VoiceSettingsScreen, + TTSSettingsScreen, DeviceInfoScreen, StorageSettingsScreen, SecuritySettingsScreen, 
@@ -229,6 +230,7 @@ export const AppNavigator: React.FC = () => { + diff --git a/src/navigation/types.ts b/src/navigation/types.ts index 21b876daa..b58d03c1b 100644 --- a/src/navigation/types.ts +++ b/src/navigation/types.ts @@ -16,6 +16,7 @@ export type RootStackParamList = { ModelSettings: undefined; RemoteServers: undefined; VoiceSettings: undefined; + TTSSettings: undefined; DeviceInfo: undefined; StorageSettings: undefined; SecuritySettings: undefined; diff --git a/src/screens/ChatScreen/ChatMessageArea.tsx b/src/screens/ChatScreen/ChatMessageArea.tsx index f7611cc00..374c80bc2 100644 --- a/src/screens/ChatScreen/ChatMessageArea.tsx +++ b/src/screens/ChatScreen/ChatMessageArea.tsx @@ -1,5 +1,6 @@ import React, { useState, useMemo } from 'react'; -import { View, FlatList, Text, Keyboard, ActivityIndicator, Platform } from 'react-native'; +import { View, FlatList, Text, Keyboard, ActivityIndicator, Platform, StyleSheet } from 'react-native'; +import { useTTSStore } from '../../stores/ttsStore'; import Icon from 'react-native-vector-icons/Feather'; import Animated, { FadeIn } from 'react-native-reanimated'; import { AttachStep } from 'react-native-spotlight-tour'; @@ -28,6 +29,10 @@ export type ChatMessageAreaProps = { export const ChatMessageArea: React.FC = ({ flatListRef, isNearBottomRef, chat, styles, colors, handleScroll, renderItem, chatSpotlight, }) => { + // Hide FlatList until initial layout + scroll is complete to prevent visible scroll jump + const [isListReady, setIsListReady] = useState(false); + const hasScrolledRef = React.useRef(false); + const interfaceMode = useTTSStore((s) => s.settings.interfaceMode); const tabNav = useNavigation>(); const [inputHeight, setInputHeight] = useState(84); const activeModelRepoId = chat.activeModelId?.split('/').slice(0, 2).join('/'); @@ -52,12 +57,26 @@ export const ChatMessageArea: React.FC = ({ ) : ( item.id} + extraData={interfaceMode} contentContainerStyle={styles.messageList} onScroll={handleScroll} - 
onContentSizeChange={(_w, _h) => { if (isNearBottomRef.current) flatListRef.current?.scrollToEnd({ animated: false }); }} + onContentSizeChange={(_w, h) => { + if (!hasScrolledRef.current && h > 0) { + // Initial layout: force scroll to bottom regardless of isNearBottom + flatListRef.current?.scrollToEnd({ animated: false }); + hasScrolledRef.current = true; + // Reveal after a frame so the scroll position settles + requestAnimationFrame(() => { + requestAnimationFrame(() => setIsListReady(true)); + }); + } else if (isNearBottomRef.current) { + flatListRef.current?.scrollToEnd({ animated: false }); + } + }} onLayout={() => { }} scrollEventThrottle={16} keyboardDismissMode="on-drag" @@ -140,3 +159,7 @@ export const ChatMessageArea: React.FC = ({ ); }; + +const hiddenStyle = StyleSheet.create({ + hidden: { opacity: 0 }, +}); diff --git a/src/screens/ChatScreen/ChatModalSection.tsx b/src/screens/ChatScreen/ChatModalSection.tsx index 301b3bdc0..76f90703a 100644 --- a/src/screens/ChatScreen/ChatModalSection.tsx +++ b/src/screens/ChatScreen/ChatModalSection.tsx @@ -83,6 +83,7 @@ export const ChatModalSection: React.FC = ({ onOpenProject={() => setShowProjectSelector(true)} onOpenGallery={imageCount > 0 ? () => navigation.navigate('Gallery', { conversationId: activeConversationId }) : undefined} onDeleteConversation={activeConversation ? 
handleDeleteConversation : undefined} + onOpenTTSSettings={() => { setShowSettingsPanel(false); navigation.navigate('TTSSettings'); }} conversationImageCount={imageCount} activeProjectName={activeProject?.name || null} isRemote={isRemote} diff --git a/src/screens/ChatScreen/MessageRenderer.tsx b/src/screens/ChatScreen/MessageRenderer.tsx index 5cf4a0cc1..e5511441c 100644 --- a/src/screens/ChatScreen/MessageRenderer.tsx +++ b/src/screens/ChatScreen/MessageRenderer.tsx @@ -1,7 +1,18 @@ -import React from 'react'; +import React, { useState } from 'react'; +import { View, StyleSheet } from 'react-native'; import { ChatMessage } from '../../components'; +import { AudioMessageBubble } from '../../components/AudioMessageBubble'; +import { TTSButton } from '../../components/TTSButton'; +import { AnimatedEntry } from '../../components/AnimatedEntry'; +import { useTTSStore } from '../../stores/ttsStore'; +import { stripControlTokens } from '../../utils/messageContent'; import { Message } from '../../types'; +import '../../types/tts'; import { ChatMessageItem } from './useChatScreen'; +import { parseThinkingContent, buildMessageData } from '../../components/ChatMessage/utils'; +import { ThinkingBlock } from '../../components/ChatMessage/components/ThinkingBlock'; +import { createStyles as createChatStyles } from '../../components/ChatMessage/styles'; +import { useThemedStyles } from '../../theme'; type MessageRendererProps = { item: Message | ChatMessageItem; @@ -19,31 +30,215 @@ type MessageRendererProps = { onImagePress: (uri: string) => void; }; -export const MessageRenderer: React.FC = ({ - item, - index, - displayMessagesLength, - animateLastN, - imageModelLoaded, - isStreaming, - isGeneratingImage, - showGenerationDetails, - onCopy, - onRetry, - onEdit, - onGenerateImage, - onImagePress, -}) => ( - 0 && index >= displayMessagesLength - animateLastN} - /> -); +/** Renders the thinking/reasoning block for audio mode without the ChatMessage bubble wrapper */ +const 
AudioModeThinkingBlock: React.FC<{ msg: Message }> = ({ msg }) => { + const chatStyles = useThemedStyles(createChatStyles); + const [showThinking, setShowThinking] = useState(false); + const { parsedContent } = buildMessageData(msg); + if (!parsedContent.thinking) return null; + return ( + + setShowThinking((v) => !v)} + styles={chatStyles} + /> + + ); +}; + +interface AudioBubbleProps { + messageId: string; + audioPath: string; + waveformData: number[]; + durationSeconds: number; + transcript: string; + _reasoningContent?: string; +} + +function buildAudioBubbleProps(msg: Message): AudioBubbleProps { + const transcript = stripControlTokens(msg.content); + console.log('[AudioBubble] buildProps: msgId=', msg.id, 'contentLen=', msg.content.length, 'transcriptLen=', transcript.length); + return { + messageId: msg.id, + audioPath: msg.audioPath ?? '', + waveformData: msg.waveformData ?? [], + durationSeconds: msg.audioDurationSeconds ?? 0, + transcript, + _reasoningContent: msg.reasoningContent, + }; +} + +/** Wraps content with AnimatedEntry if needed */ +function wrapAnimated(content: React.ReactElement, shouldAnimate: boolean): React.ReactElement { + return shouldAnimate ? 
{content} : content; +} + +/** Renders a user voice message as an audio bubble */ +function renderUserAudioBubble( + opts: { msg: Message; audioAtt: any; shouldAnimate: boolean }, + props: MessageRendererProps, +): React.ReactElement { + const { msg, audioAtt, shouldAnimate } = opts; + const bubble = ( + + props.onRetry(msg)} + /> + + ); + return wrapAnimated(bubble, shouldAnimate); +} + +/** Renders a streaming/thinking assistant message in audio mode as a ChatMessage */ +function renderAudioStreamingMessage( + msg: Message, + isStreamingThis: boolean, + props: MessageRendererProps, +): React.ReactElement { + return ( + + ); +} + +/** Renders a completed assistant audio bubble, with optional tool call UI */ +function renderAudioAssistantBubble( + msg: Message, + shouldAnimate: boolean, + props: MessageRendererProps, +): React.ReactElement { + const hasThinking = !!msg.reasoningContent || !!parseThinkingContent(msg.content).thinking; + const hasToolCalls = !!msg.toolCalls?.length; + + // For messages with tool calls, render as a regular ChatMessage (has proper tool call UI) + // followed by the audio bubble for the spoken text + if (hasToolCalls) { + const element = ( + + + + ); + return wrapAnimated(element, shouldAnimate); + } + + const bubble = ( + + {hasThinking && } + props.onRetry(msg)} + /> + + ); + return wrapAnimated(bubble, shouldAnimate); +} + +export const MessageRenderer: React.FC = (props) => { + const { + item, + index, + displayMessagesLength, + animateLastN, + imageModelLoaded, + isStreaming, + isGeneratingImage, + showGenerationDetails, + onCopy, + onRetry, + onEdit, + onGenerateImage, + onImagePress, + } = props; + + const ttsMode = useTTSStore((s) => s.settings.interfaceMode); + const msg = item as Message; + const animateEntry = animateLastN > 0 && index >= displayMessagesLength - animateLastN; + const isStreamingThis = item.id === 'streaming'; + + // User voice message: always show as audio bubble + if (msg.role === 'user') { + const audioAtt 
= msg.attachments?.find((a) => a.type === 'audio'); + if (audioAtt) { + return renderUserAudioBubble({ msg, audioAtt, shouldAnimate: animateEntry }, props); + } + } + + const isAudioAssistant = msg.role === 'assistant' && !msg.isSystemInfo; + + // Thinking placeholder + audio streaming + const isThinkingItem = !!(msg as any).isThinking; + if (isAudioAssistant && ttsMode === 'audio' && (isStreamingThis || isThinkingItem)) { + return renderAudioStreamingMessage(msg, isStreamingThis, props); + } + + // Audio Mode: show assistant messages as audio bubbles after streaming ends + if (isAudioAssistant && ttsMode === 'audio' && !isStreamingThis) { + return renderAudioAssistantBubble(msg, animateEntry, props); + } + + // Chat Mode: TTSButton lives in the meta row + const isPlainAssistant = msg.role === 'assistant' && !msg.isSystemInfo && !msg.toolCalls?.length; + const ttsMeta = isPlainAssistant && !isStreamingThis + ? + : undefined; + + return ( + + ); +}; + +const audioStyles = StyleSheet.create({ + userContainer: { + paddingRight: 16, + marginVertical: 8, + alignItems: 'flex-end', + }, + assistantContainer: { + paddingHorizontal: 16, + marginVertical: 8, + alignItems: 'flex-start', + }, +}); diff --git a/src/screens/ChatScreen/index.tsx b/src/screens/ChatScreen/index.tsx index 2be6468e8..bdf0c1380 100644 --- a/src/screens/ChatScreen/index.tsx +++ b/src/screens/ChatScreen/index.tsx @@ -1,5 +1,6 @@ import React, { useCallback, useEffect, useRef, useState } from 'react'; import { FlatList, KeyboardAvoidingView, InteractionManager } from 'react-native'; +import { useTTSStore } from '../../stores/ttsStore'; import { SafeAreaView } from 'react-native-safe-area-context'; import { useFocusEffect } from '@react-navigation/native'; import { useSpotlightTour } from 'react-native-spotlight-tour'; @@ -101,6 +102,22 @@ export const ChatScreen: React.FC = () => { setTimeout(() => { flatListRef.current?.scrollToEnd({ animated: true }); }, 100); } }, 
[chat.activeConversation?.messages.length]); + + // Reset scroll when switching between chat/audio interface modes + const interfaceMode = useTTSStore((s) => s.settings.interfaceMode); + const prevModeRef = React.useRef(interfaceMode); + React.useEffect(() => { + if (prevModeRef.current !== interfaceMode) { + prevModeRef.current = interfaceMode; + isNearBottomRef.current = true; + chat.setShowScrollToBottom(false); + // FlatList re-renders via extraData; onContentSizeChange fires and scrolls. + // Backup: scroll after items have had time to re-measure. + setTimeout(() => { flatListRef.current?.scrollToEnd({ animated: false }); }, 300); + } + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [interfaceMode]); + const alertEl = ( = Dispatch>; const FALLBACK_RECENT_MESSAGE_COUNT = 2; + +/** + * Appended to the system prompt when TTS audio mode is active. + * Guides the model to respond conversationally for voice output. + */ +const AUDIO_MODE_PROMPT_HINT = ` + +[VOICE MODE ACTIVE — your response will be spoken aloud via text-to-speech] +Respond as if you are speaking to the user in a natural conversation: +- Be concise and conversational — talk like a person, not a document +- Never use markdown formatting (no headers, bullets, bold, code blocks, tables) +- Never use special characters, symbols, or emoji that sound awkward when read aloud +- Use short sentences and natural spoken transitions ("So,", "Basically,", "Here's the thing —") +- If summarizing research or long content, give the key takeaways in a few spoken paragraphs, not an essay +- Numbers: say "about two thousand" not "~2,000" +- Keep responses under 2-3 paragraphs unless the user explicitly asks for detail +- Use expressive punctuation for natural prosody: exclamation marks for emphasis!, question marks for curiosity?, ellipses for pauses..., and vary sentence length for rhythm`; export type GenerationDeps = { activeModelId: string | null; activeModel: DownloadedModel | null | undefined; @@ 
-248,7 +265,13 @@ export async function startGenerationFn(deps: GenerationDeps, call: StartGenerat } const conversation = useChatStore.getState().conversations.find(c => c.id === targetConversationId); const { enabledTools, rawPrompt } = resolveToolsAndPrompt(deps, conversation); - const basePrompt = await injectRagContext(conversation?.projectId, messageText, rawPrompt); + let basePrompt = await injectRagContext(conversation?.projectId, messageText, rawPrompt); + + // In audio mode, append instructions for conversational voice-friendly responses + if (useTTSStore.getState().settings.interfaceMode === 'audio') { + basePrompt += AUDIO_MODE_PROMPT_HINT; + } + const isRemote = !!useRemoteServerStore.getState().activeRemoteTextModelId; const activeTools = enabledTools; const systemPrompt = applyGemma4ThinkToken( diff --git a/src/screens/ChatScreen/useChatMessageHandlers.ts b/src/screens/ChatScreen/useChatMessageHandlers.ts index c9ff7f1c4..f20d82379 100644 --- a/src/screens/ChatScreen/useChatMessageHandlers.ts +++ b/src/screens/ChatScreen/useChatMessageHandlers.ts @@ -1,6 +1,7 @@ import { Dispatch, SetStateAction } from 'react'; import { showAlert, AlertState } from '../../components'; import { Message } from '../../types'; +import { useTTSStore } from '../../stores/ttsStore'; import { regenerateResponseFn, executeDeleteConversationFn, handleImageGenerationFn, } from './useChatGenerationActions'; @@ -20,6 +21,8 @@ export async function handleRetryMessageFn( message: Message, genDeps: GenerationDeps, p: RetryParams, ): Promise { if (!p.activeConversationId || !p.hasActiveModel) return; + // Stop any in-flight TTS before deleting messages + useTTSStore.getState().stop(); const msgs = p.activeConversation?.messages || []; if (message.role === 'user') { const idx = msgs.findIndex((m: Message) => m.id === message.id); diff --git a/src/screens/ChatScreen/useChatScreen.ts b/src/screens/ChatScreen/useChatScreen.ts index e543b7e5c..cd426c75d 100644 --- 
a/src/screens/ChatScreen/useChatScreen.ts +++ b/src/screens/ChatScreen/useChatScreen.ts @@ -1,7 +1,9 @@ import { useCallback, useEffect, useMemo, useRef, useState } from 'react'; +import { AppState } from 'react-native'; import { useNavigation, useRoute, RouteProp } from '@react-navigation/native'; import { AlertState, initialAlertState } from '../../components'; -import { useAppStore, useChatStore, useProjectStore, useRemoteServerStore } from '../../stores'; +import { useAppStore, useChatStore, useProjectStore, useRemoteServerStore, useTTSStore } from '../../stores'; +import '../../types/tts'; import logger from '../../utils/logger'; import { llmService, generationService, imageGenerationService, activeModelService, @@ -15,10 +17,16 @@ import { startGenerationFn, handleSendFn, handleStopFn, handleSelectProjectFn } import { handleRetryMessageFn, handleEditMessageFn, handleDeleteConversationFn, handleGenerateImageFromMsgFn } from './useChatMessageHandlers'; import { getDisplayMessages, getPlaceholderText, ChatMessageItem, StreamingState } from './types'; import { saveImageToGallery } from './useSaveImage'; +import { stripControlTokens, stripMarkdownForSpeech } from '../../utils/messageContent'; export type { AlertState, ChatMessageItem, StreamingState }; export { getDisplayMessages, getPlaceholderText }; +function _triggerAudioModeGeneration(conversationId: string, messageId: string, content: string) { + useChatStore.getState().updateMessageAudio(conversationId, messageId, { isAudioModeMessage: true }); + useTTSStore.getState().speak(stripMarkdownForSpeech(stripControlTokens(content)), messageId); +} + type ChatScreenRouteProp = RouteProp; type ActiveModelInfo = { @@ -53,6 +61,26 @@ export const useChatScreen = () => { const [isCompacting, setIsCompacting] = useState(false); const lastMessageCountRef = useRef(0); const generatingForConversationRef = useRef(null); + + // Stop TTS when navigating away, app backgrounded, or screen locked + useEffect(() => { + const 
unsubBlur = navigation.addListener('blur', () => { + useTTSStore.getState().stop(); + }); + // beforeRemove fires on back button — more reliable than blur for native-stack + const unsubRemove = navigation.addListener('beforeRemove', () => { + useTTSStore.getState().stop(); + }); + const appStateSub = AppState.addEventListener('change', (nextState) => { + const tts = useTTSStore.getState(); + if (nextState !== 'active') { + if (tts.isSpeaking && !tts.isPaused) { tts.pause(); } + } else { + if (tts.isSpeaking && tts.isPaused) { tts.resume(); } + } + }); + return () => { unsubBlur(); unsubRemove(); appStateSub.remove(); }; + }, [navigation]); const modelLoadStartTimeRef = useRef(null); const startGenerationRef = useRef<(id: string, text: string) => Promise>(null as any); const addMessageRef = useRef(null as any); @@ -193,6 +221,40 @@ export const useChatScreen = () => { lastMessageCountRef.current = curr; }, [displayMessages.length]); useEffect(() => { lastMessageCountRef.current = 0; setAnimateLastN(0); }, [activeConversationId]); + const prevStreamingRef = useRef(false); + + // Stop any in-flight TTS when a new streaming response begins + useEffect(() => { + if (isStreamingForThisConversation && useTTSStore.getState().isSpeaking) { + useTTSStore.getState().stop(); + } + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [isStreamingForThisConversation]); + + // When streaming ends, speak the full response as a single TTS call + useEffect(() => { + const was = prevStreamingRef.current; + prevStreamingRef.current = isStreamingForThisConversation; + if (!was || isStreamingForThisConversation || !activeConversationId) return; + const tts = useTTSStore.getState(); + if (tts.settings.interfaceMode !== 'audio') return; + const conv = useChatStore.getState().conversations.find((c) => c.id === activeConversationId); + const last = (conv?.messages ?? 
[]).at(-1); + if (!last || last.role !== 'assistant' || last.isSystemInfo || last.toolCalls?.length || last.audioPath) return; + // Stamp as audio-mode. Estimate duration from word count (avg 2.5 words/sec) + const wordCount = last.content.split(/\s+/).filter(Boolean).length; + const speed = useTTSStore.getState().settings.speed || 1; + const estDuration = Math.max(1, wordCount / (2.5 * speed)); + useChatStore.getState().updateMessageAudio(activeConversationId, last.id, { + isAudioModeMessage: true, + audioDurationSeconds: estDuration, + }); + if (!tts.isReady) return; + const fullText = stripMarkdownForSpeech(stripControlTokens(last.content)).trim(); + if (fullText) { + useTTSStore.getState().speak(fullText, last.id); + } + }, [isStreamingForThisConversation]); // eslint-disable-line react-hooks/exhaustive-deps const startGeneration = async (targetConversationId: string, messageText: string) => { await startGenerationFn(genDeps, { setDebugInfo, targetConversationId, messageText }); diff --git a/src/screens/DownloadManagerScreen/index.tsx b/src/screens/DownloadManagerScreen/index.tsx index 3829299fa..46c2312ff 100644 --- a/src/screens/DownloadManagerScreen/index.tsx +++ b/src/screens/DownloadManagerScreen/index.tsx @@ -1,5 +1,5 @@ -import React from 'react'; -import { View, Text, FlatList, TouchableOpacity, RefreshControl } from 'react-native'; +import React, { useState, useCallback } from 'react'; +import { View, Text, FlatList, TouchableOpacity, RefreshControl, ScrollView } from 'react-native'; import { SafeAreaView } from 'react-native-safe-area-context'; import Icon from 'react-native-vector-icons/Feather'; import { Card } from '../../components'; @@ -7,13 +7,35 @@ import { CustomAlert, hideAlert } from '../../components/CustomAlert'; import { useTheme, useThemedStyles } from '../../theme'; import { useNavigation } from '@react-navigation/native'; import { createStyles } from './styles'; -import { ActiveDownloadCard, CompletedDownloadCard, formatBytes } from 
'./items'; +import { ActiveDownloadCard, CompletedDownloadCard, formatBytes, type DownloadItem } from './items'; import { useDownloadManager } from './useDownloadManager'; +type FilterType = 'all' | 'text' | 'vision' | 'image' | 'tts' | 'stt'; + +const FILTERS: { id: FilterType; label: string }[] = [ + { id: 'all', label: 'All' }, + { id: 'text', label: 'Text' }, + { id: 'vision', label: 'Vision' }, + { id: 'image', label: 'Image Gen' }, + { id: 'tts', label: 'Text to Speech' }, + { id: 'stt', label: 'Speech to Text' }, +]; + +function matchesFilter(item: DownloadItem, filter: FilterType): boolean { + if (filter === 'all') return true; + if (filter === 'vision') return item.modelType === 'text' && !!item.isVisionModel; + if (filter === 'text') return item.modelType === 'text' && !item.isVisionModel; + if (filter === 'image') return item.modelType === 'image'; + if (filter === 'tts') return item.modelType === 'tts'; + if (filter === 'stt') return item.modelType === 'stt'; + return true; +} + export const DownloadManagerScreen: React.FC = () => { const navigation = useNavigation(); const { colors } = useTheme(); const styles = useThemedStyles(createStyles); + const [activeFilter, setActiveFilter] = useState('all'); const { isRefreshing, activeItems, @@ -27,6 +49,30 @@ export const DownloadManagerScreen: React.FC = () => { totalStorageUsed, } = useDownloadManager(); + const filteredActive = activeItems.filter(item => matchesFilter(item, activeFilter)); + const filteredCompleted = completedItems.filter(item => matchesFilter(item, activeFilter)); + + const renderHeader = useCallback(() => ( + + {FILTERS.map(f => { + const active = activeFilter === f.id; + return ( + setActiveFilter(f.id)} + > + {f.label} + + ); + })} + + ), [activeFilter, colors, styles]); + return ( @@ -39,52 +85,47 @@ export const DownloadManagerScreen: React.FC = () => { ( - {/* Active Downloads */} - - - - Active Downloads - - {activeItems.length} + {/* Active Downloads — only show when there are 
active items */} + {filteredActive.length > 0 && ( + + + + Active Downloads + + {filteredActive.length} + - - {activeItems.length > 0 ? ( - activeItems.map(item => ( + {filteredActive.map(item => ( - )) - ) : ( - - - No active downloads - - )} - + ))} + + )} - {/* Completed Downloads */} + {/* Downloaded Models */} - + Downloaded Models - {completedItems.length} + {filteredCompleted.length} - {completedItems.length > 0 ? ( - completedItems.map(item => ( + {filteredCompleted.length > 0 ? ( + filteredCompleted.map(item => ( )) ) : ( - - No models downloaded yet - - Go to the Models tab to browse and download models + + + {activeFilter === 'all' ? 'No models downloaded yet' : `No ${FILTERS.find(f => f.id === activeFilter)?.label ?? ''} models`} )} diff --git a/src/screens/DownloadManagerScreen/items.tsx b/src/screens/DownloadManagerScreen/items.tsx index f2d20d809..8cc45992a 100644 --- a/src/screens/DownloadManagerScreen/items.tsx +++ b/src/screens/DownloadManagerScreen/items.tsx @@ -12,7 +12,7 @@ import { createStyles } from './styles'; export type DownloadItem = { type: 'active' | 'completed'; - modelType: 'text' | 'image'; + modelType: 'text' | 'image' | 'tts' | 'stt'; downloadId?: number; modelId: string; fileName: string; @@ -222,9 +222,9 @@ export const CompletedDownloadCard: React.FC = ({ it diff --git a/src/screens/DownloadManagerScreen/styles.ts b/src/screens/DownloadManagerScreen/styles.ts index 39120fa09..8f40c2839 100644 --- a/src/screens/DownloadManagerScreen/styles.ts +++ b/src/screens/DownloadManagerScreen/styles.ts @@ -33,17 +33,17 @@ export const createStyles = (colors: ThemeColors, shadows: ThemeShadows) => ({ flex: 1, }, listContent: { - paddingTop: SPACING.lg, + paddingTop: SPACING.md, paddingBottom: SPACING.xxl, }, section: { - marginBottom: SPACING.xl, + marginBottom: SPACING.md, }, sectionHeader: { flexDirection: 'row' as const, alignItems: 'center' as const, paddingHorizontal: SPACING.lg, - marginBottom: SPACING.md, + marginBottom: SPACING.sm, 
gap: SPACING.sm, }, sectionTitle: { @@ -63,7 +63,7 @@ export const createStyles = (colors: ThemeColors, shadows: ThemeShadows) => ({ }, downloadCard: { marginHorizontal: SPACING.lg, - marginBottom: SPACING.md, + marginBottom: SPACING.sm, }, downloadHeader: { flexDirection: 'row' as const, @@ -160,19 +160,47 @@ export const createStyles = (colors: ThemeColors, shadows: ThemeShadows) => ({ emptyCard: { marginHorizontal: SPACING.lg, alignItems: 'center' as const, - paddingVertical: SPACING.xxl, - gap: SPACING.sm, + paddingVertical: SPACING.xl, + gap: SPACING.xs, }, emptyText: { - ...TYPOGRAPHY.body, - color: colors.textSecondary, - marginTop: SPACING.sm, + ...TYPOGRAPHY.bodySmall, + color: colors.textMuted, + marginTop: SPACING.xs, }, emptySubtext: { - ...TYPOGRAPHY.bodySmall, + ...TYPOGRAPHY.meta, color: colors.textMuted, textAlign: 'center' as const, }, + filterBarContent: { + flexDirection: 'row' as const, + alignItems: 'center' as const, + paddingHorizontal: SPACING.lg, + paddingVertical: SPACING.sm, + gap: SPACING.xs, + }, + filterChip: { + flexDirection: 'row' as const, + alignItems: 'center' as const, + paddingHorizontal: SPACING.sm + 2, + paddingVertical: 5, + borderRadius: 12, + borderWidth: 1, + borderColor: colors.border, + backgroundColor: colors.background, + }, + filterChipActive: { + borderColor: colors.primary, + backgroundColor: `${colors.primary}15`, + }, + filterChipText: { + ...TYPOGRAPHY.meta, + color: colors.textSecondary, + }, + filterChipTextActive: { + color: colors.primary, + }, storageSection: { paddingHorizontal: SPACING.lg, }, diff --git a/src/screens/ModelSettingsScreen/ImageGenerationSection.tsx b/src/screens/ModelSettingsScreen/ImageGenerationSection.tsx index ea7c9306d..4d84b1309 100644 --- a/src/screens/ModelSettingsScreen/ImageGenerationSection.tsx +++ b/src/screens/ModelSettingsScreen/ImageGenerationSection.tsx @@ -1,7 +1,7 @@ import React, { useState } from 'react'; import { View, Text, Switch, Platform, TouchableOpacity } from 
'react-native'; -import Slider from '@react-native-community/slider'; import { AdvancedToggle, Card } from '../../components'; +import { NumericStepper } from '../../components/NumericStepper'; import { Button } from '../../components/Button'; import { useTheme, useThemedStyles } from '../../theme'; import { useAppStore } from '../../stores'; @@ -114,49 +114,28 @@ const DetectionMethodRow: React.FC = () => { // ─── Advanced Section ──────────────────────────────────────────────────────── const ImageAdvancedSection: React.FC = () => { - const { colors } = useTheme(); const styles = useThemedStyles(createStyles); const { settings, updateSettings } = useAppStore(); return ( <> - - Guidance Scale - {(settings?.imageGuidanceScale || 7.5).toFixed(1)} - + Guidance Scale Higher = follows prompt more strictly - updateSettings({ imageGuidanceScale: value })} - minimumTrackTintColor={colors.primary} - maximumTrackTintColor={colors.surface} - thumbTintColor={colors.primary} + min={1} max={20} step={0.5} decimals={1} + onChange={(value) => updateSettings({ imageGuidanceScale: value })} /> - - Image Threads - {settings?.imageThreads ?? 
4} - - - CPU threads used for image generation (applies on next image model load) - - Image Threads + CPU threads used for image generation (applies on next image model load) + updateSettings({ imageThreads: value })} - minimumTrackTintColor={colors.primary} - maximumTrackTintColor={colors.surface} - thumbTintColor={colors.primary} + min={1} max={8} step={1} + onChange={(value) => updateSettings({ imageThreads: value })} /> @@ -212,40 +191,23 @@ export const ImageGenerationSection: React.FC = () => { - - Image Steps - {settings?.imageSteps || 8} - + Image Steps More steps = better quality but slower (4-8 fast, 20-50 high quality) - updateSettings({ imageSteps: value })} - minimumTrackTintColor={colors.primary} - maximumTrackTintColor={colors.surface} - thumbTintColor={colors.primary} + min={4} max={50} step={1} + onChange={(value) => updateSettings({ imageSteps: value })} /> - - Image Size - {settings?.imageWidth ?? 256}x{settings?.imageHeight ?? 256} - + Image Size Output resolution (smaller = faster, larger = more detail) - updateSettings({ imageWidth: value, imageHeight: value })} - minimumTrackTintColor={colors.primary} - maximumTrackTintColor={colors.surface} - thumbTintColor={colors.primary} + min={128} max={512} step={64} + formatValue={(v) => `${v}x${v}`} + onChange={(value) => updateSettings({ imageWidth: value, imageHeight: value })} /> diff --git a/src/screens/ModelSettingsScreen/TextGenerationAdvanced.tsx b/src/screens/ModelSettingsScreen/TextGenerationAdvanced.tsx index 33faa2293..e1387488b 100644 --- a/src/screens/ModelSettingsScreen/TextGenerationAdvanced.tsx +++ b/src/screens/ModelSettingsScreen/TextGenerationAdvanced.tsx @@ -1,7 +1,7 @@ import React from 'react'; import { View, Text, Switch, Platform } from 'react-native'; -import Slider from '@react-native-community/slider'; import { Button } from '../../components/Button'; +import { NumericStepper } from '../../components/NumericStepper'; import { useTheme, useThemedStyles } from '../../theme'; 
import { useAppStore } from '../../stores'; import { CacheType } from '../../types'; @@ -52,24 +52,15 @@ const GpuSection: React.FC = ({ {isGpuEnabled && ( - - GPU Layers - {gpuLayersEffective} - + GPU Layers Layers offloaded to GPU. Higher = faster but may crash on low-VRAM devices. - updateSettings({ gpuLayers: value })} - minimumTrackTintColor={colors.primary} - maximumTrackTintColor={colors.surface} - thumbTintColor={colors.primary} + min={1} max={GPU_LAYERS_MAX} step={1} + onChange={(value) => updateSettings({ gpuLayers: value })} /> )} @@ -207,78 +198,42 @@ export const TextGenerationAdvanced: React.FC = () => { return ( <> - - Top P - {(settings?.topP || 0.9).toFixed(2)} - + Top P Nucleus sampling threshold - updateSettings({ topP: value })} - minimumTrackTintColor={colors.primary} - maximumTrackTintColor={colors.surface} - thumbTintColor={colors.primary} + min={0.1} max={1.0} step={0.05} decimals={2} + onChange={(value) => updateSettings({ topP: value })} /> - - Repeat Penalty - {(settings?.repeatPenalty || 1.1).toFixed(2)} - + Repeat Penalty Penalize repeated tokens - updateSettings({ repeatPenalty: value })} - minimumTrackTintColor={colors.primary} - maximumTrackTintColor={colors.surface} - thumbTintColor={colors.primary} + min={1.0} max={2.0} step={0.05} decimals={2} + onChange={(value) => updateSettings({ repeatPenalty: value })} /> - - CPU Threads - {settings?.nThreads || 6} - + CPU Threads Parallel threads for inference - updateSettings({ nThreads: value })} - minimumTrackTintColor={colors.primary} - maximumTrackTintColor={colors.surface} - thumbTintColor={colors.primary} + min={1} max={12} step={1} + onChange={(value) => updateSettings({ nThreads: value })} /> - - Batch Size - {settings?.nBatch || 256} - + Batch Size Tokens processed per batch - updateSettings({ nBatch: value })} - minimumTrackTintColor={colors.primary} - maximumTrackTintColor={colors.surface} - thumbTintColor={colors.primary} + min={32} max={512} step={32} + onChange={(value) => 
updateSettings({ nBatch: value })} /> diff --git a/src/screens/ModelSettingsScreen/TextGenerationSection.tsx b/src/screens/ModelSettingsScreen/TextGenerationSection.tsx index 5b1d9099f..3ae132f43 100644 --- a/src/screens/ModelSettingsScreen/TextGenerationSection.tsx +++ b/src/screens/ModelSettingsScreen/TextGenerationSection.tsx @@ -1,7 +1,7 @@ import React, { useState } from 'react'; import { View, Text, Switch } from 'react-native'; -import Slider from '@react-native-community/slider'; import { AdvancedToggle, Card } from '../../components'; +import { NumericStepper } from '../../components/NumericStepper'; import { useTheme, useThemedStyles } from '../../theme'; import { useAppStore } from '../../stores'; import { createStyles } from './styles'; @@ -26,56 +26,40 @@ export const TextGenerationSection: React.FC = () => { const contextLengthLabel = contextLength >= 1024 ? `${(contextLength / 1024).toFixed(0)}K` : String(contextLength); - const ctxSliderMax = modelMaxContext || FALLBACK_MAX_CONTEXT; + const ctxMax = modelMaxContext || FALLBACK_MAX_CONTEXT; return ( Configure LLM behavior for text responses. 
- {/* ── Basic Settings ── */} - Temperature - {(settings?.temperature || 0.7).toFixed(2)} Higher = more creative, Lower = more focused - updateSettings({ temperature: value })} - minimumTrackTintColor={colors.primary} - maximumTrackTintColor={colors.surface} - thumbTintColor={colors.primary} + min={0} max={2} step={0.05} decimals={2} + onChange={(value) => updateSettings({ temperature: value })} /> Max Tokens - {maxTokensLabel} Maximum response length - updateSettings({ maxTokens: value })} - minimumTrackTintColor={colors.primary} - maximumTrackTintColor={colors.surface} - thumbTintColor={colors.primary} + min={64} max={8192} step={64} + formatValue={() => maxTokensLabel} + onChange={(value) => updateSettings({ maxTokens: value })} /> Context Length - {contextLengthLabel} KV cache size — larger uses more RAM (requires reload) {contextLength > HIGH_CONTEXT_THRESHOLD && ( @@ -83,16 +67,11 @@ export const TextGenerationSection: React.FC = () => { High context uses significant RAM and may crash on some devices )} - updateSettings({ contextLength: value })} - minimumTrackTintColor={colors.primary} - maximumTrackTintColor={colors.surface} - thumbTintColor={colors.primary} + min={512} max={ctxMax} step={1024} + formatValue={() => contextLengthLabel} + onChange={(value) => updateSettings({ contextLength: value })} /> diff --git a/src/screens/ModelSettingsScreen/index.tsx b/src/screens/ModelSettingsScreen/index.tsx index e0aefc790..319c93026 100644 --- a/src/screens/ModelSettingsScreen/index.tsx +++ b/src/screens/ModelSettingsScreen/index.tsx @@ -33,6 +33,7 @@ export const ModelSettingsScreen: React.FC = () => { const task = InteractionManager.runAfterInteractions(() => goTo(pending)); return () => task.cancel(); } + // eslint-disable-next-line react-hooks/exhaustive-deps }, []); const handleReset = () => { diff --git a/src/screens/SettingsScreen.tsx b/src/screens/SettingsScreen.tsx index f1cd721a0..353c9b238 100644 --- a/src/screens/SettingsScreen.tsx +++ 
b/src/screens/SettingsScreen.tsx @@ -151,6 +151,7 @@ export const SettingsScreen: React.FC = () => { { icon: 'wifi', title: 'Remote Servers', desc: 'Connect to Ollama, LM Studio, and more', screen: 'RemoteServers' as const }, // { icon: 'search', title: 'Web Search', desc: 'Configure search API key for reliable results', screen: 'WebSearchSettings' as const }, { icon: 'mic', title: 'Voice Transcription', desc: 'On-device speech to text', screen: 'VoiceSettings' as const }, + { icon: 'volume-2', title: 'Text to Speech', desc: 'On-device voice responses', screen: 'TTSSettings' as const }, { icon: 'lock', title: 'Security', desc: 'Passphrase and app lock', screen: 'SecuritySettings' as const }, { icon: 'smartphone', title: 'Device Information', desc: 'Hardware and compatibility', screen: 'DeviceInfo' as const }, { icon: 'hard-drive', title: 'Storage', desc: 'Models and data usage', screen: 'StorageSettings' as const }, diff --git a/src/screens/TTSSettingsScreen/index.tsx b/src/screens/TTSSettingsScreen/index.tsx new file mode 100644 index 000000000..a00ca7773 --- /dev/null +++ b/src/screens/TTSSettingsScreen/index.tsx @@ -0,0 +1,441 @@ +import React, { useEffect, useState } from 'react'; +import { View, Text, ScrollView, TouchableOpacity, Switch, ActivityIndicator } from 'react-native'; +import { SafeAreaView } from 'react-native-safe-area-context'; +import Icon from 'react-native-vector-icons/Feather'; +import { NumericStepper } from '../../components/NumericStepper'; +import { useNavigation } from '@react-navigation/native'; +import { Card, Button } from '../../components'; +import { CustomAlert, showAlert, hideAlert, AlertState, initialAlertState } from '../../components/CustomAlert'; +import { useTheme, useThemedStyles } from '../../theme'; +import type { ThemeColors, ThemeShadows } from '../../theme'; +import { TYPOGRAPHY, SPACING } from '../../constants'; +import { useTTSStore } from '../../stores/ttsStore'; +import { ttsRegistry } from '../../engine'; +import { 
hardwareService } from '../../services/hardware'; +import { TTS_WARN_RAM_GB, TTS_BLOCK_RAM_GB } from '../../constants/ttsModels'; +import type { InterfaceMode } from '../../stores/ttsStore'; + +// ─── Sub-components ─────────────────────────────────────────────────────────── + +type Styles = ReturnType; + +const AssetRow: React.FC<{ + label: string; + sizeMB: number; + status: string; + progress: number; + styles: Styles; + colors: ThemeColors; + border?: boolean; +}> = ({ label, sizeMB, status, progress, styles, colors, border }) => ( + + + + {label} + {sizeMB} MB + + {status === 'downloaded' && } + {status === 'downloading' && {Math.round(progress * 100)}%} + {status === 'not-downloaded' && } + {status === 'error' && } + + {status === 'downloading' && ( + + + + )} + +); + +const InterfaceModeCard: React.FC<{ + mode: InterfaceMode; + deviceBlocked: boolean; + engineReady: boolean; + onModeChange: (m: InterfaceMode) => void; + styles: Styles; +}> = ({ mode, deviceBlocked, engineReady, onModeChange, styles }) => ( + + Interface Mode + + Audio Mode renders responses as voice notes. Chat Mode adds a play button to text bubbles. + + + {(['chat', 'audio'] as InterfaceMode[]).map((m) => { + const active = mode === m; + const blocked = m === 'audio' && (deviceBlocked || !engineReady); + return ( + onModeChange(m)} + disabled={blocked} + > + + {m === 'chat' ? 'Chat' : 'Audio'} + + + ); + })} + + {!engineReady && ( + Download models below to enable Audio Mode. 
+ )} + +); + +const PlaybackCard: React.FC<{ + settings: ReturnType['settings']; + onUpdate: (patch: Partial['settings']>) => void; + colors: ThemeColors; + styles: Styles; +}> = ({ settings, onUpdate, colors, styles }) => ( + + Playback + Speed + `${v.toFixed(1)}x`} + onChange={(v) => onUpdate({ speed: v })} + /> + {settings.interfaceMode === 'chat' && ( + + + Auto-play + Speak AI responses automatically + + onUpdate({ autoPlay: v })} + trackColor={{ true: colors.primary }} + /> + + )} + +); + +const CompatibilityCard: React.FC<{ + ramGB: number; + deviceBlocked: boolean; + deviceWarning: boolean; + styles: Styles; + colors: ThemeColors; +}> = ({ ramGB, deviceBlocked, deviceWarning, styles, colors }) => { + if (!deviceWarning && !deviceBlocked) { return null; } + return ( + + + + + {deviceBlocked + ? `TTS requires at least ${TTS_BLOCK_RAM_GB} GB RAM. Your device has ${ramGB.toFixed(1)} GB.` + : `Your device (${ramGB.toFixed(1)} GB RAM) may run TTS but performance could be slow. 8 GB recommended.`} + + + + ); +}; + +const EnginePickerCard: React.FC<{ + styles: Styles; + colors: ThemeColors; +}> = ({ styles, colors }) => { + const { settings, setEngine } = useTTSStore(); + const engineIds = ttsRegistry.getRegisteredIds(); + + const handleSelect = async (id: string) => { + if (id === settings.engineId) return; + await setEngine(id); + }; + + return ( + + Engine + + Choose which on-device TTS engine powers speech synthesis. + + {engineIds.map((id, i) => { + const engine = ttsRegistry.getEngine(id); + const active = id === settings.engineId; + const supported = engine.isSupported(); + return ( + 0 && styles.voiceRowBorder]} + onPress={() => handleSelect(id)} + disabled={!supported} + > + + + {engine.displayName} + + + {engine.capabilities.peakRamMB} MB + {engine.capabilities.voiceCloning ? ' · Voice cloning' : ''} + {engine.capabilities.streaming ? ' · Streaming' : ''} + {!supported ? 
' · Not supported on this device' : ''} + + + {active && } + + ); + })} + + ); +}; + +const VoiceCard: React.FC<{ + styles: Styles; + colors: ThemeColors; +}> = ({ styles, colors }) => { + const { voices, activeVoiceId, isReady, isDownloading, overallDownloadProgress, setVoice } = useTTSStore(); + + return ( + + + Voice + {isDownloading && overallDownloadProgress > 0 && ( + {Math.round(overallDownloadProgress * 100)}% + )} + {!isReady && !isDownloading && ( + + )} + {isReady && ( + + )} + + + Fast on-device voice synthesis. Used for the speak button in Chat Mode. + + {voices.map((voice, i) => { + const active = activeVoiceId === voice.id; + return ( + 0 && styles.voiceRowBorder]} + onPress={() => setVoice(voice.id)} + > + + {voice.label} + + {voice.metadata.accent ? `${voice.metadata.accent} · ` : ''} + {voice.metadata.gender || ''} + + + {active && } + + ); + })} + + ); +}; + +// ─── Main screen ────────────────────────────────────────────────────────────── + +export const TTSSettingsScreen: React.FC = () => { + const navigation = useNavigation(); + const { colors } = useTheme(); + const styles = useThemedStyles(createStyles); + const [alertState, setAlertState] = useState(initialAlertState); + const [ramGB, setRamGB] = useState(8); + + const { + assets, isReady, isDownloading, isLoading, + audioCacheSizeMB, settings, error, + downloadModels, deleteModels, + checkDownloadStatus, refreshCacheSize, clearAudioCache, updateSettings, clearError, + initializeEngine, + } = useTTSStore(); + + useEffect(() => { + setRamGB(hardwareService.getTotalMemoryGB()); + checkDownloadStatus(); + refreshCacheSize(); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []); + + const deviceBlocked = ramGB < TTS_BLOCK_RAM_GB; + const deviceWarning = !deviceBlocked && ramGB < TTS_WARN_RAM_GB; + const totalSizeMB = Math.round(assets.reduce((sum, a) => sum + a.asset.sizeBytes, 0) / (1024 * 1024)); + const allDownloaded = assets.every(a => a.status === 'downloaded'); + + const 
handleDelete = () => { + setAlertState( + showAlert('Remove TTS Models', 'This will delete all model files and disable text-to-speech.', [ + { text: 'Cancel', style: 'cancel' }, + { text: 'Remove', style: 'destructive', onPress: () => { setAlertState(hideAlert()); deleteModels(); } }, + ]), + ); + }; + + const handleClearCache = () => { + setAlertState( + showAlert('Clear Audio Cache', `This will delete ${audioCacheSizeMB.toFixed(1)} MB of cached audio.`, [ + { text: 'Cancel', style: 'cancel' }, + { text: 'Clear', style: 'destructive', onPress: () => { setAlertState(hideAlert()); clearAudioCache(); } }, + ]), + ); + }; + + const handleModeChange = (mode: InterfaceMode) => { + if (mode === 'audio' && deviceBlocked) return; + updateSettings({ interfaceMode: mode }); + if (mode === 'audio') initializeEngine(); + }; + + return ( + + + navigation.goBack()}> + + + Text to Speech + {isLoading && } + + + + + + + + + {settings.interfaceMode === 'chat' && ( + + + + Enable TTS + Show play buttons on assistant messages + + updateSettings({ enabled: v })} trackColor={{ true: colors.primary }} /> + + + )} + + + Models{totalSizeMB > 0 ? ` (${totalSizeMB} MB total)` : ''} + {assets.map((assetState, i) => ( + 0} + /> + ))} + + {allDownloaded + ?