Skip to content

Commit cc04706

Browse files
sjwoo1999claude
and committed
feat: implement Web Speech API fallback for STT
- Create useFallbackSTT hook with browser Speech Recognition API
- Auto-switch to fallback when WebSocket STT times out
- Support for Korean (ko-KR) speech recognition
- Continuous recognition with auto-restart
- Network error auto-retry mechanism
- Fallback cascade: WebSocket → Browser API → Disabled
- Cleanup on unmount

Features:
- Browser compatibility check
- Graceful error handling
- User-friendly error messages
- Automatic mode switching

Resolves: P0-2 (Fallback STT)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent eb9a76c commit cc04706

2 files changed

Lines changed: 318 additions & 3 deletions

File tree

src/App.tsx

Lines changed: 55 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import { useWebSocket } from './hooks/useWebSocket';
2121
import { useKeyboardShortcuts } from './hooks/useKeyboardShortcuts';
2222
import { useTheme } from './contexts/ThemeContext';
2323
import { useOverallConnectionStatus } from './hooks/useOverallConnectionStatus';
24+
import { useFallbackSTT } from './hooks/useFallbackSTT';
2425
import type { EmotionType, VADMetrics } from './types';
2526
import type { KeyboardShortcut } from './hooks/useKeyboardShortcuts';
2627
import { VADMonitorSkeleton } from './components/Skeleton/Skeleton';
@@ -157,6 +158,40 @@ function App() {
157158
const sttTimeoutRef = useRef<number | null>(null);
158159
const lastSpeechTimeRef = useRef<number>(0);
159160
const STT_TIMEOUT_MS = 5000; // 5 seconds
161+
const [sttMode, setSTTMode] = useState<'websocket' | 'fallback' | 'disabled'>('websocket');
162+
163+
// Fallback STT (Web Speech API)
164+
const fallbackSTT = useFallbackSTT({
165+
onResult: (text) => {
166+
Logger.info('✅ Fallback STT result', { text });
167+
setSttText(text);
168+
169+
// Dispatch user message
170+
if (text.trim()) {
171+
window.dispatchEvent(new CustomEvent('ai:userMessage', {
172+
detail: {
173+
message: text,
174+
timestamp: Date.now()
175+
}
176+
}));
177+
Logger.debug('🗣️ User message dispatched from fallback STT', { text });
178+
179+
// Trigger AI response
180+
sendToSession({
181+
type: 'request_ai_response',
182+
data: {
183+
message: text,
184+
emotion: currentEmotion,
185+
timestamp: Date.now()
186+
}
187+
});
188+
}
189+
},
190+
onError: (error) => {
191+
Logger.error('❌ Fallback STT error', { error });
192+
setOverlayError(`폴백 음성 인식 오류: ${error}`);
193+
},
194+
});
160195

161196
// WebSocket 연결
162197
const { isConnected: wsConnected, connectionStatus, connect: connectWS, disconnect: disconnectWS, suppressReconnect: suppressWSReconnect, landmarksWs, sendToSession } = useWebSocket({
@@ -295,7 +330,19 @@ function App() {
295330
lastSpeechTime: new Date(lastSpeechTimeRef.current).toISOString()
296331
});
297332

298-
setOverlayError('음성 인식 시간 초과. 다시 말씀해주세요.');
333+
// Try fallback STT if supported
334+
if (sttMode === 'websocket' && fallbackSTT.isSupported()) {
335+
Logger.info('🔄 Switching to fallback STT (Web Speech API)');
336+
setSTTMode('fallback');
337+
fallbackSTT.start();
338+
setOverlayError('WebSocket STT 시간 초과. 브라우저 음성 인식으로 전환합니다.');
339+
} else if (sttMode === 'fallback') {
340+
Logger.warn('⚠️ Fallback STT also timed out');
341+
setOverlayError('음성 인식을 사용할 수 없습니다. 네트워크를 확인해주세요.');
342+
setSTTMode('disabled');
343+
} else {
344+
setOverlayError('음성 인식 시간 초과. 다시 말씀해주세요.');
345+
}
299346

300347
// Clear timeout ref
301348
sttTimeoutRef.current = null;
@@ -910,16 +957,21 @@ function App() {
910957
}
911958
}, [consent, openDialog]);
912959

913-
// Cleanup STT timeout on unmount
960+
// Cleanup STT timeout and fallback on unmount
914961
useEffect(() => {
915962
return () => {
916963
if (sttTimeoutRef.current) {
917964
clearTimeout(sttTimeoutRef.current);
918965
sttTimeoutRef.current = null;
919966
Logger.debug('🧹 STT timeout cleaned up on unmount');
920967
}
968+
969+
if (fallbackSTT.isActive) {
970+
fallbackSTT.stop();
971+
Logger.debug('🧹 Fallback STT stopped on unmount');
972+
}
921973
};
922-
}, []);
974+
}, [fallbackSTT]);
923975

924976

925977
return (

src/hooks/useFallbackSTT.ts

Lines changed: 263 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,263 @@
1+
import { useRef, useCallback, useState } from 'react';
2+
import { Logger } from '../config/env';
3+
4+
interface FallbackSTTOptions {
5+
lang?: string;
6+
continuous?: boolean;
7+
interimResults?: boolean;
8+
onResult: (text: string) => void;
9+
onError?: (error: string) => void;
10+
}
11+
12+
interface FallbackSTTReturn {
13+
isSupported: () => boolean;
14+
start: () => void;
15+
stop: () => void;
16+
isActive: boolean;
17+
}
18+
19+
// Extend Window interface for Web Speech API
20+
declare global {
21+
interface Window {
22+
SpeechRecognition?: new () => SpeechRecognition;
23+
webkitSpeechRecognition?: new () => SpeechRecognition;
24+
}
25+
}
26+
27+
interface SpeechRecognition extends EventTarget {
28+
continuous: boolean;
29+
interimResults: boolean;
30+
lang: string;
31+
start: () => void;
32+
stop: () => void;
33+
abort: () => void;
34+
onresult: ((event: SpeechRecognitionEvent) => void) | null;
35+
onerror: ((event: SpeechRecognitionErrorEvent) => void) | null;
36+
onend: (() => void) | null;
37+
}
38+
39+
interface SpeechRecognitionEvent extends Event {
40+
resultIndex: number;
41+
results: SpeechRecognitionResultList;
42+
}
43+
44+
interface SpeechRecognitionResultList {
45+
length: number;
46+
item: (index: number) => SpeechRecognitionResult;
47+
[index: number]: SpeechRecognitionResult;
48+
}
49+
50+
interface SpeechRecognitionResult {
51+
isFinal: boolean;
52+
[index: number]: SpeechRecognitionAlternative;
53+
length: number;
54+
}
55+
56+
interface SpeechRecognitionAlternative {
57+
transcript: string;
58+
confidence: number;
59+
}
60+
61+
interface SpeechRecognitionErrorEvent extends Event {
62+
error: string;
63+
message: string;
64+
}
65+
66+
/**
67+
* Web Speech API fallback for STT
68+
* Provides browser-native speech recognition as backup for WebSocket STT
69+
*
70+
* @example
71+
* const fallbackSTT = useFallbackSTT({
72+
* onResult: (text) => console.log('Recognized:', text),
73+
* onError: (err) => console.error('STT Error:', err)
74+
* });
75+
*
76+
* if (fallbackSTT.isSupported()) {
77+
* fallbackSTT.start();
78+
* }
79+
*/
80+
export const useFallbackSTT = (options: FallbackSTTOptions): FallbackSTTReturn => {
81+
const {
82+
lang = 'ko-KR',
83+
continuous = true,
84+
interimResults = false,
85+
onResult,
86+
onError,
87+
} = options;
88+
89+
const recognitionRef = useRef<SpeechRecognition | null>(null);
90+
const [isActive, setIsActive] = useState(false);
91+
92+
/**
93+
* Check if Web Speech API is supported in current browser
94+
*/
95+
const isSupported = useCallback((): boolean => {
96+
const hasAPI =
97+
typeof window !== 'undefined' &&
98+
(window.SpeechRecognition !== undefined ||
99+
window.webkitSpeechRecognition !== undefined);
100+
101+
Logger.debug('🎤 Web Speech API support check', {
102+
supported: hasAPI,
103+
SpeechRecognition: !!window.SpeechRecognition,
104+
webkitSpeechRecognition: !!window.webkitSpeechRecognition,
105+
});
106+
107+
return hasAPI;
108+
}, []);
109+
110+
/**
111+
* Start speech recognition
112+
*/
113+
const start = useCallback(() => {
114+
if (!isSupported()) {
115+
const error = 'Web Speech API not supported in this browser';
116+
Logger.error('❌ Cannot start fallback STT', { reason: error });
117+
onError?.(error);
118+
return;
119+
}
120+
121+
// Stop existing recognition if any
122+
if (recognitionRef.current) {
123+
try {
124+
recognitionRef.current.stop();
125+
} catch (err) {
126+
Logger.warn('Failed to stop existing recognition', { error: err });
127+
}
128+
}
129+
130+
try {
131+
// Create recognition instance
132+
const SpeechRecognitionAPI =
133+
window.SpeechRecognition || window.webkitSpeechRecognition;
134+
135+
if (!SpeechRecognitionAPI) {
136+
throw new Error('SpeechRecognition constructor not available');
137+
}
138+
139+
const recognition = new SpeechRecognitionAPI();
140+
recognition.continuous = continuous;
141+
recognition.interimResults = interimResults;
142+
recognition.lang = lang;
143+
144+
// Handle results
145+
recognition.onresult = (event: SpeechRecognitionEvent) => {
146+
const result = event.results[event.resultIndex];
147+
148+
if (result && result.isFinal && result[0]) {
149+
const transcript = result[0].transcript;
150+
const confidence = result[0].confidence;
151+
152+
Logger.info('✅ Fallback STT result', {
153+
text: transcript,
154+
confidence: confidence.toFixed(2),
155+
resultIndex: event.resultIndex,
156+
});
157+
158+
onResult(transcript);
159+
} else if (interimResults && result && result[0]) {
160+
const transcript = result[0].transcript;
161+
Logger.debug('🔄 Interim STT result', { text: transcript });
162+
}
163+
};
164+
165+
// Handle errors
166+
recognition.onerror = (event: SpeechRecognitionErrorEvent) => {
167+
Logger.error('❌ Fallback STT error', {
168+
error: event.error,
169+
message: event.message,
170+
});
171+
172+
const errorMessage = getErrorMessage(event.error);
173+
onError?.(errorMessage);
174+
175+
// Auto-restart on network error (transient)
176+
if (event.error === 'network') {
177+
Logger.info('🔄 Attempting to restart after network error');
178+
setTimeout(() => {
179+
if (recognitionRef.current === recognition) {
180+
start();
181+
}
182+
}, 2000);
183+
} else if (event.error !== 'aborted') {
184+
setIsActive(false);
185+
}
186+
};
187+
188+
// Handle end
189+
recognition.onend = () => {
190+
Logger.debug('🛑 Fallback STT ended');
191+
192+
// Auto-restart if continuous mode and not manually stopped
193+
if (continuous && recognitionRef.current === recognition) {
194+
Logger.debug('🔄 Restarting continuous recognition');
195+
setTimeout(() => {
196+
if (recognitionRef.current === recognition) {
197+
start();
198+
}
199+
}, 100);
200+
} else {
201+
setIsActive(false);
202+
}
203+
};
204+
205+
// Start recognition
206+
recognition.start();
207+
recognitionRef.current = recognition;
208+
setIsActive(true);
209+
210+
Logger.info('✅ Fallback STT started', {
211+
lang,
212+
continuous,
213+
interimResults,
214+
});
215+
} catch (err) {
216+
const errorMessage =
217+
err instanceof Error ? err.message : 'Failed to start speech recognition';
218+
Logger.error('❌ Failed to initialize fallback STT', { error: err });
219+
onError?.(errorMessage);
220+
setIsActive(false);
221+
}
222+
}, [isSupported, lang, continuous, interimResults, onResult, onError]);
223+
224+
/**
225+
* Stop speech recognition
226+
*/
227+
const stop = useCallback(() => {
228+
if (recognitionRef.current) {
229+
try {
230+
recognitionRef.current.stop();
231+
recognitionRef.current = null;
232+
setIsActive(false);
233+
Logger.info('✅ Fallback STT stopped');
234+
} catch (err) {
235+
Logger.error('❌ Failed to stop fallback STT', { error: err });
236+
}
237+
}
238+
}, []);
239+
240+
return {
241+
isSupported,
242+
start,
243+
stop,
244+
isActive,
245+
};
246+
};
247+
248+
/**
249+
* Get user-friendly error message from speech recognition error code
250+
*/
251+
function getErrorMessage(errorCode: string): string {
252+
const errorMessages: Record<string, string> = {
253+
'no-speech': '음성이 감지되지 않았습니다',
254+
'audio-capture': '마이크에 접근할 수 없습니다',
255+
'not-allowed': '마이크 권한이 거부되었습니다',
256+
'network': '네트워크 오류가 발생했습니다',
257+
'aborted': '음성 인식이 중단되었습니다',
258+
'bad-grammar': '음성 인식 설정 오류',
259+
'language-not-supported': '지원하지 않는 언어입니다',
260+
};
261+
262+
return errorMessages[errorCode] || `음성 인식 오류: ${errorCode}`;
263+
}

0 commit comments

Comments
 (0)