diff --git a/Type4Me/ASR/Providers/VolcanoASRConfig.swift b/Type4Me/ASR/Providers/VolcanoASRConfig.swift index d2f8efd..9a8241d 100644 --- a/Type4Me/ASR/Providers/VolcanoASRConfig.swift +++ b/Type4Me/ASR/Providers/VolcanoASRConfig.swift @@ -6,7 +6,7 @@ struct VolcanoASRConfig: ASRProviderConfig, Sendable { static var displayName: String { L("火山引擎 (Doubao)", "Volcano (Doubao)") } static var credentialFields: [CredentialField] {[ - CredentialField(key: "appKey", label: "App Key", placeholder: "APPID", isSecure: false, isOptional: false, defaultValue: ""), + CredentialField(key: "appKey", label: "App ID", placeholder: "APPID", isSecure: false, isOptional: false, defaultValue: ""), CredentialField(key: "accessKey", label: "Access Token", placeholder: L("访问令牌", "Access token"), isSecure: true, isOptional: false, defaultValue: ""), ]} diff --git a/Type4Me/Session/RecognitionSession.swift b/Type4Me/Session/RecognitionSession.swift index fe7d793..4e3d361 100644 --- a/Type4Me/Session/RecognitionSession.swift +++ b/Type4Me/Session/RecognitionSession.swift @@ -76,6 +76,9 @@ actor RecognitionSession { private var eventConsumptionTask: Task? private var activeFlashTask: Task? 
private var hasEmittedReadyForCurrentSession = false + private var isASRConnected = false + private var pendingAudioChunks: [Data] = [] + private let maxPendingAudioChunks = 40 // MARK: - Speculative LLM (fire during recording pauses) @@ -108,6 +111,8 @@ actor RecognitionSession { self.currentMode = mode self.recordingStartTime = nil hasEmittedReadyForCurrentSession = false + isASRConnected = false + pendingAudioChunks = [] state = .starting // Load credentials for selected provider @@ -159,46 +164,16 @@ actor RecognitionSession { boostingTableID: biasSettings.boostingTableID ) - do { - try await client.connect(config: config, options: requestOptions) - NSLog( - "[Session] ASR connected OK (streaming, hotwords=%d, history=%d)", - hotwords.count, - requestOptions.contextHistoryLength - ) - DebugFileLogger.log("ASR connected OK") - } catch { - NSLog("[Session] ASR connect FAILED: %@", String(describing: error)) - DebugFileLogger.log("ASR connect failed: \(String(describing: error))") - SoundFeedback.playError() - await client.disconnect() - self.asrClient = nil - state = .idle - onASREvent?(.error(error)) - onASREvent?(.completed) - return - } - // Reset text state currentTranscript = .empty - // Start ASR event consumption - let events = await client.events - eventConsumptionTask = Task { [weak self] in - for await event in events { - guard let self else { break } - await self.handleASREvent(event) - if case .completed = event { break } - } - } - // Wire audio level → UI let levelHandler = self.onAudioLevel audioEngine.onAudioLevel = { level in levelHandler?(level) } - // Wire audio callback → ASR + // Wire audio callback → ASR (buffers while ASR is still connecting) var chunkCount = 0 audioEngine.onAudioChunk = { [weak self] data in guard let self else { return } @@ -216,9 +191,12 @@ actor RecognitionSession { } do { + let audioStartT0 = ContinuousClock.now try audioEngine.start() NSLog("[Session] Audio engine started OK") - DebugFileLogger.log("audio engine 
started OK") + DebugFileLogger.log("audio engine started OK +\(ContinuousClock.now - audioStartT0)") + state = .recording + DebugFileLogger.log("session entered recording state, awaiting ASR connect") } catch { NSLog("[Session] Audio engine start FAILED: %@", String(describing: error)) DebugFileLogger.log("audio engine start failed: \(String(describing: error))") @@ -230,8 +208,56 @@ actor RecognitionSession { return } - state = .recording - DebugFileLogger.log("session entered recording state, waiting for first audio chunk") + do { + let connectT0 = ContinuousClock.now + try await client.connect(config: config, options: requestOptions) + NSLog( + "[Session] ASR connected OK (streaming, hotwords=%d, history=%d)", + hotwords.count, + requestOptions.contextHistoryLength + ) + DebugFileLogger.log("ASR connected OK +\(ContinuousClock.now - connectT0)") + isASRConnected = true + } catch { + NSLog("[Session] ASR connect FAILED: %@", String(describing: error)) + DebugFileLogger.log("ASR connect failed: \(String(describing: error))") + SoundFeedback.playError() + audioEngine.onAudioChunk = nil + audioEngine.stop() + audioEngine.onAudioLevel = nil + await client.disconnect() + self.asrClient = nil + state = .idle + onASREvent?(.error(error)) + onASREvent?(.completed) + return + } + + // Recording may have already been stopped while connect() was in flight. 
+ guard state == .recording else { + await client.disconnect() + self.asrClient = nil + isASRConnected = false + pendingAudioChunks.removeAll(keepingCapacity: true) + return + } + + // Start ASR event consumption + let events = await client.events + eventConsumptionTask = Task { [weak self] in + for await event in events { + guard let self else { break } + await self.handleASREvent(event) + if case .completed = event { break } + } + } + + do { + try await flushPendingAudioToASR() + } catch { + NSLog("[Session] Failed to flush buffered audio: %@", String(describing: error)) + DebugFileLogger.log("flush buffered audio failed: \(String(describing: error))") + } // Pre-warm LLM connection for modes with post-processing if !currentMode.prompt.isEmpty, let llmConfig = KeychainService.loadLLMConfig() { @@ -364,6 +390,8 @@ actor RecognitionSession { } eventConsumptionTask = nil asrClient = nil + isASRConnected = false + pendingAudioChunks.removeAll(keepingCapacity: true) hasEmittedReadyForCurrentSession = false // Combine confirmed segments + any trailing unconfirmed partial. 
@@ -503,9 +531,54 @@ actor RecognitionSession { private func sendAudioToASR(_ data: Data) async throws { guard let client = asrClient else { return } + if !isASRConnected { + bufferAudioChunk(data) + return + } try await client.sendAudio(data) } + private func bufferAudioChunk(_ data: Data) { + pendingAudioChunks.append(data) + if pendingAudioChunks.count > maxPendingAudioChunks { + pendingAudioChunks.removeFirst(pendingAudioChunks.count - maxPendingAudioChunks) + } + } + + private func flushPendingAudioToASR() async throws { + guard isASRConnected, let client = asrClient else { return } + guard !pendingAudioChunks.isEmpty else { return } + let bufferedCount = pendingAudioChunks.count + for chunk in pendingAudioChunks { + try await client.sendAudio(chunk) + } + pendingAudioChunks.removeAll(keepingCapacity: true) + DebugFileLogger.log("flushed buffered audio chunks=\(bufferedCount)") + } + +#if DEBUG + func _debugSetASRClient(_ client: any SpeechRecognizer, connected: Bool) { + asrClient = client + isASRConnected = connected + } + + func _debugSetASRConnected(_ connected: Bool) { + isASRConnected = connected + } + + func _debugSendAudioToASR(_ data: Data) async throws { + try await sendAudioToASR(data) + } + + func _debugFlushPendingAudioToASR() async throws { + try await flushPendingAudioToASR() + } + + func _debugPendingAudioChunkCount() -> Int { + pendingAudioChunks.count + } +#endif + private func markReadyIfNeeded() { guard !hasEmittedReadyForCurrentSession else { return } hasEmittedReadyForCurrentSession = true @@ -594,6 +667,8 @@ actor RecognitionSession { await client.disconnect() } asrClient = nil + isASRConnected = false + pendingAudioChunks.removeAll(keepingCapacity: true) state = .idle currentTranscript = .empty diff --git a/Type4Me/UI/Settings/GeneralSettingsTab.swift b/Type4Me/UI/Settings/GeneralSettingsTab.swift index c3415ca..0bd332e 100644 --- a/Type4Me/UI/Settings/GeneralSettingsTab.swift +++ b/Type4Me/UI/Settings/GeneralSettingsTab.swift @@ -213,6 
+213,15 @@ struct ASRSettingsCard: View, SettingsCardHelpers { } else { dynamicCredentialFields } + if selectedASRProvider == .volcano { + Text(L( + "提示:App ID 需要在「旧版豆包语音」控制台的应用配置页面获取(不是 API Key)。", + "Tip: App ID is from the legacy Doubao Speech console app settings page (not an API key)." + )) + .font(.system(size: 10)) + .foregroundStyle(TF.settingsTextTertiary) + .padding(.top, 8) + } HStack(spacing: 8) { Spacer() @@ -622,6 +631,135 @@ struct LLMSettingsCard: View, SettingsCardHelpers { } } +// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +// MARK: - Hotkey Settings Card +// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +struct HotkeySettingsCard: View, SettingsCardHelpers { + + @Environment(AppState.self) private var appState + @State private var modes: [ProcessingMode] = [] + @State private var hasDirtyChanges = false + @State private var saveStatus: SettingsTestStatus = .idle + + var body: some View { + settingsGroupCard(L("快捷键设置", "Hotkey Settings")) { + Text(L("每个模式可单独配置快捷键与触发方式", "Configure per-mode shortcuts and trigger style")) + .font(.system(size: 10)) + .foregroundStyle(TF.settingsTextTertiary) + .padding(.bottom, 6) + + if modes.isEmpty { + Text(L("暂无模式", "No modes")) + .font(.system(size: 12)) + .foregroundStyle(TF.settingsTextTertiary) + .frame(minHeight: 72, alignment: .center) + } else { + VStack(spacing: 0) { + ForEach(Array(modes.indices), id: \.self) { index in + hotkeyModeRow($modes[index]) + if index < modes.count - 1 { + SettingsDivider() + } + } + } + } + + HStack(spacing: 8) { + Spacer() + statusBadge(saveStatus) + secondaryButton(L("高级设置", "Advanced")) { + NotificationCenter.default.post(name: .navigateToMode, object: nil) + } + saveButton { saveModes() } + .disabled(!hasDirtyChanges) + } + .padding(.top, 10) + } + .task { + modes = ModeStorage().load() + hasDirtyChanges = false + } + .onChange(of: modes) { _, _ in + hasDirtyChanges = true + saveStatus = .idle + } + } + + private func hotkeyModeRow(_ mode: Binding<ProcessingMode>) -> some View { 
+ VStack(alignment: .leading, spacing: 8) { + HStack(spacing: 8) { + Text(mode.wrappedValue.name) + .font(.system(size: 13, weight: .medium)) + .foregroundStyle(TF.settingsText) + if mode.wrappedValue.isBuiltin { + Text(L("内置", "Built-in")) + .font(.system(size: 9, weight: .medium)) + .foregroundStyle(TF.settingsTextTertiary) + .padding(.horizontal, 6) + .padding(.vertical, 2) + .background( + RoundedRectangle(cornerRadius: 4) + .fill(TF.settingsCardAlt) + ) + } + Spacer() + HotkeyRecorderView( + keyCode: mode.hotkeyCode, + modifiers: mode.hotkeyModifiers + ) + } + + Picker("", selection: mode.hotkeyStyle) { + Text(L("按住录制", "Hold to record")).tag(ProcessingMode.HotkeyStyle.hold) + Text(L("按下切换", "Toggle")).tag(ProcessingMode.HotkeyStyle.toggle) + } + .labelsHidden() + .pickerStyle(.segmented) + .frame(width: 220) + } + .padding(.vertical, 6) + } + + private func saveModes() { + let duplicates = duplicateHotkeyNames(in: modes) + if !duplicates.isEmpty { + saveStatus = .failed(L("快捷键冲突", "Hotkey conflict")) + return + } + do { + try ModeStorage().save(modes) + appState.availableModes = modes + if let updated = modes.first(where: { $0.id == appState.currentMode.id }) { + appState.currentMode = updated + } else if let fallback = modes.first { + appState.currentMode = fallback + } + NotificationCenter.default.post(name: .modesDidChange, object: nil) + hasDirtyChanges = false + saveStatus = .success + } catch { + saveStatus = .failed(L("保存失败", "Save failed")) + } + } + + private func duplicateHotkeyNames(in modes: [ProcessingMode]) -> [String] { + var seen: Set<String> = [] + var duplicates: [String] = [] + for mode in modes { + guard let code = mode.hotkeyCode else { continue } + let mods = mode.hotkeyModifiers ?? 
0 + let key = "\(code)-\(mods)" + if seen.contains(key) { + duplicates.append(mode.name) + } else { + seen.insert(key) + } + } + return duplicates + } +} + // ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ // MARK: - General Settings Tab // ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ @@ -647,7 +785,7 @@ struct GeneralSettingsTab: View, SettingsCardHelpers { SettingsSectionHeader( label: "GENERAL", title: L("通用设置", "General Settings"), - description: L("接口配置与偏好设置。快捷键请在「处理模式」中配置。", "API configuration and preferences. Hotkeys are configured in Modes.") + description: L("快捷键、接口配置与偏好设置。", "Hotkeys, API configuration and preferences.") ) // ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ @@ -711,7 +849,45 @@ struct GeneralSettingsTab: View, SettingsCardHelpers { moduleSpacer() // ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - // MODULE 2: API 设置 + // MODULE 2: 快捷键设置 + // ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + moduleHeader(L("快捷键设置", "Hotkey Settings")) + + twoColumnLayout { + HotkeySettingsCard() + } right: { + settingsGroupCard(L("快捷键说明", "Hotkey Guide")) { + Text(L( + "你可以在此页面直接为每个模式录制快捷键,并切换「按住录制 / 按下切换」触发方式。", + "Record shortcuts per mode here, and switch trigger style between hold-to-record and toggle." + )) + .font(.system(size: 12)) + .foregroundStyle(TF.settingsTextSecondary) + .lineSpacing(2) + .padding(.bottom, 10) + + Text(L( + "若遇到复杂冲突、模式排序或 Prompt 调整,可进入「处理模式」页继续配置。", + "For conflict resolution, mode ordering, or prompt editing, use the Modes page." 
+ )) + .font(.system(size: 11)) + .foregroundStyle(TF.settingsTextTertiary) + .lineSpacing(2) + + HStack { + Spacer() + secondaryButton(L("打开处理模式", "Open Modes")) { + NotificationCenter.default.post(name: .navigateToMode, object: nil) + } + } + .padding(.top, 12) + } + } + + moduleSpacer() + + // ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + // MODULE 3: API 设置 // ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ moduleHeader(L("API 设置", "API Settings")) diff --git a/Type4MeTests/RecognitionSessionTests.swift b/Type4MeTests/RecognitionSessionTests.swift index 0fd93d5..67ba048 100644 --- a/Type4MeTests/RecognitionSessionTests.swift +++ b/Type4MeTests/RecognitionSessionTests.swift @@ -26,4 +26,51 @@ final class RecognitionSessionTests: XCTestCase { XCTAssertFalse(canStart) await session.setState(.idle) } + + func testAudioChunksAreBufferedBeforeASRConnectAndFlushedAfterConnect() async throws { + let session = RecognitionSession() + let mock = MockSpeechRecognizer() + + await session._debugSetASRClient(mock, connected: false) + + for i in 0..<45 { + let payload = Data("chunk-\(i)".utf8) + try await session._debugSendAudioToASR(payload) + } + + let bufferedBefore = await session._debugPendingAudioChunkCount() + XCTAssertEqual(bufferedBefore, 40, "Should keep only the most recent buffered chunks") + + let sentBefore = await mock.sentAudioCount() + XCTAssertEqual(sentBefore, 0, "No audio should be sent before ASR is connected") + + await session._debugSetASRConnected(true) + try await session._debugFlushPendingAudioToASR() + + let bufferedAfter = await session._debugPendingAudioChunkCount() + XCTAssertEqual(bufferedAfter, 0, "Buffer should be empty after flush") + + let sentAfter = await mock.sentAudioCount() + XCTAssertEqual(sentAfter, 40, "Buffered chunks should be flushed after connection") + } +} + +private actor MockSpeechRecognizer: SpeechRecognizer { + private var sent: [Data] = [] + + func connect(config: any ASRProviderConfig, options: ASRRequestOptions) async throws {} + + func 
sendAudio(_ data: Data) async throws { + sent.append(data) + } + + func endAudio() async throws {} + + func disconnect() async {} + + var events: AsyncStream<ASREvent> { + get async { AsyncStream { continuation in continuation.finish() } } + } + + func sentAudioCount() -> Int { sent.count } }