From 198e88d21254c6556e95d01d1666b3277efabf82 Mon Sep 17 00:00:00 2001 From: Braden Wong <13159333+braden-w@users.noreply.github.com> Date: Sat, 29 Jun 2024 12:13:27 -0700 Subject: [PATCH] feat: restore auto detect language feature --- apps/app/src/lib/stores/settings.svelte.ts | 2 +- packages/shared/src/index.ts | 8 +- .../src/services/TranscriptionService.ts | 4 +- .../TranscriptionServiceWhisperingLive.ts | 192 ++++++++++++------ 4 files changed, 138 insertions(+), 68 deletions(-) diff --git a/apps/app/src/lib/stores/settings.svelte.ts b/apps/app/src/lib/stores/settings.svelte.ts index 1668b3972..0af16adeb 100644 --- a/apps/app/src/lib/stores/settings.svelte.ts +++ b/apps/app/src/lib/stores/settings.svelte.ts @@ -27,7 +27,7 @@ const createSettings = Effect.gen(function* () { currentLocalShortcut: registerShortcutsService.defaultLocalShortcut, currentGlobalShortcut: registerShortcutsService.defaultGlobalShortcut, apiKey: '', - outputLanguage: 'en', + outputLanguage: 'auto', }, }); diff --git a/packages/shared/src/index.ts b/packages/shared/src/index.ts index a25528149..0e1b8ed87 100644 --- a/packages/shared/src/index.ts +++ b/packages/shared/src/index.ts @@ -1,6 +1,7 @@ import { Schema as S } from '@effect/schema'; import { Data, Effect } from 'effect'; import { notificationOptionsSchema } from './services/NotificationService.js'; +import { SUPPORTED_LANGUAGES } from './services/TranscriptionServiceWhisperingLive.js'; export const WHISPERING_URL = process.env.NODE_ENV === 'production' @@ -17,7 +18,7 @@ export const settingsSchema = S.Struct({ currentLocalShortcut: S.String, currentGlobalShortcut: S.String, apiKey: S.String, - outputLanguage: S.String, + outputLanguage: S.Literal(...SUPPORTED_LANGUAGES), }); export type Settings = S.Schema.Type; @@ -35,9 +36,8 @@ export type WhisperingErrorProperties = { error?: unknown; }; -export class WhisperingError extends Data.TaggedError( - 'WhisperingError', -)> & Omit +export class WhisperingError extends Data.TaggedError('WhisperingError')< + Required> & Omit > { constructor(properties: WhisperingErrorProperties) { super({ diff --git a/packages/shared/src/services/TranscriptionService.ts b/packages/shared/src/services/TranscriptionService.ts index c57ff5ab8..39560d600 100644 --- a/packages/shared/src/services/TranscriptionService.ts +++ b/packages/shared/src/services/TranscriptionService.ts @@ -1,4 +1,4 @@ -import type { WhisperingError } from '@repo/shared'; +import type { SupportedLanguage, WhisperingError } from '@repo/shared'; import type { Effect } from 'effect'; import { Context } from 'effect'; @@ -8,7 +8,7 @@ export class TranscriptionService extends Context.Tag('TranscriptionService')< readonly supportedLanguages: readonly { label: string; value: string }[]; readonly transcribe: ( blob: Blob, - options: { apiKey: string; outputLanguage: string }, + options: { apiKey: string; outputLanguage: SupportedLanguage }, ) => Effect.Effect; } >() {} diff --git a/packages/shared/src/services/TranscriptionServiceWhisperingLive.ts b/packages/shared/src/services/TranscriptionServiceWhisperingLive.ts index cd19bb937..4334e76e6 100644 --- a/packages/shared/src/services/TranscriptionServiceWhisperingLive.ts +++ b/packages/shared/src/services/TranscriptionServiceWhisperingLive.ts @@ -1,5 +1,5 @@ import { WhisperingError } from '@repo/shared'; -import { Effect, Layer } from 'effect'; +import { Effect, Layer, Option } from 'effect'; import { TranscriptionService } from './TranscriptionService.js'; function isString(input: unknown): input is string { @@ -10,70 +10,140 @@ const MAX_FILE_SIZE_MB = 25 as const; const FILE_NAME = 'recording.wav'; /** Supported languages pulled from OpenAI Website: https://platform.openai.com/docs/guides/speech-to-text/supported-languages */ -const SUPPORTED_LANGUAGES = [ - { label: 'Afrikaans', value: 'af' }, - { label: 'Arabic', value: 'ar' }, - { label: 'Armenian', value: 'hy' }, - { label: 'Azerbaijani', value: 'az' }, - { label: 'Belarusian', value: 'be' }, - { label: 'Bosnian', value: 'bs' }, - { label: 'Bulgarian', value: 'bg' }, - { label: 'Catalan', value: 'ca' }, - { label: 'Chinese', value: 'zh' }, - { label: 'Croatian', value: 'hr' }, - { label: 'Czech', value: 'cs' }, - { label: 'Danish', value: 'da' }, - { label: 'Dutch', value: 'nl' }, - { label: 'English', value: 'en' }, - { label: 'Estonian', value: 'et' }, - { label: 'Finnish', value: 'fi' }, - { label: 'French', value: 'fr' }, - { label: 'Galician', value: 'gl' }, - { label: 'German', value: 'de' }, - { label: 'Greek', value: 'el' }, - { label: 'Hebrew', value: 'he' }, - { label: 'Hindi', value: 'hi' }, - { label: 'Hungarian', value: 'hu' }, - { label: 'Icelandic', value: 'is' }, - { label: 'Indonesian', value: 'id' }, - { label: 'Italian', value: 'it' }, - { label: 'Japanese', value: 'ja' }, - { label: 'Kannada', value: 'kn' }, - { label: 'Kazakh', value: 'kk' }, - { label: 'Korean', value: 'ko' }, - { label: 'Latvian', value: 'lv' }, - { label: 'Lithuanian', value: 'lt' }, - { label: 'Macedonian', value: 'mk' }, - { label: 'Malay', value: 'ms' }, - { label: 'Marathi', value: 'mr' }, - { label: 'Maori', value: 'mi' }, - { label: 'Nepali', value: 'ne' }, - { label: 'Norwegian', value: 'no' }, - { label: 'Persian', value: 'fa' }, - { label: 'Polish', value: 'pl' }, - { label: 'Portuguese', value: 'pt' }, - { label: 'Romanian', value: 'ro' }, - { label: 'Russian', value: 'ru' }, - { label: 'Serbian', value: 'sr' }, - { label: 'Slovak', value: 'sk' }, - { label: 'Slovenian', value: 'sl' }, - { label: 'Spanish', value: 'es' }, - { label: 'Swahili', value: 'sw' }, - { label: 'Swedish', value: 'sv' }, - { label: 'Tagalog', value: 'tl' }, - { label: 'Tamil', value: 'ta' }, - { label: 'Thai', value: 'th' }, - { label: 'Turkish', value: 'tr' }, - { label: 'Ukrainian', value: 'uk' }, - { label: 'Urdu', value: 'ur' }, - { label: 'Vietnamese', value: 'vi' }, - { label: 'Welsh', value: 'cy' }, +export const SUPPORTED_LANGUAGES = [ + 'auto', + 'af', + 'ar', + 'hy', + 'az', + 'be', + 'bs', + 'bg', + 'ca', + 'zh', + 'hr', + 'cs', + 'da', + 'nl', + 'en', + 'et', + 'fi', + 'fr', + 'gl', + 'de', + 'el', + 'he', + 'hi', + 'hu', + 'is', + 'id', + 'it', + 'ja', + 'kn', + 'kk', + 'ko', + 'lv', + 'lt', + 'mk', + 'ms', + 'mr', + 'mi', + 'ne', + 'no', + 'fa', + 'pl', + 'pt', + 'ro', + 'ru', + 'sr', + 'sk', + 'sl', + 'es', + 'sw', + 'sv', + 'tl', + 'ta', + 'th', + 'tr', + 'uk', + 'ur', + 'vi', + 'cy', ] as const; +export type SupportedLanguage = (typeof SUPPORTED_LANGUAGES)[number]; + +const SUPPORTED_LANGUAGES_TO_LABEL = { + auto: 'Auto', + af: 'Afrikaans', + ar: 'Arabic', + hy: 'Armenian', + az: 'Azerbaijani', + be: 'Belarusian', + bs: 'Bosnian', + bg: 'Bulgarian', + ca: 'Catalan', + zh: 'Chinese', + hr: 'Croatian', + cs: 'Czech', + da: 'Danish', + nl: 'Dutch', + en: 'English', + et: 'Estonian', + fi: 'Finnish', + fr: 'French', + gl: 'Galician', + de: 'German', + el: 'Greek', + he: 'Hebrew', + hi: 'Hindi', + hu: 'Hungarian', + is: 'Icelandic', + id: 'Indonesian', + it: 'Italian', + ja: 'Japanese', + kn: 'Kannada', + kk: 'Kazakh', + ko: 'Korean', + lv: 'Latvian', + lt: 'Lithuanian', + mk: 'Macedonian', + ms: 'Malay', + mr: 'Marathi', + mi: 'Maori', + ne: 'Nepali', + no: 'Norwegian', + fa: 'Persian', + pl: 'Polish', + pt: 'Portuguese', + ro: 'Romanian', + ru: 'Russian', + sr: 'Serbian', + sk: 'Slovak', + sl: 'Slovenian', + es: 'Spanish', + sw: 'Swahili', + sv: 'Swedish', + tl: 'Tagalog', + ta: 'Tamil', + th: 'Thai', + tr: 'Turkish', + uk: 'Ukrainian', + ur: 'Urdu', + vi: 'Vietnamese', + cy: 'Welsh', +} as const satisfies Record; + export const TranscriptionServiceWhisperLive = Layer.succeed( TranscriptionService, TranscriptionService.of({ - supportedLanguages: SUPPORTED_LANGUAGES, + supportedLanguages: SUPPORTED_LANGUAGES.map( + (lang) => + ({ + label: SUPPORTED_LANGUAGES_TO_LABEL[lang], + value: lang, + }) as const, + ), transcribe: (audioBlob, { apiKey, outputLanguage }) => Effect.gen(function* () { if (!apiKey.startsWith('sk-')) { @@ -97,7 +167,7 @@ export const TranscriptionServiceWhisperLive = Layer.succeed( const formData = new FormData(); formData.append('file', wavFile); formData.append('model', 'whisper-1'); - formData.append('language', outputLanguage); + if (outputLanguage !== 'auto') formData.append('language', outputLanguage); const data = yield* Effect.tryPromise({ try: () => fetch('https://api.openai.com/v1/audio/transcriptions', {