Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lib/AppInfo/Application.php
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ class Application extends App implements IBootstrap {
public const MODELS_CACHE_KEY = 'models';
public const MODELS_CACHE_TTL = 60 * 30;

/**
 * Languages offered as explicit choices for audio-to-text transcription.
 *
 * Each entry is a two-element list: index 0 is the language code, index 1 is the
 * language's name written in that language. AudioToTextProvider turns every pair
 * into an enum value (name as label, code as value) for the optional 'language' input.
 */
public const AUDIO_TO_TEXT_LANGUAGES = [['en', 'English'], ['zh', '中文'], ['de', 'Deutsch'], ['es', 'Español'], ['ru', 'Русский'], ['ko', '한국어'], ['fr', 'Français'], ['ja', '日本語'], ['pt', 'Português'], ['tr', 'Türkçe'], ['pl', 'Polski'], ['ca', 'Català'], ['nl', 'Nederlands'], ['ar', 'العربية'], ['sv', 'Svenska'], ['it', 'Italiano'], ['id', 'Bahasa Indonesia'], ['hi', 'हिन्दी'], ['fi', 'Suomi'], ['vi', 'Tiếng Việt'], ['he', 'עברית'], ['uk', 'Українська'], ['el', 'Ελληνικά'], ['ms', 'Bahasa Melayu'], ['cs', 'Česky'], ['ro', 'Română'], ['da', 'Dansk'], ['hu', 'Magyar'], ['ta', 'தமிழ்'], ['no', 'Norsk (bokmål / riksmål)'], ['th', 'ไทย / Phasa Thai'], ['ur', 'اردو'], ['hr', 'Hrvatski'], ['bg', 'Български'], ['lt', 'Lietuvių'], ['la', 'Latina'], ['mi', 'Māori'], ['ml', 'മലയാളം'], ['cy', 'Cymraeg'], ['sk', 'Slovenčina'], ['te', 'తెలుగు'], ['fa', 'فارسی'], ['lv', 'Latviešu'], ['bn', 'বাংলা'], ['sr', 'Српски'], ['az', 'Azərbaycanca / آذربايجان'], ['sl', 'Slovenščina'], ['kn', 'ಕನ್ನಡ'], ['et', 'Eesti'], ['mk', 'Македонски'], ['br', 'Brezhoneg'], ['eu', 'Euskara'], ['is', 'Íslenska'], ['hy', 'Հայերեն'], ['ne', 'नेपाली'], ['mn', 'Монгол'], ['bs', 'Bosanski'], ['kk', 'Қазақша'], ['sq', 'Shqip'], ['sw', 'Kiswahili'], ['gl', 'Galego'], ['mr', 'मराठी'], ['pa', 'ਪੰਜਾਬੀ / पंजाबी / پنجابي'], ['si', 'සිංහල'], ['km', 'ភាសាខ្មែរ'], ['sn', 'chiShona'], ['yo', 'Yorùbá'], ['so', 'Soomaaliga'], ['af', 'Afrikaans'], ['oc', 'Occitan'], ['ka', 'ქართული'], ['be', 'Беларуская'], ['tg', 'Тоҷикӣ'], ['sd', 'सिनधि'], ['gu', 'ગુજરાતી'], ['am', 'አማርኛ'], ['yi', 'ייִדיש'], ['lo', 'ລາວ / Pha xa lao'], ['uz', 'Ўзбек'], ['fo', 'Føroyskt'], ['ht', 'Krèyol ayisyen'], ['ps', 'پښتو'], ['tk', 'Туркмен / تركمن'], ['nn', 'Norsk (nynorsk)'], ['mt', 'bil-Malti'], ['sa', 'संस्कृतम्'], ['lb', 'Lëtzebuergesch'], ['my', 'Myanmasa'], ['bo', 'བོད་ཡིག / Bod skad'], ['tl', 'Tagalog'], ['mg', 'Malagasy'], ['as', 'অসমীয়া'], ['tt', 'Tatarça'], ['haw', 'ʻŌlelo Hawaiʻi'], ['ln', 'Lingála'], ['ha', 'هَوُسَ'], ['ba', 'Башҡорт'], 
['jw', 'ꦧꦱꦗꦮ'], ['su', 'Basa Sunda'], ['yue', '粤语']];

private IAppConfig $appConfig;

public function __construct(array $urlParams = []) {
Expand Down
10 changes: 9 additions & 1 deletion lib/Service/OpenAiAPIService.php
Original file line number Diff line number Diff line change
Expand Up @@ -683,6 +683,8 @@ public function transcribeBase64Mp3(
* @param string|null $userId
* @param File $file
* @param bool $translate
* @param string $model
* @param string $language
* @return string
* @throws Exception
*/
Expand All @@ -691,9 +693,10 @@ public function transcribeFile(
File $file,
bool $translate = false,
string $model = Application::DEFAULT_MODEL_ID,
string $language = 'detect_language',
): string {
try {
$transcriptionResponse = $this->transcribe($userId, $file->getContent(), $translate, $model);
$transcriptionResponse = $this->transcribe($userId, $file->getContent(), $translate, $model, $language);
} catch (NotPermittedException|LockedException|GenericFileException $e) {
$this->logger->warning('Could not read audio file: ' . $file->getPath() . '. Error: ' . $e->getMessage(), ['app' => Application::APP_ID]);
throw new Exception($this->l10n->t('Could not read audio file.'), Http::STATUS_INTERNAL_SERVER_ERROR);
Expand All @@ -707,6 +710,7 @@ public function transcribeFile(
* @param string $audioFileContent
* @param bool $translate
* @param string $model
* @param string $language
* @return string
* @throws Exception
*/
Expand All @@ -715,6 +719,7 @@ public function transcribe(
string $audioFileContent,
bool $translate = true,
string $model = Application::DEFAULT_MODEL_ID,
string $language = 'detect_language',
): string {
if ($this->isQuotaExceeded($userId, Application::QUOTA_TYPE_TRANSCRIPTION)) {
throw new Exception($this->l10n->t('Audio transcription quota exceeded'), Http::STATUS_TOO_MANY_REQUESTS);
Expand All @@ -730,6 +735,9 @@ public function transcribe(
'response_format' => 'verbose_json',
// Verbose needed for extraction of audio duration
];
if ($language !== 'detect_language') {
$params['language'] = $language;
}
$endpoint = $translate ? 'audio/translations' : 'audio/transcriptions';
$contentType = 'multipart/form-data';

Expand Down
27 changes: 23 additions & 4 deletions lib/TaskProcessing/AudioToTextProvider.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,12 @@
use OCA\OpenAi\Service\OpenAiAPIService;
use OCP\Files\File;
use OCP\IAppConfig;
use OCP\IL10N;
use OCP\L10N\IFactory;
use OCP\TaskProcessing\EShapeType;
use OCP\TaskProcessing\ISynchronousProvider;
use OCP\TaskProcessing\ShapeDescriptor;
use OCP\TaskProcessing\ShapeEnumValue;
use OCP\TaskProcessing\TaskTypes\AudioToText;
use Psr\Log\LoggerInterface;
use RuntimeException;
Expand All @@ -25,6 +30,8 @@ public function __construct(
private OpenAiAPIService $openAiAPIService,
private LoggerInterface $logger,
private IAppConfig $appConfig,
private IFactory $l10nFactory,
private IL10N $l,
) {
}

Expand Down Expand Up @@ -53,15 +60,23 @@ public function getInputShapeDefaults(): array {
}

/**
 * Describe the optional inputs this provider accepts.
 *
 * Exposes a single optional 'language' input of enum type; its selectable values
 * come from getOptionalInputShapeEnumValues(). The label and description are
 * translated through the injected IL10N instance.
 *
 * @return array<string, ShapeDescriptor> descriptors keyed by input name
 */
public function getOptionalInputShape(): array {
	// The previous unconditional `return [];` ran first and left this descriptor unreachable.
	return [
		'language' => new ShapeDescriptor(
			$this->l->t('Language'),
			$this->l->t('The language of the audio file'),
			EShapeType::Enum
		),
	];
}

/**
 * List the selectable values for each optional enum input.
 *
 * For 'language' the list starts with the translated "Detect language" entry
 * (value 'detect_language'), followed by one entry per pair in
 * Application::AUDIO_TO_TEXT_LANGUAGES, in that constant's order.
 *
 * @return array<string, ShapeEnumValue[]> enum values keyed by input name
 */
public function getOptionalInputShapeEnumValues(): array {
	// The previous unconditional `return [];` ran first, so none of these values were ever exposed.
	$languageEnumValues = array_map(static function (array $language) {
		// Each pair is [code, native name]; the name is the label, the code is the value.
		return new ShapeEnumValue($language[1], $language[0]);
	}, Application::AUDIO_TO_TEXT_LANGUAGES);
	$detectLanguageEnumValue = new ShapeEnumValue($this->l->t('Detect language'), 'detect_language');
	return ['language' => array_merge([$detectLanguageEnumValue], $languageEnumValues)];
}

/**
 * Default values for the optional inputs.
 *
 * 'detect_language' is the sentinel meaning "no explicit language": process()
 * falls back to it when no language is supplied, and OpenAiAPIService::transcribe()
 * leaves the 'language' request parameter out when it receives this value.
 *
 * @return array<string, string> default values keyed by input name
 */
public function getOptionalInputShapeDefaults(): array {
	// The previous unconditional `return [];` ran first, so this default was never reported.
	return ['language' => 'detect_language'];
}

public function getOutputShapeEnumValues(): array {
Expand All @@ -81,11 +96,15 @@ public function process(?string $userId, array $input, callable $reportProgress)
throw new RuntimeException('Invalid input file');
}
$inputFile = $input['input'];
$language = $input['language'] ?? 'detect_language';
if (!is_string($language)) {
throw new RuntimeException('Invalid language');
}

$model = $this->appConfig->getValueString(Application::APP_ID, 'default_stt_model_id', Application::DEFAULT_MODEL_ID) ?: Application::DEFAULT_MODEL_ID;

try {
$transcription = $this->openAiAPIService->transcribeFile($userId, $inputFile, false, $model);
$transcription = $this->openAiAPIService->transcribeFile($userId, $inputFile, false, $model, $language);
return ['output' => $transcription];
} catch (Exception $e) {
$this->logger->warning('OpenAI\'s Whisper transcription failed with: ' . $e->getMessage(), ['exception' => $e]);
Expand Down
Loading