Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@
- Micke Nordin <[email protected]>
- rakekniven <[email protected]>
- Richard Steinmetz <[email protected]>
- Lukas Schaefer <[email protected]>
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,14 @@ Positive:
Negative:
* The training data is not freely available, limiting the ability of external parties to check and correct for bias or optimise the model’s performance and CO2 usage.

### Rating for Text-To-Speech via the OpenAI API: 🔴

Negative:
* The software for training and inferencing of this model is proprietary, limiting running it locally or training by yourself
* The trained model is not freely available, so the model cannot be run on-premises
* The training data is not freely available, limiting the ability of external parties to check and correct for bias or optimise the model’s performance and CO2 usage.


### Rating for Text generation via LocalAI: 🟢

Positive:
Expand Down
7 changes: 7 additions & 0 deletions appinfo/info.xml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,13 @@ Positive:
Negative:
* The training data is not freely available, limiting the ability of external parties to check and correct for bias or optimise the model’s performance and CO2 usage.

### Rating for Text-To-Speech via the OpenAI API: 🔴

Negative:
* The software for training and inferencing of this model is proprietary, limiting running it locally or training by yourself
* The trained model is not freely available, so the model cannot be run on-premises
* The training data is not freely available, limiting the ability of external parties to check and correct for bias or optimise the model’s performance and CO2 usage.

### Rating for Text generation via LocalAI: 🟢

Positive:
Expand Down
13 changes: 13 additions & 0 deletions lib/AppInfo/Application.php
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
use OCA\OpenAi\TaskProcessing\ReformulateProvider;
use OCA\OpenAi\TaskProcessing\SummaryProvider;
use OCA\OpenAi\TaskProcessing\TextToImageProvider;
use OCA\OpenAi\TaskProcessing\TextToSpeechProvider;
use OCA\OpenAi\TaskProcessing\TextToTextChatProvider;
use OCA\OpenAi\TaskProcessing\TextToTextProvider;
use OCA\OpenAi\TaskProcessing\TopicsProvider;
Expand All @@ -40,6 +41,12 @@ class Application extends App implements IBootstrap {
public const DEFAULT_COMPLETION_MODEL_ID = 'gpt-3.5-turbo';
public const DEFAULT_IMAGE_MODEL_ID = 'dall-e-2';
public const DEFAULT_TRANSCRIPTION_MODEL_ID = 'whisper-1';
public const DEFAULT_SPEECH_MODEL_ID = 'tts-1-hd';
public const DEFAULT_SPEECH_VOICE = 'alloy';
public const DEFAULT_SPEECH_VOICES = [
'alloy', 'ash', 'ballad', 'coral', 'echo', 'fable',
'onyx', 'nova', 'sage', 'shimmer', 'verse'
];
public const DEFAULT_DEFAULT_IMAGE_SIZE = '1024x1024';
public const MAX_GENERATION_IDLE_TIME = 60 * 60 * 24 * 10;
public const DEFAULT_CHUNK_SIZE = 10000;
Expand All @@ -56,11 +63,13 @@ class Application extends App implements IBootstrap {
public const QUOTA_TYPE_TEXT = 0;
public const QUOTA_TYPE_IMAGE = 1;
public const QUOTA_TYPE_TRANSCRIPTION = 2;
public const QUOTA_TYPE_SPEECH = 3;

public const DEFAULT_QUOTAS = [
self::QUOTA_TYPE_TEXT => 0, // 0 = unlimited
self::QUOTA_TYPE_IMAGE => 0, // 0 = unlimited
self::QUOTA_TYPE_TRANSCRIPTION => 0, // 0 = unlimited
self::QUOTA_TYPE_SPEECH => 0, // 0 = unlimited

];

Expand Down Expand Up @@ -110,6 +119,10 @@ public function register(IRegistrationContext $context): void {
$context->registerTaskProcessingProvider(\OCA\OpenAi\TaskProcessing\ProofreadProvider::class);
}
}
if (!class_exists('OCP\\TaskProcessing\\TaskTypes\\TextToSpeech')) {
$context->registerTaskProcessingTaskType(\OCA\OpenAi\TaskProcessing\TextToSpeechTaskType::class);
}
$context->registerTaskProcessingProvider(TextToSpeechProvider::class);
if ($this->appConfig->getValueString(Application::APP_ID, 't2i_provider_enabled', '1') === '1') {
$context->registerTaskProcessingProvider(TextToImageProvider::class);
}
Expand Down
59 changes: 54 additions & 5 deletions lib/Service/OpenAiAPIService.php
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
use OCP\TaskProcessing\ShapeEnumValue;
use Psr\Log\LoggerInterface;
use RuntimeException;
use Throwable;
use function json_encode;

/**
* Service to make requests to OpenAI/LocalAI REST API
Expand Down Expand Up @@ -132,7 +134,7 @@ public function getModels(string $userId): array {
throw $e;
}
if (isset($modelsResponse['error'])) {
$this->logger->warning('Error retrieving models: ' . \json_encode($modelsResponse));
$this->logger->warning('Error retrieving models: ' . json_encode($modelsResponse));
$this->areCredsValid = false;
throw new Exception($modelsResponse['error'], Http::STATUS_INTERNAL_SERVER_ERROR);
}
Expand All @@ -142,7 +144,7 @@ public function getModels(string $userId): array {
}

if (!$this->isModelListValid($modelsResponse['data'])) {
$this->logger->warning('Invalid models response: ' . \json_encode($modelsResponse));
$this->logger->warning('Invalid models response: ' . json_encode($modelsResponse));
$this->areCredsValid = false;
throw new Exception($this->l10n->t('Invalid models response received'), Http::STATUS_INTERNAL_SERVER_ERROR);
}
Expand Down Expand Up @@ -185,7 +187,7 @@ public function getModelEnumValues(?string $userId): array {
array_unshift($modelEnumValues, new ShapeEnumValue($this->l10n->t('Default'), 'Default'));
}
return $modelEnumValues;
} catch (\Throwable $e) {
} catch (Throwable $e) {
// avoid flooding the logs with errors from calls of task processing
$this->logger->info('Error getting model enum values', ['exception' => $e]);
return [];
Expand Down Expand Up @@ -248,6 +250,8 @@ public function translatedQuotaType(int $type): string {
return $this->l10n->t('Image generation');
case Application::QUOTA_TYPE_TRANSCRIPTION:
return $this->l10n->t('Audio transcription');
case Application::QUOTA_TYPE_SPEECH:
return $this->l10n->t('Text to speech');
default:
return $this->l10n->t('Unknown');
}
Expand All @@ -266,6 +270,8 @@ public function translatedQuotaUnit(int $type): string {
return $this->l10n->t('images');
case Application::QUOTA_TYPE_TRANSCRIPTION:
return $this->l10n->t('seconds');
case Application::QUOTA_TYPE_SPEECH:
return $this->l10n->t('characters');
default:
return $this->l10n->t('Unknown');
}
Expand Down Expand Up @@ -742,6 +748,42 @@ public function getImageRequestOptions(?string $userId): array {
return $requestOptions;
}

/**
 * Ask the "audio/speech" endpoint to synthesize speech for a text prompt.
 *
 * The user's speech quota is checked before the request is sent. After a
 * request that did not come back as an error, the character count of the
 * prompt is recorded as speech quota usage.
 *
 * @param string|null $userId User the request and the quota accounting are done for
 * @param string $prompt Text to synthesize
 * @param string $model Model ID; Application::DEFAULT_MODEL_ID is replaced by Application::DEFAULT_SPEECH_MODEL_ID
 * @param string $voice Voice name; Application::DEFAULT_MODEL_ID is replaced by Application::DEFAULT_SPEECH_VOICE
 * @param float $speed Speed multiplier forwarded unchanged as the "speed" request parameter
 * @return array The array returned by request() for this call (an 'error' key marks a failed request)
 * @throws Exception When the speech quota is exceeded (HTTP 429 code)
 */
public function requestSpeechCreation(
	?string $userId, string $prompt, string $model, string $voice, float $speed = 1.0,
): array {
	if ($this->isQuotaExceeded($userId, Application::QUOTA_TYPE_SPEECH)) {
		throw new Exception($this->l10n->t('Speech generation quota exceeded'), Http::STATUS_TOO_MANY_REQUESTS);
	}

	// The generic "Default" placeholder is shared by the model and the voice selectors
	$params = [
		'input' => $prompt,
		'voice' => $voice === Application::DEFAULT_MODEL_ID ? Application::DEFAULT_SPEECH_VOICE : $voice,
		'model' => $model === Application::DEFAULT_MODEL_ID ? Application::DEFAULT_SPEECH_MODEL_ID : $model,
		'response_format' => 'mp3',
		'speed' => $speed,
	];

	$apiResponse = $this->request($userId, 'audio/speech', $params, 'POST');

	// A failed request produced no audio: hand the error back without counting it against the quota
	if (isset($apiResponse['error'])) {
		return $apiResponse;
	}

	try {
		// Speech quota is measured in characters, so count multibyte characters, not bytes
		$charCount = mb_strlen($prompt);
		$this->quotaUsageMapper->createQuotaUsage($userId ?? '', Application::QUOTA_TYPE_SPEECH, $charCount);
	} catch (DBException $e) {
		// Best effort: a failure to store the usage must not discard the generated audio
		$this->logger->warning('Could not create quota usage for user: ' . $userId . ' and quota type: ' . Application::QUOTA_TYPE_SPEECH . '. Error: ' . $e->getMessage(), ['app' => Application::APP_ID]);
	}
	return $apiResponse;
}

/**
* @return int
*/
Expand Down Expand Up @@ -893,9 +935,16 @@ public function request(?string $userId, string $endPoint, array $params = [], s

if ($respCode >= 400) {
return ['error' => $this->l10n->t('Bad credentials')];
} else {
return json_decode($body, true) ?: [];
}
if ($response->getHeader('Content-Type') === 'application/json') {
$parsedBody = json_decode($body, true);
if ($parsedBody === null) {
$this->logger->warning('Could not JSON parse the response', ['body' => $body]);
return ['error' => 'Could not JSON parse the response'];
}
return $parsedBody;
}
return ['body' => $body];
} catch (ClientException|ServerException $e) {
$responseBody = $e->getResponse()->getBody();
$parsedResponseBody = json_decode($responseBody, true);
Expand Down
92 changes: 92 additions & 0 deletions lib/Service/OpenAiSettingsService.php
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ class OpenAiSettingsService {
'api_key' => 'string',
'default_completion_model_id' => 'string',
'default_stt_model_id' => 'string',
'default_tts_model_id' => 'string',
'tts_voices' => 'array',
'default_tts_voice' => 'string',
'default_image_model_id' => 'string',
'default_image_size' => 'string',
'image_request_auth' => 'boolean',
Expand All @@ -36,6 +39,7 @@ class OpenAiSettingsService {
'llm_provider_enabled' => 'boolean',
't2i_provider_enabled' => 'boolean',
'stt_provider_enabled' => 'boolean',
'tts_provider_enabled' => 'boolean',
'chat_endpoint_enabled' => 'boolean',
'basic_user' => 'string',
'basic_password' => 'string',
Expand Down Expand Up @@ -118,6 +122,37 @@ public function getAdminDefaultImageSize(): string {
return $this->appConfig->getValueString(Application::APP_ID, 'default_image_size') ?: Application::DEFAULT_DEFAULT_IMAGE_SIZE;
}

/**
 * Read the admin's default text-to-speech model ID.
 *
 * Note that the value is stored under the 'default_speech_model_id' config key.
 *
 * @return string The stored value, or Application::DEFAULT_MODEL_ID when the stored value is falsy
 */
public function getAdminDefaultTtsModelId(): string {
	$storedModelId = $this->appConfig->getValueString(Application::APP_ID, 'default_speech_model_id');
	if ($storedModelId) {
		return $storedModelId;
	}
	return Application::DEFAULT_MODEL_ID;
}

/**
 * Read the admin's default text-to-speech voice.
 *
 * Note that the value is stored under the 'default_speech_voice' config key.
 *
 * @return string The stored value, or Application::DEFAULT_SPEECH_VOICE when the stored value is falsy
 */
public function getAdminDefaultTtsVoice(): string {
	$storedVoice = $this->appConfig->getValueString(Application::APP_ID, 'default_speech_voice');
	if ($storedVoice) {
		return $storedVoice;
	}
	return Application::DEFAULT_SPEECH_VOICE;
}

/**
 * Read the list of text-to-speech voices configured by the admin.
 *
 * The list is stored as a JSON string under the 'tts_voices' config key.
 *
 * @return array The decoded list, or Application::DEFAULT_SPEECH_VOICES when the
 *               stored value is falsy or does not decode to an array
 */
public function getAdminTtsVoices(): array {
	$fallbackJson = json_encode(Application::DEFAULT_SPEECH_VOICES);

	$storedJson = $this->appConfig->getValueString(Application::APP_ID, 'tts_voices', $fallbackJson);
	if (!$storedJson) {
		// a falsy stored value is treated like a missing one
		$storedJson = $fallbackJson;
	}

	$decoded = json_decode($storedJson, true);
	return is_array($decoded) ? $decoded : Application::DEFAULT_SPEECH_VOICES;
}

/**
* @return string
*/
Expand Down Expand Up @@ -266,6 +301,9 @@ public function getAdminConfig(): array {
'api_key' => $this->getAdminApiKey(),
'default_completion_model_id' => $this->getAdminDefaultCompletionModelId(),
'default_stt_model_id' => $this->getAdminDefaultSttModelId(),
'default_tts_model_id' => $this->getAdminDefaultTtsModelId(),
'default_tts_voice' => $this->getAdminDefaultTtsVoice(),
'tts_voices' => $this->getAdminTtsVoices(),
'default_image_model_id' => $this->getAdminDefaultImageModelId(),
'default_image_size' => $this->getAdminDefaultImageSize(),
'image_request_auth' => $this->getIsImageRetrievalAuthenticated(),
Expand All @@ -282,6 +320,7 @@ public function getAdminConfig(): array {
'llm_provider_enabled' => $this->getLlmProviderEnabled(),
't2i_provider_enabled' => $this->getT2iProviderEnabled(),
'stt_provider_enabled' => $this->getSttProviderEnabled(),
'tts_provider_enabled' => $this->getTtsProviderEnabled(),
'chat_endpoint_enabled' => $this->getChatEndpointEnabled(),
'basic_user' => $this->getAdminBasicUser(),
'basic_password' => $this->getAdminBasicPassword(),
Expand Down Expand Up @@ -354,6 +393,13 @@ public function getSttProviderEnabled(): bool {
return $this->appConfig->getValueString(Application::APP_ID, 'stt_provider_enabled', '1') === '1';
}

/**
 * Tell whether the text-to-speech provider is enabled.
 *
 * @return bool True when the 'tts_provider_enabled' config value is exactly '1' ('1' is also the default passed to the config lookup)
 */
public function getTtsProviderEnabled(): bool {
	$flag = $this->appConfig->getValueString(Application::APP_ID, 'tts_provider_enabled', '1');
	return $flag === '1';
}

////////////////////////////////////////////
//////////// Setters for settings //////////

Expand Down Expand Up @@ -425,6 +471,15 @@ public function setAdminDefaultSttModelId(string $defaultSttModelId): void {
$this->appConfig->setValueString(Application::APP_ID, 'default_stt_model_id', $defaultSttModelId);
}

/**
 * Store the admin's default text-to-speech model ID.
 *
 * The value is written as-is, without validation, to the
 * 'default_speech_model_id' config key.
 *
 * @param string $defaultTtsModelId Model ID to store
 * @return void
 */
public function setAdminDefaultTtsModelId(string $defaultTtsModelId): void {
	$configKey = 'default_speech_model_id';
	$this->appConfig->setValueString(Application::APP_ID, $configKey, $defaultTtsModelId);
}

/**
* @param string $defaultImageModelId
* @return void
Expand All @@ -434,6 +489,14 @@ public function setAdminDefaultImageModelId(string $defaultImageModelId): void {
$this->appConfig->setValueString(Application::APP_ID, 'default_image_model_id', $defaultImageModelId);
}

/**
 * Store the admin's default text-to-speech voice.
 *
 * The value is written as-is to the 'default_speech_voice' config key.
 *
 * @param string $voice Voice name to store
 * @return void
 */
public function setAdminDefaultTtsVoice(string $voice): void {
	$configKey = 'default_speech_voice';
	$this->appConfig->setValueString(Application::APP_ID, $configKey, $voice);
}

/**
* @param string $defaultImageSize
* @return void
Expand Down Expand Up @@ -575,6 +638,15 @@ public function setUseBasicAuth(bool $useBasicAuth): void {
$this->invalidateModelsCache();
}

/**
 * Store the list of available text-to-speech voices.
 *
 * The list is JSON-encoded into the 'tts_voices' config key, then the
 * models cache is invalidated.
 *
 * @param array $voices Voice names to store
 * @return void
 */
public function setAdminTtsVoices(array $voices): void {
	$encodedVoices = json_encode($voices);
	$this->appConfig->setValueString(Application::APP_ID, 'tts_voices', $encodedVoices);
	$this->invalidateModelsCache();
}

/**
* Set the admin config for the settings page
* @param mixed[] $adminConfig
Expand Down Expand Up @@ -614,6 +686,9 @@ public function setAdminConfig(array $adminConfig): void {
if (isset($adminConfig['default_stt_model_id'])) {
$this->setAdminDefaultSttModelId($adminConfig['default_stt_model_id']);
}
if (isset($adminConfig['default_tts_model_id'])) {
$this->setAdminDefaultTtsModelId($adminConfig['default_tts_model_id']);
}
if (isset($adminConfig['default_image_model_id'])) {
$this->setAdminDefaultImageModelId($adminConfig['default_image_model_id']);
}
Expand Down Expand Up @@ -653,6 +728,12 @@ public function setAdminConfig(array $adminConfig): void {
if (isset($adminConfig['stt_provider_enabled'])) {
$this->setSttProviderEnabled($adminConfig['stt_provider_enabled']);
}
if (isset($adminConfig['tts_provider_enabled'])) {
$this->setTtsProviderEnabled($adminConfig['tts_provider_enabled']);
}
if (isset($adminConfig['default_tts_voice'])) {
$this->setAdminDefaultTtsVoice($adminConfig['default_tts_voice']);
}
if (isset($adminConfig['chat_endpoint_enabled'])) {
$this->setChatEndpointEnabled($adminConfig['chat_endpoint_enabled']);
}
Expand All @@ -665,6 +746,9 @@ public function setAdminConfig(array $adminConfig): void {
if (isset($adminConfig['use_basic_auth'])) {
$this->setUseBasicAuth($adminConfig['use_basic_auth']);
}
if (isset($adminConfig['tts_voices'])) {
$this->setAdminTtsVoices($adminConfig['tts_voices']);
}
}

/**
Expand Down Expand Up @@ -741,6 +825,14 @@ public function setSttProviderEnabled(bool $enabled): void {
$this->appConfig->setValueString(Application::APP_ID, 'stt_provider_enabled', $enabled ? '1' : '0');
}

/**
 * Enable or disable the text-to-speech provider.
 *
 * The flag is stored as the string '1' or '0' under the
 * 'tts_provider_enabled' config key.
 *
 * @param bool $enabled Whether the provider should be enabled
 * @return void
 */
public function setTtsProviderEnabled(bool $enabled): void {
	$storedFlag = '0';
	if ($enabled) {
		$storedFlag = '1';
	}
	$this->appConfig->setValueString(Application::APP_ID, 'tts_provider_enabled', $storedFlag);
}

/**
* @param bool $enabled
*/
Expand Down
Loading
Loading