From 1c66b8c3823c8616b3b35137043b1b91382e2bfb Mon Sep 17 00:00:00 2001 From: Lukas Schaefer Date: Tue, 1 Jul 2025 08:16:04 -0400 Subject: [PATCH 1/8] feat: add picture question Signed-off-by: Lukas Schaefer --- lib/AppInfo/Application.php | 4 + lib/TaskProcessing/ImageQuestionProvider.php | 159 +++++++++++++++++++ lib/TaskProcessing/ImageQuestionTaskType.php | 77 +++++++++ 3 files changed, 240 insertions(+) create mode 100644 lib/TaskProcessing/ImageQuestionProvider.php create mode 100644 lib/TaskProcessing/ImageQuestionTaskType.php diff --git a/lib/AppInfo/Application.php b/lib/AppInfo/Application.php index 02c27003..dd7a7ea0 100644 --- a/lib/AppInfo/Application.php +++ b/lib/AppInfo/Application.php @@ -127,6 +127,10 @@ public function register(IRegistrationContext $context): void { $context->registerTaskProcessingProvider(TextToImageProvider::class); } + + $context->registerTaskProcessingTaskType(\OCA\OpenAi\TaskProcessing\ImageQuestionTaskType::class); + $context->registerTaskProcessingProvider(\OCA\OpenAi\TaskProcessing\ImageQuestionProvider::class); + $context->registerCapability(Capabilities::class); } diff --git a/lib/TaskProcessing/ImageQuestionProvider.php b/lib/TaskProcessing/ImageQuestionProvider.php new file mode 100644 index 00000000..fc00667a --- /dev/null +++ b/lib/TaskProcessing/ImageQuestionProvider.php @@ -0,0 +1,159 @@ +openAiAPIService->getServiceName(); + } + + public function getTaskTypeId(): string { + return ImageQuestionTaskType::ID; + } + + public function getExpectedRuntime(): int { + return $this->openAiAPIService->getExpTextProcessingTime(); + } + + public function getInputShapeEnumValues(): array { + return []; + } + + public function getInputShapeDefaults(): array { + return []; + } + + + public function getOptionalInputShape(): array { + return [ + 'max_tokens' => new ShapeDescriptor( + $this->l->t('Maximum output words'), + $this->l->t('The maximum number of words/tokens that can be generated in the completion.'), + EShapeType::Number + ), + 'model' => new ShapeDescriptor( + $this->l->t('Model'), + $this->l->t('The model used to generate the completion'), + EShapeType::Enum + ), + ]; + } + + public function getOptionalInputShapeEnumValues(): array { + return [ + 'model' => $this->openAiAPIService->getModelEnumValues($this->userId), + ]; + } + + public function getOptionalInputShapeDefaults(): array { + $adminModel = $this->openAiAPIService->isUsingOpenAi() + ? ($this->appConfig->getValueString(Application::APP_ID, 'default_completion_model_id', Application::DEFAULT_MODEL_ID) ?: Application::DEFAULT_MODEL_ID) + : $this->appConfig->getValueString(Application::APP_ID, 'default_completion_model_id'); + return [ + 'max_tokens' => 1000, + 'model' => $adminModel, + ]; + } + + public function getOutputShapeEnumValues(): array { + return []; + } + + public function getOptionalOutputShape(): array { + return []; + } + + public function getOptionalOutputShapeEnumValues(): array { + return []; + } + + public function process(?string $userId, array $input, callable $reportProgress): array { + + if (!$this->openAiAPIService->isUsingOpenAi() && !$this->openAiSettingsService->getChatEndpointEnabled()) { + throw new RuntimeException('Must support chat completion endpoint'); + } + + if (!isset($input['image']) || !$input['image'] instanceof File || !$input['image']->isReadable()) { + throw new RuntimeException('Invalid input file'); + } + + $inputFile = base64_encode(stream_get_contents($input['image']->fopen('rb'))); + $fileType = $input['image']->getMimeType(); + if (!str_starts_with($fileType, 'image/')) { + throw new RuntimeException('Invalid input file type ' . $fileType); + } + + if (!isset($input['input']) || !is_string($input['input'])) { + throw new RuntimeException('Invalid prompt'); + } + $prompt = $input['input']; + + if (isset($input['model']) && is_string($input['model'])) { + $model = $input['model']; + } else { + $model = $this->appConfig->getValueString(Application::APP_ID, 'default_completion_model_id', Application::DEFAULT_COMPLETION_MODEL_ID) ?: Application::DEFAULT_COMPLETION_MODEL_ID; + } + + $maxTokens = null; + if (isset($input['max_tokens']) && is_int($input['max_tokens'])) { + $maxTokens = $input['max_tokens']; + } + + try { + $systemPrompt = 'Take the users question and answer it based on the provided image. Ensure that the answer matches the language of the user\'s text input.'; + $completion = $this->openAiAPIService->createChatCompletion($userId, $model, $prompt, $systemPrompt, [json_encode([ + 'role' => 'user', + 'content' => [[ + 'type' => 'image_url', + 'image_url' => [ + 'url' => 'data:' . $fileType . ';base64,' . $inputFile + ]] + ] + ])], 1, $maxTokens); + $completion = $completion['messages']; + + if (count($completion) > 0) { + return ['output' => array_pop($completion)]; + } + + throw new RuntimeException('No result in OpenAI/LocalAI response.'); + } catch (\Exception $e) { + $this->logger->warning('OpenAI/LocalAI\'s image question generation failed with: ' . $e->getMessage(), ['exception' => $e]); + throw new RuntimeException('OpenAI/LocalAI\'s image question generation failed with: ' . $e->getMessage()); + } + } +} diff --git a/lib/TaskProcessing/ImageQuestionTaskType.php b/lib/TaskProcessing/ImageQuestionTaskType.php new file mode 100644 index 00000000..39ed868c --- /dev/null +++ b/lib/TaskProcessing/ImageQuestionTaskType.php @@ -0,0 +1,77 @@ +l->t('Image question'); + } + + /** + * @inheritDoc + */ + public function getDescription(): string { + return $this->l->t('Ask a question about an image.'); + } + + /** + * @return string + */ + public function getId(): string { + return self::ID; + } + + /** + * @return ShapeDescriptor[] + */ + public function getInputShape(): array { + return [ + 'image' => new ShapeDescriptor( + $this->l->t('Image'), + $this->l->t('Image to ask question about'), + EShapeType::Image, + ), + 'input' => new ShapeDescriptor( + $this->l->t('Question'), + $this->l->t('What to ask about the image.'), + EShapeType::Text, + ), + ]; + } + + /** + * @return ShapeDescriptor[] + */ + public function getOutputShape(): array { + return [ + 'output' => new ShapeDescriptor( + $this->l->t('Generated response'), + $this->l->t('The answer to the question:'), + EShapeType::Text + ), + ]; + } +} From 9705e8a2f680999d1853487b3e385d7576f88d8b Mon Sep 17 00:00:00 2001 From: Lukas Schaefer Date: Wed, 2 Jul 2025 08:31:27 -0400 Subject: [PATCH 2/8] fix feedback and rename to AnalyzeImage Signed-off-by: Lukas Schaefer --- lib/AppInfo/Application.php | 4 +- ...nProvider.php => AnalyzeImageProvider.php} | 46 +++++++++++++------ ...nTaskType.php => AnalyzeImageTaskType.php} | 10 ++-- 3 files changed, 38 insertions(+), 22 deletions(-) rename lib/TaskProcessing/{ImageQuestionProvider.php => AnalyzeImageProvider.php} (81%) rename lib/TaskProcessing/{ImageQuestionTaskType.php => AnalyzeImageTaskType.php} (83%) diff --git a/lib/AppInfo/Application.php b/lib/AppInfo/Application.php index dd7a7ea0..3e71f22e 100644 --- a/lib/AppInfo/Application.php +++ b/lib/AppInfo/Application.php @@ -128,8 +128,8 @@ public function register(IRegistrationContext $context): void { } - $context->registerTaskProcessingTaskType(\OCA\OpenAi\TaskProcessing\ImageQuestionTaskType::class); - $context->registerTaskProcessingProvider(\OCA\OpenAi\TaskProcessing\ImageQuestionProvider::class); + $context->registerTaskProcessingTaskType(\OCA\OpenAi\TaskProcessing\AnalyzeImageTaskType::class); + $context->registerTaskProcessingProvider(\OCA\OpenAi\TaskProcessing\AnalyzeImageProvider::class); $context->registerCapability(Capabilities::class); } diff --git a/lib/TaskProcessing/ImageQuestionProvider.php b/lib/TaskProcessing/AnalyzeImageProvider.php similarity index 81% rename from lib/TaskProcessing/ImageQuestionProvider.php rename to lib/TaskProcessing/AnalyzeImageProvider.php index fc00667a..08111009 100644 --- a/lib/TaskProcessing/ImageQuestionProvider.php +++ b/lib/TaskProcessing/AnalyzeImageProvider.php @@ -21,7 +21,7 @@ use Psr\Log\LoggerInterface; use RuntimeException; -class ImageQuestionProvider implements ISynchronousProvider { +class AnalyzeImageProvider implements ISynchronousProvider { public function __construct( private OpenAiAPIService $openAiAPIService, @@ -34,7 +34,7 @@ public function __construct( } public function getId(): string { - return Application::APP_ID . '-image_question'; + return Application::APP_ID . '-analyze-image'; } public function getName(): string { @@ -42,7 +42,7 @@ public function getName(): string { } public function getTaskTypeId(): string { - return ImageQuestionTaskType::ID; + return AnalyzeImageTaskType::ID; } public function getExpectedRuntime(): int { @@ -62,12 +62,12 @@ public function getOptionalInputShape(): array { return [ 'max_tokens' => new ShapeDescriptor( $this->l->t('Maximum output words'), - $this->l->t('The maximum number of words/tokens that can be generated in the completion.'), + $this->l->t('The maximum number of words/tokens that can be generated in the output.'), EShapeType::Number ), 'model' => new ShapeDescriptor( $this->l->t('Model'), - $this->l->t('The model used to generate the completion'), + $this->l->t('The model used to generate the output'), EShapeType::Enum ), ]; @@ -116,6 +116,18 @@ public function process(?string $userId, array $input, callable $reportProgress) if (!str_starts_with($fileType, 'image/')) { throw new RuntimeException('Invalid input file type ' . $fileType); } + if ($this->openAiAPIService->isUsingOpenAi()) { + $validFileTypes = [ + 'image/jpeg', + 'image/jpg', + 'image/png', + 'image/gif', + 'image/webp', + ]; + if (!in_array($fileType, $validFileTypes)) { + throw new RuntimeException('Invalid input file type for OpenAI ' . $fileType); + } + } if (!isset($input['input']) || !is_string($input['input'])) { throw new RuntimeException('Invalid prompt'); @@ -134,16 +146,20 @@ public function process(?string $userId, array $input, callable $reportProgress) } try { - $systemPrompt = 'Take the users question and answer it based on the provided image. Ensure that the answer matches the language of the user\'s text input.'; - $completion = $this->openAiAPIService->createChatCompletion($userId, $model, $prompt, $systemPrompt, [json_encode([ - 'role' => 'user', - 'content' => [[ - 'type' => 'image_url', - 'image_url' => [ - 'url' => 'data:' . $fileType . ';base64,' . $inputFile - ]] - ] - ])], 1, $maxTokens); + $systemPrompt = 'Take the user\'s question and answer it based on the provided image. Ensure that the answer matches the language of the user\'s question.'; + $completion = $this->openAiAPIService->createChatCompletion($userId, $model, $prompt, $systemPrompt, [ + json_encode([ + 'role' => 'user', + 'content' => [ + [ + 'type' => 'image_url', + 'image_url' => [ + 'url' => 'data:' . $fileType . ';base64,' . $inputFile + ] + ] + ] + ]) + ], 1, $maxTokens); $completion = $completion['messages']; if (count($completion) > 0) { diff --git a/lib/TaskProcessing/ImageQuestionTaskType.php b/lib/TaskProcessing/AnalyzeImageTaskType.php similarity index 83% rename from lib/TaskProcessing/ImageQuestionTaskType.php rename to lib/TaskProcessing/AnalyzeImageTaskType.php index 39ed868c..a7e3667b 100644 --- a/lib/TaskProcessing/ImageQuestionTaskType.php +++ b/lib/TaskProcessing/AnalyzeImageTaskType.php @@ -15,8 +15,8 @@ use OCP\TaskProcessing\ITaskType; use OCP\TaskProcessing\ShapeDescriptor; -class ImageQuestionTaskType implements ITaskType { - public const ID = Application::APP_ID . ':image_question'; +class AnalyzeImageTaskType implements ITaskType { + public const ID = Application::APP_ID . ':analyze-image'; public function __construct( private IL10N $l, @@ -27,7 +27,7 @@ public function __construct( * @inheritDoc */ public function getName(): string { - return $this->l->t('Image question'); + return $this->l->t('Analyze image'); } /** @@ -51,7 +51,7 @@ public function getInputShape(): array { return [ 'image' => new ShapeDescriptor( $this->l->t('Image'), - $this->l->t('Image to ask question about'), + $this->l->t('Image to ask a question about'), EShapeType::Image, ), 'input' => new ShapeDescriptor( @@ -69,7 +69,7 @@ public function getOutputShape(): array { return [ 'output' => new ShapeDescriptor( $this->l->t('Generated response'), - $this->l->t('The answer to the question:'), + $this->l->t('The answer to the question'), EShapeType::Text ), ]; From 4a2841a5b273868acd5aba3afff9f94faa0f9dd1 Mon Sep 17 00:00:00 2001 From: Lukas Schaefer Date: Wed, 2 Jul 2025 09:11:58 -0400 Subject: [PATCH 3/8] add feedback and only load task type if needed Signed-off-by: Lukas Schaefer --- lib/AppInfo/Application.php | 8 ++++---- lib/TaskProcessing/AnalyzeImageProvider.php | 9 ++++----- lib/TaskProcessing/AnalyzeImageTaskType.php | 2 +- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/lib/AppInfo/Application.php b/lib/AppInfo/Application.php index 3e71f22e..aba56930 100644 --- a/lib/AppInfo/Application.php +++ b/lib/AppInfo/Application.php @@ -118,6 +118,10 @@ public function register(IRegistrationContext $context): void { if (class_exists('OCP\\TaskProcessing\\TaskTypes\\TextToTextProofread')) { $context->registerTaskProcessingProvider(\OCA\OpenAi\TaskProcessing\ProofreadProvider::class); } + if (!class_exists('OCP\\TaskProcessing\\TaskTypes\\AnalyzeImage')) { + $context->registerTaskProcessingTaskType(\OCA\OpenAi\TaskProcessing\AnalyzeImageTaskType::class); + } + $context->registerTaskProcessingProvider(\OCA\OpenAi\TaskProcessing\AnalyzeImageProvider::class); } if (!class_exists('OCP\\TaskProcessing\\TaskTypes\\TextToSpeech')) { $context->registerTaskProcessingTaskType(\OCA\OpenAi\TaskProcessing\TextToSpeechTaskType::class); @@ -127,10 +131,6 @@ public function register(IRegistrationContext $context): void { $context->registerTaskProcessingProvider(TextToImageProvider::class); } - - $context->registerTaskProcessingTaskType(\OCA\OpenAi\TaskProcessing\AnalyzeImageTaskType::class); - $context->registerTaskProcessingProvider(\OCA\OpenAi\TaskProcessing\AnalyzeImageProvider::class); - $context->registerCapability(Capabilities::class); } diff --git a/lib/TaskProcessing/AnalyzeImageProvider.php b/lib/TaskProcessing/AnalyzeImageProvider.php index 08111009..04821be4 100644 --- a/lib/TaskProcessing/AnalyzeImageProvider.php +++ b/lib/TaskProcessing/AnalyzeImageProvider.php @@ -119,7 +119,6 @@ public function process(?string $userId, array $input, callable $reportProgress) if ($this->openAiAPIService->isUsingOpenAi()) { $validFileTypes = [ 'image/jpeg', - 'image/jpg', 'image/png', 'image/gif', 'image/webp', @@ -154,10 +153,10 @@ public function process(?string $userId, array $input, callable $reportProgress) [ 'type' => 'image_url', 'image_url' => [ - 'url' => 'data:' . $fileType . ';base64,' . $inputFile - ] - ] - ] + 'url' => 'data:' . $fileType . ';base64,' . $inputFile, + ], + ], + ], ]) ], 1, $maxTokens); $completion = $completion['messages']; diff --git a/lib/TaskProcessing/AnalyzeImageTaskType.php b/lib/TaskProcessing/AnalyzeImageTaskType.php index a7e3667b..a146158d 100644 --- a/lib/TaskProcessing/AnalyzeImageTaskType.php +++ b/lib/TaskProcessing/AnalyzeImageTaskType.php @@ -3,7 +3,7 @@ declare(strict_types=1); /** - * SPDX-FileCopyrightText: 2024 Nextcloud GmbH and Nextcloud contributors + * SPDX-FileCopyrightText: 2025 Nextcloud GmbH and Nextcloud contributors * SPDX-License-Identifier: AGPL-3.0-or-later */ From 9ed215a75c68ab759c43a5118f9f8164f1d1de86 Mon Sep 17 00:00:00 2001 From: Lukas Schaefer Date: Wed, 2 Jul 2025 09:21:12 -0400 Subject: [PATCH 4/8] correct tasktypeid Signed-off-by: Lukas Schaefer --- lib/TaskProcessing/AnalyzeImageProvider.php | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/TaskProcessing/AnalyzeImageProvider.php b/lib/TaskProcessing/AnalyzeImageProvider.php index 04821be4..74b53cc6 100644 --- a/lib/TaskProcessing/AnalyzeImageProvider.php +++ b/lib/TaskProcessing/AnalyzeImageProvider.php @@ -42,6 +42,9 @@ public function getName(): string { } public function getTaskTypeId(): string { + if (class_exists('OCP\\TaskProcessing\\TaskTypes\\AnalyzeImage')) { + return \OCP\TaskProcessing\TaskTypes\AnalyzeImage::ID; + } return AnalyzeImageTaskType::ID; } From 7b786568c682cb5de28b2119eda843394eecf971 Mon Sep 17 00:00:00 2001 From: Lukas Schaefer Date: Thu, 3 Jul 2025 09:28:30 -0400 Subject: [PATCH 5/8] support multiple images Signed-off-by: Lukas Schaefer --- lib/TaskProcessing/AnalyzeImageProvider.php | 66 +++++++++++---------- lib/TaskProcessing/AnalyzeImageTaskType.php | 6 +- 2 files changed, 39 insertions(+), 33 deletions(-) diff --git a/lib/TaskProcessing/AnalyzeImageProvider.php b/lib/TaskProcessing/AnalyzeImageProvider.php index 74b53cc6..ee3f2008 100644 --- a/lib/TaskProcessing/AnalyzeImageProvider.php +++ b/lib/TaskProcessing/AnalyzeImageProvider.php @@ -110,27 +110,45 @@ public function process(?string $userId, array $input, callable $reportProgress) throw new RuntimeException('Must support chat completion endpoint'); } - if (!isset($input['image']) || !$input['image'] instanceof File || !$input['image']->isReadable()) { - throw new RuntimeException('Invalid input file'); - } + $history = []; - $inputFile = base64_encode(stream_get_contents($input['image']->fopen('rb'))); - $fileType = $input['image']->getMimeType(); - if (!str_starts_with($fileType, 'image/')) { - throw new RuntimeException('Invalid input file type ' . $fileType); + if (!isset($input['image']) || !is_array($input['image'])) { + throw new RuntimeException('Invalid file list'); } - if ($this->openAiAPIService->isUsingOpenAi()) { - $validFileTypes = [ - 'image/jpeg', - 'image/png', - 'image/gif', - 'image/webp', - ]; - if (!in_array($fileType, $validFileTypes)) { - throw new RuntimeException('Invalid input file type for OpenAI ' . $fileType); + foreach ($input['image'] as $image) { + if (!$image instanceof File || !$image->isReadable()) { + throw new RuntimeException('Invalid input file'); + } + $inputFile = base64_encode(stream_get_contents($image->fopen('rb'))); + $fileType = $image->getMimeType(); + if (!str_starts_with($fileType, 'image/')) { + throw new RuntimeException('Invalid input file type ' . $fileType); } + if ($this->openAiAPIService->isUsingOpenAi()) { + $validFileTypes = [ + 'image/jpeg', + 'image/png', + 'image/gif', + 'image/webp', + ]; + if (!in_array($fileType, $validFileTypes)) { + throw new RuntimeException('Invalid input file type for OpenAI ' . $fileType); + } + } + $history[] = json_encode([ + 'role' => 'user', + 'content' => [ + [ + 'type' => 'image_url', + 'image_url' => [ + 'url' => 'data:' . $fileType . ';base64,' . $inputFile, + ], + ], + ], + ]); } + if (!isset($input['input']) || !is_string($input['input'])) { throw new RuntimeException('Invalid prompt'); } @@ -148,20 +166,8 @@ public function process(?string $userId, array $input, callable $reportProgress) } try { - $systemPrompt = 'Take the user\'s question and answer it based on the provided image. Ensure that the answer matches the language of the user\'s question.'; - $completion = $this->openAiAPIService->createChatCompletion($userId, $model, $prompt, $systemPrompt, [ - json_encode([ - 'role' => 'user', - 'content' => [ - [ - 'type' => 'image_url', - 'image_url' => [ - 'url' => 'data:' . $fileType . ';base64,' . $inputFile, - ], - ], - ], - ]) - ], 1, $maxTokens); + $systemPrompt = 'Take the user\'s question and answer it based on the provided images. Ensure that the answer matches the language of the user\'s question.'; + $completion = $this->openAiAPIService->createChatCompletion($userId, $model, $prompt, $systemPrompt, $history, 1, $maxTokens); $completion = $completion['messages']; if (count($completion) > 0) { diff --git a/lib/TaskProcessing/AnalyzeImageTaskType.php b/lib/TaskProcessing/AnalyzeImageTaskType.php index a146158d..befc1a5c 100644 --- a/lib/TaskProcessing/AnalyzeImageTaskType.php +++ b/lib/TaskProcessing/AnalyzeImageTaskType.php @@ -50,9 +50,9 @@ public function getId(): string { public function getInputShape(): array { return [ 'image' => new ShapeDescriptor( - $this->l->t('Image'), - $this->l->t('Image to ask a question about'), - EShapeType::Image, + $this->l->t('Images'), + $this->l->t('Images to ask a question about'), + EShapeType::ListOfImages, ), 'input' => new ShapeDescriptor( $this->l->t('Question'), From 40d91cbb861a25b17e1973936df5c5c37ae33d51 Mon Sep 17 00:00:00 2001 From: Lukas Schaefer Date: Thu, 3 Jul 2025 11:04:18 -0400 Subject: [PATCH 6/8] implement most feedback Signed-off-by: Lukas Schaefer --- lib/AppInfo/Application.php | 6 +++--- ...ImageProvider.php => AnalyzeImagesProvider.php} | 14 +++++++------- ...ImageTaskType.php => AnalyzeImagesTaskType.php} | 10 +++++----- 3 files changed, 15 insertions(+), 15 deletions(-) rename lib/TaskProcessing/{AnalyzeImageProvider.php => AnalyzeImagesProvider.php} (94%) rename lib/TaskProcessing/{AnalyzeImageTaskType.php => AnalyzeImagesTaskType.php} (83%) diff --git a/lib/AppInfo/Application.php b/lib/AppInfo/Application.php index aba56930..2097212e 100644 --- a/lib/AppInfo/Application.php +++ b/lib/AppInfo/Application.php @@ -118,10 +118,10 @@ public function register(IRegistrationContext $context): void { if (class_exists('OCP\\TaskProcessing\\TaskTypes\\TextToTextProofread')) { $context->registerTaskProcessingProvider(\OCA\OpenAi\TaskProcessing\ProofreadProvider::class); } - if (!class_exists('OCP\\TaskProcessing\\TaskTypes\\AnalyzeImage')) { - $context->registerTaskProcessingTaskType(\OCA\OpenAi\TaskProcessing\AnalyzeImageTaskType::class); + if (!class_exists('OCP\\TaskProcessing\\TaskTypes\\AnalyzeImages')) { + $context->registerTaskProcessingTaskType(\OCA\OpenAi\TaskProcessing\AnalyzeImagesTaskType::class); } - $context->registerTaskProcessingProvider(\OCA\OpenAi\TaskProcessing\AnalyzeImageProvider::class); + $context->registerTaskProcessingProvider(\OCA\OpenAi\TaskProcessing\AnalyzeImagesProvider::class); } if (!class_exists('OCP\\TaskProcessing\\TaskTypes\\TextToSpeech')) { $context->registerTaskProcessingTaskType(\OCA\OpenAi\TaskProcessing\TextToSpeechTaskType::class); diff --git a/lib/TaskProcessing/AnalyzeImageProvider.php b/lib/TaskProcessing/AnalyzeImagesProvider.php similarity index 94% rename from lib/TaskProcessing/AnalyzeImageProvider.php rename to lib/TaskProcessing/AnalyzeImagesProvider.php index ee3f2008..ee49f1b8 100644 --- a/lib/TaskProcessing/AnalyzeImageProvider.php +++ b/lib/TaskProcessing/AnalyzeImagesProvider.php @@ -21,7 +21,7 @@ use Psr\Log\LoggerInterface; use RuntimeException; -class AnalyzeImageProvider implements ISynchronousProvider { +class AnalyzeImagesProvider implements ISynchronousProvider { public function __construct( private OpenAiAPIService $openAiAPIService, @@ -34,7 +34,7 @@ public function __construct( } public function getId(): string { - return Application::APP_ID . '-analyze-image'; + return Application::APP_ID . '-analyze-images'; } public function getName(): string { @@ -42,10 +42,10 @@ public function getName(): string { } public function getTaskTypeId(): string { - if (class_exists('OCP\\TaskProcessing\\TaskTypes\\AnalyzeImage')) { - return \OCP\TaskProcessing\TaskTypes\AnalyzeImage::ID; + if (class_exists('OCP\\TaskProcessing\\TaskTypes\\AnalyzeImages')) { + return \OCP\TaskProcessing\TaskTypes\AnalyzeImages::ID; } - return AnalyzeImageTaskType::ID; + return AnalyzeImagesTaskType::ID; } public function getExpectedRuntime(): int { @@ -112,10 +112,10 @@ public function process(?string $userId, array $input, callable $reportProgress) $history = []; - if (!isset($input['image']) || !is_array($input['image'])) { + if (!isset($input['images']) || !is_array($input['images'])) { throw new RuntimeException('Invalid file list'); } - foreach ($input['image'] as $image) { + foreach ($input['images'] as $image) { if (!$image instanceof File || !$image->isReadable()) { throw new RuntimeException('Invalid input file'); } diff --git a/lib/TaskProcessing/AnalyzeImageTaskType.php b/lib/TaskProcessing/AnalyzeImagesTaskType.php similarity index 83% rename from lib/TaskProcessing/AnalyzeImageTaskType.php rename to lib/TaskProcessing/AnalyzeImagesTaskType.php index befc1a5c..13e12a53 100644 --- a/lib/TaskProcessing/AnalyzeImageTaskType.php +++ b/lib/TaskProcessing/AnalyzeImagesTaskType.php @@ -15,8 +15,8 @@ use OCP\TaskProcessing\ITaskType; use OCP\TaskProcessing\ShapeDescriptor; -class AnalyzeImageTaskType implements ITaskType { - public const ID = Application::APP_ID . ':analyze-image'; +class AnalyzeImagesTaskType implements ITaskType { + public const ID = Application::APP_ID . ':analyze-images'; public function __construct( private IL10N $l, @@ -27,14 +27,14 @@ public function __construct( * @inheritDoc */ public function getName(): string { - return $this->l->t('Analyze image'); + return $this->l->t('Analyze images'); } /** * @inheritDoc */ public function getDescription(): string { - return $this->l->t('Ask a question about an image.'); + return $this->l->t('Ask a question about the given images.'); } /** @@ -49,7 +49,7 @@ public function getId(): string { */ public function getInputShape(): array { return [ - 'image' => new ShapeDescriptor( + 'images' => new ShapeDescriptor( $this->l->t('Images'), $this->l->t('Images to ask a question about'), EShapeType::ListOfImages, From 8f389f18d6088bddf9ff4e9927f726d9a1152c70 Mon Sep 17 00:00:00 2001 From: Lukas Schaefer Date: Thu, 3 Jul 2025 12:34:20 -0400 Subject: [PATCH 7/8] Add file size and file count limit Signed-off-by: Lukas Schaefer --- lib/TaskProcessing/AnalyzeImagesProvider.php | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/lib/TaskProcessing/AnalyzeImagesProvider.php b/lib/TaskProcessing/AnalyzeImagesProvider.php index ee49f1b8..b186f52c 100644 --- a/lib/TaskProcessing/AnalyzeImagesProvider.php +++ b/lib/TaskProcessing/AnalyzeImagesProvider.php @@ -115,10 +115,20 @@ public function process(?string $userId, array $input, callable $reportProgress) if (!isset($input['images']) || !is_array($input['images'])) { throw new RuntimeException('Invalid file list'); } + // Maximum file count for openai is 500. Seems reasonable enough to enforce for all apis though (https://platform.openai.com/docs/guides/images-vision?api-mode=responses&format=url#image-input-requirements) + if (count($input['images']) > 500) { + throw new RuntimeException('Too many files given. Max is 100'); + } + $fileSize = 0; foreach ($input['images'] as $image) { if (!$image instanceof File || !$image->isReadable()) { throw new RuntimeException('Invalid input file'); } + $fileSize += intval($image->getSize()); + // Maximum file size for openai is 50MB. Seems reasonable enough to enforce for all apis though. (https://platform.openai.com/docs/guides/images-vision?api-mode=responses&format=url#image-input-requirements) + if ($fileSize > 50 * 1000 * 1000) { + throw new RuntimeException('Filesize of input files too large. Max is 50MB'); + } $inputFile = base64_encode(stream_get_contents($image->fopen('rb'))); $fileType = $image->getMimeType(); if (!str_starts_with($fileType, 'image/')) { From 8f6c8cb8d7122f9e83b681b74e60910ab2033558 Mon Sep 17 00:00:00 2001 From: Lukas Schaefer Date: Tue, 8 Jul 2025 08:08:45 -0400 Subject: [PATCH 8/8] Update lib/TaskProcessing/AnalyzeImagesProvider.php Co-authored-by: Anupam Kumar Signed-off-by: Lukas Schaefer --- lib/TaskProcessing/AnalyzeImagesProvider.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/TaskProcessing/AnalyzeImagesProvider.php b/lib/TaskProcessing/AnalyzeImagesProvider.php index b186f52c..324f885c 100644 --- a/lib/TaskProcessing/AnalyzeImagesProvider.php +++ b/lib/TaskProcessing/AnalyzeImagesProvider.php @@ -117,7 +117,7 @@ public function process(?string $userId, array $input, callable $reportProgress) } // Maximum file count for openai is 500. Seems reasonable enough to enforce for all apis though (https://platform.openai.com/docs/guides/images-vision?api-mode=responses&format=url#image-input-requirements) if (count($input['images']) > 500) { - throw new RuntimeException('Too many files given. Max is 100'); + throw new RuntimeException('Too many files given. Max is 500'); } $fileSize = 0; foreach ($input['images'] as $image) {