Skip to content

Commit 2e4ceaa

Browse files
committed
feat: add ocr-translation task type and provider
Signed-off-by: Julien Veyssier <[email protected]>
1 parent c1e0975 commit 2e4ceaa

File tree

4 files changed

+243
-0
lines changed

4 files changed

+243
-0
lines changed

lib/AppInfo/Application.php

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
use OCA\Assistant\Reference\Text2StickerProvider;
3232
use OCA\Assistant\TaskProcessing\AudioToAudioChatProvider;
3333
use OCA\Assistant\TaskProcessing\ContextAgentAudioInteractionProvider;
34+
use OCA\Assistant\TaskProcessing\ImageToTextTranslateProvider;
35+
use OCA\Assistant\TaskProcessing\ImageToTextTranslateTaskType;
3436
use OCA\Assistant\TaskProcessing\TextToStickerProvider;
3537
use OCA\Assistant\TaskProcessing\TextToStickerTaskType;
3638
use OCA\Files\Event\LoadAdditionalScriptsEvent;
@@ -111,6 +113,9 @@ public function register(IRegistrationContext $context): void {
111113
$context->registerTaskProcessingTaskType(TextToStickerTaskType::class);
112114
$context->registerTaskProcessingProvider(TextToStickerProvider::class);
113115
$context->registerReferenceProvider(Text2StickerProvider::class);
116+
117+
$context->registerTaskProcessingTaskType(ImageToTextTranslateTaskType::class);
118+
$context->registerTaskProcessingProvider(ImageToTextTranslateProvider::class);
114119
}
115120

116121
public function boot(IBootContext $context): void {

lib/Service/TaskProcessingService.php

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
use OCP\TaskProcessing\Exception\UnauthorizedException;
2020
use OCP\TaskProcessing\Exception\ValidationException;
2121
use OCP\TaskProcessing\IManager;
22+
use OCP\TaskProcessing\IProvider;
2223
use OCP\TaskProcessing\Task;
2324
use OCP\TaskProcessing\TaskTypes\AudioToText;
2425
use OCP\TaskProcessing\TaskTypes\TextToTextSummary;
@@ -35,6 +36,10 @@ public function __construct(
3536
) {
3637
}
3738

39+
/**
 * Look up the provider that the task processing manager prefers for a task type.
 *
 * Thin delegation to the injected task processing manager so that other
 * classes of this app can resolve a provider through this service.
 *
 * NOTE(review): the manager presumably throws when no provider is available
 * for the given task type - confirm against OCP\TaskProcessing\IManager.
 *
 * @param string $taskTypeId ID of the task type to resolve a provider for
 * @return IProvider the provider returned by the manager
 */
public function getPreferredProvider(string $taskTypeId): IProvider {
	$preferredProvider = $this->taskProcessingManager->getPreferredProvider($taskTypeId);

	return $preferredProvider;
}
42+
3843
/**
3944
* @param Task $task
4045
* @return array
lib/TaskProcessing/ImageToTextTranslateProvider.php

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
/**
6+
* SPDX-FileCopyrightText: 2025 Nextcloud GmbH and Nextcloud contributors
7+
* SPDX-License-Identifier: AGPL-3.0-or-later
8+
*/
9+
10+
namespace OCA\Assistant\TaskProcessing;
11+
12+
use Exception;
13+
use OCA\Assistant\AppInfo\Application;
14+
use OCA\Assistant\Service\TaskProcessingService;
15+
use OCP\Files\File;
16+
use OCP\IL10N;
17+
use OCP\TaskProcessing\ISynchronousProvider;
18+
use OCP\TaskProcessing\Task;
19+
use OCP\TaskProcessing\TaskTypes\ImageToTextOpticalCharacterRecognition;
20+
use OCP\TaskProcessing\TaskTypes\TextToTextTranslate;
21+
use Psr\Log\LoggerInterface;
22+
use RuntimeException;
23+
24+
/**
 * Task processing provider for the ImageToTextTranslateTaskType task type.
 *
 * This provider does no OCR or translation work itself: process() chains an
 * ImageToTextOpticalCharacterRecognition sub task and one TextToTextTranslate
 * sub task per OCR result, all of them run through TaskProcessingService.
 * The language enum values and defaults are borrowed from the preferred
 * TextToTextTranslate provider.
 */
class ImageToTextTranslateProvider implements ISynchronousProvider {

	public function __construct(
		private IL10N $l,
		private TaskProcessingService $taskProcessingService,
		private LoggerInterface $logger,
	) {
	}

	/**
	 * @return string the unique ID of this provider
	 */
	public function getId(): string {
		return Application::APP_ID . '-image2text:translate';
	}

	/**
	 * @return string the localized display name of this provider
	 */
	public function getName(): string {
		return $this->l->t('Assistant');
	}

	/**
	 * @return string the ID of the task type this provider implements
	 */
	public function getTaskTypeId(): string {
		return ImageToTextTranslateTaskType::ID;
	}

	/**
	 * @return int the expected runtime (presumably in seconds - confirm against the provider interface)
	 */
	public function getExpectedRuntime(): int {
		return 60;
	}

	/**
	 * Expose the language choices of the preferred translation provider.
	 *
	 * @return array the 'origin_language' and 'target_language' enum values of
	 *               the preferred TextToTextTranslate provider; an empty list
	 *               for a key that provider does not define
	 */
	public function getInputShapeEnumValues(): array {
		$translateProvider = $this->taskProcessingService->getPreferredProvider(TextToTextTranslate::ID);
		// Ask the translation provider only once instead of once per key
		$enumValues = $translateProvider->getInputShapeEnumValues();

		return [
			'origin_language' => $enumValues['origin_language'] ?? [],
			'target_language' => $enumValues['target_language'] ?? [],
		];
	}

	/**
	 * Expose the default origin language of the preferred translation provider.
	 *
	 * @return array ['origin_language' => default] when the preferred
	 *               TextToTextTranslate provider defines one, an empty array otherwise
	 */
	public function getInputShapeDefaults(): array {
		$translateProvider = $this->taskProcessingService->getPreferredProvider(TextToTextTranslate::ID);
		$defaults = $translateProvider->getInputShapeDefaults();

		// Do not advertise a null default (and avoid an undefined key warning)
		// when the translation provider has no default origin language
		if (!isset($defaults['origin_language'])) {
			return [];
		}

		return [
			'origin_language' => $defaults['origin_language'],
		];
	}

	/**
	 * @return array empty: this provider has no optional inputs
	 */
	public function getOptionalInputShape(): array {
		return [];
	}

	/**
	 * @return array empty: this provider has no optional inputs
	 */
	public function getOptionalInputShapeEnumValues(): array {
		return [];
	}

	/**
	 * @return array empty: this provider has no optional inputs
	 */
	public function getOptionalInputShapeDefaults(): array {
		return [];
	}

	/**
	 * @return array empty: no output of this provider is an enum
	 */
	public function getOutputShapeEnumValues(): array {
		return [];
	}

	/**
	 * @return array empty: this provider has no optional outputs
	 */
	public function getOptionalOutputShape(): array {
		return [];
	}

	/**
	 * @return array empty: this provider has no optional outputs
	 */
	public function getOptionalOutputShapeEnumValues(): array {
		return [];
	}

	/**
	 * Extract the text of the input images and translate each extracted text.
	 *
	 * @param string|null $userId user on whose behalf the sub tasks are created
	 * @param array $input expects 'input' (array of readable File objects),
	 *                     'origin_language' (string) and 'target_language' (string)
	 * @param callable $reportProgress progress callback, currently not used
	 * @return array ['output' => array of translated texts, one per OCR result]
	 * @throws RuntimeException if the input is invalid or if a sub task fails
	 *                          or returns an unusable output
	 */
	public function process(?string $userId, array $input, callable $reportProgress): array {
		if (!isset($input['input']) || !is_array($input['input'])) {
			throw new RuntimeException('Invalid input');
		}
		foreach ($input['input'] as $inputImage) {
			if (!($inputImage instanceof File) || !$inputImage->isReadable()) {
				throw new RuntimeException('Invalid input images');
			}
		}

		if (!isset($input['origin_language']) || !is_string($input['origin_language'])) {
			throw new RuntimeException('Invalid origin_language input');
		}
		if (!isset($input['target_language']) || !is_string($input['target_language'])) {
			throw new RuntimeException('Invalid target_language input');
		}

		// OCR
		$ocrOutputs = $this->extractTexts($input['input'], $userId);

		// Translation
		$translatedOutputs = [];
		foreach ($ocrOutputs as $ocrOutput) {
			$translatedOutputs[] = $this->translateText(
				$ocrOutput,
				$input['origin_language'],
				$input['target_language'],
				$userId,
			);
		}

		return [
			'output' => $translatedOutputs,
		];
	}

	/**
	 * Run one OCR sub task on all the given images.
	 *
	 * @param File[] $images already validated input images
	 * @param string|null $userId user on whose behalf the sub task is created
	 * @return array the 'output' list of the OCR sub task
	 * @throws RuntimeException if the sub task fails or returns no output list
	 */
	private function extractTexts(array $images, ?string $userId): array {
		// The OCR sub task is given the file IDs, not the File objects
		$ocrInputs = array_map(static function (File $file) {
			return $file->getId();
		}, $images);

		try {
			$task = new Task(
				ImageToTextOpticalCharacterRecognition::ID,
				['input' => $ocrInputs],
				Application::APP_ID . ':internal',
				$userId,
			);
			$taskOutput = $this->taskProcessingService->runTaskProcessingTask($task);
		} catch (Exception $e) {
			$this->logger->warning('OCR sub task failed with: ' . $e->getMessage(), ['exception' => $e]);
			// Chain the original exception so its stack trace is not lost
			throw new RuntimeException('OCR sub task failed with: ' . $e->getMessage(), 0, $e);
		}

		// Fail loudly instead of silently returning an empty result
		$ocrOutputs = $taskOutput['output'] ?? null;
		if (!is_array($ocrOutputs)) {
			throw new RuntimeException('OCR sub task returned an invalid output');
		}

		return $ocrOutputs;
	}

	/**
	 * Run one translation sub task on a single text.
	 *
	 * @param mixed $text the text to translate (one entry of the OCR output)
	 * @param string $originLanguage value passed as 'origin_language' to the sub task
	 * @param string $targetLanguage value passed as 'target_language' to the sub task
	 * @param string|null $userId user on whose behalf the sub task is created
	 * @return string the 'output' of the translation sub task
	 * @throws RuntimeException if the sub task fails or returns no string output
	 */
	private function translateText(mixed $text, string $originLanguage, string $targetLanguage, ?string $userId): string {
		try {
			$task = new Task(
				TextToTextTranslate::ID,
				[
					'input' => $text,
					'origin_language' => $originLanguage,
					'target_language' => $targetLanguage,
				],
				Application::APP_ID . ':internal',
				$userId,
			);
			$taskOutput = $this->taskProcessingService->runTaskProcessingTask($task);
		} catch (Exception $e) {
			$this->logger->warning('Translation sub task failed with: ' . $e->getMessage(), ['exception' => $e]);
			// Chain the original exception so its stack trace is not lost
			throw new RuntimeException('Translation sub task failed with: ' . $e->getMessage(), 0, $e);
		}

		// Never push a null entry into the list of translated texts
		if (!isset($taskOutput['output']) || !is_string($taskOutput['output'])) {
			throw new RuntimeException('Translation sub task returned an invalid output');
		}

		return $taskOutput['output'];
	}
}
lib/TaskProcessing/ImageToTextTranslateTaskType.php

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
/**
6+
* SPDX-FileCopyrightText: 2025 Nextcloud GmbH and Nextcloud contributors
7+
* SPDX-License-Identifier: AGPL-3.0-or-later
8+
*/
9+
10+
namespace OCA\Assistant\TaskProcessing;
11+
12+
use OCA\Assistant\AppInfo\Application;
13+
use OCP\IL10N;
14+
use OCP\TaskProcessing\EShapeType;
15+
use OCP\TaskProcessing\ITaskType;
16+
use OCP\TaskProcessing\ShapeDescriptor;
17+
18+
/**
 * Task type describing the translation of the text content of images.
 *
 * Inputs are a list of files plus an origin and a target language (both
 * enums); the output is a list of texts.
 */
class ImageToTextTranslateTaskType implements ITaskType {
	public const ID = Application::APP_ID . ':image2text:translate';

	public function __construct(
		private IL10N $l,
	) {
	}

	/**
	 * @return string the localized name of this task type
	 */
	public function getName(): string {
		$name = $this->l->t('Translate image');

		return $name;
	}

	/**
	 * @return string the localized description of this task type
	 */
	public function getDescription(): string {
		$description = $this->l->t('Translate the text content of an image');

		return $description;
	}

	/**
	 * @return string the unique ID of this task type
	 */
	public function getId(): string {
		return self::ID;
	}

	/**
	 * Describe the inputs of this task type.
	 *
	 * @return ShapeDescriptor[] descriptors keyed by 'input', 'origin_language' and 'target_language'
	 */
	public function getInputShape(): array {
		$l10n = $this->l;
		$shape = [];

		$shape['input'] = new ShapeDescriptor(
			$l10n->t('Input files'),
			$l10n->t('The files to extract text from'),
			EShapeType::ListOfFiles
		);
		$shape['origin_language'] = new ShapeDescriptor(
			$l10n->t('Origin language'),
			$l10n->t('The language of the origin text'),
			EShapeType::Enum
		);
		$shape['target_language'] = new ShapeDescriptor(
			$l10n->t('Target language'),
			$l10n->t('The desired language to translate the origin text in'),
			EShapeType::Enum
		);

		return $shape;
	}

	/**
	 * Describe the outputs of this task type.
	 *
	 * NOTE(review): the description string speaks of extracted texts although
	 * this task type is about translation - confirm the wording is intended.
	 *
	 * @return ShapeDescriptor[] a single descriptor keyed by 'output'
	 */
	public function getOutputShape(): array {
		$l10n = $this->l;
		$outputDescriptor = new ShapeDescriptor(
			$l10n->t('Output texts'),
			$l10n->t('The texts that were extracted from the files'),
			EShapeType::ListOfTexts
		);

		return ['output' => $outputDescriptor];
	}
}

0 commit comments

Comments
 (0)