diff --git a/lib/BackgroundJobs/IndexerJob.php b/lib/BackgroundJobs/IndexerJob.php index 9a765fd5..7323a16c 100644 --- a/lib/BackgroundJobs/IndexerJob.php +++ b/lib/BackgroundJobs/IndexerJob.php @@ -239,6 +239,7 @@ protected function index(array $files): void { $file->getMtime(), $file->getMimeType(), ProviderConfigService::getDefaultProviderKey(), + (int)$fileSize, ); $allSourceIds[] = ProviderConfigService::getSourceId($file->getId()); diff --git a/lib/Service/LangRopeService.php b/lib/Service/LangRopeService.php index 6be3ff33..d704ad59 100644 --- a/lib/Service/LangRopeService.php +++ b/lib/Service/LangRopeService.php @@ -60,7 +60,7 @@ private function requestToExApp( // todo: app_api is always available now (composer update) try { - $appApiFunctions = \OCP\Server::get(\OCA\AppAPI\PublicFunctions::class); + $appApiFunctions = $this->getAppApiFunctions(); } catch (ContainerExceptionInterface|NotFoundExceptionInterface $e) { throw new RuntimeException('Could not get AppAPI public functions'); } @@ -287,10 +287,24 @@ public function indexSources(array $sources): array { } $params = array_map(function (Source $source) { + $contents = $source->content; + if ($source->size !== null) { + if (class_exists('\GuzzleHttp\Psr7\Utils')) { + $stream = \GuzzleHttp\Psr7\Utils::streamFor($source->content); + } else { + $stream = \GuzzleHttp\Psr7\stream_for($source->content); + } + $contents = \GuzzleHttp\Psr7\FnStream::decorate($stream, [ + 'getSize' => function () use ($source) { + return $source->size; + }, + ]); + } + return [ 'name' => 'sources', 'filename' => $source->reference, // eg. 'files__default: 555' - 'contents' => $source->content, + 'contents' => $contents, 'headers' => [ 'userIds' => implode(',', $source->userIds), 'title' => $source->title, @@ -424,4 +438,8 @@ public function getWithPresentableSources(string $llmResponse, string ...$source return $llmResponse . 
$output; } + + protected function getAppApiFunctions() { + return \OCP\Server::get(\OCA\AppAPI\PublicFunctions::class); + } } diff --git a/lib/Service/ScanService.php b/lib/Service/ScanService.php index bdf7a914..3896efcd 100644 --- a/lib/Service/ScanService.php +++ b/lib/Service/ScanService.php @@ -39,6 +39,15 @@ public function scanUserFiles(string $userId, array $mimeTypeFilter, ?string $di $userFolder = $this->root->getUserFolder($userId)->get($directory); } + if ($userFolder instanceof File) { + $source = $this->getSourceFromFile($mimeTypeFilter, $userFolder); + if ($source !== null) { + $this->langRopeService->indexSources([$source]); + yield $source; + } + return []; + } + yield from ($this->scanDirectory($mimeTypeFilter, $userFolder)); return []; } @@ -123,6 +132,7 @@ public function getSourceFromFile(array $mimeTypeFilter, File $node): ?Source { $node->getMTime(), $node->getMimeType(), $providerKey, + (int)$node->getSize(), ); } } diff --git a/lib/Type/Source.php b/lib/Type/Source.php index 9870aac4..a0acf1bb 100644 --- a/lib/Type/Source.php +++ b/lib/Type/Source.php @@ -16,6 +16,7 @@ public function __construct( public int|string $modified, public string $type, public string $provider, + public ?int $size = null, ) { } } diff --git a/mock_server.log b/mock_server.log new file mode 100644 index 00000000..1ad8c41e --- /dev/null +++ b/mock_server.log @@ -0,0 +1,11 @@ + * Serving Flask app 'mock_server' + * Debug mode: off +WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead. 
+ * Running on all addresses (0.0.0.0)
+ * Running on http://127.0.0.1:23000
+ * Running on http://192.168.0.2:23000
+Press CTRL+C to quit
+127.0.0.1 - - [05/Feb/2026 19:24:49] "PUT /loadSources HTTP/1.1" 200 -
+127.0.0.1 - - [05/Feb/2026 19:31:28] "PUT /loadSources HTTP/1.1" 400 -
+127.0.0.1 - - [05/Feb/2026 19:32:18] "PUT /loadSources HTTP/1.1" 200 -
+127.0.0.1 - - [05/Feb/2026 19:32:20] "PUT /loadSources HTTP/1.1" 400 -
diff --git a/tests/reproduction/create_test_file.php b/tests/reproduction/create_test_file.php
new file mode 100644
index 00000000..ba572273
--- /dev/null
+++ b/tests/reproduction/create_test_file.php
@@ -0,0 +1,26 @@
+getUserFolder('admin');
+
+    if ($userFolder->nodeExists('test.txt')) {
+        $file = $userFolder->get('test.txt');
+        $file->delete();
+    }
+
+    $file = $userFolder->newFile('test.txt');
+    // Write 1MB of data
+    $file->putContent(str_repeat('A', 1024 * 1024));
+
+    echo "Created encrypted test.txt successfully.\n";
+
+} catch (\Exception $e) {
+    echo "Error creating file: " . $e->getMessage() . "\n";
+    exit(1);
+}
diff --git a/tests/reproduction/debug_sizes.php b/tests/reproduction/debug_sizes.php
new file mode 100644
index 00000000..cef2ab64
--- /dev/null
+++ b/tests/reproduction/debug_sizes.php
@@ -0,0 +1,36 @@
+getUserFolder('admin');
+        if (!$userFolder->nodeExists($path)) {
+            echo "File $path not found.\n";
+            return;
+        }
+        $file = $userFolder->get($path);
+
+        $reportedSize = $file->getSize();
+        echo "File::getSize() for $path: " . $reportedSize . "\n";
+
+        $handle = $file->fopen('rb');
+        $stat = fstat($handle);
+        echo "fstat()['size'] for $path: " . $stat['size'] . "\n";
+
+        $contents = stream_get_contents($handle);
+        $actualReadSize = strlen($contents);
+        echo "Actual Read Size for $path: " . $actualReadSize . "\n";
+
+        echo "Mismatch for $path: " . ($reportedSize - $actualReadSize) . "\n";
+    } catch (\Exception $e) {
+        echo "Error checking $path: " . $e->getMessage() . "\n";
+    }
+}
+
+checkFile('test.txt');
+checkFile('Nextcloud Manual.pdf'); // Check the default file too
diff --git a/tests/reproduction/docker-compose.yml b/tests/reproduction/docker-compose.yml
new file mode 100644
index 00000000..fc45c412
--- /dev/null
+++ b/tests/reproduction/docker-compose.yml
@@ -0,0 +1,20 @@
+services:
+  nextcloud:
+    image: nextcloud:latest
+    environment:
+      - NEXTCLOUD_ADMIN_USER=admin
+      - NEXTCLOUD_ADMIN_PASSWORD=password
+    volumes:
+      - ../../:/var/www/html/custom_apps/context_chat
+    ports:
+      - "8080:80"
+
+  context_chat_backend:
+    image: python:3.9-slim
+    command: sh -c "pip install flask && python /mock_server.py"
+    volumes:
+      - ./mock_server.py:/mock_server.py
+    networks:
+      default:
+        aliases:
+          - context_chat_backend
diff --git a/tests/reproduction/mock_server.py b/tests/reproduction/mock_server.py
new file mode 100644
index 00000000..4364d9d5
--- /dev/null
+++ b/tests/reproduction/mock_server.py
@@ -0,0 +1,45 @@
+from flask import Flask, request, jsonify
+import sys
+
+app = Flask(__name__)
+
+@app.route('/heartbeat', methods=['GET'])
+def heartbeat():
+    """Liveness probe: always answers 200 with a static JSON body."""
+    return jsonify({"status": "ok"}), 200
+
+@app.route('/loadSources', methods=['PUT'])
+def load_sources():
+    """Compare the declared Content-Length with the bytes actually received.
+
+    Returns 400 when the request header and the body length disagree (or the
+    header is not an integer), otherwise 200 with a canned response.
+    """
+    raw_length = request.headers.get('Content-Length')
+    try:
+        content_length = int(raw_length) if raw_length else None
+    except ValueError:
+        # A malformed header is a client error, not a server crash (HTTP 500).
+        print(f"FAIL: Invalid Content-Length {raw_length!r}", flush=True)
+        return jsonify({"error": "Invalid Content-Length"}), 400
+
+    body = request.get_data()
+    actual_length = len(body)
+
+    # flush=True: stdout is block-buffered when not attached to a TTY, so
+    # without it these lines may not show up in the container logs.
+    print(f"Header Content-Length: {content_length}", flush=True)
+    print(f"Actual Body Length: {actual_length}", flush=True)
+
+    if content_length is not None and content_length != actual_length:
+        print("FAIL: Size Mismatch", flush=True)
+        return jsonify({"error": "Size Mismatch"}), 400
+
+    print("SUCCESS", flush=True)
+    return jsonify({
+        "loaded_sources": ["test_source"],
+        "sources_to_retry": []
+    }), 200
+
+if __name__ == '__main__':
+    app.run(host='0.0.0.0', port=23000)
diff --git a/tests/reproduction/register_mock.php b/tests/reproduction/register_mock.php
new file mode 100644
index 00000000..947809c2
--- /dev/null
+++ 
b/tests/reproduction/register_mock.php @@ -0,0 +1,52 @@ +find('context_chat_backend'); + $mapper->delete($existing); + echo "Deleted existing registration.\n"; + } catch (\Exception $e) { + // Not found, ignore + } + + $exApp = new ExApp(); + $exApp->setAppId('context_chat_backend'); + $exApp->setName('Context Chat Backend'); + $exApp->setDeployMethod('manual_install'); + $exApp->setVersion('1.0.0'); + $exApp->setEnabled(1); + $exApp->setHost('context_chat_backend'); + $exApp->setPort(23000); + $exApp->setProtocol('http'); + $exApp->setSecret('secret'); + $exApp->setHash('hash'); + $exApp->setLastUpdated(time()); + + // Set other required fields if any (based on standard ExApp entity) + // Some versions require 'scopes' or 'daemon_config_name' + if (method_exists($exApp, 'setDaemonConfigName')) { + $exApp->setDaemonConfigName('manual_install'); + } + + $mapper->insert($exApp); + + echo "Registered context_chat_backend successfully via Mapper.\n"; + +} catch (\Exception $e) { + echo "Error registering app: " . $e->getMessage() . "\n"; + exit(1); +} diff --git a/tests/reproduction/run_test.sh b/tests/reproduction/run_test.sh new file mode 100755 index 00000000..f0236d75 --- /dev/null +++ b/tests/reproduction/run_test.sh @@ -0,0 +1,104 @@ +#!/bin/bash +set -e + +# Start containers +docker-compose up -d + +echo "Waiting for container to accept commands..." +sleep 10 + +# Check if Nextcloud is installed +echo "Checking Nextcloud status..." +if docker-compose exec -u 33 nextcloud php occ status | grep -q "installed: true"; then + echo "Nextcloud is already installed." +else + echo "Nextcloud is not installed. Installing..." + docker-compose exec -u 33 nextcloud php occ maintenance:install \ + --database "sqlite" \ + --admin-user "admin" \ + --admin-pass "password" +fi + +echo "Waiting for Nextcloud to be fully ready..." 
+max_retries=10 +count=0 +while [ $count -lt $max_retries ]; do + if docker-compose exec -u 33 nextcloud php occ status | grep -q "installed: true"; then + echo "Nextcloud is ready." + break + fi + echo "Waiting for status update... (Attempt $((count+1))/$max_retries)" + sleep 5 + count=$((count+1)) +done + +if [ $count -eq $max_retries ]; then + echo "Timeout waiting for Nextcloud to be ready." + exit 1 +fi + +echo "Configuring Nextcloud..." + +# Enable encryption +docker-compose exec -u 33 nextcloud php occ app:enable encryption +docker-compose exec -u 33 nextcloud php occ encryption:enable +docker-compose exec -u 33 nextcloud php occ encryption:enable-master-key + +# Enable apps +docker-compose exec -u 33 nextcloud php occ app:enable context_chat +docker-compose exec -u 33 nextcloud php occ app:enable app_api + +# Register Mock Backend via OCC +echo "Cleaning up previous registrations..." +docker-compose exec -u 33 nextcloud php occ app_api:app:unregister context_chat_backend --force --no-interaction || true +docker-compose exec -u 33 nextcloud php occ app_api:daemon:unregister manual_install --no-interaction || true + +echo "Registering Mock Backend..." 
+ +# Register daemon config +# Using just hostname for daemon, allowing app port to be appended correctly +docker-compose exec -u 33 nextcloud php occ app_api:daemon:register manual_install "Manual Install" manual-install http context_chat_backend http://localhost --no-interaction || true + +# Register the app +# We use --force-scopes to avoid interactive prompts +docker-compose exec -u 33 nextcloud php occ app_api:app:register context_chat_backend manual_install --json-info '{"id":"context_chat_backend","name":"Context Chat Backend","deploy_method":"manual_install","version":"1.0.0","secret":"secret","host":"context_chat_backend","port":23000,"scopes":[],"protocol":"http","system_app":0}' --force-scopes --no-interaction || true + +# Enable the app (it was listed as disabled) +echo "Enabling Context Chat Backend..." +docker-compose exec -u 33 nextcloud php occ app_api:app:enable context_chat_backend --no-interaction || true + +# Debug: List registered apps +echo "Listing AppAPI apps..." +docker-compose exec -u 33 nextcloud php occ app_api:app:list + +# Configure context_chat +docker-compose exec -u 33 nextcloud php occ config:app:set context_chat backend_init --value true + +# Create test file +echo "Creating test file via VFS (Encrypted)..." +docker-compose cp create_test_file.php nextcloud:/var/www/html/create_test_file.php +docker-compose exec -u 33 nextcloud php /var/www/html/create_test_file.php + +# Verify file existence via PHP +echo "Verifying file existence in Nextcloud VFS..." +if docker-compose exec -u 33 nextcloud php -r 'define("NC_CLI_MODE", true); require_once "/var/www/html/lib/base.php"; echo \OCP\Server::get(\OCP\Files\IRootFolder::class)->getUserFolder("admin")->nodeExists("test.txt") ? "YES" : "NO";' | grep -q "YES"; then + echo "SUCCESS: test.txt found in Nextcloud VFS." +else + echo "FAILURE: test.txt NOT found in Nextcloud VFS." + exit 1 +fi + +# DEBUG: Check Sizes +echo "DEBUG: Checking file sizes..." 
+docker-compose cp debug_sizes.php nextcloud:/var/www/html/debug_sizes.php
+docker-compose exec -u 33 nextcloud php /var/www/html/debug_sizes.php
+
+# Run Indexer on the specific file
+echo "Running Scan (Direct Indexing) on test.txt..."
+docker-compose exec -u 33 nextcloud php occ context_chat:scan admin --directory test.txt
+
+# Check logs
+echo "Checking backend logs..."
+docker-compose logs --no-log-prefix context_chat_backend
+
+echo "Test completed successfully."
diff --git a/tests/reproduction/verify_mock.php b/tests/reproduction/verify_mock.php
new file mode 100644
index 00000000..6c46f5d9
--- /dev/null
+++ b/tests/reproduction/verify_mock.php
@@ -0,0 +1,56 @@
+ $error];
+    }
+    return ['code' => $httpCode];
+}
+
+$url = 'http://localhost:23000/loadSources';
+$body = 'test_content';
+$len = strlen($body);
+
+// Test 1: Matching Content-Length
+echo "Test 1: Matching Content-Length ($len)... ";
+$res = sendRequest($url, $body, $len);
+if (isset($res['code']) && $res['code'] === 200) {
+    echo "PASS (Got 200)\n";
+} else {
+    echo "FAIL (Result: " . json_encode($res) . ")\n";
+    exit(1);
+}
+
+// Test 2: Mismatching Content-Length
+echo "Test 2: Mismatching Content-Length (" . ($len + 10) . ")... ";
+$res = sendRequest($url, $body, $len + 10);
+
+// We expect either a 400 (if server catches it fast) or a Timeout/EOF error (if server waits)
+// cURL error 28 is Timeout.
+// cURL error 18 is Partial File.
+if ((isset($res['code']) && $res['code'] === 400) || isset($res['error'])) {
+    echo "PASS (Got Expected Failure: " . json_encode($res) . ")\n";
+} else {
+    echo "FAIL (Got Unexpected Success: " . json_encode($res) . ")\n";
+    exit(1);
+}
+
+echo "All mock server tests passed.\n";
diff --git a/tests/unit/Service/LangRopeServiceTest.php b/tests/unit/Service/LangRopeServiceTest.php
new file mode 100644
index 00000000..98c53a53
--- /dev/null
+++ b/tests/unit/Service/LangRopeServiceTest.php
@@ -0,0 +1,189 @@
+logger = $this->createMock(Logger::class);
+		$this->l10n = $this->createMock(IL10N::class);
+		$this->appConfig = $this->createMock(IAppConfig::class);
+		$this->appManager = $this->createMock(IAppManager::class);
+		$this->urlGenerator = $this->createMock(IURLGenerator::class);
+		$this->userManager = $this->createMock(IUserManager::class);
+		$this->providerService = $this->createMock(ProviderConfigService::class);
+	}
+
+	/**
+	 * A source with a known size must be sent as a stream whose getSize()
+	 * reports that size.
+	 */
+	public function testIndexSourcesWithContentLength() {
+		$source = new Source(
+			['user1'],
+			'ref1',
+			'title1',
+			'content1',
+			1234567890,
+			'text/plain',
+			'provider1',
+			100 // size
+		);
+
+		$responseMock = $this->getMockBuilder(\stdClass::class)
+			->addMethods(['getHeader', 'getBody', 'getStatusCode'])
+			->getMock();
+		$responseMock->method('getHeader')->willReturn('application/json');
+		$responseMock->method('getBody')->willReturn(json_encode(['loaded_sources' => [], 'sources_to_retry' => []]));
+		$responseMock->method('getStatusCode')->willReturn(200);
+
+		$appApiMock = $this->createMock(PublicFunctions::class);
+		$appApiMock->expects($this->once())
+			->method('exAppRequest')
+			->with(
+				'context_chat_backend',
+				'/loadSources',
+				null, // userId is null in constructor
+				'PUT',
+				$this->callback(function($params) {
+					// verify params structure
+					if (!is_array($params) || count($params) !== 1) return false;
+					$p = $params[0];
+					if ($p['name'] !== 'sources') return false;
+					// indexSources() attaches the size by decorating the stream's getSize().
+					if (!($p['contents'] instanceof \Psr\Http\Message\StreamInterface)) return false;
+					if ($p['contents']->getSize() !== 100) return false;
+					return true;
+				}),
+				$this->anything()
+			)
+			->willReturn($responseMock);
+
+		$this->appManager->method('isEnabledForUser')->willReturn(true);
+		$this->appManager->method('getAppVersion')->willReturn(Application::MIN_APP_API_VERSION);
+		$this->appConfig->method('getAppValueString')->willReturnCallback(function($key, $default, $lazy) {
+			if ($key === 'backend_init') return 'true';
+			if ($key === 'request_timeout') return '30';
+			return $default;
+		});
+
+		$service = $this->getMockBuilder(LangRopeService::class)
+			->setConstructorArgs([
+				$this->logger,
+				$this->l10n,
+				$this->appConfig,
+				$this->appManager,
+				$this->urlGenerator,
+				$this->userManager,
+				$this->providerService,
+				null // userId
+			])
+			->onlyMethods(['getAppApiFunctions'])
+			->getMock();
+
+		$service->method('getAppApiFunctions')->willReturn($appApiMock);
+
+		$service->indexSources([$source]);
+	}
+
+	/**
+	 * A source without a size must be passed through with its content untouched.
+	 */
+	public function testIndexSourcesWithoutContentLength() {
+		$source = new Source(
+			['user1'],
+			'ref1',
+			'title1',
+			'content1',
+			1234567890,
+			'text/plain',
+			'provider1',
+			null // size
+		);
+
+		$responseMock = $this->getMockBuilder(\stdClass::class)
+			->addMethods(['getHeader', 'getBody', 'getStatusCode'])
+			->getMock();
+		$responseMock->method('getHeader')->willReturn('application/json');
+		$responseMock->method('getBody')->willReturn(json_encode(['loaded_sources' => [], 'sources_to_retry' => []]));
+		$responseMock->method('getStatusCode')->willReturn(200);
+
+		$appApiMock = $this->createMock(PublicFunctions::class);
+		$appApiMock->expects($this->once())
+			->method('exAppRequest')
+			->with(
+				'context_chat_backend',
+				'/loadSources',
+				null,
+				'PUT',
+				$this->callback(function($params) {
+					if (!is_array($params) || count($params) !== 1) return false;
+					$p = $params[0];
+					if (isset($p['headers']['Content-Length'])) return false;
+					// Without a size the content must be forwarded as-is (no stream wrapper).
+					if ($p['contents'] !== 'content1') return false;
+					return true;
+				}),
+				$this->anything()
+			)
+			->willReturn($responseMock);
+
+		$this->appManager->method('isEnabledForUser')->willReturn(true);
+		$this->appManager->method('getAppVersion')->willReturn(Application::MIN_APP_API_VERSION);
+		$this->appConfig->method('getAppValueString')->willReturnCallback(function($key, $default, $lazy) {
+			if ($key === 'backend_init') return 'true';
+			if ($key === 'request_timeout') return '30';
+			return $default;
+		});
+
+		$service = $this->getMockBuilder(LangRopeService::class)
+			->setConstructorArgs([
+				$this->logger,
+				$this->l10n,
+				$this->appConfig,
+				$this->appManager,
+				$this->urlGenerator,
+				$this->userManager,
+				$this->providerService,
+				null // userId
+			])
+			->onlyMethods(['getAppApiFunctions'])
+			->getMock();
+
+		$service->method('getAppApiFunctions')->willReturn($appApiMock);
+
+		$service->indexSources([$source]);
+	}
+}
+
+}