diff --git a/.github/workflows/e2e_tests_lightspeed_evaluation.yaml b/.github/workflows/e2e_tests_lightspeed_evaluation.yaml new file mode 100644 index 000000000..366e414d0 --- /dev/null +++ b/.github/workflows/e2e_tests_lightspeed_evaluation.yaml @@ -0,0 +1,143 @@ +name: E2E Tests for Lightspeed Evaluation + +on: [push, pull_request_target] + +jobs: + e2e_tests: + runs-on: ubuntu-latest + strategy: + fail-fast: false + + name: "E2E Tests for Lightspeed Evaluation job" + + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + E2E_OPENAI_MODEL: ${{ vars.E2E_OPENAI_MODEL }} + FAISS_VECTOR_STORE_ID: ${{ vars.FAISS_VECTOR_STORE_ID }} + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + # On PR_TARGET → the fork (or same repo) that opened the PR. + # On push → falls back to the current repository. + repository: ${{ github.event.pull_request.head.repo.full_name || github.repository }} + + # On PR_TARGET → the PR head *commit* (reproducible). + # On push → the pushed commit that triggered the workflow. + ref: ${{ github.event.pull_request.head.ref || github.sha }} + + # Don’t keep credentials when running untrusted PR code under PR_TARGET. + persist-credentials: ${{ github.event_name != 'pull_request_target' }} + + - name: Verify actual git checkout result + run: | + echo "=== Git Status After Checkout ===" + echo "Remote URLs:" + git remote -v + echo "" + echo "Current branch: $(git branch --show-current 2>/dev/null || echo 'detached HEAD')" + echo "Current commit: $(git rev-parse HEAD)" + echo "Current commit message: $(git log -1 --oneline)" + echo "" + echo "=== Recent commits ===" + git log --oneline -5 + + - name: Checkout lightspeed-Evaluation + uses: actions/checkout@v4 + with: + repository: lightspeed-core/lightspeed-evaluation + path: lightspeed-evaluation + + - name: Load lightspeed-stack.yaml configuration + run: | + CONFIG_FILE="./lightspeed-evaluation/tests/integration/lightspeed-stack.yaml" + + if [ ! 
-f "${CONFIG_FILE}" ]; then + echo "❌ Configuration file not found: ${CONFIG_FILE}" + exit 1 + fi + + cp "${CONFIG_FILE}" lightspeed-stack.yaml + echo "✅ Configuration loaded successfully" + + - name: Select and configure run.yaml + run: | + CONFIG_FILE="./lightspeed-evaluation/tests/integration/run.yaml" + + if [ ! -f "${CONFIG_FILE}" ]; then + echo "❌ Configuration file not found: ${CONFIG_FILE}" + exit 1 + fi + + cp "$CONFIG_FILE" run.yaml + + - name: Show final configuration + run: | + echo "=== Configuration Preview ===" + echo "Providers: $(grep -c "provider_id:" run.yaml)" + echo "Models: $(grep -c "model_id:" run.yaml)" + echo "" + echo "=== lightspeed-stack.yaml ===" + grep -A 3 "llama_stack:" lightspeed-stack.yaml + + - name: Run services (Library Mode) + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: | + echo "Starting service in library mode (1 container)" + docker compose -f docker-compose-library.yaml up -d + + if docker compose -f docker-compose-library.yaml ps | grep -E 'Exit|exited|stopped'; then + echo "Service failed to start - showing logs:" + docker compose -f docker-compose-library.yaml logs + exit 1 + else + echo "Service started successfully" + fi + + - name: Wait for the LSC + run: | + echo "Waiting for service on port 8080..." + for i in {1..30}; do + if curl --output /dev/null --fail http://localhost:8080/v1/models ; then + echo "Service is up!" + exit 0 + fi + docker compose -f docker-compose-library.yaml logs --tail=30 + echo "Still waiting..." + sleep 2 + done + + echo "Service did not start in time" + exit 1 + + - name: Run lightspeed evaluation e2e tests + env: + TERM: xterm-256color + FORCE_COLOR: 1 + run: | + cd lightspeed-evaluation + echo "Installing e2e tests dependencies" + pip install --break-system-packages uv + uv sync + + echo "Running e2e test suite..." 
+ make e2e_tests_lcore + + - name: Show logs on failure + if: failure() + run: | + echo "=== Test failure logs ===" + echo "=== lightspeed-stack (library mode) logs ===" + docker compose -f docker-compose-library.yaml logs lightspeed-stack + + + # Cleanup + - name: Stop the LSC if in local devel + if: ${{ always() && env.ACT }} + run: | + echo "Stopping containers" + echo "++++++++++++++++++++++" + sleep 2 + docker compose -f docker-compose-library.yaml down --rmi all diff --git a/.tekton/lightspeed-stack-pull-request.yaml b/.tekton/lightspeed-stack-pull-request.yaml index b6406f96c..59ef02f04 100644 --- a/.tekton/lightspeed-stack-pull-request.yaml +++ b/.tekton/lightspeed-stack-pull-request.yaml @@ -54,7 +54,7 @@ spec: ], "requirements_build_files": ["requirements-build.txt"], "binary": { - "packages": "aiohappyeyeballs,aiohttp,aiosignal,aiosqlite,annotated-doc,annotated-types,anyio,asyncpg,attrs,cffi,charset-normalizer,chevron,click,cryptography,datasets,dill,distro,dnspython,docstring-parser,durationpy,einops,email-validator,faiss-cpu,fire,frozenlist,fsspec,google-cloud-core,google-cloud-resource-manager,google-crc32c,google-genai,google-resumable-media,googleapis-common-protos,grpc-google-iam-v1,grpcio,grpcio-status,h11,hf-xet,httpcore,httpx,httpx-sse,idna,importlib-metadata,jinja2,jiter,joblib,jsonschema,jsonschema-specifications,kubernetes,lxml,markdown-it-py,mcp,mdurl,mpmath,multidict,networkx,numpy,oauthlib,packaging,pandas,peft,pillow,prometheus-client,prompt-toolkit,propcache,psycopg2-binary,pyarrow,pyasn1,pyasn1-modules,pycparser,pydantic,pydantic-core,pygments,python-dateutil,python-multipart,pyyaml,referencing,requests,requests-oauthlib,rpds-py,rsa,safetensors,scikit-learn,scipy,setuptools,six,sniffio,sqlalchemy,starlette,sympy,termcolor,threadpoolctl,tiktoken,tokenizers,torch,tornado,tqdm,transformers,tree-sitter,triton,typing-extensions,typing-inspection,tzdata,urllib3,websocket-client,websockets,wrapt,xxhash,yarl,zipp,uv,pip,maturin", + "packages": 
"aiohappyeyeballs,aiohttp,aiosignal,aiosqlite,annotated-doc,annotated-types,anyio,asyncpg,attrs,cffi,chevron,click,cryptography,datasets,dill,distro,dnspython,docstring-parser,durationpy,einops,email-validator,faiss-cpu,fire,frozenlist,fsspec,google-cloud-core,google-cloud-resource-manager,google-crc32c,google-genai,google-resumable-media,grpc-google-iam-v1,grpcio,grpcio-status,h11,hf-xet,httpcore,httpx,httpx-sse,idna,importlib-metadata,jinja2,jiter,joblib,jsonschema,jsonschema-specifications,kubernetes,lxml,markdown-it-py,mcp,mdurl,mpmath,multidict,networkx,numpy,oauthlib,packaging,pandas,peft,pillow,prometheus-client,prompt-toolkit,propcache,psycopg2-binary,pyarrow,pyasn1,pyasn1-modules,pycparser,pydantic,pydantic-core,pygments,python-dateutil,python-multipart,pyyaml,referencing,requests,requests-oauthlib,rpds-py,safetensors,scikit-learn,scipy,setuptools,six,sniffio,sqlalchemy,starlette,sympy,termcolor,threadpoolctl,tiktoken,tokenizers,torch,tqdm,transformers,tree-sitter,triton,typing-extensions,typing-inspection,tzdata,urllib3,websocket-client,websockets,wrapt,xxhash,yarl,zipp,uv,pip,maturin", "os": "linux", "arch": "x86_64,aarch64", "py_version": 312 @@ -209,7 +209,7 @@ spec: - name: name value: prefetch-dependencies-oci-ta - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-prefetch-dependencies-oci-ta:0.2@sha256:22612d629796a29ddd177d6e29c18a4319875d4e2348286ea01d16427cec0dc1 + value: quay.io/konflux-ci/tekton-catalog/task-prefetch-dependencies-oci-ta:0.3@sha256:a579d00fe370b6d9a1cb1751c883ecd0ec9f663604344e2fd61e1f6d5bf4e990 - name: kind value: task resolver: bundles @@ -267,7 +267,7 @@ spec: - name: name value: buildah-remote-oci-ta - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-buildah-remote-oci-ta:0.9@sha256:1302dbf65547d9ce065b4947f6217b7d3daa06dfd4542cbaa3e42438c1a08b0e + value: quay.io/konflux-ci/tekton-catalog/task-buildah-remote-oci-ta:0.9@sha256:a9ca472e297388d6ef8d1f51ee205abee6076aed7c5356ec0df84f14a2e78ad8 - name: kind 
value: task resolver: bundles @@ -293,7 +293,7 @@ spec: - name: name value: build-image-index - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-build-image-index:0.2@sha256:ac4f8b58ade5000f6e47d287b72832f0d89a91651849467be73e05da639cff7d + value: quay.io/konflux-ci/tekton-catalog/task-build-image-index:0.2@sha256:c7b0f7e1f743040d99a3532abbdfddc9484f80fd559a75171c97499c3eb5d163 - name: kind value: task resolver: bundles @@ -314,7 +314,7 @@ spec: - name: name value: source-build-oci-ta - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-source-build-oci-ta:0.3@sha256:eb620d137d2dfa9966d991ac210ad14f391cfa9cfc501e3cc1eb24e3332c6986 + value: quay.io/konflux-ci/tekton-catalog/task-source-build-oci-ta:0.3@sha256:362f0475df00e7dfb5f15dea0481d1b68b287f60411718d70a23da3c059a5613 - name: kind value: task resolver: bundles @@ -336,7 +336,7 @@ spec: - name: name value: deprecated-image-check - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-deprecated-image-check:0.5@sha256:516ea66977bc4cdad1da61d9273a31540f0d419270f8c8c4b6b3a6aaa4002d96 + value: quay.io/konflux-ci/tekton-catalog/task-deprecated-image-check:0.5@sha256:3457a4ca93f8d55f14ebd407532b1223c689eacc34f0abb3003db4111667bdae - name: kind value: task resolver: bundles @@ -416,7 +416,7 @@ spec: - name: name value: sast-snyk-check-oci-ta - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-sast-snyk-check-oci-ta:0.4@sha256:129e089094f472c7a147f1b1591ad003bb509d2ade553ff54fcf2f99d8af5ef3 + value: quay.io/konflux-ci/tekton-catalog/task-sast-snyk-check-oci-ta:0.4@sha256:6045ed6f2d37cfdf75cb3f2bf88706839c276a59f892ae027a315456c2914cf3 - name: kind value: task resolver: bundles @@ -443,7 +443,7 @@ spec: - name: name value: clamav-scan - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-clamav-scan:0.3@sha256:657d2704299777e90bc177ea012f4b13c80199ae77fa5d4b5e5b524993411e86 + value: 
quay.io/konflux-ci/tekton-catalog/task-clamav-scan:0.3@sha256:9f18b216ce71a66909e7cb17d9b34526c02d73cf12884ba32d1f10614f7b9f5a - name: kind value: task resolver: bundles @@ -488,7 +488,7 @@ spec: - name: name value: sast-coverity-check-oci-ta - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-sast-coverity-check-oci-ta:0.3@sha256:e8c63570f1d01d70b2a21b22a2a4aad9ca7d5c0327d8b2a4058a6e616cce17ca + value: quay.io/konflux-ci/tekton-catalog/task-sast-coverity-check-oci-ta:0.3@sha256:ab60e90de028036be823e75343fdc205418edcfa7c4de569bb5f8ab833bc2037 - name: kind value: task resolver: bundles @@ -509,7 +509,7 @@ spec: - name: name value: coverity-availability-check - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-coverity-availability-check:0.2@sha256:c9b9301c442830eca3ad7d9d5287b082b94c38d406442f391447484147afd006 + value: quay.io/konflux-ci/tekton-catalog/task-coverity-availability-check:0.2@sha256:de35caf2f090e3275cfd1019ea50d9662422e904fb4aebd6ea29fb53a1ad57f5 - name: kind value: task resolver: bundles @@ -535,7 +535,7 @@ spec: - name: name value: sast-shell-check-oci-ta - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-sast-shell-check-oci-ta:0.1@sha256:a2fa9231978362bdc5d244eb179167fba727044a18a981ebac806847845aced8 + value: quay.io/konflux-ci/tekton-catalog/task-sast-shell-check-oci-ta:0.1@sha256:c314b4d5369d7961af51c865be28cd792d5f233aef94ecf035b3f84acde398bf - name: kind value: task resolver: bundles @@ -561,7 +561,7 @@ spec: - name: name value: sast-unicode-check-oci-ta - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-sast-unicode-check-oci-ta:0.4@sha256:8f93c716782b68a71e314d3eb037edfc07255d1a4d531afcf612409ef62233c7 + value: quay.io/konflux-ci/tekton-catalog/task-sast-unicode-check-oci-ta:0.4@sha256:3d8a6902ab7c5c2125be07263f395426342c5032b3abfd0140162ad838437bab - name: kind value: task resolver: bundles @@ -606,7 +606,7 @@ spec: - name: name value: push-dockerfile-oci-ta - name: bundle - value: 
quay.io/konflux-ci/tekton-catalog/task-push-dockerfile-oci-ta:0.3@sha256:322d515ca66d92188067344761733d1e5c64d4b7bb790d10f35540da5e6289f1 + value: quay.io/konflux-ci/tekton-catalog/task-push-dockerfile-oci-ta:0.3@sha256:1bc2d0f26b89259db090a47bb38217c82c05e335d626653d184adf1d196ca131 - name: kind value: task resolver: bundles @@ -623,7 +623,7 @@ spec: - name: name value: rpms-signature-scan - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-rpms-signature-scan:0.2@sha256:e920854293f9917e628d8c68a0ee3b003dabe0a67a5a2fc865a354030d4f93e2 + value: quay.io/konflux-ci/tekton-catalog/task-rpms-signature-scan:0.2@sha256:637fcb11066e2248d901c8f5fcbf713836bb9bf6ef6eff869b9891acd4d32398 - name: kind value: task resolver: bundles diff --git a/.tekton/lightspeed-stack-push.yaml b/.tekton/lightspeed-stack-push.yaml index a46f29153..fb722b51e 100644 --- a/.tekton/lightspeed-stack-push.yaml +++ b/.tekton/lightspeed-stack-push.yaml @@ -46,7 +46,7 @@ spec: ], "requirements_build_files": ["requirements-build.txt"], "binary": { - "packages": 
"aiohappyeyeballs,aiohttp,aiosignal,aiosqlite,annotated-doc,annotated-types,anyio,asyncpg,attrs,cffi,charset-normalizer,chevron,click,cryptography,datasets,dill,distro,dnspython,docstring-parser,durationpy,einops,email-validator,faiss-cpu,fire,frozenlist,fsspec,google-cloud-core,google-cloud-resource-manager,google-crc32c,google-genai,google-resumable-media,googleapis-common-protos,grpc-google-iam-v1,grpcio,grpcio-status,h11,hf-xet,httpcore,httpx,httpx-sse,idna,importlib-metadata,jinja2,jiter,joblib,jsonschema,jsonschema-specifications,kubernetes,lxml,markdown-it-py,mcp,mdurl,mpmath,multidict,networkx,numpy,oauthlib,packaging,pandas,peft,pillow,prometheus-client,prompt-toolkit,propcache,psycopg2-binary,pyarrow,pyasn1,pyasn1-modules,pycparser,pydantic,pydantic-core,pygments,python-dateutil,python-multipart,pyyaml,referencing,requests,requests-oauthlib,rpds-py,rsa,safetensors,scikit-learn,scipy,setuptools,six,sniffio,sqlalchemy,starlette,sympy,termcolor,threadpoolctl,tiktoken,tokenizers,torch,tornado,tqdm,transformers,tree-sitter,triton,typing-extensions,typing-inspection,tzdata,urllib3,websocket-client,websockets,wrapt,xxhash,yarl,zipp,uv,pip,maturin", + "packages": 
"aiohappyeyeballs,aiohttp,aiosignal,aiosqlite,annotated-doc,annotated-types,anyio,asyncpg,attrs,cffi,chevron,click,cryptography,datasets,dill,distro,dnspython,docstring-parser,durationpy,einops,email-validator,faiss-cpu,fire,frozenlist,fsspec,google-cloud-core,google-cloud-resource-manager,google-crc32c,google-genai,google-resumable-media,grpc-google-iam-v1,grpcio,grpcio-status,h11,hf-xet,httpcore,httpx,httpx-sse,idna,importlib-metadata,jinja2,jiter,joblib,jsonschema,jsonschema-specifications,kubernetes,lxml,markdown-it-py,mcp,mdurl,mpmath,multidict,networkx,numpy,oauthlib,packaging,pandas,peft,pillow,prometheus-client,prompt-toolkit,propcache,psycopg2-binary,pyarrow,pyasn1,pyasn1-modules,pycparser,pydantic,pydantic-core,pygments,python-dateutil,python-multipart,pyyaml,referencing,requests,requests-oauthlib,rpds-py,safetensors,scikit-learn,scipy,setuptools,six,sniffio,sqlalchemy,starlette,sympy,termcolor,threadpoolctl,tiktoken,tokenizers,torch,tqdm,transformers,tree-sitter,triton,typing-extensions,typing-inspection,tzdata,urllib3,websocket-client,websockets,wrapt,xxhash,yarl,zipp,uv,pip,maturin", "os": "linux", "arch": "x86_64,aarch64", "py_version": 312 @@ -194,7 +194,7 @@ spec: - name: name value: prefetch-dependencies-oci-ta - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-prefetch-dependencies-oci-ta:0.2@sha256:22612d629796a29ddd177d6e29c18a4319875d4e2348286ea01d16427cec0dc1 + value: quay.io/konflux-ci/tekton-catalog/task-prefetch-dependencies-oci-ta:0.3@sha256:a579d00fe370b6d9a1cb1751c883ecd0ec9f663604344e2fd61e1f6d5bf4e990 - name: kind value: task resolver: bundles @@ -248,7 +248,7 @@ spec: - name: name value: buildah-remote-oci-ta - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-buildah-remote-oci-ta:0.9@sha256:1302dbf65547d9ce065b4947f6217b7d3daa06dfd4542cbaa3e42438c1a08b0e + value: quay.io/konflux-ci/tekton-catalog/task-buildah-remote-oci-ta:0.9@sha256:a9ca472e297388d6ef8d1f51ee205abee6076aed7c5356ec0df84f14a2e78ad8 - name: kind 
value: task resolver: bundles @@ -274,7 +274,7 @@ spec: - name: name value: build-image-index - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-build-image-index:0.2@sha256:ac4f8b58ade5000f6e47d287b72832f0d89a91651849467be73e05da639cff7d + value: quay.io/konflux-ci/tekton-catalog/task-build-image-index:0.2@sha256:c7b0f7e1f743040d99a3532abbdfddc9484f80fd559a75171c97499c3eb5d163 - name: kind value: task resolver: bundles @@ -295,7 +295,7 @@ spec: - name: name value: source-build-oci-ta - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-source-build-oci-ta:0.3@sha256:eb620d137d2dfa9966d991ac210ad14f391cfa9cfc501e3cc1eb24e3332c6986 + value: quay.io/konflux-ci/tekton-catalog/task-source-build-oci-ta:0.3@sha256:362f0475df00e7dfb5f15dea0481d1b68b287f60411718d70a23da3c059a5613 - name: kind value: task resolver: bundles @@ -317,7 +317,7 @@ spec: - name: name value: deprecated-image-check - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-deprecated-image-check:0.5@sha256:516ea66977bc4cdad1da61d9273a31540f0d419270f8c8c4b6b3a6aaa4002d96 + value: quay.io/konflux-ci/tekton-catalog/task-deprecated-image-check:0.5@sha256:3457a4ca93f8d55f14ebd407532b1223c689eacc34f0abb3003db4111667bdae - name: kind value: task resolver: bundles @@ -397,7 +397,7 @@ spec: - name: name value: sast-snyk-check-oci-ta - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-sast-snyk-check-oci-ta:0.4@sha256:129e089094f472c7a147f1b1591ad003bb509d2ade553ff54fcf2f99d8af5ef3 + value: quay.io/konflux-ci/tekton-catalog/task-sast-snyk-check-oci-ta:0.4@sha256:6045ed6f2d37cfdf75cb3f2bf88706839c276a59f892ae027a315456c2914cf3 - name: kind value: task resolver: bundles @@ -424,7 +424,7 @@ spec: - name: name value: clamav-scan - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-clamav-scan:0.3@sha256:657d2704299777e90bc177ea012f4b13c80199ae77fa5d4b5e5b524993411e86 + value: 
quay.io/konflux-ci/tekton-catalog/task-clamav-scan:0.3@sha256:9f18b216ce71a66909e7cb17d9b34526c02d73cf12884ba32d1f10614f7b9f5a - name: kind value: task resolver: bundles @@ -469,7 +469,7 @@ spec: - name: name value: sast-coverity-check-oci-ta - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-sast-coverity-check-oci-ta:0.3@sha256:e8c63570f1d01d70b2a21b22a2a4aad9ca7d5c0327d8b2a4058a6e616cce17ca + value: quay.io/konflux-ci/tekton-catalog/task-sast-coverity-check-oci-ta:0.3@sha256:ab60e90de028036be823e75343fdc205418edcfa7c4de569bb5f8ab833bc2037 - name: kind value: task resolver: bundles @@ -490,7 +490,7 @@ spec: - name: name value: coverity-availability-check - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-coverity-availability-check:0.2@sha256:c9b9301c442830eca3ad7d9d5287b082b94c38d406442f391447484147afd006 + value: quay.io/konflux-ci/tekton-catalog/task-coverity-availability-check:0.2@sha256:de35caf2f090e3275cfd1019ea50d9662422e904fb4aebd6ea29fb53a1ad57f5 - name: kind value: task resolver: bundles @@ -516,7 +516,7 @@ spec: - name: name value: sast-shell-check-oci-ta - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-sast-shell-check-oci-ta:0.1@sha256:a2fa9231978362bdc5d244eb179167fba727044a18a981ebac806847845aced8 + value: quay.io/konflux-ci/tekton-catalog/task-sast-shell-check-oci-ta:0.1@sha256:c314b4d5369d7961af51c865be28cd792d5f233aef94ecf035b3f84acde398bf - name: kind value: task resolver: bundles @@ -542,7 +542,7 @@ spec: - name: name value: sast-unicode-check-oci-ta - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-sast-unicode-check-oci-ta:0.4@sha256:8f93c716782b68a71e314d3eb037edfc07255d1a4d531afcf612409ef62233c7 + value: quay.io/konflux-ci/tekton-catalog/task-sast-unicode-check-oci-ta:0.4@sha256:3d8a6902ab7c5c2125be07263f395426342c5032b3abfd0140162ad838437bab - name: kind value: task resolver: bundles @@ -590,7 +590,7 @@ spec: - name: name value: push-dockerfile-oci-ta - name: bundle - value: 
quay.io/konflux-ci/tekton-catalog/task-push-dockerfile-oci-ta:0.3@sha256:322d515ca66d92188067344761733d1e5c64d4b7bb790d10f35540da5e6289f1 + value: quay.io/konflux-ci/tekton-catalog/task-push-dockerfile-oci-ta:0.3@sha256:1bc2d0f26b89259db090a47bb38217c82c05e335d626653d184adf1d196ca131 - name: kind value: task resolver: bundles @@ -607,7 +607,7 @@ spec: - name: name value: rpms-signature-scan - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-rpms-signature-scan:0.2@sha256:e920854293f9917e628d8c68a0ee3b003dabe0a67a5a2fc865a354030d4f93e2 + value: quay.io/konflux-ci/tekton-catalog/task-rpms-signature-scan:0.2@sha256:637fcb11066e2248d901c8f5fcbf713836bb9bf6ef6eff869b9891acd4d32398 - name: kind value: task resolver: bundles diff --git a/dev-tools/mcp-mock-server/server.py b/dev-tools/mcp-mock-server/server.py index fbee23c96..7f087622c 100644 --- a/dev-tools/mcp-mock-server/server.py +++ b/dev-tools/mcp-mock-server/server.py @@ -24,7 +24,7 @@ import sys import threading from http.server import HTTPServer, BaseHTTPRequestHandler -from datetime import datetime +from datetime import datetime, UTC from pathlib import Path from typing import Any @@ -38,7 +38,7 @@ class MCPMockHandler(BaseHTTPRequestHandler): def log_message(self, format: str, *args: Any) -> None: """Log requests with timestamp.""" # pylint: disable=redefined-builtin - timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + timestamp = datetime.now(tz=UTC).strftime("%Y-%m-%d %H:%M:%S") print(f"[{timestamp}] {format % args}") def _capture_headers(self) -> None: @@ -46,13 +46,12 @@ def _capture_headers(self) -> None: last_headers.clear() # Capture all headers for debugging - for header_name, value in self.headers.items(): - last_headers[header_name] = value + last_headers.update(dict(self.headers.items())) # Log the request request_log.append( { - "timestamp": datetime.now().isoformat(), + "timestamp": datetime.now(tz=UTC).isoformat(), "method": self.command, "path": self.path, "headers": 
dict(last_headers), diff --git a/docker-compose.yaml b/docker-compose.yaml index 03bd6450d..4ee0d30c1 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -82,6 +82,7 @@ services: volumes: - ./lightspeed-stack.yaml:/app-root/lightspeed-stack.yaml:z - ./tests/e2e/secrets/mcp-token:/tmp/mcp-secret-token:ro + - ./tests/e2e/secrets/invalid-mcp-token:/tmp/invalid-mcp-secret-token:ro environment: - OPENAI_API_KEY=${OPENAI_API_KEY} # Azure Entra ID credentials (AZURE_API_KEY is obtained dynamically) diff --git a/docs/config.html b/docs/config.html index 423649442..b31888ed5 100644 --- a/docs/config.html +++ b/docs/config.html @@ -376,6 +376,11 @@

AzureEntraIdConfiguration

ByokRag

BYOK (Bring Your Own Knowledge) RAG configuration.

+ + + + + @@ -414,6 +419,13 @@

ByokRag

+ + + + +
Field string Path to RAG database.
score_multipliernumberMultiplier applied to relevance scores from this vector store. Used +to weight results when querying multiple knowledge sources. Values > +1 boost this store’s results; values < 1 reduce them.

CORSConfiguration

@@ -603,9 +615,15 @@

Configuration

‘production’). Used in telemetry events. - solr + rag - Configuration for Solr vector search operations. + Configuration for all RAG strategies (inline and tool-based). + + + okp + + OKP provider settings. Only used when ‘okp’ is listed in rag.inline +or rag.tool. @@ -1089,6 +1107,41 @@

ModelContextProtocolServer

+

OkpConfiguration

+

OKP (Offline Knowledge Portal) provider configuration.

+

Controls provider-specific behaviour for the OKP vector store. Only +relevant when "okp" is listed in rag.inline or +rag.tool.

+ + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
offlinebooleanWhen True, use parent_id for OKP chunk source URLs. When False, use +reference_url for chunk source URLs.
chunk_filter_querystringOKP filter query applied to every OKP search request. Defaults to +‘is_chunk:true’ to restrict results to chunk documents. To add extra +constraints, extend the expression using boolean syntax, +e.g. ‘is_chunk:true AND product:openshift’.

PostgreSQLDatabaseConfiguration

PostgreSQL database configuration.

PostgreSQL database is used by Lightspeed Core Stack service for @@ -1337,6 +1390,48 @@

RHIdentityConfiguration

+

RagConfiguration

+

RAG strategy configuration.

+

Controls which RAG sources are used for inline and tool-based +retrieval.

+

Each strategy lists RAG IDs to include. The special ID +"okp" defined in constants, activates the OKP provider; all +other IDs refer to entries in byok_rag.

+

Backward compatibility: - inline defaults to +[] (no inline RAG). - tool defaults to +None which means all registered vector stores are used +(identical to the previous tool.byok.enabled = True +default).

+ + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
inlinearrayRAG IDs whose sources are injected as context before the LLM call. +Use ‘okp’ to enable OKP inline RAG. Empty by default (no inline +RAG).
toolarrayRAG IDs made available to the LLM as a file_search tool. Use ‘okp’ +to include the OKP vector store. When omitted, all registered BYOK +vector stores are used (backward compatibility).

SQLiteDatabaseConfiguration

SQLite database configuration.

@@ -1429,39 +1524,6 @@

ServiceConfiguration

-

SolrConfiguration

-

Solr configuration for vector search queries.

-

Controls whether to use offline or online mode when building document -URLs from vector search results, and enables/disables Solr vector IO -functionality.

- - - - - - - - - - - - - - - - - - - - - - - - - -
FieldTypeDescription
enabledbooleanWhen True, enables Solr vector IO functionality for vector search -queries. When False, disables Solr vector search processing.
offlinebooleanWhen True, use parent_id for chunk source URLs. When False, use -reference_url for chunk source URLs.

SplunkConfiguration

Splunk HEC (HTTP Event Collector) configuration.

Splunk HEC allows sending events directly to Splunk over HTTP/HTTPS. diff --git a/docs/config.md b/docs/config.md index 8ba10ad7e..00c0ae747 100644 --- a/docs/config.md +++ b/docs/config.md @@ -130,12 +130,12 @@ byok_rag: | Field | Type | Description | |-------|------|-------------| | rag_id | string | Unique RAG ID | -| rag_type | string | Type of RAG database (e.g. `inline::faiss`). | +| rag_type | string | Type of RAG database. | | embedding_model | string | Embedding model identification | | embedding_dimension | integer | Dimensionality of embedding vectors. | | vector_db_id | string | Vector database identification. | | db_path | string | Path to RAG database. | -| score_multiplier | number | Multiplier applied to relevance scores from this vector store when querying multiple sources. Values > 1 boost results; values < 1 reduce them. Default: 1.0. | +| score_multiplier | number | Multiplier applied to relevance scores from this vector store. Used to weight results when querying multiple knowledge sources. Values > 1 boost this store's results; values < 1 reduce them. | ## CORSConfiguration @@ -187,7 +187,8 @@ Global service configuration. | azure_entra_id | | | | splunk | | Splunk HEC configuration for sending telemetry events. | | deployment_environment | string | Deployment environment name (e.g., 'development', 'staging', 'production'). Used in telemetry events. | -| rag | | RAG strategy configuration (OKP and BYOK). Controls pre-query (Inline RAG) and tool-based (Tool RAG) retrieval. | +| rag | | Configuration for all RAG strategies (inline and tool-based). | +| okp | | OKP provider settings. Only used when 'okp' is listed in rag.inline or rag.tool. | ## ConversationHistoryConfiguration @@ -395,6 +396,21 @@ Useful resources: | timeout | integer | Timeout in seconds for requests to the MCP server. If not specified, the default timeout from Llama Stack will be used. Note: This field is reserved for future use when Llama Stack adds timeout support. 
| +## OkpConfiguration + + +OKP (Offline Knowledge Portal) provider configuration. + +Controls provider-specific behaviour for the OKP vector store. +Only relevant when ``"okp"`` is listed in ``rag.inline`` or ``rag.tool``. + + +| Field | Type | Description | +|-------|------|-------------| +| offline | boolean | When True, use parent_id for OKP chunk source URLs. When False, use reference_url for chunk source URLs. | +| chunk_filter_query | string | OKP filter query applied to every OKP search request. Defaults to 'is_chunk:true' to restrict results to chunk documents. To add extra constraints, extend the expression using boolean syntax, e.g. 'is_chunk:true AND product:*openshift*'. | + + ## PostgreSQLDatabaseConfiguration @@ -501,6 +517,28 @@ Red Hat Identity authentication configuration. | required_entitlements | array | List of all required entitlements. | +## RagConfiguration + + +RAG strategy configuration. + +Controls which RAG sources are used for inline and tool-based retrieval. + +Each strategy lists RAG IDs to include. The special ID ``"okp"`` defined in constants, +activates the OKP provider; all other IDs refer to entries in ``byok_rag``. + +Backward compatibility: + - ``inline`` defaults to ``[]`` (no inline RAG). + - ``tool`` defaults to ``None`` which means all registered vector stores + are used (identical to the previous ``tool.byok.enabled = True`` default). + + +| Field | Type | Description | +|-------|------|-------------| +| inline | array | RAG IDs whose sources are injected as context before the LLM call. Use 'okp' to enable OKP inline RAG. Empty by default (no inline RAG). | +| tool | array | RAG IDs made available to the LLM as a file_search tool. Use 'okp' to include the OKP vector store. When omitted, all registered BYOK vector stores are used (backward compatibility). | + + ## SQLiteDatabaseConfiguration @@ -537,62 +575,6 @@ the service can handle requests concurrently. 
| cors | | Cross-Origin Resource Sharing configuration for cross-domain requests | -## RagConfiguration - - -Top-level RAG strategy configuration. Controls two complementary retrieval modes: - -- **Inline RAG**: context is fetched from the listed sources and injected before the - LLM request. -- **Tool RAG**: the LLM can call the `file_search` tool during generation to retrieve - context on demand from the listed vector stores. Supports both BYOK and OKP. - -Each strategy is configured as a list of RAG IDs referencing entries in `byok_rag`. -The special ID `okp` activates the OKP provider (no `byok_rag` entry needed). - -**Backward compatibility**: omitting `tool` uses all registered BYOK vector stores -(equivalent to the old `tool.byok.enabled = True`). Omitting `inline` means no -context is injected before the LLM request. - -Example: - -```yaml -rag: - inline: - - my-docs # inject context from my-docs before the LLM request - tool: - - okp # LLM can search OKP as a tool - - my-docs # LLM can also search my-docs as a tool - -okp: - offline: true # use parent_id for OKP URL construction -``` - - -| Field | Type | Description | -|-------|------|-------------| -| inline | list[string] | RAG IDs whose content is injected before the LLM request. Use `okp` for OKP. Empty by default (no inline RAG). | -| tool | list[string] or null | RAG IDs exposed as a `file_search` tool the LLM can invoke. Use `okp` to include OKP. When omitted, all registered BYOK vector stores are used (backward compatibility). | - - -## OkpConfiguration - -OKP (Offline Knowledge Portal) provider settings. Only used when `okp` is listed in `rag.inline` or `rag.tool`. - -Example: - -```yaml -okp: - offline: true # use parent_id for OKP URL construction - chunk_filter_query: "is_chunk:true" -``` - -| Field | Type | Description | -|-------|------|-------------| -| offline | boolean | When `true` (default), use `parent_id` for OKP chunk source URLs. When `false`, use `reference_url`. 
| -| chunk_filter_query | string | OKP filter query (`fq`) applied to every OKP search request. Defaults to `"is_chunk:true"`. Extend with `AND` for extra constraints. | - - ## SplunkConfiguration diff --git a/docs/config.puml b/docs/config.puml index cc35f5862..69497edcc 100644 --- a/docs/config.puml +++ b/docs/config.puml @@ -41,11 +41,12 @@ class "AzureEntraIdConfiguration" as src.models.config.AzureEntraIdConfiguration tenant_id } class "ByokRag" as src.models.config.ByokRag { - db_path + db_path : str embedding_dimension embedding_model : str rag_id : str rag_type : str + score_multiplier : float vector_db_id : str } class "CORSConfiguration" as src.models.config.CORSConfiguration { @@ -69,9 +70,10 @@ class "Configuration" as src.models.config.Configuration { llama_stack mcp_servers : list[ModelContextProtocolServer] name : str + okp quota_handlers + rag service - solr : Optional[SolrConfiguration] splunk : Optional[SplunkConfiguration] user_data_collection dump(filename: str | Path) -> None @@ -160,6 +162,10 @@ class "ModelContextProtocolServer" as src.models.config.ModelContextProtocolServ resolve_auth_headers() -> Self validate_headers(value: list[str]) -> list[str] } +class "OkpConfiguration" as src.models.config.OkpConfiguration { + chunk_filter_query : str + offline : bool +} class "PostgreSQLDatabaseConfiguration" as src.models.config.PostgreSQLDatabaseConfiguration { ca_cert_path : Optional[FilePath] db : str @@ -194,6 +200,10 @@ class "QuotaSchedulerConfiguration" as src.models.config.QuotaSchedulerConfigura class "RHIdentityConfiguration" as src.models.config.RHIdentityConfiguration { required_entitlements : Optional[list[str]] } +class "RagConfiguration" as src.models.config.RagConfiguration { + inline : list[str] + tool : Optional[list[str]] +} class "SQLiteDatabaseConfiguration" as src.models.config.SQLiteDatabaseConfiguration { db_path : str } @@ -211,10 +221,6 @@ class "ServiceConfiguration" as src.models.config.ServiceConfiguration { 
check_service_configuration() -> Self validate_root_path(value: str) -> str } -class "SolrConfiguration" as src.models.config.SolrConfiguration { - enabled : bool - offline : bool -} class "SplunkConfiguration" as src.models.config.SplunkConfiguration { enabled : bool index : Optional[str] @@ -257,14 +263,15 @@ src.models.config.JwtConfiguration --|> src.models.config.ConfigurationBase src.models.config.JwtRoleRule --|> src.models.config.ConfigurationBase src.models.config.LlamaStackConfiguration --|> src.models.config.ConfigurationBase src.models.config.ModelContextProtocolServer --|> src.models.config.ConfigurationBase +src.models.config.OkpConfiguration --|> src.models.config.ConfigurationBase src.models.config.PostgreSQLDatabaseConfiguration --|> src.models.config.ConfigurationBase src.models.config.QuotaHandlersConfiguration --|> src.models.config.ConfigurationBase src.models.config.QuotaLimiterConfiguration --|> src.models.config.ConfigurationBase src.models.config.QuotaSchedulerConfiguration --|> src.models.config.ConfigurationBase src.models.config.RHIdentityConfiguration --|> src.models.config.ConfigurationBase +src.models.config.RagConfiguration --|> src.models.config.ConfigurationBase src.models.config.SQLiteDatabaseConfiguration --|> src.models.config.ConfigurationBase src.models.config.ServiceConfiguration --|> src.models.config.ConfigurationBase -src.models.config.SolrConfiguration --|> src.models.config.ConfigurationBase src.models.config.SplunkConfiguration --|> src.models.config.ConfigurationBase src.models.config.TLSConfiguration --|> src.models.config.ConfigurationBase src.models.config.UserDataCollection --|> src.models.config.ConfigurationBase @@ -278,8 +285,10 @@ src.models.config.InferenceConfiguration --* src.models.config.Configuration : i src.models.config.JsonPathOperator --* src.models.config.JwtRoleRule : operator src.models.config.JwtConfiguration --* src.models.config.JwkConfiguration : jwt_configuration 
src.models.config.LlamaStackConfiguration --* src.models.config.Configuration : llama_stack +src.models.config.OkpConfiguration --* src.models.config.Configuration : okp src.models.config.QuotaHandlersConfiguration --* src.models.config.Configuration : quota_handlers src.models.config.QuotaSchedulerConfiguration --* src.models.config.QuotaHandlersConfiguration : scheduler +src.models.config.RagConfiguration --* src.models.config.Configuration : rag src.models.config.SQLiteDatabaseConfiguration --* src.models.config.DatabaseConfiguration : sqlite src.models.config.ServiceConfiguration --* src.models.config.Configuration : service src.models.config.TLSConfiguration --* src.models.config.ServiceConfiguration : tls_config diff --git a/docs/config.svg b/docs/config.svg index c0d342d41..816cbd1e5 100644 --- a/docs/config.svg +++ b/docs/config.svg @@ -1,729 +1,759 @@ - + - - - - A2AStateConfiguration - - config - postgres : Optional[PostgreSQLDatabaseConfiguration] - sqlite : Optional[SQLiteDatabaseConfiguration] - storage_type - - check_a2a_state_configuration() -> Self + + + + A2AStateConfiguration + + config + postgres : Optional[PostgreSQLDatabaseConfiguration] + sqlite : Optional[SQLiteDatabaseConfiguration] + storage_type + + check_a2a_state_configuration() -> Self - - - - APIKeyTokenConfiguration - - api_key - + + + + APIKeyTokenConfiguration + + api_key + - - - - AccessRule - - actions : list[Action] - role : str - + + + + AccessRule + + actions : list[Action] + role : str + - - - - Action - - name - + + + + Action + + name + - - - - AuthenticationConfiguration - - api_key_config : Optional[APIKeyTokenConfiguration] - api_key_configuration - jwk_config : Optional[JwkConfiguration] - jwk_configuration - k8s_ca_cert_path : Optional[FilePath] - k8s_cluster_api : Optional[AnyHttpUrl] - module : str - rh_identity_config : Optional[RHIdentityConfiguration] - rh_identity_configuration - skip_for_health_probes : bool - skip_tls_verification : bool - - 
check_authentication_model() -> Self + + + + AuthenticationConfiguration + + api_key_config : Optional[APIKeyTokenConfiguration] + api_key_configuration + jwk_config : Optional[JwkConfiguration] + jwk_configuration + k8s_ca_cert_path : Optional[FilePath] + k8s_cluster_api : Optional[AnyHttpUrl] + module : str + rh_identity_config : Optional[RHIdentityConfiguration] + rh_identity_configuration + skip_for_health_probes : bool + skip_tls_verification : bool + + check_authentication_model() -> Self - - - - AuthorizationConfiguration - - access_rules : list[AccessRule] - + + + + AuthorizationConfiguration + + access_rules : list[AccessRule] + - - - - AzureEntraIdConfiguration - - client_id - client_secret - scope : str - tenant_id - + + + + AzureEntraIdConfiguration + + client_id + client_secret + scope : str + tenant_id + - - - - ByokRag - - db_path - embedding_dimension - embedding_model : str - rag_id : str - rag_type : str - vector_db_id : str - + + + + ByokRag + + db_path : str + embedding_dimension + embedding_model : str + rag_id : str + rag_type : str + score_multiplier : float + vector_db_id : str + - - - - CORSConfiguration - - allow_credentials : bool - allow_headers : list[str] - allow_methods : list[str] - allow_origins : list[str] - - check_cors_configuration() -> Self + + + + CORSConfiguration + + allow_credentials : bool + allow_headers : list[str] + allow_methods : list[str] + allow_origins : list[str] + + check_cors_configuration() -> Self - - - - Configuration - - a2a_state - authentication - authorization : Optional[AuthorizationConfiguration] - azure_entra_id : Optional[AzureEntraIdConfiguration] - byok_rag : list[ByokRag] - conversation_cache - customization : Optional[Customization] - database - deployment_environment : str - inference - llama_stack - mcp_servers : list[ModelContextProtocolServer] - name : str - quota_handlers - service - solr : Optional[SolrConfiguration] - splunk : Optional[SplunkConfiguration] - user_data_collection - - 
dump(filename: str | Path) -> None - validate_mcp_auth_headers() -> Self + + + + Configuration + + a2a_state + authentication + authorization : Optional[AuthorizationConfiguration] + azure_entra_id : Optional[AzureEntraIdConfiguration] + byok_rag : list[ByokRag] + conversation_cache + customization : Optional[Customization] + database + deployment_environment : str + inference + llama_stack + mcp_servers : list[ModelContextProtocolServer] + name : str + okp + quota_handlers + rag + service + splunk : Optional[SplunkConfiguration] + user_data_collection + + dump(filename: str | Path) -> None + validate_mcp_auth_headers() -> Self - - - - ConfigurationBase - - model_config - + + + + ConfigurationBase + + model_config + - - - - ConversationHistoryConfiguration - - memory : Optional[InMemoryCacheConfig] - postgres : Optional[PostgreSQLDatabaseConfiguration] - sqlite : Optional[SQLiteDatabaseConfiguration] - type : Optional[Literal['noop', 'memory', 'sqlite', 'postgres']] - - check_cache_configuration() -> Self + + + + ConversationHistoryConfiguration + + memory : Optional[InMemoryCacheConfig] + postgres : Optional[PostgreSQLDatabaseConfiguration] + sqlite : Optional[SQLiteDatabaseConfiguration] + type : Optional[Literal['noop', 'memory', 'sqlite', 'postgres']] + + check_cache_configuration() -> Self - - - - CustomProfile - - path : str - prompts : dict[str, str] - - get_prompts() -> dict[str, str] + + + + CustomProfile + + path : str + prompts : dict[str, str] + + get_prompts() -> dict[str, str] - - - - Customization - - agent_card_config : Optional[dict[str, Any]] - agent_card_path : Optional[FilePath] - custom_profile : Optional[CustomProfile] - disable_query_system_prompt : bool - disable_shield_ids_override : bool - profile_path : Optional[str] - system_prompt : Optional[str] - system_prompt_path : Optional[FilePath] - - check_customization_model() -> Self + + + + Customization + + agent_card_config : Optional[dict[str, Any]] + agent_card_path : Optional[FilePath] + 
custom_profile : Optional[CustomProfile] + disable_query_system_prompt : bool + disable_shield_ids_override : bool + profile_path : Optional[str] + system_prompt : Optional[str] + system_prompt_path : Optional[FilePath] + + check_customization_model() -> Self - - - - DatabaseConfiguration - - config - db_type - postgres : Optional[PostgreSQLDatabaseConfiguration] - sqlite : Optional[SQLiteDatabaseConfiguration] - - check_database_configuration() -> Self + + + + DatabaseConfiguration + + config + db_type + postgres : Optional[PostgreSQLDatabaseConfiguration] + sqlite : Optional[SQLiteDatabaseConfiguration] + + check_database_configuration() -> Self - - - - InMemoryCacheConfig - - max_entries - + + + + InMemoryCacheConfig + + max_entries + - - - - InferenceConfiguration - - default_model : Optional[str] - default_provider : Optional[str] - - check_default_model_and_provider() -> Self + + + + InferenceConfiguration + + default_model : Optional[str] + default_provider : Optional[str] + + check_default_model_and_provider() -> Self - - - - JsonPathOperator - - name - + + + + JsonPathOperator + + name + - - - - JwkConfiguration - - jwt_configuration - url - + + + + JwkConfiguration + + jwt_configuration + url + - - - - JwtConfiguration - - role_rules : list[JwtRoleRule] - user_id_claim : str - username_claim : str - + + + + JwtConfiguration + + role_rules : list[JwtRoleRule] + user_id_claim : str + username_claim : str + - - - - JwtRoleRule - - compiled_regex - jsonpath : str - negate : bool - operator - roles : list[str] - value : Any - - check_jsonpath() -> Self - check_regex_pattern() -> Self - check_roles() -> Self + + + + JwtRoleRule + + compiled_regex + jsonpath : str + negate : bool + operator + roles : list[str] + value : Any + + check_jsonpath() -> Self + check_regex_pattern() -> Self + check_roles() -> Self - - - - LlamaStackConfiguration - - api_key : Optional[SecretStr] - library_client_config_path : Optional[str] - timeout - url : Optional[AnyHttpUrl] - 
use_as_library_client : Optional[bool] - - check_llama_stack_model() -> Self + + + + LlamaStackConfiguration + + api_key : Optional[SecretStr] + library_client_config_path : Optional[str] + timeout + url : Optional[AnyHttpUrl] + use_as_library_client : Optional[bool] + + check_llama_stack_model() -> Self - - - - ModelContextProtocolServer - - authorization_headers : dict[str, str] - headers : list[str] - name : str - provider_id : str - resolved_authorization_headers - timeout : Optional[PositiveInt] - url : str - - resolve_auth_headers() -> Self - validate_headers(value: list[str]) -> list[str] + + + + ModelContextProtocolServer + + authorization_headers : dict[str, str] + headers : list[str] + name : str + provider_id : str + resolved_authorization_headers + timeout : Optional[PositiveInt] + url : str + + resolve_auth_headers() -> Self + validate_headers(value: list[str]) -> list[str] + + + + + + + OkpConfiguration + + chunk_filter_query : str + offline : bool + - - - - PostgreSQLDatabaseConfiguration - - ca_cert_path : Optional[FilePath] - db : str - gss_encmode : str - host : str - namespace : Optional[str] - password - port - ssl_mode : str - user : str - - check_postgres_configuration() -> Self + + + + PostgreSQLDatabaseConfiguration + + ca_cert_path : Optional[FilePath] + db : str + gss_encmode : str + host : str + namespace : Optional[str] + password + port + ssl_mode : str + user : str + + check_postgres_configuration() -> Self - - - - QuotaHandlersConfiguration - - enable_token_history : bool - limiters : list[QuotaLimiterConfiguration] - postgres : Optional[PostgreSQLDatabaseConfiguration] - scheduler - sqlite : Optional[SQLiteDatabaseConfiguration] - + + + + QuotaHandlersConfiguration + + enable_token_history : bool + limiters : list[QuotaLimiterConfiguration] + postgres : Optional[PostgreSQLDatabaseConfiguration] + scheduler + sqlite : Optional[SQLiteDatabaseConfiguration] + - - - - QuotaLimiterConfiguration - - initial_quota - name : str - period : 
str - quota_increase - type : Literal['user_limiter', 'cluster_limiter'] - + + + + QuotaLimiterConfiguration + + initial_quota + name : str + period : str + quota_increase + type : Literal['user_limiter', 'cluster_limiter'] + - - - - QuotaSchedulerConfiguration - - database_reconnection_count - database_reconnection_delay - period - + + + + QuotaSchedulerConfiguration + + database_reconnection_count + database_reconnection_delay + period + - - - - RHIdentityConfiguration - - required_entitlements : Optional[list[str]] - + + + + RHIdentityConfiguration + + required_entitlements : Optional[list[str]] + + + + + + + + RagConfiguration + + inline : list[str] + tool : Optional[list[str]] + - - - - SQLiteDatabaseConfiguration - - db_path : str - + + + + SQLiteDatabaseConfiguration + + db_path : str + - - - - ServiceConfiguration - - access_log : bool - auth_enabled : bool - base_url : Optional[str] - color_log : bool - cors - host : str - port - root_path : str - tls_config - workers - - check_service_configuration() -> Self - validate_root_path(value: str) -> str - - - - - - - SolrConfiguration - - enabled : bool - offline : bool - + + + + ServiceConfiguration + + access_log : bool + auth_enabled : bool + base_url : Optional[str] + color_log : bool + cors + host : str + port + root_path : str + tls_config + workers + + check_service_configuration() -> Self + validate_root_path(value: str) -> str - - - - SplunkConfiguration - - enabled : bool - index : Optional[str] - source : str - timeout - token_path : Optional[FilePath] - url : Optional[str] - verify_ssl : bool - - check_splunk_configuration() -> Self + + + + SplunkConfiguration + + enabled : bool + index : Optional[str] + source : str + timeout + token_path : Optional[FilePath] + url : Optional[str] + verify_ssl : bool + + check_splunk_configuration() -> Self - - - - TLSConfiguration - - tls_certificate_path : Optional[FilePath] - tls_key_password : Optional[FilePath] - tls_key_path : Optional[FilePath] - - 
check_tls_configuration() -> Self + + + + TLSConfiguration + + tls_certificate_path : Optional[FilePath] + tls_key_password : Optional[FilePath] + tls_key_path : Optional[FilePath] + + check_tls_configuration() -> Self - - - - UserDataCollection - - feedback_enabled : bool - feedback_storage : Optional[str] - transcripts_enabled : bool - transcripts_storage : Optional[str] - - check_storage_location_is_set_when_needed() -> Self + + + + UserDataCollection + + feedback_enabled : bool + feedback_storage : Optional[str] + transcripts_enabled : bool + transcripts_storage : Optional[str] + + check_storage_location_is_set_when_needed() -> Self - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + + + + + - - + + - - + + - - + + - - + + - - + + + + + + + - - + + - - - - - - - + + - - + + - - + + - - + + - - - a2a_state + + + a2a_state - - - authentication + + + authentication - - - cors + + + cors - - - conversation_cache + + + conversation_cache - - - custom_profile + + + custom_profile - - - database + + + database - - - inference + + + inference - - - operator + + + operator - - - jwt_configuration + + + jwt_configuration - - - llama_stack + + + llama_stack + + + + + + okp - - - quota_handlers + + + quota_handlers - - - scheduler + + + scheduler + + + + + + rag - - - sqlite + + + sqlite - - - service + + + service - - - tls_config + + + tls_config - - - user_data_collection + + + user_data_collection - + diff --git a/docs/e2e_testing.md b/docs/e2e_testing.md index 4035a17d0..64eff79b2 100644 --- a/docs/e2e_testing.md +++ b/docs/e2e_testing.md @@ -58,7 +58,7 @@ tests/e2e/ ├── utils/ │ ├── utils.py # restart_container, switch_config, wait_for_container_health, etc. │ ├── prow_utils.py # Prow/OpenShift helpers (restore_llama_stack_pod, etc.) 
-│ └── llama_stack_shields.py # Shield unregister/register (server mode, optional) +│ └── llama_stack_utils.py # Toolgroups + shield unregister/register (server mode, optional) ├── mock_mcp_server/ # Mock MCP server for MCP tests └── rag/ # RAG test data (e.g. for FAISS) ``` diff --git a/docs/openapi.json b/docs/openapi.json index bff858bf2..80873199d 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -3795,19 +3795,19 @@ } } }, - "/v1/infer": { + "/v1/responses": { "post": { "tags": [ - "rlsapi-v1" + "responses" ], - "summary": "Infer Endpoint", - "description": "Handle rlsapi v1 /infer requests for stateless inference.\n\nThis endpoint serves requests from the RHEL Lightspeed Command Line Assistant (CLA).\n\nAccepts a question with optional context (stdin, attachments, terminal output,\nsystem info) and returns an LLM-generated response.\n\nArgs:\n infer_request: The inference request containing question and context.\n request: The FastAPI request object for accessing headers and state.\n background_tasks: FastAPI background tasks for async Splunk event sending.\n auth: Authentication tuple from the configured auth provider.\n\nReturns:\n RlsapiV1InferResponse containing the generated response text and request ID.\n\nRaises:\n HTTPException: 503 if the LLM service is unavailable.", - "operationId": "infer_endpoint_v1_infer_post", + "summary": "Responses Endpoint Handler", + "description": "Handle request to the /responses endpoint using Responses API (LCORE specification).\n\nProcesses a POST request to the responses endpoint, forwarding the\nuser's request to a selected Llama Stack LLM and returning the generated response\nfollowing the LCORE OpenAPI specification.\n\nReturns:\n ResponsesResponse: Contains the response following LCORE specification (non-streaming).\n StreamingResponse: SSE-formatted streaming response with enriched events (streaming).\n - response.created event includes conversation attribute\n - response.completed event includes 
available_quotas attribute\n\nRaises:\n HTTPException:\n - 401: Unauthorized - Missing or invalid credentials\n - 403: Forbidden - Insufficient permissions or model override not allowed\n - 404: Not Found - Conversation, model, or provider not found\n - 413: Prompt too long - Prompt exceeded model's context window size\n - 422: Unprocessable Entity - Request validation failed\n - 429: Quota limit exceeded - The token quota for model or user has been exceeded\n - 500: Internal Server Error - Configuration not loaded or other server errors\n - 503: Service Unavailable - Unable to connect to Llama Stack backend", + "operationId": "responses_endpoint_handler_v1_responses_post", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/RlsapiV1InferRequest" + "$ref": "#/components/schemas/ResponsesRequest" } } }, @@ -3819,14 +3819,59 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/RlsapiV1InferResponse" + "$ref": "#/components/schemas/ResponsesResponse" }, "example": { - "data": { - "request_id": "01JDKR8N7QW9ZMXVGK3PB5TQWZ", - "text": "To list files in Linux, use the `ls` command." 
+ "available_quotas": { + "daily": 1000, + "monthly": 50000 + }, + "completed_at": 1704067250, + "conversation": "0d21ba731f21f798dc9680125d5d6f493e4a7ab79f25670e", + "created_at": 1704067200, + "id": "resp_abc123", + "instructions": "You are a helpful assistant", + "model": "openai/gpt-4-turbo", + "object": "response", + "output": [ + { + "content": [ + { + "text": "Kubernetes is an open-source container orchestration system...", + "type": "output_text" + } + ], + "role": "assistant", + "type": "message" + } + ], + "output_text": "Kubernetes is an open-source container orchestration system...", + "parallel_tool_calls": true, + "status": "completed", + "store": true, + "temperature": 0.7, + "text": { + "format": { + "type": "text" + } + }, + "usage": { + "input_tokens": 100, + "output_tokens": 50, + "total_tokens": 150 } } + }, + "text/event-stream": { + "schema": { + "type": "string" + }, + "examples": { + "stream": { + "value": "event: response.created\ndata: {\"type\":\"response.created\",\"sequence_number\":0,\"response\":{\"id\":\"resp_abc\",\"created_at\":1704067200,\"status\":\"in_progress\",\"output\":[],\"conversation\":\"0d21ba731f21f798dc9680125d5d6f49\",\"available_quotas\":{},\"output_text\":\"\"}}\n\nevent: response.output_item.added\ndata: {\"response_id\":\"resp_abc\",\"item\":{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"output_text\",\"text\":\"Hello! How can I help?\"}]},\"output_index\":0,\"sequence_number\":1}\n\nevent: response.output_item.done\ndata: {\"response_id\":\"resp_abc\",\"item\":{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"output_text\",\"text\":\"Hello! 
How can I help?\"}]},\"output_index\":0,\"sequence_number\":2}\n\nevent: response.completed\ndata: {\"type\":\"response.completed\",\"sequence_number\":3,\"response\":{\"id\":\"resp_abc\",\"created_at\":1704067200,\"completed_at\":1704067250,\"status\":\"completed\",\"output\":[{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"output_text\",\"text\":\"Hello! How can I help?\"}]}],\"usage\":{\"input_tokens\":10,\"output_tokens\":6,\"total_tokens\":16},\"conversation\":\"0d21ba731f21f798dc9680125d5d6f49\",\"available_quotas\":{\"daily\":1000,\"monthly\":50000},\"output_text\":\"Hello! How can I help?\"}}\n\ndata: [DONE]\n\n" + } + }, + "description": "SSE stream of events" } } }, @@ -3866,6 +3911,14 @@ "$ref": "#/components/schemas/ForbiddenResponse" }, "examples": { + "conversation read": { + "value": { + "detail": { + "cause": "User 6789 does not have permission to read conversation with ID 123e4567-e89b-12d3-a456-426614174000", + "response": "User does not have permission to perform this action" + } + } + }, "endpoint": { "value": { "detail": { @@ -3873,6 +3926,50 @@ "response": "User does not have permission to access this endpoint" } } + }, + "model override": { + "value": { + "detail": { + "cause": "User lacks model_override permission required to override model/provider.", + "response": "This instance does not permit overriding model/provider in the query request (missing permission: MODEL_OVERRIDE). Please remove the model and provider fields from your request." 
+ } + } + } + } + } + } + }, + "404": { + "description": "Resource not found", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/NotFoundResponse" + }, + "examples": { + "conversation": { + "value": { + "detail": { + "cause": "Conversation with ID 123e4567-e89b-12d3-a456-426614174000 does not exist", + "response": "Conversation not found" + } + } + }, + "provider": { + "value": { + "detail": { + "cause": "Provider with ID openai does not exist", + "response": "Provider not found" + } + } + }, + "model": { + "value": { + "detail": { + "cause": "Model with ID gpt-4-turbo is not configured", + "response": "Model not found" + } + } } } } @@ -4008,6 +4105,16 @@ "application/json": { "schema": { "$ref": "#/components/schemas/InternalServerErrorResponse" + }, + "examples": { + "configuration": { + "value": { + "detail": { + "cause": "Lightspeed Stack configuration has not been initialized.", + "response": "Configuration is not loaded" + } + } + } } } } @@ -4035,26 +4142,37 @@ } } }, - "/readiness": { - "get": { + "/v1/infer": { + "post": { "tags": [ - "health" + "rlsapi-v1" ], - "summary": "Readiness Probe Get Method", - "description": "Handle the readiness probe endpoint, returning service readiness.\n\nIf any provider reports an error status, responds with HTTP 503\nand details of unhealthy providers; otherwise, indicates the\nservice is ready.\n\nReturns:\n ReadinessResponse: Object with `ready` indicating overall readiness,\n `reason` explaining the outcome, and `providers` containing the list of\n unhealthy ProviderHealthStatus entries (empty when ready).", - "operationId": "readiness_probe_get_method_readiness_get", + "summary": "Infer Endpoint", + "description": "Handle rlsapi v1 /infer requests for stateless inference.\n\nThis endpoint serves requests from the RHEL Lightspeed Command Line Assistant (CLA).\n\nAccepts a question with optional context (stdin, attachments, terminal output,\nsystem info) and returns an LLM-generated 
response.\n\nArgs:\n infer_request: The inference request containing question and context.\n request: The FastAPI request object for accessing headers and state.\n background_tasks: FastAPI background tasks for async Splunk event sending.\n auth: Authentication tuple from the configured auth provider.\n\nReturns:\n RlsapiV1InferResponse containing the generated response text and request ID.\n\nRaises:\n HTTPException: 503 if the LLM service is unavailable.", + "operationId": "infer_endpoint_v1_infer_post", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RlsapiV1InferRequest" + } + } + }, + "required": true + }, "responses": { "200": { "description": "Successful response", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ReadinessResponse" + "$ref": "#/components/schemas/RlsapiV1InferResponse" }, "example": { - "providers": [], - "ready": true, - "reason": "Service is ready" + "data": { + "request_id": "01JDKR8N7QW9ZMXVGK3PB5TQWZ", + "text": "To list files in Linux, use the `ls` command." 
+ } } } } @@ -4107,167 +4225,153 @@ } } }, - "503": { - "description": "Service unavailable", + "413": { + "description": "Prompt is too long", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ServiceUnavailableResponse" + "$ref": "#/components/schemas/PromptTooLongResponse" }, "examples": { - "llama stack": { + "prompt too long": { "value": { "detail": { - "cause": "Connection error while trying to reach backend service.", - "response": "Unable to connect to Llama Stack" + "cause": "The prompt exceeds the maximum allowed length.", + "response": "Prompt is too long" } } } } } } - } - } - } - }, - "/liveness": { - "get": { - "tags": [ - "health" - ], - "summary": "Liveness Probe Get Method", - "description": "Return the liveness status of the service.\n\nReturns:\n LivenessResponse: Indicates that the service is alive.", - "operationId": "liveness_probe_get_method_liveness_get", - "responses": { - "200": { - "description": "Successful response", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/LivenessResponse" - }, - "example": { - "alive": true - } - } - } }, - "401": { - "description": "Unauthorized", + "422": { + "description": "Request validation failed", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/UnauthorizedResponse" + "$ref": "#/components/schemas/UnprocessableEntityResponse" }, "examples": { - "missing header": { + "invalid format": { "value": { "detail": { - "cause": "No Authorization header found", - "response": "Missing or invalid credentials provided by client" + "cause": "Invalid request format. 
The request body could not be parsed.", + "response": "Invalid request format" } } }, - "missing token": { + "missing attributes": { "value": { "detail": { - "cause": "No token found in Authorization header", - "response": "Missing or invalid credentials provided by client" + "cause": "Missing required attributes: ['query', 'model', 'provider']", + "response": "Missing required attributes" } } - } - } - } - } - }, - "403": { - "description": "Permission denied", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ForbiddenResponse" - }, - "examples": { - "endpoint": { + }, + "invalid value": { "value": { "detail": { - "cause": "User 6789 is not authorized to access this endpoint.", - "response": "User does not have permission to access this endpoint" + "cause": "Invalid attachment type: must be one of ['text/plain', 'application/json', 'application/yaml', 'application/xml']", + "response": "Invalid attribute value" } } } } } } - } - } - } - }, - "/authorized": { - "post": { - "tags": [ - "authorized" - ], - "summary": "Authorized Endpoint Handler", - "description": "Handle request to the /authorized endpoint.\n\nProcess POST requests to the /authorized endpoint, returning\nthe authenticated user's ID and username.\n\nThe response intentionally omits any authentication token.\n\nReturns:\n AuthorizedResponse: Contains the user ID and username of the authenticated user.", - "operationId": "authorized_endpoint_handler_authorized_post", - "responses": { - "200": { - "description": "Successful response", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/AuthorizedResponse" - }, - "example": { - "skip_userid_check": false, - "user_id": "123e4567-e89b-12d3-a456-426614174000", - "username": "user1" - } - } - } }, - "401": { - "description": "Unauthorized", + "429": { + "description": "Quota limit exceeded", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/UnauthorizedResponse" 
+ "$ref": "#/components/schemas/QuotaExceededResponse" }, "examples": { - "missing header": { + "model": { "value": { "detail": { - "cause": "No Authorization header found", - "response": "Missing or invalid credentials provided by client" + "cause": "The token quota for model gpt-4-turbo has been exceeded.", + "response": "The model quota has been exceeded" } } }, - "missing token": { + "user none": { "value": { "detail": { - "cause": "No token found in Authorization header", - "response": "Missing or invalid credentials provided by client" + "cause": "User 123 has no available tokens.", + "response": "The quota has been exceeded" } } - } - } - } - } - }, - "403": { - "description": "Permission denied", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ForbiddenResponse" - }, - "examples": { - "endpoint": { + }, + "cluster none": { "value": { "detail": { - "cause": "User 6789 is not authorized to access this endpoint.", - "response": "User does not have permission to access this endpoint" + "cause": "Cluster has no available tokens.", + "response": "The quota has been exceeded" + } + } + }, + "subject none": { + "value": { + "detail": { + "cause": "Unknown subject 999 has no available tokens.", + "response": "The quota has been exceeded" + } + } + }, + "user insufficient": { + "value": { + "detail": { + "cause": "User 123 has 5 tokens, but 10 tokens are needed.", + "response": "The quota has been exceeded" + } + } + }, + "cluster insufficient": { + "value": { + "detail": { + "cause": "Cluster has 500 tokens, but 900 tokens are needed.", + "response": "The quota has been exceeded" + } + } + }, + "subject insufficient": { + "value": { + "detail": { + "cause": "Unknown subject 999 has 3 tokens, but 6 tokens are needed.", + "response": "The quota has been exceeded" + } + } + } + } + } + } + }, + "500": { + "description": "Internal server error", + "content": { + "application/json": { + "schema": { + "$ref": 
"#/components/schemas/InternalServerErrorResponse" + } + } + } + }, + "503": { + "description": "Service unavailable", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ServiceUnavailableResponse" + }, + "examples": { + "llama stack": { + "value": { + "detail": { + "cause": "Connection error while trying to reach backend service.", + "response": "Unable to connect to Llama Stack" } } } @@ -4278,21 +4382,26 @@ } } }, - "/metrics": { + "/readiness": { "get": { "tags": [ - "metrics" + "health" ], - "summary": "Metrics Endpoint Handler", - "description": "Handle request to the /metrics endpoint.\n\nProcess GET requests to the /metrics endpoint, returning the\nlatest Prometheus metrics in form of a plain text.\n\nInitializes model metrics on the first request if not already\nset up, then responds with the current metrics snapshot in\nPrometheus format.\n\nReturns:\n PlainTextResponse: Response body containing the Prometheus metrics text\n and the Prometheus content type.", - "operationId": "metrics_endpoint_handler_metrics_get", + "summary": "Readiness Probe Get Method", + "description": "Handle the readiness probe endpoint, returning service readiness.\n\nIf any provider reports an error status, responds with HTTP 503\nand details of unhealthy providers; otherwise, indicates the\nservice is ready.\n\nReturns:\n ReadinessResponse: Object with `ready` indicating overall readiness,\n `reason` explaining the outcome, and `providers` containing the list of\n unhealthy ProviderHealthStatus entries (empty when ready).", + "operationId": "readiness_probe_get_method_readiness_get", "responses": { "200": { - "description": "Successful Response", + "description": "Successful response", "content": { - "text/plain": { + "application/json": { "schema": { - "type": "string" + "$ref": "#/components/schemas/ReadinessResponse" + }, + "example": { + "providers": [], + "ready": true, + "reason": "Service is ready" } } } @@ -4301,6 +4410,9 @@ 
"description": "Unauthorized", "content": { "application/json": { + "schema": { + "$ref": "#/components/schemas/UnauthorizedResponse" + }, "examples": { "missing header": { "value": { @@ -4319,11 +4431,6 @@ } } } - }, - "text/plain": { - "schema": { - "$ref": "#/components/schemas/UnauthorizedResponse" - } } } }, @@ -4331,6 +4438,9 @@ "description": "Permission denied", "content": { "application/json": { + "schema": { + "$ref": "#/components/schemas/ForbiddenResponse" + }, "examples": { "endpoint": { "value": { @@ -4341,33 +4451,6 @@ } } } - }, - "text/plain": { - "schema": { - "$ref": "#/components/schemas/ForbiddenResponse" - } - } - } - }, - "500": { - "description": "Internal server error", - "content": { - "application/json": { - "examples": { - "configuration": { - "value": { - "detail": { - "cause": "Lightspeed Stack configuration has not been initialized.", - "response": "Configuration is not loaded" - } - } - } - } - }, - "text/plain": { - "schema": { - "$ref": "#/components/schemas/InternalServerErrorResponse" - } } } }, @@ -4375,6 +4458,9 @@ "description": "Service unavailable", "content": { "application/json": { + "schema": { + "$ref": "#/components/schemas/ServiceUnavailableResponse" + }, "examples": { "llama stack": { "value": { @@ -4385,162 +4471,2725 @@ } } } - }, - "text/plain": { - "schema": { - "$ref": "#/components/schemas/ServiceUnavailableResponse" - } } } } } } }, - "/.well-known/agent-card.json": { + "/liveness": { "get": { "tags": [ - "a2a" + "health" ], - "summary": "Get Agent Card", - "description": "Serve the A2A Agent Card at the well-known location.\n\nThis endpoint provides the agent card that describes Lightspeed's\ncapabilities according to the A2A protocol specification.\n\nReturns:\n AgentCard: The agent card describing this agent's capabilities.", - "operationId": "get_agent_card__well_known_agent_card_json_get", + "summary": "Liveness Probe Get Method", + "description": "Return the liveness status of the service.\n\nReturns:\n 
LivenessResponse: Indicates that the service is alive.", + "operationId": "liveness_probe_get_method_liveness_get", "responses": { "200": { - "description": "Successful Response", + "description": "Successful response", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/AgentCard" + "$ref": "#/components/schemas/LivenessResponse" + }, + "example": { + "alive": true } } } - } - } - } - }, - "/.well-known/agent.json": { - "get": { - "tags": [ - "a2a" - ], - "summary": "Get Agent Card", - "description": "Serve the A2A Agent Card at the well-known location.\n\nThis endpoint provides the agent card that describes Lightspeed's\ncapabilities according to the A2A protocol specification.\n\nReturns:\n AgentCard: The agent card describing this agent's capabilities.", - "operationId": "get_agent_card__well_known_agent_json_get", - "responses": { - "200": { - "description": "Successful Response", + }, + "401": { + "description": "Unauthorized", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/AgentCard" + "$ref": "#/components/schemas/UnauthorizedResponse" + }, + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } } } } - } - } - } - }, - "/a2a": { - "get": { - "tags": [ - "a2a" - ], - "summary": "Handle A2A Jsonrpc", - "description": "Handle A2A JSON-RPC requests following the A2A protocol specification.\n\nThis endpoint uses the DefaultRequestHandler from the A2A SDK to handle\nall JSON-RPC requests including message/send, message/stream, etc.\n\nThe A2A SDK application is created per-request to include authentication\ncontext while still leveraging FastAPI's authorization middleware.\n\nAutomatically detects 
streaming requests (message/stream JSON-RPC method)\nand returns a StreamingResponse to enable real-time chunk delivery.\n\nArgs:\n request: FastAPI request object\n auth: Authentication tuple\n mcp_headers: MCP headers for context propagation\n\nReturns:\n JSON-RPC response or streaming response", - "operationId": "handle_a2a_jsonrpc_a2a_get", - "responses": { - "200": { - "description": "Successful Response", - "content": { - "application/json": { - "schema": {} - } - } - } - } - }, - "post": { - "tags": [ - "a2a" - ], - "summary": "Handle A2A Jsonrpc", - "description": "Handle A2A JSON-RPC requests following the A2A protocol specification.\n\nThis endpoint uses the DefaultRequestHandler from the A2A SDK to handle\nall JSON-RPC requests including message/send, message/stream, etc.\n\nThe A2A SDK application is created per-request to include authentication\ncontext while still leveraging FastAPI's authorization middleware.\n\nAutomatically detects streaming requests (message/stream JSON-RPC method)\nand returns a StreamingResponse to enable real-time chunk delivery.\n\nArgs:\n request: FastAPI request object\n auth: Authentication tuple\n mcp_headers: MCP headers for context propagation\n\nReturns:\n JSON-RPC response or streaming response", - "operationId": "handle_a2a_jsonrpc_a2a_get", - "responses": { - "200": { - "description": "Successful Response", + }, + "403": { + "description": "Permission denied", "content": { "application/json": { - "schema": {} + "schema": { + "$ref": "#/components/schemas/ForbiddenResponse" + }, + "examples": { + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + } + } } } } } } }, - "/a2a/health": { - "get": { + "/authorized": { + "post": { "tags": [ - "a2a" + "authorized" ], - "summary": "A2A Health Check", - "description": "Health check endpoint for A2A service.\n\nReturns:\n Dict with health 
status information.", - "operationId": "a2a_health_check_a2a_health_get", + "summary": "Authorized Endpoint Handler", + "description": "Handle request to the /authorized endpoint.\n\nProcess POST requests to the /authorized endpoint, returning\nthe authenticated user's ID and username.\n\nThe response intentionally omits any authentication token.\n\nReturns:\n AuthorizedResponse: Contains the user ID and username of the authenticated user.", + "operationId": "authorized_endpoint_handler_authorized_post", "responses": { "200": { - "description": "Successful Response", + "description": "Successful response", "content": { "application/json": { "schema": { - "additionalProperties": { - "type": "string" + "$ref": "#/components/schemas/AuthorizedResponse" + }, + "example": { + "skip_userid_check": false, + "user_id": "123e4567-e89b-12d3-a456-426614174000", + "username": "user1" + } + } + } + }, + "401": { + "description": "Unauthorized", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UnauthorizedResponse" + }, + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } }, - "type": "object", - "title": "Response A2A Health Check A2A Health Get" + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } + } + } + } + }, + "403": { + "description": "Permission denied", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ForbiddenResponse" + }, + "examples": { + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + } + } + } + } + } + } + } + }, + "/metrics": { + "get": { + "tags": [ + "metrics" + ], + "summary": "Metrics Endpoint 
Handler", + "description": "Handle request to the /metrics endpoint.\n\nProcess GET requests to the /metrics endpoint, returning the\nlatest Prometheus metrics in form of a plain text.\n\nInitializes model metrics on the first request if not already\nset up, then responds with the current metrics snapshot in\nPrometheus format.\n\nReturns:\n PlainTextResponse: Response body containing the Prometheus metrics text\n and the Prometheus content type.", + "operationId": "metrics_endpoint_handler_metrics_get", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "text/plain": { + "schema": { + "type": "string" + } + } + } + }, + "401": { + "description": "Unauthorized", + "content": { + "application/json": { + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } + } + }, + "text/plain": { + "schema": { + "$ref": "#/components/schemas/UnauthorizedResponse" + } + } + } + }, + "403": { + "description": "Permission denied", + "content": { + "application/json": { + "examples": { + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + } + } + }, + "text/plain": { + "schema": { + "$ref": "#/components/schemas/ForbiddenResponse" + } + } + } + }, + "500": { + "description": "Internal server error", + "content": { + "application/json": { + "examples": { + "configuration": { + "value": { + "detail": { + "cause": "Lightspeed Stack configuration has not been initialized.", + "response": "Configuration is not loaded" + } + } + } + } + }, + "text/plain": { + "schema": { + "$ref": 
"#/components/schemas/InternalServerErrorResponse" + } + } + } + }, + "503": { + "description": "Service unavailable", + "content": { + "application/json": { + "examples": { + "llama stack": { + "value": { + "detail": { + "cause": "Connection error while trying to reach backend service.", + "response": "Unable to connect to Llama Stack" + } + } + } + } + }, + "text/plain": { + "schema": { + "$ref": "#/components/schemas/ServiceUnavailableResponse" + } + } + } + } + } + } + }, + "/.well-known/agent-card.json": { + "get": { + "tags": [ + "a2a" + ], + "summary": "Get Agent Card", + "description": "Serve the A2A Agent Card at the well-known location.\n\nThis endpoint provides the agent card that describes Lightspeed's\ncapabilities according to the A2A protocol specification.\n\nReturns:\n AgentCard: The agent card describing this agent's capabilities.", + "operationId": "get_agent_card__well_known_agent_card_json_get", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/AgentCard" + } + } + } + } + } + } + }, + "/.well-known/agent.json": { + "get": { + "tags": [ + "a2a" + ], + "summary": "Get Agent Card", + "description": "Serve the A2A Agent Card at the well-known location.\n\nThis endpoint provides the agent card that describes Lightspeed's\ncapabilities according to the A2A protocol specification.\n\nReturns:\n AgentCard: The agent card describing this agent's capabilities.", + "operationId": "get_agent_card__well_known_agent_json_get", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/AgentCard" } } } } - } - } - } - }, - "components": { - "schemas": { - "A2AStateConfiguration": { + } + } + }, + "/a2a": { + "get": { + "tags": [ + "a2a" + ], + "summary": "Handle A2A Jsonrpc", + "description": "Handle A2A JSON-RPC requests following the A2A protocol 
specification.\n\nThis endpoint uses the DefaultRequestHandler from the A2A SDK to handle\nall JSON-RPC requests including message/send, message/stream, etc.\n\nThe A2A SDK application is created per-request to include authentication\ncontext while still leveraging FastAPI's authorization middleware.\n\nAutomatically detects streaming requests (message/stream JSON-RPC method)\nand returns a StreamingResponse to enable real-time chunk delivery.\n\nArgs:\n request: FastAPI request object\n auth: Authentication tuple\n mcp_headers: MCP headers for context propagation\n\nReturns:\n JSON-RPC response or streaming response", + "operationId": "handle_a2a_jsonrpc_a2a_get", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + } + } + }, + "post": { + "tags": [ + "a2a" + ], + "summary": "Handle A2A Jsonrpc", + "description": "Handle A2A JSON-RPC requests following the A2A protocol specification.\n\nThis endpoint uses the DefaultRequestHandler from the A2A SDK to handle\nall JSON-RPC requests including message/send, message/stream, etc.\n\nThe A2A SDK application is created per-request to include authentication\ncontext while still leveraging FastAPI's authorization middleware.\n\nAutomatically detects streaming requests (message/stream JSON-RPC method)\nand returns a StreamingResponse to enable real-time chunk delivery.\n\nArgs:\n request: FastAPI request object\n auth: Authentication tuple\n mcp_headers: MCP headers for context propagation\n\nReturns:\n JSON-RPC response or streaming response", + "operationId": "handle_a2a_jsonrpc_a2a_post", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + } + } + } + }, + "/a2a/health": { + "get": { + "tags": [ + "a2a" + ], + "summary": "A2A Health Check", + "description": "Health check endpoint for A2A service.\n\nReturns:\n Dict with health status information.", + "operationId":
"a2a_health_check_a2a_health_get", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "title": "Response A2A Health Check A2A Health Get" + } + } + } + } + } + } + } + }, + "components": { + "schemas": { + "A2AStateConfiguration": { + "properties": { + "sqlite": { + "anyOf": [ + { + "$ref": "#/components/schemas/SQLiteDatabaseConfiguration" + }, + { + "type": "null" + } + ], + "title": "SQLite configuration", + "description": "SQLite database configuration for A2A state storage." + }, + "postgres": { + "anyOf": [ + { + "$ref": "#/components/schemas/PostgreSQLDatabaseConfiguration" + }, + { + "type": "null" + } + ], + "title": "PostgreSQL configuration", + "description": "PostgreSQL database configuration for A2A state storage." + } + }, + "additionalProperties": false, + "type": "object", + "title": "A2AStateConfiguration", + "description": "A2A protocol persistent state configuration.\n\nConfigures how A2A task state and context-to-conversation mappings are\nstored. For multi-worker deployments, use SQLite or PostgreSQL to ensure\nstate is shared across all workers.\n\nIf no configuration is provided, in-memory storage is used (default).\nThis is suitable for single-worker deployments but state will be lost\non restarts and not shared across workers.\n\nAttributes:\n sqlite: SQLite database configuration for A2A state storage.\n postgres: PostgreSQL database configuration for A2A state storage." 
+ }, + "APIKeySecurityScheme": { + "properties": { + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description" + }, + "in": { + "$ref": "#/components/schemas/In" + }, + "name": { + "type": "string", + "title": "Name" + }, + "type": { + "type": "string", + "const": "apiKey", + "title": "Type", + "default": "apiKey" + } + }, + "type": "object", + "required": [ + "in", + "name" + ], + "title": "APIKeySecurityScheme", + "description": "Defines a security scheme using an API key." + }, + "APIKeyTokenConfiguration": { + "properties": { + "api_key": { + "type": "string", + "minLength": 1, + "format": "password", + "title": "API key", + "writeOnly": true, + "examples": [ + "some-api-key" + ] + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "api_key" + ], + "title": "APIKeyTokenConfiguration", + "description": "API Key Token configuration." + }, + "AccessRule": { + "properties": { + "role": { + "type": "string", + "title": "Role name", + "description": "Name of the role" + }, + "actions": { + "items": { + "$ref": "#/components/schemas/Action" + }, + "type": "array", + "title": "Allowed actions", + "description": "Allowed actions for this role" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "role", + "actions" + ], + "title": "AccessRule", + "description": "Rule defining what actions a role can perform." 
+ }, + "Action": { + "type": "string", + "enum": [ + "admin", + "list_other_conversations", + "read_other_conversations", + "query_other_conversations", + "delete_other_conversations", + "query", + "streaming_query", + "get_conversation", + "list_conversations", + "delete_conversation", + "update_conversation", + "feedback", + "get_models", + "get_tools", + "get_shields", + "list_providers", + "get_provider", + "list_rags", + "get_rag", + "get_metrics", + "get_config", + "info", + "model_override", + "rlsapi_v1_infer", + "a2a_agent_card", + "a2a_task_execution", + "a2a_message", + "a2a_jsonrpc" + ], + "title": "Action", + "description": "Available actions in the system.\n\nNote: this is not a real model, just an enumeration of all action names." + }, + "AgentCapabilities": { + "properties": { + "extensions": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/AgentExtension" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Extensions" + }, + "pushNotifications": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Pushnotifications" + }, + "stateTransitionHistory": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Statetransitionhistory" + }, + "streaming": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Streaming" + } + }, + "type": "object", + "title": "AgentCapabilities", + "description": "Defines optional capabilities supported by an agent." 
+ }, + "AgentCard": { + "properties": { + "additionalInterfaces": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/AgentInterface" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Additionalinterfaces" + }, + "capabilities": { + "$ref": "#/components/schemas/AgentCapabilities" + }, + "defaultInputModes": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Defaultinputmodes" + }, + "defaultOutputModes": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Defaultoutputmodes" + }, + "description": { + "type": "string", + "title": "Description", + "examples": [ + "Agent that helps users with recipes and cooking." + ] + }, + "documentationUrl": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Documentationurl" + }, + "iconUrl": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Iconurl" + }, + "name": { + "type": "string", + "title": "Name", + "examples": [ + "Recipe Agent" + ] + }, + "preferredTransport": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Preferredtransport", + "default": "JSONRPC", + "examples": [ + "JSONRPC", + "GRPC", + "HTTP+JSON" + ] + }, + "protocolVersion": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Protocolversion", + "default": "0.3.0" + }, + "provider": { + "anyOf": [ + { + "$ref": "#/components/schemas/AgentProvider" + }, + { + "type": "null" + } + ] + }, + "security": { + "anyOf": [ + { + "items": { + "additionalProperties": { + "items": { + "type": "string" + }, + "type": "array" + }, + "type": "object" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Security", + "examples": [ + [ + { + "oauth": [ + "read" + ] + }, + { + "api-key": [], + "mtls": [] + } + ] + ] + }, + "securitySchemes": { + "anyOf": [ + { + "additionalProperties": { + "$ref": "#/components/schemas/SecurityScheme" + }, + "type": "object" + 
}, + { + "type": "null" + } + ], + "title": "Securityschemes" + }, + "signatures": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/AgentCardSignature" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Signatures" + }, + "skills": { + "items": { + "$ref": "#/components/schemas/AgentSkill" + }, + "type": "array", + "title": "Skills" + }, + "supportsAuthenticatedExtendedCard": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Supportsauthenticatedextendedcard" + }, + "url": { + "type": "string", + "title": "Url", + "examples": [ + "https://api.example.com/a2a/v1" + ] + }, + "version": { + "type": "string", + "title": "Version", + "examples": [ + "1.0.0" + ] + } + }, + "type": "object", + "required": [ + "capabilities", + "defaultInputModes", + "defaultOutputModes", + "description", + "name", + "skills", + "url", + "version" + ], + "title": "AgentCard", + "description": "The AgentCard is a self-describing manifest for an agent. It provides essential\nmetadata including the agent's identity, capabilities, skills, supported\ncommunication methods, and security requirements." + }, + "AgentCardSignature": { + "properties": { + "header": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Header" + }, + "protected": { + "type": "string", + "title": "Protected" + }, + "signature": { + "type": "string", + "title": "Signature" + } + }, + "type": "object", + "required": [ + "protected", + "signature" + ], + "title": "AgentCardSignature", + "description": "AgentCardSignature represents a JWS signature of an AgentCard.\nThis follows the JSON format of an RFC 7515 JSON Web Signature (JWS)." 
+ }, + "AgentExtension": { + "properties": { + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description" + }, + "params": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Params" + }, + "required": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Required" + }, + "uri": { + "type": "string", + "title": "Uri" + } + }, + "type": "object", + "required": [ + "uri" + ], + "title": "AgentExtension", + "description": "A declaration of a protocol extension supported by an Agent." + }, + "AgentInterface": { + "properties": { + "transport": { + "type": "string", + "title": "Transport", + "examples": [ + "JSONRPC", + "GRPC", + "HTTP+JSON" + ] + }, + "url": { + "type": "string", + "title": "Url", + "examples": [ + "https://api.example.com/a2a/v1", + "https://grpc.example.com/a2a", + "https://rest.example.com/v1" + ] + } + }, + "type": "object", + "required": [ + "transport", + "url" + ], + "title": "AgentInterface", + "description": "Declares a combination of a target URL and a transport protocol for interacting with the agent.\nThis allows agents to expose the same functionality over multiple transport mechanisms." + }, + "AgentProvider": { + "properties": { + "organization": { + "type": "string", + "title": "Organization" + }, + "url": { + "type": "string", + "title": "Url" + } + }, + "type": "object", + "required": [ + "organization", + "url" + ], + "title": "AgentProvider", + "description": "Represents the service provider of an agent." 
+ }, + "AgentSkill": { + "properties": { + "description": { + "type": "string", + "title": "Description" + }, + "examples": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Examples", + "examples": [ + [ + "I need a recipe for bread" + ] + ] + }, + "id": { + "type": "string", + "title": "Id" + }, + "inputModes": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Inputmodes" + }, + "name": { + "type": "string", + "title": "Name" + }, + "outputModes": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Outputmodes" + }, + "security": { + "anyOf": [ + { + "items": { + "additionalProperties": { + "items": { + "type": "string" + }, + "type": "array" + }, + "type": "object" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Security", + "examples": [ + [ + { + "google": [ + "oidc" + ] + } + ] + ] + }, + "tags": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Tags", + "examples": [ + [ + "cooking", + "customer support", + "billing" + ] + ] + } + }, + "type": "object", + "required": [ + "description", + "id", + "name", + "tags" + ], + "title": "AgentSkill", + "description": "Represents a distinct capability or function that an agent can perform." 
+ }, + "AllowedToolsFilter": { + "properties": { + "tool_names": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tool Names" + } + }, + "type": "object", + "title": "AllowedToolsFilter", + "description": "Filter configuration for restricting which MCP tools can be used.\n\n:param tool_names: (Optional) List of specific tool names that are allowed" + }, + "ApprovalFilter": { + "properties": { + "always": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Always" + }, + "never": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Never" + } + }, + "type": "object", + "title": "ApprovalFilter", + "description": "Filter configuration for MCP tool approval requirements.\n\n:param always: (Optional) List of tool names that always require approval\n:param never: (Optional) List of tool names that never require approval" + }, + "Attachment": { + "properties": { + "attachment_type": { + "type": "string", + "title": "Attachment Type", + "description": "The attachment type, like 'log', 'configuration' etc.", + "examples": [ + "log" + ] + }, + "content_type": { + "type": "string", + "title": "Content Type", + "description": "The content type as defined in MIME standard", + "examples": [ + "text/plain" + ] + }, + "content": { + "type": "string", + "title": "Content", + "description": "The actual attachment content", + "examples": [ + "warning: quota exceeded" + ] + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "attachment_type", + "content_type", + "content" + ], + "title": "Attachment", + "description": "Model representing an attachment that can be send from the UI as part of query.\n\nA list of attachments can be an optional part of 'query' request.\n\nAttributes:\n attachment_type: The attachment type, like \"log\", \"configuration\" 
etc.\n content_type: The content type as defined in MIME standard\n content: The actual attachment content\n\nYAML attachments with **kind** and **metadata/name** attributes will\nbe handled as resources with the specified name:\n```\nkind: Pod\nmetadata:\n name: private-reg\n```", + "examples": [ + { + "attachment_type": "log", + "content": "this is attachment", + "content_type": "text/plain" + }, + { + "attachment_type": "configuration", + "content": "kind: Pod\n metadata:\n name: private-reg", + "content_type": "application/yaml" + }, + { + "attachment_type": "configuration", + "content": "foo: bar", + "content_type": "application/yaml" + } + ] + }, + "AuthenticationConfiguration": { + "properties": { + "module": { + "type": "string", + "title": "Module", + "default": "noop" + }, + "skip_tls_verification": { + "type": "boolean", + "title": "Skip Tls Verification", + "default": false + }, + "skip_for_health_probes": { + "type": "boolean", + "title": "Skip authorization for probes", + "description": "Skip authorization for readiness and liveness probes", + "default": false + }, + "k8s_cluster_api": { + "anyOf": [ + { + "type": "string", + "minLength": 1, + "format": "uri" + }, + { + "type": "null" + } + ], + "title": "K8S Cluster Api" + }, + "k8s_ca_cert_path": { + "anyOf": [ + { + "type": "string", + "format": "file-path" + }, + { + "type": "null" + } + ], + "title": "K8S Ca Cert Path" + }, + "jwk_config": { + "anyOf": [ + { + "$ref": "#/components/schemas/JwkConfiguration" + }, + { + "type": "null" + } + ] + }, + "api_key_config": { + "anyOf": [ + { + "$ref": "#/components/schemas/APIKeyTokenConfiguration" + }, + { + "type": "null" + } + ] + }, + "rh_identity_config": { + "anyOf": [ + { + "$ref": "#/components/schemas/RHIdentityConfiguration" + }, + { + "type": "null" + } + ] + } + }, + "additionalProperties": false, + "type": "object", + "title": "AuthenticationConfiguration", + "description": "Authentication configuration." 
+ }, + "AuthorizationCodeOAuthFlow": { + "properties": { + "authorizationUrl": { + "type": "string", + "title": "Authorizationurl" + }, + "refreshUrl": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Refreshurl" + }, + "scopes": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "title": "Scopes" + }, + "tokenUrl": { + "type": "string", + "title": "Tokenurl" + } + }, + "type": "object", + "required": [ + "authorizationUrl", + "scopes", + "tokenUrl" + ], + "title": "AuthorizationCodeOAuthFlow", + "description": "Defines configuration details for the OAuth 2.0 Authorization Code flow." + }, + "AuthorizationConfiguration": { + "properties": { + "access_rules": { + "items": { + "$ref": "#/components/schemas/AccessRule" + }, + "type": "array", + "title": "Access rules", + "description": "Rules for role-based access control" + } + }, + "additionalProperties": false, + "type": "object", + "title": "AuthorizationConfiguration", + "description": "Authorization configuration." 
+ }, + "AuthorizedResponse": { + "properties": { + "user_id": { + "type": "string", + "title": "User Id", + "description": "User ID, for example UUID", + "examples": [ + "c5260aec-4d82-4370-9fdf-05cf908b3f16" + ] + }, + "username": { + "type": "string", + "title": "Username", + "description": "User name", + "examples": [ + "John Doe", + "Adam Smith" + ] + }, + "skip_userid_check": { + "type": "boolean", + "title": "Skip Userid Check", + "description": "Whether to skip the user ID check", + "examples": [ + true, + false + ] + } + }, + "type": "object", + "required": [ + "user_id", + "username", + "skip_userid_check" + ], + "title": "AuthorizedResponse", + "description": "Model representing a response to an authorization request.\n\nAttributes:\n user_id: The ID of the logged in user.\n username: The name of the logged in user.\n skip_userid_check: Whether to skip the user ID check.", + "examples": [ + { + "skip_userid_check": false, + "user_id": "123e4567-e89b-12d3-a456-426614174000", + "username": "user1" + } + ] + }, + "AzureEntraIdConfiguration": { + "properties": { + "tenant_id": { + "type": "string", + "format": "password", + "title": "Tenant Id", + "writeOnly": true + }, + "client_id": { + "type": "string", + "format": "password", + "title": "Client Id", + "writeOnly": true + }, + "client_secret": { + "type": "string", + "format": "password", + "title": "Client Secret", + "writeOnly": true + }, + "scope": { + "type": "string", + "title": "Token scope", + "description": "Azure Cognitive Services scope for token requests. Override only if using a different Azure service.", + "default": "https://cognitiveservices.azure.com/.default" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "tenant_id", + "client_id", + "client_secret" + ], + "title": "AzureEntraIdConfiguration", + "description": "Microsoft Entra ID authentication attributes for Azure." 
+ }, + "BadRequestResponse": { + "properties": { + "status_code": { + "type": "integer", + "title": "Status Code" + }, + "detail": { + "$ref": "#/components/schemas/DetailModel" + } + }, + "type": "object", + "required": [ + "status_code", + "detail" + ], + "title": "BadRequestResponse", + "description": "400 Bad Request. Invalid resource identifier.", + "examples": [ + { + "detail": { + "cause": "The conversation ID 123e4567-e89b-12d3-a456-426614174000 has invalid format.", + "response": "Invalid conversation ID format" + }, + "label": "conversation_id" + } + ] + }, + "ByokRag": { + "properties": { + "rag_id": { + "type": "string", + "minLength": 1, + "title": "RAG ID", + "description": "Unique RAG ID" + }, + "rag_type": { + "type": "string", + "minLength": 1, + "title": "RAG type", + "description": "Type of RAG database.", + "default": "inline::faiss" + }, + "embedding_model": { + "type": "string", + "minLength": 1, + "title": "Embedding model", + "description": "Embedding model identification", + "default": "sentence-transformers/all-mpnet-base-v2" + }, + "embedding_dimension": { + "type": "integer", + "exclusiveMinimum": 0.0, + "title": "Embedding dimension", + "description": "Dimensionality of embedding vectors.", + "default": 768 + }, + "vector_db_id": { + "type": "string", + "minLength": 1, + "title": "Vector DB ID", + "description": "Vector database identification." + }, + "db_path": { + "type": "string", + "title": "DB path", + "description": "Path to RAG database." + }, + "score_multiplier": { + "type": "number", + "exclusiveMinimum": 0.0, + "title": "Score multiplier", + "description": "Multiplier applied to relevance scores from this vector store. Used to weight results when querying multiple knowledge sources. 
Values > 1 boost this store's results; values < 1 reduce them.", + "default": 1.0 + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "rag_id", + "vector_db_id", + "db_path" + ], + "title": "ByokRag", + "description": "BYOK (Bring Your Own Knowledge) RAG configuration." + }, + "CORSConfiguration": { + "properties": { + "allow_origins": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Allow origins", + "description": "A list of origins allowed for cross-origin requests. An origin is the combination of protocol (http, https), domain (myapp.com, localhost, localhost.tiangolo.com), and port (80, 443, 8080). Use ['*'] to allow all origins.", + "default": [ + "*" + ] + }, + "allow_credentials": { + "type": "boolean", + "title": "Allow credentials", + "description": "Indicate that cookies should be supported for cross-origin requests", + "default": false + }, + "allow_methods": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Allow methods", + "description": "A list of HTTP methods that should be allowed for cross-origin requests. You can use ['*'] to allow all standard methods.", + "default": [ + "*" + ] + }, + "allow_headers": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Allow headers", + "description": "A list of HTTP request headers that should be supported for cross-origin requests. You can use ['*'] to allow all headers. 
The Accept, Accept-Language, Content-Language and Content-Type headers are always allowed for simple CORS requests.", + "default": [ + "*" + ] + } + }, + "additionalProperties": false, + "type": "object", + "title": "CORSConfiguration", + "description": "CORS configuration.\n\nCORS or 'Cross-Origin Resource Sharing' refers to the situations when a\nfrontend running in a browser has JavaScript code that communicates with a\nbackend, and the backend is in a different 'origin' than the frontend.\n\nUseful resources:\n\n - [CORS in FastAPI](https://fastapi.tiangolo.com/tutorial/cors/)\n - [Wikipedia article](https://en.wikipedia.org/wiki/Cross-origin_resource_sharing)\n - [What is CORS?](https://dev.to/akshay_chauhan/what-is-cors-explained-8f1)" + }, + "ClientCredentialsOAuthFlow": { + "properties": { + "refreshUrl": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Refreshurl" + }, + "scopes": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "title": "Scopes" + }, + "tokenUrl": { + "type": "string", + "title": "Tokenurl" + } + }, + "type": "object", + "required": [ + "scopes", + "tokenUrl" + ], + "title": "ClientCredentialsOAuthFlow", + "description": "Defines configuration details for the OAuth 2.0 Client Credentials flow." + }, + "Configuration": { + "properties": { + "name": { + "type": "string", + "title": "Service name", + "description": "Name of the service. That value will be used in REST API endpoints." + }, + "service": { + "$ref": "#/components/schemas/ServiceConfiguration", + "title": "Service configuration", + "description": "This section contains Lightspeed Core Stack service configuration." + }, + "llama_stack": { + "$ref": "#/components/schemas/LlamaStackConfiguration", + "title": "Llama Stack configuration", + "description": "This section contains Llama Stack configuration. Lightspeed Core Stack service can call Llama Stack in library mode or in server mode." 
+ }, + "user_data_collection": { + "$ref": "#/components/schemas/UserDataCollection", + "title": "User data collection configuration", + "description": "This section contains configuration for subsystem that collects user data(transcription history and feedbacks)." + }, + "database": { + "$ref": "#/components/schemas/DatabaseConfiguration", + "title": "Database Configuration", + "description": "Configuration for database to store conversation IDs and other runtime data" + }, + "mcp_servers": { + "items": { + "$ref": "#/components/schemas/ModelContextProtocolServer" + }, + "type": "array", + "title": "Model Context Protocol Server and tools configuration", + "description": "MCP (Model Context Protocol) servers provide tools and capabilities to the AI agents. These are configured in this section. Only MCP servers defined in the lightspeed-stack.yaml configuration are available to the agents. Tools configured in the llama-stack run.yaml are not accessible to lightspeed-core agents." + }, + "authentication": { + "$ref": "#/components/schemas/AuthenticationConfiguration", + "title": "Authentication configuration", + "description": "Authentication configuration" + }, + "authorization": { + "anyOf": [ + { + "$ref": "#/components/schemas/AuthorizationConfiguration" + }, + { + "type": "null" + } + ], + "title": "Authorization configuration", + "description": "Lightspeed Core Stack implements a modular authentication and authorization system with multiple authentication methods. Authorization is configurable through role-based access control. Authentication is handled through selectable modules configured via the module field in the authentication configuration." + }, + "customization": { + "anyOf": [ + { + "$ref": "#/components/schemas/Customization" + }, + { + "type": "null" + } + ], + "title": "Custom profile configuration", + "description": "It is possible to customize Lightspeed Core Stack via this section. 
System prompt can be customized and also different parts of the service can be replaced by custom Python modules." + }, + "inference": { + "$ref": "#/components/schemas/InferenceConfiguration", + "title": "Inference configuration", + "description": "One LLM provider and one of its models might be selected as the default ones. When no provider+model pair is specified in REST API calls (query endpoints), the default provider and model are used." + }, + "conversation_cache": { + "$ref": "#/components/schemas/ConversationHistoryConfiguration", + "title": "Conversation history configuration" + }, + "byok_rag": { + "items": { + "$ref": "#/components/schemas/ByokRag" + }, + "type": "array", + "title": "BYOK RAG configuration", + "description": "BYOK RAG configuration. This configuration can be used to reconfigure Llama Stack through its run.yaml configuration file" + }, + "a2a_state": { + "$ref": "#/components/schemas/A2AStateConfiguration", + "title": "A2A state configuration", + "description": "Configuration for A2A protocol persistent state storage." + }, + "quota_handlers": { + "$ref": "#/components/schemas/QuotaHandlersConfiguration", + "title": "Quota handlers", + "description": "Quota handlers configuration" + }, + "azure_entra_id": { + "anyOf": [ + { + "$ref": "#/components/schemas/AzureEntraIdConfiguration" + }, + { + "type": "null" + } + ] + }, + "splunk": { + "anyOf": [ + { + "$ref": "#/components/schemas/SplunkConfiguration" + }, + { + "type": "null" + } + ], + "title": "Splunk configuration", + "description": "Splunk HEC configuration for sending telemetry events." + }, + "deployment_environment": { + "type": "string", + "title": "Deployment environment", + "description": "Deployment environment name (e.g., 'development', 'staging', 'production'). 
Used in telemetry events.", + "default": "development" + }, + "rag": { + "$ref": "#/components/schemas/RagConfiguration", + "title": "RAG configuration", + "description": "Configuration for all RAG strategies (inline and tool-based)." + }, + "okp": { + "$ref": "#/components/schemas/OkpConfiguration", + "title": "OKP configuration", + "description": "OKP provider settings. Only used when 'okp' is listed in rag.inline or rag.tool." + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "name", + "service", + "llama_stack", + "user_data_collection" + ], + "title": "Configuration", + "description": "Global service configuration." + }, + "ConfigurationResponse": { + "properties": { + "configuration": { + "$ref": "#/components/schemas/Configuration" + } + }, + "type": "object", + "required": [ + "configuration" + ], + "title": "ConfigurationResponse", + "description": "Success response model for the config endpoint.", + "examples": [ + { + "configuration": { + "authentication": { + "module": "noop", + "skip_tls_verification": false + }, + "authorization": { + "access_rules": [] + }, + "byok_rag": [], + "conversation_cache": {}, + "database": { + "sqlite": { + "db_path": "/tmp/lightspeed-stack.db" + } + }, + "inference": { + "default_model": "gpt-4-turbo", + "default_provider": "openai" + }, + "llama_stack": { + "api_key": "*****", + "url": "http://localhost:8321", + "use_as_library_client": false + }, + "mcp_servers": [ + { + "name": "server1", + "provider_id": "provider1", + "url": "http://url.com:1" + } + ], + "name": "lightspeed-stack", + "quota_handlers": { + "enable_token_history": false, + "limiters": [], + "scheduler": { + "period": 1 + } + }, + "service": { + "access_log": true, + "auth_enabled": false, + "color_log": true, + "cors": { + "allow_credentials": false, + "allow_headers": [ + "*" + ], + "allow_methods": [ + "*" + ], + "allow_origins": [ + "*" + ] + }, + "host": "localhost", + "port": 8080, + "tls_config": {}, + "workers": 1 + 
}, + "user_data_collection": { + "feedback_enabled": true, + "feedback_storage": "/tmp/data/feedback", + "transcripts_enabled": false, + "transcripts_storage": "/tmp/data/transcripts" + } + } + } + ] + }, + "ConversationData": { + "properties": { + "conversation_id": { + "type": "string", + "title": "Conversation Id" + }, + "topic_summary": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Topic Summary" + }, + "last_message_timestamp": { + "type": "number", + "title": "Last Message Timestamp" + } + }, + "type": "object", + "required": [ + "conversation_id", + "topic_summary", + "last_message_timestamp" + ], + "title": "ConversationData", + "description": "Model representing conversation data returned by cache list operations.\n\nAttributes:\n conversation_id: The conversation ID\n topic_summary: The topic summary for the conversation (can be None)\n last_message_timestamp: The timestamp of the last message in the conversation" + }, + "ConversationDeleteResponse": { + "properties": { + "conversation_id": { + "type": "string", + "title": "Conversation Id", + "description": "The conversation ID (UUID) that was deleted.", + "examples": [ + "123e4567-e89b-12d3-a456-426614174000" + ] + }, + "success": { + "type": "boolean", + "title": "Success", + "description": "Whether the deletion was successful.", + "examples": [ + true, + false + ] + }, + "response": { + "type": "string", + "title": "Response", + "description": "A message about the deletion result.", + "examples": [ + "Conversation deleted successfully", + "Conversation cannot be deleted" + ] + } + }, + "type": "object", + "required": [ + "conversation_id", + "success", + "response" + ], + "title": "ConversationDeleteResponse", + "description": "Model representing a response for deleting a conversation.\n\nAttributes:\n conversation_id: The conversation ID (UUID) that was deleted.\n success: Whether the deletion was successful.\n response: A message about the deletion result.", + 
"examples": [ + { + "label": "deleted", + "value": { + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "response": "Conversation deleted successfully", + "success": true + } + }, + { + "label": "not found", + "value": { + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "response": "Conversation can not be deleted", + "success": true + } + } + ] + }, + "ConversationDetails": { + "properties": { + "conversation_id": { + "type": "string", + "title": "Conversation Id", + "description": "Conversation ID (UUID)", + "examples": [ + "c5260aec-4d82-4370-9fdf-05cf908b3f16" + ] + }, + "created_at": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Created At", + "description": "When the conversation was created", + "examples": [ + "2024-01-01T01:00:00Z" + ] + }, + "last_message_at": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Last Message At", + "description": "When the last message was sent", + "examples": [ + "2024-01-01T01:00:00Z" + ] + }, + "message_count": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Message Count", + "description": "Number of user messages in the conversation", + "examples": [ + 42 + ] + }, + "last_used_model": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Last Used Model", + "description": "Identification of the last model used for the conversation", + "examples": [ + "gpt-4-turbo", + "gpt-3.5-turbo-0125" + ] + }, + "last_used_provider": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Last Used Provider", + "description": "Identification of the last provider used for the conversation", + "examples": [ + "openai", + "gemini" + ] + }, + "topic_summary": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Topic Summary", + "description": "Topic summary for the conversation", + "examples": [ + "Openshift 
Microservices Deployment Strategies" + ] + } + }, + "type": "object", + "required": [ + "conversation_id" + ], + "title": "ConversationDetails", + "description": "Model representing the details of a user conversation.\n\nAttributes:\n conversation_id: The conversation ID (UUID).\n created_at: When the conversation was created.\n last_message_at: When the last message was sent.\n message_count: Number of user messages in the conversation.\n last_used_model: The last model used for the conversation.\n last_used_provider: The provider of the last used model.\n topic_summary: The topic summary for the conversation.\n\nExample:\n ```python\n conversation = ConversationDetails(\n conversation_id=\"123e4567-e89b-12d3-a456-426614174000\",\n created_at=\"2024-01-01T00:00:00Z\",\n last_message_at=\"2024-01-01T00:05:00Z\",\n message_count=5,\n last_used_model=\"gemini/gemini-2.0-flash\",\n last_used_provider=\"gemini\",\n topic_summary=\"Openshift Microservices Deployment Strategies\",\n )\n ```" + }, + "ConversationHistoryConfiguration": { + "properties": { + "type": { + "anyOf": [ + { + "type": "string", + "enum": [ + "noop", + "memory", + "sqlite", + "postgres" + ] + }, + { + "type": "null" + } + ], + "title": "Conversation history database type", + "description": "Type of database where the conversation history is to be stored." 
+ }, + "memory": { + "anyOf": [ + { + "$ref": "#/components/schemas/InMemoryCacheConfig" + }, + { + "type": "null" + } + ], + "title": "In-memory cache configuration", + "description": "In-memory cache configuration" + }, + "sqlite": { + "anyOf": [ + { + "$ref": "#/components/schemas/SQLiteDatabaseConfiguration" + }, + { + "type": "null" + } + ], + "title": "SQLite configuration", + "description": "SQLite database configuration" + }, + "postgres": { + "anyOf": [ + { + "$ref": "#/components/schemas/PostgreSQLDatabaseConfiguration" + }, + { + "type": "null" + } + ], + "title": "PostgreSQL configuration", + "description": "PostgreSQL database configuration" + } + }, + "additionalProperties": false, + "type": "object", + "title": "ConversationHistoryConfiguration", + "description": "Conversation history configuration." + }, + "ConversationResponse": { + "properties": { + "conversation_id": { + "type": "string", + "title": "Conversation Id", + "description": "Conversation ID (UUID)", + "examples": [ + "c5260aec-4d82-4370-9fdf-05cf908b3f16" + ] + }, + "chat_history": { + "items": { + "$ref": "#/components/schemas/ConversationTurn" + }, + "type": "array", + "title": "Chat History", + "description": "The simplified chat history as a list of conversation turns", + "examples": [ + { + "completed_at": "2024-01-01T00:01:05Z", + "messages": [ + { + "content": "Hello", + "type": "user" + }, + { + "content": "Hi there!", + "type": "assistant" + } + ], + "model": "gpt-4o-mini", + "provider": "openai", + "started_at": "2024-01-01T00:01:00Z", + "tool_calls": [], + "tool_results": [] + } + ] + } + }, + "type": "object", + "required": [ + "conversation_id", + "chat_history" + ], + "title": "ConversationResponse", + "description": "Model representing a response for retrieving a conversation.\n\nAttributes:\n conversation_id: The conversation ID (UUID).\n chat_history: The chat history as a list of conversation turns.", + "examples": [ + { + "chat_history": [ + { + "completed_at": 
"2024-01-01T00:01:05Z", + "messages": [ + { + "content": "Hello", + "type": "user" + }, + { + "content": "Hi there!", + "type": "assistant" + } + ], + "model": "gpt-4o-mini", + "provider": "openai", + "started_at": "2024-01-01T00:01:00Z", + "tool_calls": [], + "tool_results": [] + } + ], + "conversation_id": "123e4567-e89b-12d3-a456-426614174000" + } + ] + }, + "ConversationTurn": { + "properties": { + "messages": { + "items": { + "$ref": "#/components/schemas/Message" + }, + "type": "array", + "title": "Messages", + "description": "List of messages in this turn" + }, + "tool_calls": { + "items": { + "$ref": "#/components/schemas/ToolCallSummary" + }, + "type": "array", + "title": "Tool Calls", + "description": "List of tool calls made in this turn" + }, + "tool_results": { + "items": { + "$ref": "#/components/schemas/ToolResultSummary" + }, + "type": "array", + "title": "Tool Results", + "description": "List of tool results from this turn" + }, + "provider": { + "type": "string", + "title": "Provider", + "description": "Provider identifier used for this turn", + "examples": [ + "openai" + ] + }, + "model": { + "type": "string", + "title": "Model", + "description": "Model identifier used for this turn", + "examples": [ + "gpt-4o-mini" + ] + }, + "started_at": { + "type": "string", + "title": "Started At", + "description": "ISO 8601 timestamp when the turn started", + "examples": [ + "2024-01-01T00:01:00Z" + ] + }, + "completed_at": { + "type": "string", + "title": "Completed At", + "description": "ISO 8601 timestamp when the turn completed", + "examples": [ + "2024-01-01T00:01:05Z" + ] + } + }, + "type": "object", + "required": [ + "provider", + "model", + "started_at", + "completed_at" + ], + "title": "ConversationTurn", + "description": "Model representing a single conversation turn.\n\nAttributes:\n messages: List of messages in this turn.\n tool_calls: List of tool calls made in this turn.\n tool_results: List of tool results from this turn.\n provider: 
Provider identifier used for this turn.\n model: Model identifier used for this turn.\n started_at: ISO 8601 timestamp when the turn started.\n completed_at: ISO 8601 timestamp when the turn completed." + }, + "ConversationUpdateRequest": { + "properties": { + "topic_summary": { + "type": "string", + "maxLength": 1000, + "minLength": 1, + "title": "Topic Summary", + "description": "The new topic summary for the conversation", + "examples": [ + "Discussion about machine learning algorithms" + ] + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "topic_summary" + ], + "title": "ConversationUpdateRequest", + "description": "Model representing a request to update a conversation topic summary.\n\nAttributes:\n topic_summary: The new topic summary for the conversation.\n\nExample:\n ```python\n update_request = ConversationUpdateRequest(\n topic_summary=\"Discussion about machine learning algorithms\"\n )\n ```" + }, + "ConversationUpdateResponse": { + "properties": { + "conversation_id": { + "type": "string", + "title": "Conversation Id", + "description": "The conversation ID (UUID) that was updated", + "examples": [ + "123e4567-e89b-12d3-a456-426614174000" + ] + }, + "success": { + "type": "boolean", + "title": "Success", + "description": "Whether the update was successful", + "examples": [ + true + ] + }, + "message": { + "type": "string", + "title": "Message", + "description": "A message about the update result", + "examples": [ + "Topic summary updated successfully" + ] + } + }, + "type": "object", + "required": [ + "conversation_id", + "success", + "message" + ], + "title": "ConversationUpdateResponse", + "description": "Model representing a response for updating a conversation topic summary.\n\nAttributes:\n conversation_id: The conversation ID (UUID) that was updated.\n success: Whether the update was successful.\n message: A message about the update result.\n\nExample:\n ```python\n update_response = ConversationUpdateResponse(\n 
conversation_id=\"123e4567-e89b-12d3-a456-426614174000\",\n success=True,\n message=\"Topic summary updated successfully\",\n )\n ```", + "examples": [ + { + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "message": "Topic summary updated successfully", + "success": true + } + ] + }, + "ConversationsListResponse": { + "properties": { + "conversations": { + "items": { + "$ref": "#/components/schemas/ConversationDetails" + }, + "type": "array", + "title": "Conversations" + } + }, + "type": "object", + "required": [ + "conversations" + ], + "title": "ConversationsListResponse", + "description": "Model representing a response for listing conversations of a user.\n\nAttributes:\n conversations: List of conversation details associated with the user.", + "examples": [ + { + "conversations": [ + { + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "created_at": "2024-01-01T00:00:00Z", + "last_message_at": "2024-01-01T00:05:00Z", + "last_used_model": "gemini/gemini-2.0-flash", + "last_used_provider": "gemini", + "message_count": 5, + "topic_summary": "Openshift Microservices Deployment Strategies" + }, + { + "conversation_id": "456e7890-e12b-34d5-a678-901234567890", + "created_at": "2024-01-01T01:00:00Z", + "last_used_model": "gemini/gemini-2.5-flash", + "last_used_provider": "gemini", + "message_count": 2, + "topic_summary": "RHDH Purpose Summary" + } + ] + } + ] + }, + "ConversationsListResponseV2": { + "properties": { + "conversations": { + "items": { + "$ref": "#/components/schemas/ConversationData" + }, + "type": "array", + "title": "Conversations" + } + }, + "type": "object", + "required": [ + "conversations" + ], + "title": "ConversationsListResponseV2", + "description": "Model representing a response for listing conversations of a user.\n\nAttributes:\n conversations: List of conversation data associated with the user.", + "examples": [ + { + "conversations": [ + { + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + 
"last_message_timestamp": 1704067200.0, + "topic_summary": "Openshift Microservices Deployment Strategies" + } + ] + } + ] + }, + "CustomProfile": { + "properties": { + "path": { + "type": "string", + "title": "Path to custom profile", + "description": "Path to Python modules containing custom profile." + }, + "prompts": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "title": "System prompts", + "description": "Dictionary containing map of system prompts", + "default": {} + } + }, + "type": "object", + "required": [ + "path" + ], + "title": "CustomProfile", + "description": "Custom profile customization for prompts and validation." + }, + "Customization": { + "properties": { + "profile_path": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Profile Path" + }, + "disable_query_system_prompt": { + "type": "boolean", + "title": "Disable Query System Prompt", + "default": false + }, + "disable_shield_ids_override": { + "type": "boolean", + "title": "Disable Shield Ids Override", + "default": false + }, + "system_prompt_path": { + "anyOf": [ + { + "type": "string", + "format": "file-path" + }, + { + "type": "null" + } + ], + "title": "System Prompt Path" + }, + "system_prompt": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "System Prompt" + }, + "agent_card_path": { + "anyOf": [ + { + "type": "string", + "format": "file-path" + }, + { + "type": "null" + } + ], + "title": "Agent Card Path" + }, + "agent_card_config": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Agent Card Config" + }, + "custom_profile": { + "anyOf": [ + { + "$ref": "#/components/schemas/CustomProfile" + }, + { + "type": "null" + } + ] + } + }, + "additionalProperties": false, + "type": "object", + "title": "Customization", + "description": "Service customization." 
+ }, + "DatabaseConfiguration": { + "properties": { + "sqlite": { + "anyOf": [ + { + "$ref": "#/components/schemas/SQLiteDatabaseConfiguration" + }, + { + "type": "null" + } + ], + "title": "SQLite configuration", + "description": "SQLite database configuration" + }, + "postgres": { + "anyOf": [ + { + "$ref": "#/components/schemas/PostgreSQLDatabaseConfiguration" + }, + { + "type": "null" + } + ], + "title": "PostgreSQL configuration", + "description": "PostgreSQL database configuration" + } + }, + "additionalProperties": false, + "type": "object", + "title": "DatabaseConfiguration", + "description": "Database configuration." + }, + "DetailModel": { + "properties": { + "response": { + "type": "string", + "title": "Response", + "description": "Short summary of the error" + }, + "cause": { + "type": "string", + "title": "Cause", + "description": "Detailed explanation of what caused the error" + } + }, + "type": "object", + "required": [ + "response", + "cause" + ], + "title": "DetailModel", + "description": "Nested detail model for error responses." + }, + "FeedbackCategory": { + "type": "string", + "enum": [ + "incorrect", + "not_relevant", + "incomplete", + "outdated_information", + "unsafe", + "other" + ], + "title": "FeedbackCategory", + "description": "Enum representing predefined feedback categories for AI responses.\n\nThese categories help provide structured feedback about AI inference quality\nwhen users provide negative feedback (thumbs down). Multiple categories can\nbe selected to provide comprehensive feedback about response issues." + }, + "FeedbackRequest": { + "properties": { + "conversation_id": { + "type": "string", + "title": "Conversation Id", + "description": "The required conversation ID (UUID)", + "examples": [ + "c5260aec-4d82-4370-9fdf-05cf908b3f16" + ] + }, + "user_question": { + "type": "string", + "title": "User Question", + "description": "User question (the query string)", + "examples": [ + "What is Kubernetes?" 
+ ] + }, + "llm_response": { + "type": "string", + "title": "Llm Response", + "description": "Response from LLM", + "examples": [ + "Kubernetes is an open-source container orchestration system for automating ..." + ] + }, + "sentiment": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Sentiment", + "description": "User sentiment, if provided must be -1 or 1", + "examples": [ + -1, + 1 + ] + }, + "user_feedback": { + "anyOf": [ + { + "type": "string", + "maxLength": 4096 + }, + { + "type": "null" + } + ], + "title": "User Feedback", + "description": "Feedback on the LLM response.", + "examples": [ + "I'm not satisfied with the response because it is too vague." + ] + }, + "categories": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/FeedbackCategory" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Categories", + "description": "List of feedback categories that describe issues with the LLM response (for negative feedback).", + "examples": [ + [ + "incorrect", + "incomplete" + ] + ] + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "conversation_id", + "user_question", + "llm_response" + ], + "title": "FeedbackRequest", + "description": "Model representing a feedback request.\n\nAttributes:\n conversation_id: The required conversation ID (UUID).\n user_question: The required user question.\n llm_response: The required LLM response.\n sentiment: The optional sentiment.\n user_feedback: The optional user feedback.\n categories: The optional list of feedback categories (multi-select for negative feedback).\n\nExample:\n ```python\n feedback_request = FeedbackRequest(\n conversation_id=\"12345678-abcd-0000-0123-456789abcdef\",\n user_question=\"what are you doing?\",\n user_feedback=\"This response is not helpful\",\n llm_response=\"I don't know\",\n sentiment=-1,\n categories=[FeedbackCategory.INCORRECT, FeedbackCategory.INCOMPLETE]\n )\n ```", + "examples": [ + { + 
"conversation_id": "12345678-abcd-0000-0123-456789abcdef", + "llm_response": "bar", + "sentiment": -1, + "user_feedback": "Not satisfied with the response quality.", + "user_question": "foo" + }, + { + "categories": [ + "incorrect" + ], + "conversation_id": "12345678-abcd-0000-0123-456789abcdef", + "llm_response": "The capital of France is Berlin.", + "sentiment": -1, + "user_question": "What is the capital of France?" + }, + { + "categories": [ + "incomplete", + "not_relevant" + ], + "conversation_id": "12345678-abcd-0000-0123-456789abcdef", + "llm_response": "Use Docker.", + "sentiment": -1, + "user_feedback": "This response is too general and doesn't provide specific steps.", + "user_question": "How do I deploy a web app?" + } + ] + }, + "FeedbackResponse": { + "properties": { + "response": { + "type": "string", + "title": "Response", + "description": "The response of the feedback request.", + "examples": [ + "feedback received" + ] + } + }, + "type": "object", + "required": [ + "response" + ], + "title": "FeedbackResponse", + "description": "Model representing a response to a feedback request.\n\nAttributes:\n response: The response of the feedback request.\n\nExample:\n ```python\n feedback_response = FeedbackResponse(response=\"feedback received\")\n ```", + "examples": [ + { + "response": "feedback received" + } + ] + }, + "FeedbackStatusUpdateRequest": { + "properties": { + "status": { + "type": "boolean", + "title": "Status", + "description": "Desired state of feedback enablement, must be False or True", + "default": false, + "examples": [ + true, + false + ] + } + }, + "additionalProperties": false, + "type": "object", + "title": "FeedbackStatusUpdateRequest", + "description": "Model representing a feedback status update request.\n\nAttributes:\n status: Value of the desired feedback enabled state.\n\nExample:\n ```python\n feedback_request = FeedbackRequest(\n status=false\n )\n ```" + }, + "FeedbackStatusUpdateResponse": { + "properties": { + "status": 
{ + "additionalProperties": true, + "type": "object", + "title": "Status" + } + }, + "type": "object", + "required": [ + "status" + ], + "title": "FeedbackStatusUpdateResponse", + "description": "Model representing a response to a feedback status update request.\n\nAttributes:\n status: The previous and current status of the service and who updated it.\n\nExample:\n ```python\n status_response = StatusResponse(\n status={\n \"previous_status\": true,\n \"updated_status\": false,\n \"updated_by\": \"user/test\",\n \"timestamp\": \"2023-03-15 12:34:56\"\n },\n )\n ```", + "examples": [ + { + "status": { + "previous_status": true, + "timestamp": "2023-03-15 12:34:56", + "updated_by": "user/test", + "updated_status": false + } + } + ] + }, + "ForbiddenResponse": { "properties": { - "sqlite": { - "anyOf": [ - { - "$ref": "#/components/schemas/SQLiteDatabaseConfiguration" - }, - { - "type": "null" - } - ], - "title": "SQLite configuration", - "description": "SQLite database configuration for A2A state storage." + "status_code": { + "type": "integer", + "title": "Status Code" }, - "postgres": { + "detail": { + "$ref": "#/components/schemas/DetailModel" + } + }, + "type": "object", + "required": [ + "status_code", + "detail" + ], + "title": "ForbiddenResponse", + "description": "403 Forbidden. 
Access denied.", + "examples": [ + { + "detail": { + "cause": "User 6789 does not have permission to read conversation with ID 123e4567-e89b-12d3-a456-426614174000", + "response": "User does not have permission to perform this action" + }, + "label": "conversation read" + }, + { + "detail": { + "cause": "User 6789 does not have permission to delete conversation with ID 123e4567-e89b-12d3-a456-426614174000", + "response": "User does not have permission to perform this action" + }, + "label": "conversation delete" + }, + { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + }, + "label": "endpoint" + }, + { + "detail": { + "cause": "Storing feedback is disabled.", + "response": "Storing feedback is disabled" + }, + "label": "feedback" + }, + { + "detail": { + "cause": "User lacks model_override permission required to override model/provider.", + "response": "This instance does not permit overriding model/provider in the query request (missing permission: MODEL_OVERRIDE). Please remove the model and provider fields from your request." + }, + "label": "model override" + } + ] + }, + "HTTPAuthSecurityScheme": { + "properties": { + "bearerFormat": { "anyOf": [ { - "$ref": "#/components/schemas/PostgreSQLDatabaseConfiguration" + "type": "string" }, { "type": "null" } ], - "title": "PostgreSQL configuration", - "description": "PostgreSQL database configuration for A2A state storage." - } - }, - "additionalProperties": false, - "type": "object", - "title": "A2AStateConfiguration", - "description": "A2A protocol persistent state configuration.\n\nConfigures how A2A task state and context-to-conversation mappings are\nstored. 
For multi-worker deployments, use SQLite or PostgreSQL to ensure\nstate is shared across all workers.\n\nIf no configuration is provided, in-memory storage is used (default).\nThis is suitable for single-worker deployments but state will be lost\non restarts and not shared across workers.\n\nAttributes:\n sqlite: SQLite database configuration for A2A state storage.\n postgres: PostgreSQL database configuration for A2A state storage." - }, - "APIKeySecurityScheme": { - "properties": { + "title": "Bearerformat" + }, "description": { "anyOf": [ { @@ -4552,340 +7201,411 @@ ], "title": "Description" }, - "in": { - "$ref": "#/components/schemas/In" - }, - "name": { + "scheme": { "type": "string", - "title": "Name" + "title": "Scheme" }, "type": { "type": "string", - "const": "apiKey", + "const": "http", "title": "Type", - "default": "apiKey" + "default": "http" } }, "type": "object", "required": [ - "in", - "name" + "scheme" ], - "title": "APIKeySecurityScheme", - "description": "Defines a security scheme using an API key." + "title": "HTTPAuthSecurityScheme", + "description": "Defines a security scheme using HTTP authentication." }, - "APIKeyTokenConfiguration": { + "HTTPValidationError": { "properties": { - "api_key": { - "type": "string", - "minLength": 1, - "format": "password", - "title": "API key", - "writeOnly": true, - "examples": [ - "some-api-key" - ] + "detail": { + "items": { + "$ref": "#/components/schemas/ValidationError" + }, + "type": "array", + "title": "Detail" } }, - "additionalProperties": false, "type": "object", - "required": [ - "api_key" - ], - "title": "APIKeyTokenConfiguration", - "description": "API Key Token configuration." 
+ "title": "HTTPValidationError" }, - "AccessRule": { + "ImplicitOAuthFlow": { "properties": { - "role": { + "authorizationUrl": { "type": "string", - "title": "Role name", - "description": "Name of the role" + "title": "Authorizationurl" }, - "actions": { - "items": { - "$ref": "#/components/schemas/Action" + "refreshUrl": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Refreshurl" + }, + "scopes": { + "additionalProperties": { + "type": "string" }, - "type": "array", - "title": "Allowed actions", - "description": "Allowed actions for this role" + "type": "object", + "title": "Scopes" } }, - "additionalProperties": false, "type": "object", "required": [ - "role", - "actions" + "authorizationUrl", + "scopes" ], - "title": "AccessRule", - "description": "Rule defining what actions a role can perform." + "title": "ImplicitOAuthFlow", + "description": "Defines configuration details for the OAuth 2.0 Implicit flow." }, - "Action": { + "In": { "type": "string", "enum": [ - "admin", - "list_other_conversations", - "read_other_conversations", - "query_other_conversations", - "delete_other_conversations", - "query", - "streaming_query", - "get_conversation", - "list_conversations", - "delete_conversation", - "update_conversation", - "feedback", - "get_models", - "get_tools", - "get_shields", - "list_providers", - "get_provider", - "list_rags", - "get_rag", - "get_metrics", - "get_config", - "info", - "model_override", - "rlsapi_v1_infer", - "a2a_agent_card", - "a2a_task_execution", - "a2a_message", - "a2a_jsonrpc" + "cookie", + "header", + "query" ], - "title": "Action", - "description": "Available actions in the system.\n\nNote: this is not a real model, just an enumeration of all action names." + "title": "In", + "description": "The location of the API key." 
}, - "AgentCapabilities": { + "InMemoryCacheConfig": { "properties": { - "extensions": { - "anyOf": [ - { - "items": { - "$ref": "#/components/schemas/AgentExtension" - }, - "type": "array" - }, - { - "type": "null" - } - ], - "title": "Extensions" - }, - "pushNotifications": { + "max_entries": { + "type": "integer", + "exclusiveMinimum": 0.0, + "title": "Max entries", + "description": "Maximum number of entries stored in the in-memory cache" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "max_entries" + ], + "title": "InMemoryCacheConfig", + "description": "In-memory cache configuration." + }, + "IncludeParameter": { + "type": "string", + "enum": [ + "web_search_call.action.sources", + "code_interpreter_call.outputs", + "computer_call_output.output.image_url", + "file_search_call.results", + "message.input_image.image_url", + "message.output_text.logprobs", + "reasoning.encrypted_content" + ] + }, + "InferenceConfiguration": { + "properties": { + "default_model": { "anyOf": [ { - "type": "boolean" + "type": "string" }, { "type": "null" } ], - "title": "Pushnotifications" + "title": "Default model", + "description": "Identification of default model used when no other model is specified." }, - "stateTransitionHistory": { + "default_provider": { "anyOf": [ { - "type": "boolean" + "type": "string" }, { "type": "null" } ], - "title": "Statetransitionhistory" + "title": "Default provider", + "description": "Identification of default provider used when no other model is specified." + } + }, + "additionalProperties": false, + "type": "object", + "title": "InferenceConfiguration", + "description": "Inference configuration." 
+ }, + "InfoResponse": { + "properties": { + "name": { + "type": "string", + "title": "Name", + "description": "Service name", + "examples": [ + "Lightspeed Stack" + ] }, - "streaming": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "null" - } - ], - "title": "Streaming" + "service_version": { + "type": "string", + "title": "Service Version", + "description": "Service version", + "examples": [ + "0.1.0", + "0.2.0", + "1.0.0" + ] + }, + "llama_stack_version": { + "type": "string", + "title": "Llama Stack Version", + "description": "Llama Stack version", + "examples": [ + "0.2.1", + "0.2.2", + "0.2.18", + "0.2.21", + "0.2.22" + ] } }, "type": "object", - "title": "AgentCapabilities", - "description": "Defines optional capabilities supported by an agent." + "required": [ + "name", + "service_version", + "llama_stack_version" + ], + "title": "InfoResponse", + "description": "Model representing a response to an info request.\n\nAttributes:\n name: Service name.\n service_version: Service version.\n llama_stack_version: Llama Stack version.\n\nExample:\n ```python\n info_response = InfoResponse(\n name=\"Lightspeed Stack\",\n service_version=\"1.0.0\",\n llama_stack_version=\"0.2.22\",\n )\n ```", + "examples": [ + { + "llama_stack_version": "1.0.0", + "name": "Lightspeed Stack", + "service_version": "1.0.0" + } + ] }, - "AgentCard": { + "InternalServerErrorResponse": { "properties": { - "additionalInterfaces": { - "anyOf": [ - { - "items": { - "$ref": "#/components/schemas/AgentInterface" - }, - "type": "array" - }, - { - "type": "null" - } - ], - "title": "Additionalinterfaces" + "status_code": { + "type": "integer", + "title": "Status Code" }, - "capabilities": { - "$ref": "#/components/schemas/AgentCapabilities" + "detail": { + "$ref": "#/components/schemas/DetailModel" + } + }, + "type": "object", + "required": [ + "status_code", + "detail" + ], + "title": "InternalServerErrorResponse", + "description": "500 Internal Server Error.", + "examples": [ + { + 
"detail": { + "cause": "An unexpected error occurred while processing the request.", + "response": "Internal server error" + }, + "label": "internal" }, - "defaultInputModes": { - "items": { - "type": "string" + { + "detail": { + "cause": "Lightspeed Stack configuration has not been initialized.", + "response": "Configuration is not loaded" }, - "type": "array", - "title": "Defaultinputmodes" + "label": "configuration" }, - "defaultOutputModes": { - "items": { - "type": "string" + { + "detail": { + "cause": "Failed to store feedback at directory: /path/example", + "response": "Failed to store feedback" }, - "type": "array", - "title": "Defaultoutputmodes" + "label": "feedback storage" }, - "description": { + { + "detail": { + "cause": "Failed to call backend API", + "response": "Error while processing query" + }, + "label": "query" + }, + { + "detail": { + "cause": "Conversation cache is not configured or unavailable.", + "response": "Conversation cache not configured" + }, + "label": "conversation cache" + }, + { + "detail": { + "cause": "Failed to query the database", + "response": "Database query failed" + }, + "label": "database" + } + ] + }, + "JsonPathOperator": { + "type": "string", + "enum": [ + "equals", + "contains", + "in", + "match" + ], + "title": "JsonPathOperator", + "description": "Supported operators for JSONPath evaluation.\n\nNote: this is not a real model, just an enumeration of all supported JSONPath operators." + }, + "JwkConfiguration": { + "properties": { + "url": { "type": "string", - "title": "Description", - "examples": [ - "Agent that helps users with recipes and cooking." - ] + "minLength": 1, + "format": "uri", + "title": "URL", + "description": "HTTPS URL of the JWK (JSON Web Key) set used to validate JWTs." 
}, - "documentationUrl": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "title": "Documentationurl" + "jwt_configuration": { + "$ref": "#/components/schemas/JwtConfiguration", + "title": "JWT configuration", + "description": "JWT (JSON Web Token) configuration" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "url" + ], + "title": "JwkConfiguration", + "description": "JWK (JSON Web Key) configuration.\n\nA JSON Web Key (JWK) is a JavaScript Object Notation (JSON) data structure\nthat represents a cryptographic key.\n\nUseful resources:\n\n - [JSON Web Key](https://openid.net/specs/draft-jones-json-web-key-03.html)\n - [RFC 7517](https://www.rfc-editor.org/rfc/rfc7517)" + }, + "JwtConfiguration": { + "properties": { + "user_id_claim": { + "type": "string", + "title": "User ID claim", + "description": "JWT claim name that uniquely identifies the user (subject ID).", + "default": "user_id" }, - "iconUrl": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "title": "Iconurl" + "username_claim": { + "type": "string", + "title": "Username claim", + "description": "JWT claim name that provides the human-readable username.", + "default": "username" }, - "name": { + "role_rules": { + "items": { + "$ref": "#/components/schemas/JwtRoleRule" + }, + "type": "array", + "title": "Role rules", + "description": "Rules for extracting roles from JWT claims" + } + }, + "additionalProperties": false, + "type": "object", + "title": "JwtConfiguration", + "description": "JWT (JSON Web Token) configuration.\n\nJSON Web Token (JWT) is a compact, URL-safe means of representing\nclaims to be transferred between two parties. 
The claims in a JWT\nare encoded as a JSON object that is used as the payload of a JSON\nWeb Signature (JWS) structure or as the plaintext of a JSON Web\nEncryption (JWE) structure, enabling the claims to be digitally\nsigned or integrity protected with a Message Authentication Code\n(MAC) and/or encrypted.\n\nUseful resources:\n\n - [JSON Web Token](https://en.wikipedia.org/wiki/JSON_Web_Token)\n - [RFC 7519](https://datatracker.ietf.org/doc/html/rfc7519)\n - [JSON Web Tokens](https://auth0.com/docs/secure/tokens/json-web-tokens)" + }, + "JwtRoleRule": { + "properties": { + "jsonpath": { "type": "string", - "title": "Name", - "examples": [ - "Recipe Agent" - ] + "title": "JSON path", + "description": "JSONPath expression to evaluate against the JWT payload" }, - "preferredTransport": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "title": "Preferredtransport", - "default": "JSONRPC", - "examples": [ - "JSONRPC", - "GRPC", - "HTTP+JSON" - ] + "operator": { + "$ref": "#/components/schemas/JsonPathOperator", + "title": "Operator", + "description": "JSON path comparison operator" }, - "protocolVersion": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "title": "Protocolversion", - "default": "0.3.0" + "negate": { + "type": "boolean", + "title": "Negate rule", + "description": "If set to true, the meaning of the rule is negated", + "default": false }, - "provider": { - "anyOf": [ - { - "$ref": "#/components/schemas/AgentProvider" - }, - { - "type": "null" - } - ] + "value": { + "title": "Value", + "description": "Value to compare against" }, - "security": { - "anyOf": [ - { - "items": { - "additionalProperties": { - "items": { - "type": "string" - }, - "type": "array" - }, - "type": "object" - }, - "type": "array" - }, - { - "type": "null" - } - ], - "title": "Security", + "roles": { + "items": { + "type": "string" + }, + "type": "array", + "title": "List of roles", + "description": "Roles to be assigned if the rule 
matches" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "jsonpath", + "operator", + "value", + "roles" + ], + "title": "JwtRoleRule", + "description": "Rule for extracting roles from JWT claims." + }, + "LivenessResponse": { + "properties": { + "alive": { + "type": "boolean", + "title": "Alive", + "description": "Flag indicating that the app is alive", "examples": [ - [ - { - "oauth": [ - "read" - ] - }, - { - "api-key": [], - "mtls": [] - } - ] + true, + false ] - }, - "securitySchemes": { + } + }, + "type": "object", + "required": [ + "alive" + ], + "title": "LivenessResponse", + "description": "Model representing a response to a liveness request.\n\nAttributes:\n alive: If app is alive.\n\nExample:\n ```python\n liveness_response = LivenessResponse(alive=True)\n ```", + "examples": [ + { + "alive": true + } + ] + }, + "LlamaStackConfiguration": { + "properties": { + "url": { "anyOf": [ { - "additionalProperties": { - "$ref": "#/components/schemas/SecurityScheme" - }, - "type": "object" + "type": "string", + "minLength": 1, + "format": "uri" }, { "type": "null" } ], - "title": "Securityschemes" + "title": "Llama Stack URL", + "description": "URL to Llama Stack service; used when library mode is disabled. Must be a valid HTTP or HTTPS URL." 
}, - "signatures": { + "api_key": { "anyOf": [ { - "items": { - "$ref": "#/components/schemas/AgentCardSignature" - }, - "type": "array" + "type": "string", + "format": "password", + "writeOnly": true }, { "type": "null" } ], - "title": "Signatures" - }, - "skills": { - "items": { - "$ref": "#/components/schemas/AgentSkill" - }, - "type": "array", - "title": "Skills" + "title": "API key", + "description": "API key to access Llama Stack service" }, - "supportsAuthenticatedExtendedCard": { + "use_as_library_client": { "anyOf": [ { "type": "boolean" @@ -4894,198 +7614,155 @@ "type": "null" } ], - "title": "Supportsauthenticatedextendedcard" - }, - "url": { - "type": "string", - "title": "Url", - "examples": [ - "https://api.example.com/a2a/v1" - ] + "title": "Use as library", + "description": "When set to true Llama Stack will be used in library mode, not in server mode (default)" }, - "version": { - "type": "string", - "title": "Version", - "examples": [ - "1.0.0" - ] - } - }, - "type": "object", - "required": [ - "capabilities", - "defaultInputModes", - "defaultOutputModes", - "description", - "name", - "skills", - "url", - "version" - ], - "title": "AgentCard", - "description": "The AgentCard is a self-describing manifest for an agent. It provides essential\nmetadata including the agent's identity, capabilities, skills, supported\ncommunication methods, and security requirements." 
- }, - "AgentCardSignature": { - "properties": { - "header": { + "library_client_config_path": { "anyOf": [ { - "additionalProperties": true, - "type": "object" + "type": "string" }, { "type": "null" } ], - "title": "Header" - }, - "protected": { - "type": "string", - "title": "Protected" + "title": "Llama Stack configuration path", + "description": "Path to configuration file used when Llama Stack is run in library mode" }, - "signature": { - "type": "string", - "title": "Signature" + "timeout": { + "type": "integer", + "exclusiveMinimum": 0.0, + "title": "Request timeout", + "description": "Timeout in seconds for requests to Llama Stack service. Default is 180 seconds (3 minutes) to accommodate long-running RAG queries.", + "default": 180 } }, + "additionalProperties": false, "type": "object", - "required": [ - "protected", - "signature" - ], - "title": "AgentCardSignature", - "description": "AgentCardSignature represents a JWS signature of an AgentCard.\nThis follows the JSON format of an RFC 7515 JSON Web Signature (JWS)." 
+ "title": "LlamaStackConfiguration", + "description": "Llama stack configuration.\n\nLlama Stack is a comprehensive system that provides a uniform set of tools\nfor building, scaling, and deploying generative AI applications, enabling\ndevelopers to create, integrate, and orchestrate multiple AI services and\ncapabilities into an adaptable setup.\n\nUseful resources:\n\n - [Llama Stack](https://www.llama.com/products/llama-stack/)\n - [Python Llama Stack client](https://github.com/llamastack/llama-stack-client-python)\n - [Build AI Applications with Llama Stack](https://llamastack.github.io/)" }, - "AgentExtension": { + "MCPClientAuthOptionsResponse": { "properties": { - "description": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "title": "Description" - }, - "params": { - "anyOf": [ + "servers": { + "items": { + "$ref": "#/components/schemas/MCPServerAuthInfo" + }, + "type": "array", + "title": "Servers", + "description": "List of MCP servers that accept client-provided authorization" + } + }, + "type": "object", + "title": "MCPClientAuthOptionsResponse", + "description": "Response containing MCP servers that accept client-provided authorization.", + "examples": [ + { + "servers": [ { - "additionalProperties": true, - "type": "object" + "client_auth_headers": [ + "Authorization" + ], + "name": "github" }, { - "type": "null" + "client_auth_headers": [ + "Authorization", + "X-API-Key" + ], + "name": "gitlab" } - ], - "title": "Params" + ] + } + ] + }, + "MCPListToolsTool": { + "properties": { + "input_schema": { + "additionalProperties": true, + "type": "object", + "title": "Input Schema" }, - "required": { + "name": { + "type": "string", + "title": "Name" + }, + "description": { "anyOf": [ { - "type": "boolean" + "type": "string" }, { "type": "null" } ], - "title": "Required" - }, - "uri": { - "type": "string", - "title": "Uri" + "title": "Description" } }, "type": "object", "required": [ - "uri" + "input_schema", + "name" ], - 
"title": "AgentExtension", - "description": "A declaration of a protocol extension supported by an Agent." + "title": "MCPListToolsTool", + "description": "Tool definition returned by MCP list tools operation.\n\n:param input_schema: JSON schema defining the tool's input parameters\n:param name: Name of the tool\n:param description: (Optional) Description of what the tool does" }, - "AgentInterface": { + "MCPServerAuthInfo": { "properties": { - "transport": { + "name": { "type": "string", - "title": "Transport", - "examples": [ - "JSONRPC", - "GRPC", - "HTTP+JSON" - ] + "title": "Name", + "description": "MCP server name" }, - "url": { - "type": "string", - "title": "Url", - "examples": [ - "https://api.example.com/a2a/v1", - "https://grpc.example.com/a2a", - "https://rest.example.com/v1" - ] + "client_auth_headers": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Client Auth Headers", + "description": "List of authentication header names for client-provided tokens" } }, "type": "object", "required": [ - "transport", - "url" + "name", + "client_auth_headers" ], - "title": "AgentInterface", - "description": "Declares a combination of a target URL and a transport protocol for interacting with the agent.\nThis allows agents to expose the same functionality over multiple transport mechanisms." + "title": "MCPServerAuthInfo", + "description": "Information about MCP server client authentication options." }, - "AgentProvider": { + "Message": { "properties": { - "organization": { + "content": { "type": "string", - "title": "Organization" + "title": "Content", + "description": "The message content", + "examples": [ + "Hello, how can I help you?" + ] }, - "url": { - "type": "string", - "title": "Url" - } - }, - "type": "object", - "required": [ - "organization", - "url" - ], - "title": "AgentProvider", - "description": "Represents the service provider of an agent." 
- }, - "AgentSkill": { - "properties": { - "description": { + "type": { "type": "string", - "title": "Description" - }, - "examples": { - "anyOf": [ - { - "items": { - "type": "string" - }, - "type": "array" - }, - { - "type": "null" - } + "enum": [ + "user", + "assistant", + "system", + "developer" ], - "title": "Examples", + "title": "Type", + "description": "The type of message", "examples": [ - [ - "I need a recipe for bread" - ] + "user", + "assistant", + "system", + "developer" ] }, - "id": { - "type": "string", - "title": "Id" - }, - "inputModes": { + "referenced_documents": { "anyOf": [ { "items": { - "type": "string" + "$ref": "#/components/schemas/ReferencedDocument" }, "type": "array" }, @@ -5093,838 +7770,924 @@ "type": "null" } ], - "title": "Inputmodes" - }, + "title": "Referenced Documents", + "description": "List of documents referenced in the response (assistant messages only)" + } + }, + "type": "object", + "required": [ + "content", + "type" + ], + "title": "Message", + "description": "Model representing a message in a conversation turn.\n\nAttributes:\n content: The message content.\n type: The type of message.\n referenced_documents: Optional list of documents referenced in an assistant response." + }, + "ModelContextProtocolServer": { + "properties": { "name": { "type": "string", - "title": "Name" + "title": "MCP name", + "description": "MCP server name that must be unique" }, - "outputModes": { + "provider_id": { + "type": "string", + "title": "Provider ID", + "description": "MCP provider identification", + "default": "model-context-protocol" + }, + "url": { + "type": "string", + "title": "MCP server URL", + "description": "URL of the MCP server" + }, + "authorization_headers": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "title": "Authorization headers", + "description": "Headers to send to the MCP server. The map contains the header name and the path to a file containing the header value (secret). 
There are 3 special cases: 1. Usage of the kubernetes token in the header. To specify this use a string 'kubernetes' instead of the file path. 2. Usage of the client-provided token in the header. To specify this use a string 'client' instead of the file path. 3. Usage of the oauth token in the header. To specify this use a string 'oauth' instead of the file path. " + }, + "headers": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Propagated headers", + "description": "List of HTTP header names to automatically forward from the incoming request to this MCP server. Headers listed here are extracted from the original client request and included when calling the MCP server. This is useful when infrastructure components (e.g. API gateways) inject headers that MCP servers need, such as x-rh-identity in HCC. Header matching is case-insensitive. These headers are additive with authorization_headers and MCP-HEADERS." + }, + "timeout": { "anyOf": [ { - "items": { - "type": "string" - }, - "type": "array" + "type": "integer", + "exclusiveMinimum": 0.0 }, { "type": "null" } ], - "title": "Outputmodes" - }, - "security": { + "title": "Request timeout", + "description": "Timeout in seconds for requests to the MCP server. If not specified, the default timeout from Llama Stack will be used. Note: This field is reserved for future use when Llama Stack adds timeout support." + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "name", + "url" + ], + "title": "ModelContextProtocolServer", + "description": "Model context protocol server configuration.\n\nMCP (Model Context Protocol) servers provide tools and capabilities to the\nAI agents. These are configured by this structure. Only MCP servers\ndefined in the lightspeed-stack.yaml configuration are available to the\nagents. 
Tools configured in the llama-stack run.yaml are not accessible to\nlightspeed-core agents.\n\nUseful resources:\n\n- [Model Context Protocol](https://modelcontextprotocol.io/docs/getting-started/intro)\n- [MCP FAQs](https://modelcontextprotocol.io/faqs)\n- [Wikipedia article](https://en.wikipedia.org/wiki/Model_Context_Protocol)" + }, + "ModelsResponse": { + "properties": { + "models": { + "items": { + "additionalProperties": true, + "type": "object" + }, + "type": "array", + "title": "Models", + "description": "List of models available" + } + }, + "type": "object", + "required": [ + "models" + ], + "title": "ModelsResponse", + "description": "Model representing a response to models request.", + "examples": [ + { + "models": [ + { + "api_model_type": "llm", + "identifier": "openai/gpt-4-turbo", + "metadata": {}, + "model_type": "llm", + "provider_id": "openai", + "provider_resource_id": "gpt-4-turbo", + "type": "model" + } + ] + } + ] + }, + "MutualTLSSecurityScheme": { + "properties": { + "description": { "anyOf": [ { - "items": { - "additionalProperties": { - "items": { - "type": "string" - }, - "type": "array" - }, - "type": "object" - }, - "type": "array" + "type": "string" }, { "type": "null" } ], - "title": "Security", - "examples": [ - [ - { - "google": [ - "oidc" - ] - } - ] - ] + "title": "Description" }, - "tags": { - "items": { - "type": "string" - }, - "type": "array", - "title": "Tags", - "examples": [ - [ - "cooking", - "customer support", - "billing" - ] - ] + "type": { + "type": "string", + "const": "mutualTLS", + "title": "Type", + "default": "mutualTLS" } }, "type": "object", - "required": [ - "description", - "id", - "name", - "tags" - ], - "title": "AgentSkill", - "description": "Represents a distinct capability or function that an agent can perform." + "title": "MutualTLSSecurityScheme", + "description": "Defines a security scheme using mTLS authentication." 
}, - "Attachment": { + "NotFoundResponse": { "properties": { - "attachment_type": { - "type": "string", - "title": "Attachment Type", - "description": "The attachment type, like 'log', 'configuration' etc.", - "examples": [ - "log" - ] - }, - "content_type": { - "type": "string", - "title": "Content Type", - "description": "The content type as defined in MIME standard", - "examples": [ - "text/plain" - ] + "status_code": { + "type": "integer", + "title": "Status Code" }, - "content": { - "type": "string", - "title": "Content", - "description": "The actual attachment content", - "examples": [ - "warning: quota exceeded" - ] + "detail": { + "$ref": "#/components/schemas/DetailModel" } }, - "additionalProperties": false, "type": "object", "required": [ - "attachment_type", - "content_type", - "content" + "status_code", + "detail" ], - "title": "Attachment", - "description": "Model representing an attachment that can be send from the UI as part of query.\n\nA list of attachments can be an optional part of 'query' request.\n\nAttributes:\n attachment_type: The attachment type, like \"log\", \"configuration\" etc.\n content_type: The content type as defined in MIME standard\n content: The actual attachment content\n\nYAML attachments with **kind** and **metadata/name** attributes will\nbe handled as resources with the specified name:\n```\nkind: Pod\nmetadata:\n name: private-reg\n```", + "title": "NotFoundResponse", + "description": "404 Not Found - Resource does not exist.", "examples": [ { - "attachment_type": "log", - "content": "this is attachment", - "content_type": "text/plain" + "detail": { + "cause": "Conversation with ID 123e4567-e89b-12d3-a456-426614174000 does not exist", + "response": "Conversation not found" + }, + "label": "conversation" }, { - "attachment_type": "configuration", - "content": "kind: Pod\n metadata:\n name: private-reg", - "content_type": "application/yaml" + "detail": { + "cause": "Provider with ID openai does not exist", + "response": 
"Provider not found" + }, + "label": "provider" }, { - "attachment_type": "configuration", - "content": "foo: bar", - "content_type": "application/yaml" + "detail": { + "cause": "Model with ID gpt-4-turbo is not configured", + "response": "Model not found" + }, + "label": "model" + }, + { + "detail": { + "cause": "Rag with ID vs_7b52a8cf-0fa3-489c-beab-27e061d102f3 does not exist", + "response": "Rag not found" + }, + "label": "rag" + }, + { + "detail": { + "cause": "Streaming Request with ID 123e4567-e89b-12d3-a456-426614174000 does not exist", + "response": "Streaming Request not found" + }, + "label": "streaming request" } ] }, - "AuthenticationConfiguration": { + "OAuth2SecurityScheme": { "properties": { - "module": { - "type": "string", - "title": "Module", - "default": "noop" - }, - "skip_tls_verification": { - "type": "boolean", - "title": "Skip Tls Verification", - "default": false - }, - "skip_for_health_probes": { - "type": "boolean", - "title": "Skip authorization for probes", - "description": "Skip authorization for readiness and liveness probes", - "default": false - }, - "k8s_cluster_api": { + "description": { "anyOf": [ { - "type": "string", - "minLength": 1, - "format": "uri" + "type": "string" }, { "type": "null" } ], - "title": "K8S Cluster Api" + "title": "Description" }, - "k8s_ca_cert_path": { + "flows": { + "$ref": "#/components/schemas/OAuthFlows" + }, + "oauth2MetadataUrl": { "anyOf": [ { - "type": "string", - "format": "file-path" + "type": "string" }, { "type": "null" } ], - "title": "K8S Ca Cert Path" + "title": "Oauth2Metadataurl" }, - "jwk_config": { + "type": { + "type": "string", + "const": "oauth2", + "title": "Type", + "default": "oauth2" + } + }, + "type": "object", + "required": [ + "flows" + ], + "title": "OAuth2SecurityScheme", + "description": "Defines a security scheme using OAuth 2.0." 
+ }, + "OAuthFlows": { + "properties": { + "authorizationCode": { "anyOf": [ { - "$ref": "#/components/schemas/JwkConfiguration" + "$ref": "#/components/schemas/AuthorizationCodeOAuthFlow" }, { "type": "null" } ] }, - "api_key_config": { + "clientCredentials": { "anyOf": [ { - "$ref": "#/components/schemas/APIKeyTokenConfiguration" + "$ref": "#/components/schemas/ClientCredentialsOAuthFlow" }, { "type": "null" } ] }, - "rh_identity_config": { + "implicit": { "anyOf": [ { - "$ref": "#/components/schemas/RHIdentityConfiguration" + "$ref": "#/components/schemas/ImplicitOAuthFlow" }, { "type": "null" } ] - } - }, - "additionalProperties": false, - "type": "object", - "title": "AuthenticationConfiguration", - "description": "Authentication configuration." - }, - "AuthorizationCodeOAuthFlow": { - "properties": { - "authorizationUrl": { - "type": "string", - "title": "Authorizationurl" }, - "refreshUrl": { + "password": { "anyOf": [ { - "type": "string" + "$ref": "#/components/schemas/PasswordOAuthFlow" }, { "type": "null" } - ], - "title": "Refreshurl" - }, - "scopes": { - "additionalProperties": { - "type": "string" - }, - "type": "object", - "title": "Scopes" - }, - "tokenUrl": { - "type": "string", - "title": "Tokenurl" - } - }, - "type": "object", - "required": [ - "authorizationUrl", - "scopes", - "tokenUrl" - ], - "title": "AuthorizationCodeOAuthFlow", - "description": "Defines configuration details for the OAuth 2.0 Authorization Code flow." - }, - "AuthorizationConfiguration": { - "properties": { - "access_rules": { - "items": { - "$ref": "#/components/schemas/AccessRule" - }, - "type": "array", - "title": "Access rules", - "description": "Rules for role-based access control" + ] } }, - "additionalProperties": false, "type": "object", - "title": "AuthorizationConfiguration", - "description": "Authorization configuration." + "title": "OAuthFlows", + "description": "Defines the configuration for the supported OAuth 2.0 flows." 
}, - "AuthorizedResponse": { + "OkpConfiguration": { "properties": { - "user_id": { - "type": "string", - "title": "User Id", - "description": "User ID, for example UUID", - "examples": [ - "c5260aec-4d82-4370-9fdf-05cf908b3f16" - ] + "offline": { + "type": "boolean", + "title": "OKP offline mode", + "description": "When True, use parent_id for OKP chunk source URLs. When False, use reference_url for chunk source URLs.", + "default": true }, - "username": { + "chunk_filter_query": { "type": "string", - "title": "Username", - "description": "User name", - "examples": [ - "John Doe", - "Adam Smith" - ] - }, - "skip_userid_check": { - "type": "boolean", - "title": "Skip Userid Check", - "description": "Whether to skip the user ID check", - "examples": [ - true, - false - ] + "title": "OKP chunk filter query", + "description": "OKP filter query applied to every OKP search request. Defaults to 'is_chunk:true' to restrict results to chunk documents. To add extra constraints, extend the expression using boolean syntax, e.g. 'is_chunk:true AND product:*openshift*'.", + "default": "is_chunk:true" } }, + "additionalProperties": false, "type": "object", - "required": [ - "user_id", - "username", - "skip_userid_check" - ], - "title": "AuthorizedResponse", - "description": "Model representing a response to an authorization request.\n\nAttributes:\n user_id: The ID of the logged in user.\n username: The name of the logged in user.\n skip_userid_check: Whether to skip the user ID check.", - "examples": [ - { - "skip_userid_check": false, - "user_id": "123e4567-e89b-12d3-a456-426614174000", - "username": "user1" - } - ] + "title": "OkpConfiguration", + "description": "OKP (Offline Knowledge Portal) provider configuration.\n\nControls provider-specific behaviour for the OKP vector store.\nOnly relevant when ``\"okp\"`` is listed in ``rag.inline`` or ``rag.tool``." 
}, - "AzureEntraIdConfiguration": { + "OpenAIResponseAnnotationCitation": { "properties": { - "tenant_id": { + "type": { "type": "string", - "format": "password", - "title": "Tenant Id", - "writeOnly": true + "const": "url_citation", + "title": "Type", + "default": "url_citation" }, - "client_id": { - "type": "string", - "format": "password", - "title": "Client Id", - "writeOnly": true + "end_index": { + "type": "integer", + "title": "End Index" }, - "client_secret": { + "start_index": { + "type": "integer", + "title": "Start Index" + }, + "title": { "type": "string", - "format": "password", - "title": "Client Secret", - "writeOnly": true + "title": "Title" }, - "scope": { + "url": { "type": "string", - "title": "Token scope", - "description": "Azure Cognitive Services scope for token requests. Override only if using a different Azure service.", - "default": "https://cognitiveservices.azure.com/.default" + "title": "Url" } }, - "additionalProperties": false, "type": "object", "required": [ - "tenant_id", - "client_id", - "client_secret" + "end_index", + "start_index", + "title", + "url" ], - "title": "AzureEntraIdConfiguration", - "description": "Microsoft Entra ID authentication attributes for Azure." 
+ "title": "OpenAIResponseAnnotationCitation", + "description": "URL citation annotation for referencing external web resources.\n\n:param type: Annotation type identifier, always \"url_citation\"\n:param end_index: End position of the citation span in the content\n:param start_index: Start position of the citation span in the content\n:param title: Title of the referenced web resource\n:param url: URL of the referenced web resource" }, - "BadRequestResponse": { + "OpenAIResponseAnnotationContainerFileCitation": { "properties": { - "status_code": { + "type": { + "type": "string", + "const": "container_file_citation", + "title": "Type", + "default": "container_file_citation" + }, + "container_id": { + "type": "string", + "title": "Container Id" + }, + "end_index": { "type": "integer", - "title": "Status Code" + "title": "End Index" }, - "detail": { - "$ref": "#/components/schemas/DetailModel" + "file_id": { + "type": "string", + "title": "File Id" + }, + "filename": { + "type": "string", + "title": "Filename" + }, + "start_index": { + "type": "integer", + "title": "Start Index" } }, "type": "object", "required": [ - "status_code", - "detail" + "container_id", + "end_index", + "file_id", + "filename", + "start_index" ], - "title": "BadRequestResponse", - "description": "400 Bad Request. 
Invalid resource identifier.", - "examples": [ - { - "detail": { - "cause": "The conversation ID 123e4567-e89b-12d3-a456-426614174000 has invalid format.", - "response": "Invalid conversation ID format" - }, - "label": "conversation_id" - } - ] + "title": "OpenAIResponseAnnotationContainerFileCitation" }, - "ByokRag": { + "OpenAIResponseAnnotationFileCitation": { "properties": { - "rag_id": { + "type": { "type": "string", - "minLength": 1, - "title": "RAG ID", - "description": "Unique RAG ID" + "const": "file_citation", + "title": "Type", + "default": "file_citation" }, - "rag_type": { + "file_id": { "type": "string", - "minLength": 1, - "title": "RAG type", - "description": "Type of RAG database.", - "default": "inline::faiss" + "title": "File Id" }, - "embedding_model": { + "filename": { "type": "string", - "minLength": 1, - "title": "Embedding model", - "description": "Embedding model identification", - "default": "sentence-transformers/all-mpnet-base-v2" + "title": "Filename" }, - "embedding_dimension": { + "index": { "type": "integer", - "exclusiveMinimum": 0.0, - "title": "Embedding dimension", - "description": "Dimensionality of embedding vectors.", - "default": 768 - }, - "vector_db_id": { + "title": "Index" + } + }, + "type": "object", + "required": [ + "file_id", + "filename", + "index" + ], + "title": "OpenAIResponseAnnotationFileCitation", + "description": "File citation annotation for referencing specific files in response content.\n\n:param type: Annotation type identifier, always \"file_citation\"\n:param file_id: Unique identifier of the referenced file\n:param filename: Name of the referenced file\n:param index: Position index of the citation within the content" + }, + "OpenAIResponseAnnotationFilePath": { + "properties": { + "type": { "type": "string", - "minLength": 1, - "title": "Vector DB ID", - "description": "Vector database identification." 
+ "const": "file_path", + "title": "Type", + "default": "file_path" }, - "db_path": { + "file_id": { "type": "string", - "format": "file-path", - "title": "DB path", - "description": "Path to RAG database." + "title": "File Id" }, - "score_multiplier": { - "type": "number", - "exclusiveMinimum": 0.0, - "title": "Score multiplier", - "description": "Multiplier applied to relevance scores from this vector store. Used to weight results when querying multiple knowledge sources. Values > 1 boost this store's results; values < 1 reduce them.", - "default": 1.0 + "index": { + "type": "integer", + "title": "Index" } }, - "additionalProperties": false, "type": "object", "required": [ - "rag_id", - "vector_db_id", - "db_path" + "file_id", + "index" ], - "title": "ByokRag", - "description": "BYOK (Bring Your Own Knowledge) RAG configuration." + "title": "OpenAIResponseAnnotationFilePath" }, - "CORSConfiguration": { + "OpenAIResponseContentPartRefusal": { "properties": { - "allow_origins": { - "items": { - "type": "string" - }, - "type": "array", - "title": "Allow origins", - "description": "A list of origins allowed for cross-origin requests. An origin is the combination of protocol (http, https), domain (myapp.com, localhost, localhost.tiangolo.com), and port (80, 443, 8080). Use ['*'] to allow all origins.", - "default": [ - "*" - ] - }, - "allow_credentials": { - "type": "boolean", - "title": "Allow credentials", - "description": "Indicate that cookies should be supported for cross-origin requests", - "default": false - }, - "allow_methods": { - "items": { - "type": "string" - }, - "type": "array", - "title": "Allow methods", - "description": "A list of HTTP methods that should be allowed for cross-origin requests. 
You can use ['*'] to allow all standard methods.", - "default": [ - "*" - ] + "type": { + "type": "string", + "const": "refusal", + "title": "Type", + "default": "refusal" }, - "allow_headers": { - "items": { - "type": "string" - }, - "type": "array", - "title": "Allow headers", - "description": "A list of HTTP request headers that should be supported for cross-origin requests. You can use ['*'] to allow all headers. The Accept, Accept-Language, Content-Language and Content-Type headers are always allowed for simple CORS requests.", - "default": [ - "*" - ] + "refusal": { + "type": "string", + "title": "Refusal" } }, - "additionalProperties": false, "type": "object", - "title": "CORSConfiguration", - "description": "CORS configuration.\n\nCORS or 'Cross-Origin Resource Sharing' refers to the situations when a\nfrontend running in a browser has JavaScript code that communicates with a\nbackend, and the backend is in a different 'origin' than the frontend.\n\nUseful resources:\n\n - [CORS in FastAPI](https://fastapi.tiangolo.com/tutorial/cors/)\n - [Wikipedia article](https://en.wikipedia.org/wiki/Cross-origin_resource_sharing)\n - [What is CORS?](https://dev.to/akshay_chauhan/what-is-cors-explained-8f1)" + "required": [ + "refusal" + ], + "title": "OpenAIResponseContentPartRefusal", + "description": "Refusal content within a streamed response part.\n\n:param type: Content part type identifier, always \"refusal\"\n:param refusal: Refusal text supplied by the model" }, - "ClientCredentialsOAuthFlow": { + "OpenAIResponseError": { "properties": { - "refreshUrl": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "title": "Refreshurl" - }, - "scopes": { - "additionalProperties": { - "type": "string" - }, - "type": "object", - "title": "Scopes" + "code": { + "type": "string", + "title": "Code" }, - "tokenUrl": { + "message": { "type": "string", - "title": "Tokenurl" + "title": "Message" } }, "type": "object", "required": [ - "scopes", - "tokenUrl" 
+ "code", + "message" ], - "title": "ClientCredentialsOAuthFlow", - "description": "Defines configuration details for the OAuth 2.0 Client Credentials flow." + "title": "OpenAIResponseError", + "description": "Error details for failed OpenAI response requests.\n\n:param code: Error code identifying the type of failure\n:param message: Human-readable error message describing the failure" }, - "Configuration": { + "OpenAIResponseInputFunctionToolCallOutput": { "properties": { - "name": { + "call_id": { "type": "string", - "title": "Service name", - "description": "Name of the service. That value will be used in REST API endpoints." - }, - "service": { - "$ref": "#/components/schemas/ServiceConfiguration", - "title": "Service configuration", - "description": "This section contains Lightspeed Core Stack service configuration." - }, - "llama_stack": { - "$ref": "#/components/schemas/LlamaStackConfiguration", - "title": "Llama Stack configuration", - "description": "This section contains Llama Stack configuration. Lightspeed Core Stack service can call Llama Stack in library mode or in server mode." + "title": "Call Id" }, - "user_data_collection": { - "$ref": "#/components/schemas/UserDataCollection", - "title": "User data collection configuration", - "description": "This section contains configuration for subsystem that collects user data(transcription history and feedbacks)." - }, - "database": { - "$ref": "#/components/schemas/DatabaseConfiguration", - "title": "Database Configuration", - "description": "Configuration for database to store conversation IDs and other runtime data" + "output": { + "type": "string", + "title": "Output" }, - "mcp_servers": { - "items": { - "$ref": "#/components/schemas/ModelContextProtocolServer" - }, - "type": "array", - "title": "Model Context Protocol Server and tools configuration", - "description": "MCP (Model Context Protocol) servers provide tools and capabilities to the AI agents. These are configured in this section. 
Only MCP servers defined in the lightspeed-stack.yaml configuration are available to the agents. Tools configured in the llama-stack run.yaml are not accessible to lightspeed-core agents." + "type": { + "type": "string", + "const": "function_call_output", + "title": "Type", + "default": "function_call_output" }, - "authentication": { - "$ref": "#/components/schemas/AuthenticationConfiguration", - "title": "Authentication configuration", - "description": "Authentication configuration" + "id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Id" }, - "authorization": { + "status": { "anyOf": [ { - "$ref": "#/components/schemas/AuthorizationConfiguration" + "type": "string" }, { "type": "null" } ], - "title": "Authorization configuration", - "description": "Lightspeed Core Stack implements a modular authentication and authorization system with multiple authentication methods. Authorization is configurable through role-based access control. Authentication is handled through selectable modules configured via the module field in the authentication configuration." + "title": "Status" + } + }, + "type": "object", + "required": [ + "call_id", + "output" + ], + "title": "OpenAIResponseInputFunctionToolCallOutput", + "description": "This represents the output of a function call that gets passed back to the model." + }, + "OpenAIResponseInputMessageContentFile": { + "properties": { + "type": { + "type": "string", + "const": "input_file", + "title": "Type", + "default": "input_file" }, - "customization": { + "file_data": { "anyOf": [ { - "$ref": "#/components/schemas/Customization" + "type": "string" }, { "type": "null" } ], - "title": "Custom profile configuration", - "description": "It is possible to customize Lightspeed Core Stack via this section. System prompt can be customized and also different parts of the service can be replaced by custom Python modules." 
- }, - "inference": { - "$ref": "#/components/schemas/InferenceConfiguration", - "title": "Inference configuration", - "description": "One LLM provider and one its model might be selected as default ones. When no provider+model pair is specified in REST API calls (query endpoints), the default provider and model are used." + "title": "File Data" }, - "conversation_cache": { - "$ref": "#/components/schemas/ConversationHistoryConfiguration", - "title": "Conversation history configuration" + "file_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "File Id" }, - "byok_rag": { - "items": { - "$ref": "#/components/schemas/ByokRag" - }, - "type": "array", - "title": "BYOK RAG configuration", - "description": "BYOK RAG configuration. This configuration can be used to reconfigure Llama Stack through its run.yaml configuration file" + "file_url": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "File Url" }, - "a2a_state": { - "$ref": "#/components/schemas/A2AStateConfiguration", - "title": "A2A state configuration", - "description": "Configuration for A2A protocol persistent state storage." + "filename": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Filename" + } + }, + "type": "object", + "title": "OpenAIResponseInputMessageContentFile", + "description": "File content for input messages in OpenAI response format.\n\n:param type: The type of the input item. Always `input_file`.\n:param file_data: The data of the file to be sent to the model.\n:param file_id: (Optional) The ID of the file to be sent to the model.\n:param file_url: The URL of the file to be sent to the model.\n:param filename: The name of the file to be sent to the model." 
+ }, + "OpenAIResponseInputMessageContentImage": { + "properties": { + "detail": { + "anyOf": [ + { + "type": "string", + "const": "low" + }, + { + "type": "string", + "const": "high" + }, + { + "type": "string", + "const": "auto" + } + ], + "title": "Detail", + "default": "auto" }, - "quota_handlers": { - "$ref": "#/components/schemas/QuotaHandlersConfiguration", - "title": "Quota handlers", - "description": "Quota handlers configuration" + "type": { + "type": "string", + "const": "input_image", + "title": "Type", + "default": "input_image" }, - "azure_entra_id": { + "file_id": { "anyOf": [ { - "$ref": "#/components/schemas/AzureEntraIdConfiguration" + "type": "string" }, { "type": "null" } - ] + ], + "title": "File Id" }, - "splunk": { + "image_url": { "anyOf": [ { - "$ref": "#/components/schemas/SplunkConfiguration" + "type": "string" }, { "type": "null" } ], - "title": "Splunk configuration", - "description": "Splunk HEC configuration for sending telemetry events." + "title": "Image Url" + } + }, + "type": "object", + "title": "OpenAIResponseInputMessageContentImage", + "description": "Image content for input messages in OpenAI response format.\n\n:param detail: Level of detail for image processing, can be \"low\", \"high\", or \"auto\"\n:param type: Content type identifier, always \"input_image\"\n:param file_id: (Optional) The ID of the file to be sent to the model.\n:param image_url: (Optional) URL of the image content" + }, + "OpenAIResponseInputMessageContentText": { + "properties": { + "text": { + "type": "string", + "title": "Text" }, - "deployment_environment": { + "type": { "type": "string", - "title": "Deployment environment", - "description": "Deployment environment name (e.g., 'development', 'staging', 'production'). 
Used in telemetry events.", - "default": "development" + "const": "input_text", + "title": "Type", + "default": "input_text" + } + }, + "type": "object", + "required": [ + "text" + ], + "title": "OpenAIResponseInputMessageContentText", + "description": "Text content for input messages in OpenAI response format.\n\n:param text: The text content of the input message\n:param type: Content type identifier, always \"input_text\"" + }, + "OpenAIResponseInputToolChoiceAllowedTools": { + "properties": { + "mode": { + "type": "string", + "enum": [ + "auto", + "required" + ], + "title": "Mode", + "default": "auto" }, - "rag": { - "$ref": "#/components/schemas/RagConfiguration", - "title": "RAG configuration", - "description": "Configuration for all RAG strategies (inline and tool-based)." + "tools": { + "items": { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + "type": "array", + "title": "Tools" }, - "okp": { - "$ref": "#/components/schemas/OkpConfiguration", - "title": "OKP configuration", - "description": "OKP provider settings. Only used when 'okp' is listed in rag.inline or rag.tool." + "type": { + "type": "string", + "const": "allowed_tools", + "title": "Type", + "default": "allowed_tools" } }, - "additionalProperties": false, "type": "object", "required": [ - "name", - "service", - "llama_stack", - "user_data_collection" + "tools" ], - "title": "Configuration", - "description": "Global service configuration." 
+ "title": "OpenAIResponseInputToolChoiceAllowedTools", + "description": "Constrains the tools available to the model to a pre-defined set.\n\n:param mode: Constrains the tools available to the model to a pre-defined set\n:param tools: A list of tool definitions that the model should be allowed to call\n:param type: Tool choice type identifier, always \"allowed_tools\"" }, - "ConfigurationResponse": { + "OpenAIResponseInputToolChoiceCustomTool": { "properties": { - "configuration": { - "$ref": "#/components/schemas/Configuration" + "type": { + "type": "string", + "const": "custom", + "title": "Type", + "default": "custom" + }, + "name": { + "type": "string", + "title": "Name" } }, "type": "object", "required": [ - "configuration" + "name" ], - "title": "ConfigurationResponse", - "description": "Success response model for the config endpoint.", - "examples": [ - { - "configuration": { - "authentication": { - "module": "noop", - "skip_tls_verification": false - }, - "authorization": { - "access_rules": [] - }, - "byok_rag": [], - "conversation_cache": {}, - "database": { - "sqlite": { - "db_path": "/tmp/lightspeed-stack.db" - } - }, - "inference": { - "default_model": "gpt-4-turbo", - "default_provider": "openai" - }, - "llama_stack": { - "api_key": "*****", - "url": "http://localhost:8321", - "use_as_library_client": false + "title": "OpenAIResponseInputToolChoiceCustomTool", + "description": "Forces the model to call a custom tool.\n\n:param type: Tool choice type identifier, always \"custom\"\n:param name: The name of the custom tool to call." 
+ }, + "OpenAIResponseInputToolChoiceFileSearch": { + "properties": { + "type": { + "type": "string", + "const": "file_search", + "title": "Type", + "default": "file_search" + } + }, + "type": "object", + "title": "OpenAIResponseInputToolChoiceFileSearch", + "description": "Indicates that the model should use file search to generate a response.\n\n:param type: Tool choice type identifier, always \"file_search\"" + }, + "OpenAIResponseInputToolChoiceFunctionTool": { + "properties": { + "name": { + "type": "string", + "title": "Name" + }, + "type": { + "type": "string", + "const": "function", + "title": "Type", + "default": "function" + } + }, + "type": "object", + "required": [ + "name" + ], + "title": "OpenAIResponseInputToolChoiceFunctionTool", + "description": "Forces the model to call a specific function.\n\n:param name: The name of the function to call\n:param type: Tool choice type identifier, always \"function\"" + }, + "OpenAIResponseInputToolChoiceMCPTool": { + "properties": { + "server_label": { + "type": "string", + "title": "Server Label" + }, + "type": { + "type": "string", + "const": "mcp", + "title": "Type", + "default": "mcp" + }, + "name": { + "anyOf": [ + { + "type": "string" }, - "mcp_servers": [ - { - "name": "server1", - "provider_id": "provider1", - "url": "http://url.com:1" - } - ], - "name": "lightspeed-stack", - "quota_handlers": { - "enable_token_history": false, - "limiters": [], - "scheduler": { - "period": 1 - } + { + "type": "null" + } + ], + "title": "Name" + } + }, + "type": "object", + "required": [ + "server_label" + ], + "title": "OpenAIResponseInputToolChoiceMCPTool", + "description": "Forces the model to call a specific tool on a remote MCP server\n\n:param server_label: The label of the MCP server to use.\n:param type: Tool choice type identifier, always \"mcp\"\n:param name: (Optional) The name of the tool to call on the server." 
+ }, + "OpenAIResponseInputToolChoiceMode": { + "type": "string", + "enum": [ + "auto", + "required", + "none" + ], + "title": "OpenAIResponseInputToolChoiceMode" + }, + "OpenAIResponseInputToolChoiceWebSearch": { + "properties": { + "type": { + "anyOf": [ + { + "type": "string", + "const": "web_search" }, - "service": { - "access_log": true, - "auth_enabled": false, - "color_log": true, - "cors": { - "allow_credentials": false, - "allow_headers": [ - "*" - ], - "allow_methods": [ - "*" - ], - "allow_origins": [ - "*" - ] - }, - "host": "localhost", - "port": 8080, - "tls_config": {}, - "workers": 1 + { + "type": "string", + "const": "web_search_preview" }, - "user_data_collection": { - "feedback_enabled": true, - "feedback_storage": "/tmp/data/feedback", - "transcripts_enabled": false, - "transcripts_storage": "/tmp/data/transcripts" + { + "type": "string", + "const": "web_search_preview_2025_03_11" + }, + { + "type": "string", + "const": "web_search_2025_08_26" } - } + ], + "title": "Type", + "default": "web_search" } - ] + }, + "type": "object", + "title": "OpenAIResponseInputToolChoiceWebSearch", + "description": "Indicates that the model should use web search to generate a response\n\n:param type: Web search tool type variant to use" }, - "ConversationData": { + "OpenAIResponseInputToolFileSearch": { "properties": { - "conversation_id": { + "type": { "type": "string", - "title": "Conversation Id" + "const": "file_search", + "title": "Type", + "default": "file_search" }, - "topic_summary": { + "vector_store_ids": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Vector Store Ids" + }, + "filters": { "anyOf": [ { - "type": "string" + "additionalProperties": true, + "type": "object" }, { "type": "null" } ], - "title": "Topic Summary" + "title": "Filters" }, - "last_message_timestamp": { - "type": "number", - "title": "Last Message Timestamp" + "max_num_results": { + "anyOf": [ + { + "type": "integer", + "maximum": 50.0, + "minimum": 1.0 + }, + 
{ + "type": "null" + } + ], + "title": "Max Num Results", + "default": 10 + }, + "ranking_options": { + "anyOf": [ + { + "$ref": "#/components/schemas/SearchRankingOptions" + }, + { + "type": "null" + } + ] } }, "type": "object", "required": [ - "conversation_id", - "topic_summary", - "last_message_timestamp" + "vector_store_ids" ], - "title": "ConversationData", - "description": "Model representing conversation data returned by cache list operations.\n\nAttributes:\n conversation_id: The conversation ID\n topic_summary: The topic summary for the conversation (can be None)\n last_message_timestamp: The timestamp of the last message in the conversation" + "title": "OpenAIResponseInputToolFileSearch", + "description": "File search tool configuration for OpenAI response inputs.\n\n:param type: Tool type identifier, always \"file_search\"\n:param vector_store_ids: List of vector store identifiers to search within\n:param filters: (Optional) Additional filters to apply to the search\n:param max_num_results: (Optional) Maximum number of search results to return (1-50)\n:param ranking_options: (Optional) Options for ranking and scoring search results" }, - "ConversationDeleteResponse": { + "OpenAIResponseInputToolFunction": { "properties": { - "conversation_id": { + "type": { "type": "string", - "title": "Conversation Id", - "description": "The conversation ID (UUID) that was deleted.", - "examples": [ - "123e4567-e89b-12d3-a456-426614174000" - ] - }, - "success": { - "type": "boolean", - "title": "Success", - "description": "Whether the deletion was successful.", - "examples": [ - true, - false - ] + "const": "function", + "title": "Type", + "default": "function" }, - "response": { + "name": { "type": "string", - "title": "Response", - "description": "A message about the deletion result.", - "examples": [ - "Conversation deleted successfully", - "Conversation cannot be deleted" - ] + "title": "Name" + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + 
"type": "null" + } + ], + "title": "Description" + }, + "parameters": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Parameters" + }, + "strict": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Strict" } }, "type": "object", "required": [ - "conversation_id", - "success", - "response" + "name", + "parameters" ], - "title": "ConversationDeleteResponse", - "description": "Model representing a response for deleting a conversation.\n\nAttributes:\n conversation_id: The conversation ID (UUID) that was deleted.\n success: Whether the deletion was successful.\n response: A message about the deletion result.", - "examples": [ - { - "label": "deleted", - "value": { - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "response": "Conversation deleted successfully", - "success": true - } - }, - { - "label": "not found", - "value": { - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "response": "Conversation can not be deleted", - "success": true - } - } - ] + "title": "OpenAIResponseInputToolFunction", + "description": "Function tool configuration for OpenAI response inputs.\n\n:param type: Tool type identifier, always \"function\"\n:param name: Name of the function that can be called\n:param description: (Optional) Description of what the function does\n:param parameters: (Optional) JSON schema defining the function's parameters\n:param strict: (Optional) Whether to enforce strict parameter validation" }, - "ConversationDetails": { + "OpenAIResponseInputToolMCP": { "properties": { - "conversation_id": { + "type": { "type": "string", - "title": "Conversation Id", - "description": "Conversation ID (UUID)", - "examples": [ - "c5260aec-4d82-4370-9fdf-05cf908b3f16" - ] + "const": "mcp", + "title": "Type", + "default": "mcp" }, - "created_at": { + "server_label": { + "type": "string", + "title": "Server Label" + }, + "connector_id": { "anyOf": [ { 
"type": "string" @@ -5933,13 +8696,9 @@ "type": "null" } ], - "title": "Created At", - "description": "When the conversation was created", - "examples": [ - "2024-01-01T01:00:00Z" - ] + "title": "Connector Id" }, - "last_message_at": { + "server_url": { "anyOf": [ { "type": "string" @@ -5948,44 +8707,167 @@ "type": "null" } ], - "title": "Last Message At", - "description": "When the last message was sent", - "examples": [ - "2024-01-01T01:00:00Z" - ] + "title": "Server Url" }, - "message_count": { + "headers": { "anyOf": [ { - "type": "integer" + "additionalProperties": true, + "type": "object" }, { "type": "null" } ], - "title": "Message Count", - "description": "Number of user messages in the conversation", - "examples": [ - 42 - ] + "title": "Headers" }, - "last_used_model": { + "authorization": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Authorization" + }, + "require_approval": { + "anyOf": [ + { + "type": "string", + "const": "always" + }, + { + "type": "string", + "const": "never" + }, + { + "$ref": "#/components/schemas/ApprovalFilter" + } + ], + "title": "Require Approval", + "default": "never" + }, + "allowed_tools": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "$ref": "#/components/schemas/AllowedToolsFilter" + }, + { + "type": "null" + } + ], + "title": "Allowed Tools" + } + }, + "type": "object", + "required": [ + "server_label" + ], + "title": "OpenAIResponseInputToolMCP", + "description": "Model Context Protocol (MCP) tool configuration for OpenAI response inputs.\n\n:param type: Tool type identifier, always \"mcp\"\n:param server_label: Label to identify this MCP server\n:param connector_id: (Optional) ID of the connector to use for this MCP server\n:param server_url: (Optional) URL endpoint of the MCP server\n:param headers: (Optional) HTTP headers to include when connecting to the server\n:param authorization: (Optional) OAuth access token for authenticating with the 
MCP server\n:param require_approval: Approval requirement for tool calls (\"always\", \"never\", or filter)\n:param allowed_tools: (Optional) Restriction on which tools can be used from this server" + }, + "OpenAIResponseInputToolWebSearch": { + "properties": { + "type": { + "anyOf": [ + { + "type": "string", + "const": "web_search" + }, + { + "type": "string", + "const": "web_search_preview" + }, + { + "type": "string", + "const": "web_search_preview_2025_03_11" + }, + { + "type": "string", + "const": "web_search_2025_08_26" + } + ], + "title": "Type", + "default": "web_search" + }, + "search_context_size": { "anyOf": [ { - "type": "string" + "type": "string", + "pattern": "^low|medium|high$" }, { "type": "null" } ], - "title": "Last Used Model", - "description": "Identification of the last model used for the conversation", - "examples": [ - "gpt-4-turbo", - "gpt-3.5-turbo-0125" - ] + "title": "Search Context Size", + "default": "medium" + } + }, + "type": "object", + "title": "OpenAIResponseInputToolWebSearch", + "description": "Web search tool configuration for OpenAI response inputs.\n\n:param type: Web search tool type variant to use\n:param search_context_size: (Optional) Size of search context, must be \"low\", \"medium\", or \"high\"" + }, + "OpenAIResponseMCPApprovalRequest": { + "properties": { + "arguments": { + "type": "string", + "title": "Arguments" }, - "last_used_provider": { + "id": { + "type": "string", + "title": "Id" + }, + "name": { + "type": "string", + "title": "Name" + }, + "server_label": { + "type": "string", + "title": "Server Label" + }, + "type": { + "type": "string", + "const": "mcp_approval_request", + "title": "Type", + "default": "mcp_approval_request" + } + }, + "type": "object", + "required": [ + "arguments", + "id", + "name", + "server_label" + ], + "title": "OpenAIResponseMCPApprovalRequest", + "description": "A request for human approval of a tool invocation." 
+ }, + "OpenAIResponseMCPApprovalResponse": { + "properties": { + "approval_request_id": { + "type": "string", + "title": "Approval Request Id" + }, + "approve": { + "type": "boolean", + "title": "Approve" + }, + "type": { + "type": "string", + "const": "mcp_approval_response", + "title": "Type", + "default": "mcp_approval_response" + }, + "id": { "anyOf": [ { "type": "string" @@ -5994,14 +8876,9 @@ "type": "null" } ], - "title": "Last Used Provider", - "description": "Identification of the last provider used for the conversation", - "examples": [ - "openai", - "gemini" - ] + "title": "Id" }, - "topic_summary": { + "reason": { "anyOf": [ { "type": "string" @@ -6010,382 +8887,468 @@ "type": "null" } ], - "title": "Topic Summary", - "description": "Topic summary for the conversation", - "examples": [ - "Openshift Microservices Deployment Strategies" - ] + "title": "Reason" } }, "type": "object", "required": [ - "conversation_id" + "approval_request_id", + "approve" ], - "title": "ConversationDetails", - "description": "Model representing the details of a user conversation.\n\nAttributes:\n conversation_id: The conversation ID (UUID).\n created_at: When the conversation was created.\n last_message_at: When the last message was sent.\n message_count: Number of user messages in the conversation.\n last_used_model: The last model used for the conversation.\n last_used_provider: The provider of the last used model.\n topic_summary: The topic summary for the conversation.\n\nExample:\n ```python\n conversation = ConversationDetails(\n conversation_id=\"123e4567-e89b-12d3-a456-426614174000\",\n created_at=\"2024-01-01T00:00:00Z\",\n last_message_at=\"2024-01-01T00:05:00Z\",\n message_count=5,\n last_used_model=\"gemini/gemini-2.0-flash\",\n last_used_provider=\"gemini\",\n topic_summary=\"Openshift Microservices Deployment Strategies\",\n )\n ```" + "title": "OpenAIResponseMCPApprovalResponse", + "description": "A response to an MCP approval request." 
}, - "ConversationHistoryConfiguration": { + "OpenAIResponseMessage-Input": { "properties": { - "type": { + "content": { "anyOf": [ { - "type": "string", - "enum": [ - "noop", - "memory", - "sqlite", - "postgres" - ] + "type": "string" }, { - "type": "null" + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContentText" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContentFile" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "input_file": "#/components/schemas/OpenAIResponseInputMessageContentFile", + "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage", + "input_text": "#/components/schemas/OpenAIResponseInputMessageContentText" + } + } + }, + "type": "array" + }, + { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageContentOutputText-Input" + }, + { + "$ref": "#/components/schemas/OpenAIResponseContentPartRefusal" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "output_text": "#/components/schemas/OpenAIResponseOutputMessageContentOutputText-Input", + "refusal": "#/components/schemas/OpenAIResponseContentPartRefusal" + } + } + }, + "type": "array" } ], - "title": "Conversation history database type", - "description": "Type of database where the conversation history is to be stored." 
+ "title": "Content" }, - "memory": { + "role": { "anyOf": [ { - "$ref": "#/components/schemas/InMemoryCacheConfig" + "type": "string", + "const": "system" }, { - "type": "null" + "type": "string", + "const": "developer" + }, + { + "type": "string", + "const": "user" + }, + { + "type": "string", + "const": "assistant" } ], - "title": "In-memory cache configuration", - "description": "In-memory cache configuration" + "title": "Role" }, - "sqlite": { + "type": { + "type": "string", + "const": "message", + "title": "Type", + "default": "message" + }, + "id": { "anyOf": [ { - "$ref": "#/components/schemas/SQLiteDatabaseConfiguration" + "type": "string" }, { "type": "null" } ], - "title": "SQLite configuration", - "description": "SQLite database configuration" + "title": "Id" }, - "postgres": { + "status": { "anyOf": [ { - "$ref": "#/components/schemas/PostgreSQLDatabaseConfiguration" + "type": "string" }, { "type": "null" } ], - "title": "PostgreSQL configuration", - "description": "PostgreSQL database configuration" + "title": "Status" } }, - "additionalProperties": false, "type": "object", - "title": "ConversationHistoryConfiguration", - "description": "Conversation history configuration." + "required": [ + "content", + "role" + ], + "title": "OpenAIResponseMessage", + "description": "Corresponds to the various Message types in the Responses API.\nThey are all under one type because the Responses API gives them all\nthe same \"type\" value, and there is no way to tell them apart in certain\nscenarios." 
}, - "ConversationResponse": { + "OpenAIResponseMessage-Output": { "properties": { - "conversation_id": { - "type": "string", - "title": "Conversation Id", - "description": "Conversation ID (UUID)", - "examples": [ - "c5260aec-4d82-4370-9fdf-05cf908b3f16" - ] - }, - "chat_history": { - "items": { - "$ref": "#/components/schemas/ConversationTurn" - }, - "type": "array", - "title": "Chat History", - "description": "The simplified chat history as a list of conversation turns", - "examples": [ + "content": { + "anyOf": [ { - "completed_at": "2024-01-01T00:01:05Z", - "messages": [ - { - "content": "Hello", - "type": "user" - }, - { - "content": "Hi there!", - "type": "assistant" + "type": "string" + }, + { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContentText" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContentFile" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "input_file": "#/components/schemas/OpenAIResponseInputMessageContentFile", + "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage", + "input_text": "#/components/schemas/OpenAIResponseInputMessageContentText" + } } - ], - "model": "gpt-4o-mini", - "provider": "openai", - "started_at": "2024-01-01T00:01:00Z", - "tool_calls": [], - "tool_results": [] - } - ] - } - }, - "type": "object", - "required": [ - "conversation_id", - "chat_history" - ], - "title": "ConversationResponse", - "description": "Model representing a response for retrieving a conversation.\n\nAttributes:\n conversation_id: The conversation ID (UUID).\n chat_history: The chat history as a list of conversation turns.", - "examples": [ - { - "chat_history": [ + }, + "type": "array" + }, { - "completed_at": "2024-01-01T00:01:05Z", - "messages": [ - { - "content": "Hello", - "type": "user" - }, - { - "content": "Hi there!", - "type": "assistant" + "items": 
{ + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageContentOutputText-Output" + }, + { + "$ref": "#/components/schemas/OpenAIResponseContentPartRefusal" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "output_text": "#/components/schemas/OpenAIResponseOutputMessageContentOutputText-Output", + "refusal": "#/components/schemas/OpenAIResponseContentPartRefusal" + } } - ], - "model": "gpt-4o-mini", - "provider": "openai", - "started_at": "2024-01-01T00:01:00Z", - "tool_calls": [], - "tool_results": [] + }, + "type": "array" } - ], - "conversation_id": "123e4567-e89b-12d3-a456-426614174000" - } - ] - }, - "ConversationTurn": { - "properties": { - "messages": { - "items": { - "$ref": "#/components/schemas/Message" - }, - "type": "array", - "title": "Messages", - "description": "List of messages in this turn" - }, - "tool_calls": { - "items": { - "$ref": "#/components/schemas/ToolCallSummary" - }, - "type": "array", - "title": "Tool Calls", - "description": "List of tool calls made in this turn" - }, - "tool_results": { - "items": { - "$ref": "#/components/schemas/ToolResultSummary" - }, - "type": "array", - "title": "Tool Results", - "description": "List of tool results from this turn" + ], + "title": "Content" }, - "provider": { - "type": "string", - "title": "Provider", - "description": "Provider identifier used for this turn", - "examples": [ - "openai" - ] + "role": { + "anyOf": [ + { + "type": "string", + "const": "system" + }, + { + "type": "string", + "const": "developer" + }, + { + "type": "string", + "const": "user" + }, + { + "type": "string", + "const": "assistant" + } + ], + "title": "Role" }, - "model": { + "type": { "type": "string", - "title": "Model", - "description": "Model identifier used for this turn", - "examples": [ - "gpt-4o-mini" - ] + "const": "message", + "title": "Type", + "default": "message" }, - "started_at": { - "type": "string", - "title": "Started At", - "description": "ISO 8601 timestamp 
when the turn started", - "examples": [ - "2024-01-01T00:01:00Z" - ] + "id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Id" }, - "completed_at": { - "type": "string", - "title": "Completed At", - "description": "ISO 8601 timestamp when the turn completed", - "examples": [ - "2024-01-01T00:01:05Z" - ] + "status": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Status" } }, "type": "object", "required": [ - "provider", - "model", - "started_at", - "completed_at" + "content", + "role" ], - "title": "ConversationTurn", - "description": "Model representing a single conversation turn.\n\nAttributes:\n messages: List of messages in this turn.\n tool_calls: List of tool calls made in this turn.\n tool_results: List of tool results from this turn.\n provider: Provider identifier used for this turn.\n model: Model identifier used for this turn.\n started_at: ISO 8601 timestamp when the turn started.\n completed_at: ISO 8601 timestamp when the turn completed." + "title": "OpenAIResponseMessage", + "description": "Corresponds to the various Message types in the Responses API.\nThey are all under one type because the Responses API gives them all\nthe same \"type\" value, and there is no way to tell them apart in certain\nscenarios." 
}, - "ConversationUpdateRequest": { + "OpenAIResponseOutputMessageContentOutputText-Input": { "properties": { - "topic_summary": { + "text": { "type": "string", - "maxLength": 1000, - "minLength": 1, - "title": "Topic Summary", - "description": "The new topic summary for the conversation", - "examples": [ - "Discussion about machine learning algorithms" - ] + "title": "Text" + }, + "type": { + "type": "string", + "const": "output_text", + "title": "Type", + "default": "output_text" + }, + "annotations": { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationFileCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationFilePath" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "container_file_citation": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation", + "file_citation": "#/components/schemas/OpenAIResponseAnnotationFileCitation", + "file_path": "#/components/schemas/OpenAIResponseAnnotationFilePath", + "url_citation": "#/components/schemas/OpenAIResponseAnnotationCitation" + } + } + }, + "type": "array", + "title": "Annotations" + }, + "logprobs": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/OpenAITokenLogProb" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Logprobs" } }, - "additionalProperties": false, "type": "object", "required": [ - "topic_summary" + "text" ], - "title": "ConversationUpdateRequest", - "description": "Model representing a request to update a conversation topic summary.\n\nAttributes:\n topic_summary: The new topic summary for the conversation.\n\nExample:\n ```python\n update_request = ConversationUpdateRequest(\n topic_summary=\"Discussion about machine learning algorithms\"\n )\n ```" + "title": "OpenAIResponseOutputMessageContentOutputText" }, - 
"ConversationUpdateResponse": { + "OpenAIResponseOutputMessageContentOutputText-Output": { "properties": { - "conversation_id": { + "text": { "type": "string", - "title": "Conversation Id", - "description": "The conversation ID (UUID) that was updated", - "examples": [ - "123e4567-e89b-12d3-a456-426614174000" - ] - }, - "success": { - "type": "boolean", - "title": "Success", - "description": "Whether the update was successful", - "examples": [ - true - ] + "title": "Text" }, - "message": { + "type": { "type": "string", - "title": "Message", - "description": "A message about the update result", - "examples": [ - "Topic summary updated successfully" - ] + "const": "output_text", + "title": "Type", + "default": "output_text" + }, + "annotations": { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationFileCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationFilePath" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "container_file_citation": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation", + "file_citation": "#/components/schemas/OpenAIResponseAnnotationFileCitation", + "file_path": "#/components/schemas/OpenAIResponseAnnotationFilePath", + "url_citation": "#/components/schemas/OpenAIResponseAnnotationCitation" + } + } + }, + "type": "array", + "title": "Annotations" + }, + "logprobs": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/OpenAITokenLogProb" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Logprobs" } }, "type": "object", "required": [ - "conversation_id", - "success", - "message" + "text" ], - "title": "ConversationUpdateResponse", - "description": "Model representing a response for updating a conversation topic summary.\n\nAttributes:\n conversation_id: The 
conversation ID (UUID) that was updated.\n success: Whether the update was successful.\n message: A message about the update result.\n\nExample:\n ```python\n update_response = ConversationUpdateResponse(\n conversation_id=\"123e4567-e89b-12d3-a456-426614174000\",\n success=True,\n message=\"Topic summary updated successfully\",\n )\n ```", - "examples": [ - { - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "message": "Topic summary updated successfully", - "success": true - } - ] + "title": "OpenAIResponseOutputMessageContentOutputText" }, - "ConversationsListResponse": { + "OpenAIResponseOutputMessageFileSearchToolCall": { "properties": { - "conversations": { + "id": { + "type": "string", + "title": "Id" + }, + "queries": { "items": { - "$ref": "#/components/schemas/ConversationDetails" + "type": "string" }, "type": "array", - "title": "Conversations" - } - }, - "type": "object", - "required": [ - "conversations" - ], - "title": "ConversationsListResponse", - "description": "Model representing a response for listing conversations of a user.\n\nAttributes:\n conversations: List of conversation details associated with the user.", - "examples": [ - { - "conversations": [ + "title": "Queries" + }, + "status": { + "type": "string", + "title": "Status" + }, + "type": { + "type": "string", + "const": "file_search_call", + "title": "Type", + "default": "file_search_call" + }, + "results": { + "anyOf": [ { - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "created_at": "2024-01-01T00:00:00Z", - "last_message_at": "2024-01-01T00:05:00Z", - "last_used_model": "gemini/gemini-2.0-flash", - "last_used_provider": "gemini", - "message_count": 5, - "topic_summary": "Openshift Microservices Deployment Strategies" + "items": { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCallResults" + }, + "type": "array" }, { - "conversation_id": "456e7890-e12b-34d5-a678-901234567890", - "created_at": "2024-01-01T01:00:00Z", - 
"last_used_model": "gemini/gemini-2.5-flash", - "last_used_provider": "gemini", - "message_count": 2, - "topic_summary": "RHDH Purpose Summary" + "type": "null" } - ] - } - ] - }, - "ConversationsListResponseV2": { - "properties": { - "conversations": { - "items": { - "$ref": "#/components/schemas/ConversationData" - }, - "type": "array", - "title": "Conversations" + ], + "title": "Results" } }, "type": "object", - "required": [ - "conversations" - ], - "title": "ConversationsListResponseV2", - "description": "Model representing a response for listing conversations of a user.\n\nAttributes:\n conversations: List of conversation data associated with the user.", - "examples": [ - { - "conversations": [ - { - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "last_message_timestamp": 1704067200.0, - "topic_summary": "Openshift Microservices Deployment Strategies" - } - ] - } - ] + "required": [ + "id", + "queries", + "status" + ], + "title": "OpenAIResponseOutputMessageFileSearchToolCall", + "description": "File search tool call output message for OpenAI responses.\n\n:param id: Unique identifier for this tool call\n:param queries: List of search queries executed\n:param status: Current status of the file search operation\n:param type: Tool call type identifier, always \"file_search_call\"\n:param results: (Optional) Search results returned by the file search operation" }, - "CustomProfile": { + "OpenAIResponseOutputMessageFileSearchToolCallResults": { "properties": { - "path": { + "attributes": { + "additionalProperties": true, + "type": "object", + "title": "Attributes" + }, + "file_id": { "type": "string", - "title": "Path to custom profile", - "description": "Path to Python modules containing custom profile." 
+ "title": "File Id" }, - "prompts": { - "additionalProperties": { - "type": "string" - }, - "type": "object", - "title": "System prompts", - "description": "Dictionary containing map of system prompts", - "default": {} + "filename": { + "type": "string", + "title": "Filename" + }, + "score": { + "type": "number", + "title": "Score" + }, + "text": { + "type": "string", + "title": "Text" } }, "type": "object", "required": [ - "path" + "attributes", + "file_id", + "filename", + "score", + "text" ], - "title": "CustomProfile", - "description": "Custom profile customization for prompts and validation." + "title": "OpenAIResponseOutputMessageFileSearchToolCallResults", + "description": "Search results returned by the file search operation.\n\n:param attributes: (Optional) Key-value attributes associated with the file\n:param file_id: Unique identifier of the file containing the result\n:param filename: Name of the file containing the result\n:param score: Relevance score for this search result (between 0 and 1)\n:param text: Text content of the search result" }, - "Customization": { + "OpenAIResponseOutputMessageFunctionToolCall": { "properties": { - "profile_path": { + "call_id": { + "type": "string", + "title": "Call Id" + }, + "name": { + "type": "string", + "title": "Name" + }, + "arguments": { + "type": "string", + "title": "Arguments" + }, + "type": { + "type": "string", + "const": "function_call", + "title": "Type", + "default": "function_call" + }, + "id": { "anyOf": [ { "type": "string" @@ -6394,31 +9357,54 @@ "type": "null" } ], - "title": "Profile Path" - }, - "disable_query_system_prompt": { - "type": "boolean", - "title": "Disable Query System Prompt", - "default": false - }, - "disable_shield_ids_override": { - "type": "boolean", - "title": "Disable Shield Ids Override", - "default": false + "title": "Id" }, - "system_prompt_path": { + "status": { "anyOf": [ { - "type": "string", - "format": "file-path" + "type": "string" }, { "type": "null" } ], - 
"title": "System Prompt Path" + "title": "Status" + } + }, + "type": "object", + "required": [ + "call_id", + "name", + "arguments" + ], + "title": "OpenAIResponseOutputMessageFunctionToolCall", + "description": "Function tool call output message for OpenAI responses.\n\n:param call_id: Unique identifier for the function call\n:param name: Name of the function being called\n:param arguments: JSON string containing the function arguments\n:param type: Tool call type identifier, always \"function_call\"\n:param id: (Optional) Additional identifier for the tool call\n:param status: (Optional) Current status of the function call execution" + }, + "OpenAIResponseOutputMessageMCPCall": { + "properties": { + "id": { + "type": "string", + "title": "Id" }, - "system_prompt": { + "type": { + "type": "string", + "const": "mcp_call", + "title": "Type", + "default": "mcp_call" + }, + "arguments": { + "type": "string", + "title": "Arguments" + }, + "name": { + "type": "string", + "title": "Name" + }, + "server_label": { + "type": "string", + "title": "Server Label" + }, + "error": { "anyOf": [ { "type": "string" @@ -6427,367 +9413,443 @@ "type": "null" } ], - "title": "System Prompt" + "title": "Error" }, - "agent_card_path": { + "output": { "anyOf": [ { - "type": "string", - "format": "file-path" + "type": "string" }, { "type": "null" } ], - "title": "Agent Card Path" + "title": "Output" + } + }, + "type": "object", + "required": [ + "id", + "arguments", + "name", + "server_label" + ], + "title": "OpenAIResponseOutputMessageMCPCall", + "description": "Model Context Protocol (MCP) call output message for OpenAI responses.\n\n:param id: Unique identifier for this MCP call\n:param type: Tool call type identifier, always \"mcp_call\"\n:param arguments: JSON string containing the MCP call arguments\n:param name: Name of the MCP method being called\n:param server_label: Label identifying the MCP server handling the call\n:param error: (Optional) Error message if the MCP call 
failed\n:param output: (Optional) Output result from the successful MCP call" + }, + "OpenAIResponseOutputMessageMCPListTools": { + "properties": { + "id": { + "type": "string", + "title": "Id" }, - "agent_card_config": { + "type": { + "type": "string", + "const": "mcp_list_tools", + "title": "Type", + "default": "mcp_list_tools" + }, + "server_label": { + "type": "string", + "title": "Server Label" + }, + "tools": { + "items": { + "$ref": "#/components/schemas/MCPListToolsTool" + }, + "type": "array", + "title": "Tools" + } + }, + "type": "object", + "required": [ + "id", + "server_label", + "tools" + ], + "title": "OpenAIResponseOutputMessageMCPListTools", + "description": "MCP list tools output message containing available tools from an MCP server.\n\n:param id: Unique identifier for this MCP list tools operation\n:param type: Tool call type identifier, always \"mcp_list_tools\"\n:param server_label: Label identifying the MCP server providing the tools\n:param tools: List of available tools provided by the MCP server" + }, + "OpenAIResponseOutputMessageWebSearchToolCall": { + "properties": { + "id": { + "type": "string", + "title": "Id" + }, + "status": { + "type": "string", + "title": "Status" + }, + "type": { + "type": "string", + "const": "web_search_call", + "title": "Type", + "default": "web_search_call" + } + }, + "type": "object", + "required": [ + "id", + "status" + ], + "title": "OpenAIResponseOutputMessageWebSearchToolCall", + "description": "Web search tool call output message for OpenAI responses.\n\n:param id: Unique identifier for this tool call\n:param status: Current status of the web search operation\n:param type: Tool call type identifier, always \"web_search_call\"" + }, + "OpenAIResponsePrompt": { + "properties": { + "id": { + "type": "string", + "title": "Id" + }, + "variables": { "anyOf": [ { - "additionalProperties": true, + "additionalProperties": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContentText" + 
}, + { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContentFile" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "input_file": "#/components/schemas/OpenAIResponseInputMessageContentFile", + "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage", + "input_text": "#/components/schemas/OpenAIResponseInputMessageContentText" + } + } + }, "type": "object" }, { "type": "null" } ], - "title": "Agent Card Config" + "title": "Variables" }, - "custom_profile": { + "version": { "anyOf": [ { - "$ref": "#/components/schemas/CustomProfile" + "type": "string" }, { "type": "null" } - ] + ], + "title": "Version" } }, - "additionalProperties": false, "type": "object", - "title": "Customization", - "description": "Service customization." + "required": [ + "id" + ], + "title": "OpenAIResponsePrompt", + "description": "OpenAI compatible Prompt object that is used in OpenAI responses.\n\n:param id: Unique identifier of the prompt template\n:param variables: Dictionary of variable names to OpenAIResponseInputMessageContent structure for template substitution. 
The substitution values can either be strings, or other Response input types\nlike images or files.\n:param version: Version number of the prompt to use (defaults to latest if not specified)" }, - "DatabaseConfiguration": { + "OpenAIResponseReasoning": { "properties": { - "sqlite": { + "effort": { "anyOf": [ { - "$ref": "#/components/schemas/SQLiteDatabaseConfiguration" + "type": "string", + "enum": [ + "none", + "minimal", + "low", + "medium", + "high", + "xhigh" + ] }, { "type": "null" } ], - "title": "SQLite configuration", - "description": "SQLite database configuration" - }, - "postgres": { + "title": "Effort" + } + }, + "type": "object", + "title": "OpenAIResponseReasoning", + "description": "Configuration for reasoning effort in OpenAI responses.\n\nControls how much reasoning the model performs before generating a response.\n\n:param effort: The effort level for reasoning. \"low\" favors speed and economical token usage,\n \"high\" favors more complete reasoning, \"medium\" is a balance between the two." + }, + "OpenAIResponseText": { + "properties": { + "format": { "anyOf": [ { - "$ref": "#/components/schemas/PostgreSQLDatabaseConfiguration" + "$ref": "#/components/schemas/OpenAIResponseTextFormat" }, { "type": "null" } - ], - "title": "PostgreSQL configuration", - "description": "PostgreSQL database configuration" - } - }, - "additionalProperties": false, - "type": "object", - "title": "DatabaseConfiguration", - "description": "Database configuration." - }, - "DetailModel": { - "properties": { - "response": { - "type": "string", - "title": "Response", - "description": "Short summary of the error" - }, - "cause": { - "type": "string", - "title": "Cause", - "description": "Detailed explanation of what caused the error" + ] } }, "type": "object", - "required": [ - "response", - "cause" - ], - "title": "DetailModel", - "description": "Nested detail model for error responses." 
- }, - "FeedbackCategory": { - "type": "string", - "enum": [ - "incorrect", - "not_relevant", - "incomplete", - "outdated_information", - "unsafe", - "other" - ], - "title": "FeedbackCategory", - "description": "Enum representing predefined feedback categories for AI responses.\n\nThese categories help provide structured feedback about AI inference quality\nwhen users provide negative feedback (thumbs down). Multiple categories can\nbe selected to provide comprehensive feedback about response issues." + "title": "OpenAIResponseText", + "description": "Text response configuration for OpenAI responses.\n\n:param format: (Optional) Text format configuration specifying output format requirements" }, - "FeedbackRequest": { + "OpenAIResponseTextFormat": { "properties": { - "conversation_id": { - "type": "string", - "title": "Conversation Id", - "description": "The required conversation ID (UUID)", - "examples": [ - "c5260aec-4d82-4370-9fdf-05cf908b3f16" - ] - }, - "user_question": { - "type": "string", - "title": "User Question", - "description": "User question (the query string)", - "examples": [ - "What is Kubernetes?" - ] + "type": { + "anyOf": [ + { + "type": "string", + "const": "text" + }, + { + "type": "string", + "const": "json_schema" + }, + { + "type": "string", + "const": "json_object" + } + ], + "title": "Type" }, - "llm_response": { - "type": "string", - "title": "Llm Response", - "description": "Response from LLM", - "examples": [ - "Kubernetes is an open-source container orchestration system for automating ..." 
- ] + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Name" }, - "sentiment": { + "schema": { "anyOf": [ { - "type": "integer" + "additionalProperties": true, + "type": "object" }, { "type": "null" } ], - "title": "Sentiment", - "description": "User sentiment, if provided must be -1 or 1", - "examples": [ - -1, - 1 - ] + "title": "Schema" }, - "user_feedback": { + "description": { "anyOf": [ { - "type": "string", - "maxLength": 4096 + "type": "string" }, { "type": "null" } ], - "title": "User Feedback", - "description": "Feedback on the LLM response.", - "examples": [ - "I'm not satisfied with the response because it is too vague." - ] + "title": "Description" }, - "categories": { + "strict": { "anyOf": [ { - "items": { - "$ref": "#/components/schemas/FeedbackCategory" - }, - "type": "array" + "type": "boolean" }, { "type": "null" } ], - "title": "Categories", - "description": "List of feedback categories that describe issues with the LLM response (for negative feedback).", - "examples": [ - [ - "incorrect", - "incomplete" - ] - ] + "title": "Strict" } }, - "additionalProperties": false, "type": "object", - "required": [ - "conversation_id", - "user_question", - "llm_response" - ], - "title": "FeedbackRequest", - "description": "Model representing a feedback request.\n\nAttributes:\n conversation_id: The required conversation ID (UUID).\n user_question: The required user question.\n llm_response: The required LLM response.\n sentiment: The optional sentiment.\n user_feedback: The optional user feedback.\n categories: The optional list of feedback categories (multi-select for negative feedback).\n\nExample:\n ```python\n feedback_request = FeedbackRequest(\n conversation_id=\"12345678-abcd-0000-0123-456789abcdef\",\n user_question=\"what are you doing?\",\n user_feedback=\"This response is not helpful\",\n llm_response=\"I don't know\",\n sentiment=-1,\n categories=[FeedbackCategory.INCORRECT, 
FeedbackCategory.INCOMPLETE]\n )\n ```", - "examples": [ - { - "conversation_id": "12345678-abcd-0000-0123-456789abcdef", - "llm_response": "bar", - "sentiment": -1, - "user_feedback": "Not satisfied with the response quality.", - "user_question": "foo" - }, - { - "categories": [ - "incorrect" - ], - "conversation_id": "12345678-abcd-0000-0123-456789abcdef", - "llm_response": "The capital of France is Berlin.", - "sentiment": -1, - "user_question": "What is the capital of France?" - }, - { - "categories": [ - "incomplete", - "not_relevant" - ], - "conversation_id": "12345678-abcd-0000-0123-456789abcdef", - "llm_response": "Use Docker.", - "sentiment": -1, - "user_feedback": "This response is too general and doesn't provide specific steps.", - "user_question": "How do I deploy a web app?" - } - ] + "title": "OpenAIResponseTextFormat", + "description": "Configuration for Responses API text format.\n\n:param type: Must be \"text\", \"json_schema\", or \"json_object\" to identify the format type\n:param name: The name of the response format. Only used for json_schema.\n:param schema: The JSON schema the response should conform to. In a Python SDK, this is often a `pydantic` model. Only used for json_schema.\n:param description: (Optional) A description of the response format. Only used for json_schema.\n:param strict: (Optional) Whether to strictly enforce the JSON schema. If true, the response must match the schema exactly. Only used for json_schema." 
}, - "FeedbackResponse": { + "OpenAIResponseToolMCP": { "properties": { - "response": { + "type": { "type": "string", - "title": "Response", - "description": "The response of the feedback request.", - "examples": [ - "feedback received" - ] + "const": "mcp", + "title": "Type", + "default": "mcp" + }, + "server_label": { + "type": "string", + "title": "Server Label" + }, + "allowed_tools": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "$ref": "#/components/schemas/AllowedToolsFilter" + }, + { + "type": "null" + } + ], + "title": "Allowed Tools" } }, "type": "object", "required": [ - "response" + "server_label" ], - "title": "FeedbackResponse", - "description": "Model representing a response to a feedback request.\n\nAttributes:\n response: The response of the feedback request.\n\nExample:\n ```python\n feedback_response = FeedbackResponse(response=\"feedback received\")\n ```", - "examples": [ - { - "response": "feedback received" - } - ] + "title": "OpenAIResponseToolMCP", + "description": "Model Context Protocol (MCP) tool configuration for OpenAI response object.\n\n:param type: Tool type identifier, always \"mcp\"\n:param server_label: Label to identify this MCP server\n:param allowed_tools: (Optional) Restriction on which tools can be used from this server" }, - "FeedbackStatusUpdateRequest": { + "OpenAIResponseUsage": { "properties": { - "status": { - "type": "boolean", - "title": "Status", - "description": "Desired state of feedback enablement, must be False or True", - "default": false, - "examples": [ - true, - false - ] + "input_tokens": { + "type": "integer", + "title": "Input Tokens" + }, + "output_tokens": { + "type": "integer", + "title": "Output Tokens" + }, + "total_tokens": { + "type": "integer", + "title": "Total Tokens" + }, + "input_tokens_details": { + "$ref": "#/components/schemas/OpenAIResponseUsageInputTokensDetails" + }, + "output_tokens_details": { + "$ref": 
"#/components/schemas/OpenAIResponseUsageOutputTokensDetails" } }, - "additionalProperties": false, "type": "object", - "title": "FeedbackStatusUpdateRequest", - "description": "Model representing a feedback status update request.\n\nAttributes:\n status: Value of the desired feedback enabled state.\n\nExample:\n ```python\n feedback_request = FeedbackRequest(\n status=false\n )\n ```" + "required": [ + "input_tokens", + "output_tokens", + "total_tokens", + "input_tokens_details", + "output_tokens_details" + ], + "title": "OpenAIResponseUsage", + "description": "Usage information for OpenAI response.\n\n:param input_tokens: Number of tokens in the input\n:param output_tokens: Number of tokens in the output\n:param total_tokens: Total tokens used (input + output)\n:param input_tokens_details: Detailed breakdown of input token usage\n:param output_tokens_details: Detailed breakdown of output token usage" }, - "FeedbackStatusUpdateResponse": { + "OpenAIResponseUsageInputTokensDetails": { "properties": { - "status": { - "additionalProperties": true, - "type": "object", - "title": "Status" + "cached_tokens": { + "type": "integer", + "title": "Cached Tokens" } }, "type": "object", "required": [ - "status" + "cached_tokens" ], - "title": "FeedbackStatusUpdateResponse", - "description": "Model representing a response to a feedback status update request.\n\nAttributes:\n status: The previous and current status of the service and who updated it.\n\nExample:\n ```python\n status_response = StatusResponse(\n status={\n \"previous_status\": true,\n \"updated_status\": false,\n \"updated_by\": \"user/test\",\n \"timestamp\": \"2023-03-15 12:34:56\"\n },\n )\n ```", - "examples": [ - { - "status": { - "previous_status": true, - "timestamp": "2023-03-15 12:34:56", - "updated_by": "user/test", - "updated_status": false - } - } - ] + "title": "OpenAIResponseUsageInputTokensDetails", + "description": "Token details for input tokens in OpenAI response usage.\n\n:param cached_tokens: 
Number of tokens retrieved from cache" }, - "ForbiddenResponse": { + "OpenAIResponseUsageOutputTokensDetails": { "properties": { - "status_code": { + "reasoning_tokens": { "type": "integer", - "title": "Status Code" - }, - "detail": { - "$ref": "#/components/schemas/DetailModel" + "title": "Reasoning Tokens" } }, "type": "object", "required": [ - "status_code", - "detail" + "reasoning_tokens" ], - "title": "ForbiddenResponse", - "description": "403 Forbidden. Access denied.", - "examples": [ - { - "detail": { - "cause": "User 6789 does not have permission to read conversation with ID 123e4567-e89b-12d3-a456-426614174000", - "response": "User does not have permission to perform this action" - }, - "label": "conversation read" - }, - { - "detail": { - "cause": "User 6789 does not have permission to delete conversation with ID 123e4567-e89b-12d3-a456-426614174000", - "response": "User does not have permission to perform this action" - }, - "label": "conversation delete" + "title": "OpenAIResponseUsageOutputTokensDetails", + "description": "Token details for output tokens in OpenAI response usage.\n\n:param reasoning_tokens: Number of tokens used for reasoning (o1/o3 models)" + }, + "OpenAITokenLogProb": { + "properties": { + "token": { + "type": "string", + "title": "Token", + "description": "The token." }, - { - "detail": { - "cause": "User 6789 is not authorized to access this endpoint.", - "response": "User does not have permission to access this endpoint" - }, - "label": "endpoint" + "bytes": { + "anyOf": [ + { + "items": { + "type": "integer" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Bytes", + "description": "The bytes for the token." }, - { - "detail": { - "cause": "Storing feedback is disabled.", - "response": "Storing feedback is disabled" - }, - "label": "feedback" + "logprob": { + "type": "number", + "title": "Logprob", + "description": "The log probability of the token." 
}, - { - "detail": { - "cause": "User lacks model_override permission required to override model/provider.", - "response": "This instance does not permit overriding model/provider in the query request (missing permission: MODEL_OVERRIDE). Please remove the model and provider fields from your request." - }, - "label": "model override" + "top_logprobs": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/OpenAITopLogProb" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Top Logprobs", + "description": "The top log probabilities for the token." } - ] + }, + "type": "object", + "required": [ + "token", + "logprob" + ], + "title": "OpenAITokenLogProb", + "description": "The log probability for a token from an OpenAI-compatible chat completion response." }, - "HTTPAuthSecurityScheme": { + "OpenAITopLogProb": { "properties": { - "bearerFormat": { + "token": { + "type": "string", + "title": "Token", + "description": "The token." + }, + "bytes": { "anyOf": [ { - "type": "string" + "items": { + "type": "integer" + }, + "type": "array" }, { "type": "null" } ], - "title": "Bearerformat" + "title": "Bytes", + "description": "The bytes for the token." }, + "logprob": { + "type": "number", + "title": "Logprob", + "description": "The log probability of the token." + } + }, + "type": "object", + "required": [ + "token", + "logprob" + ], + "title": "OpenAITopLogProb", + "description": "The top log probability for a token from an OpenAI-compatible chat completion response." 
+ }, + "OpenIdConnectSecurityScheme": { + "properties": { "description": { "anyOf": [ { @@ -6799,43 +9861,26 @@ ], "title": "Description" }, - "scheme": { + "openIdConnectUrl": { "type": "string", - "title": "Scheme" + "title": "Openidconnecturl" }, "type": { "type": "string", - "const": "http", + "const": "openIdConnect", "title": "Type", - "default": "http" + "default": "openIdConnect" } }, "type": "object", "required": [ - "scheme" + "openIdConnectUrl" ], - "title": "HTTPAuthSecurityScheme", - "description": "Defines a security scheme using HTTP authentication." - }, - "HTTPValidationError": { - "properties": { - "detail": { - "items": { - "$ref": "#/components/schemas/ValidationError" - }, - "type": "array", - "title": "Detail" - } - }, - "type": "object", - "title": "HTTPValidationError" + "title": "OpenIdConnectSecurityScheme", + "description": "Defines a security scheme using OpenID Connect." }, - "ImplicitOAuthFlow": { + "PasswordOAuthFlow": { "properties": { - "authorizationUrl": { - "type": "string", - "title": "Authorizationurl" - }, "refreshUrl": { "anyOf": [ { @@ -6853,46 +9898,53 @@ }, "type": "object", "title": "Scopes" + }, + "tokenUrl": { + "type": "string", + "title": "Tokenurl" } }, "type": "object", "required": [ - "authorizationUrl", - "scopes" - ], - "title": "ImplicitOAuthFlow", - "description": "Defines configuration details for the OAuth 2.0 Implicit flow." - }, - "In": { - "type": "string", - "enum": [ - "cookie", - "header", - "query" + "scopes", + "tokenUrl" ], - "title": "In", - "description": "The location of the API key." + "title": "PasswordOAuthFlow", + "description": "Defines configuration details for the OAuth 2.0 Resource Owner Password flow." 
}, - "InMemoryCacheConfig": { + "PostgreSQLDatabaseConfiguration": { "properties": { - "max_entries": { + "host": { + "type": "string", + "title": "Hostname", + "description": "Database server host or socket directory", + "default": "localhost" + }, + "port": { "type": "integer", "exclusiveMinimum": 0.0, - "title": "Max entries", - "description": "Maximum number of entries stored in the in-memory cache" - } - }, - "additionalProperties": false, - "type": "object", - "required": [ - "max_entries" - ], - "title": "InMemoryCacheConfig", - "description": "In-memory cache configuration." - }, - "InferenceConfiguration": { - "properties": { - "default_model": { + "title": "Port", + "description": "Database server port", + "default": 5432 + }, + "db": { + "type": "string", + "title": "Database name", + "description": "Database name to connect to" + }, + "user": { + "type": "string", + "title": "User name", + "description": "Database user name used to authenticate" + }, + "password": { + "type": "string", + "format": "password", + "title": "Password", + "description": "Password used to authenticate", + "writeOnly": true + }, + "namespace": { "anyOf": [ { "type": "string" @@ -6901,543 +9953,665 @@ "type": "null" } ], - "title": "Default model", - "description": "Identification of default model used when no other model is specified." 
+ "title": "Name space", + "description": "Database namespace", + "default": "public" }, - "default_provider": { + "ssl_mode": { + "type": "string", + "title": "SSL mode", + "description": "SSL mode", + "default": "prefer" + }, + "gss_encmode": { + "type": "string", + "title": "GSS encmode", + "description": "This option determines whether or with what priority a secure GSS TCP/IP connection will be negotiated with the server.", + "default": "prefer" + }, + "ca_cert_path": { "anyOf": [ { - "type": "string" + "type": "string", + "format": "file-path" }, { "type": "null" } ], - "title": "Default provider", - "description": "Identification of default provider used when no other model is specified." + "title": "CA certificate path", + "description": "Path to CA certificate" } }, "additionalProperties": false, "type": "object", - "title": "InferenceConfiguration", - "description": "Inference configuration." + "required": [ + "db", + "user", + "password" + ], + "title": "PostgreSQLDatabaseConfiguration", + "description": "PostgreSQL database configuration.\n\nPostgreSQL database is used by Lightspeed Core Stack service for storing\ninformation about conversation IDs. 
It can also be leveraged to store\nconversation history and information about quota usage.\n\nUseful resources:\n\n- [Psycopg: connection classes](https://www.psycopg.org/psycopg3/docs/api/connections.html)\n- [PostgreSQL connection strings](https://www.connectionstrings.com/postgresql/)\n- [How to Use PostgreSQL in Python](https://www.freecodecamp.org/news/postgresql-in-python/)" }, - "InfoResponse": { + "PromptTooLongResponse": { "properties": { - "name": { + "status_code": { + "type": "integer", + "title": "Status Code" + }, + "detail": { + "$ref": "#/components/schemas/DetailModel" + } + }, + "type": "object", + "required": [ + "status_code", + "detail" + ], + "title": "PromptTooLongResponse", + "description": "413 Payload Too Large - Prompt is too long.", + "examples": [ + { + "detail": { + "cause": "The prompt exceeds the maximum allowed length.", + "response": "Prompt is too long" + }, + "label": "prompt too long" + } + ] + }, + "ProviderHealthStatus": { + "properties": { + "provider_id": { "type": "string", - "title": "Name", - "description": "Service name", + "title": "Provider Id", + "description": "The ID of the provider" + }, + "status": { + "type": "string", + "title": "Status", + "description": "The health status", "examples": [ - "Lightspeed Stack" + "ok", + "unhealthy", + "not_implemented" + ] + }, + "message": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Message", + "description": "Optional message about the health status", + "examples": [ + "All systems operational", + "Llama Stack is unavailable" ] + } + }, + "type": "object", + "required": [ + "provider_id", + "status" + ], + "title": "ProviderHealthStatus", + "description": "Model representing the health status of a provider.\n\nAttributes:\n provider_id: The ID of the provider.\n status: The health status ('ok', 'unhealthy', 'not_implemented').\n message: Optional message about the health status." 
+ }, + "ProviderResponse": { + "properties": { + "api": { + "type": "string", + "title": "Api", + "description": "The API this provider implements" + }, + "config": { + "additionalProperties": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "items": {}, + "type": "array" + }, + {}, + { + "type": "null" + } + ] + }, + "type": "object", + "title": "Config", + "description": "Provider configuration parameters" + }, + "health": { + "additionalProperties": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "items": {}, + "type": "array" + }, + {}, + { + "type": "null" + } + ] + }, + "type": "object", + "title": "Health", + "description": "Current health status of the provider" }, - "service_version": { + "provider_id": { "type": "string", - "title": "Service Version", - "description": "Service version", - "examples": [ - "0.1.0", - "0.2.0", - "1.0.0" - ] + "title": "Provider Id", + "description": "Unique provider identifier" }, - "llama_stack_version": { + "provider_type": { "type": "string", - "title": "Llama Stack Version", - "description": "Llama Stack version", - "examples": [ - "0.2.1", - "0.2.2", - "0.2.18", - "0.2.21", - "0.2.22" - ] + "title": "Provider Type", + "description": "Provider implementation type" } }, "type": "object", "required": [ - "name", - "service_version", - "llama_stack_version" + "api", + "config", + "health", + "provider_id", + "provider_type" ], - "title": "InfoResponse", - "description": "Model representing a response to an info request.\n\nAttributes:\n name: Service name.\n service_version: Service version.\n llama_stack_version: Llama Stack version.\n\nExample:\n ```python\n info_response = InfoResponse(\n name=\"Lightspeed Stack\",\n service_version=\"1.0.0\",\n llama_stack_version=\"0.2.22\",\n )\n ```", + "title": "ProviderResponse", + "description": "Model representing a response to get specific provider request.", 
"examples": [ { - "llama_stack_version": "1.0.0", - "name": "Lightspeed Stack", - "service_version": "1.0.0" + "api": "inference", + "config": { + "api_key": "********" + }, + "health": { + "message": "Healthy", + "status": "OK" + }, + "provider_id": "openai", + "provider_type": "remote::openai" } ] }, - "InternalServerErrorResponse": { + "ProvidersListResponse": { "properties": { - "status_code": { - "type": "integer", - "title": "Status Code" - }, - "detail": { - "$ref": "#/components/schemas/DetailModel" + "providers": { + "additionalProperties": { + "items": { + "additionalProperties": true, + "type": "object" + }, + "type": "array" + }, + "type": "object", + "title": "Providers", + "description": "List of available API types and their corresponding providers" } }, "type": "object", "required": [ - "status_code", - "detail" + "providers" ], - "title": "InternalServerErrorResponse", - "description": "500 Internal Server Error.", + "title": "ProvidersListResponse", + "description": "Model representing a response to providers request.", "examples": [ { - "detail": { - "cause": "An unexpected error occurred while processing the request.", - "response": "Internal server error" - }, - "label": "internal" - }, - { - "detail": { - "cause": "Lightspeed Stack configuration has not been initialized.", - "response": "Configuration is not loaded" - }, - "label": "configuration" - }, - { - "detail": { - "cause": "Failed to store feedback at directory: /path/example", - "response": "Failed to store feedback" - }, - "label": "feedback storage" - }, - { - "detail": { - "cause": "Failed to call backend API", - "response": "Error while processing query" - }, - "label": "query" - }, - { - "detail": { - "cause": "Conversation cache is not configured or unavailable.", - "response": "Conversation cache not configured" - }, - "label": "conversation cache" - }, - { - "detail": { - "cause": "Failed to query the database", - "response": "Database query failed" - }, - "label": "database" 
+ "providers": { + "agents": [ + { + "provider_id": "meta-reference", + "provider_type": "inline::meta-reference" + } + ], + "inference": [ + { + "provider_id": "sentence-transformers", + "provider_type": "inline::sentence-transformers" + }, + { + "provider_id": "openai", + "provider_type": "remote::openai" + } + ] + } } ] }, - "JsonPathOperator": { - "type": "string", - "enum": [ - "equals", - "contains", - "in", - "match" - ], - "title": "JsonPathOperator", - "description": "Supported operators for JSONPath evaluation.\n\nNote: this is not a real model, just an enumeration of all supported JSONPath operators." - }, - "JwkConfiguration": { - "properties": { - "url": { - "type": "string", - "minLength": 1, - "format": "uri", - "title": "URL", - "description": "HTTPS URL of the JWK (JSON Web Key) set used to validate JWTs." - }, - "jwt_configuration": { - "$ref": "#/components/schemas/JwtConfiguration", - "title": "JWT configuration", - "description": "JWT (JSON Web Token) configuration" - } - }, - "additionalProperties": false, - "type": "object", - "required": [ - "url" - ], - "title": "JwkConfiguration", - "description": "JWK (JSON Web Key) configuration.\n\nA JSON Web Key (JWK) is a JavaScript Object Notation (JSON) data structure\nthat represents a cryptographic key.\n\nUseful resources:\n\n - [JSON Web Key](https://openid.net/specs/draft-jones-json-web-key-03.html)\n - [RFC 7517](https://www.rfc-editor.org/rfc/rfc7517)" - }, - "JwtConfiguration": { + "QueryRequest": { "properties": { - "user_id_claim": { + "query": { "type": "string", - "title": "User ID claim", - "description": "JWT claim name that uniquely identifies the user (subject ID).", - "default": "user_id" + "title": "Query", + "description": "The query string", + "examples": [ + "What is Kubernetes?" 
+ ] }, - "username_claim": { - "type": "string", - "title": "Username claim", - "description": "JWT claim name that provides the human-readable username.", - "default": "username" + "conversation_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Conversation Id", + "description": "The optional conversation ID (UUID)", + "examples": [ + "c5260aec-4d82-4370-9fdf-05cf908b3f16" + ] }, - "role_rules": { - "items": { - "$ref": "#/components/schemas/JwtRoleRule" - }, - "type": "array", - "title": "Role rules", - "description": "Rules for extracting roles from JWT claims" - } - }, - "additionalProperties": false, - "type": "object", - "title": "JwtConfiguration", - "description": "JWT (JSON Web Token) configuration.\n\nJSON Web Token (JWT) is a compact, URL-safe means of representing\nclaims to be transferred between two parties. The claims in a JWT\nare encoded as a JSON object that is used as the payload of a JSON\nWeb Signature (JWS) structure or as the plaintext of a JSON Web\nEncryption (JWE) structure, enabling the claims to be digitally\nsigned or integrity protected with a Message Authentication Code\n(MAC) and/or encrypted.\n\nUseful resources:\n\n - [JSON Web Token](https://en.wikipedia.org/wiki/JSON_Web_Token)\n - [RFC 7519](https://datatracker.ietf.org/doc/html/rfc7519)\n - [JSON Web Tokens](https://auth0.com/docs/secure/tokens/json-web-tokens)" - }, - "JwtRoleRule": { - "properties": { - "jsonpath": { - "type": "string", - "title": "JSON path", - "description": "JSONPath expression to evaluate against the JWT payload" + "provider": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Provider", + "description": "The optional provider", + "examples": [ + "openai", + "watsonx" + ] }, - "operator": { - "$ref": "#/components/schemas/JsonPathOperator", - "title": "Operator", - "description": "JSON path comparison operator" + "model": { + "anyOf": [ + { + "type": "string" + }, + { + "type": 
"null" + } + ], + "title": "Model", + "description": "The optional model", + "examples": [ + "gpt4mini" + ] }, - "negate": { - "type": "boolean", - "title": "Negate rule", - "description": "If set to true, the meaning of the rule is negated", - "default": false + "system_prompt": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "System Prompt", + "description": "The optional system prompt.", + "examples": [ + "You are OpenShift assistant.", + "You are Ansible assistant." + ] }, - "value": { - "title": "Value", - "description": "Value to compare against" + "attachments": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/Attachment" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Attachments", + "description": "The optional list of attachments.", + "examples": [ + { + "attachment_type": "log", + "content": "this is attachment", + "content_type": "text/plain" + }, + { + "attachment_type": "configuration", + "content": "kind: Pod\n metadata:\n name: private-reg", + "content_type": "application/yaml" + }, + { + "attachment_type": "configuration", + "content": "foo: bar", + "content_type": "application/yaml" + } + ] }, - "roles": { - "items": { - "type": "string" - }, - "type": "array", - "title": "List of roles", - "description": "Roles to be assigned if the rule matches" - } - }, - "additionalProperties": false, - "type": "object", - "required": [ - "jsonpath", - "operator", - "value", - "roles" - ], - "title": "JwtRoleRule", - "description": "Rule for extracting roles from JWT claims." 
- }, - "LivenessResponse": { - "properties": { - "alive": { - "type": "boolean", - "title": "Alive", - "description": "Flag indicating that the app is alive", + "no_tools": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "No Tools", + "description": "Whether to bypass all tools and MCP servers", + "default": false, "examples": [ true, false ] - } - }, - "type": "object", - "required": [ - "alive" - ], - "title": "LivenessResponse", - "description": "Model representing a response to a liveness request.\n\nAttributes:\n alive: If app is alive.\n\nExample:\n ```python\n liveness_response = LivenessResponse(alive=True)\n ```", - "examples": [ - { - "alive": true - } - ] - }, - "LlamaStackConfiguration": { - "properties": { - "url": { + }, + "generate_topic_summary": { "anyOf": [ { - "type": "string", - "minLength": 1, - "format": "uri" + "type": "boolean" }, { "type": "null" } ], - "title": "Llama Stack URL", - "description": "URL to Llama Stack service; used when library mode is disabled. Must be a valid HTTP or HTTPS URL." 
+ "title": "Generate Topic Summary", + "description": "Whether to generate topic summary for new conversations", + "default": true, + "examples": [ + true, + false + ] }, - "api_key": { + "media_type": { "anyOf": [ { - "type": "string", - "format": "password", - "writeOnly": true + "type": "string" }, { "type": "null" } ], - "title": "API key", - "description": "API key to access Llama Stack service" + "title": "Media Type", + "description": "Media type for the response format", + "examples": [ + "application/json", + "text/plain" + ] }, - "use_as_library_client": { + "vector_store_ids": { "anyOf": [ { - "type": "boolean" + "items": { + "type": "string" + }, + "type": "array" }, { "type": "null" } ], - "title": "Use as library", - "description": "When set to true Llama Stack will be used in library mode, not in server mode (default)" + "title": "Vector Store Ids", + "description": "Optional list of specific vector store IDs to query for RAG. If not provided, all available vector stores will be queried.", + "examples": [ + "ocp_docs", + "knowledge_base", + "vector_db_1" + ] }, - "library_client_config_path": { + "shield_ids": { "anyOf": [ { - "type": "string" + "items": { + "type": "string" + }, + "type": "array" }, { "type": "null" } ], - "title": "Llama Stack configuration path", - "description": "Path to configuration file used when Llama Stack is run in library mode" + "title": "Shield Ids", + "description": "Optional list of safety shield IDs to apply. If None, all configured shields are used. ", + "examples": [ + "llama-guard", + "custom-shield" + ] }, - "timeout": { - "type": "integer", - "exclusiveMinimum": 0.0, - "title": "Request timeout", - "description": "Timeout in seconds for requests to Llama Stack service. 
Default is 180 seconds (3 minutes) to accommodate long-running RAG queries.", - "default": 180 + "solr": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Solr", + "description": "Solr-specific query parameters including filter queries", + "examples": [ + { + "fq": [ + "product:*openshift*", + "product_version:*4.16*" + ] + } + ] } }, "additionalProperties": false, "type": "object", - "title": "LlamaStackConfiguration", - "description": "Llama stack configuration.\n\nLlama Stack is a comprehensive system that provides a uniform set of tools\nfor building, scaling, and deploying generative AI applications, enabling\ndevelopers to create, integrate, and orchestrate multiple AI services and\ncapabilities into an adaptable setup.\n\nUseful resources:\n\n - [Llama Stack](https://www.llama.com/products/llama-stack/)\n - [Python Llama Stack client](https://github.com/llamastack/llama-stack-client-python)\n - [Build AI Applications with Llama Stack](https://llamastack.github.io/)" - }, - "MCPClientAuthOptionsResponse": { - "properties": { - "servers": { - "items": { - "$ref": "#/components/schemas/MCPServerAuthInfo" - }, - "type": "array", - "title": "Servers", - "description": "List of MCP servers that accept client-provided authorization" - } - }, - "type": "object", - "title": "MCPClientAuthOptionsResponse", - "description": "Response containing MCP servers that accept client-provided authorization.", + "required": [ + "query" + ], + "title": "QueryRequest", + "description": "Model representing a request for the LLM (Language Model).\n\nAttributes:\n query: The query string.\n conversation_id: The optional conversation ID (UUID).\n provider: The optional provider.\n model: The optional model.\n system_prompt: The optional system prompt.\n attachments: The optional attachments.\n no_tools: Whether to bypass all tools and MCP servers (default: False).\n generate_topic_summary: Whether to generate topic 
summary for new conversations.\n media_type: The optional media type for response format (application/json or text/plain).\n vector_store_ids: The optional list of specific vector store IDs to query for RAG.\n shield_ids: The optional list of safety shield IDs to apply.\n\nExample:\n ```python\n query_request = QueryRequest(query=\"Tell me about Kubernetes\")\n ```", "examples": [ { - "servers": [ + "attachments": [ { - "client_auth_headers": [ - "Authorization" - ], - "name": "github" + "attachment_type": "log", + "content": "this is attachment", + "content_type": "text/plain" }, { - "client_auth_headers": [ - "Authorization", - "X-API-Key" - ], - "name": "gitlab" + "attachment_type": "configuration", + "content": "kind: Pod\n metadata:\n name: private-reg", + "content_type": "application/yaml" + }, + { + "attachment_type": "configuration", + "content": "foo: bar", + "content_type": "application/yaml" } + ], + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "generate_topic_summary": true, + "model": "model-name", + "no_tools": false, + "provider": "openai", + "query": "write a deployment yaml for the mongodb image", + "system_prompt": "You are a helpful assistant", + "vector_store_ids": [ + "ocp_docs", + "knowledge_base" ] } ] }, - "MCPServerAuthInfo": { + "QueryResponse": { "properties": { - "name": { + "conversation_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Conversation Id", + "description": "The optional conversation ID (UUID)", + "examples": [ + "c5260aec-4d82-4370-9fdf-05cf908b3f16" + ] + }, + "response": { "type": "string", - "title": "Name", - "description": "MCP server name" + "title": "Response", + "description": "Response from LLM", + "examples": [ + "Kubernetes is an open-source container orchestration system for automating ..." 
+ ] }, - "client_auth_headers": { + "rag_chunks": { "items": { - "type": "string" + "$ref": "#/components/schemas/RAGChunk" }, "type": "array", - "title": "Client Auth Headers", - "description": "List of authentication header names for client-provided tokens" - } - }, - "type": "object", - "required": [ - "name", - "client_auth_headers" - ], - "title": "MCPServerAuthInfo", - "description": "Information about MCP server client authentication options." - }, - "Message": { - "properties": { - "content": { - "type": "string", - "title": "Content", - "description": "The message content", + "title": "Rag Chunks", + "description": "Deprecated: List of RAG chunks used to generate the response." + }, + "referenced_documents": { + "items": { + "$ref": "#/components/schemas/ReferencedDocument" + }, + "type": "array", + "title": "Referenced Documents", + "description": "List of documents referenced in generating the response", "examples": [ - "Hello, how can I help you?" + [ + { + "doc_title": "Operator Lifecycle Manager (OLM)", + "doc_url": "https://docs.openshift.com/container-platform/4.15/operators/olm/index.html" + } + ] ] }, - "type": { - "type": "string", - "enum": [ - "user", - "assistant", - "system", - "developer" - ], - "title": "Type", - "description": "The type of message", + "truncated": { + "type": "boolean", + "title": "Truncated", + "description": "Deprecated:Whether conversation history was truncated", + "default": false, "examples": [ - "user", - "assistant", - "system", - "developer" + false, + true ] - } - }, - "type": "object", - "required": [ - "content", - "type" - ], - "title": "Message", - "description": "Model representing a message in a conversation turn.\n\nAttributes:\n content: The message content.\n type: The type of message." 
- }, - "ModelContextProtocolServer": { - "properties": { - "name": { - "type": "string", - "title": "MCP name", - "description": "MCP server name that must be unique" }, - "provider_id": { - "type": "string", - "title": "Provider ID", - "description": "MCP provider identification", - "default": "model-context-protocol" + "input_tokens": { + "type": "integer", + "title": "Input Tokens", + "description": "Number of tokens sent to LLM", + "default": 0, + "examples": [ + 150, + 250, + 500 + ] }, - "url": { - "type": "string", - "title": "MCP server URL", - "description": "URL of the MCP server" + "output_tokens": { + "type": "integer", + "title": "Output Tokens", + "description": "Number of tokens received from LLM", + "default": 0, + "examples": [ + 50, + 100, + 200 + ] }, - "authorization_headers": { + "available_quotas": { "additionalProperties": { - "type": "string" + "type": "integer" }, "type": "object", - "title": "Authorization headers", - "description": "Headers to send to the MCP server. The map contains the header name and the path to a file containing the header value (secret). There are 3 special cases: 1. Usage of the kubernetes token in the header. To specify this use a string 'kubernetes' instead of the file path. 2. Usage of the client-provided token in the header. To specify this use a string 'client' instead of the file path. 3. Usage of the oauth token in the header. To specify this use a string 'oauth' instead of the file path. " + "title": "Available Quotas", + "description": "Quota available as measured by all configured quota limiters", + "examples": [ + { + "daily": 1000, + "monthly": 50000 + } + ] }, - "headers": { + "tool_calls": { "items": { - "type": "string" + "$ref": "#/components/schemas/ToolCallSummary" }, "type": "array", - "title": "Propagated headers", - "description": "List of HTTP header names to automatically forward from the incoming request to this MCP server. 
Headers listed here are extracted from the original client request and included when calling the MCP server. This is useful when infrastructure components (e.g. API gateways) inject headers that MCP servers need, such as x-rh-identity in HCC. Header matching is case-insensitive. These headers are additive with authorization_headers and MCP-HEADERS." + "title": "Tool Calls", + "description": "List of tool calls made during response generation" }, - "timeout": { - "anyOf": [ - { - "type": "integer", - "exclusiveMinimum": 0.0 - }, - { - "type": "null" - } - ], - "title": "Request timeout", - "description": "Timeout in seconds for requests to the MCP server. If not specified, the default timeout from Llama Stack will be used. Note: This field is reserved for future use when Llama Stack adds timeout support." - } - }, - "additionalProperties": false, - "type": "object", - "required": [ - "name", - "url" - ], - "title": "ModelContextProtocolServer", - "description": "Model context protocol server configuration.\n\nMCP (Model Context Protocol) servers provide tools and capabilities to the\nAI agents. These are configured by this structure. Only MCP servers\ndefined in the lightspeed-stack.yaml configuration are available to the\nagents. 
Tools configured in the llama-stack run.yaml are not accessible to\nlightspeed-core agents.\n\nUseful resources:\n\n- [Model Context Protocol](https://modelcontextprotocol.io/docs/getting-started/intro)\n- [MCP FAQs](https://modelcontextprotocol.io/faqs)\n- [Wikipedia article](https://en.wikipedia.org/wiki/Model_Context_Protocol)" - }, - "ModelsResponse": { - "properties": { - "models": { + "tool_results": { "items": { - "additionalProperties": true, - "type": "object" + "$ref": "#/components/schemas/ToolResultSummary" }, "type": "array", - "title": "Models", - "description": "List of models available" + "title": "Tool Results", + "description": "List of tool results" } }, "type": "object", "required": [ - "models" + "response" ], - "title": "ModelsResponse", - "description": "Model representing a response to models request.", + "title": "QueryResponse", + "description": "Model representing LLM response to a query.\n\nAttributes:\n conversation_id: The optional conversation ID (UUID).\n response: The response.\n rag_chunks: Deprecated. 
List of RAG chunks used to generate the response.\n This information is now available in tool_results under file_search_call type.\n referenced_documents: The URLs and titles for the documents used to generate the response.\n tool_calls: List of tool calls made during response generation.\n tool_results: List of tool results.\n truncated: Whether conversation history was truncated.\n input_tokens: Number of tokens sent to LLM.\n output_tokens: Number of tokens received from LLM.\n available_quotas: Quota available as measured by all configured quota limiters.", "examples": [ { - "models": [ + "available_quotas": { + "ClusterQuotaLimiter": 998911, + "UserQuotaLimiter": 998911 + }, + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "input_tokens": 123, + "output_tokens": 456, + "referenced_documents": [ { - "api_model_type": "llm", - "identifier": "openai/gpt-4-turbo", - "metadata": {}, - "model_type": "llm", - "provider_id": "openai", - "provider_resource_id": "gpt-4-turbo", - "type": "model" + "doc_title": "Operator Lifecycle Manager concepts and resources", + "doc_url": "https://docs.openshift.com/container-platform/4.15/operators/understanding/olm/olm-understanding-olm.html" } - ] - } - ] - }, - "MutualTLSSecurityScheme": { - "properties": { - "description": { - "anyOf": [ + ], + "response": "Operator Lifecycle Manager (OLM) helps users install...", + "tool_calls": [ { - "type": "string" - }, + "args": {}, + "id": "1", + "name": "tool1", + "type": "tool_call" + } + ], + "tool_results": [ { - "type": "null" + "content": "bla", + "id": "1", + "round": 1, + "status": "success", + "type": "tool_result" } ], - "title": "Description" - }, - "type": { - "type": "string", - "const": "mutualTLS", - "title": "Type", - "default": "mutualTLS" + "truncated": false } - }, - "type": "object", - "title": "MutualTLSSecurityScheme", - "description": "Defines a security scheme using mTLS authentication." 
+ ] }, - "NotFoundResponse": { + "QuotaExceededResponse": { "properties": { "status_code": { "type": "integer", @@ -7451,158 +10625,195 @@ "required": [ "status_code", "detail" - ], - "title": "NotFoundResponse", - "description": "404 Not Found - Resource does not exist.", + ], + "title": "QuotaExceededResponse", + "description": "429 Too Many Requests - Quota limit exceeded.", "examples": [ { "detail": { - "cause": "Conversation with ID 123e4567-e89b-12d3-a456-426614174000 does not exist", - "response": "Conversation not found" + "cause": "The token quota for model gpt-4-turbo has been exceeded.", + "response": "The model quota has been exceeded" }, - "label": "conversation" + "label": "model" }, { "detail": { - "cause": "Provider with ID openai does not exist", - "response": "Provider not found" + "cause": "User 123 has no available tokens.", + "response": "The quota has been exceeded" }, - "label": "provider" + "label": "user none" }, { "detail": { - "cause": "Model with ID gpt-4-turbo is not configured", - "response": "Model not found" + "cause": "Cluster has no available tokens.", + "response": "The quota has been exceeded" }, - "label": "model" + "label": "cluster none" }, { "detail": { - "cause": "Rag with ID vs_7b52a8cf-0fa3-489c-beab-27e061d102f3 does not exist", - "response": "Rag not found" + "cause": "Unknown subject 999 has no available tokens.", + "response": "The quota has been exceeded" }, - "label": "rag" + "label": "subject none" }, { "detail": { - "cause": "Streaming Request with ID 123e4567-e89b-12d3-a456-426614174000 does not exist", - "response": "Streaming Request not found" + "cause": "User 123 has 5 tokens, but 10 tokens are needed.", + "response": "The quota has been exceeded" }, - "label": "streaming request" + "label": "user insufficient" + }, + { + "detail": { + "cause": "Cluster has 500 tokens, but 900 tokens are needed.", + "response": "The quota has been exceeded" + }, + "label": "cluster insufficient" + }, + { + "detail": { + 
"cause": "Unknown subject 999 has 3 tokens, but 6 tokens are needed.", + "response": "The quota has been exceeded" + }, + "label": "subject insufficient" } ] }, - "OAuth2SecurityScheme": { + "QuotaHandlersConfiguration": { "properties": { - "description": { + "sqlite": { "anyOf": [ { - "type": "string" + "$ref": "#/components/schemas/SQLiteDatabaseConfiguration" }, { "type": "null" } ], - "title": "Description" - }, - "flows": { - "$ref": "#/components/schemas/OAuthFlows" + "title": "SQLite configuration", + "description": "SQLite database configuration" }, - "oauth2MetadataUrl": { + "postgres": { "anyOf": [ { - "type": "string" + "$ref": "#/components/schemas/PostgreSQLDatabaseConfiguration" }, { "type": "null" } ], - "title": "Oauth2Metadataurl" + "title": "PostgreSQL configuration", + "description": "PostgreSQL database configuration" }, - "type": { - "type": "string", - "const": "oauth2", - "title": "Type", - "default": "oauth2" + "limiters": { + "items": { + "$ref": "#/components/schemas/QuotaLimiterConfiguration" + }, + "type": "array", + "title": "Quota limiters", + "description": "Quota limiters configuration" + }, + "scheduler": { + "$ref": "#/components/schemas/QuotaSchedulerConfiguration", + "title": "Quota scheduler", + "description": "Quota scheduler configuration" + }, + "enable_token_history": { + "type": "boolean", + "title": "Enable token history", + "description": "Enables storing information about token usage history", + "default": false } }, + "additionalProperties": false, "type": "object", - "required": [ - "flows" - ], - "title": "OAuth2SecurityScheme", - "description": "Defines a security scheme using OAuth 2.0." + "title": "QuotaHandlersConfiguration", + "description": "Quota limiter configuration.\n\nIt is possible to limit quota usage per user or per service or services\n(that typically run in one cluster). Each limit is configured as a separate\n_quota limiter_. 
It can be of type `user_limiter` or `cluster_limiter`\n(which is name that makes sense in OpenShift deployment)." }, - "OAuthFlows": { + "QuotaLimiterConfiguration": { "properties": { - "authorizationCode": { - "anyOf": [ - { - "$ref": "#/components/schemas/AuthorizationCodeOAuthFlow" - }, - { - "type": "null" - } - ] + "type": { + "type": "string", + "enum": [ + "user_limiter", + "cluster_limiter" + ], + "title": "Quota limiter type", + "description": "Quota limiter type, either user_limiter or cluster_limiter" }, - "clientCredentials": { - "anyOf": [ - { - "$ref": "#/components/schemas/ClientCredentialsOAuthFlow" - }, - { - "type": "null" - } - ] + "name": { + "type": "string", + "title": "Quota limiter name", + "description": "Human readable quota limiter name" }, - "implicit": { - "anyOf": [ - { - "$ref": "#/components/schemas/ImplicitOAuthFlow" - }, - { - "type": "null" - } - ] + "initial_quota": { + "type": "integer", + "minimum": 0.0, + "title": "Initial quota", + "description": "Quota set at beginning of the period" }, - "password": { - "anyOf": [ - { - "$ref": "#/components/schemas/PasswordOAuthFlow" - }, - { - "type": "null" - } - ] + "quota_increase": { + "type": "integer", + "minimum": 0.0, + "title": "Quota increase", + "description": "Delta value used to increase quota when period is reached" + }, + "period": { + "type": "string", + "title": "Period", + "description": "Period specified in human readable form" } }, + "additionalProperties": false, "type": "object", - "title": "OAuthFlows", - "description": "Defines the configuration for the supported OAuth 2.0 flows." + "required": [ + "type", + "name", + "initial_quota", + "quota_increase", + "period" + ], + "title": "QuotaLimiterConfiguration", + "description": "Configuration for one quota limiter.\n\nThere are three configuration options for each limiter:\n\n1. 
``period`` is specified in a human-readable form, see\n https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-INTERVAL-INPUT\n for all possible options. When the end of the period is reached, the\n quota is reset or increased.\n2. ``initial_quota`` is the value set at the beginning of the period.\n3. ``quota_increase`` is the value (if specified) used to increase the\n quota when the period is reached.\n\nThere are two basic use cases:\n\n1. When the quota needs to be reset to a specific value periodically (for\n example on a weekly or monthly basis), set ``initial_quota`` to the\n required value.\n2. When the quota needs to be increased by a specific value periodically\n (for example on a daily basis), set ``quota_increase``." }, - "OkpConfiguration": { + "QuotaSchedulerConfiguration": { "properties": { - "offline": { - "type": "boolean", - "title": "OKP offline mode", - "description": "When True, use parent_id for OKP chunk source URLs. When False, use reference_url for chunk source URLs.", - "default": true + "period": { + "type": "integer", + "exclusiveMinimum": 0.0, + "title": "Period", + "description": "Quota scheduler period specified in seconds", + "default": 1 }, - "chunk_filter_query": { - "type": "string", - "title": "OKP chunk filter query", - "description": "OKP filter query applied to every OKP search request. Defaults to 'is_chunk:true' to restrict results to chunk documents. To add extra constraints, extend the expression using boolean syntax, e.g. 'is_chunk:true AND product:*openshift*'.", - "default": "is_chunk:true" + "database_reconnection_count": { + "type": "integer", + "exclusiveMinimum": 0.0, + "title": "Database reconnection count on startup", + "description": "Database reconnection count on startup. 
When database for quota is not available on startup, the service tries to reconnect N times with specified delay.", + "default": 10 + }, + "database_reconnection_delay": { + "type": "integer", + "exclusiveMinimum": 0.0, + "title": "Database reconnection delay", + "description": "Database reconnection delay specified in seconds. When database for quota is not available on startup, the service tries to reconnect N times with specified delay.", + "default": 1 } }, "additionalProperties": false, "type": "object", - "title": "OkpConfiguration", - "description": "OKP (Offline Knowledge Portal) provider configuration.\n\nControls provider-specific behaviour for the OKP vector store.\nOnly relevant when ``\"okp\"`` is listed in ``rag.inline`` or ``rag.tool``." + "title": "QuotaSchedulerConfiguration", + "description": "Quota scheduler configuration." }, - "OpenIdConnectSecurityScheme": { + "RAGChunk": { "properties": { - "description": { + "content": { + "type": "string", + "title": "Content", + "description": "The content of the chunk" + }, + "source": { "anyOf": [ { "type": "string" @@ -7611,92 +10822,53 @@ "type": "null" } ], - "title": "Description" + "title": "Source", + "description": "Index name identifying the knowledge source from configuration" }, - "openIdConnectUrl": { - "type": "string", - "title": "Openidconnecturl" + "score": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "title": "Score", + "description": "Relevance score" }, - "type": { - "type": "string", - "const": "openIdConnect", - "title": "Type", - "default": "openIdConnect" - } - }, - "type": "object", - "required": [ - "openIdConnectUrl" - ], - "title": "OpenIdConnectSecurityScheme", - "description": "Defines a security scheme using OpenID Connect." 
- }, - "PasswordOAuthFlow": { - "properties": { - "refreshUrl": { + "attributes": { "anyOf": [ { - "type": "string" + "additionalProperties": true, + "type": "object" }, { "type": "null" } ], - "title": "Refreshurl" - }, - "scopes": { - "additionalProperties": { - "type": "string" - }, - "type": "object", - "title": "Scopes" - }, - "tokenUrl": { - "type": "string", - "title": "Tokenurl" + "title": "Attributes", + "description": "Document metadata from the RAG provider (e.g., url, title, author)" } }, "type": "object", "required": [ - "scopes", - "tokenUrl" + "content" ], - "title": "PasswordOAuthFlow", - "description": "Defines configuration details for the OAuth 2.0 Resource Owner Password flow." + "title": "RAGChunk", + "description": "Model representing a RAG chunk used in the response." }, - "PostgreSQLDatabaseConfiguration": { + "RAGInfoResponse": { "properties": { - "host": { - "type": "string", - "title": "Hostname", - "description": "Database server host or socket directory", - "default": "localhost" - }, - "port": { - "type": "integer", - "exclusiveMinimum": 0.0, - "title": "Port", - "description": "Database server port", - "default": 5432 - }, - "db": { - "type": "string", - "title": "Database name", - "description": "Database name to connect to" - }, - "user": { - "type": "string", - "title": "User name", - "description": "Database user name used to authenticate" - }, - "password": { + "id": { "type": "string", - "format": "password", - "title": "Password", - "description": "Password used to authenticate", - "writeOnly": true + "title": "Id", + "description": "Vector DB unique ID", + "examples": [ + "vs_00000000_0000_0000" + ] }, - "namespace": { + "name": { "anyOf": [ { "type": "string" @@ -7705,296 +10877,244 @@ "type": "null" } ], - "title": "Name space", - "description": "Database namespace", - "default": "public" - }, - "ssl_mode": { - "type": "string", - "title": "SSL mode", - "description": "SSL mode", - "default": "prefer" + "title": "Name", + 
"description": "Human readable vector DB name", + "examples": [ + "Faiss Store with Knowledge base" + ] }, - "gss_encmode": { - "type": "string", - "title": "GSS encmode", - "description": "This option determines whether or with what priority a secure GSS TCP/IP connection will be negotiated with the server.", - "default": "prefer" + "created_at": { + "type": "integer", + "title": "Created At", + "description": "When the vector store was created, represented as Unix time", + "examples": [ + 1763391371 + ] }, - "ca_cert_path": { + "last_active_at": { "anyOf": [ { - "type": "string", - "format": "file-path" + "type": "integer" }, { "type": "null" } ], - "title": "CA certificate path", - "description": "Path to CA certificate" - } - }, - "additionalProperties": false, - "type": "object", - "required": [ - "db", - "user", - "password" - ], - "title": "PostgreSQLDatabaseConfiguration", - "description": "PostgreSQL database configuration.\n\nPostgreSQL database is used by Lightspeed Core Stack service for storing\ninformation about conversation IDs. 
It can also be leveraged to store\nconversation history and information about quota usage.\n\nUseful resources:\n\n- [Psycopg: connection classes](https://www.psycopg.org/psycopg3/docs/api/connections.html)\n- [PostgreSQL connection strings](https://www.connectionstrings.com/postgresql/)\n- [How to Use PostgreSQL in Python](https://www.freecodecamp.org/news/postgresql-in-python/)" - }, - "PromptTooLongResponse": { - "properties": { - "status_code": { - "type": "integer", - "title": "Status Code" - }, - "detail": { - "$ref": "#/components/schemas/DetailModel" - } - }, - "type": "object", - "required": [ - "status_code", - "detail" - ], - "title": "PromptTooLongResponse", - "description": "413 Payload Too Large - Prompt is too long.", - "examples": [ - { - "detail": { - "cause": "The prompt exceeds the maximum allowed length.", - "response": "Prompt is too long" - }, - "label": "prompt too long" - } - ] - }, - "ProviderHealthStatus": { - "properties": { - "provider_id": { - "type": "string", - "title": "Provider Id", - "description": "The ID of the provider" + "title": "Last Active At", + "description": "When the vector store was last active, represented as Unix time", + "examples": [ + 1763391371 + ] }, - "status": { - "type": "string", - "title": "Status", - "description": "The health status", + "usage_bytes": { + "type": "integer", + "title": "Usage Bytes", + "description": "Storage byte(s) used by this vector DB", "examples": [ - "ok", - "unhealthy", - "not_implemented" + 0 ] }, - "message": { + "expires_at": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "null" } ], - "title": "Message", - "description": "Optional message about the health status", + "title": "Expires At", + "description": "When the vector store expires, represented as Unix time", "examples": [ - "All systems operational", - "Llama Stack is unavailable" + 1763391371 ] - } - }, - "type": "object", - "required": [ - "provider_id", - "status" - ], - "title": 
"ProviderHealthStatus", - "description": "Model representing the health status of a provider.\n\nAttributes:\n provider_id: The ID of the provider.\n status: The health status ('ok', 'unhealthy', 'not_implemented').\n message: Optional message about the health status." - }, - "ProviderResponse": { - "properties": { - "api": { - "type": "string", - "title": "Api", - "description": "The API this provider implements" - }, - "config": { - "additionalProperties": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "items": {}, - "type": "array" - }, - {}, - { - "type": "null" - } - ] - }, - "type": "object", - "title": "Config", - "description": "Provider configuration parameters" - }, - "health": { - "additionalProperties": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "items": {}, - "type": "array" - }, - {}, - { - "type": "null" - } - ] - }, - "type": "object", - "title": "Health", - "description": "Current health status of the provider" }, - "provider_id": { + "object": { "type": "string", - "title": "Provider Id", - "description": "Unique provider identifier" + "title": "Object", + "description": "Object type", + "examples": [ + "vector_store" + ] }, - "provider_type": { + "status": { "type": "string", - "title": "Provider Type", - "description": "Provider implementation type" + "title": "Status", + "description": "Vector DB status", + "examples": [ + "completed" + ] } }, "type": "object", "required": [ - "api", - "config", - "health", - "provider_id", - "provider_type" + "id", + "created_at", + "usage_bytes", + "object", + "status" ], - "title": "ProviderResponse", - "description": "Model representing a response to get specific provider request.", + "title": "RAGInfoResponse", + "description": "Model representing a response with information about RAG DB.", "examples": [ { - "api": "inference", - "config": { - "api_key": "********" - }, - "health": { - 
"message": "Healthy", - "status": "OK" - }, - "provider_id": "openai", - "provider_type": "remote::openai" + "created_at": 1763391371, + "id": "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3", + "last_active_at": 1763391371, + "name": "Faiss Store with Knowledge base", + "object": "vector_store", + "status": "completed", + "usage_bytes": 1024000 } ] }, - "ProvidersListResponse": { + "RAGListResponse": { "properties": { - "providers": { - "additionalProperties": { - "items": { - "additionalProperties": true, - "type": "object" - }, - "type": "array" + "rags": { + "items": { + "type": "string" }, - "type": "object", - "title": "Providers", - "description": "List of available API types and their corresponding providers" + "type": "array", + "title": "RAG list response", + "description": "List of RAG identifiers", + "examples": [ + "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3", + "vs_7b52a8cf-0fa3-489c-cafe-27e061d102f3" + ] } }, "type": "object", "required": [ - "providers" + "rags" ], - "title": "ProvidersListResponse", - "description": "Model representing a response to providers request.", + "title": "RAGListResponse", + "description": "Model representing a response to list RAGs request.", "examples": [ { - "providers": { - "agents": [ - { - "provider_id": "meta-reference", - "provider_type": "inline::meta-reference" - } - ], - "inference": [ - { - "provider_id": "sentence-transformers", - "provider_type": "inline::sentence-transformers" - }, - { - "provider_id": "openai", - "provider_type": "remote::openai" - } - ] - } + "rags": [ + "vs_00000000-cafe-babe-0000-000000000000", + "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3", + "vs_7b52a8cf-0fa3-489c-cafe-27e061d102f3" + ] } ] }, - "QueryRequest": { + "RHIdentityConfiguration": { "properties": { - "query": { - "type": "string", - "title": "Query", - "description": "The query string", - "examples": [ - "What is Kubernetes?" 
- ] + "required_entitlements": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Required entitlements", + "description": "List of all required entitlements." + } + }, + "additionalProperties": false, + "type": "object", + "title": "RHIdentityConfiguration", + "description": "Red Hat Identity authentication configuration." + }, + "RagConfiguration": { + "properties": { + "inline": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Inline RAG IDs", + "description": "RAG IDs whose sources are injected as context before the LLM call. Use 'okp' to enable OKP inline RAG. Empty by default (no inline RAG)." }, - "conversation_id": { + "tool": { "anyOf": [ { - "type": "string" + "items": { + "type": "string" + }, + "type": "array" }, { "type": "null" } ], - "title": "Conversation Id", - "description": "The optional conversation ID (UUID)", + "title": "Tool RAG IDs", + "description": "RAG IDs made available to the LLM as a file_search tool. Use 'okp' to include the OKP vector store. When omitted, all registered BYOK vector stores are used (backward compatibility)." + } + }, + "additionalProperties": false, + "type": "object", + "title": "RagConfiguration", + "description": "RAG strategy configuration.\n\nControls which RAG sources are used for inline and tool-based retrieval.\n\nEach strategy lists RAG IDs to include. The special ID ``\"okp\"`` defined in constants,\nactivates the OKP provider; all other IDs refer to entries in ``byok_rag``.\n\nBackward compatibility:\n - ``inline`` defaults to ``[]`` (no inline RAG).\n - ``tool`` defaults to ``None`` which means all registered vector stores\n are used (identical to the previous ``tool.byok.enabled = True`` default)." 
+ }, + "ReadinessResponse": { + "properties": { + "ready": { + "type": "boolean", + "title": "Ready", + "description": "Flag indicating if service is ready", "examples": [ - "c5260aec-4d82-4370-9fdf-05cf908b3f16" + true, + false ] }, - "provider": { + "reason": { + "type": "string", + "title": "Reason", + "description": "The reason for the readiness", + "examples": [ + "Service is ready" + ] + }, + "providers": { + "items": { + "$ref": "#/components/schemas/ProviderHealthStatus" + }, + "type": "array", + "title": "Providers", + "description": "List of unhealthy providers in case of readiness failure.", + "examples": [] + } + }, + "type": "object", + "required": [ + "ready", + "reason", + "providers" + ], + "title": "ReadinessResponse", + "description": "Model representing response to a readiness request.\n\nAttributes:\n ready: If service is ready.\n reason: The reason for the readiness.\n providers: List of unhealthy providers in case of readiness failure.\n\nExample:\n ```python\n readiness_response = ReadinessResponse(\n ready=False,\n reason=\"Service is not ready\",\n providers=[\n ProviderHealthStatus(\n provider_id=\"ollama\",\n status=\"unhealthy\",\n message=\"Server is unavailable\"\n )\n ]\n )\n ```", + "examples": [ + { + "providers": [], + "ready": true, + "reason": "Service is ready" + } + ] + }, + "ReferencedDocument": { + "properties": { + "doc_url": { "anyOf": [ { - "type": "string" + "type": "string", + "minLength": 1, + "format": "uri" }, { "type": "null" } ], - "title": "Provider", - "description": "The optional provider", - "examples": [ - "openai", - "watsonx" - ] + "title": "Doc Url", + "description": "URL of the referenced document" }, - "model": { + "doc_title": { "anyOf": [ { "type": "string" @@ -8003,13 +11123,10 @@ "type": "null" } ], - "title": "Model", - "description": "The optional model", - "examples": [ - "gpt4mini" - ] + "title": "Doc Title", + "description": "Title of the referenced document" }, - "system_prompt": { + "source": { 
"anyOf": [ { "type": "string" @@ -8018,80 +11135,100 @@ "type": "null" } ], - "title": "System Prompt", - "description": "The optional system prompt.", - "examples": [ - "You are OpenShift assistant.", - "You are Ansible assistant." - ] + "title": "Source", + "description": "Index name identifying the knowledge source from configuration" + } + }, + "type": "object", + "title": "ReferencedDocument", + "description": "Model representing a document referenced in generating a response.\n\nAttributes:\n doc_url: Url to the referenced doc.\n doc_title: Title of the referenced doc." + }, + "ResponseInput": { + "anyOf": [ + { + "type": "string" }, - "attachments": { + { + "items": { + "$ref": "#/components/schemas/ResponseItem" + }, + "type": "array" + } + ] + }, + "ResponseItem": { + "anyOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseMessage-Input" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools" + }, + { + "$ref": "#/components/schemas/OpenAIResponseMCPApprovalRequest" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseMCPApprovalResponse" + } + ] + }, + "ResponsesRequest": { + "properties": { + "input": { + "$ref": "#/components/schemas/ResponseInput" + }, + "model": { "anyOf": [ { - "items": { - "$ref": "#/components/schemas/Attachment" - }, - "type": "array" + "type": "string" }, { "type": "null" } ], - "title": "Attachments", - "description": "The optional list of attachments.", - "examples": [ - { - "attachment_type": "log", - "content": "this is attachment", - "content_type": "text/plain" - }, - { - 
"attachment_type": "configuration", - "content": "kind: Pod\n metadata:\n name: private-reg", - "content_type": "application/yaml" - }, - { - "attachment_type": "configuration", - "content": "foo: bar", - "content_type": "application/yaml" - } - ] + "title": "Model" }, - "no_tools": { + "conversation": { "anyOf": [ { - "type": "boolean" + "type": "string" }, { "type": "null" } ], - "title": "No Tools", - "description": "Whether to bypass all tools and MCP servers", - "default": false, - "examples": [ - true, - false - ] + "title": "Conversation" }, - "generate_topic_summary": { + "include": { "anyOf": [ { - "type": "boolean" + "items": { + "$ref": "#/components/schemas/IncludeParameter" + }, + "type": "array" }, { "type": "null" } ], - "title": "Generate Topic Summary", - "description": "Whether to generate topic summary for new conversations", - "default": true, - "examples": [ - true, - false - ] + "title": "Include" }, - "media_type": { + "instructions": { "anyOf": [ { "type": "string" @@ -8100,496 +11237,252 @@ "type": "null" } ], - "title": "Media Type", - "description": "Media type for the response format", - "examples": [ - "application/json", - "text/plain" - ] + "title": "Instructions" }, - "vector_store_ids": { + "max_infer_iters": { "anyOf": [ { - "items": { - "type": "string" - }, - "type": "array" + "type": "integer" }, { "type": "null" } ], - "title": "Vector Store Ids", - "description": "Optional list of specific vector store IDs to query for RAG. If not provided, all available vector stores will be queried.", - "examples": [ - "ocp_docs", - "knowledge_base", - "vector_db_1" - ] + "title": "Max Infer Iters" }, - "shield_ids": { + "max_output_tokens": { "anyOf": [ { - "items": { - "type": "string" - }, - "type": "array" + "type": "integer" }, { "type": "null" } ], - "title": "Shield Ids", - "description": "Optional list of safety shield IDs to apply. If None, all configured shields are used. 
If provided, must contain at least one valid shield ID (empty list raises 422 error).", - "examples": [ - "llama-guard", - "custom-shield" - ] + "title": "Max Output Tokens" }, - "solr": { + "max_tool_calls": { "anyOf": [ { - "additionalProperties": true, - "type": "object" + "type": "integer" }, { "type": "null" } ], - "title": "Solr", - "description": "Solr-specific query parameters including filter queries", - "examples": [ - { - "fq": [ - "product:*openshift*", - "product_version:*4.16*" - ] - } - ] - } - }, - "additionalProperties": false, - "type": "object", - "required": [ - "query" - ], - "title": "QueryRequest", - "description": "Model representing a request for the LLM (Language Model).\n\nAttributes:\n query: The query string.\n conversation_id: The optional conversation ID (UUID).\n provider: The optional provider.\n model: The optional model.\n system_prompt: The optional system prompt.\n attachments: The optional attachments.\n no_tools: Whether to bypass all tools and MCP servers (default: False).\n generate_topic_summary: Whether to generate topic summary for new conversations.\n media_type: The optional media type for response format (application/json or text/plain).\n vector_store_ids: The optional list of specific vector store IDs to query for RAG.\n shield_ids: The optional list of safety shield IDs to apply.\n\nExample:\n ```python\n query_request = QueryRequest(query=\"Tell me about Kubernetes\")\n ```", - "examples": [ - { - "attachments": [ - { - "attachment_type": "log", - "content": "this is attachment", - "content_type": "text/plain" - }, - { - "attachment_type": "configuration", - "content": "kind: Pod\n metadata:\n name: private-reg", - "content_type": "application/yaml" - }, - { - "attachment_type": "configuration", - "content": "foo: bar", - "content_type": "application/yaml" - } - ], - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "generate_topic_summary": true, - "model": "model-name", - "no_tools": false, - 
"provider": "openai", - "query": "write a deployment yaml for the mongodb image", - "system_prompt": "You are a helpful assistant", - "vector_store_ids": [ - "ocp_docs", - "knowledge_base" - ] - } - ] - }, - "QueryResponse": { - "properties": { - "conversation_id": { + "title": "Max Tool Calls" + }, + "metadata": { "anyOf": [ { - "type": "string" + "additionalProperties": { + "type": "string" + }, + "type": "object" }, { "type": "null" } ], - "title": "Conversation Id", - "description": "The optional conversation ID (UUID)", - "examples": [ - "c5260aec-4d82-4370-9fdf-05cf908b3f16" - ] - }, - "response": { - "type": "string", - "title": "Response", - "description": "Response from LLM", - "examples": [ - "Kubernetes is an open-source container orchestration system for automating ..." - ] - }, - "rag_chunks": { - "items": { - "$ref": "#/components/schemas/RAGChunk" - }, - "type": "array", - "title": "Rag Chunks", - "description": "Deprecated: List of RAG chunks used to generate the response." 
- }, - "referenced_documents": { - "items": { - "$ref": "#/components/schemas/ReferencedDocument" - }, - "type": "array", - "title": "Referenced Documents", - "description": "List of documents referenced in generating the response", - "examples": [ - [ - { - "doc_title": "Operator Lifecycle Manager (OLM)", - "doc_url": "https://docs.openshift.com/container-platform/4.15/operators/olm/index.html" - } - ] - ] - }, - "truncated": { - "type": "boolean", - "title": "Truncated", - "description": "Deprecated:Whether conversation history was truncated", - "default": false, - "examples": [ - false, - true - ] - }, - "input_tokens": { - "type": "integer", - "title": "Input Tokens", - "description": "Number of tokens sent to LLM", - "default": 0, - "examples": [ - 150, - 250, - 500 - ] - }, - "output_tokens": { - "type": "integer", - "title": "Output Tokens", - "description": "Number of tokens received from LLM", - "default": 0, - "examples": [ - 50, - 100, - 200 - ] + "title": "Metadata" }, - "available_quotas": { - "additionalProperties": { - "type": "integer" - }, - "type": "object", - "title": "Available Quotas", - "description": "Quota available as measured by all configured quota limiters", - "examples": [ + "parallel_tool_calls": { + "anyOf": [ { - "daily": 1000, - "monthly": 50000 - } - ] - }, - "tool_calls": { - "items": { - "$ref": "#/components/schemas/ToolCallSummary" - }, - "type": "array", - "title": "Tool Calls", - "description": "List of tool calls made during response generation" - }, - "tool_results": { - "items": { - "$ref": "#/components/schemas/ToolResultSummary" - }, - "type": "array", - "title": "Tool Results", - "description": "List of tool results" - } - }, - "type": "object", - "required": [ - "response" - ], - "title": "QueryResponse", - "description": "Model representing LLM response to a query.\n\nAttributes:\n conversation_id: The optional conversation ID (UUID).\n response: The response.\n rag_chunks: Deprecated. 
List of RAG chunks used to generate the response.\n This information is now available in tool_results under file_search_call type.\n referenced_documents: The URLs and titles for the documents used to generate the response.\n tool_calls: List of tool calls made during response generation.\n tool_results: List of tool results.\n truncated: Whether conversation history was truncated.\n input_tokens: Number of tokens sent to LLM.\n output_tokens: Number of tokens received from LLM.\n available_quotas: Quota available as measured by all configured quota limiters.", - "examples": [ - { - "available_quotas": { - "ClusterQuotaLimiter": 998911, - "UserQuotaLimiter": 998911 - }, - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "input_tokens": 123, - "output_tokens": 456, - "referenced_documents": [ + "type": "boolean" + }, { - "doc_title": "Operator Lifecycle Manager concepts and resources", - "doc_url": "https://docs.openshift.com/container-platform/4.15/operators/understanding/olm/olm-understanding-olm.html" + "type": "null" } ], - "response": "Operator Lifecycle Manager (OLM) helps users install...", - "tool_calls": [ + "title": "Parallel Tool Calls" + }, + "previous_response_id": { + "anyOf": [ { - "args": {}, - "id": "1", - "name": "tool1", - "type": "tool_call" - } - ], - "tool_results": [ + "type": "string" + }, { - "content": "bla", - "id": "1", - "round": 1, - "status": "success", - "type": "tool_result" + "type": "null" } ], - "truncated": false - } - ] - }, - "QuotaExceededResponse": { - "properties": { - "status_code": { - "type": "integer", - "title": "Status Code" - }, - "detail": { - "$ref": "#/components/schemas/DetailModel" - } - }, - "type": "object", - "required": [ - "status_code", - "detail" - ], - "title": "QuotaExceededResponse", - "description": "429 Too Many Requests - Quota limit exceeded.", - "examples": [ - { - "detail": { - "cause": "The token quota for model gpt-4-turbo has been exceeded.", - "response": "The model quota has been 
exceeded" - }, - "label": "model" - }, - { - "detail": { - "cause": "User 123 has no available tokens.", - "response": "The quota has been exceeded" - }, - "label": "user none" - }, - { - "detail": { - "cause": "Cluster has no available tokens.", - "response": "The quota has been exceeded" - }, - "label": "cluster none" + "title": "Previous Response Id" }, - { - "detail": { - "cause": "Unknown subject 999 has no available tokens.", - "response": "The quota has been exceeded" - }, - "label": "subject none" - }, - { - "detail": { - "cause": "User 123 has 5 tokens, but 10 tokens are needed.", - "response": "The quota has been exceeded" - }, - "label": "user insufficient" - }, - { - "detail": { - "cause": "Cluster has 500 tokens, but 900 tokens are needed.", - "response": "The quota has been exceeded" - }, - "label": "cluster insufficient" + "prompt": { + "anyOf": [ + { + "$ref": "#/components/schemas/OpenAIResponsePrompt" + }, + { + "type": "null" + } + ] }, - { - "detail": { - "cause": "Unknown subject 999 has 3 tokens, but 6 tokens are needed.", - "response": "The quota has been exceeded" - }, - "label": "subject insufficient" - } - ] - }, - "QuotaHandlersConfiguration": { - "properties": { - "sqlite": { + "reasoning": { "anyOf": [ { - "$ref": "#/components/schemas/SQLiteDatabaseConfiguration" + "$ref": "#/components/schemas/OpenAIResponseReasoning" }, { "type": "null" } - ], - "title": "SQLite configuration", - "description": "SQLite database configuration" + ] }, - "postgres": { + "safety_identifier": { "anyOf": [ { - "$ref": "#/components/schemas/PostgreSQLDatabaseConfiguration" + "type": "string" }, { "type": "null" } ], - "title": "PostgreSQL configuration", - "description": "PostgreSQL database configuration" - }, - "limiters": { - "items": { - "$ref": "#/components/schemas/QuotaLimiterConfiguration" - }, - "type": "array", - "title": "Quota limiters", - "description": "Quota limiters configuration" + "title": "Safety Identifier" }, - "scheduler": { - "$ref": 
"#/components/schemas/QuotaSchedulerConfiguration", - "title": "Quota scheduler", - "description": "Quota scheduler configuration" + "store": { + "type": "boolean", + "title": "Store", + "default": true }, - "enable_token_history": { + "stream": { "type": "boolean", - "title": "Enable token history", - "description": "Enables storing information about token usage history", + "title": "Stream", "default": false - } - }, - "additionalProperties": false, - "type": "object", - "title": "QuotaHandlersConfiguration", - "description": "Quota limiter configuration.\n\nIt is possible to limit quota usage per user or per service or services\n(that typically run in one cluster). Each limit is configured as a separate\n_quota limiter_. It can be of type `user_limiter` or `cluster_limiter`\n(which is name that makes sense in OpenShift deployment)." - }, - "QuotaLimiterConfiguration": { - "properties": { - "type": { - "type": "string", - "enum": [ - "user_limiter", - "cluster_limiter" - ], - "title": "Quota limiter type", - "description": "Quota limiter type, either user_limiter or cluster_limiter" - }, - "name": { - "type": "string", - "title": "Quota limiter name", - "description": "Human readable quota limiter name" }, - "initial_quota": { - "type": "integer", - "minimum": 0.0, - "title": "Initial quota", - "description": "Quota set at beginning of the period" - }, - "quota_increase": { - "type": "integer", - "minimum": 0.0, - "title": "Quota increase", - "description": "Delta value used to increase quota when period is reached" + "temperature": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "title": "Temperature" }, - "period": { - "type": "string", - "title": "Period", - "description": "Period specified in human readable form" - } - }, - "additionalProperties": false, - "type": "object", - "required": [ - "type", - "name", - "initial_quota", - "quota_increase", - "period" - ], - "title": "QuotaLimiterConfiguration", - "description": 
"Configuration for one quota limiter.\n\nThere are three configuration options for each limiter:\n\n1. ``period`` is specified in a human-readable form, see\n https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-INTERVAL-INPUT\n for all possible options. When the end of the period is reached, the\n quota is reset or increased.\n2. ``initial_quota`` is the value set at the beginning of the period.\n3. ``quota_increase`` is the value (if specified) used to increase the\n quota when the period is reached.\n\nThere are two basic use cases:\n\n1. When the quota needs to be reset to a specific value periodically (for\n example on a weekly or monthly basis), set ``initial_quota`` to the\n required value.\n2. When the quota needs to be increased by a specific value periodically\n (for example on a daily basis), set ``quota_increase``." - }, - "QuotaSchedulerConfiguration": { - "properties": { - "period": { - "type": "integer", - "exclusiveMinimum": 0.0, - "title": "Period", - "description": "Quota scheduler period specified in seconds", - "default": 1 + "text": { + "anyOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseText" + }, + { + "type": "null" + } + ] }, - "database_reconnection_count": { - "type": "integer", - "exclusiveMinimum": 0.0, - "title": "Database reconnection count on startup", - "description": "Database reconnection count on startup. 
When database for quota is not available on startup, the service tries to reconnect N times with specified delay.", - "default": 10 + "tool_choice": { + "anyOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceMode" + }, + { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceAllowedTools" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceFileSearch" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceWebSearch" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceFunctionTool" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceMCPTool" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceCustomTool" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "allowed_tools": "#/components/schemas/OpenAIResponseInputToolChoiceAllowedTools", + "custom": "#/components/schemas/OpenAIResponseInputToolChoiceCustomTool", + "file_search": "#/components/schemas/OpenAIResponseInputToolChoiceFileSearch", + "function": "#/components/schemas/OpenAIResponseInputToolChoiceFunctionTool", + "mcp": "#/components/schemas/OpenAIResponseInputToolChoiceMCPTool", + "web_search": "#/components/schemas/OpenAIResponseInputToolChoiceWebSearch", + "web_search_2025_08_26": "#/components/schemas/OpenAIResponseInputToolChoiceWebSearch", + "web_search_preview": "#/components/schemas/OpenAIResponseInputToolChoiceWebSearch", + "web_search_preview_2025_03_11": "#/components/schemas/OpenAIResponseInputToolChoiceWebSearch" + } + } + }, + { + "type": "null" + } + ], + "title": "Tool Choice" }, - "database_reconnection_delay": { - "type": "integer", - "exclusiveMinimum": 0.0, - "title": "Database reconnection delay", - "description": "Database reconnection delay specified in seconds. 
When database for quota is not available on startup, the service tries to reconnect N times with specified delay.", - "default": 1 - } - }, - "additionalProperties": false, - "type": "object", - "title": "QuotaSchedulerConfiguration", - "description": "Quota scheduler configuration." - }, - "RAGChunk": { - "properties": { - "content": { - "type": "string", - "title": "Content", - "description": "The content of the chunk" + "tools": { + "anyOf": [ + { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseInputToolWebSearch" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolFileSearch" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolFunction" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolMCP" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "file_search": "#/components/schemas/OpenAIResponseInputToolFileSearch", + "function": "#/components/schemas/OpenAIResponseInputToolFunction", + "mcp": "#/components/schemas/OpenAIResponseInputToolMCP", + "web_search": "#/components/schemas/OpenAIResponseInputToolWebSearch", + "web_search_2025_08_26": "#/components/schemas/OpenAIResponseInputToolWebSearch", + "web_search_preview": "#/components/schemas/OpenAIResponseInputToolWebSearch", + "web_search_preview_2025_03_11": "#/components/schemas/OpenAIResponseInputToolWebSearch" + } + } + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tools" }, - "source": { + "generate_topic_summary": { "anyOf": [ { - "type": "string" + "type": "boolean" }, { "type": "null" } ], - "title": "Source", - "description": "Index name identifying the knowledge source from configuration" + "title": "Generate Topic Summary", + "default": true }, - "score": { + "shield_ids": { "anyOf": [ { - "type": "number" + "items": { + "type": "string" + }, + "type": "array" }, { "type": "null" } ], - "title": "Score", - "description": "Relevance score" + "title": "Shield Ids" }, - "attributes": { + "solr": { 
"anyOf": [ { "additionalProperties": true, @@ -8599,28 +11492,115 @@ "type": "null" } ], - "title": "Attributes", - "description": "Document metadata from the RAG provider (e.g., url, title, author)" + "title": "Solr" } }, + "additionalProperties": false, "type": "object", "required": [ - "content" + "input" ], - "title": "RAGChunk", - "description": "Model representing a RAG chunk used in the response." + "title": "ResponsesRequest", + "description": "Model representing a request for the Responses API following LCORE specification.\n\nAttributes:\n input: Input text or structured input items containing the query.\n model: Model identifier in format \"provider/model\". Auto-selected if not provided.\n conversation: Conversation ID linking to an existing conversation. Accepts both\n OpenAI and LCORE formats. Mutually exclusive with previous_response_id.\n include: Explicitly specify output item types that are excluded by default but\n should be included in the response.\n instructions: System instructions or guidelines provided to the model (acts as\n the system prompt).\n max_infer_iters: Maximum number of inference iterations the model can perform.\n max_output_tokens: Maximum number of tokens allowed in the response.\n max_tool_calls: Maximum number of tool calls allowed in a single response.\n metadata: Custom metadata dictionary with key-value pairs for tracking or logging.\n parallel_tool_calls: Whether the model can make multiple tool calls in parallel.\n previous_response_id: Identifier of the previous response in a multi-turn\n conversation. Mutually exclusive with conversation.\n prompt: Prompt object containing a template with variables for dynamic\n substitution.\n reasoning: Reasoning configuration for the response.\n safety_identifier: Safety identifier for the response.\n store: Whether to store the response in conversation history. Defaults to True.\n stream: Whether to stream the response as it is generated. 
Defaults to False.\n temperature: Sampling temperature controlling randomness (typically 0.0\u20132.0).\n text: Text response configuration specifying output format constraints (JSON\n schema, JSON object, or plain text).\n tool_choice: Tool selection strategy (\"auto\", \"required\", \"none\", or specific\n tool configuration).\n tools: List of tools available to the model (file search, web search, function\n calls, MCP tools). Defaults to all tools available to the model.\n generate_topic_summary: LCORE-specific flag indicating whether to generate a\n topic summary for new conversations. Defaults to True.\n shield_ids: LCORE-specific list of safety shield IDs to apply. If None, all\n configured shields are used.\n solr: LCORE-specific Solr vector_io provider query parameters (e.g. filter\n queries). Optional.", + "examples": [ + { + "generate_topic_summary": true, + "input": "Hello World!", + "instructions": "You are a helpful assistant", + "model": "openai/gpt-4o-mini", + "store": true, + "stream": false + } + ] }, - "RAGInfoResponse": { + "ResponsesResponse": { "properties": { + "created_at": { + "type": "integer", + "title": "Created At" + }, + "completed_at": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Completed At" + }, + "error": { + "anyOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseError" + }, + { + "type": "null" + } + ] + }, "id": { "type": "string", - "title": "Id", - "description": "Vector DB unique ID", - "examples": [ - "vs_00000000_0000_0000" - ] + "title": "Id" }, - "name": { + "model": { + "type": "string", + "title": "Model" + }, + "object": { + "type": "string", + "const": "response", + "title": "Object", + "default": "response" + }, + "output": { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseMessage-Output" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall" + }, + { + "$ref": 
"#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools" + }, + { + "$ref": "#/components/schemas/OpenAIResponseMCPApprovalRequest" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "file_search_call": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall", + "function_call": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall", + "mcp_approval_request": "#/components/schemas/OpenAIResponseMCPApprovalRequest", + "mcp_call": "#/components/schemas/OpenAIResponseOutputMessageMCPCall", + "mcp_list_tools": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools", + "message": "#/components/schemas/OpenAIResponseMessage-Output", + "web_search_call": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall" + } + } + }, + "type": "array", + "title": "Output" + }, + "parallel_tool_calls": { + "type": "boolean", + "title": "Parallel Tool Calls", + "default": true + }, + "previous_response_id": { "anyOf": [ { "type": "string" @@ -8629,244 +11609,205 @@ "type": "null" } ], - "title": "Name", - "description": "Human readable vector DB name", - "examples": [ - "Faiss Store with Knowledge base" - ] + "title": "Previous Response Id" }, - "created_at": { - "type": "integer", - "title": "Created At", - "description": "When the vector store was created, represented as Unix time", - "examples": [ - 1763391371 + "prompt": { + "anyOf": [ + { + "$ref": "#/components/schemas/OpenAIResponsePrompt" + }, + { + "type": "null" + } ] }, - "last_active_at": { + "status": { + "type": "string", + "title": "Status" + }, + "temperature": { "anyOf": [ { - "type": "integer" + "type": "number" }, { "type": "null" } ], - "title": "Last Active At", - "description": "When the vector 
store was last active, represented as Unix time", - "examples": [ - 1763391371 - ] + "title": "Temperature" }, - "usage_bytes": { - "type": "integer", - "title": "Usage Bytes", - "description": "Storage byte(s) used by this vector DB", - "examples": [ - 0 + "text": { + "anyOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseText" + }, + { + "type": "null" + } ] }, - "expires_at": { + "top_p": { "anyOf": [ { - "type": "integer" + "type": "number" }, { "type": "null" } ], - "title": "Expires At", - "description": "When the vector store expires, represented as Unix time", - "examples": [ - 1763391371 - ] - }, - "object": { - "type": "string", - "title": "Object", - "description": "Object type", - "examples": [ - "vector_store" - ] + "title": "Top P" }, - "status": { - "type": "string", - "title": "Status", - "description": "Vector DB status", - "examples": [ - "completed" - ] - } - }, - "type": "object", - "required": [ - "id", - "created_at", - "usage_bytes", - "object", - "status" - ], - "title": "RAGInfoResponse", - "description": "Model representing a response with information about RAG DB.", - "examples": [ - { - "created_at": 1763391371, - "id": "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3", - "last_active_at": 1763391371, - "name": "Faiss Store with Knowledge base", - "object": "vector_store", - "status": "completed", - "usage_bytes": 1024000 - } - ] - }, - "RAGListResponse": { - "properties": { - "rags": { - "items": { - "type": "string" - }, - "type": "array", - "title": "RAG list response", - "description": "List of RAG identifiers", - "examples": [ - "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3", - "vs_7b52a8cf-0fa3-489c-cafe-27e061d102f3" - ] - } - }, - "type": "object", - "required": [ - "rags" - ], - "title": "RAGListResponse", - "description": "Model representing a response to list RAGs request.", - "examples": [ - { - "rags": [ - "vs_00000000-cafe-babe-0000-000000000000", - "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3", - 
"vs_7b52a8cf-0fa3-489c-cafe-27e061d102f3" - ] - } - ] - }, - "RHIdentityConfiguration": { - "properties": { - "required_entitlements": { + "tools": { + "anyOf": [ + { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseInputToolWebSearch" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolFileSearch" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolFunction" + }, + { + "$ref": "#/components/schemas/OpenAIResponseToolMCP" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "file_search": "#/components/schemas/OpenAIResponseInputToolFileSearch", + "function": "#/components/schemas/OpenAIResponseInputToolFunction", + "mcp": "#/components/schemas/OpenAIResponseToolMCP", + "web_search": "#/components/schemas/OpenAIResponseInputToolWebSearch", + "web_search_2025_08_26": "#/components/schemas/OpenAIResponseInputToolWebSearch", + "web_search_preview": "#/components/schemas/OpenAIResponseInputToolWebSearch", + "web_search_preview_2025_03_11": "#/components/schemas/OpenAIResponseInputToolWebSearch" + } + } + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tools" + }, + "tool_choice": { "anyOf": [ { - "items": { - "type": "string" - }, - "type": "array" + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceMode" + }, + { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceAllowedTools" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceFileSearch" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceWebSearch" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceFunctionTool" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceMCPTool" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceCustomTool" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "allowed_tools": "#/components/schemas/OpenAIResponseInputToolChoiceAllowedTools", + "custom": 
"#/components/schemas/OpenAIResponseInputToolChoiceCustomTool", + "file_search": "#/components/schemas/OpenAIResponseInputToolChoiceFileSearch", + "function": "#/components/schemas/OpenAIResponseInputToolChoiceFunctionTool", + "mcp": "#/components/schemas/OpenAIResponseInputToolChoiceMCPTool", + "web_search": "#/components/schemas/OpenAIResponseInputToolChoiceWebSearch", + "web_search_2025_08_26": "#/components/schemas/OpenAIResponseInputToolChoiceWebSearch", + "web_search_preview": "#/components/schemas/OpenAIResponseInputToolChoiceWebSearch", + "web_search_preview_2025_03_11": "#/components/schemas/OpenAIResponseInputToolChoiceWebSearch" + } + } }, { "type": "null" } ], - "title": "Required entitlements", - "description": "List of all required entitlements." - } - }, - "additionalProperties": false, - "type": "object", - "title": "RHIdentityConfiguration", - "description": "Red Hat Identity authentication configuration." - }, - "RagConfiguration": { - "properties": { - "inline": { - "items": { - "type": "string" - }, - "type": "array", - "title": "Inline RAG IDs", - "description": "RAG IDs whose sources are injected as context before the LLM call. Use 'okp' to enable OKP inline RAG. Empty by default (no inline RAG)." + "title": "Tool Choice" }, - "tool": { + "truncation": { "anyOf": [ { - "items": { - "type": "string" - }, - "type": "array" + "type": "string" }, { "type": "null" } ], - "title": "Tool RAG IDs", - "description": "RAG IDs made available to the LLM as a file_search tool. Use 'okp' to include the OKP vector store. When omitted, all registered BYOK vector stores are used (backward compatibility)." - } - }, - "additionalProperties": false, - "type": "object", - "title": "RagConfiguration", - "description": "RAG strategy configuration.\n\nControls which RAG sources are used for inline and tool-based retrieval.\n\nEach strategy lists RAG IDs to include. 
The special ID ``\"okp\"`` defined in constants,\nactivates the OKP provider; all other IDs refer to entries in ``byok_rag``.\n\nBackward compatibility:\n - ``inline`` defaults to ``[]`` (no inline RAG).\n - ``tool`` defaults to ``None`` which means all registered vector stores\n are used (identical to the previous ``tool.byok.enabled = True`` default)." - }, - "ReadinessResponse": { - "properties": { - "ready": { - "type": "boolean", - "title": "Ready", - "description": "Flag indicating if service is ready", - "examples": [ - true, - false + "title": "Truncation" + }, + "usage": { + "anyOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseUsage" + }, + { + "type": "null" + } ] }, - "reason": { - "type": "string", - "title": "Reason", - "description": "The reason for the readiness", - "examples": [ - "Service is ready" + "instructions": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Instructions" + }, + "max_tool_calls": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Max Tool Calls" + }, + "reasoning": { + "anyOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseReasoning" + }, + { + "type": "null" + } ] }, - "providers": { - "items": { - "$ref": "#/components/schemas/ProviderHealthStatus" - }, - "type": "array", - "title": "Providers", - "description": "List of unhealthy providers in case of readiness failure.", - "examples": [] - } - }, - "type": "object", - "required": [ - "ready", - "reason", - "providers" - ], - "title": "ReadinessResponse", - "description": "Model representing response to a readiness request.\n\nAttributes:\n ready: If service is ready.\n reason: The reason for the readiness.\n providers: List of unhealthy providers in case of readiness failure.\n\nExample:\n ```python\n readiness_response = ReadinessResponse(\n ready=False,\n reason=\"Service is not ready\",\n providers=[\n ProviderHealthStatus(\n provider_id=\"ollama\",\n status=\"unhealthy\",\n 
message=\"Server is unavailable\"\n )\n ]\n )\n ```", - "examples": [ - { - "providers": [], - "ready": true, - "reason": "Service is ready" - } - ] - }, - "ReferencedDocument": { - "properties": { - "doc_url": { + "max_output_tokens": { "anyOf": [ { - "type": "string", - "minLength": 1, - "format": "uri" + "type": "integer" }, { "type": "null" } ], - "title": "Doc Url", - "description": "URL of the referenced document" + "title": "Max Output Tokens" }, - "doc_title": { + "safety_identifier": { "anyOf": [ { "type": "string" @@ -8875,10 +11816,34 @@ "type": "null" } ], - "title": "Doc Title", - "description": "Title of the referenced document" + "title": "Safety Identifier" }, - "source": { + "metadata": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Metadata" + }, + "store": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Store" + }, + "conversation": { "anyOf": [ { "type": "string" @@ -8887,13 +11852,75 @@ "type": "null" } ], - "title": "Source", - "description": "Index name identifying the knowledge source from configuration" + "title": "Conversation" + }, + "available_quotas": { + "additionalProperties": { + "type": "integer" + }, + "type": "object", + "title": "Available Quotas" + }, + "output_text": { + "type": "string", + "title": "Output Text" } }, "type": "object", - "title": "ReferencedDocument", - "description": "Model representing a document referenced in generating a response.\n\nAttributes:\n doc_url: Url to the referenced doc.\n doc_title: Title of the referenced doc." 
+ "required": [ + "created_at", + "id", + "model", + "output", + "status", + "available_quotas", + "output_text" + ], + "title": "ResponsesResponse", + "description": "Model representing a response from the Responses API following LCORE specification.\n\nAttributes:\n created_at: Unix timestamp when the response was created.\n completed_at: Unix timestamp when the response was completed, if applicable.\n error: Error details if the response failed or was blocked.\n id: Unique identifier for this response.\n model: Model identifier in \"provider/model\" format used for generation.\n object: Object type identifier, always \"response\".\n output: List of structured output items containing messages, tool calls, and\n other content. This is the primary response content.\n parallel_tool_calls: Whether the model can make multiple tool calls in parallel.\n previous_response_id: Identifier of the previous response in a multi-turn\n conversation.\n prompt: The input prompt object that was sent to the model.\n status: Current status of the response (e.g., \"completed\", \"blocked\",\n \"in_progress\").\n temperature: Temperature parameter used for generation (controls randomness).\n text: Text response configuration object used for OpenAI responses.\n top_p: Top-p sampling parameter used for generation.\n tools: List of tools available to the model during generation.\n tool_choice: Tool selection strategy used (e.g., \"auto\", \"required\", \"none\").\n truncation: Strategy used for handling content that exceeds context limits.\n usage: Token usage statistics including input_tokens, output_tokens, and\n total_tokens.\n instructions: System instructions or guidelines provided to the model.\n max_tool_calls: Maximum number of tool calls allowed in a single response.\n reasoning: Reasoning configuration (effort level) used for the response.\n max_output_tokens: Upper bound for tokens generated in the response.\n safety_identifier: Safety/guardrail identifier applied to the 
request.\n metadata: Additional metadata dictionary with custom key-value pairs.\n store: Whether the response was stored.\n conversation: Conversation ID linking this response to a conversation thread\n (LCORE-specific).\n available_quotas: Remaining token quotas for the user (LCORE-specific).\n output_text: Aggregated text output from all output_text items in the\n output array.", + "examples": [ + { + "available_quotas": { + "daily": 1000, + "monthly": 50000 + }, + "completed_at": 1704067250, + "conversation": "0d21ba731f21f798dc9680125d5d6f493e4a7ab79f25670e", + "created_at": 1704067200, + "id": "resp_abc123", + "instructions": "You are a helpful assistant", + "model": "openai/gpt-4-turbo", + "object": "response", + "output": [ + { + "content": [ + { + "text": "Kubernetes is an open-source container orchestration system...", + "type": "output_text" + } + ], + "role": "assistant", + "type": "message" + } + ], + "output_text": "Kubernetes is an open-source container orchestration system...", + "parallel_tool_calls": true, + "status": "completed", + "store": true, + "temperature": 0.7, + "text": { + "format": { + "type": "text" + } + }, + "usage": { + "input_tokens": 100, + "output_tokens": 50, + "total_tokens": 150 + } + } + ], + "sse_example": "event: response.created\ndata: {\"type\":\"response.created\",\"sequence_number\":0,\"response\":{\"id\":\"resp_abc\",\"created_at\":1704067200,\"status\":\"in_progress\",\"output\":[],\"conversation\":\"0d21ba731f21f798dc9680125d5d6f49\",\"available_quotas\":{},\"output_text\":\"\"}}\n\nevent: response.output_item.added\ndata: {\"response_id\":\"resp_abc\",\"item\":{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"output_text\",\"text\":\"Hello! How can I help?\"}]},\"output_index\":0,\"sequence_number\":1}\n\nevent: response.output_item.done\ndata: {\"response_id\":\"resp_abc\",\"item\":{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"output_text\",\"text\":\"Hello! 
How can I help?\"}]},\"output_index\":0,\"sequence_number\":2}\n\nevent: response.completed\ndata: {\"type\":\"response.completed\",\"sequence_number\":3,\"response\":{\"id\":\"resp_abc\",\"created_at\":1704067200,\"completed_at\":1704067250,\"status\":\"completed\",\"output\":[{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"output_text\",\"text\":\"Hello! How can I help?\"}]}],\"usage\":{\"input_tokens\":10,\"output_tokens\":6,\"total_tokens\":16},\"conversation\":\"0d21ba731f21f798dc9680125d5d6f49\",\"available_quotas\":{\"daily\":1000,\"monthly\":50000},\"output_text\":\"Hello! How can I help?\"}}\n\ndata: [DONE]\n\n" }, "RlsapiV1Attachment": { "properties": { @@ -9163,6 +12190,90 @@ "title": "SQLiteDatabaseConfiguration", "description": "SQLite database configuration." }, + "SearchRankingOptions": { + "properties": { + "ranker": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Ranker" + }, + "score_threshold": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "title": "Score Threshold", + "default": 0.0 + }, + "alpha": { + "anyOf": [ + { + "type": "number", + "maximum": 1.0, + "minimum": 0.0 + }, + { + "type": "null" + } + ], + "title": "Alpha", + "description": "Weight factor for weighted ranker" + }, + "impact_factor": { + "anyOf": [ + { + "type": "number", + "exclusiveMinimum": 0.0 + }, + { + "type": "null" + } + ], + "title": "Impact Factor", + "description": "Impact factor for RRF algorithm" + }, + "weights": { + "anyOf": [ + { + "additionalProperties": { + "type": "number" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Weights", + "description": "Weights for combining vector, keyword, and neural scores. 
Keys: 'vector', 'keyword', 'neural'" + }, + "model": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Model", + "description": "Model identifier for neural reranker" + } + }, + "type": "object", + "title": "SearchRankingOptions", + "description": "Options for ranking and filtering search results.\n\nThis class configures how search results are ranked and filtered. You can use algorithm-based\nrerankers (weighted, RRF) or neural rerankers. Defaults from VectorStoresConfig are\nused when parameters are not provided.\n\nExamples:\n # Weighted ranker with custom alpha\n SearchRankingOptions(ranker=\"weighted\", alpha=0.7)\n\n # RRF ranker with custom impact factor\n SearchRankingOptions(ranker=\"rrf\", impact_factor=50.0)\n\n # Use config defaults (just specify ranker type)\n SearchRankingOptions(ranker=\"weighted\") # Uses alpha from VectorStoresConfig\n\n # Score threshold filtering\n SearchRankingOptions(ranker=\"weighted\", score_threshold=0.5)\n\n:param ranker: (Optional) Name of the ranking algorithm to use. Supported values:\n - \"weighted\": Weighted combination of vector and keyword scores\n - \"rrf\": Reciprocal Rank Fusion algorithm\n - \"neural\": Neural reranking model (requires model parameter, Part II)\n Note: For OpenAI API compatibility, any string value is accepted, but only the above values are supported.\n:param score_threshold: (Optional) Minimum relevance score threshold for results. Default: 0.0\n:param alpha: (Optional) Weight factor for weighted ranker (0-1).\n - 0.0 = keyword only\n - 0.5 = equal weight (default)\n - 1.0 = vector only\n Only used when ranker=\"weighted\" and weights is not provided.\n Falls back to VectorStoresConfig.chunk_retrieval_params.weighted_search_alpha if not provided.\n:param impact_factor: (Optional) Impact factor (k) for RRF algorithm.\n Lower values emphasize higher-ranked results. 
Default: 60.0 (optimal from research).\n Only used when ranker=\"rrf\".\n Falls back to VectorStoresConfig.chunk_retrieval_params.rrf_impact_factor if not provided.\n:param weights: (Optional) Dictionary of weights for combining different signal types.\n Keys can be \"vector\", \"keyword\", \"neural\". Values should sum to 1.0.\n Used when combining algorithm-based reranking with neural reranking (Part II).\n Example: {\"vector\": 0.3, \"keyword\": 0.3, \"neural\": 0.4}\n:param model: (Optional) Model identifier for neural reranker (e.g., \"vllm/Qwen3-Reranker-0.6B\").\n Required when ranker=\"neural\" or when weights contains \"neural\" (Part II)." + }, "SecurityScheme": { "anyOf": [ { @@ -9854,4 +12965,4 @@ } } } -} +} \ No newline at end of file diff --git a/docs/openapi.md b/docs/openapi.md index e1df8b8c6..8c4f77d8a 100644 --- a/docs/openapi.md +++ b/docs/openapi.md @@ -3804,6 +3804,7 @@ BYOK (Bring Your Own Knowledge) RAG configuration. | embedding_dimension | integer | Dimensionality of embedding vectors. | | vector_db_id | string | Vector database identification. | | db_path | string | Path to RAG database. | +| score_multiplier | number | Multiplier applied to relevance scores from this vector store. Used to weight results when querying multiple knowledge sources. Values > 1 boost this store's results; values < 1 reduce them. | ## CORSConfiguration @@ -3868,7 +3869,8 @@ Global service configuration. | azure_entra_id | | | | splunk | | Splunk HEC configuration for sending telemetry events. | | deployment_environment | string | Deployment environment name (e.g., 'development', 'staging', 'production'). Used in telemetry events. | -| solr | | Configuration for Solr vector search operations. | +| rag | | Configuration for all RAG strategies (inline and tool-based). | +| okp | | OKP provider settings. Only used when 'okp' is listed in rag.inline or rag.tool. | ## ConfigurationResponse @@ -4523,12 +4525,14 @@ Model representing a message in a conversation turn. 
Attributes: content: The message content. type: The type of message. + referenced_documents: Optional list of documents referenced in an assistant response. | Field | Type | Description | |-------|------|-------------| | content | string | The message content | | type | string | The type of message | +| referenced_documents | | List of documents referenced in the response (assistant messages only) | ## ModelContextProtocolServer @@ -4622,6 +4626,21 @@ Defines the configuration for the supported OAuth 2.0 flows. | password | | | +## OkpConfiguration + + +OKP (Offline Knowledge Portal) provider configuration. + +Controls provider-specific behaviour for the OKP vector store. +Only relevant when ``"okp"`` is listed in ``rag.inline`` or ``rag.tool``. + + +| Field | Type | Description | +|-------|------|-------------| +| offline | boolean | When True, use parent_id for OKP chunk source URLs. When False, use reference_url for chunk source URLs. | +| chunk_filter_query | string | OKP filter query applied to every OKP search request. Defaults to 'is_chunk:true' to restrict results to chunk documents. To add extra constraints, extend the expression using boolean syntax, e.g. 'is_chunk:true AND product:*openshift*'. | + + ## OpenIdConnectSecurityScheme @@ -4769,7 +4788,7 @@ Example: | generate_topic_summary | | Whether to generate topic summary for new conversations | | media_type | | Media type for the response format | | vector_store_ids | | Optional list of specific vector store IDs to query for RAG. If not provided, all available vector stores will be queried. | -| shield_ids | | Optional list of safety shield IDs to apply. If None, all configured shields are used. If provided, must contain at least one valid shield ID (empty list raises 422 error). | +| shield_ids | | Optional list of safety shield IDs to apply. If None, all configured shields are used. 
| | solr | | Solr-specific query parameters including filter queries | @@ -4938,6 +4957,28 @@ Red Hat Identity authentication configuration. | required_entitlements | | List of all required entitlements. | +## RagConfiguration + + +RAG strategy configuration. + +Controls which RAG sources are used for inline and tool-based retrieval. + +Each strategy lists RAG IDs to include. The special ID ``"okp"`` defined in constants, +activates the OKP provider; all other IDs refer to entries in ``byok_rag``. + +Backward compatibility: + - ``inline`` defaults to ``[]`` (no inline RAG). + - ``tool`` defaults to ``None`` which means all registered vector stores + are used (identical to the previous ``tool.byok.enabled = True`` default). + + +| Field | Type | Description | +|-------|------|-------------| +| inline | array | RAG IDs whose sources are injected as context before the LLM call. Use 'okp' to enable OKP inline RAG. Empty by default (no inline RAG). | +| tool | | RAG IDs made available to the LLM as a file_search tool. Use 'okp' to include the OKP vector store. When omitted, all registered BYOK vector stores are used (backward compatibility). | + + ## ReadinessResponse @@ -5200,21 +5241,6 @@ Model representing a response to shields request. | shields | array | List of shields available | -## SolrConfiguration - - -Solr configuration for vector search queries. - -Controls whether to use offline or online mode when building document URLs -from vector search results, and enables/disables Solr vector IO functionality. - - -| Field | Type | Description | -|-------|------|-------------| -| enabled | boolean | When True, enables Solr vector IO functionality for vector search queries. When False, disables Solr vector search processing. | -| offline | boolean | When True, use parent_id for chunk source URLs. When False, use reference_url for chunk source URLs. 
| - - ## SplunkConfiguration diff --git a/docs/rag_guide.md b/docs/rag_guide.md index 1ddbbd96d..a7122b6ea 100644 --- a/docs/rag_guide.md +++ b/docs/rag_guide.md @@ -67,7 +67,7 @@ For users with BYOK or OKP/Solr configurations, you can automatically enrich you ```bash # Enrich run.yaml with BYOK and/or Solr configurations from lightspeed-stack.yaml -python src/llama_stack_configuration.py -c lightspeed-stack.yaml -i run.yaml -o run_enriched.yaml +uv run src/llama_stack_configuration.py -c lightspeed-stack.yaml -i run.yaml -o run_enriched.yaml ``` This script automatically adds the necessary: diff --git a/docs/responses.md b/docs/responses.md index 80131048b..1eafb22ab 100644 --- a/docs/responses.md +++ b/docs/responses.md @@ -1,6 +1,6 @@ # LCORE OpenResponses API Specification -This document describes the LCORE implementation of the OpenResponses API, exposed via the `POST /v1/responses` endpoint. This endpoint follows the OpenResponses specification and is built on top of the Llama Stack Responses API. Since the underlying Llama Stack Responses API is still evolving, the LCORE endpoint provides a standards-aligned interface while documenting a supported subset of OpenResponses fields. In addition, it introduces LCORE-specific extensions to preserve feature parity and defines explicit field mappings to reproduce the functionality of existing `/v1/query` and `/v1/streaming_query` endpoints. +This document describes the LCORE implementation of the OpenResponses API, exposed via the `POST /v1/responses` endpoint. This endpoint follows the OpenResponses specification and is built on top of the Llama Stack Responses API. In addition, it introduces LCORE-specific extensions to preserve feature parity and defines explicit field mappings to reproduce the functionality of existing `/v1/query` and `/v1/streaming_query` endpoints. 
--- @@ -9,7 +9,7 @@ This document describes the LCORE implementation of the OpenResponses API, expos * [Introduction](#introduction) * [Endpoint Overview](#endpoint-overview) * [Request Specification](#request-specification) - * [Inherited LLS OpenAPI Fields](#inherited-lls-openapi-fields) + * [Inherited LLS OpenAPI Fields](#inherited-lls-openapi-attributes) * [LCORE-Specific Extensions](#lcore-specific-extensions) * [Field Mappings](#field-mappings) * [Structured request attributes: variants and usage](#structured-request-attributes-variants-and-usage) @@ -19,7 +19,7 @@ This document describes the LCORE implementation of the OpenResponses API, expos * [LCORE-Specific Extensions](#lcore-specific-extensions-1) * [Field Mappings](#field-mappings-1) * [Streaming Support](#streaming-support) -* [Known Limitations and Behavioral Differences](#known-limitations-and-behavioral-differences) +* [Behavioral Differences](#behavioral-differences) * [Conversation Handling](#conversation-handling) * [Output Representation](#output-representation) * [Tool Configuration Differences](#tool-configuration-differences) @@ -47,9 +47,9 @@ This document describes the LCORE implementation of the OpenResponses API, expos ## Introduction -The LCORE OpenResponses API provides a standards-aligned interface for AI response generation while preserving feature compatibility with existing LCORE workflows. In particular, the endpoint enriches requests and responses with LCORE-specific attributes, adjusts the semantics of some fields for compatibility, and enriches streaming events. +The LCORE OpenResponses API provides a standards-aligned interface for AI response generation while preserving feature compatibility with existing LCORE workflows. In particular, the endpoint enriches requests and responses with LCORE-specific attributes, adjusts the semantics of some fields for compatibility, and enriches content of some streaming events. 
-The endpoint is designed to provide feature parity with existing streaming endpoints while offering a more direct interface to the underlying Responses API. +The endpoint is designed to provide feature parity with existing query endpoints while offering a more direct interface to the underlying Responses API. --- @@ -69,7 +69,7 @@ The endpoint is designed to provide feature parity with existing streaming endpo ## Request Specification -### Inherited LLS OpenAPI Fields +### Inherited LLS OpenAPI Attributes The following request attributes are supported as defined by the underlying Llama Stack Responses API and retain their original OpenResponses semantics unless otherwise stated: @@ -80,12 +80,15 @@ The following request attributes are supported as defined by the underlying Llam | `conversation` | string | Conversation ID (OpenAI or LCORE format). Mutually exclusive with `previous_response_id` | No | | `include` | array[string] | Extra output item types to include | No | | `instructions` | string | System prompt | No | -| `max_infer_iters` | integer | Max inference iterations | No | -| `max_tool_calls` | integer | Max tool calls per response | No | +| `max_infer_iters` | integer | Maximum of inference iterations | No | +| `max_output_tokens` | integer | Maximum of output tokens | No | +| `max_tool_calls` | integer | Maximum of tool calls per response | No | | `metadata` | dictionary | Custom metadata (tracking/logging) | No | | `parallel_tool_calls` | boolean | Allow parallel tool calls | No | | `previous_response_id` | string | Previous response ID for context. 
Mutually exclusive with `conversation` | No | | `prompt` | object | Prompt substitution template | No | +| `reasoning` | object | Reasoning configuration (effort level) used for the response | No | +| `safety_identifier` | string | Safety/guardrail identifier applied to the request | No | | `store` | boolean | Store in conversation history (default: true) | No | | `stream` | boolean | Stream response (default: false) | No | | `temperature` | float | Sampling temperature (0.0–2.0) | No | @@ -93,15 +96,14 @@ The following request attributes are supported as defined by the underlying Llam | `tool_choice` | string or object | Tool selection strategy (auto, required, none, or specific rules). Default: auto | No | | `tools` | array[object] | Tools available for request (file search, web search, functions, MCP). Default: all | No | -**Note:** Only the fields listed above are currently supported. Additional OpenResponses fields may not yet be available due to LLS API incompleteness. - ### LCORE-Specific Extensions The following fields are LCORE-specific request extensions and are not part of the standard LLS OpenAPI specification: | Field | Type | Description | Required | |-------|------|-------------|----------| -| `generate_topic_summary` | boolean | Generate topic summary for new conversations | No | +| `generate_topic_summary` | boolean | Generate topic summary for new conversations. Default: true | No | +| `shield_ids` | array[string] | Shield IDs to apply. 
If omitted, all configured shields in LCORE are used | No | | `solr` | dictionary | Solr vector_io provider query parameters | No | @@ -114,11 +116,12 @@ The following table maps LCORE query request fields to the OpenResponses request | `query` | `input` | The attribute allows to pass string-like input and also structured input of list of input items | | `conversation_id` | `conversation` | Supports OpenAI `conv_*` format or LCORE hex UUID | | `provider` + `model` | `model` | Concatenated as `provider/model` | -| `system_prompt` | `instructions` | Only change in attribute's name | +| `system_prompt` | `instructions` | Same meaning. Only change in attribute's name | | `attachments` | `input` items | Attachments can be passed as input messages with content of type `input_file` | | `no_tools` | `tool_choice` | `no_tools=true` mapped to `tool_choice="none"` | | `vector_store_ids` | `tools` + `tool_choice` | Vector stores can be explicitly specified and restricted by `file_search` tool type's `vector_store_ids` attribute | | `generate_topic_summary` | N/A | Exposed directly (LCORE-specific) | +| `shield_ids` | N/A | Exposed directly (LCORE-specific) | | `solr` | N/A | Exposed directly (LCORE-specific) | **Note:** The `media_type` attribute is not present in the LCORE specification, as downstream logic determines which format to process (structured `output` or textual `output_text` response attributes). @@ -141,7 +144,7 @@ Required. Either a **string** or a list of input items. Each **item** is one of: - [mcp_approval_request](#mcp_approval_request) — request for human approval of an MCP call - [mcp_approval_response](#mcp_approval_response) — human approval or denial -All input item objects have a common `type` attribute that determines their structure. See [Available OpenResponses items](#available-openresponses-items) for detailed descriptions and examples of each item type. 
+All input item objects have a common `type` discriminator that determines the subsequent structure. See [Available OpenResponses items](#available-openresponses-items) for detailed descriptions and examples of each item type. #### `include` @@ -190,6 +193,28 @@ Template with multiple variable types (text, image, file): Here the template `report_template` (version `2.0`) might define placeholders such as `{{title}}`, `{{chart}}`, and `{{data}}`; the backend substitutes them with the provided text, image, and file respectively. +#### `reasoning` + +Optional. **Reasoning effort configuration** that controls how much “thinking” the model does before producing its answer. Supported on models that expose reasoning (e.g. o1/o3-style). Lower effort favors speed and fewer tokens; higher effort favors more thorough reasoning. + +When provided, the object has a single key: + +`effort`: One of `"none"`, `"minimal"`, `"low"`, `"medium"`, `"high"`, or `"xhigh"`. `None` leaves the default behavior to the backend. + +**Examples:** + +```json +{ "reasoning": { "effort": "low" } } +``` + +```json +{ "reasoning": { "effort": "high" } } +``` + +```json +{ "reasoning": { "effort": "medium" } } +``` + #### `text` Optional. Text response configuration that tells the model how to format its main text output. @@ -357,27 +382,31 @@ The following response attributes are inherited directly from the LLS OpenAPI sp | Field | Type | Description | |-------|------|-------------| -| `id` | string | Unique response ID | -| `object` | string | Always `"response"` | | `created_at` | integer | Creation time (Unix) | -| `status` | string | Status (e.g. 
completed, blocked, in_progress) | | `completed_at` | integer | Completion time (Unix), if set | +| `error` | object | Error details if failed or incompleted | +| `id` | string | Unique response ID or moderation ID | | `model` | string | Model ID (provider/model) used | +| `object` | string | Always `"response"` | | `output` | array[object] | Structured output (messages, tool calls, etc.) | -| `error` | object | Error details if failed or blocked | -| `instructions` | string | System instructions used | -| `max_tool_calls` | integer | Max tool calls allowed | -| `metadata` | dictionary | Custom metadata | | `parallel_tool_calls` | boolean | Parallel tool calls allowed | | `previous_response_id` | string | Previous response ID (multi-turn) | -| `prompt` | object | Prompt echoed (id, variables, version) | -| `temperature` | float | Temperature used | -| `text` | object | Text config (format key) | -| `tool_choice` | string or object | Tool selection used | -| `tools` | array[object] | Tools available during generation | +| `prompt` | object | The input prompt object that was sent to the model | +| `status` | string | Status (e.g. 
completed, blocked, in_progress) | +| `temperature` | float | Temperature parameter used for generation | +| `text` | object | Text response configuration object used | | `top_p` | float | Top-p sampling used | +| `tools` | array[object] | Tools available during generation | +| `tool_choice` | string or object | Tool selection used | | `truncation` | string | Truncation strategy applied (`"auto"` or `"disabled"`) | | `usage` | object | Token usage (input_tokens, output_tokens, total_tokens) | +| `instructions` | string | System instructions used | +| `max_tool_calls` | integer | Max tool calls allowed | +| `reasoning` | object | Reasoning configuration applied | +| `max_output_tokens` | integer | Maximum output tokens allowed, if set | +| `safety_identifier` | string | Safety model or identifier used, if set | +| `metadata` | dictionary | Custom metadata specified in request | +| `store` | boolean | Whether the response was stored | | `output_text` | string | Aggregated text from output items | ### Structured response output: object types and examples @@ -394,7 +423,7 @@ The `output` array contains structured items. Each item has a `type`. Each list **Note:** No `mcp_approval_response` nor `function_call_output` here as they can serve only as input items. -All response item objects have a common `type` attribute that determines their structure. See [Available OpenResponses items](#available-openresponses-items) for detailed descriptions and examples of each item type. +All response item objects have a common `type` discriminator that determines subsequent structure. See [Available OpenResponses items](#available-openresponses-items) for detailed descriptions and examples of each item type. 
### LCORE-Specific Extensions @@ -421,7 +450,7 @@ The following mappings are applied when converting from LLS OpenAPI format to LC **Deprecated Fields:** The following fields are not exposed in the LCORE OpenResponses specification: * `rag_chunks` - Part of `output` items of `file_search_call` type * `referenced_documents` - Part of `output` items -* `truncated` - Deprecated; `truncation` field indicates used strategy, not whether the truncation was applied. +* `truncated` - Deprecated; `truncation` field indicates used strategy, not whether the truncation was actually applied. --- @@ -447,15 +476,15 @@ Each streaming event follows the Server-Sent Events (SSE) format: --- -## Known Limitations and Behavioral Differences +## Behavioral Differences -The `/v1/responses` endpoint follows the OpenResponses structure but is currently constrained by the capabilities of the underlying Llama Stack Responses API. As a result, only the documented subset of request and response fields is supported. +The `/v1/responses` endpoint follows the OpenResponses structure but also incorporates LCORE-specific features to maintain full feature compatibility with query endpoints. Several behavioral differences and implementation details should be noted: ### Conversation Handling -The `conversation` field in responses is a LCORE-managed extension. While not natively defined by the Llama Stack specification, it is internally resolved and linked to the request conversation to preserve multi-turn behavior. +The `conversation` field in responses is a LCORE-managed extension. While not natively defined by the Llama Stack specification, it is internally resolved and **always** present in the response to preserve LCORE conversation-based model. 
The endpoint accepts two conversation ID formats: @@ -484,13 +513,14 @@ Fields such as `media_type`, `tool_calls`, `tool_results`, `rag_chunks`, and `re ### Tool Configuration Differences -Vector store IDs are configured within the `tools` array (e.g., as `file_search` tools) rather than through separate parameters. By default all tools that are configured in LCORE are used to support the response. The set of available tools can be maintained per-request by `tool_choice` or `tools` attributes. +Vector store IDs are configured within the `tools` as `file_search` tools rather than through separate parameters. MCP tools are configurable under `mcp` tool type. By default **all** tools that are configured in LCORE are used to support the response. The set of available tools can be maintained per-request by `tool_choice` or `tools` attributes. ### LCORE-Specific Extensions The API introduces extensions that are not part of the OpenResponses specification: - `generate_topic_summary` (request) — When set to `true` and a new conversation is created, a topic summary is automatically generated and stored in conversation metadata. +- `shield_ids` (request) — Optional list of safety shield IDs to apply. If omitted, all configured shields are used. - `solr` (request) — Solr vector_io provider query parameters (e.g. filter queries). - `available_quotas` (response) — Provides real-time quota information from all configured quota limiters. @@ -498,10 +528,33 @@ The API introduces extensions that are not part of the OpenResponses specificati Streaming responses use Server-Sent Events (SSE) and are enriched with LCORE-specific metadata: -- The `conversation` attribute is included in streamed response payloads. +- The `conversation` attribute is included in all streamed payloads that contain `response` attribute. 
- The `available_quotas` attribute is added to final completion events (`response.completed`, `response.incomplete`, or `response.failed`) and also to the intermediate `response.in_progress` with empty object. -This enrichment may differ slightly from standard OpenAI streaming behavior but preserves compatibility with existing LCORE streaming workflows. + +## Implicit Conversation Management + +This implementation introduces **implicit conversation management**, ensuring that every response is associated with a conversation and can be inspected through the Conversations API. + +Users can provide context to the LLM using one of the following **mutually exclusive** strategies: + +- `conversation` — reference an existing conversation by ID +- `previous_response_id` — reference a previous response (for multi-turn continuation or branching) +- **no context** — neither a conversation nor a previous response is provided + +In **LCORE**, a conversation is modeled as a **linear chain of user turns** (request + response), where every turn belongs to exactly one conversation. Supporting `previous_response_id` as a context mechanism introduces **branching semantics**, which would break this linear structure if handled naively. To preserve a consistent conversation model, implicit conversation management applies the following rules: + +- **Context via `conversation`** — All items from the referenced conversation are provided as context for the new response. The new turn is automatically appended to that conversation, provided the conversation exists and the user has permission to access it. + +- **No context provided** — LCORE creates a new, empty conversation and assigns the new turn to it. 
+ +- **Context via `previous_response_id`** — LCORE determines whether the referenced response is the **latest response in its conversation**: + - **If it is the latest successful response** — The request is treated as a normal continuation of that conversation, preserving the linear structure. + - **If it is not the latest response** — The conversation is **forked**. A new conversation is created, and the new turn becomes the starting point of that conversation. + +**Moderation responses** (requests that fail shield moderation) follow the same conversation rules. However, only **valid (successful) responses** can be referenced via `previous_response_id`; moderation responses cannot be used as context for follow-up requests. + +Blocked turns still appear in conversation history via the Conversations API, but they **do not produce a referenceable response** for continuation or forking. They are also **excluded when determining the latest response** in a conversation. ## Examples diff --git a/examples/run.yaml b/examples/run.yaml index e4951ee2b..cf6fcc4df 100644 --- a/examples/run.yaml +++ b/examples/run.yaml @@ -163,6 +163,12 @@ registered_resources: - toolgroup_id: builtin::rag # Register the RAG tool provider_id: rag-runtime vector_stores: + annotation_prompt_params: # Override the default Llama Stack annotation that adds <| file-xyz |> to responses + enable_annotations: true + annotation_instruction_template: > + When appropriate, cite sources at the end of sentences using doc_url and doc_title format. + Citing sources is not always required because citations are handled externally. + Never include any citation that is in the form '<| file-id |>'. 
default_provider_id: faiss default_embedding_model: # Define the default embedding model for RAG provider_id: sentence-transformers diff --git a/pyproject.toml b/pyproject.toml index 5f60d4351..c3025c3fe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,9 +28,9 @@ dependencies = [ # Used by authentication/k8s integration "kubernetes>=30.1.0", # Used to call Llama Stack APIs - "llama-stack==0.4.3", - "llama-stack-client==0.4.3", - "llama-stack-api==0.4.4", + "llama-stack==0.5.2", + "llama-stack-client==0.5.2", + "llama-stack-api==0.5.2", # Used by Logger "rich>=14.0.0", # Used by JWK token auth handler @@ -223,7 +223,7 @@ disable = ["R0801"] extend-exclude = ["tests/profiles/syntax_error.py"] [tool.ruff.lint] -extend-select = ["TID251", "UP006", "UP007", "UP017", "UP035", "RUF100", "B010"] +extend-select = ["TID251", "UP006", "UP007", "UP010", "UP017", "UP035", "RUF100", "B009", "B010", "DTZ005"] [tool.ruff.lint.flake8-tidy-imports.banned-api] unittest = { msg = "use pytest instead of unittest" } diff --git a/requirements-build.txt b/requirements-build.txt index 66b538dc4..e3c84f824 100644 --- a/requirements-build.txt +++ b/requirements-build.txt @@ -1,11 +1,13 @@ # -# This file is autogenerated by pip-compile with Python 3.12 +# This file is autogenerated by pip-compile with Python 3.13 # by the following command: # # pybuild-deps compile --output-file=requirements-build.txt requirements.source.txt # calver==2025.10.20 # via trove-classifiers +cython==3.2.4 + # via oracledb dunamai==1.26.0 # via uv-dynamic-versioning flit-core==3.12.0 @@ -35,6 +37,7 @@ hatchling==1.29.0 # hatch-fancy-pypi-readme # hatch-vcs # opentelemetry-api + # opentelemetry-distro # opentelemetry-exporter-otlp # opentelemetry-exporter-otlp-proto-common # opentelemetry-exporter-otlp-proto-grpc @@ -44,6 +47,7 @@ hatchling==1.29.0 # opentelemetry-sdk # opentelemetry-semantic-conventions # pydantic-settings + # pythainlp # uv-dynamic-versioning # uvicorn # wcwidth @@ -77,6 +81,7 @@ 
setuptools-rust==1.12.0 setuptools-scm==9.2.2 # via # hatch-vcs + # llama-stack # llama-stack-api # pluggy # setuptools-rust @@ -90,15 +95,22 @@ uv-dynamic-versioning==0.13.0 wheel==0.46.3 # via # authlib + # azure-core # azure-identity # cachetools # litellm + # oci + # oracledb # sentence-transformers + # tornado # The following packages are considered to be unsafe in a requirements file: setuptools==82.0.0 + # via charset-normalizer +setuptools==82.0.1 # via # authlib + # azure-core # azure-identity # blobfile # cachetools @@ -107,12 +119,15 @@ setuptools==82.0.0 # emoji # google-api-core # google-cloud-bigquery + # googleapis-common-protos # greenlet # llama-stack # llama-stack-api # markupsafe # maturin # multiprocess + # oci + # oracledb # pathspec # pluggy # polyleven @@ -128,5 +143,6 @@ setuptools==82.0.0 # setuptools-scm # sse-starlette # tenacity + # tornado # trl # trove-classifiers diff --git a/requirements.hashes.source.txt b/requirements.hashes.source.txt index 1845c51e4..2057f95ea 100644 --- a/requirements.hashes.source.txt +++ b/requirements.hashes.source.txt @@ -2,9 +2,9 @@ # uv pip compile requirements.source.txt --refresh --generate-hashes --python-version 3.12 --emit-index-url --no-deps --no-annotate --index-url https://pypi.org/simple -a2a-sdk==0.3.24 \ - --hash=sha256:3581e6e8a854cd725808f5732f90b7978e661b6d4e227a4755a8f063a3c1599d \ - --hash=sha256:7b248767096bb55311f57deebf6b767349388d94c1b376c60cb8f6b715e053f6 +a2a-sdk==0.3.25 \ + --hash=sha256:2fce38faea82eb0b6f9f9c2bcf761b0d78612c80ef0e599b50d566db1b2654b5 \ + --hash=sha256:afda85bab8d6af0c5d15e82f326c94190f6be8a901ce562d045a338b7127242f accelerate==1.13.0 \ --hash=sha256:cf1a3efb96c18f7b152eb0fa7490f3710b19c3f395699358f08decca2b8b62e0 \ --hash=sha256:d631b4e0f5b3de4aff2d7e9e6857d164810dfc3237d54d017f075122d057b236 @@ -14,49 +14,166 @@ authlib==1.6.9 \ autoevals==0.1.0 \ --hash=sha256:573ab490966fd5f2265dc4842d0bfd7b729ee121c86bd72db4440badb7264587 \ 
--hash=sha256:ae884fe6107dbd6e05d840f51c2dba7eccfa01449e5ee5e83b6b4589508b2aca -azure-core==1.38.2 \ - --hash=sha256:074806c75cf239ea284a33a66827695ef7aeddac0b4e19dda266a93e4665ead9 \ - --hash=sha256:67562857cb979217e48dc60980243b61ea115b77326fa93d83b729e7ff0482e7 -azure-identity==1.25.2 \ - --hash=sha256:030dbaa720266c796221c6cdbd1999b408c079032c919fef725fcc348a540fe9 \ - --hash=sha256:1b40060553d01a72ba0d708b9a46d0f61f56312e215d8896d836653ffdc6753d +azure-core==1.38.3 \ + --hash=sha256:a7931fd445cb4af8802c6f39c6a326bbd1e34b115846550a8245fa656ead6f8e \ + --hash=sha256:bf59d29765bf4748ab9edf25f98a30b7ea9797f43e367c06d846a30b29c1f845 +azure-identity==1.25.3 \ + --hash=sha256:ab23c0d63015f50b630ef6c6cf395e7262f439ce06e5d07a64e874c724f8d9e6 \ + --hash=sha256:f4d0b956a8146f30333e071374171f3cfa7bdb8073adb8c3814b65567aa7447c blobfile==3.2.0 \ --hash=sha256:78514a9265b9aa7d4607042dc77c5e6461ab27036450ad8e1f6ef9a7f29bf958 \ --hash=sha256:e5e4095477da9f09e2077f41320c006001b2102a61f07d41ceaaecdf5d9741d8 -cachetools==7.0.3 \ - --hash=sha256:8c246313b95849964e54a909c03b327a87ab0428b068fac10da7b105ca275ef6 \ - --hash=sha256:c128ffca156eef344c25fcd08a96a5952803786fa33097f5f2d49edf76f79d53 +cachetools==7.0.5 \ + --hash=sha256:0cd042c24377200c1dcd225f8b7b12b0ca53cc2c961b43757e774ebe190fd990 \ + --hash=sha256:46bc8ebefbe485407621d0a4264b23c080cedd913921bad7ac3ed2f26c183114 certifi==2026.2.25 \ --hash=sha256:027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa \ --hash=sha256:e887ab5cee78ea814d3472169153c2d12cd43b14bd03329a39a9c6e2e80bfba7 -chardet==7.0.1 \ - --hash=sha256:11f51985946b49739968b6dc2fa70e7d8f490bb15574377c5ee114f33d19ef7e \ - --hash=sha256:1566d0f91990b8f33b53836391d557f779584bd48beabf90efbf7a6efa89179e \ - --hash=sha256:169951fa88d449e72e0c6194cec1c5e405fd36a6cfbe74c7dab5494cc35f1700 \ - --hash=sha256:26186f0ea03c4c1f9be20c088b127c71b0e9d487676930fab77625ddec2a4ef2 \ - --hash=sha256:265cb3b5dafc0411c0949800a0692f07e986fb663b6ae1ecfba32ad193a55a03 \ - 
--hash=sha256:302798e1e62008ca34a216dd04ecc5e240993b2090628e2a35d4c0754313ea9a \ - --hash=sha256:3355a3c8453d673e7c1664fdd24a0c6ef39964c3d41befc4849250f7eb1de3b5 \ - --hash=sha256:33f4132f9781302beff34713fe6c990badd009aa8ea730611aef0931b27f1541 \ - --hash=sha256:44011e3b4fd4a8a15bc94736717414b7ec82880066fb22d9f476c68a4ded2647 \ - --hash=sha256:4af34cf0652a9da44720540c97f11e30781a77900c89547b311984a7272b33f7 \ - --hash=sha256:5333f9967863ea7d8642df0e00cf4d33e8ed7e99fe7b6464b40ba969a2808544 \ - --hash=sha256:54e448fab0c11b27bb908ea0218e2094578c583d05faa5f65b91fa6ccfa45570 \ - --hash=sha256:63bc210ce73f8a1b87430b949f84d086cb326d67eb259305862e7c8861b73374 \ - --hash=sha256:67fe3f453416ed9343057dcf06583b36aae6d8bdb013370b3ff46bc37b7e30ac \ - --hash=sha256:69708a504a43464b60ea16d031250b58206969c9bbd6851266e2f39afef53168 \ - --hash=sha256:6f907962b18df78d5ca87a7484e4034354408d2c97cec6f53634b0ea0424c594 \ - --hash=sha256:6fce895c12c5495bb598e59ae3cd89306969b4464ec7b6dd609b9c86e3397fe3 \ - --hash=sha256:8714f0013c208452a98e23595d99cef53c5364565454425f431446eb586e2591 \ - --hash=sha256:88793aeebb28a5296eea9bdd9b5e74ee4e3582766a6a2cb7f39e4761a96fdd55 \ - --hash=sha256:8a8d87853c7f191029933307094a8896b087c2c436703281cb289a22aa4ae8bd \ - --hash=sha256:9e827211249d8e3cacc1adf6950a7a8cf56920e5e303e56dcab827b71c03df33 \ - --hash=sha256:c12abc65830068ad05bd257fb953aaaf63a551446688e03e145522086be5738c \ - --hash=sha256:c3f59dc3e148b54813ec5c7b4b2e025d37f5dc221ee28a06d1a62f169cfaedf5 \ - --hash=sha256:dd6db7505556ae8f9e2a3bf6d689c2b86aa6b459cf39552645d2c4d3fdbf489c \ - --hash=sha256:e51e1ff2c51b2d622d97c9737bd5ee9d9b9038f05b7dd8f9ea10b9e2d9674c24 \ - --hash=sha256:f661edbfa77b8683a503043ddc9b9fe9036cf28af13064200e11fa1844ded79c \ - --hash=sha256:fb14755377d8de845c69378bbaedc0e35109c21a43824450524fd9c3178792d5 +chardet==7.1.0 \ + --hash=sha256:00c3182f739ae7715641e8c08e0ee8ae21b5db6402b883264aa04511edf428b9 \ + 
--hash=sha256:18afc27681cd9f583fac47282179f8b73f37b1cab171a528e8af89e7e4562b32 \ + --hash=sha256:1a3c22672c9502af99e0433b47421d0d72c8803efce2cd4a91a3ae1ab5972243 \ + --hash=sha256:20b73403b7a21487e31b2810ea9d7182ce5e301a8ebe847b49d91ec6022e214a \ + --hash=sha256:38be4c07e016dac37fb6060094f3a720200e3e49dc14f55924a1c230eeffa59f \ + --hash=sha256:43c1e3cba6c41d8958ee4acdab94c151dbe256d7ef8df4ae032dc62a892f294f \ + --hash=sha256:49b5edd762751735704d1fec38665ee219da28c66f2a4921d615b12be389735b \ + --hash=sha256:5d86d349f768e6d35f6804013f6643d880ec877b94c453fa40a3fd10d16ddb48 \ + --hash=sha256:619d7ef3187ff1691525a7fdbe8c30f5a519885e1de82f6f57e26a29866bf11b \ + --hash=sha256:6f806f325825325e0682226269a2a4859993344cccca14f2463855d4f5a93272 \ + --hash=sha256:70adef4a036d39d9b5f6c2702f773118a9985c0dff23b650e1045f20e33c9467 \ + --hash=sha256:7f677725333bf53f84b7f57458f44669a8a5eb2ac4092ac699cdfa9b1af08a5f \ + --hash=sha256:8e067ee79709ccf9caa5732419df2dbef476bd9b9658ffc929d45cf13fc91ed7 \ + --hash=sha256:8f47bc4accac17bd9accbb4acc1d563acc024a783806c0a43c3a583f5285690b \ + --hash=sha256:96e7fe0770cd77361bec21a1dd8524e77aaa567577fa8372368d5fa8dd0ef00b \ + --hash=sha256:97cdd7a016fbb451a4dc26b3b1173960b3c0071bbe46a46d6b70027a517170ff \ + --hash=sha256:a02197831a4304eed360559e0ffc58deccc9cdda9f9315c6e7ad978f7d8617d3 \ + --hash=sha256:a6492bebaba8882afb3e14c786fb69ed767326b6f514b8e093dcdf6e2a094d33 \ + --hash=sha256:b951107b254cdc766e52f4b8339dcfa97c7b45ca9f5509075308db2497e7f3af \ + --hash=sha256:bacc8f862998c59e9ee7fe4960538300d1cc3fe2c293b9cc99bbbc7bf3bedf51 \ + --hash=sha256:bbd4fccf1cf6d92fdd75a1827a478672abb5685e61e92ce863d9380b18cb813f \ + --hash=sha256:c35d17822fc94467b7951adebd897cb01c0e37ac694be18d2cbd2b676d61df4f \ + --hash=sha256:cc8c7520a9736da766f5794bbabb1c6cdfe446676429a5cf691af878631a80bf \ + --hash=sha256:dff284d0661563e82d235f79f1d410c526b15ef8d50adc0446cba8162db68d22 \ + --hash=sha256:e096d9c211050fff40e22748e1d09d0cec8348fc13ee6e2e0a1da079345b8a86 \ + 
--hash=sha256:eb2a9b4052be006b87a985dbdbb00ab35b4b1b66d2751b0ee12680f8f4e90406 \ + --hash=sha256:fdfc42dfc44ccd569b84fe6a1fdea1df66dc0c48461bc3899dea5efea8d507f6 +charset-normalizer==3.4.5 \ + --hash=sha256:014837af6fabf57121b6254fa8ade10dceabc3528b27b721a64bbc7b8b1d4eb4 \ + --hash=sha256:01a1ed54b953303ca7e310fafe0fe347aab348bd81834a0bcd602eb538f89d66 \ + --hash=sha256:0294916d6ccf2d069727d65973c3a1ca477d68708db25fd758dd28b0827cff54 \ + --hash=sha256:02a9d1b01c1e12c27883b0c9349e0bcd9ae92e727ff1a277207e1a262b1cbf05 \ + --hash=sha256:036c079aa08a6a592b82487f97c60b439428320ed1b2ea0b3912e99d30c77765 \ + --hash=sha256:039215608ac7b358c4da0191d10fc76868567fbf276d54c14721bdedeb6de064 \ + --hash=sha256:0625665e4ebdddb553ab185de5db7054393af8879fb0c87bd5690d14379d6819 \ + --hash=sha256:0a45e504f5e1be0bd385935a8e1507c442349ca36f511a47057a71c9d1d6ea9e \ + --hash=sha256:0b362bcd27819f9c07cbf23db4e0e8cd4b44c5ecd900c2ff907b2b92274a7412 \ + --hash=sha256:0c300cefd9b0970381a46394902cd18eaf2aa00163f999590ace991989dcd0fc \ + --hash=sha256:1088345bcc93c58d8d8f3d783eca4a6e7a7752bbff26c3eee7e73c597c191c2e \ + --hash=sha256:10b473fc8dca1c3ad8559985794815f06ca3fc71942c969129070f2c3cdf7281 \ + --hash=sha256:131716d6786ad5e3dc542f5cc6f397ba3339dc0fb87f87ac30e550e8987756af \ + --hash=sha256:14498a429321de554b140013142abe7608f9d8ccc04d7baf2ad60498374aefa2 \ + --hash=sha256:149ec69866c3d6c2fb6f758dbc014ecb09f30b35a5ca90b6a8a2d4e54e18fdfe \ + --hash=sha256:165c7b21d19365464e8f70e5ce5e12524c58b48c78c1f5a57524603c1ab003f8 \ + --hash=sha256:1827734a5b308b65ac54e86a618de66f935a4f63a8a462ff1e19a6788d6c2262 \ + --hash=sha256:19092dde50335accf365cce21998a1c6dd8eafd42c7b226eb54b2747cdce2fac \ + --hash=sha256:1a374cc0b88aa710e8865dc1bd6edb3743c59f27830f0293ab101e4cf3ce9f85 \ + --hash=sha256:1d1401945cb77787dbd3af2446ff2d75912327c4c3a1526ab7955ecf8600687c \ + --hash=sha256:1f2da5cbb9becfcd607757a169e38fb82aa5fd86fae6653dea716e7b613fe2cf \ + 
--hash=sha256:259cd1ca995ad525f638e131dbcc2353a586564c038fc548a3fe450a91882139 \ + --hash=sha256:2820a98460c83663dd8ec015d9ddfd1e4879f12e06bb7d0500f044fb477d2770 \ + --hash=sha256:28269983f25a4da0425743d0d257a2d6921ea7d9b83599d4039486ec5b9f911d \ + --hash=sha256:2b970382e4a36bed897c19f310f31d7d13489c11b4f468ddfba42d41cddfb918 \ + --hash=sha256:2da4eedcb6338e2321e831a0165759c0c620e37f8cd044a263ff67493be8ffb3 \ + --hash=sha256:30987f4a8ed169983f93e1be8ffeea5214a779e27ed0b059835c7afe96550ad7 \ + --hash=sha256:30a2b1a48478c3428d047ed9690d57c23038dac838a87ad624c85c0a78ebeb39 \ + --hash=sha256:340810d34ef83af92148e96e3e44cb2d3f910d2bf95e5618a5c467d9f102231d \ + --hash=sha256:3f64c6bf8f32f9133b668c7f7a7cbdbc453412bc95ecdbd157f3b1e377a92990 \ + --hash=sha256:4167a621a9a1a986c73777dbc15d4b5eac8ac5c10393374109a343d4013ec765 \ + --hash=sha256:4354e401eb6dab9aed3c7b4030514328a6c748d05e1c3e19175008ca7de84fb1 \ + --hash=sha256:4481e6da1830c8a1cc0b746b47f603b653dadb690bcd851d039ffaefe70533aa \ + --hash=sha256:4b8551b6e6531e156db71193771c93bda78ffc4d1e6372517fe58ad3b91e4659 \ + --hash=sha256:4cd966c2559f501c6fd69294d082c2934c8dd4719deb32c22961a5ac6db0df1d \ + --hash=sha256:50bcbca6603c06a1dcc7b056ed45c37715fb5d2768feb3bcd37d2313c587a5b9 \ + --hash=sha256:530beedcec9b6e027e7a4b6ce26eed36678aa39e17da85e6e03d7bd9e8e9d7c9 \ + --hash=sha256:568e3c34b58422075a1b49575a6abc616d9751b4d61b23f712e12ebb78fe47b2 \ + --hash=sha256:573ef5814c4b7c0d59a7710aa920eaaaef383bd71626aa420fba27b5cab92e8d \ + --hash=sha256:58ad8270cfa5d4bef1bc85bd387217e14ff154d6630e976c6f56f9a040757475 \ + --hash=sha256:597d10dec876923e5c59e48dbd366e852eacb2b806029491d307daea6b917d7c \ + --hash=sha256:5bcb3227c3d9aaf73eaaab1db7ccd80a8995c509ee9941e2aae060ca6e4e5d81 \ + --hash=sha256:5cffde4032a197bd3b42fd0b9509ec60fb70918d6970e4cc773f20fc9180ca67 \ + --hash=sha256:5fea359734b140d0d6741189fea5478c6091b54ffc69d7ce119e0a05637d8c99 \ + --hash=sha256:60d68e820af339df4ae8358c7a2e7596badeb61e544438e489035f9fbf3246a5 \ + 
--hash=sha256:610f72c0ee565dfb8ae1241b666119582fdbfe7c0975c175be719f940e110694 \ + --hash=sha256:65a126fb4b070d05340a84fc709dd9e7c75d9b063b610ece8a60197a291d0adf \ + --hash=sha256:65b3c403a5b6b8034b655e7385de4f72b7b244869a22b32d4030b99a60593eca \ + --hash=sha256:66dee73039277eb35380d1b82cccc69cc82b13a66f9f4a18da32d573acf02b7c \ + --hash=sha256:708c7acde173eedd4bfa4028484426ba689d2103b28588c513b9db2cd5ecde9c \ + --hash=sha256:728c6a963dfab66ef865f49286e45239384249672cd598576765acc2a640a636 \ + --hash=sha256:754f96058e61a5e22e91483f823e07df16416ce76afa4ebf306f8e1d1296d43f \ + --hash=sha256:75dfd1afe0b1647449e852f4fb428195a7ed0588947218f7ba929f6538487f02 \ + --hash=sha256:75ee9c1cce2911581a70a3c0919d8bccf5b1cbc9b0e5171400ec736b4b569497 \ + --hash=sha256:76a9d0de4d0eab387822e7b35d8f89367dd237c72e82ab42b9f7bf5e15ada00f \ + --hash=sha256:77be992288f720306ab4108fe5c74797de327f3248368dfc7e1a916d6ed9e5a2 \ + --hash=sha256:7ad83b8f9379176c841f8865884f3514d905bcd2a9a3b210eaa446e7d2223e4d \ + --hash=sha256:8197abe5ca1ffb7d91e78360f915eef5addff270f8a71c1fc5be24a56f3e4873 \ + --hash=sha256:82cc7c2ad42faec8b574351f8bc2a0c049043893853317bd9bb309f5aba6cb5a \ + --hash=sha256:8a28afb04baa55abf26df544e3e5c6534245d3daa5178bc4a8eeb48202060d0e \ + --hash=sha256:8b78d8a609a4b82c273257ee9d631ded7fac0d875bdcdccc109f3ee8328cfcb1 \ + --hash=sha256:8ce11cd4d62d11166f2b441e30ace226c19a3899a7cf0796f668fba49a9fb123 \ + --hash=sha256:8fff79bf5978c693c9b1a4d71e4a94fddfb5fe744eb062a318e15f4a2f63a550 \ + --hash=sha256:92263f7eca2f4af326cd20de8d16728d2602f7cfea02e790dcde9d83c365d7cc \ + --hash=sha256:93b3b2cc5cf1b8743660ce77a4f45f3f6d1172068207c1defc779a36eea6bb36 \ + --hash=sha256:95adae7b6c42a6c5b5b559b1a99149f090a57128155daeea91732c8d970d8644 \ + --hash=sha256:97ab7787092eb9b50fb47fa04f24c75b768a606af1bcba1957f07f128a7219e4 \ + --hash=sha256:9db5e3fcdcee89a78c04dffb3fe33c79f77bd741a624946db2591c81b2fc85b0 \ + --hash=sha256:a118e2e0b5ae6b0120d5efa5f866e58f2bb826067a646431da4d6a2bdae7950e \ + 
--hash=sha256:a2aecdb364b8a1802afdc7f9327d55dad5366bc97d8502d0f5854e50712dbc5f \ + --hash=sha256:a66aa5022bf81ab4b1bebfb009db4fd68e0c6d4307a1ce5ef6a26e5878dfc9e4 \ + --hash=sha256:a68766a3c58fde7f9aaa22b3786276f62ab2f594efb02d0a1421b6282e852e98 \ + --hash=sha256:aa2f963b4da26daf46231d9b9e0e2c9408a751f8f0d0f44d2de56d3caf51d294 \ + --hash=sha256:aa92ec1102eaff840ccd1021478af176a831f1bccb08e526ce844b7ddda85c22 \ + --hash=sha256:ac59c15e3f1465f722607800c68713f9fbc2f672b9eb649fe831da4019ae9b23 \ + --hash=sha256:ae8b03427410731469c4033934cf473426faff3e04b69d2dfb64a4281a3719f8 \ + --hash=sha256:afca7f78067dd27c2b848f1b234623d26b87529296c6c5652168cc1954f2f3b2 \ + --hash=sha256:b2d37d78297b39a9eb9eb92c0f6df98c706467282055419df141389b23f93362 \ + --hash=sha256:b3e71afc578b98512bfe7bdb822dd6bc57d4b0093b4b6e5487c1e96ad4ace242 \ + --hash=sha256:ba20bdf69bd127f66d0174d6f2a93e69045e0b4036dc1ca78e091bcc765830c4 \ + --hash=sha256:c108f8619e504140569ee7de3f97d234f0fbae338a7f9f360455071ef9855a95 \ + --hash=sha256:c23eb3263356d94858655b3e63f85ac5d50970c6e8febcdde7830209139cc37d \ + --hash=sha256:c5af897b45fa606b12464ccbe0014bbf8c09191e0a66aab6aa9d5cf6e77e0c94 \ + --hash=sha256:c7a80a9242963416bd81f99349d5f3fce1843c303bd404f204918b6d75a75fd6 \ + --hash=sha256:c7e84e0c0005e3bdc1a9211cd4e62c78ba80bc37b2365ef4410cd2007a9047f2 \ + --hash=sha256:cace89841c0599d736d3d74a27bc5821288bb47c5441923277afc6059d7fbcb4 \ + --hash=sha256:cd2d0f0ec9aa977a27731a3209ebbcacebebaf41f902bd453a928bfd281cf7f8 \ + --hash=sha256:d01de5e768328646e6a3fa9e562706f8f6641708c115c62588aef2b941a4f88e \ + --hash=sha256:d1028de43596a315e2720a9849ee79007ab742c06ad8b45a50db8cdb7ed4a82a \ + --hash=sha256:d27ce22ec453564770d29d03a9506d449efbb9fa13c00842262b2f6801c48cce \ + --hash=sha256:d29dd9c016f2078b43d0c357511e87eee5b05108f3dd603423cb389b89813969 \ + --hash=sha256:d31f0d1671e1534e395f9eb84a68e0fb670e1edb1fe819a9d7f564ae3bc4e53f \ + --hash=sha256:d4eb8ac7469b2a5d64b5b8c04f84d8bf3ad340f4514b98523805cbf46e3b3923 \ + 
--hash=sha256:d5e52d127045d6ae01a1e821acfad2f3a1866c54d0e837828538fabe8d9d1bd6 \ + --hash=sha256:d77f97e515688bd615c1d1f795d540f32542d514242067adcb8ef532504cb9ee \ + --hash=sha256:d8ed79b8f6372ca4254955005830fd61c1ccdd8c0fac6603e2c145c61dd95db6 \ + --hash=sha256:dc57a0baa3eeedd99fafaef7511b5a6ef4581494e8168ee086031744e2679467 \ + --hash=sha256:e09f671a54ce70b79a1fc1dc6da3072b7ef7251fadb894ed92d9aa8218465a5f \ + --hash=sha256:e22d1059b951e7ae7c20ef6b06afd10fb95e3c41bf3c4fbc874dba113321c193 \ + --hash=sha256:e37bd100d2c5d3ba35db9c7c5ba5a9228cbcffe5c4778dc824b164e5257813d7 \ + --hash=sha256:e51ae7d81c825761d941962450f50d041db028b7278e7b08930b4541b3e45cb9 \ + --hash=sha256:e545b51da9f9af5c67815ca0eb40676c0f016d0b0381c86f20451e35696c5f95 \ + --hash=sha256:e6302ca4ae283deb0af68d2fbf467474b8b6aedcd3dab4db187e07f94c109763 \ + --hash=sha256:e71bbb595973622b817c042bd943c3f3667e9c9983ce3d205f973f486fec98a7 \ + --hash=sha256:ec56a2266f32bc06ed3c3e2a8f58417ce02f7e0356edc89786e52db13c593c98 \ + --hash=sha256:ed1a9a204f317ef879b32f9af507d47e49cd5e7f8e8d5d96358c98373314fc60 \ + --hash=sha256:ed97c282ee4f994ef814042423a529df9497e3c666dca19be1d4cd1129dc7ade \ + --hash=sha256:ed98364e1c262cf5f9363c3eca8c2df37024f52a8fa1180a3610014f26eac51c \ + --hash=sha256:ee57b926940ba00bca7ba7041e665cc956e55ef482f851b9b65acb20d867e7a2 \ + --hash=sha256:f1d725b754e967e648046f00c4facc42d414840f5ccc670c5670f59f83693e4f \ + --hash=sha256:f8102ae93c0bc863b1d41ea0f4499c20a83229f52ed870850892df555187154a \ + --hash=sha256:fc1c64934b8faf7584924143eb9db4770bbdb16659626e1a1a4d9efbcb68d947 \ + --hash=sha256:ff95a9283de8a457e6b12989de3f9f5193430f375d64297d323a615ea52cbdb3 +circuitbreaker==2.1.3 \ + --hash=sha256:1a4baee510f7bea3c91b194dcce7c07805fe96c4423ed5594b75af438531d084 \ + --hash=sha256:87ba6a3ed03fdc7032bc175561c2b04d52ade9d5faf94ca2b035fbdc5e6b1dd1 emoji==2.15.0 \ --hash=sha256:205296793d66a89d88af4688fa57fd6496732eb48917a87175a023c8138995eb \ 
--hash=sha256:eae4ab7d86456a70a00a985125a03263a5eac54cd55e51d7e184b1ed3b6757e4 @@ -142,24 +259,27 @@ fastuuid==0.14.0 \ --hash=sha256:ec27778c6ca3393ef662e2762dba8af13f4ec1aaa32d08d77f71f2a70ae9feb8 \ --hash=sha256:f54d5b36c56a2d5e1a31e73b950b28a0d83eb0c37b91d10408875a5a29494bad \ --hash=sha256:f74631b8322d2780ebcf2d2d75d58045c3e9378625ec51865fe0b5620800c39d -filelock==3.25.0 \ - --hash=sha256:5ccf8069f7948f494968fc0713c10e5c182a9c9d9eef3a636307a20c2490f047 \ - --hash=sha256:8f00faf3abf9dc730a1ffe9c354ae5c04e079ab7d3a683b7c32da5dd05f26af3 +filelock==3.25.2 \ + --hash=sha256:b64ece2b38f4ca29dd3e810287aa8c48182bbecd1ae6e9ae126c9b35f1382694 \ + --hash=sha256:ca8afb0da15f229774c9ad1b455ed96e85a81373065fb10446672f64444ddf70 google-api-core==2.30.0 \ --hash=sha256:02edfa9fab31e17fc0befb5f161b3bf93c9096d99aed584625f38065c511ad9b \ --hash=sha256:80be49ee937ff9aba0fd79a6eddfde35fe658b9953ab9b79c57dd7061afa8df5 -google-auth==2.48.0 \ - --hash=sha256:2e2a537873d449434252a9632c28bfc268b0adb1e53f9fb62afc5333a975903f \ - --hash=sha256:4f7e706b0cd3208a3d940a19a822c37a476ddba5450156c3e6624a71f7c841ce -google-cloud-aiplatform==1.140.0 \ - --hash=sha256:e94493a2682b9d17efa7146a53bb3665bf1595c3394fd3d0f45d18f71623fddc \ - --hash=sha256:ea7eb1870b4cf600f8c2472102e21c3a1bcaf723d6e49f00ed51bc6b88d54fff +google-auth==2.49.1 \ + --hash=sha256:16d40da1c3c5a0533f57d268fe72e0ebb0ae1cc3b567024122651c045d879b64 \ + --hash=sha256:195ebe3dca18eddd1b3db5edc5189b76c13e96f29e73043b923ebcf3f1a860f7 +google-cloud-aiplatform==1.141.0 \ + --hash=sha256:6bd25b4d514c40b8181ca703e1b313ad6d0454ab8006fc9907fb3e9f672f31d1 \ + --hash=sha256:e3b1cdb28865dd862aac9c685dfc5ac076488705aba0a5354016efadcddd59c6 google-cloud-bigquery==3.40.1 \ --hash=sha256:75afcfb6e007238fe1deefb2182105249321145ff921784fe7b1de2b4ba24506 \ --hash=sha256:9082a6b8193aba87bed6a2c79cf1152b524c99bb7e7ac33a785e333c09eac868 google-cloud-storage==3.9.0 \ --hash=sha256:2dce75a9e8b3387078cbbdad44757d410ecdb916101f8ba308abf202b6968066 \ 
--hash=sha256:f2d8ca7db2f652be757e92573b2196e10fbc09649b5c016f8b422ad593c641cc +googleapis-common-protos==1.73.0 \ + --hash=sha256:778d07cd4fbeff84c6f7c72102f0daf98fa2bfd3fa8bea426edc545588da0b5a \ + --hash=sha256:dfdaaa2e860f242046be561e6d6cb5c5f1541ae02cfbcb034371aadb2942b4e8 greenlet==3.3.2 \ --hash=sha256:02b0a8682aecd4d3c6c18edf52bc8e51eacdd75c8eac52a790a210b06aa295fd \ --hash=sha256:18cb1b7337bca281915b3c5d5ae19f4e76d35e1df80f4ad3c1a7be91fadf1082 \ @@ -223,18 +343,18 @@ jsonpath-ng==1.8.0 \ langdetect==1.0.9 \ --hash=sha256:7cbc0746252f19e76f77c0b1690aadf01963be835ef0cd4b56dddf2a8f1dfc2a \ --hash=sha256:cbc1fef89f8d062739774bd51eda3da3274006b3661d199c2655f6b3f6d605a0 -litellm==1.82.0 \ - --hash=sha256:5496b5d4532cccdc7a095c21cbac4042f7662021c57bc1d17be4e39838929e80 \ - --hash=sha256:d388f52447daccbcaafa19a3e68d17b75f1374b5bf2cde680d65e1cd86e50d22 -llama-stack==0.4.3 \ - --hash=sha256:423207eae2b640894992a9075ff9dd6300ff904ab06a49fe38cfe0bb809d4669 \ - --hash=sha256:70d379ae9dbb5b1d0693f14054d9817aba183ffcd805133f0a4442baee132c6d -llama-stack-api==0.4.4 \ - --hash=sha256:3973ca3bacf86916e04e521f77e7909533eec7364d32c3eabc35dc2976dbfe7d \ - --hash=sha256:7bbc63330ed186502dcd48f65cae014dbeb788ba5690be738c98693cfcd2f599 -llama-stack-client==0.4.3 \ - --hash=sha256:97b8cc5032bad4f0cdd1b0ae992cf44f5554679d315b7c40f46deb358c041f50 \ - --hash=sha256:cb807be258206e8fedeb5e5ceba7be7108d3badb31d74199406808c3d1679c35 +litellm==1.82.2 \ + --hash=sha256:641ed024774fa3d5b4dd9347f0efb1e31fa422fba2a6500aabedee085d1194cb \ + --hash=sha256:f5f4c4049f344a88bf80b2e421bb927807687c99624515d7ff4152d533ec9dcb +llama-stack==0.5.2 \ + --hash=sha256:581fda638088ee029aab20afe3c42ba8f7f6ef21c80bd9ebcae20bb13c3409d3 \ + --hash=sha256:9334c781e4ded6520aa60c3301a9087e9fb8fdaea8e5f30f8e21d85b17231d8d +llama-stack-api==0.5.2 \ + --hash=sha256:6531556dd8bb6555d778360ecfcd850aad7a49a8172b68146995d538e71641f0 \ + --hash=sha256:a272e4b803fe24a8ba7d22e6d904bf88abd118ba0b6610a20ff5dedb09f38ad7 
+llama-stack-client==0.5.2 \ + --hash=sha256:17c1bbad90f7699da4eb3cae256e8823caa4d2be945512a45c8c6f89ab899f28 \ + --hash=sha256:473f4d67ac0b243b0fc29555a0203a742615d31bea606b4332d9e2f193f73d6a markupsafe==3.0.3 \ --hash=sha256:0303439a41979d9e74d18ff5e2dd8c43ed6c6001fd40e5bf2e43f7bd9bbc523f \ --hash=sha256:068f375c472b3e7acbe2d5318dea141359e6900156b5b2ba06a30b169086b91a \ @@ -354,12 +474,18 @@ multiprocess==0.70.18 \ nltk==3.9.3 \ --hash=sha256:60b3db6e9995b3dd976b1f0fa7dec22069b2677e759c28eb69b62ddd44870522 \ --hash=sha256:cb5945d6424a98d694c2b9a0264519fab4363711065a46aa0ae7a2195b92e71f -openai==2.26.0 \ - --hash=sha256:6151bf8f83802f036117f06cc8a57b3a4da60da9926826cc96747888b57f394f \ - --hash=sha256:b41f37c140ae0034a6e92b0c509376d907f3a66109935fba2c1b471a7c05a8fb +oci==2.168.1 \ + --hash=sha256:b941674171b41e999b8e3adb38d4797d7b42d2bb5ff40d17c26e8ce2a7d4b605 \ + --hash=sha256:d106cfffc9153b5c9de628877c967ed87bbbfbbc9d411c97feee0eba8f2e4eab +openai==2.28.0 \ + --hash=sha256:79aa5c45dba7fef84085701c235cf13ba88485e1ef4f8dfcedc44fc2a698fc1d \ + --hash=sha256:bb7fdff384d2a787fa82e8822d1dd3c02e8cf901d60f1df523b7da03cbb6d48d opentelemetry-api==1.40.0 \ --hash=sha256:159be641c0b04d11e9ecd576906462773eb97ae1b657730f0ecf64d32071569f \ --hash=sha256:82dd69331ae74b06f6a874704be0cfaa49a1650e1537d4a813b86ecef7d0ecf9 +opentelemetry-distro==0.61b0 \ + --hash=sha256:975b845f50181ad53753becf4fd4b123b54fa04df5a9d78812264436d6518981 \ + --hash=sha256:f21d1ac0627549795d75e332006dd068877f00e461b1b2e8fe4568d6eb7b9590 opentelemetry-exporter-otlp==1.40.0 \ --hash=sha256:48c87e539ec9afb30dc443775a1334cc5487de2f72a770a4c00b1610bf6c697d \ --hash=sha256:7caa0870b95e2fcb59d64e16e2b639ecffb07771b6cd0000b5d12e5e4fef765a @@ -384,6 +510,38 @@ opentelemetry-sdk==1.40.0 \ opentelemetry-semantic-conventions==0.61b0 \ --hash=sha256:072f65473c5d7c6dc0355b27d6c9d1a679d63b6d4b4b16a9773062cb7e31192a \ --hash=sha256:fa530a96be229795f8cef353739b618148b0fe2b4b3f005e60e262926c4d38e2 +oracledb==3.4.2 \ + 
--hash=sha256:00c79448017f367bb7ab6900efe0706658a53768abea2b4519a4c9b2d5743890 \ + --hash=sha256:0e16fe3d057e0c41a23ad2ae95bfa002401690773376d476be608f79ac74bf05 \ + --hash=sha256:0f04a2d62073407672f114d02529921de0677c6883ed7c64d8d1a3c04caa3238 \ + --hash=sha256:1617a1db020346883455af005efbefd51be2c4d797e43b1b38455a19f8526b48 \ + --hash=sha256:19fa80ef84f85ad74077aa626067bbe697e527bd39604b4209f9d86cb2876b89 \ + --hash=sha256:1e4930d7f6584832dcc15b8ca415a7957b0c45f5aa7c4f88702e070e5c53bf93 \ + --hash=sha256:23aa07c1eaca17ae74c6fdc86b218f58484d56452958aead1aa460c0596a76c1 \ + --hash=sha256:31b7ee83c23d0439778303de8a675717f805f7e8edb5556d48c4d8343bcf14f5 \ + --hash=sha256:3df8eee1410d25360599968b1625b000f10c5ae0e47274031a7842a9dc418890 \ + --hash=sha256:404ec1451d0448653ee074213b87d6c5bd65eaa74b50083ddf2c9c3e11c71c71 \ + --hash=sha256:46e0f2278ff1fe83fbc33a3b93c72d429323ec7eed47bc9484e217776cd437e5 \ + --hash=sha256:55397e7eb43bb7017c03a981c736c25724182f5210951181dfe3fab0e5d457fb \ + --hash=sha256:574c8280d49cbbe21dbe03fc28356d9b9a5b9e300ebcde6c6d106e51453a7e65 \ + --hash=sha256:59ad6438f56a25e8e1a4a3dd1b42235a5d09ab9ba417ff2ad14eae6596f3d06f \ + --hash=sha256:5d7befb014174c5ae11c3a08f5ed6668a25ab2335d8e7104dca70d54d54a5b3a \ + --hash=sha256:5ed78d7e7079a778062744ccf42141ce4806818c3f4dd6463e4a7edd561c9f86 \ + --hash=sha256:643c25d301a289a371e37fcedb59e5fa5e54fb321708e5c12821c4b55bdd8a4d \ + --hash=sha256:6d85622664cc88d5a82bbd7beccb62cd53bd272c550a5e15e7d5f8ae6b86f1f1 \ + --hash=sha256:9f434a739405557bd57cb39b62238142bb27855a524a70dc6d397a2a8c576c9d \ + --hash=sha256:a7396664e592881225ba66385ee83ce339d864f39003d6e4ca31a894a7e7c552 \ + --hash=sha256:ac25a0448fc830fb7029ad50cd136cdbfcd06975d53967e269772cc5cb8c203a \ + --hash=sha256:b1095d95d0c8b37e4d0e17cf1928919cb59222b6344362a1cf6a2f3ca205a28a \ + --hash=sha256:b26a10f9c790bd141ffc8af68520803ed4a44a9258bf7d1eea9bfdd36bd6df7f \ + --hash=sha256:b8e4b8a852251cef09038b75f30fce1227010835f4e19cfbd436027acba2697c \ + 
--hash=sha256:b974caec2c330c22bbe765705a5ac7d98ec3022811dec2042d561a3c65cb991b \ + --hash=sha256:d7ce75c498bff758548ec6e4424ab4271aa257e5887cc436a54bc947fd46199a \ + --hash=sha256:d8d75e4f879b908be66cce05ba6c05791a5dbb4a15e39abc01aa25c8a2492bd9 \ + --hash=sha256:e068ef844a327877bfefbef1bc6fb7284c727bb87af80095f08d95bcaf7b8bb2 \ + --hash=sha256:f8ea989965a4f636a309444bd696ab877bba373d5d67bf744785f9bd8c560865 \ + --hash=sha256:f93cae08e8ed20f2d5b777a8602a71f9418389c661d2c937e84d94863e7e7011 \ + --hash=sha256:ff3c89cecea62af8ca02aa33cab0f2edc0214c747eac7d3364ed6b2640cb55e4 polyleven==0.11.0 \ --hash=sha256:046e90c02c5b8dae2ab71c4fb33772bd6f27b7883b05e2117573bf478b5ced44 \ --hash=sha256:05207bb66da15a2dc5c530e2f5cb5f0588d0a7e79b3bd542965f9e06e3fb14fe \ @@ -541,12 +699,15 @@ pycryptodomex==3.23.0 \ pydantic-settings==2.13.1 \ --hash=sha256:b4c11847b15237fb0171e1462bf540e294affb9b86db4d9aa5c01730bdbe4025 \ --hash=sha256:d56fd801823dbeae7f0975e1f8c8e25c258eb75d278ea7abb5d9cebb01b56237 -pyjwt==2.11.0 \ - --hash=sha256:35f95c1f0fbe5d5ba6e43f00271c275f7a1a4db1dab27bf708073b75318ea623 \ - --hash=sha256:94a6bde30eb5c8e04fee991062b534071fd1439ef58d2adc9ccb823e7bcd0469 -pythainlp==5.2.0 \ - --hash=sha256:04c6e4bdd806204be742f139b1f2e666411c4509c270dfff1a8b5afa69d36d2b \ - --hash=sha256:fd64d6b3d33973782390822e74b8e2c9b867760eeed19d0d218945165b431e35 +pyjwt==2.12.1 \ + --hash=sha256:28ca37c070cad8ba8cd9790cd940535d40274d22f80ab87f3ac6a713e6e8454c \ + --hash=sha256:c74a7a2adf861c04d002db713dd85f84beb242228e671280bf709d765b03672b +pyopenssl==25.3.0 \ + --hash=sha256:1fda6fc034d5e3d179d39e59c1895c9faeaf40a79de5fc4cbbfbe0d36f4a77b6 \ + --hash=sha256:c981cb0a3fd84e8602d7afc209522773b94c1c2446a3c710a75b06fe1beae329 +pythainlp==5.3.1 \ + --hash=sha256:516c34d22689c2b469dd74bb18221eb9336e42f5137aa32940008293f1895de4 \ + --hash=sha256:f33fb134fcfbd281fb64494c924fddb5e7cc27e053f7a73f18b6b5acbb7a4e2d python-dotenv==1.2.2 \ 
--hash=sha256:1d8214789a24de455a8b8bd8ae6fe3c6b69a5e3d64aa8a8e5d68e694bbcb285a \ --hash=sha256:2c371a91fbd7ba082c2c1dc1f8bf89ca22564a087c2c287cd9b662adde799cf3 @@ -674,15 +835,26 @@ rich==14.3.3 \ semver==3.0.4 \ --hash=sha256:9c824d87ba7f7ab4a1890799cec8596f15c1241cb473404ea1cb0c55e4b04746 \ --hash=sha256:afc7d8c584a5ed0a11033af086e8af226a9c0b206f313e0301f8dd7b6b589602 -sentence-transformers==5.2.3 \ - --hash=sha256:3cd3044e1f3fe859b6a1b66336aac502eaae5d3dd7d5c8fc237f37fbf58137c7 \ - --hash=sha256:6437c62d4112b615ddebda362dfc16a4308d604c5b68125ed586e3e95d5b2e30 +sentence-transformers==5.3.0 \ + --hash=sha256:414a0a881f53a4df0e6cbace75f823bfcb6b94d674c42a384b498959b7c065e2 \ + --hash=sha256:dca6b98db790274a68185d27a65801b58b4caf653a4e556b5f62827509347c7d sse-starlette==3.3.2 \ --hash=sha256:5c3ea3dad425c601236726af2f27689b74494643f57017cafcb6f8c9acfbb862 \ --hash=sha256:678fca55a1945c734d8472a6cad186a55ab02840b4f6786f5ee8770970579dcd tenacity==9.1.4 \ --hash=sha256:6095a360c919085f28c6527de529e76a06ad89b23659fa881ae0649b867a9d55 \ --hash=sha256:adb31d4c263f2bd041081ab33b498309a57c77f9acf2db65aadf0898179cf93a +tornado==6.5.5 \ + --hash=sha256:192b8f3ea91bd7f1f50c06955416ed76c6b72f96779b962f07f911b91e8d30e9 \ + --hash=sha256:2c9a876e094109333f888539ddb2de4361743e5d21eece20688e3e351e4990a6 \ + --hash=sha256:36abed1754faeb80fbd6e64db2758091e1320f6bba74a4cf8c09cd18ccce8aca \ + --hash=sha256:3f54aa540bdbfee7b9eb268ead60e7d199de5021facd276819c193c0fb28ea4e \ + --hash=sha256:435319e9e340276428bbdb4e7fa732c2d399386d1de5686cb331ec8eee754f07 \ + --hash=sha256:487dc9cc380e29f58c7ab88f9e27cdeef04b2140862e5076a66fb6bb68bb1bfa \ + --hash=sha256:6443a794ba961a9f619b1ae926a2e900ac20c34483eea67be4ed8f1e58d3ef7b \ + --hash=sha256:65a7f1d46d4bb41df1ac99f5fcb685fb25c7e61613742d5108b010975a9a6521 \ + --hash=sha256:dd3eafaaeec1c7f2f8fdcd5f964e8907ad788fe8a5a32c4426fbbdda621223b7 \ + --hash=sha256:e74c92e8e65086b338fd56333fb9a68b9f6f2fe7ad532645a290a464bcf46be5 trl==0.29.0 \ 
--hash=sha256:7d49cb1526c55cc1d798d921d9d91bb84a35ad5c645d6277441ffb7a30a233aa \ --hash=sha256:b1c9f756a3d73c5457b7025b0c7bb9792873a87a2f3841cccf4f9d4f0e9ab273 diff --git a/requirements.hashes.wheel.txt b/requirements.hashes.wheel.txt index fc084979a..04e79edf7 100644 --- a/requirements.hashes.wheel.txt +++ b/requirements.hashes.wheel.txt @@ -26,19 +26,19 @@ asyncpg==0.31.0 \ attrs==25.4.0 \ --hash=sha256:6d626bc22c041e7c3a84117f6d65a2be8eba3b58a83d3d110c58ee71db234b1e cffi==2.0.0 \ - --hash=sha256:59a31b51151b49adbbcd58c0542ea5b0eddc74fd18e5cb5d4deffc97e4c93388 \ - --hash=sha256:92f0b76faee1feab91508f8c932b54fd9a9203986b156761a5bf266addd23f25 \ - --hash=sha256:d6f66e891be32d0acc7933c7ff9f55285da5b73a1364e8d222bf1e9241b06a6a \ - --hash=sha256:fe96da20ecfe1e84563b411a2996740d2891d126aeef9e19476563c22dca65d8 -charset-normalizer==3.4.4 \ - --hash=sha256:ccdd9a2c5c46da6335354acd6a04ef80e2eb2804a8ed58f932b7f5ae345c5891 + --hash=sha256:257e90f733c1a33b9f5ade4a4f47db6a3984de5c2d2654848feca129888ff9d5 \ + --hash=sha256:34cf2187e399eb7baaa20488d0b78ab20be91060ef9ac531685f37478ca1a12a \ + --hash=sha256:e39d7009b1872abbd91bb17057b48fe24057a1026a20ab06b672fbdd4721d789 \ + --hash=sha256:ffe747cf5e265169ad5bd64b4359368f74e445cf8ea3cfe99045450554fb4e13 chevron==0.14.0 \ --hash=sha256:215f5e3e7ac75d150eadfc0f8c651b3815dc36813e122484b1ed68e142e5adfb click==8.3.1 \ --hash=sha256:a87f0253ce1fb7747cdde1674d73f34241bb4de9fca7a31bc866fb0a8a5f4307 -cryptography==46.0.4 \ - --hash=sha256:26ffa7666abe41b7b75905f7353c43ff4e812cf7f5b2b253ec3bcddd0d29e7a1 \ - --hash=sha256:b1352b1c73e908b825c91e8fe23f24775fcebca7d59f66a6864e4dcf6fdd6323 +cryptography==46.0.5 \ + --hash=sha256:2b44c9fd892f763465b2d7782bf310d65c04dab741b1241f5be203ccf022368d \ + --hash=sha256:661cf199efa488e0c5fb4987d36214e11c1fe2dfb842a1a330b1854ff069f8d3 \ + --hash=sha256:88347ad17f60b60e31e5f2b58e37339d88fa90bfa4f0d35528b3bd18d464427b \ + --hash=sha256:bb1b90386c7b5d3d8c9d8d53b207cfbe5c3e639457ff9ebe84f2131918b785a9 
datasets==4.5.0 \ --hash=sha256:afc7c4ccba966970c71e264f0eb7977bdc7ca8ea9a946e331cf9d3d1d072cb2f dill==0.4.0 \ @@ -81,8 +81,6 @@ google-genai==1.59.0 \ --hash=sha256:388b25b31c0c00307a947690f21528a7b652e329feb3a14d49cbb16765012313 google-resumable-media==2.8.0 \ --hash=sha256:bdf13a0ceec8ba97622165ceeb789e6f0fb7c1614665be246b819811ca661f9c -googleapis-common-protos==1.72.0 \ - --hash=sha256:b530c484aa7341cab24eda0b801abc608c85da52ff9002d8a485e2f9d9d302c5 grpc-google-iam-v1==0.14.3 \ --hash=sha256:b29d3adb3873130b93f289e44449166586cd684bfdaccc8a4c4d35dc0e3ce9df grpcio==1.76.0 \ @@ -147,7 +145,7 @@ numpy==2.3.5 \ oauthlib==3.3.1 \ --hash=sha256:df9219023be7c78d77401950592822d1bbc318225d3cd71d02271c549ef1f980 packaging==26.0 \ - --hash=sha256:4e4d463e366772479222863a8ff3bb307310faee4da6256c6c39641c207b1a44 + --hash=sha256:76081d1703deb79a44f8c44f23e391b98ab21ea54a2c21b0e8890779e0eaecb7 pandas==2.3.3 \ --hash=sha256:308c8ca6262e058136d91a7d4d2bd84dd02e65caf60340fefd20bcf26b97e819 \ --hash=sha256:6ba245cff03afa84a0bc5e715a11caafab1dc51ca8fe4d827017706c4b62f238 \ @@ -178,11 +176,11 @@ pyarrow==23.0.0 \ --hash=sha256:f5af9838bbfefa2535c3ae9bf4fbfaab63367994c1e65243b1830e41b943b366 \ --hash=sha256:f8eea464a5138cfe7b32b5422caedaca6a9fe4b20af1ed917f2a6cf9ec802d94 pyasn1==0.6.2 \ - --hash=sha256:4448ac141114a5dfc4a2e3b6184a48b363541f26b4ca764e2b89651505dc4576 + --hash=sha256:dad757598fe812256782cbf67535fef02cf245bec157dfa1d82ceee62151cd65 pyasn1-modules==0.4.2 \ --hash=sha256:cda22a851735d664e92c8354a30405d88c5a1aa7d6313ba6c583bc3034654937 pycparser==3.0 \ - --hash=sha256:a0a805ba04eb0922ef6f214736f63a5c38dbaebdc4d991f88d64cf3d06af00c5 + --hash=sha256:86072d0cf4bf7e125171d79036c2126a2fa377687520e834987c2de4b4e5a9e9 pydantic==2.12.5 \ --hash=sha256:ba78cfc9d85e40047d67d32828da04ddbe9a27bd0718983938629bfca0d3cc6c pydantic-core==2.41.5 \ @@ -197,10 +195,10 @@ python-dateutil==2.9.0.post0 \ python-multipart==0.0.22 \ 
--hash=sha256:b5fc2a9738209bd168ef0c746ca0ee5eb66f2c69aeeb689e0dbc15b36c779aac pyyaml==6.0.3 \ - --hash=sha256:1ccd350719df6e1794b08d7a28b14e3bed61ef2742cb0ea03ced7c05f1e14cfd \ - --hash=sha256:6ea391cbbbc6abd3412718672301a184bb87f1f877b58081150b5be5942f0915 \ - --hash=sha256:d122b1f987c23385d9600d211475f765fcb59a2fff150c5e6036293196df5b80 \ - --hash=sha256:d4980565b8082efbd9baf9d659cfb37dce47e56e0530bfe1dba00e3287159f69 + --hash=sha256:84a2acfe1e8dfcf8be95fa61611ea7637f8bfcff49761a6a8c396aaa0b6d94a9 \ + --hash=sha256:bcaf1b152ce8cd6ec4ff56f8ca929f176eb01e6aa472fe10b3ea31f19e15fb39 \ + --hash=sha256:c22e4a0d2bb69fddc73b3b52f1540533df1f206a08f52ea8cae49337615b55d2 \ + --hash=sha256:c99b924c5311933fa398cc53c617f99705cf2d96226d023f75e95e15e753f81f referencing==0.37.0 \ --hash=sha256:bd019ff74869f84a893b7e50b84ce3d0db1e465ceb3a336403011c6467ffacb0 requests==2.32.5 \ @@ -212,8 +210,6 @@ rpds-py==0.30.0 \ --hash=sha256:613bc173bc12f35e40c3d1c1e6252555eaa48ea9425ee0b01fb151f3ca8a2a05 \ --hash=sha256:6c94c5f1c2501e50c22b7c993b083aeef4d342974d3058763296fb4646d8a059 \ --hash=sha256:75a565fc839958562ab5ad648c3a4bd0c46874a5556acb48da423f91c47c355b -rsa==4.9.1 \ - --hash=sha256:1c5f55eb95147d5a8d6f4a20869cfb97d680b28e2ea6ef3747b70c518c44f7fa safetensors==0.7.0 \ --hash=sha256:18abcf37ffae5f85a81ca46c440fdc5f38ae0938ff0f4a4de14e37386ed248e7 \ --hash=sha256:6833f019f20c8f0bb790ae3f0fd088f50c9fe4e051106dc3e59df5a953f68532 \ @@ -230,7 +226,7 @@ scipy==1.17.0 \ --hash=sha256:a1b3d25c892207a7626f0634fe768bd71b5f09cbb393be07d82fc44cced109ac \ --hash=sha256:f1c7b6ff095ed94c422bce52aa0ca8c8aea18922877853bbacaf8b7947e02510 setuptools==80.9.0 \ - --hash=sha256:ade450006de562f26341e24f8199338e8aff247cd565ac4f52235478a8dfa815 + --hash=sha256:3a3c26f9000ab213d87be4efa0f1926fb0975237ae9e8e7cc68c9d7fcba6c294 six==1.17.0 \ --hash=sha256:33f558442b372864d53b1813933f5d624876f418521b1b89624ea6e3d79f0e97 sniffio==1.3.1 \ @@ -264,11 +260,6 @@ torch==2.9.1 \ 
--hash=sha256:9bd4844a0cf3f199351830697973a168ad2fd3a99e77b150ca4a1582067dc633 \ --hash=sha256:ccb4a5b3c15819df80d96d2474b053306a2a4eba0301337c4aa56a58cbe45e10 \ --hash=sha256:e42ab849b64444059f5eda352d61c9c3a078f30797e48e2972857182c9a00cf8 -tornado==6.5.4 \ - --hash=sha256:4009891b752e77c47c396ed587fd3abeeeaf53ce0ff180ac5fbd81f3c5522903 \ - --hash=sha256:9532231bbed1fab64200c9f755616e45f4a9d2b52fbefb91dd3b24f9b9ade291 \ - --hash=sha256:be68f2c30d855e21a11c7e5a6cdaf6e7d12b8373e703798ad75f79422732b02a \ - --hash=sha256:d533131fa41fd4b126d33efd3fd1f7be15a3380af7fd107d6cfac4942a47c07c tqdm==4.67.3 \ --hash=sha256:d798b33fcc041b9a42c57f462b9c068a5a15c2dbcef1c87695d80c7074770a4d transformers==4.57.6 \ diff --git a/requirements.overrides.txt b/requirements.overrides.txt index b1f4aaaf2..413782c93 100644 --- a/requirements.overrides.txt +++ b/requirements.overrides.txt @@ -1,11 +1,11 @@ # override these package to the version available on RHOAI wheels index: -# https://console.redhat.com/api/pypi/public-rhai/rhoai/3.2/cpu-ubi9/simple +# https://console.redhat.com/api/pypi/public-rhai/rhoai/3.3/cpu-ubi9/simple transformers==4.57.6 tokenizers==0.22.2 scipy==1.17.0 aiohttp==3.13.3 aiosqlite==0.22.1 -cryptography==46.0.4 +cryptography==46.0.5 anyio==4.12.1 datasets==4.5.0 pandas==2.3.3 diff --git a/scripts/gen_doc.py b/scripts/gen_doc.py index c62e18a44..e23bf6244 100755 --- a/scripts/gen_doc.py +++ b/scripts/gen_doc.py @@ -25,13 +25,13 @@ def generate_docfile(directory): f"# List of source files stored in `{directory}` directory", file=indexfile, ) - print("", file=indexfile) + print(file=indexfile) files = sorted(os.listdir()) for file in files: if file.endswith(".py"): print(f"## [{file}]({file})", file=indexfile) - with open(file, "r", encoding="utf-8") as fin: + with open(file, encoding="utf-8") as fin: source = fin.read() try: mod = ast.parse(source) diff --git a/scripts/konflux_requirements.sh b/scripts/konflux_requirements.sh index 5331bfff2..67cff3e7e 100755 --- 
a/scripts/konflux_requirements.sh +++ b/scripts/konflux_requirements.sh @@ -4,6 +4,8 @@ # Packages from pypi.org go to requirements.source.txt # Packages from console.redhat.com go to requirements.wheel.txt +set -x + RAW_REQ_FILE="requirements.no_hashes.txt" SOURCE_FILE="requirements.source.txt" WHEEL_FILE="requirements.wheel.txt" @@ -87,4 +89,4 @@ echo "Done!" echo "Packages from pypi.org written to: $SOURCE_HASH_FILE ($(wc -l < "$SOURCE_HASH_FILE") packages)" echo "Packages from console.redhat.com written to: $WHEEL_HASH_FILE ($(wc -l < "$WHEEL_HASH_FILE") packages)" echo "Build dependencies written to: $BUILD_FILE ($(wc -l < "$BUILD_FILE") packages)" -echo "Remember to commit $SOURCE_HASH_FILE, $WHEEL_HASH_FILE, $BUILD_FILE, pipeline configurations and push the changes" \ No newline at end of file +echo "Remember to commit $SOURCE_HASH_FILE, $WHEEL_HASH_FILE, $BUILD_FILE, pipeline configurations and push the changes" diff --git a/src/a2a_storage/__init__.py b/src/a2a_storage/__init__.py index 2707019cb..aa7b1fafc 100644 --- a/src/a2a_storage/__init__.py +++ b/src/a2a_storage/__init__.py @@ -16,8 +16,8 @@ __all__ = [ "A2AContextStore", + "A2AStorageFactory", "InMemoryA2AContextStore", - "SQLiteA2AContextStore", "PostgresA2AContextStore", - "A2AStorageFactory", + "SQLiteA2AContextStore", ] diff --git a/src/app/endpoints/a2a.py b/src/app/endpoints/a2a.py index a00ece91e..e5fe2abd1 100644 --- a/src/app/endpoints/a2a.py +++ b/src/app/endpoints/a2a.py @@ -340,7 +340,7 @@ async def _process_task_streaming( # pylint: disable=too-many-locals stream = await client.responses.create(**responses_params.model_dump()) except APIConnectionError as e: error_message = ( - f"Unable to connect to Llama Stack backend service: {str(e)}. " + f"Unable to connect to Llama Stack backend service: {e!s}. " "The service may be temporarily unavailable. Please try again later." 
) logger.error( diff --git a/src/app/endpoints/conversations_v1.py b/src/app/endpoints/conversations_v1.py index 1dc14cc95..158d312b6 100644 --- a/src/app/endpoints/conversations_v1.py +++ b/src/app/endpoints/conversations_v1.py @@ -16,7 +16,6 @@ from configuration import configuration from models.config import Action from models.database.conversations import ( - UserTurn, UserConversation, ) from models.requests import ConversationUpdateRequest @@ -38,6 +37,7 @@ check_configuration_loaded, delete_conversation, retrieve_conversation, + retrieve_conversation_turns, validate_and_retrieve_conversation, ) from utils.suid import ( @@ -45,7 +45,10 @@ normalize_conversation_id, to_llama_stack_conversation_id, ) -from utils.conversations import build_conversation_turns_from_items +from utils.conversations import ( + build_conversation_turns_from_items, + get_all_conversation_items, +) from log import get_logger logger = get_logger(__name__) @@ -236,46 +239,23 @@ async def get_conversation_endpoint_handler( # pylint: disable=too-many-locals, llama_stack_conv_id, ) - # Use Conversations API to retrieve conversation items - conversation_items_response = await client.conversations.items.list( - conversation_id=llama_stack_conv_id, - after=None, - include=None, - limit=None, - order="asc", # oldest first - ) + # Retrieve turns metadata from database (can be empty for legacy conversations) + db_turns = retrieve_conversation_turns(normalized_conv_id) - if not conversation_items_response.data: + # Use Conversations API to retrieve conversation items + items = await get_all_conversation_items(client, llama_stack_conv_id) + if not items: logger.error("No items found for conversation %s", conversation_id) response = NotFoundResponse( resource="conversation", resource_id=normalized_conv_id ).model_dump() raise HTTPException(**response) - items = conversation_items_response.data - logger.info( "Successfully retrieved %d items for conversation %s", len(items), conversation_id, ) - # 
Retrieve turns metadata from database - db_turns: list[UserTurn] = [] - try: - with get_session() as session: - db_turns = ( - session.query(UserTurn) - .filter_by(conversation_id=normalized_conv_id) - .order_by(UserTurn.turn_number) - .all() - ) - except SQLAlchemyError as e: - logger.error( - "Database error occurred while retrieving conversation turns for %s.", - normalized_conv_id, - ) - response = InternalServerErrorResponse.database_error() - raise HTTPException(**response.model_dump()) from e # Build conversation turns from items and populate turns metadata # Use conversation.created_at for legacy conversations without turn metadata diff --git a/src/app/endpoints/health.py b/src/app/endpoints/health.py index 7a8058d79..994103ced 100644 --- a/src/app/endpoints/health.py +++ b/src/app/endpoints/health.py @@ -75,7 +75,7 @@ async def get_providers_health_statuses() -> list[ProviderHealthStatus]: providers = await client.providers.list() logger.debug("Found %d providers", len(providers)) - health_results = [ + return [ ProviderHealthStatus( provider_id=provider.provider_id, status=str(provider.health.get("status", "unknown")), @@ -83,7 +83,6 @@ async def get_providers_health_statuses() -> list[ProviderHealthStatus]: ) for provider in providers ] - return health_results except APIConnectionError as e: logger.error("Failed to check providers health: %s", e) @@ -91,7 +90,7 @@ async def get_providers_health_statuses() -> list[ProviderHealthStatus]: ProviderHealthStatus( provider_id="unknown", status=HealthStatus.ERROR.value, - message=f"Failed to initialize health check: {str(e)}", + message=f"Failed to initialize health check: {e!s}", ) ] diff --git a/src/app/endpoints/models.py b/src/app/endpoints/models.py index a2f519292..348cf954b 100644 --- a/src/app/endpoints/models.py +++ b/src/app/endpoints/models.py @@ -49,7 +49,7 @@ def parse_llama_stack_model(model: Any) -> dict[str, Any]: if k not in ("provider_id", "provider_resource_id", "model_type") } - legacy_model 
= { + return { "identifier": getattr(model, "id", ""), "metadata": metadata, "api_model_type": model_type, @@ -58,7 +58,6 @@ def parse_llama_stack_model(model: Any) -> dict[str, Any]: "provider_resource_id": str(custom_metadata.get("provider_resource_id", "")), "model_type": model_type, } - return legacy_model models_responses: dict[int | str, dict[str, Any]] = { diff --git a/src/app/endpoints/query.py b/src/app/endpoints/query.py index fd123bd21..d9635e791 100644 --- a/src/app/endpoints/query.py +++ b/src/app/endpoints/query.py @@ -1,5 +1,3 @@ -# pylint: disable=too-many-locals,too-many-branches,too-many-nested-blocks - """Handler for REST API call to provide answer to query using Response API.""" import datetime @@ -36,6 +34,7 @@ UnauthorizedResponse, UnprocessableEntityResponse, ) +from utils.conversations import append_turn_items_to_conversation from utils.endpoints import ( check_configuration_loaded, validate_and_retrieve_conversation, @@ -59,14 +58,11 @@ get_topic_summary, prepare_responses_params, ) -from utils.shields import ( - append_turn_to_conversation, - run_shield_moderation, - validate_shield_ids_override, -) +from utils.shields import run_shield_moderation, validate_shield_ids_override from utils.suid import normalize_conversation_id from utils.types import ( ResponsesApiParams, + ShieldModerationResult, TurnSummary, ) from utils.vector_search import build_rag_context @@ -158,14 +154,21 @@ async def query_endpoint_handler( client = AsyncLlamaStackClientHolder().get_client() - # Build RAG context from Inline RAG sources - inline_rag_context = await build_rag_context( - client, query_request.query, query_request.vector_store_ids, query_request.solr - ) - # Moderation input is the raw user content (query + attachments) without injected RAG # context, to avoid false positives from retrieved document content. 
moderation_input = prepare_input(query_request) + moderation_result = await run_shield_moderation( + client, moderation_input, query_request.shield_ids + ) + + # Build RAG context from Inline RAG sources + inline_rag_context = await build_rag_context( + client, + moderation_result.decision, + query_request.query, + query_request.vector_store_ids, + query_request.solr, + ) # Prepare API request parameters responses_params = await prepare_responses_params( @@ -177,7 +180,7 @@ async def query_endpoint_handler( stream=False, store=True, request_headers=request.headers, - inline_rag_context=inline_rag_context.context_text or None, + inline_rag_context=inline_rag_context.context_text, ) # Handle Azure token refresh if needed @@ -189,32 +192,22 @@ async def query_endpoint_handler( ): client = await update_azure_token(client) - # Build index identification mapping for RAG source resolution - vector_store_ids = extract_vector_store_ids_from_tools(responses_params.tools) - rag_id_mapping = configuration.rag_id_mapping - # Retrieve response using Responses API - turn_summary = await retrieve_response( - client, - responses_params, - query_request.shield_ids, - vector_store_ids, - rag_id_mapping, - moderation_input=moderation_input, - ) - - # Combine inline RAG results (BYOK + Solr) with tool-based RAG results for the transcript - rag_chunks = inline_rag_context.rag_chunks - tool_rag_chunks = turn_summary.rag_chunks or [] - logger.info("RAG as a tool retrieved %d chunks", len(tool_rag_chunks)) - turn_summary.rag_chunks = rag_chunks + tool_rag_chunks - - # Add tool-based RAG documents and chunks - rag_documents = inline_rag_context.referenced_documents - tool_rag_documents = turn_summary.referenced_documents or [] - turn_summary.referenced_documents = deduplicate_referenced_documents( - rag_documents + tool_rag_documents - ) + turn_summary = await retrieve_response(client, responses_params, moderation_result) + + if moderation_result.decision == "passed": + # Combine inline RAG 
results (BYOK + Solr) with tool-based RAG results for the transcript + rag_chunks = inline_rag_context.rag_chunks + tool_rag_chunks = turn_summary.rag_chunks + logger.info("RAG as a tool retrieved %d chunks", len(tool_rag_chunks)) + turn_summary.rag_chunks = rag_chunks + tool_rag_chunks + + # Add tool-based RAG documents and chunks + rag_documents = inline_rag_context.referenced_documents + tool_rag_documents = turn_summary.referenced_documents + turn_summary.referenced_documents = deduplicate_referenced_documents( + rag_documents + tool_rag_documents + ) # Get topic summary for new conversation if not user_conversation and query_request.generate_topic_summary: @@ -269,13 +262,10 @@ async def query_endpoint_handler( ) -async def retrieve_response( # pylint: disable=too-many-locals +async def retrieve_response( client: AsyncLlamaStackClient, responses_params: ResponsesApiParams, - shield_ids: Optional[list[str]] = None, - vector_store_ids: Optional[list[str]] = None, - rag_id_mapping: Optional[dict[str, str]] = None, - moderation_input: Optional[str] = None, + moderation_result: ShieldModerationResult, ) -> TurnSummary: """ Retrieve response from LLMs and agents. @@ -286,33 +276,23 @@ async def retrieve_response( # pylint: disable=too-many-locals Parameters: client: The AsyncLlamaStackClient to use for the request. responses_params: The Responses API parameters. - shield_ids: Optional list of shield IDs for moderation. - vector_store_ids: Vector store IDs used in the query for source resolution. - rag_id_mapping: Mapping from vector_db_id to user-facing rag_id. - moderation_input: Text to moderate. Should be the raw user content (query + - attachments) without injected RAG context to avoid false positives. - Falls back to responses_params.input if not provided. + moderation_result: The moderation result. 
Returns: TurnSummary: Summary of the LLM response content """ response: Optional[OpenAIResponseObject] = None - try: - moderation_result = await run_shield_moderation( + if moderation_result.decision == "blocked": + await append_turn_items_to_conversation( client, - moderation_input or cast(str, responses_params.input), - shield_ids, + responses_params.conversation, + responses_params.input, + [moderation_result.refusal_response], + ) + return TurnSummary( + id=moderation_result.moderation_id, llm_response=moderation_result.message ) - if moderation_result.decision == "blocked": - # Handle shield moderation blocking - violation_message = moderation_result.message - await append_turn_to_conversation( - client, - responses_params.conversation, - cast(str, responses_params.input), - violation_message, - ) - return TurnSummary(llm_response=violation_message) + try: response = await client.responses.create( **responses_params.model_dump(exclude_none=True) ) @@ -333,6 +313,8 @@ async def retrieve_response( # pylint: disable=too-many-locals error_response = handle_known_apistatus_errors(e, responses_params.model) raise HTTPException(**error_response.model_dump()) from e + vector_store_ids = extract_vector_store_ids_from_tools(responses_params.tools) + rag_id_mapping = configuration.rag_id_mapping return build_turn_summary( response, responses_params.model, vector_store_ids, rag_id_mapping ) diff --git a/src/app/endpoints/responses.py b/src/app/endpoints/responses.py new file mode 100644 index 000000000..89a475f9a --- /dev/null +++ b/src/app/endpoints/responses.py @@ -0,0 +1,731 @@ +# pylint: disable=too-many-locals,too-many-branches,too-many-nested-blocks, too-many-arguments,too-many-positional-arguments + +"""Handler for REST API call to provide answer using Responses API (LCORE specification).""" + +import json +from datetime import UTC, datetime +from typing import Annotated, Any, Optional, cast +from collections.abc import AsyncIterator + +from fastapi import 
APIRouter, Depends, HTTPException, Request +from fastapi.responses import StreamingResponse +from llama_stack_api import ( + OpenAIResponseObject, + OpenAIResponseObjectStream, + OpenAIResponseObjectStreamResponseOutputItemAdded as OutputItemAddedChunk, + OpenAIResponseObjectStreamResponseOutputItemDone as OutputItemDoneChunk, +) +from llama_stack_client import ( + APIConnectionError, + APIStatusError as LLSApiStatusError, + AsyncLlamaStackClient, +) +from openai._exceptions import ( + APIStatusError as OpenAIAPIStatusError, +) + +from authentication import get_auth_dependency +from authentication.interface import AuthTuple +from authorization.azure_token_manager import AzureEntraIDManager +from authorization.middleware import authorize +from client import AsyncLlamaStackClientHolder +from configuration import configuration +from log import get_logger +from models.config import Action +from models.requests import ResponsesRequest +from models.responses import ( + ForbiddenResponse, + InternalServerErrorResponse, + NotFoundResponse, + PromptTooLongResponse, + QuotaExceededResponse, + ResponsesResponse, + ServiceUnavailableResponse, + UnauthorizedResponse, + UnprocessableEntityResponse, +) + +from utils.conversations import append_turn_items_to_conversation +from utils.endpoints import ( + check_configuration_loaded, + resolve_response_context, +) +from utils.mcp_headers import mcp_headers_dependency +from utils.mcp_oauth_probe import check_mcp_auth +from utils.query import ( + consume_query_tokens, + extract_provider_and_model_from_model_id, + handle_known_apistatus_errors, + store_query_results, + update_azure_token, + validate_model_provider_override, +) +from utils.quota import check_tokens_available, get_available_quotas +from utils.responses import ( + build_tool_call_summary, + build_turn_summary, + check_model_configured, + deduplicate_referenced_documents, + extract_attachments_text, + extract_text_from_response_items, + extract_token_usage, + 
extract_vector_store_ids_from_tools, + get_topic_summary, + get_zero_usage, + parse_referenced_documents, + resolve_tool_choice, + select_model_for_responses, +) +from utils.shields import run_shield_moderation +from utils.suid import ( + normalize_conversation_id, +) +from utils.types import ( + RAGContext, + ResponseInput, + ResponsesApiParams, + ShieldModerationBlocked, + ShieldModerationResult, + TurnSummary, +) +from utils.vector_search import ( + append_inline_rag_context_to_responses_input, + build_rag_context, +) + +logger = get_logger(__name__) +router = APIRouter(tags=["responses"]) + +responses_response: dict[int | str, dict[str, Any]] = { + 200: ResponsesResponse.openapi_response(), + 401: UnauthorizedResponse.openapi_response( + examples=["missing header", "missing token"] + ), + 403: ForbiddenResponse.openapi_response( + examples=["endpoint", "conversation read", "model override"] + ), + 404: NotFoundResponse.openapi_response( + examples=["model", "conversation", "provider"] + ), + 413: PromptTooLongResponse.openapi_response(), + 422: UnprocessableEntityResponse.openapi_response(), + 429: QuotaExceededResponse.openapi_response(), + 500: InternalServerErrorResponse.openapi_response(examples=["configuration"]), + 503: ServiceUnavailableResponse.openapi_response(), +} + + +@router.post( + "/responses", + responses=responses_response, + response_model=None, + summary="Responses Endpoint Handler", +) +@authorize(Action.QUERY) +async def responses_endpoint_handler( + request: Request, + responses_request: ResponsesRequest, + auth: Annotated[AuthTuple, Depends(get_auth_dependency())], + mcp_headers: dict[str, dict[str, str]] = Depends(mcp_headers_dependency), +) -> ResponsesResponse | StreamingResponse: + """ + Handle request to the /responses endpoint using Responses API (LCORE specification). 
+ + Processes a POST request to the responses endpoint, forwarding the + user's request to a selected Llama Stack LLM and returning the generated response + following the LCORE OpenAPI specification. + + Returns: + ResponsesResponse: Contains the response following LCORE specification (non-streaming). + StreamingResponse: SSE-formatted streaming response with enriched events (streaming). + - response.created event includes conversation attribute + - response.completed event includes available_quotas attribute + + Raises: + HTTPException: + - 401: Unauthorized - Missing or invalid credentials + - 403: Forbidden - Insufficient permissions or model override not allowed + - 404: Not Found - Conversation, model, or provider not found + - 413: Prompt too long - Prompt exceeded model's context window size + - 422: Unprocessable Entity - Request validation failed + - 429: Quota limit exceeded - The token quota for model or user has been exceeded + - 500: Internal Server Error - Configuration not loaded or other server errors + - 503: Service Unavailable - Unable to connect to Llama Stack backend + """ + responses_request = responses_request.model_copy(deep=True) + check_configuration_loaded(configuration) + started_at = datetime.now(UTC) + user_id = auth[0] + + await check_mcp_auth(configuration, mcp_headers) + + # Check token availability + check_tokens_available(configuration.quota_limiters, user_id) + + # Enforce RBAC: optionally disallow overriding model in requests + validate_model_provider_override( + responses_request.model, + None, # provider specified as model prefix + request.state.authorized_actions, + ) + + response_context = await resolve_response_context( + user_id=user_id, + others_allowed=( + Action.READ_OTHERS_CONVERSATIONS in request.state.authorized_actions + ), + conversation_id=responses_request.conversation, + previous_response_id=responses_request.previous_response_id, + generate_topic_summary=responses_request.generate_topic_summary, + ) + 
responses_request.conversation = response_context.conversation + responses_request.generate_topic_summary = response_context.generate_topic_summary + client = AsyncLlamaStackClientHolder().get_client() + + # LCORE-specific: Automatically select model if not provided in request + # This extends the base LLS API which requires model to be specified. + if not responses_request.model: + responses_request.model = await select_model_for_responses( + client, response_context.user_conversation + ) + if not await check_model_configured(client, responses_request.model): + _, model_id = extract_provider_and_model_from_model_id(responses_request.model) + error_response = NotFoundResponse(resource="model", resource_id=model_id) + raise HTTPException(**error_response.model_dump()) + + # Handle Azure token refresh if needed + if ( + responses_request.model.startswith("azure") + and AzureEntraIDManager().is_entra_id_configured + and AzureEntraIDManager().is_token_expired + and AzureEntraIDManager().refresh_token() + ): + client = await update_azure_token(client) + + input_text = ( + responses_request.input + if isinstance(responses_request.input, str) + else extract_text_from_response_items(responses_request.input) + ) + attachments_text = extract_attachments_text(responses_request.input) + + moderation_result = await run_shield_moderation( + client, + input_text + "\n\n" + attachments_text, + responses_request.shield_ids, + ) + + ( + responses_request.tools, + responses_request.tool_choice, + vector_store_ids, + ) = await resolve_tool_choice( + responses_request.tools, + responses_request.tool_choice, + auth[1], + mcp_headers, + request.headers, + ) + + # Build RAG context from Inline RAG sources + inline_rag_context = await build_rag_context( + client, + moderation_result.decision, + input_text, + vector_store_ids, + responses_request.solr, + ) + if moderation_result.decision == "passed": + responses_request.input = append_inline_rag_context_to_responses_input( + 
responses_request.input, inline_rag_context.context_text + ) + + response_handler = ( + handle_streaming_response + if responses_request.stream + else handle_non_streaming_response + ) + return await response_handler( + client=client, + request=responses_request, + auth=auth, + input_text=input_text, + started_at=started_at, + moderation_result=moderation_result, + inline_rag_context=inline_rag_context, + ) + + +async def handle_streaming_response( + client: AsyncLlamaStackClient, + request: ResponsesRequest, + auth: AuthTuple, + input_text: str, + started_at: datetime, + moderation_result: ShieldModerationResult, + inline_rag_context: RAGContext, +) -> StreamingResponse: + """Handle streaming response from Responses API. + + Args: + client: The AsyncLlamaStackClient instance + request: ResponsesRequest (LCORE-specific fields e.g. generate_topic_summary) + auth: Authentication tuple + input_text: The extracted input text + started_at: Timestamp when the conversation started + moderation_result: Result of shield moderation check + inline_rag_context: Inline RAG context to be used for the response + Returns: + StreamingResponse with SSE-formatted events + """ + api_params = ResponsesApiParams.model_validate(request.model_dump()) + turn_summary = TurnSummary() + # Handle blocked response + if moderation_result.decision == "blocked": + turn_summary.id = moderation_result.moderation_id + turn_summary.llm_response = moderation_result.message + available_quotas = get_available_quotas( + quota_limiters=configuration.quota_limiters, user_id=auth[0] + ) + generator = shield_violation_generator( + moderation_result, + api_params.conversation, + request.echoed_params(), + started_at, + available_quotas, + ) + if api_params.store: + await append_turn_items_to_conversation( + client=client, + conversation_id=api_params.conversation, + user_input=request.input, + llm_output=[moderation_result.refusal_response], + ) + else: + try: + response = await client.responses.create( + 
**api_params.model_dump(exclude_none=True) + ) + generator = response_generator( + stream=cast(AsyncIterator[OpenAIResponseObjectStream], response), + user_input=request.input, + api_params=api_params, + user_id=auth[0], + turn_summary=turn_summary, + inline_rag_context=inline_rag_context, + ) + except RuntimeError as e: # library mode wraps 413 into runtime error + if "context_length" in str(e).lower(): + error_response = PromptTooLongResponse(model=api_params.model) + raise HTTPException(**error_response.model_dump()) from e + raise e + except APIConnectionError as e: + error_response = ServiceUnavailableResponse( + backend_name="Llama Stack", + cause=str(e), + ) + raise HTTPException(**error_response.model_dump()) from e + except (LLSApiStatusError, OpenAIAPIStatusError) as e: + error_response = handle_known_apistatus_errors(e, api_params.model) + raise HTTPException(**error_response.model_dump()) from e + + return StreamingResponse( + generate_response( + generator=generator, + turn_summary=turn_summary, + client=client, + auth=auth, + input_text=input_text, + started_at=started_at, + api_params=api_params, + generate_topic_summary=request.generate_topic_summary or False, + ), + media_type="text/event-stream", + ) + + +async def shield_violation_generator( + moderation_result: ShieldModerationBlocked, + conversation_id: str, + echoed_params: dict[str, Any], + created_at: datetime, + available_quotas: dict[str, int], +) -> AsyncIterator[str]: + """Generate SSE-formatted streaming response for shield-blocked requests. 
+ + Follows the Open Responses spec: + - Content-Type: text/event-stream + - Each event has 'event:' field matching the type in the event body + - Data objects are JSON-encoded strings + - Terminal event is the literal string [DONE] + - Emits full event sequence: response.created (in_progress), output_item.added, + output_item.done, response.completed (completed) + - Performs topic summary and persistence after [DONE] is emitted + + Args: + moderation_result: The moderation result + conversation_id: The conversation ID to include in the response + echoed_params: Echoed parameters from the request + created_at: Unix timestamp when the response was created + available_quotas: Available quotas dictionary for the user + Yields: + SSE-formatted strings for streaming events, ending with [DONE] + """ + normalized_conv_id = normalize_conversation_id(conversation_id) + + # 1. Send response.created event with status "in_progress" and empty output + created_response_object = ResponsesResponse.model_construct( + id=moderation_result.moderation_id, + created_at=int(created_at.timestamp()), + status="in_progress", + output=[], + conversation=normalized_conv_id, + available_quotas={}, + output_text="", + **echoed_params, + ) + created_response_dict = created_response_object.model_dump(exclude_none=True) + created_event = { + "type": "response.created", + "sequence_number": 0, + "response": created_response_dict, + } + data_json = json.dumps(created_event) + yield f"event: response.created\ndata: {data_json}\n\n" + + # 2. Send response.output_item.added event + item_added_event = OutputItemAddedChunk( + response_id=moderation_result.moderation_id, + item=moderation_result.refusal_response, + output_index=0, + sequence_number=1, + ) + data_json = json.dumps(item_added_event.model_dump(exclude_none=True)) + yield f"event: response.output_item.added\ndata: {data_json}\n\n" + + # 3. 
Send response.output_item.done event + item_done_event = OutputItemDoneChunk( + response_id=moderation_result.moderation_id, + item=moderation_result.refusal_response, + output_index=0, + sequence_number=2, + ) + data_json = json.dumps(item_done_event.model_dump(exclude_none=True)) + yield f"event: response.output_item.done\ndata: {data_json}\n\n" + + # 4. Send response.completed event with status "completed" and output populated + completed_response_object = ResponsesResponse.model_construct( + id=moderation_result.moderation_id, + created_at=int(created_at.timestamp()), + completed_at=int(datetime.now(UTC).timestamp()), + status="completed", + output=[moderation_result.refusal_response], + usage=get_zero_usage(), + conversation=normalized_conv_id, + available_quotas=available_quotas, + output_text=moderation_result.message, + **echoed_params, + ) + completed_response_dict = completed_response_object.model_dump(exclude_none=True) + completed_event = { + "type": "response.completed", + "sequence_number": 3, + "response": completed_response_dict, + } + data_json = json.dumps(completed_event) + yield f"event: response.completed\ndata: {data_json}\n\n" + + yield "data: [DONE]\n\n" + + +async def response_generator( + stream: AsyncIterator[OpenAIResponseObjectStream], + user_input: ResponseInput, + api_params: ResponsesApiParams, + user_id: str, + turn_summary: TurnSummary, + inline_rag_context: RAGContext, +) -> AsyncIterator[str]: + """Generate SSE-formatted streaming response with LCORE-enriched events. 
+ + Args: + stream: The streaming response from Llama Stack + user_input: User input to the response + api_params: ResponsesApiParams + user_id: User ID for quota retrieval + turn_summary: TurnSummary to populate during streaming + inline_rag_context: Inline RAG context to be used for the response + Yields: + SSE-formatted strings for streaming events, ending with [DONE] + """ + normalized_conv_id = normalize_conversation_id(api_params.conversation) + + logger.debug("Starting streaming response (Responses API) processing") + + latest_response_object: Optional[OpenAIResponseObject] = None + sequence_number = 0 + + async for chunk in stream: + event_type = getattr(chunk, "type", None) + logger.debug("Processing streaming chunk, type: %s", event_type) + + chunk_dict = chunk.model_dump(exclude_none=True) + + # Create own sequence number for chunks to maintain order + chunk_dict["sequence_number"] = sequence_number + sequence_number += 1 + + # Add conversation attribute to the response if chunk has it + if "response" in chunk_dict: + chunk_dict["response"]["conversation"] = normalized_conv_id + + # Intermediate response - no quota consumption and text yet + if event_type == "response.in_progress": + chunk_dict["response"]["available_quotas"] = {} + chunk_dict["response"]["output_text"] = "" + + # Handle completion, incomplete, and failed events - only quota handling here + if event_type in ( + "response.completed", + "response.incomplete", + "response.failed", + ): + latest_response_object = cast( + OpenAIResponseObject, cast(Any, chunk).response + ) + + # Extract and consume tokens if any were used + turn_summary.token_usage = extract_token_usage( + latest_response_object.usage, api_params.model + ) + consume_query_tokens( + user_id=user_id, + model_id=api_params.model, + token_usage=turn_summary.token_usage, + ) + + # Get available quotas after token consumption + available_quotas = get_available_quotas( + quota_limiters=configuration.quota_limiters, user_id=user_id + 
) + chunk_dict["response"]["available_quotas"] = available_quotas + turn_summary.llm_response = extract_text_from_response_items( + latest_response_object.output + ) + chunk_dict["response"]["output_text"] = turn_summary.llm_response + + data_json = json.dumps(chunk_dict) + yield f"event: {event_type or 'error'}\ndata: {data_json}\n\n" + + # Extract response metadata from final response object + if latest_response_object: + turn_summary.id = latest_response_object.id + vector_store_ids = extract_vector_store_ids_from_tools(api_params.tools) + tool_rag_docs = parse_referenced_documents( + latest_response_object, vector_store_ids, configuration.rag_id_mapping + ) + turn_summary.referenced_documents = deduplicate_referenced_documents( + inline_rag_context.referenced_documents + tool_rag_docs + ) + for item in latest_response_object.output: + tool_call, tool_result = build_tool_call_summary( + item, + turn_summary.rag_chunks, + vector_store_ids, + configuration.rag_id_mapping, + ) + if tool_call: + turn_summary.tool_calls.append(tool_call) + if tool_result: + turn_summary.tool_results.append(tool_result) + + turn_summary.rag_chunks.extend(inline_rag_context.rag_chunks) + + client = AsyncLlamaStackClientHolder().get_client() + # Explicitly append the turn to conversation if context passed by previous response + if api_params.store and api_params.previous_response_id and latest_response_object: + await append_turn_items_to_conversation( + client, api_params.conversation, user_input, latest_response_object.output + ) + + yield "data: [DONE]\n\n" + + +async def generate_response( + generator: AsyncIterator[str], + turn_summary: TurnSummary, + client: AsyncLlamaStackClient, + auth: AuthTuple, + input_text: str, + started_at: datetime, + api_params: ResponsesApiParams, + generate_topic_summary: bool, +) -> AsyncIterator[str]: + """Stream the response from the generator and persist conversation details. + + After streaming completes, conversation details are persisted. 
+ + Args: + generator: The SSE event generator + turn_summary: TurnSummary populated during streaming + client: The AsyncLlamaStackClient instance + auth: Authentication tuple + input_text: The extracted input text + started_at: Timestamp when the conversation started + api_params: ResponsesApiParams + generate_topic_summary: Whether to generate topic summary for new conversations + Yields: + SSE-formatted strings from the generator + """ + user_id, _, skip_userid_check, _ = auth + async for event in generator: + yield event + + # Get topic summary for new conversation + topic_summary = None + if generate_topic_summary: + logger.debug("Generating topic summary for new conversation") + topic_summary = await get_topic_summary(input_text, client, api_params.model) + + completed_at = datetime.now(UTC) + if api_params.store: + store_query_results( + user_id=user_id, + conversation_id=normalize_conversation_id(api_params.conversation), + model=api_params.model, + started_at=started_at.strftime("%Y-%m-%dT%H:%M:%SZ"), + completed_at=completed_at.strftime("%Y-%m-%dT%H:%M:%SZ"), + summary=turn_summary, + query=input_text, + attachments=[], + skip_userid_check=skip_userid_check, + topic_summary=topic_summary, + ) + + +async def handle_non_streaming_response( + client: AsyncLlamaStackClient, + request: ResponsesRequest, + auth: AuthTuple, + input_text: str, + started_at: datetime, + moderation_result: ShieldModerationResult, + inline_rag_context: RAGContext, +) -> ResponsesResponse: + """Handle non-streaming response from Responses API. 
+ + Args: + client: The AsyncLlamaStackClient instance + request: Request object + auth: Authentication tuple + input_text: The extracted input text + started_at: Timestamp when the conversation started + moderation_result: Result of shield moderation check + inline_rag_context: Inline RAG context to be used for the response + Returns: + ResponsesResponse with the completed response + """ + user_id, _, skip_userid_check, _ = auth + api_params = ResponsesApiParams.model_validate(request.model_dump()) + + # Fork: Get response object (blocked vs normal) + if moderation_result.decision == "blocked": + output_text = moderation_result.message + api_response = OpenAIResponseObject.model_construct( + id=moderation_result.moderation_id, + created_at=int(started_at.timestamp()), + status="completed", + output=[moderation_result.refusal_response], + usage=get_zero_usage(), + **request.echoed_params(), + ) + if api_params.store: + await append_turn_items_to_conversation( + client=client, + conversation_id=api_params.conversation, + user_input=request.input, + llm_output=[moderation_result.refusal_response], + ) + else: + try: + api_response = cast( + OpenAIResponseObject, + await client.responses.create( + **api_params.model_dump(exclude_none=True) + ), + ) + token_usage = extract_token_usage(api_response.usage, api_params.model) + logger.info("Consuming tokens") + consume_query_tokens( + user_id=user_id, + model_id=api_params.model, + token_usage=token_usage, + ) + output_text = extract_text_from_response_items(api_response.output) + # Explicitly append the turn to conversation if context passed by previous response + if api_params.store and api_params.previous_response_id: + await append_turn_items_to_conversation( + client, api_params.conversation, request.input, api_response.output + ) + + except RuntimeError as e: + if "context_length" in str(e).lower(): + error_response = PromptTooLongResponse(model=api_params.model) + raise HTTPException(**error_response.model_dump()) 
from e + raise e + except APIConnectionError as e: + error_response = ServiceUnavailableResponse( + backend_name="Llama Stack", + cause=str(e), + ) + raise HTTPException(**error_response.model_dump()) from e + except (LLSApiStatusError, OpenAIAPIStatusError) as e: + error_response = handle_known_apistatus_errors(e, api_params.model) + raise HTTPException(**error_response.model_dump()) from e + + # Get available quotas + logger.info("Getting available quotas") + available_quotas = get_available_quotas( + quota_limiters=configuration.quota_limiters, user_id=user_id + ) + # Get topic summary for new conversation + topic_summary = None + if request.generate_topic_summary: + logger.debug("Generating topic summary for new conversation") + topic_summary = await get_topic_summary(input_text, client, api_params.model) + + vector_store_ids = extract_vector_store_ids_from_tools(api_params.tools) + turn_summary = build_turn_summary( + api_response, + api_params.model, + vector_store_ids, + configuration.rag_id_mapping, + ) + turn_summary.referenced_documents = deduplicate_referenced_documents( + inline_rag_context.referenced_documents + turn_summary.referenced_documents + ) + turn_summary.rag_chunks.extend(inline_rag_context.rag_chunks) + completed_at = datetime.now(UTC) + if api_params.store: + store_query_results( + user_id=user_id, + conversation_id=normalize_conversation_id(api_params.conversation), + model=api_params.model, + started_at=started_at.strftime("%Y-%m-%dT%H:%M:%SZ"), + completed_at=completed_at.strftime("%Y-%m-%dT%H:%M:%SZ"), + summary=turn_summary, + query=input_text, + attachments=[], + skip_userid_check=skip_userid_check, + topic_summary=topic_summary, + ) + response = ResponsesResponse.model_validate( + { + **api_response.model_dump(exclude_none=True), + "available_quotas": available_quotas, + "conversation": normalize_conversation_id(api_params.conversation), + "completed_at": int(completed_at.timestamp()), + "output_text": output_text, + } + ) + return 
response diff --git a/src/app/endpoints/rlsapi_v1.py b/src/app/endpoints/rlsapi_v1.py index 886d04c12..4f34f3761 100644 --- a/src/app/endpoints/rlsapi_v1.py +++ b/src/app/endpoints/rlsapi_v1.py @@ -6,7 +6,7 @@ import functools import time -from datetime import datetime +from datetime import datetime, UTC from typing import Annotated, Any, Optional, cast import jinja2 @@ -123,7 +123,7 @@ def _build_instructions(systeminfo: RlsapiV1SystemInfo) -> str: Returns: The rendered instructions string for the LLM. """ - date_today = datetime.now().strftime("%B %d, %Y") + date_today = datetime.now(tz=UTC).strftime("%B %d, %Y") return _get_prompt_template().render( date=date_today, diff --git a/src/app/endpoints/streaming_query.py b/src/app/endpoints/streaming_query.py index d77f26807..250a29b89 100644 --- a/src/app/endpoints/streaming_query.py +++ b/src/app/endpoints/streaming_query.py @@ -8,7 +8,7 @@ from fastapi import APIRouter, Depends, HTTPException, Request from fastapi.responses import StreamingResponse -from llama_stack_api.openai_responses import ( +from llama_stack_api import ( OpenAIResponseObject, OpenAIResponseObjectStream, OpenAIResponseObjectStreamResponseMcpCallArgumentsDone as MCPArgsDoneChunk, @@ -57,6 +57,7 @@ UnauthorizedResponse, UnprocessableEntityResponse, ) +from utils.conversations import append_turn_items_to_conversation from utils.endpoints import ( check_configuration_loaded, validate_and_retrieve_conversation, @@ -189,10 +190,22 @@ async def streaming_query_endpoint_handler( # pylint: disable=too-many-locals client = AsyncLlamaStackClientHolder().get_client() + # Moderation input is the raw user content (query + attachments) without injected RAG + # context, to avoid false positives from retrieved document content. 
+ moderation_input = prepare_input(query_request) + moderation_result = await run_shield_moderation( + client, moderation_input, query_request.shield_ids + ) + # Build RAG context from Inline RAG sources inline_rag_context = await build_rag_context( - client, query_request.query, query_request.vector_store_ids, query_request.solr + client, + moderation_result.decision, + query_request.query, + query_request.vector_store_ids, + query_request.solr, ) + # Prepare API request parameters responses_params = await prepare_responses_params( client=client, @@ -203,7 +216,7 @@ async def streaming_query_endpoint_handler( # pylint: disable=too-many-locals stream=True, store=True, request_headers=request.headers, - inline_rag_context=inline_rag_context.context_text or None, + inline_rag_context=inline_rag_context.context_text, ) # Handle Azure token refresh if needed @@ -227,8 +240,10 @@ async def streaming_query_endpoint_handler( # pylint: disable=too-many-locals query_request=query_request, started_at=started_at, client=client, + moderation_result=moderation_result, vector_store_ids=extract_vector_store_ids_from_tools(responses_params.tools), rag_id_mapping=configuration.rag_id_mapping, + inline_rag_context=inline_rag_context, ) # Update metrics for the LLM call @@ -240,9 +255,14 @@ async def streaming_query_endpoint_handler( # pylint: disable=too-many-locals generator, turn_summary = await retrieve_response_generator( responses_params=responses_params, context=context, - inline_rag_documents=inline_rag_context.referenced_documents, ) + # Combine inline RAG results (BYOK + Solr) with tool-based results + if context.moderation_result.decision == "passed": + turn_summary.referenced_documents = deduplicate_referenced_documents( + inline_rag_context.referenced_documents + turn_summary.referenced_documents + ) + response_media_type = ( MEDIA_TYPE_TEXT if query_request.media_type == MEDIA_TYPE_TEXT @@ -263,7 +283,6 @@ async def streaming_query_endpoint_handler( # pylint: 
disable=too-many-locals async def retrieve_response_generator( responses_params: ResponsesApiParams, context: ResponseGeneratorContext, - inline_rag_documents: list[ReferencedDocument], ) -> tuple[AsyncIterator[str], TurnSummary]: """ Retrieve the appropriate response generator. @@ -275,30 +294,27 @@ async def retrieve_response_generator( Args: responses_params: The Responses API parameters context: The response generator context - inline_rag_documents: Referenced documents from inline RAG (BYOK + Solr) - Returns: tuple[AsyncIterator[str], TurnSummary]: The response generator and turn summary """ turn_summary = TurnSummary() try: - moderation_result = await run_shield_moderation( - context.client, - prepare_input(context.query_request), - context.query_request.shield_ids, - ) - if moderation_result.decision == "blocked": - turn_summary.llm_response = moderation_result.message - await append_turn_to_conversation( + if context.moderation_result.decision == "blocked": + turn_summary.llm_response = context.moderation_result.message + turn_summary.id = context.moderation_result.moderation_id + await append_turn_items_to_conversation( context.client, responses_params.conversation, - cast(str, responses_params.input), - moderation_result.message, + responses_params.input, + [context.moderation_result.refusal_response], ) media_type = context.query_request.media_type or MEDIA_TYPE_JSON return ( - shield_violation_generator(moderation_result.message, media_type), + shield_violation_generator( + context.moderation_result.message, + media_type, + ), turn_summary, ) # Retrieve response stream (may raise exceptions) @@ -306,9 +322,14 @@ async def retrieve_response_generator( **responses_params.model_dump(exclude_none=True) ) # Store pre-RAG documents for later merging with tool-based RAG - turn_summary.inline_rag_documents = inline_rag_documents - return response_generator(response, context, turn_summary), turn_summary - + return ( + response_generator( + response, + context, + 
turn_summary, + ), + turn_summary, + ) # Handle know LLS client errors only at stream creation time and shield execution except RuntimeError as e: # library mode wraps 413 into runtime error if "context_length" in str(e).lower(): @@ -698,7 +719,7 @@ async def response_generator( # pylint: disable=too-many-branches,too-many-stat # Completed response - capture final text and response object elif event_type == "response.completed": latest_response_object = cast( - OpenAIResponseObject, getattr(chunk, "response") + OpenAIResponseObject, getattr(chunk, "response") # noqa: B009 ) turn_summary.llm_response = turn_summary.llm_response or "".join(text_parts) yield stream_event( @@ -714,7 +735,7 @@ async def response_generator( # pylint: disable=too-many-branches,too-many-stat # Incomplete or failed response - emit error elif event_type in ("response.incomplete", "response.failed"): latest_response_object = cast( - OpenAIResponseObject, getattr(chunk, "response") + OpenAIResponseObject, getattr(chunk, "response") # noqa: B009 ) error_message = ( latest_response_object.error.message @@ -741,15 +762,19 @@ async def response_generator( # pylint: disable=too-many-branches,too-many-stat turn_summary.token_usage = extract_token_usage( latest_response_object.usage, context.model_id ) - tool_based_documents = parse_referenced_documents( + # Parse tool-based referenced documents from the final response object + tool_rag_docs = parse_referenced_documents( latest_response_object, vector_store_ids=context.vector_store_ids, rag_id_mapping=context.rag_id_mapping, ) - - # Merge pre-RAG documents with tool-based documents and deduplicate + # Combine inline RAG results (BYOK + Solr) with tool-based results turn_summary.referenced_documents = deduplicate_referenced_documents( - turn_summary.inline_rag_documents + tool_based_documents + context.inline_rag_context.referenced_documents + tool_rag_docs + ) + # Combine inline RAG chunks (BYOK + Solr) with tool-based chunks + 
turn_summary.rag_chunks = ( + context.inline_rag_context.rag_chunks + turn_summary.rag_chunks ) diff --git a/src/app/routers.py b/src/app/routers.py index 78663e18f..97e3522f6 100644 --- a/src/app/routers.py +++ b/src/app/routers.py @@ -26,6 +26,7 @@ rlsapi_v1, # A2A (Agent-to-Agent) protocol support a2a, + responses, ) @@ -58,7 +59,7 @@ def include_routers(app: FastAPI) -> None: app.include_router(feedback.router, prefix="/v1") app.include_router(conversations_v1.router, prefix="/v1") app.include_router(conversations_v2.router, prefix="/v2") - + app.include_router(responses.router, prefix="/v1") # RHEL Lightspeed rlsapi v1 compatibility - stateless CLA (Command Line Assistant) endpoint app.include_router(rlsapi_v1.router, prefix="/v1") diff --git a/src/authorization/resolvers.py b/src/authorization/resolvers.py index 359064d4b..3ce553c0c 100644 --- a/src/authorization/resolvers.py +++ b/src/authorization/resolvers.py @@ -148,8 +148,7 @@ def _get_claims(auth: AuthTuple) -> dict[str, Any]: # No claims for guests return {} - jwt_claims = unsafe_get_claims(token) - return jwt_claims + return unsafe_get_claims(token) @staticmethod def _evaluate_operator( diff --git a/src/configuration.py b/src/configuration.py index c9ea8e4af..41cd3deb1 100644 --- a/src/configuration.py +++ b/src/configuration.py @@ -2,39 +2,38 @@ from typing import Any, Optional +import yaml + # We want to support environment variable replacement in the configuration # similarly to how it is done in llama-stack, so we use their function directly from llama_stack.core.stack import replace_env_vars -import yaml import constants +from cache.cache import Cache +from cache.cache_factory import CacheFactory +from log import get_logger from models.config import ( A2AStateConfiguration, + AuthenticationConfiguration, AuthorizationConfiguration, AzureEntraIdConfiguration, Configuration, + ConversationHistoryConfiguration, Customization, + DatabaseConfiguration, + InferenceConfiguration, 
LlamaStackConfiguration, + ModelContextProtocolServer, OkpConfiguration, + QuotaHandlersConfiguration, RagConfiguration, - UserDataCollection, ServiceConfiguration, - ModelContextProtocolServer, - AuthenticationConfiguration, - InferenceConfiguration, - DatabaseConfiguration, - ConversationHistoryConfiguration, - QuotaHandlersConfiguration, SplunkConfiguration, + UserDataCollection, ) - -from cache.cache import Cache -from cache.cache_factory import CacheFactory - from quota.quota_limiter import QuotaLimiter -from quota.token_usage_history import TokenUsageHistory from quota.quota_limiter_factory import QuotaLimiterFactory -from log import get_logger +from quota.token_usage_history import TokenUsageHistory logger = get_logger(__name__) @@ -382,18 +381,28 @@ def okp(self) -> "OkpConfiguration": @property def rag_id_mapping(self) -> dict[str, str]: - """Return mapping from vector_db_id to rag_id from BYOK RAG config. + """Return mapping from vector_db_id to rag_id from BYOK and OKP RAG config. Returns: - dict[str, str]: Mapping where keys are llama-stack vector_db_ids - and values are user-facing rag_ids from configuration. + dict[str, str]: Mapping where keys are llama-stack vector_store_ids + (old vector_db_id) and values are user-facing rag_ids from configuration. Raises: LogicError: If the configuration has not been loaded. 
""" if self._configuration is None: raise LogicError("logic error: configuration is not loaded") - return {brag.vector_db_id: brag.rag_id for brag in self._configuration.byok_rag} + byok_mapping = { + brag.vector_db_id: brag.rag_id for brag in self._configuration.byok_rag + } + + rag = self._configuration.rag + okp_id = constants.OKP_RAG_ID + okp_enabled = okp_id in (rag.inline or []) or okp_id in (rag.tool or []) + okp_mapping = ( + {constants.SOLR_DEFAULT_VECTOR_STORE_ID: okp_id} if okp_enabled else {} + ) + return {**byok_mapping, **okp_mapping} @property def score_multiplier_mapping(self) -> dict[str, float]: diff --git a/src/constants.py b/src/constants.py index 0c5437fb2..20145a812 100644 --- a/src/constants.py +++ b/src/constants.py @@ -2,7 +2,7 @@ # Minimal and maximal supported Llama Stack version MINIMAL_SUPPORTED_LLAMA_STACK_VERSION = "0.2.17" -MAXIMAL_SUPPORTED_LLAMA_STACK_VERSION = "0.4.3" +MAXIMAL_SUPPORTED_LLAMA_STACK_VERSION = "0.5.2" UNABLE_TO_PROCESS_RESPONSE = "Unable to process this request" @@ -214,3 +214,5 @@ # Environment variable to force StreamHandler instead of RichHandler # Set to any non-empty value to disable RichHandler LIGHTSPEED_STACK_DISABLE_RICH_HANDLER_ENV_VAR = "LIGHTSPEED_STACK_DISABLE_RICH_HANDLER" + +DEFAULT_VIOLATION_MESSAGE = "I cannot process this request due to policy restrictions." 
diff --git a/src/llama_stack_configuration.py b/src/llama_stack_configuration.py index 39e026630..a0530b326 100644 --- a/src/llama_stack_configuration.py +++ b/src/llama_stack_configuration.py @@ -251,8 +251,7 @@ def construct_models_section( # Strip sentence-transformers/ prefix if present provider_model_id = embedding_model - if provider_model_id.startswith("sentence-transformers/"): - provider_model_id = provider_model_id[len("sentence-transformers/") :] + provider_model_id = provider_model_id.removeprefix("sentence-transformers/") # Skip if embedding model already registered existing_model_ids = [m.get("provider_model_id") for m in output] @@ -443,6 +442,7 @@ def enrich_solr(ls_config: dict[str, Any], solr_config: dict[str, Any]) -> None: "parent_total_chunks_field": "total_chunks", "parent_total_tokens_field": "total_tokens", "chunk_filter_query": chunk_filter_query, + "chunk_family_fields": ["headings"], }, "persistence": { "namespace": constants.SOLR_DEFAULT_VECTOR_STORE_ID, @@ -489,8 +489,7 @@ def enrich_solr(ls_config: dict[str, Any], solr_config: dict[str, Any]) -> None: # Strip sentence-transformers/ prefix from constant for provider_model_id provider_model_id = constants.SOLR_DEFAULT_EMBEDDING_MODEL - if provider_model_id.startswith("sentence-transformers/"): - provider_model_id = provider_model_id[len("sentence-transformers/") :] + provider_model_id = provider_model_id.removeprefix("sentence-transformers/") # Check if already registered registered_models = ls_config["registered_resources"]["models"] diff --git a/src/models/context.py b/src/models/context.py index 2ef76f36d..9876a1485 100644 --- a/src/models/context.py +++ b/src/models/context.py @@ -4,6 +4,7 @@ from llama_stack_client import AsyncLlamaStackClient from models.requests import QueryRequest +from utils.types import RAGContext, ShieldModerationResult @dataclass @@ -23,6 +24,8 @@ class ResponseGeneratorContext: # pylint: disable=too-many-instance-attributes query_request: The query request 
object started_at: Timestamp when the request started (ISO 8601 format) client: The Llama Stack client for API interactions + moderation_result: The moderation result + inline_rag_context: Inline RAG context vector_store_ids: Vector store IDs used in the query for source resolution. rag_id_mapping: Mapping from vector_db_id to user-facing rag_id. """ @@ -42,7 +45,9 @@ class ResponseGeneratorContext: # pylint: disable=too-many-instance-attributes # Dependencies & State client: AsyncLlamaStackClient + moderation_result: ShieldModerationResult # RAG index identification + inline_rag_context: RAGContext vector_store_ids: list[str] = field(default_factory=list) rag_id_mapping: dict[str, str] = field(default_factory=dict) diff --git a/src/models/database/conversations.py b/src/models/database/conversations.py index b34c9eb53..baebf6aa9 100644 --- a/src/models/database/conversations.py +++ b/src/models/database/conversations.py @@ -31,6 +31,7 @@ class UserConversation(Base): # pylint: disable=too-few-public-methods DateTime(timezone=True), server_default=func.now(), # pylint: disable=not-callable ) + last_response_id: Mapped[str] = mapped_column(nullable=True) # The number of user messages in the conversation message_count: Mapped[int] = mapped_column(default=0) @@ -66,3 +67,7 @@ class UserTurn(Base): # pylint: disable=too-few-public-methods provider: Mapped[str] = mapped_column(nullable=False) model: Mapped[str] = mapped_column(nullable=False) + + # Llama Stack response ID for this turn (1:1); nullable for legacy turns without it. + # Indexed for fast lookup when resolving previous_response_id to conversation. 
+ response_id: Mapped[str] = mapped_column(nullable=True, index=True) diff --git a/src/models/requests.py b/src/models/requests.py index 4027e3772..0e05e61d5 100644 --- a/src/models/requests.py +++ b/src/models/requests.py @@ -6,10 +6,11 @@ from llama_stack_api.openai_responses import ( OpenAIResponseInputToolChoice as ToolChoice, - OpenAIResponseInputToolChoiceMode as ToolChoiceMode, OpenAIResponseInputTool as InputTool, OpenAIResponsePrompt as Prompt, OpenAIResponseText as Text, + OpenAIResponseToolMCP as OutputToolMCP, + OpenAIResponseReasoning as Reasoning, ) from pydantic import BaseModel, Field, field_validator, model_validator @@ -20,6 +21,28 @@ logger = get_logger(__name__) +# Attribute names that are echoed back in the response. +_ECHOED_FIELDS = set( + { + "instructions", + "max_tool_calls", + "max_output_tokens", + "metadata", + "model", + "parallel_tool_calls", + "previous_response_id", + "prompt", + "reasoning", + "safety_identifier", + "temperature", + "top_p", + "truncation", + "text", + "tool_choice", + "store", + } +) + class Attachment(BaseModel): """Model representing an attachment that can be send from the UI as part of query. @@ -179,8 +202,7 @@ class QueryRequest(BaseModel): shield_ids: Optional[list[str]] = Field( None, description="Optional list of safety shield IDs to apply. " - "If None, all configured shields are used. " - "If provided, must contain at least one valid shield ID (empty list raises 422 error).", + "If None, all configured shields are used. 
", examples=["llama-guard", "custom-shield"], ) @@ -503,8 +525,7 @@ def validate_categories( if len(value) == 0: return None # Convert empty list to None for consistency - unique_categories = list(dict.fromkeys(value)) # don't lose ordering - return unique_categories + return list(dict.fromkeys(value)) # don't lose ordering @model_validator(mode="after") def check_feedback_provided(self) -> Self: @@ -616,6 +637,7 @@ class ResponsesRequest(BaseModel): instructions: System instructions or guidelines provided to the model (acts as the system prompt). max_infer_iters: Maximum number of inference iterations the model can perform. + max_output_tokens: Maximum number of tokens allowed in the response. max_tool_calls: Maximum number of tool calls allowed in a single response. metadata: Custom metadata dictionary with key-value pairs for tracking or logging. parallel_tool_calls: Whether the model can make multiple tool calls in parallel. @@ -623,17 +645,21 @@ class ResponsesRequest(BaseModel): conversation. Mutually exclusive with conversation. prompt: Prompt object containing a template with variables for dynamic substitution. + reasoning: Reasoning configuration for the response. + safety_identifier: Safety identifier for the response. store: Whether to store the response in conversation history. Defaults to True. stream: Whether to stream the response as it is generated. Defaults to False. temperature: Sampling temperature controlling randomness (typically 0.0–2.0). text: Text response configuration specifying output format constraints (JSON schema, JSON object, or plain text). tool_choice: Tool selection strategy ("auto", "required", "none", or specific - tool configuration). Defaults to "auto". + tool configuration). tools: List of tools available to the model (file search, web search, function calls, MCP tools). Defaults to all tools available to the model. 
generate_topic_summary: LCORE-specific flag indicating whether to generate a topic summary for new conversations. Defaults to True. + shield_ids: LCORE-specific list of safety shield IDs to apply. If None, all + configured shields are used. solr: LCORE-specific Solr vector_io provider query parameters (e.g. filter queries). Optional. """ @@ -644,18 +670,23 @@ class ResponsesRequest(BaseModel): include: Optional[list[IncludeParameter]] = None instructions: Optional[str] = None max_infer_iters: Optional[int] = None + max_output_tokens: Optional[int] = None max_tool_calls: Optional[int] = None metadata: Optional[dict[str, str]] = None parallel_tool_calls: Optional[bool] = None previous_response_id: Optional[str] = None prompt: Optional[Prompt] = None + reasoning: Optional[Reasoning] = None + safety_identifier: Optional[str] = None store: bool = True stream: bool = False temperature: Optional[float] = None text: Optional[Text] = None - tool_choice: Optional[ToolChoice] = ToolChoiceMode.auto + tool_choice: Optional[ToolChoice] = None tools: Optional[list[InputTool]] = None + # LCORE-specific attributes generate_topic_summary: Optional[bool] = True + shield_ids: Optional[list[str]] = None solr: Optional[dict[str, Any]] = None model_config = { @@ -663,40 +694,11 @@ class ResponsesRequest(BaseModel): "json_schema_extra": { "examples": [ { - "input": "What is Kubernetes?", + "input": "Hello World!", "model": "openai/gpt-4o-mini", - "conversation": "conv_0d21ba731f21f798dc9680125d5d6f493e4a7ab79f25670e", "instructions": "You are a helpful assistant", - "include": ["message.output_text.logprobs"], - "max_tool_calls": 5, - "metadata": {"source": "api"}, - "parallel_tool_calls": True, - "prompt": { - "id": "prompt_123", - "variables": { - "topic": {"type": "input_text", "text": "Kubernetes"} - }, - "version": "1.0", - }, "store": True, "stream": False, - "temperature": 0.7, - "text": { - "format": { - "type": "json_schema", - "schema": { - "type": "object", - "properties": 
{"answer": {"type": "string"}}, - }, - } - }, - "tool_choice": "auto", - "tools": [ - { - "type": "file_search", - "vector_store_ids": ["vs_123"], - } - ], "generate_topic_summary": True, } ] @@ -731,3 +733,32 @@ def check_suid(cls, value: Optional[str]) -> Optional[str]: if value and not suid.check_suid(value): raise ValueError(f"Improper conversation ID '{value}'") return value + + @field_validator("previous_response_id") + @classmethod + def check_previous_response_id(cls, value: Optional[str]) -> Optional[str]: + """Validate that previous_response_id does not start with 'modr'.""" + if value is not None and value.startswith("modr"): + raise ValueError("You cannot provide context by moderation response.") + return value + + def echoed_params(self) -> dict[str, Any]: + """Dump attributes that are echoed back in the response. + + The tools attribute is converted from input tool to output tool model. + + Returns: + Dict of echoed attributes. + """ + data = self.model_dump(include=_ECHOED_FIELDS) + if self.tools is not None: + data["tools"] = [ + ( + OutputToolMCP.model_validate(t.model_dump()).model_dump() + if t.type == "mcp" + else t.model_dump() + ) + for t in self.tools + ] + + return data diff --git a/src/models/responses.py b/src/models/responses.py index 9e87ad2cf..fd5ef955a 100644 --- a/src/models/responses.py +++ b/src/models/responses.py @@ -2,7 +2,7 @@ """Models for REST API responses.""" -from typing import Any, ClassVar, Literal, Optional +from typing import Any, ClassVar, Literal, Optional, cast from fastapi import status from llama_stack_api.openai_responses import ( @@ -13,6 +13,7 @@ OpenAIResponseText as Text, OpenAIResponseTool as OutputTool, OpenAIResponseUsage as Usage, + OpenAIResponseReasoning as Reasoning, ) from pydantic import BaseModel, Field from pydantic_core import SchemaError @@ -1412,31 +1413,35 @@ class ResponsesResponse(AbstractSuccessfulResponse): """Model representing a response from the Responses API following LCORE 
specification. Attributes: - id: Unique identifier for this response. - object: Object type identifier, always "response". created_at: Unix timestamp when the response was created. - status: Current status of the response (e.g., "completed", "blocked", - "in_progress"). completed_at: Unix timestamp when the response was completed, if applicable. + error: Error details if the response failed or was blocked. + id: Unique identifier for this response. model: Model identifier in "provider/model" format used for generation. + object: Object type identifier, always "response". output: List of structured output items containing messages, tool calls, and other content. This is the primary response content. - error: Error details if the response failed or was blocked. - instructions: System instructions or guidelines provided to the model. - max_tool_calls: Maximum number of tool calls allowed in a single response. - metadata: Additional metadata dictionary with custom key-value pairs. parallel_tool_calls: Whether the model can make multiple tool calls in parallel. previous_response_id: Identifier of the previous response in a multi-turn conversation. prompt: The input prompt object that was sent to the model. + status: Current status of the response (e.g., "completed", "blocked", + "in_progress"). temperature: Temperature parameter used for generation (controls randomness). text: Text response configuration object used for OpenAI responses. - tool_choice: Tool selection strategy used (e.g., "auto", "required", "none"). - tools: List of tools available to the model during generation. top_p: Top-p sampling parameter used for generation. + tools: List of tools available to the model during generation. + tool_choice: Tool selection strategy used (e.g., "auto", "required", "none"). truncation: Strategy used for handling content that exceeds context limits. usage: Token usage statistics including input_tokens, output_tokens, and total_tokens. 
+ instructions: System instructions or guidelines provided to the model. + max_tool_calls: Maximum number of tool calls allowed in a single response. + reasoning: Reasoning configuration (effort level) used for the response. + max_output_tokens: Upper bound for tokens generated in the response. + safety_identifier: Safety/guardrail identifier applied to the request. + metadata: Additional metadata dictionary with custom key-value pairs. + store: Whether the response was stored. conversation: Conversation ID linking this response to a conversation thread (LCORE-specific). available_quotas: Remaining token quotas for the user (LCORE-specific). @@ -1444,27 +1449,32 @@ class ResponsesResponse(AbstractSuccessfulResponse): output array. """ - id: str - object: Literal["response"] = "response" created_at: int - status: str completed_at: Optional[int] = None + error: Optional[Error] = None + id: str model: str + object: Literal["response"] = "response" output: list[Output] - error: Optional[Error] = None - instructions: Optional[str] = None - max_tool_calls: Optional[int] = None - metadata: Optional[dict[str, str]] = None parallel_tool_calls: bool = True previous_response_id: Optional[str] = None prompt: Optional[Prompt] = None + status: str temperature: Optional[float] = None text: Optional[Text] = None - tool_choice: Optional[ToolChoice] = None - tools: Optional[list[OutputTool]] = None top_p: Optional[float] = None + tools: Optional[list[OutputTool]] = None + tool_choice: Optional[ToolChoice] = None truncation: Optional[str] = None - usage: Usage + usage: Optional[Usage] = None + instructions: Optional[str] = None + max_tool_calls: Optional[int] = None + reasoning: Optional[Reasoning] = None + max_output_tokens: Optional[int] = None + safety_identifier: Optional[str] = None + metadata: Optional[dict[str, str]] = None + store: Optional[bool] = None + # LCORE-specific attributes conversation: Optional[str] = None available_quotas: dict[str, int] output_text: str @@ 
-1473,12 +1483,11 @@ class ResponsesResponse(AbstractSuccessfulResponse): "json_schema_extra": { "examples": [ { - "id": "resp_abc123", - "object": "response", "created_at": 1704067200, "completed_at": 1704067250, + "id": "resp_abc123", "model": "openai/gpt-4-turbo", - "status": "completed", + "object": "response", "output": [ { "type": "message", @@ -1494,21 +1503,89 @@ class ResponsesResponse(AbstractSuccessfulResponse): ], } ], + "parallel_tool_calls": True, + "status": "completed", + "temperature": 0.7, + "text": {"format": {"type": "text"}}, "usage": { "input_tokens": 100, "output_tokens": 50, "total_tokens": 150, }, "instructions": "You are a helpful assistant", - "temperature": 0.7, - "conversation": "conv_0d21ba731f21f798dc9680125d5d6f493e4a7ab79f25670e", + "store": True, + "conversation": "0d21ba731f21f798dc9680125d5d6f493e4a7ab79f25670e", "available_quotas": {"daily": 1000, "monthly": 50000}, - "output_text": "Kubernetes is an open-source container orchestration system...", + "output_text": ( + "Kubernetes is an open-source container " + "orchestration system..." + ), } - ] + ], + "sse_example": ( + "event: response.created\n" + 'data: {"type":"response.created","sequence_number":0,' + '"response":{"id":"resp_abc","created_at":1704067200,' + '"status":"in_progress","output":[],"conversation":' + '"0d21ba731f21f798dc9680125d5d6f49","available_quotas":{},' + '"output_text":""}}\n\n' + "event: response.output_item.added\n" + 'data: {"response_id":"resp_abc","item":{"type":"message",' + '"role":"assistant","content":[{"type":"output_text",' + '"text":"Hello! How can I help?"}]},"output_index":0,' + '"sequence_number":1}\n\n' + "event: response.output_item.done\n" + 'data: {"response_id":"resp_abc","item":{"type":"message",' + '"role":"assistant","content":[{"type":"output_text",' + '"text":"Hello! 
How can I help?"}]},"output_index":0,' + '"sequence_number":2}\n\n' + "event: response.completed\n" + 'data: {"type":"response.completed","sequence_number":3,' + '"response":{"id":"resp_abc","created_at":1704067200,' + '"completed_at":1704067250,"status":"completed",' + '"output":[{"type":"message","role":"assistant",' + '"content":[{"type":"output_text","text":"Hello! How can I help?"}]}],' + '"usage":{"input_tokens":10,"output_tokens":6,"total_tokens":16},' + '"conversation":"0d21ba731f21f798dc9680125d5d6f49",' + '"available_quotas":{"daily":1000,"monthly":50000},' + '"output_text":"Hello! How can I help?"}}\n\n' + "data: [DONE]\n\n" + ), } } + @classmethod + def openapi_response(cls) -> dict[str, Any]: + """ + Build OpenAPI response dict with application/json and text/event-stream. + + Uses the single JSON example from the model schema and adds + text/event-stream example from json_schema_extra.sse_example. + """ + schema = cls.model_json_schema() + model_examples = schema.get("examples", []) + json_example = model_examples[0] if model_examples else None + + schema_extra = ( + cast(dict[str, Any], dict(cls.model_config)).get("json_schema_extra") or {} + ) + sse_example = schema_extra.get("sse_example", "") + + content: dict[str, Any] = { + "application/json": {"example": json_example} if json_example else {}, + "text/event-stream": { + "schema": {"type": "string"}, + "description": "SSE stream of events", + "examples": {"stream": {"value": sse_example}} if sse_example else {}, + }, + } + + return { + "description": SUCCESSFUL_RESPONSE_DESCRIPTION, + "model": cls, + "content": content, + } + class DetailModel(BaseModel): """Nested detail model for error responses.""" diff --git a/src/observability/__init__.py b/src/observability/__init__.py index df8601281..4a4285e8a 100644 --- a/src/observability/__init__.py +++ b/src/observability/__init__.py @@ -11,4 +11,4 @@ from observability.formats import InferenceEventData, build_inference_event from observability.splunk 
import send_splunk_event -__all__ = ["send_splunk_event", "InferenceEventData", "build_inference_event"] +__all__ = ["InferenceEventData", "build_inference_event", "send_splunk_event"] diff --git a/src/quota/cluster_quota_limiter.py b/src/quota/cluster_quota_limiter.py index 805791b9d..d2170f853 100644 --- a/src/quota/cluster_quota_limiter.py +++ b/src/quota/cluster_quota_limiter.py @@ -1,7 +1,7 @@ """Simple cluster quota limiter where quota is fixed for the whole cluster.""" -from models.config import QuotaHandlersConfiguration from log import get_logger +from models.config import QuotaHandlersConfiguration from quota.revokable_quota_limiter import RevokableQuotaLimiter logger = get_logger(__name__) diff --git a/src/quota/connect_pg.py b/src/quota/connect_pg.py index fbbf95109..e4ba2c9bf 100644 --- a/src/quota/connect_pg.py +++ b/src/quota/connect_pg.py @@ -1,6 +1,7 @@ """PostgreSQL connection handler.""" from typing import Any + import psycopg2 from log import get_logger diff --git a/src/quota/quota_limiter.py b/src/quota/quota_limiter.py index bc0d659f8..14543c590 100644 --- a/src/quota/quota_limiter.py +++ b/src/quota/quota_limiter.py @@ -30,16 +30,15 @@ - reset quota to 10,000,000 tokens each month """ +import datetime +import sqlite3 from abc import ABC, abstractmethod - from typing import Optional -import datetime -import sqlite3 import psycopg2 from log import get_logger -from models.config import SQLiteDatabaseConfiguration, PostgreSQLDatabaseConfiguration +from models.config import PostgreSQLDatabaseConfiguration, SQLiteDatabaseConfiguration from quota.connect_pg import connect_pg from quota.connect_sqlite import connect_sqlite diff --git a/src/quota/quota_limiter_factory.py b/src/quota/quota_limiter_factory.py index 9b8105c4d..3067f7493 100644 --- a/src/quota/quota_limiter_factory.py +++ b/src/quota/quota_limiter_factory.py @@ -1,12 +1,11 @@ """Quota limiter factory class.""" -from log import get_logger import constants +from log import get_logger from 
models.config import QuotaHandlersConfiguration - -from quota.user_quota_limiter import UserQuotaLimiter from quota.cluster_quota_limiter import ClusterQuotaLimiter from quota.quota_limiter import QuotaLimiter +from quota.user_quota_limiter import UserQuotaLimiter logger = get_logger(__name__) diff --git a/src/quota/revokable_quota_limiter.py b/src/quota/revokable_quota_limiter.py index 9bb47ed3c..9f5ff54cb 100644 --- a/src/quota/revokable_quota_limiter.py +++ b/src/quota/revokable_quota_limiter.py @@ -1,24 +1,24 @@ """Simple quota limiter where quota can be revoked.""" -from datetime import datetime +from datetime import UTC, datetime -from models.config import QuotaHandlersConfiguration from log import get_logger -from utils.connection_decorator import connection +from models.config import QuotaHandlersConfiguration from quota.quota_exceed_error import QuotaExceedError from quota.quota_limiter import QuotaLimiter from quota.sql import ( CREATE_QUOTA_TABLE_PG, CREATE_QUOTA_TABLE_SQLITE, - UPDATE_AVAILABLE_QUOTA_PG, - UPDATE_AVAILABLE_QUOTA_SQLITE, + INIT_QUOTA_PG, + INIT_QUOTA_SQLITE, SELECT_QUOTA_PG, SELECT_QUOTA_SQLITE, SET_AVAILABLE_QUOTA_PG, SET_AVAILABLE_QUOTA_SQLITE, - INIT_QUOTA_PG, - INIT_QUOTA_SQLITE, + UPDATE_AVAILABLE_QUOTA_PG, + UPDATE_AVAILABLE_QUOTA_SQLITE, ) +from utils.connection_decorator import connection logger = get_logger(__name__) @@ -140,7 +140,7 @@ def _revoke_quota(self, set_statement: str, subject_id: str) -> None: revoked. """ # timestamp to be used - revoked_at = datetime.now() + revoked_at = datetime.now(tz=UTC) cursor = self.connection.cursor() cursor.execute( @@ -188,7 +188,7 @@ def _increase_quota(self, set_statement: str, subject_id: str) -> None: subject_id (str): Identifier of the subject whose quota will be increased. """ # timestamp to be used - updated_at = datetime.now() + updated_at = datetime.now(tz=UTC) cursor = self.connection.cursor() cursor.execute( @@ -286,7 +286,7 @@ def _consume_tokens( change. 
""" # timestamp to be used - updated_at = datetime.now() + updated_at = datetime.now(tz=UTC) to_be_consumed = input_tokens + output_tokens @@ -329,7 +329,7 @@ def _init_quota(self, subject_id: str = "") -> None: initialize. Defaults to empty string. """ # timestamp to be used - revoked_at = datetime.now() + revoked_at = datetime.now(tz=UTC) if self.sqlite_connection_config is not None: cursor = self.connection.cursor() diff --git a/src/quota/token_usage_history.py b/src/quota/token_usage_history.py index db6134e14..3ea14a452 100644 --- a/src/quota/token_usage_history.py +++ b/src/quota/token_usage_history.py @@ -6,25 +6,23 @@ """ import sqlite3 -from datetime import datetime +from datetime import UTC, datetime from typing import Any, Optional import psycopg2 from log import get_logger - +from models.config import ( + PostgreSQLDatabaseConfiguration, + QuotaHandlersConfiguration, + SQLiteDatabaseConfiguration, +) from quota.connect_pg import connect_pg from quota.connect_sqlite import connect_sqlite from quota.sql import ( - CREATE_TOKEN_USAGE_TABLE, CONSUME_TOKENS_FOR_USER_PG, CONSUME_TOKENS_FOR_USER_SQLITE, -) - -from models.config import ( - QuotaHandlersConfiguration, - SQLiteDatabaseConfiguration, - PostgreSQLDatabaseConfiguration, + CREATE_TOKEN_USAGE_TABLE, ) from utils.connection_decorator import connection @@ -135,7 +133,7 @@ def consume_tokens( # pylint: disable=too-many-arguments,too-many-positional-ar return # timestamp to be used - updated_at = datetime.now() + updated_at = datetime.now(tz=UTC) # it is not possible to use context manager there, because SQLite does # not support it diff --git a/src/quota/user_quota_limiter.py b/src/quota/user_quota_limiter.py index 13901fd05..eee9dd7c2 100644 --- a/src/quota/user_quota_limiter.py +++ b/src/quota/user_quota_limiter.py @@ -1,7 +1,7 @@ """Simple user quota limiter where each user has a fixed quota.""" -from models.config import QuotaHandlersConfiguration from log import get_logger +from models.config import 
QuotaHandlersConfiguration from quota.revokable_quota_limiter import RevokableQuotaLimiter logger = get_logger(__name__) diff --git a/src/utils/conversations.py b/src/utils/conversations.py index 66205594f..ce882402c 100644 --- a/src/utils/conversations.py +++ b/src/utils/conversations.py @@ -3,7 +3,10 @@ import json from datetime import UTC, datetime from typing import Any, Optional, cast +from collections.abc import Sequence +from fastapi import HTTPException +from llama_stack_api import OpenAIResponseMessage, OpenAIResponseOutput from llama_stack_api.openai_responses import ( OpenAIResponseOutputMessageFileSearchToolCall as FileSearchCall, OpenAIResponseOutputMessageFunctionToolCall as FunctionCall, @@ -11,6 +14,8 @@ OpenAIResponseOutputMessageMCPListTools as MCPListTools, OpenAIResponseOutputMessageWebSearchToolCall as WebSearchCall, ) +from llama_stack_client import APIConnectionError, APIStatusError, AsyncLlamaStackClient +from llama_stack_client.types.conversations.item_create_params import Item from llama_stack_client.types.conversations.item_list_response import ( ItemListResponse, OpenAIResponseInputFunctionToolCallOutput as FunctionToolCallOutput, @@ -21,9 +26,14 @@ from constants import DEFAULT_RAG_TOOL from models.database.conversations import UserTurn -from models.responses import ConversationTurn, Message +from models.responses import ( + ConversationTurn, + InternalServerErrorResponse, + Message, + ServiceUnavailableResponse, +) from utils.responses import parse_arguments_string -from utils.types import ToolCallSummary, ToolResultSummary +from utils.types import ResponseInput, ToolCallSummary, ToolResultSummary def _extract_text_from_content(content: str | list[Any]) -> str: @@ -423,3 +433,82 @@ def build_conversation_turns_from_items( ) return chat_history + + +async def append_turn_items_to_conversation( + client: AsyncLlamaStackClient, + conversation_id: str, + user_input: ResponseInput, + llm_output: Sequence[OpenAIResponseOutput], +) -> None: + 
""" + Append a turn (user input + LLM output) to a conversation in LLS database. + + Args: + client: The Llama Stack client. + conversation_id: The Llama Stack conversation ID. + user_input: User input text or list of ResponseItem. + llm_output: Output from the LLM: a list of OpenAIResponseOutput. + """ + if isinstance(user_input, str): + user_message = OpenAIResponseMessage( + role="user", + content=user_input, + ) + user_items = [user_message.model_dump()] + else: + user_items = [item.model_dump() for item in user_input] + + output_items = [item.model_dump() for item in llm_output] + + items = user_items + output_items + try: + await client.conversations.items.create( + conversation_id, + items=cast(list[Item], items), + ) + except APIConnectionError as e: + error_response = ServiceUnavailableResponse( + backend_name="Llama Stack", + cause=str(e), + ) + raise HTTPException(**error_response.model_dump()) from e + except APIStatusError as e: + error_response = InternalServerErrorResponse.generic() + raise HTTPException(**error_response.model_dump()) from e + + +async def get_all_conversation_items( + client: AsyncLlamaStackClient, + conversation_id_llama_stack: str, +) -> list[ItemListResponse]: + """Fetch all items for a conversation (Conversations API), paginating as needed. + + Args: + client: Llama Stack client. + conversation_id_llama_stack: Conversation ID in Llama Stack format. + + Returns: + List of all items in the conversation, oldest first. 
+ """ + try: + paginator = client.conversations.items.list( + conversation_id=conversation_id_llama_stack, + order="asc", + ) + first_page = await paginator + items: list[ItemListResponse] = list(first_page.data or []) + page = first_page + while page.has_next_page(): + page = await page.get_next_page() + items.extend(page.data or []) + return items + except APIConnectionError as e: + error_response = ServiceUnavailableResponse( + backend_name="Llama Stack", + cause=str(e), + ) + raise HTTPException(**error_response.model_dump()) from e + except APIStatusError as e: + error_response = InternalServerErrorResponse.generic() + raise HTTPException(**error_response.model_dump()) from e diff --git a/src/utils/endpoints.py b/src/utils/endpoints.py index 332002eeb..a8ffb9a64 100644 --- a/src/utils/endpoints.py +++ b/src/utils/endpoints.py @@ -6,17 +6,20 @@ from pydantic import AnyUrl, ValidationError from sqlalchemy.exc import SQLAlchemyError +from client import AsyncLlamaStackClientHolder import constants from app.database import get_session from configuration import AppConfig, LogicError from log import get_logger -from models.database.conversations import UserConversation +from models.database.conversations import UserConversation, UserTurn from models.responses import ( ForbiddenResponse, InternalServerErrorResponse, NotFoundResponse, ) -from utils.types import ReferencedDocument, TurnSummary +from utils.responses import create_new_conversation +from utils.suid import normalize_conversation_id, to_llama_stack_conversation_id +from utils.types import ReferencedDocument, ResponsesConversationContext, TurnSummary logger = get_logger(__name__) @@ -59,6 +62,35 @@ def retrieve_conversation(conversation_id: str) -> Optional[UserConversation]: return session.query(UserConversation).filter_by(id=conversation_id).first() +def retrieve_conversation_turns(conversation_id: str) -> list[UserTurn]: + """Retrieve all turns for a conversation from the database, ordered by turn number. 
+ + Args: + conversation_id (str): The normalized conversation ID. + + Returns: + list[UserTurn]: The list of turns for the conversation, ordered by turn_number. + + Raises: + HTTPException: 500 if a database error occurs. + """ + try: + with get_session() as session: + return ( + session.query(UserTurn) + .filter_by(conversation_id=conversation_id) + .order_by(UserTurn.turn_number) + .all() + ) + except SQLAlchemyError as e: + logger.error( + "Database error occurred while retrieving conversation turns for %s.", + conversation_id, + ) + response = InternalServerErrorResponse.database_error() + raise HTTPException(**response.model_dump()) from e + + def validate_conversation_ownership( user_id: str, conversation_id: str, others_allowed: bool = False ) -> Optional[UserConversation]: @@ -179,6 +211,145 @@ def validate_and_retrieve_conversation( return user_conversation +async def resolve_response_context( + user_id: str, + others_allowed: bool, + conversation_id: Optional[str], + previous_response_id: Optional[str], + generate_topic_summary: Optional[bool], +) -> ResponsesConversationContext: + """Resolve conversation context for the responses endpoint without mutating the request. + + Parameters: + user_id: ID of the user making the request. + others_allowed: Whether the user can access conversations owned by others. + conversation_id: Conversation ID from the request, if any. + previous_response_id: Previous response ID from the request, if any. + generate_topic_summary: Resolved value for request.generate_topic_summary. + + Returns: + ResponsesConversationContext: Contains conversation, user_conversation, and + resolved generate_topic_summary to apply to the request. + + Raises: + HTTPException: 404 if previous_response_id is set but the turn does not exist; + other HTTP exceptions from validate_and_retrieve_conversation. 
+ """ + client = AsyncLlamaStackClientHolder().get_client() + # Context for the LLM passed by conversation + if conversation_id: + logger.info("Conversation ID specified in request: %s", conversation_id) + user_conversation = validate_and_retrieve_conversation( + normalized_conv_id=normalize_conversation_id(conversation_id), + user_id=user_id, + others_allowed=others_allowed, + ) + return ResponsesConversationContext( + conversation=to_llama_stack_conversation_id(user_conversation.id), + user_conversation=user_conversation, + generate_topic_summary=False, + ) + + # Context for the LLM passed by previous response id + if previous_response_id: + if not check_turn_existence(previous_response_id): + error_response = NotFoundResponse( + resource="response", resource_id=previous_response_id + ) + raise HTTPException(**error_response.model_dump()) + prev_user_turn = retrieve_turn_by_response_id(previous_response_id) + user_conversation = validate_and_retrieve_conversation( + normalized_conv_id=prev_user_turn.conversation_id, + user_id=user_id, + others_allowed=others_allowed, + ) + if ( + user_conversation.last_response_id is not None + and user_conversation.last_response_id != previous_response_id + ): + new_conv_id = await create_new_conversation(client) + want_topic_summary = ( + generate_topic_summary if generate_topic_summary is not None else True + ) + return ResponsesConversationContext( + conversation=new_conv_id, + user_conversation=user_conversation, + generate_topic_summary=want_topic_summary, + ) + return ResponsesConversationContext( + conversation=to_llama_stack_conversation_id(user_conversation.id), + user_conversation=user_conversation, + generate_topic_summary=False, + ) + + # No context passed, create new conversation + new_conv_id = await create_new_conversation(client) + want_topic_summary = ( + generate_topic_summary if generate_topic_summary is not None else True + ) + return ResponsesConversationContext( + conversation=new_conv_id, + 
user_conversation=None, + generate_topic_summary=want_topic_summary, + ) + + +def retrieve_turn_by_response_id(response_id: str) -> UserTurn: + """Retrieve a response's turn from the database by response ID. + + Looks up the turn that has this response_id to get its conversation. + Used for fork/previous_response_id resolution. + + Args: + response_id: The ID of the response (stored on UserTurn.response_id). + + Returns: + The UserTurn row for that response (has conversation_id). + + Raises: + HTTPException: 404 if no turn has this response_id; 500 on database error. + """ + try: + with get_session() as session: + turn = session.query(UserTurn).filter_by(response_id=response_id).first() + if turn is None: + logger.error("Response %s not found in database.", response_id) + response = NotFoundResponse( + resource="response", resource_id=response_id + ) + raise HTTPException(**response.model_dump()) + return turn + except SQLAlchemyError as e: + logger.exception( + "Database error while retrieving turn by response_id %s", response_id + ) + response = InternalServerErrorResponse.database_error() + raise HTTPException(**response.model_dump()) from e + + +def check_turn_existence(response_id: str) -> bool: + """Check if a turn exists for a given response ID. + + Args: + response_id: The ID of the response to check. + + Returns: + bool: True if the turn exists, False otherwise. + """ + try: + with get_session() as session: + turn = session.query(UserTurn).filter_by(response_id=response_id).first() + return turn is not None + except SQLAlchemyError as e: + logger.exception( + "Database error while checking turn existence for response_id %s", + response_id, + ) + raise HTTPException( + **InternalServerErrorResponse.database_error().model_dump() + ) from e + + def check_configuration_loaded(config: AppConfig) -> None: """ Raise an error if the configuration is not loaded. 
diff --git a/src/utils/query.py b/src/utils/query.py index 8d96b5eb6..5bd96dbf7 100644 --- a/src/utils/query.py +++ b/src/utils/query.py @@ -44,6 +44,7 @@ create_transcript_metadata, store_transcript, ) +from utils.suid import is_moderation_id from utils.types import TurnSummary logger = get_logger(__name__) @@ -290,6 +291,7 @@ def store_query_results( # pylint: disable=too-many-arguments model_id=model_id, provider_id=provider_id, topic_summary=topic_summary, + response_id=summary.id, ) except SQLAlchemyError as e: logger.exception("Error persisting conversation details.") @@ -377,6 +379,7 @@ def persist_user_conversation_details( model_id: str, provider_id: str, topic_summary: Optional[str], + response_id: str, ) -> None: """Associate conversation to user in the database. @@ -388,6 +391,7 @@ def persist_user_conversation_details( model_id: The model identifier provider_id: The provider identifier topic_summary: Optional topic summary for the conversation + response_id: Response ID for the conversation """ # Normalize the conversation ID (strip 'conv_' prefix if present) normalized_id = normalize_conversation_id(conversation_id) @@ -402,7 +406,6 @@ def persist_user_conversation_details( existing_conversation = ( session.query(UserConversation).filter_by(id=normalized_id).first() ) - if not existing_conversation: conversation = UserConversation( id=normalized_id, @@ -411,6 +414,10 @@ def persist_user_conversation_details( last_used_provider=provider_id, topic_summary=topic_summary or "", message_count=1, + # For new conversation either current response or None if moderation-blocked + last_response_id=( + response_id if not is_moderation_id(response_id) else None + ), ) session.add(conversation) logger.debug( @@ -427,6 +434,9 @@ def persist_user_conversation_details( user_id, existing_conversation.message_count, ) + # Update last response id only if not moderation-blocked + if not is_moderation_id(response_id): + existing_conversation.last_response_id = response_id 
max_turn_number = ( session.query(func.max(UserTurn.turn_number)) @@ -441,6 +451,7 @@ def persist_user_conversation_details( completed_at=datetime.fromisoformat(completed_at), provider=provider_id, model=model_id, + response_id=response_id, ) session.add(turn) logger.debug( diff --git a/src/utils/responses.py b/src/utils/responses.py index bdced0f71..04f4c96c1 100644 --- a/src/utils/responses.py +++ b/src/utils/responses.py @@ -7,11 +7,12 @@ from typing import Any, Optional, cast from fastapi import HTTPException +from llama_stack_api import OpenAIResponseObject from llama_stack_api.openai_responses import ( OpenAIResponseContentPartRefusal as ContentPartRefusal, OpenAIResponseInputMessageContent as InputMessageContent, + OpenAIResponseInputMessageContentFile as InputFilePart, OpenAIResponseInputMessageContentText as InputTextPart, - OpenAIResponseInputTool as InputTool, OpenAIResponseInputToolFileSearch as InputToolFileSearch, OpenAIResponseInputToolMCP as InputToolMCP, OpenAIResponseMCPApprovalRequest as MCPApprovalRequest, @@ -27,9 +28,15 @@ OpenAIResponseOutputMessageMCPListTools as MCPListTools, OpenAIResponseOutputMessageWebSearchToolCall as WebSearchCall, OpenAIResponseUsage as ResponseUsage, + OpenAIResponseInputTool as InputTool, + OpenAIResponseUsageInputTokensDetails as UsageInputTokensDetails, + OpenAIResponseUsageOutputTokensDetails as UsageOutputTokensDetails, + OpenAIResponseInputToolChoiceMode as ToolChoiceMode, + OpenAIResponseInputToolChoice as ToolChoice, ) from llama_stack_client import APIConnectionError, APIStatusError, AsyncLlamaStackClient +from client import AsyncLlamaStackClientHolder import constants import metrics from configuration import configuration @@ -55,6 +62,7 @@ from utils.types import ( RAGChunk, ReferencedDocument, + ResponseInput, ResponseItem, ResponsesApiParams, ToolCallSummary, @@ -377,6 +385,29 @@ def resolve_vector_store_ids( return [rag_id_to_vector_db_id.get(vs_id, vs_id) for vs_id in vector_store_ids] +def 
translate_tools_vector_store_ids( + tools: list[InputTool], byok_rags: list[ByokRag] +) -> list[InputTool]: + """Translate user-facing vector_store_ids to llama-stack IDs in each file_search tool. + + Parameters: + tools: List of request tools (may contain file_search with user-facing IDs). + byok_rags: BYOK RAG configuration for ID resolution. + + Returns: + New list of tools with file_search vector_store_ids translated; other tools + unchanged. + """ + result: list[InputTool] = [] + for tool in tools: + if tool.type == "file_search": + resolved_ids = resolve_vector_store_ids(tool.vector_store_ids, byok_rags) + result.append(tool.model_copy(update={"vector_store_ids": resolved_ids})) + else: + result.append(tool) + return result + + def get_rag_tools(vector_store_ids: list[str]) -> Optional[list[InputToolFileSearch]]: """Convert vector store IDs to tools format for Responses API. @@ -842,6 +873,16 @@ def _resolve_source_for_result( if len(vector_store_ids) > 1: attributes = getattr(result, "attributes", {}) or {} + + # Primary: read index name embedded directly by rag-content. + # This value is already the user-facing rag_id, not a vector_db_id, + # so no mapping is needed. + attr_source: Optional[str] = attributes.get("source") + if attr_source: + return attr_source + + # Fallback: if llama-stack ever populates vector_store_id in results, + # use it with the rag_id_mapping. attr_store_id: Optional[str] = attributes.get("vector_store_id") if attr_store_id: return rag_id_mapping.get(attr_store_id, attr_store_id) @@ -997,8 +1038,7 @@ async def select_model_for_responses( and user_conversation.last_used_model and user_conversation.last_used_provider ): - model_id = f"{user_conversation.last_used_provider}/{user_conversation.last_used_model}" - return model_id + return f"{user_conversation.last_used_provider}/{user_conversation.last_used_model}" # 2. 
Select default model from configuration if configuration.inference is not None: @@ -1036,7 +1076,7 @@ async def select_model_for_responses( def build_turn_summary( - response: Optional[ResponseObject], + response: Optional[OpenAIResponseObject], model: str, vector_store_ids: Optional[list[str]] = None, rag_id_mapping: Optional[dict[str, str]] = None, @@ -1058,6 +1098,7 @@ def build_turn_summary( if response is None or response.output is None: return summary + summary.id = response.id # Extract text from output items summary.llm_response = extract_text_from_response_items(response.output) @@ -1109,15 +1150,12 @@ def extract_text_from_response_item(response_item: ResponseItem) -> str: response_item: A single item from request input or response output. Returns: - Extracted text content, or empty string if not a message or role is user. + Extracted text content, or empty string if not a message. """ if response_item.type != "message": return "" message_item = cast(ResponseMessage, response_item) - if message_item.role == "user": - return "" - return _extract_text_from_content(message_item.content) @@ -1139,15 +1177,16 @@ def _extract_text_from_content( text_fragments: list[str] = [] for part in content: - if part.type == "input_text": + part_type = getattr(part, "type", None) + if part_type == "input_text": input_text_part = cast(InputTextPart, part) if input_text_part.text: text_fragments.append(input_text_part.text.strip()) - elif part.type == "output_text": + elif part_type == "output_text": output_text_part = cast(OutputTextPart, part) if output_text_part.text: text_fragments.append(output_text_part.text.strip()) - elif part.type == "refusal": + elif part_type == "refusal": refusal_part = cast(ContentPartRefusal, part) if refusal_part.refusal: text_fragments.append(refusal_part.refusal.strip()) @@ -1168,3 +1207,130 @@ def deduplicate_referenced_documents( seen.add(key) out.append(d) return out + + +async def create_new_conversation( + client: AsyncLlamaStackClient, 
+) -> str: + """Create a new conversation via the Llama Stack Conversations API. + + Args: + client: The Llama Stack client used to create the conversation. + + Returns: + The new conversation's ID (string), as returned by the API. + """ + try: + conversation = await client.conversations.create(metadata={}) + return conversation.id + except APIConnectionError as e: + error_response = ServiceUnavailableResponse( + backend_name="Llama Stack", + cause=str(e), + ) + raise HTTPException(**error_response.model_dump()) from e + except APIStatusError as e: + error_response = InternalServerErrorResponse.generic() + raise HTTPException(**error_response.model_dump()) from e + + +def get_zero_usage() -> ResponseUsage: + """Create a Usage object with zero values for input and output tokens. + + Returns: + Usage object with zero values for input and output tokens. + """ + return ResponseUsage( + input_tokens=0, + input_tokens_details=UsageInputTokensDetails(cached_tokens=0), + output_tokens=0, + output_tokens_details=UsageOutputTokensDetails(reasoning_tokens=0), + total_tokens=0, + ) + + +def extract_attachments_text(response_input: ResponseInput) -> str: + """Extract file_data from input_file parts inside message content. + + Args: + response_input: Response input (string or list of response items). + + Returns: + All present file_data values joined by double newline. 
+ """ + if isinstance(response_input, str): + return "" + file_data_parts: list[str] = [] + for item in response_input: + if item.type != "message": + continue + message = cast(ResponseMessage, item) + content = message.content + if isinstance(content, str): + continue + for part in content: + if part.type == "input_file": + file_part = cast(InputFilePart, part) + if file_part.file_data: + file_data_parts.append(file_part.file_data) + return "\n\n".join(file_data_parts) + + +async def resolve_tool_choice( + tools: Optional[list[InputTool]], + tool_choice: Optional[ToolChoice], + token: str, + mcp_headers: Optional[McpHeaders] = None, + request_headers: Optional[Mapping[str, str]] = None, +) -> tuple[Optional[list[InputTool]], Optional[ToolChoice], Optional[list[str]]]: + """Resolve tools and tool_choice for the Responses API. + + If the request includes tools, uses them as-is and derives vector_store_ids + from tool configs; otherwise loads tools via prepare_tools (using all + configured vector stores) and honors tool_choice "none" via the no_tools + flag. When no tools end up configured, tool_choice is cleared to None. + + Args: + tools: Tools from the request, or None to use LCORE-configured tools. + tool_choice: Requested tool choice (e.g. auto, required, none) or None. + token: User token for MCP/auth. + mcp_headers: Optional MCP headers to propagate. + request_headers: Optional request headers for tool resolution. + + Returns: + A tuple of (prepared_tools, prepared_tool_choice, vector_store_ids): + prepared_tools is the list of tools to use, or None if none configured; + prepared_tool_choice is the resolved tool choice, or None when there + are no tools; vector_store_ids is extracted from tools (in user-facing format) + when provided, otherwise None. 
+ """ + prepared_tools: Optional[list[InputTool]] = None + client = AsyncLlamaStackClientHolder().get_client() + if tools: # explicitly specified in request + # Per-request override of vector stores (user-facing rag_ids) + vector_store_ids = extract_vector_store_ids_from_tools(tools) + # Translate user-facing rag_ids to llama-stack vector_store_ids in each file_search tool + byok_rags = configuration.configuration.byok_rag + prepared_tools = translate_tools_vector_store_ids(tools, byok_rags) + prepared_tool_choice = tool_choice or ToolChoiceMode.auto + else: + # Vector stores were not overwritten in request, use all configured vector stores + vector_store_ids = None + # Get all tools configured in LCORE (returns None or non-empty list) + no_tools = ( + isinstance(tool_choice, ToolChoiceMode) + and tool_choice == ToolChoiceMode.none + ) + # Vector stores are prepared in llama-stack format + prepared_tools = await prepare_tools( + client=client, + vector_store_ids=vector_store_ids, # allow all configured vector stores + no_tools=no_tools, + token=token, + mcp_headers=mcp_headers, + request_headers=request_headers, + ) + # If there are no tools, tool_choice cannot be set at all - LLS implicit behavior + prepared_tool_choice = tool_choice if prepared_tools else None + + return prepared_tools, prepared_tool_choice, vector_store_ids diff --git a/src/utils/shields.py b/src/utils/shields.py index ff99fc3b0..19bd0c214 100644 --- a/src/utils/shields.py +++ b/src/utils/shields.py @@ -3,8 +3,14 @@ from typing import Any, Optional from fastapi import HTTPException -from llama_stack_api import OpenAIResponseContentPartRefusal, OpenAIResponseMessage -from llama_stack_client import APIConnectionError, APIStatusError, AsyncLlamaStackClient +from llama_stack_api import OpenAIResponseMessage +from llama_stack_client import ( + APIConnectionError, + APIStatusError as LLSApiStatusError, + AsyncLlamaStackClient, +) +from llama_stack_client.types import ShieldListResponse +from 
openai._exceptions import APIStatusError as OpenAIAPIStatusError import metrics from configuration import AppConfig @@ -16,17 +22,16 @@ UnprocessableEntityResponse, ServiceUnavailableResponse, ) -from utils.suid import get_suid +from utils.query import handle_known_apistatus_errors from utils.types import ( ShieldModerationBlocked, ShieldModerationPassed, ShieldModerationResult, ) +from constants import DEFAULT_VIOLATION_MESSAGE logger = get_logger(__name__) -DEFAULT_VIOLATION_MESSAGE = "I cannot process this request due to policy restrictions." - async def get_available_shields(client: AsyncLlamaStackClient) -> list[str]: """ @@ -129,47 +134,11 @@ async def run_shield_moderation( Raises: HTTPException: If shield's provider_resource_id is not configured or model not found. """ - all_shields = await client.shields.list() - - # Filter shields based on shield_ids parameter - if shield_ids is not None: - if len(shield_ids) == 0: - response = UnprocessableEntityResponse( - response="Invalid shield configuration", - cause=( - "shield_ids provided but no shields selected. " - "Remove the parameter to use default shields." 
- ), - ) - raise HTTPException(**response.model_dump()) - - shields_to_run = [s for s in all_shields if s.identifier in shield_ids] - - # Log warning if requested shield not found - requested = set(shield_ids) - available = {s.identifier for s in shields_to_run} - missing = requested - available - if missing: - logger.warning("Requested shields not found: %s", missing) - - # Reject if no requested shields were found (prevents accidental bypass) - if not shields_to_run: - response = UnprocessableEntityResponse( - response="Invalid shield configuration", - cause=f"Requested shield_ids not found: {sorted(missing)}", - ) - raise HTTPException(**response.model_dump()) - else: - shields_to_run = list(all_shields) - + shields_to_run = await get_shields_for_request(client, shield_ids) available_models = {model.id for model in await client.models.list()} - for shield in shields_to_run: - # Only validate provider_resource_id against models for llama-guard. - # Llama Stack does not verify that the llama-guard model is registered, - # so we check it here to fail fast with a clear error. - # Custom shield providers (e.g. lightspeed_question_validity) configure - # their model internally, so provider_resource_id is not a model ID. + # Lightspeed safety providers configure their model internally + # so provider_resource_id is not necessarily a valid model ID. if shield.provider_id == "llama-guard" and ( not shield.provider_resource_id or shield.provider_resource_id not in available_models @@ -184,18 +153,17 @@ async def run_shield_moderation( moderation_result = await client.moderations.create( input=input_text, model=shield.provider_resource_id ) - # Known Llama Stack bug: error is raised when violation is present - # in the shield LLM response but has wrong format that cannot be parsed. 
- except ValueError: - logger.warning( - "Shield violation detected, treating as blocked", + except APIConnectionError as e: + error_response = ServiceUnavailableResponse( + backend_name="Llama Stack", + cause=str(e), ) - metrics.llm_calls_validation_errors_total.inc() - return ShieldModerationBlocked( - message=DEFAULT_VIOLATION_MESSAGE, - moderation_id=f"modr_{get_suid()}", - refusal_response=create_refusal_response(DEFAULT_VIOLATION_MESSAGE), + raise HTTPException(**error_response.model_dump()) from e + except (LLSApiStatusError, OpenAIAPIStatusError) as e: + error_response = handle_known_apistatus_errors( + e, shield.provider_resource_id or "" ) + raise HTTPException(**error_response.model_dump()) from e if moderation_result.results and moderation_result.results[0].flagged: result = moderation_result.results[0] @@ -247,7 +215,7 @@ async def append_turn_to_conversation( cause=str(e), ) raise HTTPException(**error_response.model_dump()) from e - except APIStatusError as e: + except LLSApiStatusError as e: error_response = InternalServerErrorResponse.generic() raise HTTPException(**error_response.model_dump()) from e @@ -255,18 +223,60 @@ async def append_turn_to_conversation( def create_refusal_response(refusal_message: str) -> OpenAIResponseMessage: """Create a refusal response message object. - Creates an OpenAIResponseMessage with assistant role containing a refusal - content part. This can be used for both conversation items and response output. - Args: refusal_message: The refusal message text. Returns: - OpenAIResponseMessage with refusal content. + OpenAIResponseMessage with refusal message. 
""" - refusal_content = OpenAIResponseContentPartRefusal(refusal=refusal_message) return OpenAIResponseMessage( - type="message", role="assistant", - content=[refusal_content], + content=refusal_message, ) + + +async def get_shields_for_request( + client: AsyncLlamaStackClient, + shield_ids: Optional[list[str]] = None, +) -> ShieldListResponse: + """Resolve shields for the request: filtered by shield_ids or all configured. + + Args: + client: Llama Stack client. + shield_ids: Optional list of shield IDs. If provided, only shields + with these identifiers are returned; if None, all configured + shields are returned. + + Returns: + ShieldListResponse: List of Shield objects to run for this request. + + Raises: + HTTPException: 404 if shield_ids is provided and any requested + shield is not configured in Llama Stack. + """ + if shield_ids == []: + return [] + try: + configured_shields: ShieldListResponse = await client.shields.list() + if shield_ids is None: + return configured_shields + requested = set(shield_ids) + configured_ids = {s.identifier for s in configured_shields} + missing = requested - configured_ids + if missing: + response = NotFoundResponse( + resource=f"Shield{'s' if len(missing) > 1 else ''}", + resource_id=", ".join(missing), + ) + raise HTTPException(**response.model_dump()) + + return [s for s in configured_shields if s.identifier in requested] + except APIConnectionError as e: + error_response = ServiceUnavailableResponse( + backend_name="Llama Stack", + cause=str(e), + ) + raise HTTPException(**error_response.model_dump()) from e + except LLSApiStatusError as e: + error_response = InternalServerErrorResponse.generic() + raise HTTPException(**error_response.model_dump()) from e diff --git a/src/utils/suid.py b/src/utils/suid.py index aafd64de5..f05fbe701 100644 --- a/src/utils/suid.py +++ b/src/utils/suid.py @@ -40,7 +40,7 @@ def check_suid(suid: str) -> bool: return False # Strip 'conv_' prefix if present - hex_part = suid[5:] if 
suid.startswith("conv_") else suid + hex_part = suid.removeprefix("conv_") # Check for 48-char hex string (llama-stack conversation ID format) if len(hex_part) == 48: @@ -103,3 +103,11 @@ def to_llama_stack_conversation_id(conversation_id: str) -> str: if not conversation_id.startswith("conv_"): return f"conv_{conversation_id}" return conversation_id + + +def is_moderation_id(suid: str) -> bool: + """Check if given string is a moderation ID. + + Returns True if the string starts with 'modr'. + """ + return suid.startswith("modr") diff --git a/src/utils/tool_formatter.py b/src/utils/tool_formatter.py index 8b200ba1d..bde4741be 100644 --- a/src/utils/tool_formatter.py +++ b/src/utils/tool_formatter.py @@ -36,7 +36,7 @@ def format_tool_response(tool_dict: dict[str, Any]) -> dict[str, Any]: description = clean_description # Extract only the required fields - formatted_tool = { + return { "identifier": tool_dict.get("identifier", ""), "description": description, "parameters": tool_dict.get("parameters", []), @@ -46,8 +46,6 @@ def format_tool_response(tool_dict: dict[str, Any]) -> dict[str, Any]: "type": tool_dict.get("type", ""), } - return formatted_tool - def extract_clean_description(description: str) -> str: """ diff --git a/src/utils/types.py b/src/utils/types.py index 8b0e618ca..1ccc77f43 100644 --- a/src/utils/types.py +++ b/src/utils/types.py @@ -1,6 +1,6 @@ """Common types for the project.""" -from typing import Annotated, Any, Literal, Optional, TypeAlias +from typing import Annotated, Any, Literal, Optional from llama_stack_api import ImageContentItem, TextContentItem from llama_stack_api.openai_responses import ( @@ -17,14 +17,16 @@ OpenAIResponseOutputMessageWebSearchToolCall as WebSearchToolCall, OpenAIResponsePrompt as Prompt, OpenAIResponseText as Text, + OpenAIResponseReasoning as Reasoning, ) from llama_stack_client.lib.agents.tool_parser import ToolParser from llama_stack_client.lib.agents.types import ( CompletionMessage as AgentCompletionMessage, 
ToolCall as AgentToolCall, ) -from pydantic import AnyUrl, BaseModel, Field +from pydantic import AnyUrl, BaseModel, ConfigDict, Field +from models.database.conversations import UserConversation from utils.token_counter import TokenCounter @@ -117,6 +119,31 @@ class ShieldModerationPassed(BaseModel): decision: Literal["passed"] = "passed" +class ResponsesConversationContext(BaseModel): + """Result of resolving conversation context for the responses endpoint. + + Holds the conversation ID to use for the LLM, the optional user conversation + record, and the resolved generate_topic_summary flag. Caller assigns these + to the request in outer scope instead of mutating the request inside the + resolver. + + Attributes: + conversation: Conversation ID in llama-stack format to use for the request. + user_conversation: Resolved user conversation record, or None for new ones. + generate_topic_summary: Resolved value for request.generate_topic_summary. + """ + + conversation: str = Field(description="Conversation ID in llama-stack format") + user_conversation: Optional[UserConversation] = Field( + default=None, + description="Resolved user conversation record, or None for new conversations", + ) + generate_topic_summary: bool = Field( + description="Resolved value for request.generate_topic_summary", + ) + model_config = ConfigDict(arbitrary_types_allowed=True) + + class ShieldModerationBlocked(BaseModel): """Shield moderation blocked the content; refusal details are present.""" @@ -131,7 +158,7 @@ class ShieldModerationBlocked(BaseModel): Field(discriminator="decision"), ] -IncludeParameter: TypeAlias = Literal[ +type IncludeParameter = Literal[ "web_search_call.action.sources", "code_interpreter_call.outputs", "computer_call_output.output.image_url", @@ -141,7 +168,7 @@ class ShieldModerationBlocked(BaseModel): "reasoning.encrypted_content", ] -ResponseItem: TypeAlias = ( +type ResponseItem = ( ResponseMessage | WebSearchToolCall | FileSearchToolCall @@ -153,7 +180,7 @@ 
class ShieldModerationBlocked(BaseModel): | McpApprovalResponse ) -ResponseInput: TypeAlias = str | list[ResponseItem] +type ResponseInput = str | list[ResponseItem] class ResponsesApiParams(BaseModel): @@ -177,6 +204,10 @@ class ResponsesApiParams(BaseModel): default=None, description="Maximum number of inference iterations", ) + max_output_tokens: Optional[int] = Field( + default=None, + description="Maximum number of tokens allowed in the response", + ) max_tool_calls: Optional[int] = Field( default=None, description="Maximum tool calls allowed in a single response", @@ -197,6 +228,10 @@ class ResponsesApiParams(BaseModel): default=None, description="Prompt template with variables for dynamic substitution", ) + reasoning: Optional[Reasoning] = Field( + default=None, + description="Reasoning configuration for the response", + ) store: bool = Field(description="Whether to store the response") stream: bool = Field(description="Whether to stream the response") temperature: Optional[float] = Field( @@ -230,6 +265,10 @@ def model_dump(self, *args: Any, **kwargs: Any) -> dict[str, Any]: MCP servers. See LCORE-1414 / GitHub issue #1269. 
""" result = super().model_dump(*args, **kwargs) + # Only one context option is allowed, previous_response_id has priority + # Turn is added to conversation manually if previous_response_id is used + if self.previous_response_id: + result.pop("conversation", None) dumped_tools = result.get("tools") if not self.tools or not isinstance(dumped_tools, list): return result @@ -327,12 +366,12 @@ class RAGContext(BaseModel): class TurnSummary(BaseModel): """Summary of a turn in llama stack.""" + id: str = Field(default="", description="ID of the response") llm_response: str = "" tool_calls: list[ToolCallSummary] = Field(default_factory=list) tool_results: list[ToolResultSummary] = Field(default_factory=list) rag_chunks: list[RAGChunk] = Field(default_factory=list) referenced_documents: list[ReferencedDocument] = Field(default_factory=list) - inline_rag_documents: list[ReferencedDocument] = Field(default_factory=list) token_usage: TokenCounter = Field(default_factory=TokenCounter) diff --git a/src/utils/vector_search.py b/src/utils/vector_search.py index 485914e0b..4a6b58f49 100644 --- a/src/utils/vector_search.py +++ b/src/utils/vector_search.py @@ -6,9 +6,12 @@ import asyncio import traceback -from typing import Any, Optional +from typing import Any, Optional, cast from urllib.parse import urljoin +from llama_stack_api.openai_responses import ( + OpenAIResponseMessage as ResponseMessage, +) from llama_stack_client import AsyncLlamaStackClient from pydantic import AnyUrl @@ -17,7 +20,7 @@ from log import get_logger from models.responses import ReferencedDocument from utils.responses import resolve_vector_store_ids -from utils.types import RAGChunk, RAGContext +from utils.types import RAGChunk, RAGContext, ResponseInput logger = get_logger(__name__) @@ -493,6 +496,7 @@ async def _fetch_solr_rag( async def build_rag_context( client: AsyncLlamaStackClient, + moderation_decision: str, query: str, vector_store_ids: Optional[list[str]], solr: Optional[dict[str, Any]] = None, @@ 
-503,12 +507,17 @@ async def build_rag_context( Args: client: The AsyncLlamaStackClient to use for the request - query_request: The user's query request - configuration: Application configuration + moderation_decision: The moderation decision + query: The user's query + vector_store_ids: The vector store IDs to query + solr: The Solr query parameters Returns: RAGContext containing formatted context text and referenced documents """ + if moderation_decision == "blocked": + return RAGContext() + # Fetch from all enabled RAG sources in parallel byok_chunks_task = _fetch_byok_rag(client, query, vector_store_ids) solr_chunks_task = _fetch_solr_rag(client, query, solr) @@ -625,3 +634,39 @@ def _convert_solr_chunks_to_rag_format( ) return rag_chunks + + +def append_inline_rag_context_to_responses_input( + input_value: ResponseInput, + inline_rag_context_text: str, +) -> ResponseInput: + """Append inline RAG context to Responses API input. + + If input is str, appends the context text. + If input is a sequence of items, appends the context to the text of the first user message. + If there is no user message, returns the input unchanged. + + Parameters: + input_value: The request input (string or list of ResponseItem). + inline_rag_context_text: RAG context string to inject. + + Returns: + The same type as input_value, with context merged in. 
+ """ + if not inline_rag_context_text: + return input_value + if isinstance(input_value, str): + return input_value + "\n\n" + inline_rag_context_text + for item in input_value: + if item.type != "message" or item.role != "user": + continue + message = cast(ResponseMessage, item) + content = message.content + if isinstance(content, str): + message.content = content + "\n\n" + inline_rag_context_text + return input_value + for part in content: + if part.type == "input_text": + part.text = part.text + "\n\n" + inline_rag_context_text + return input_value + return input_value diff --git a/tests/benchmarks/data_generators.py b/tests/benchmarks/data_generators.py index e39f1d6e7..672f09cba 100644 --- a/tests/benchmarks/data_generators.py +++ b/tests/benchmarks/data_generators.py @@ -155,5 +155,4 @@ def generate_topic_summary() -> str: ], ] - summary = " ".join([random.choice(yap) for yap in yaps]) + "." - return summary + return " ".join([random.choice(yap) for yap in yaps]) + "." diff --git a/tests/e2e/configuration/library-mode/lightspeed-stack-invalid-mcp-file-auth.yaml b/tests/e2e/configuration/library-mode/lightspeed-stack-invalid-mcp-file-auth.yaml new file mode 100644 index 000000000..483e32b73 --- /dev/null +++ b/tests/e2e/configuration/library-mode/lightspeed-stack-invalid-mcp-file-auth.yaml @@ -0,0 +1,24 @@ +name: Lightspeed Core Service (LCS) +service: + host: 0.0.0.0 + port: 8080 + auth_enabled: false + workers: 1 + color_log: true + access_log: true +llama_stack: + # Library mode - embeds llama-stack as library + use_as_library_client: true + library_client_config_path: run.yaml +user_data_collection: + feedback_enabled: true + feedback_storage: "/tmp/data/feedback" + transcripts_enabled: true + transcripts_storage: "/tmp/data/transcripts" +authentication: + module: "noop" +mcp_servers: + - name: "mcp-file" + url: "http://mock-mcp:3001" + authorization_headers: + Authorization: "/tmp/invalid-mcp-secret-token" diff --git 
a/tests/e2e/configuration/library-mode/lightspeed-stack-mcp-client-auth.yaml b/tests/e2e/configuration/library-mode/lightspeed-stack-mcp-client-auth.yaml new file mode 100644 index 000000000..05f304a5d --- /dev/null +++ b/tests/e2e/configuration/library-mode/lightspeed-stack-mcp-client-auth.yaml @@ -0,0 +1,24 @@ +name: Lightspeed Core Service (LCS) +service: + host: 0.0.0.0 + port: 8080 + auth_enabled: false + workers: 1 + color_log: true + access_log: true +llama_stack: + # Library mode - embeds llama-stack as library + use_as_library_client: true + library_client_config_path: run.yaml +user_data_collection: + feedback_enabled: true + feedback_storage: "/tmp/data/feedback" + transcripts_enabled: true + transcripts_storage: "/tmp/data/transcripts" +authentication: + module: "noop" +mcp_servers: + - name: "mcp-client" + url: "http://mock-mcp:3001" + authorization_headers: + Authorization: "client" diff --git a/tests/e2e/configuration/library-mode/lightspeed-stack-mcp-file-auth.yaml b/tests/e2e/configuration/library-mode/lightspeed-stack-mcp-file-auth.yaml index 1ff0d425e..79a8807ec 100644 --- a/tests/e2e/configuration/library-mode/lightspeed-stack-mcp-file-auth.yaml +++ b/tests/e2e/configuration/library-mode/lightspeed-stack-mcp-file-auth.yaml @@ -18,8 +18,7 @@ user_data_collection: authentication: module: "noop" mcp_servers: - - name: "mcp-file-auth" - provider_id: "model-context-protocol" + - name: "mcp-file" url: "http://mock-mcp:3001" authorization_headers: Authorization: "/tmp/mcp-secret-token" diff --git a/tests/e2e/configuration/library-mode/lightspeed-stack-mcp-kubernetes-auth.yaml b/tests/e2e/configuration/library-mode/lightspeed-stack-mcp-kubernetes-auth.yaml new file mode 100644 index 000000000..2d79f1f9d --- /dev/null +++ b/tests/e2e/configuration/library-mode/lightspeed-stack-mcp-kubernetes-auth.yaml @@ -0,0 +1,24 @@ +name: Lightspeed Core Service (LCS) +service: + host: 0.0.0.0 + port: 8080 + auth_enabled: false + workers: 1 + color_log: true + 
access_log: true +llama_stack: + # Library mode - embeds llama-stack as library + use_as_library_client: true + library_client_config_path: run.yaml +user_data_collection: + feedback_enabled: true + feedback_storage: "/tmp/data/feedback" + transcripts_enabled: true + transcripts_storage: "/tmp/data/transcripts" +authentication: + module: "noop" +mcp_servers: + - name: "mcp-kubernetes" + url: "http://mock-mcp:3001" + authorization_headers: + Authorization: "kubernetes" diff --git a/tests/e2e/configuration/library-mode/lightspeed-stack-mcp-oauth-auth.yaml b/tests/e2e/configuration/library-mode/lightspeed-stack-mcp-oauth-auth.yaml new file mode 100644 index 000000000..3294ac708 --- /dev/null +++ b/tests/e2e/configuration/library-mode/lightspeed-stack-mcp-oauth-auth.yaml @@ -0,0 +1,24 @@ +name: Lightspeed Core Service (LCS) +service: + host: 0.0.0.0 + port: 8080 + auth_enabled: false + workers: 1 + color_log: true + access_log: true +llama_stack: + # Library mode - embeds llama-stack as library + use_as_library_client: true + library_client_config_path: run.yaml +user_data_collection: + feedback_enabled: true + feedback_storage: "/tmp/data/feedback" + transcripts_enabled: true + transcripts_storage: "/tmp/data/transcripts" +authentication: + module: "noop" +mcp_servers: + - name: "mcp-oauth" + url: "http://mock-mcp:3001" + authorization_headers: + Authorization: "oauth" diff --git a/tests/e2e/configuration/library-mode/lightspeed-stack-mcp.yaml b/tests/e2e/configuration/library-mode/lightspeed-stack-mcp.yaml index 0656aa87c..647a2cae9 100644 --- a/tests/e2e/configuration/library-mode/lightspeed-stack-mcp.yaml +++ b/tests/e2e/configuration/library-mode/lightspeed-stack-mcp.yaml @@ -19,7 +19,18 @@ authentication: module: "noop" mcp_servers: - name: "mcp-oauth" - provider_id: "model-context-protocol" url: "http://mock-mcp:3001" authorization_headers: - Authorization: "oauth" \ No newline at end of file + Authorization: "oauth" + - name: "mcp-kubernetes" + url: 
"http://mock-mcp:3001" + authorization_headers: + Authorization: "kubernetes" + - name: "mcp-file" + url: "http://mock-mcp:3001" + authorization_headers: + Authorization: "/tmp/mcp-secret-token" + - name: "mcp-client" + url: "http://mock-mcp:3001" + authorization_headers: + Authorization: "client" \ No newline at end of file diff --git a/tests/e2e/configuration/server-mode/lightspeed-stack-invalid-mcp-file-auth.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack-invalid-mcp-file-auth.yaml new file mode 100644 index 000000000..05ec86fdf --- /dev/null +++ b/tests/e2e/configuration/server-mode/lightspeed-stack-invalid-mcp-file-auth.yaml @@ -0,0 +1,25 @@ +name: Lightspeed Core Service (LCS) +service: + host: 0.0.0.0 + port: 8080 + auth_enabled: false + workers: 1 + color_log: true + access_log: true +llama_stack: + # Server mode - connects to separate llama-stack service + use_as_library_client: false + url: http://llama-stack:8321 + api_key: xyzzy +user_data_collection: + feedback_enabled: true + feedback_storage: "/tmp/data/feedback" + transcripts_enabled: true + transcripts_storage: "/tmp/data/transcripts" +authentication: + module: "noop" +mcp_servers: + - name: "mcp-file" + url: "http://mock-mcp:3001" + authorization_headers: + Authorization: "/tmp/invalid-mcp-secret-token" diff --git a/tests/e2e/configuration/server-mode/lightspeed-stack-mcp-client-auth.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack-mcp-client-auth.yaml new file mode 100644 index 000000000..e0f952fc3 --- /dev/null +++ b/tests/e2e/configuration/server-mode/lightspeed-stack-mcp-client-auth.yaml @@ -0,0 +1,25 @@ +name: Lightspeed Core Service (LCS) +service: + host: 0.0.0.0 + port: 8080 + auth_enabled: false + workers: 1 + color_log: true + access_log: true +llama_stack: + # Server mode - connects to separate llama-stack service + use_as_library_client: false + url: http://llama-stack:8321 + api_key: xyzzy +user_data_collection: + feedback_enabled: true + feedback_storage: 
"/tmp/data/feedback" + transcripts_enabled: true + transcripts_storage: "/tmp/data/transcripts" +authentication: + module: "noop" +mcp_servers: + - name: "mcp-client" + url: "http://mock-mcp:3001" + authorization_headers: + Authorization: "client" diff --git a/tests/e2e/configuration/server-mode/lightspeed-stack-mcp-file-auth.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack-mcp-file-auth.yaml index d39f55399..aca5c6ef2 100644 --- a/tests/e2e/configuration/server-mode/lightspeed-stack-mcp-file-auth.yaml +++ b/tests/e2e/configuration/server-mode/lightspeed-stack-mcp-file-auth.yaml @@ -19,8 +19,7 @@ user_data_collection: authentication: module: "noop" mcp_servers: - - name: "mcp-file-auth" - provider_id: "model-context-protocol" + - name: "mcp-file" url: "http://mock-mcp:3001" authorization_headers: Authorization: "/tmp/mcp-secret-token" diff --git a/tests/e2e/configuration/server-mode/lightspeed-stack-mcp-kubernetes-auth.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack-mcp-kubernetes-auth.yaml new file mode 100644 index 000000000..66dc7f87b --- /dev/null +++ b/tests/e2e/configuration/server-mode/lightspeed-stack-mcp-kubernetes-auth.yaml @@ -0,0 +1,25 @@ +name: Lightspeed Core Service (LCS) +service: + host: 0.0.0.0 + port: 8080 + auth_enabled: false + workers: 1 + color_log: true + access_log: true +llama_stack: + # Server mode - connects to separate llama-stack service + use_as_library_client: false + url: http://llama-stack:8321 + api_key: xyzzy +user_data_collection: + feedback_enabled: true + feedback_storage: "/tmp/data/feedback" + transcripts_enabled: true + transcripts_storage: "/tmp/data/transcripts" +authentication: + module: "noop" +mcp_servers: + - name: "mcp-kubernetes" + url: "http://mock-mcp:3001" + authorization_headers: + Authorization: "kubernetes" diff --git a/tests/e2e/configuration/server-mode/lightspeed-stack-mcp-oauth-auth.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack-mcp-oauth-auth.yaml new file mode 100644 
index 000000000..b9125de8e --- /dev/null +++ b/tests/e2e/configuration/server-mode/lightspeed-stack-mcp-oauth-auth.yaml @@ -0,0 +1,25 @@ +name: Lightspeed Core Service (LCS) +service: + host: 0.0.0.0 + port: 8080 + auth_enabled: false + workers: 1 + color_log: true + access_log: true +llama_stack: + # Server mode - connects to separate llama-stack service + use_as_library_client: false + url: http://llama-stack:8321 + api_key: xyzzy +user_data_collection: + feedback_enabled: true + feedback_storage: "/tmp/data/feedback" + transcripts_enabled: true + transcripts_storage: "/tmp/data/transcripts" +authentication: + module: "noop" +mcp_servers: + - name: "mcp-oauth" + url: "http://mock-mcp:3001" + authorization_headers: + Authorization: "oauth" diff --git a/tests/e2e/configuration/server-mode/lightspeed-stack-mcp.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack-mcp.yaml index a598ce441..e35535f42 100644 --- a/tests/e2e/configuration/server-mode/lightspeed-stack-mcp.yaml +++ b/tests/e2e/configuration/server-mode/lightspeed-stack-mcp.yaml @@ -20,7 +20,18 @@ authentication: module: "noop" mcp_servers: - name: "mcp-oauth" - provider_id: "model-context-protocol" url: "http://mock-mcp:3001" authorization_headers: - Authorization: "oauth" \ No newline at end of file + Authorization: "oauth" + - name: "mcp-kubernetes" + url: "http://mock-mcp:3001" + authorization_headers: + Authorization: "kubernetes" + - name: "mcp-file" + url: "http://mock-mcp:3001" + authorization_headers: + Authorization: "/tmp/mcp-secret-token" + - name: "mcp-client" + url: "http://mock-mcp:3001" + authorization_headers: + Authorization: "client" \ No newline at end of file diff --git a/tests/e2e/features/environment.py b/tests/e2e/features/environment.py index 0ca8781d0..bc117f46f 100644 --- a/tests/e2e/features/environment.py +++ b/tests/e2e/features/environment.py @@ -16,11 +16,13 @@ from tests.e2e.utils.prow_utils import restore_llama_stack_pod from behave.runner import Context -from 
tests.e2e.utils.llama_stack_shields import ( +from tests.e2e.utils.llama_stack_utils import ( register_shield, + unregister_mcp_toolgroups, unregister_shield, ) from tests.e2e.utils.utils import ( + clear_llama_stack_storage, create_config_backup, is_prow_environment, remove_config_backup, @@ -57,6 +59,22 @@ "tests/e2e/configuration/{mode_dir}/lightspeed-stack-mcp-file-auth.yaml", "tests/e2e-prow/rhoai/configs/lightspeed-stack-mcp-file-auth.yaml", ), + "invalid-mcp-file-auth": ( + "tests/e2e/configuration/{mode_dir}/lightspeed-stack-invalid-mcp-file-auth.yaml", + "tests/e2e-prow/rhoai/configs/lightspeed-stack-invalid-mcp-file-auth.yaml", + ), + "mcp-kubernetes-auth": ( + "tests/e2e/configuration/{mode_dir}/lightspeed-stack-mcp-kubernetes-auth.yaml", + "tests/e2e-prow/rhoai/configs/lightspeed-stack-mcp-kubernetes-auth.yaml", + ), + "mcp-client-auth": ( + "tests/e2e/configuration/{mode_dir}/lightspeed-stack-mcp-client-auth.yaml", + "tests/e2e-prow/rhoai/configs/lightspeed-stack-mcp-client-auth.yaml", + ), + "mcp-oauth-auth": ( + "tests/e2e/configuration/{mode_dir}/lightspeed-stack-mcp-oauth-auth.yaml", + "tests/e2e-prow/rhoai/configs/lightspeed-stack-mcp-oauth-auth.yaml", + ), } @@ -207,6 +225,27 @@ def before_scenario(context: Context, scenario: Scenario) -> None: switch_config(context.scenario_config) restart_container("lightspeed-stack") + config_name: str | None = None + if "MCPFileAuthConfig" in scenario.effective_tags: + config_name = "mcp-file-auth" + elif "InvalidMCPFileAuthConfig" in scenario.effective_tags: + config_name = "invalid-mcp-file-auth" + elif "MCPKubernetesAuthConfig" in scenario.effective_tags: + config_name = "mcp-kubernetes-auth" + elif "MCPClientAuthConfig" in scenario.effective_tags: + config_name = "mcp-client-auth" + elif "MCPOAuthAuthConfig" in scenario.effective_tags: + config_name = "mcp-oauth-auth" + + if config_name is not None: + if not context.is_library_mode: + unregister_mcp_toolgroups() + else: + clear_llama_stack_storage() + 
context.scenario_config = _get_config_path(config_name, mode_dir) + switch_config(context.scenario_config) + restart_container("lightspeed-stack") + def after_scenario(context: Context, scenario: Scenario) -> None: """Run after each scenario is run. @@ -241,7 +280,15 @@ def after_scenario(context: Context, scenario: Scenario) -> None: context.llama_stack_was_running = False # Tags that require config restoration after scenario - config_restore_tags = {"InvalidFeedbackStorageConfig", "NoCacheConfig"} + config_restore_tags = { + "InvalidFeedbackStorageConfig", + "NoCacheConfig", + "MCPFileAuthConfig", + "InvalidMCPFileAuthConfig", + "MCPKubernetesAuthConfig", + "MCPClientAuthConfig", + "MCPOAuthAuthConfig", + } if config_restore_tags & set(scenario.effective_tags): switch_config(context.feature_config) restart_container("lightspeed-stack") diff --git a/tests/e2e/features/info.feature b/tests/e2e/features/info.feature index 7b16933af..e3c2c066c 100644 --- a/tests/e2e/features/info.feature +++ b/tests/e2e/features/info.feature @@ -16,7 +16,7 @@ Feature: Info tests When I access REST API endpoint "info" using HTTP GET method Then The status code of the response is 200 And The body of the response has proper name Lightspeed Core Service (LCS) and version 0.4.2 - And The body of the response has llama-stack version 0.4.3 + And The body of the response has llama-stack version 0.5.2 @skip-in-library-mode Scenario: Check if info endpoint reports error when llama-stack connection is not working diff --git a/tests/e2e/features/mcp.feature b/tests/e2e/features/mcp.feature index 90d6c5cf8..cd8980e9c 100644 --- a/tests/e2e/features/mcp.feature +++ b/tests/e2e/features/mcp.feature @@ -5,7 +5,358 @@ Feature: MCP tests Given The service is started locally And REST API service prefix is /v1 - Scenario: Check if tools endpoint reports error when MCP requires authentication + +# File-based + @skip # will be fixed by LCORE-1461 + @MCPFileAuthConfig + Scenario: Check if tools endpoint 
succeeds when MCP file-based auth token is passed + Given The system is in default state + When I access REST API endpoint "tools" using HTTP GET method + Then The status code of the response is 200 + And The body of the response contains mcp-file + + @skip-in-library-mode # will be fixed in LCORE-1428 + @MCPFileAuthConfig + Scenario: Check if query endpoint succeeds when MCP file-based auth token is passed + Given The system is in default state + And I capture the current token metrics + When I use "query" to ask question + """ + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} + """ + Then The status code of the response is 200 + And The response should contain following fragments + | Fragments in LLM response | + | Hello | + And The token metrics should have increased + + @skip-in-library-mode # will be fixed in LCORE-1428 + @MCPFileAuthConfig + Scenario: Check if streaming_query endpoint succeeds when MCP file-based auth token is passed + Given The system is in default state + And I capture the current token metrics + When I use "streaming_query" to ask question + """ + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} + """ + When I wait for the response to be completed + Then The status code of the response is 200 + And The streamed response should contain following fragments + | Fragments in LLM response | + | Hello | + And The token metrics should have increased + + @skip # will be fixed by LCORE-1461 + @InvalidMCPFileAuthConfig + Scenario: Check if tools endpoint reports error when MCP file-based invalid auth token is passed + Given The system is in default state + When I access REST API endpoint "tools" using HTTP GET method + Then The status code of the response is 401 + And The body of the response is the following + """ + { + "detail": { + "response": "Missing or invalid credentials provided by client", + "cause": "MCP server at http://mock-mcp:3001 requires OAuth" + } + } + """ + + @skip # will be fixed by 
LCORE-1463 + @InvalidMCPFileAuthConfig + Scenario: Check if query endpoint reports error when MCP file-based invalid auth token is passed + Given The system is in default state + When I use "query" to ask question + """ + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} + """ + Then The status code of the response is 401 + And The body of the response is the following + """ + { + "detail": { + "response": "Missing or invalid credentials provided by client", + "cause": "MCP server at http://mock-mcp:3001 requires OAuth" + } + } + """ + + @skip # will be fixed by LCORE-1463 + @InvalidMCPFileAuthConfig + Scenario: Check if streaming_query endpoint reports error when MCP file-based invalid auth token is passed + Given The system is in default state + When I use "streaming_query" to ask question + """ + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} + """ + Then The status code of the response is 401 + And The body of the response is the following + """ + { + "detail": { + "response": "Missing or invalid credentials provided by client", + "cause": "MCP server at http://mock-mcp:3001 requires OAuth" + } + } + """ + +# Kubernetes + @skip # will be fixed by LCORE-1461 + @MCPKubernetesAuthConfig + Scenario: Check if tools endpoint succeeds when MCP kubernetes auth token is passed + Given The system is in default state + And I set the Authorization header to Bearer kubernetes-test-token + When I access REST API endpoint "tools" using HTTP GET method + Then The status code of the response is 200 + And The body of the response contains mcp-kubernetes + + @skip-in-library-mode # will be fixed in LCORE-1428 + @MCPKubernetesAuthConfig + Scenario: Check if query endpoint succeeds when MCP kubernetes auth token is passed + Given The system is in default state + And I set the Authorization header to Bearer kubernetes-test-token + And I capture the current token metrics + When I use "query" to ask question with authorization header + """ + 
{"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} + """ + Then The status code of the response is 200 + And The response should contain following fragments + | Fragments in LLM response | + | Hello | + And The token metrics should have increased + + @skip-in-library-mode # will be fixed in LCORE-1428 + @MCPKubernetesAuthConfig + Scenario: Check if streaming_query endpoint succeeds when MCP kubernetes auth token is passed + Given The system is in default state + And I set the Authorization header to Bearer kubernetes-test-token + And I capture the current token metrics + When I use "streaming_query" to ask question with authorization header + """ + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} + """ + When I wait for the response to be completed + Then The status code of the response is 200 + And The streamed response should contain following fragments + | Fragments in LLM response | + | Hello | + And The token metrics should have increased + + @skip # will be fixed by LCORE-1461 + @MCPKubernetesAuthConfig + Scenario: Check if tools endpoint reports error when MCP kubernetes invalid auth token is passed + Given The system is in default state + And I set the Authorization header to Bearer kubernetes-invalid-token + When I access REST API endpoint "tools" using HTTP GET method + Then The status code of the response is 401 + And The body of the response is the following + """ + { + "detail": { + "response": "Missing or invalid credentials provided by client", + "cause": "MCP server at http://mock-mcp:3001 requires OAuth" + } + } + """ + + @skip # will be fixed by LCORE-1463 + @MCPKubernetesAuthConfig + Scenario: Check if query endpoint reports error when MCP kubernetes invalid auth token is passed + Given The system is in default state + And I set the Authorization header to Bearer kubernetes-invalid-token + When I use "query" to ask question with authorization header + """ + {"query": "Say hello", "model": "{MODEL}", 
"provider": "{PROVIDER}"} + """ + Then The status code of the response is 401 + And The body of the response is the following + """ + { + "detail": { + "response": "Missing or invalid credentials provided by client", + "cause": "MCP server at http://mock-mcp:3001 requires OAuth" + } + } + """ + + @skip # will be fixed by LCORE-1463 + @MCPKubernetesAuthConfig + Scenario: Check if streaming_query endpoint reports error when MCP kubernetes invalid auth token is passed + Given The system is in default state + And I set the Authorization header to Bearer kubernetes-invalid-token + When I use "streaming_query" to ask question with authorization header + """ + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} + """ + Then The status code of the response is 401 + And The body of the response is the following + """ + { + "detail": { + "response": "Missing or invalid credentials provided by client", + "cause": "MCP server at http://mock-mcp:3001 requires OAuth" + } + } + """ + +# Client-provided + @skip # will be fixed by LCORE-1462 + @MCPClientAuthConfig + Scenario: Check if tools endpoint succeeds by skipping when MCP client-provided auth token is omitted + Given The system is in default state + When I access REST API endpoint "tools" using HTTP GET method + Then The status code of the response is 200 + And The body of the response does not contain mcp-client + + @MCPClientAuthConfig + Scenario: Check if query endpoint succeeds by skipping when MCP client-provided auth token is omitted + Given The system is in default state + And I capture the current token metrics + When I use "query" to ask question + """ + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} + """ + Then The status code of the response is 200 + And The body of the response does not contain mcp-client + And The response should contain following fragments + | Fragments in LLM response | + | Hello | + And The token metrics should have increased + + @MCPClientAuthConfig + 
Scenario: Check if streaming_query endpoint succeeds by skipping when MCP client-provided auth token is omitted + Given The system is in default state + And I capture the current token metrics + When I use "streaming_query" to ask question + """ + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} + """ + When I wait for the response to be completed + Then The status code of the response is 200 + And The body of the response does not contain mcp-client + And The streamed response should contain following fragments + | Fragments in LLM response | + | Hello | + And The token metrics should have increased + + @MCPClientAuthConfig + Scenario: Check if tools endpoint succeeds when MCP client-provided auth token is passed + Given The system is in default state + And I set the "MCP-HEADERS" header to + """ + {"mcp-client": {"Authorization": "Bearer client-test-token"}} + """ + When I access REST API endpoint "tools" using HTTP GET method + Then The status code of the response is 200 + And The body of the response contains mcp-client + + @skip-in-library-mode # will be fixed in LCORE-1428 + @MCPClientAuthConfig + Scenario: Check if query endpoint succeeds when MCP client-provided auth token is passed + Given The system is in default state + And I set the "MCP-HEADERS" header to + """ + {"mcp-client": {"Authorization": "Bearer client-test-token"}} + """ + And I capture the current token metrics + When I use "query" to ask question with authorization header + """ + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} + """ + Then The status code of the response is 200 + And The response should contain following fragments + | Fragments in LLM response | + | Hello | + And The token metrics should have increased + + @skip-in-library-mode # will be fixed in LCORE-1428 + @MCPClientAuthConfig + Scenario: Check if streaming_query endpoint succeeds when MCP client-provided auth token is passed + Given The system is in default state + And I set the 
"MCP-HEADERS" header to + """ + {"mcp-client": {"Authorization": "Bearer client-test-token"}} + """ + And I capture the current token metrics + When I use "streaming_query" to ask question with authorization header + """ + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} + """ + When I wait for the response to be completed + Then The status code of the response is 200 + And The streamed response should contain following fragments + | Fragments in LLM response | + | Hello | + And The token metrics should have increased + + @MCPClientAuthConfig + Scenario: Check if tools endpoint reports error when MCP client-provided invalid auth token is passed + Given The system is in default state + And I set the "MCP-HEADERS" header to + """ + {"mcp-client": {"Authorization": "Bearer client-invalid-token"}} + """ + When I access REST API endpoint "tools" using HTTP GET method + Then The status code of the response is 401 + And The body of the response is the following + """ + { + "detail": { + "response": "Missing or invalid credentials provided by client", + "cause": "MCP server at http://mock-mcp:3001 requires OAuth" + } + } + """ + + @MCPClientAuthConfig + Scenario: Check if query endpoint reports error when MCP client-provided invalid auth token is passed + Given The system is in default state + And I set the "MCP-HEADERS" header to + """ + {"mcp-client": {"Authorization": "Bearer client-invalid-token"}} + """ + When I use "query" to ask question with authorization header + """ + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} + """ + Then The status code of the response is 401 + And The body of the response is the following + """ + { + "detail": { + "response": "Missing or invalid credentials provided by client", + "cause": "MCP server at http://mock-mcp:3001 requires OAuth" + } + } + """ + + @MCPClientAuthConfig + Scenario: Check if streaming_query endpoint reports error when MCP client-provided invalid auth token is passed + Given 
The system is in default state + And I set the "MCP-HEADERS" header to + """ + {"mcp-client": {"Authorization": "Bearer client-invalid-token"}} + """ + When I use "streaming_query" to ask question with authorization header + """ + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} + """ + Then The status code of the response is 401 + And The body of the response is the following + """ + { + "detail": { + "response": "Missing or invalid credentials provided by client", + "cause": "MCP server at http://mock-mcp:3001 requires OAuth" + } + } + """ + +# OAuth + + @MCPOAuthAuthConfig + Scenario: Check if tools endpoint reports error when MCP OAuth requires authentication Given The system is in default state When I access REST API endpoint "tools" using HTTP GET method Then The status code of the response is 401 @@ -20,7 +371,8 @@ Feature: MCP tests """ And The headers of the response contains the following header "www-authenticate" - Scenario: Check if query endpoint reports error when MCP requires authentication + @MCPOAuthAuthConfig + Scenario: Check if query endpoint reports error when MCP OAuth requires authentication Given The system is in default state When I use "query" to ask question """ @@ -38,7 +390,8 @@ Feature: MCP tests """ And The headers of the response contains the following header "www-authenticate" - Scenario: Check if streaming_query endpoint reports error when MCP requires authentication + @MCPOAuthAuthConfig + Scenario: Check if streaming_query endpoint reports error when MCP OAuth requires authentication Given The system is in default state When I use "streaming_query" to ask question """ @@ -56,22 +409,24 @@ Feature: MCP tests """ And The headers of the response contains the following header "www-authenticate" - Scenario: Check if tools endpoint succeeds when MCP auth token is passed + @MCPOAuthAuthConfig + Scenario: Check if tools endpoint succeeds when MCP OAuth auth token is passed Given The system is in default state And I 
set the "MCP-HEADERS" header to """ - {"mcp-oauth": {"Authorization": "Bearer test-token"}} + {"mcp-oauth": {"Authorization": "Bearer oauth-test-token"}} """ When I access REST API endpoint "tools" using HTTP GET method Then The status code of the response is 200 And The body of the response contains mcp-oauth @skip-in-library-mode # will be fixed in LCORE-1428 - Scenario: Check if query endpoint succeeds when MCP auth token is passed + @MCPOAuthAuthConfig + Scenario: Check if query endpoint succeeds when MCP OAuth auth token is passed Given The system is in default state And I set the "MCP-HEADERS" header to """ - {"mcp-oauth": {"Authorization": "Bearer test-token"}} + {"mcp-oauth": {"Authorization": "Bearer oauth-test-token"}} """ And I capture the current token metrics When I use "query" to ask question with authorization header @@ -85,11 +440,12 @@ Feature: MCP tests And The token metrics should have increased @skip-in-library-mode # will be fixed in LCORE-1428 - Scenario: Check if streaming_query endpoint succeeds when MCP auth token is passed + @MCPOAuthAuthConfig + Scenario: Check if streaming_query endpoint succeeds when MCP OAuth auth token is passed Given The system is in default state And I set the "MCP-HEADERS" header to """ - {"mcp-oauth": {"Authorization": "Bearer test-token"}} + {"mcp-oauth": {"Authorization": "Bearer oauth-test-token"}} """ And I capture the current token metrics When I use "streaming_query" to ask question with authorization header @@ -103,11 +459,12 @@ Feature: MCP tests | Hello | And The token metrics should have increased - Scenario: Check if tools endpoint reports error when MCP invalid auth token is passed + @MCPOAuthAuthConfig + Scenario: Check if tools endpoint reports error when MCP OAuth invalid auth token is passed Given The system is in default state And I set the "MCP-HEADERS" header to """ - {"mcp-oauth": {"Authorization": "Bearer invalid-token"}} + {"mcp-oauth": {"Authorization": "Bearer oauth-invalid-token"}} """ 
When I access REST API endpoint "tools" using HTTP GET method Then The status code of the response is 401 @@ -122,12 +479,12 @@ Feature: MCP tests """ And The headers of the response contains the following header "www-authenticate" - @skip # will be fixed in LCORE-1366 - Scenario: Check if query endpoint reports error when MCP invalid auth token is passed + @MCPOAuthAuthConfig + Scenario: Check if query endpoint reports error when MCP OAuth invalid auth token is passed Given The system is in default state And I set the "MCP-HEADERS" header to """ - {"mcp-oauth": {"Authorization": "Bearer invalid-token"}} + {"mcp-oauth": {"Authorization": "Bearer oauth-invalid-token"}} """ When I use "query" to ask question with authorization header """ @@ -145,11 +502,12 @@ Feature: MCP tests """ And The headers of the response contains the following header "www-authenticate" - Scenario: Check if streaming_query endpoint reports error when MCP invalid auth token is passed + @MCPOAuthAuthConfig + Scenario: Check if streaming_query endpoint reports error when MCP OAuth invalid auth token is passed Given The system is in default state And I set the "MCP-HEADERS" header to """ - {"mcp-oauth": {"Authorization": "Bearer invalid-token"}} + {"mcp-oauth": {"Authorization": "Bearer oauth-invalid-token"}} """ When I use "streaming_query" to ask question with authorization header """ diff --git a/tests/e2e/features/mcp_file_auth.feature b/tests/e2e/features/mcp_file_auth.feature deleted file mode 100644 index 455f0740c..000000000 --- a/tests/e2e/features/mcp_file_auth.feature +++ /dev/null @@ -1,20 +0,0 @@ -@MCPFileAuth -Feature: MCP file-based authorization tests - - Regression tests for LCORE-1414: MCP authorization tokens configured via - file-based authorization_headers must survive model_dump() serialization - and reach the MCP server as a valid Bearer token. 
- - Background: - Given The service is started locally - And REST API service prefix is /v1 - - @skip-in-library-mode - Scenario: Query succeeds with file-based MCP authorization - Given The system is in default state - When I use "query" to ask question - """ - {"query": "Use the mock_tool_e2e tool to send the message 'hello'", "model": "{MODEL}", "provider": "{PROVIDER}"} - """ - Then The status code of the response is 200 - And The body of the response contains mock_tool_e2e diff --git a/tests/e2e/features/query.feature b/tests/e2e/features/query.feature index f765257b1..a85aa8139 100644 --- a/tests/e2e/features/query.feature +++ b/tests/e2e/features/query.feature @@ -175,7 +175,7 @@ Scenario: Check if LLM responds for query request with error for missing query {"query": "Say hello", "model": "{MODEL}", "provider":"unknown"} """ Then The status code of the response is 404 - And The body of the response contains Model with ID gpt-4o-mini does not exist + And The body of the response contains Model with ID {MODEL} does not exist @skip-in-library-mode Scenario: Check if LLM responds for query request with error for inability to connect to llama-stack diff --git a/tests/e2e/features/responses.feature b/tests/e2e/features/responses.feature new file mode 100644 index 000000000..e1e0ccd61 --- /dev/null +++ b/tests/e2e/features/responses.feature @@ -0,0 +1,24 @@ +@Authorized +Feature: Responses endpoint API tests + + Background: + Given The service is started locally + And REST API service prefix is /v1 + + Scenario: Check if responses endpoint returns 200 for minimal request + Given The system is in default state + And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva + When I use "responses" to ask question with authorization header + """ + {"input": "Say hello", "model": "{PROVIDER}/{MODEL}", "stream": false} + """ + Then The status code of the response is 200 + + Scenario: Check if responses 
endpoint returns 200 for minimal streaming request + Given The system is in default state + And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva + When I use "responses" to ask question with authorization header + """ + {"input": "Say hello", "model": "{PROVIDER}/{MODEL}", "stream": true} + """ + Then The status code of the response is 200 \ No newline at end of file diff --git a/tests/e2e/features/steps/common_http.py b/tests/e2e/features/steps/common_http.py index 8e64fe5fc..a8dbcf212 100644 --- a/tests/e2e/features/steps/common_http.py +++ b/tests/e2e/features/steps/common_http.py @@ -165,11 +165,26 @@ def check_response_body_schema(context: Context) -> None: @then("The body of the response contains {substring}") def check_response_body_contains(context: Context, substring: str) -> None: - """Check that response body contains a substring.""" + """Check that response body contains a substring. + + Supports {MODEL} and {PROVIDER} placeholders in the substring so + assertions work with any configured provider (e.g. unknown-provider + error message includes the actual model id).
+ """ + assert context.response is not None, "Request needs to be performed first" + expected = replace_placeholders(context, substring) + assert ( + expected in context.response.text + ), f"The response text '{context.response.text}' doesn't contain '{expected}'" + + +@then("The body of the response does not contain {substring}") +def check_response_body_does_not_contain(context: Context, substring: str) -> None: + """Check that response body does not contain a substring.""" assert context.response is not None, "Request needs to be performed first" assert ( - substring in context.response.text - ), f"The response text '{context.response.text}' doesn't contain '{substring}'" + substring not in context.response.text + ), f"The response text '{context.response.text}' contains '{substring}'" @then("The body of the response is the following") diff --git a/tests/e2e/features/streaming_query.feature b/tests/e2e/features/streaming_query.feature index 4e587525e..d4e14b180 100644 --- a/tests/e2e/features/streaming_query.feature +++ b/tests/e2e/features/streaming_query.feature @@ -133,7 +133,7 @@ Feature: streaming_query endpoint API tests {"query": "Say hello", "model": "{MODEL}", "provider":"unknown"} """ Then The status code of the response is 404 - And The body of the response contains Model with ID gpt-4o-mini does not exist + And The body of the response contains Model with ID {MODEL} does not exist And The token metrics should not have changed Scenario: Check if LLM responds properly when XML and JSON attachments are sent diff --git
a/tests/e2e/secrets/invalid-mcp-token b/tests/e2e/secrets/invalid-mcp-token new file mode 100644 index 000000000..3707272a2 --- /dev/null +++ b/tests/e2e/secrets/invalid-mcp-token @@ -0,0 +1 @@ +invalid-token \ No newline at end of file diff --git a/tests/e2e/test_list.txt b/tests/e2e/test_list.txt index 3f94d09e0..0da5cae41 100644 --- a/tests/e2e/test_list.txt +++ b/tests/e2e/test_list.txt @@ -9,11 +9,11 @@ features/conversation_cache_v2.feature features/feedback.feature features/health.feature features/info.feature +features/responses.feature features/query.feature features/rlsapi_v1.feature features/rlsapi_v1_errors.feature features/streaming_query.feature features/rest_api.feature features/mcp.feature -features/mcp_file_auth.feature features/models.feature diff --git a/tests/e2e/utils/llama_stack_shields.py b/tests/e2e/utils/llama_stack_utils.py similarity index 62% rename from tests/e2e/utils/llama_stack_shields.py rename to tests/e2e/utils/llama_stack_utils.py index 4f793c0bf..2a8c66670 100644 --- a/tests/e2e/utils/llama_stack_shields.py +++ b/tests/e2e/utils/llama_stack_utils.py @@ -1,9 +1,12 @@ -"""E2E helpers to unregister and re-register Llama Stack shields via the client API. +"""E2E test utilities for Llama Stack (toolgroups and shields). -Used by the @disable-shields tag: before the scenario we call client.shields.delete() -to unregister the shield; after the scenario we call client.shields.register() -to restore it. Only applies in server mode (Llama Stack as a separate service). -Requires E2E_LLAMA_STACK_URL or E2E_LLAMA_HOSTNAME/E2E_LLAMA_PORT. +This module provides functions to manage MCP toolgroups and shields on a running +Llama Stack instance during end-to-end tests: unregister MCP toolgroups when +switching configurations or testing MCP auth, and unregister/re-register shields +(e.g. for the @disable-shields tag). + +Only applies when running Llama Stack as a separate service (server mode). 
+Requires E2E_LLAMA_STACK_URL or E2E_LLAMA_HOSTNAME and E2E_LLAMA_PORT. """ import asyncio @@ -29,6 +32,54 @@ def _get_llama_stack_client() -> AsyncLlamaStackClient: return AsyncLlamaStackClient(base_url=base_url, api_key=api_key, timeout=timeout) +# ----------------------------------------------------------------------------- +# Toolgroups +# ----------------------------------------------------------------------------- + + +async def _unregister_toolgroup_async(identifier: str) -> None: + """Unregister a toolgroup by identifier; treat a 400 'not found' as already removed.""" + client = _get_llama_stack_client() + try: + await client.toolgroups.unregister(identifier) + except APIConnectionError: + raise + except APIStatusError as e: + # 400 "not found": toolgroup already absent, scenario can proceed + if e.status_code == 400 and "not found" in str(e).lower(): + return None + raise + finally: + await client.close() + + +async def _unregister_mcp_toolgroups_async() -> None: + """Unregister all MCP toolgroups.""" + client = _get_llama_stack_client() + try: + toolgroups = await client.toolgroups.list() + for toolgroup in toolgroups: + if ( + toolgroup.identifier + and toolgroup.provider_id == "model-context-protocol" + ): + await _unregister_toolgroup_async(toolgroup.identifier) + except APIConnectionError: + raise + finally: + await client.close() + + +def unregister_mcp_toolgroups() -> None: + """Unregister all MCP toolgroups.""" + asyncio.run(_unregister_mcp_toolgroups_async()) + + +# ----------------------------------------------------------------------------- +# Shields +# ----------------------------------------------------------------------------- + + async def _unregister_shield_async(identifier: str) -> Optional[tuple[str, str]]: """Unregister a shield by identifier; return (provider_id, provider_shield_id) for restore.""" client = _get_llama_stack_client() diff --git a/tests/e2e/utils/utils.py b/tests/e2e/utils/utils.py index 6b73a0e1f..a47ff1750
100644 --- a/tests/e2e/utils/utils.py +++ b/tests/e2e/utils/utils.py @@ -246,6 +246,35 @@ def remove_config_backup(backup_path: str) -> None: print(f"Warning: Could not remove backup file {backup_path}: {e}") +def clear_llama_stack_storage(container_name: str = "lightspeed-stack") -> None: + """Clear Llama Stack storage in library mode (embedded Llama Stack). + + Removes the ~/.llama directory so that toolgroups and other persisted + state are reset. Used before MCP config scenarios when not running in + server mode (no separate Llama Stack to unregister toolgroups from). + Only runs when using Docker (skipped in Prow). + + Parameters: + container_name (str): Docker container name (default "lightspeed-stack"). + + Returns: + None + """ + if is_prow_environment(): + return + + try: + subprocess.run( + ["docker", "exec", container_name, "sh", "-c", "rm -rf ~/.llama"], + capture_output=True, + text=True, + timeout=10, + check=False, + ) + except subprocess.TimeoutExpired as e: + print(f"Warning: Could not clear Llama Stack storage: {e}") + + def restart_container(container_name: str) -> None: """Restart a Docker container by name and wait until it is healthy. 
@@ -268,7 +297,7 @@ def restart_container(container_name: str) -> None: check=True, ) except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e: - print(f"Failed to restart container {container_name}: {str(e.stderr)}") + print(f"Failed to restart container {container_name}: {e.stderr!s}") raise # Wait for container to be healthy @@ -287,5 +316,4 @@ def replace_placeholders(context: Context, text: str) -> str: """ result = text.replace("{MODEL}", context.default_model) result = result.replace("{PROVIDER}", context.default_provider) - result = result.replace("{VECTOR_STORE_ID}", context.faiss_vector_store_id) - return result + return result.replace("{VECTOR_STORE_ID}", context.faiss_vector_store_id) diff --git a/tests/integration/endpoints/test_query_byok_integration.py b/tests/integration/endpoints/test_query_byok_integration.py new file mode 100644 index 000000000..40191821f --- /dev/null +++ b/tests/integration/endpoints/test_query_byok_integration.py @@ -0,0 +1,1112 @@ +"""Integration tests for /query endpoint BYOK inline and tool RAG functionality.""" + +# pylint: disable=too-many-lines + +from collections.abc import Generator +from typing import Any + +import pytest +from fastapi import Request +from llama_stack_api.openai_responses import OpenAIResponseObject +from llama_stack_client.types import VersionInfo +from pytest_mock import AsyncMockType, MockerFixture +from sqlalchemy.engine import Engine +from sqlalchemy.orm import Session, sessionmaker + +import app.database +import constants +from app.endpoints.query import query_endpoint_handler +from authentication.interface import AuthTuple +from configuration import AppConfig +from models.requests import QueryRequest +from models.responses import QueryResponse + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_byok_vector_io_response(mocker: MockerFixture) -> Any: 
+ """Build a mock vector_io.query response with BYOK RAG chunks. + + Returns a mock with .chunks and .scores attributes simulating a + vector store search result with two chunks. + """ + chunk_1 = mocker.MagicMock() + chunk_1.content = "OpenShift is a Kubernetes distribution by Red Hat." + chunk_1.chunk_id = "chunk-1" + chunk_1.metadata = { + "document_id": "doc-ocp-overview", + "title": "OpenShift Overview", + "reference_url": "https://docs.redhat.com/ocp/overview", + } + + chunk_2 = mocker.MagicMock() + chunk_2.content = "Pods are the smallest deployable units in Kubernetes." + chunk_2.chunk_id = "chunk-2" + chunk_2.metadata = { + "document_id": "doc-k8s-pods", + "title": "Kubernetes Pods", + "reference_url": "https://docs.redhat.com/k8s/pods", + } + + response = mocker.MagicMock() + response.chunks = [chunk_1, chunk_2] + response.scores = [0.95, 0.88] + return response + + +def _make_vector_io_response( + mocker: MockerFixture, + chunks_data: list[tuple[str, str, float]], +) -> Any: + """Build a mock vector_io.query response with arbitrary chunks. + + Parameters: + mocker: pytest-mock fixture. + chunks_data: List of (content, chunk_id, score) tuples. + + Returns: + Mock with .chunks and .scores attributes. + """ + chunks = [] + scores = [] + for content, chunk_id, score in chunks_data: + chunk = mocker.MagicMock() + chunk.content = content + chunk.chunk_id = chunk_id + chunk.metadata = {"document_id": chunk_id} + chunks.append(chunk) + scores.append(score) + + response = mocker.MagicMock() + response.chunks = chunks + response.scores = scores + return response + + +def _build_base_mock_client(mocker: MockerFixture) -> Any: + """Build a base mock Llama Stack client with common stubs. + + Configures models, shields, conversations, version, and a default + responses.create return value. 
+ """ + mock_client = mocker.AsyncMock() + + # Model list + mock_model = mocker.MagicMock() + mock_model.id = "test-provider/test-model" + mock_model.custom_metadata = { + "provider_id": "test-provider", + "model_type": "llm", + } + mock_client.models.list.return_value = [mock_model] + + # Shields (empty) + mock_client.shields.list.return_value = [] + + # Conversations + mock_conversation = mocker.MagicMock() + mock_conversation.id = "conv_" + "a" * 48 + mock_client.conversations.create = mocker.AsyncMock(return_value=mock_conversation) + + # Version + mock_client.inspect.version.return_value = VersionInfo(version="0.4.3") + + # Default response + mock_response = mocker.MagicMock(spec=OpenAIResponseObject) + mock_response.id = "response-byok" + mock_output_item = mocker.MagicMock() + mock_output_item.type = "message" + mock_output_item.role = "assistant" + mock_output_item.content = ( + "Based on the documentation, OpenShift is a Kubernetes distribution." + ) + mock_output_item.refusal = None + mock_response.output = [mock_output_item] + mock_response.stop_reason = "end_turn" + mock_response.tool_calls = [] + mock_usage = mocker.MagicMock() + mock_usage.input_tokens = 50 + mock_usage.output_tokens = 20 + mock_response.usage = mock_usage + mock_client.responses.create.return_value = mock_response + + return mock_client + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture(name="mock_byok_client") +def mock_byok_client_fixture( + mocker: MockerFixture, +) -> Generator[Any, None, None]: + """Mock Llama Stack client with BYOK inline RAG configured. + + Configures vector_io.query to return BYOK RAG chunks and sets + vector_stores.list to empty (no tool-based vector stores). 
+ """ + mock_holder_class = mocker.patch("app.endpoints.query.AsyncLlamaStackClientHolder") + mock_client = _build_base_mock_client(mocker) + + # BYOK vector_io returns results + mock_client.vector_io.query = mocker.AsyncMock( + return_value=_make_byok_vector_io_response(mocker) + ) + + # No tool-based vector stores + mock_vector_stores_response = mocker.MagicMock() + mock_vector_stores_response.data = [] + mock_client.vector_stores.list.return_value = mock_vector_stores_response + + mock_holder_class.return_value.get_client.return_value = mock_client + yield mock_client + + +@pytest.fixture(name="mock_byok_tool_rag_client") +def mock_byok_tool_rag_client_fixture( + mocker: MockerFixture, +) -> Generator[Any, None, None]: + """Mock Llama Stack client with BYOK tool RAG (file_search) configured. + + Configures vector_stores.list with a BYOK store and responses.create + to return a file_search_call output item alongside the assistant message. + """ + mock_holder_class = mocker.patch("app.endpoints.query.AsyncLlamaStackClientHolder") + mock_client = _build_base_mock_client(mocker) + + # vector_io returns empty (no inline RAG) + mock_empty_vector_io = mocker.MagicMock() + mock_empty_vector_io.chunks = [] + mock_empty_vector_io.scores = [] + mock_client.vector_io.query = mocker.AsyncMock(return_value=mock_empty_vector_io) + + # Tool-based vector stores available + mock_vector_store = mocker.MagicMock() + mock_vector_store.id = "vs-byok-knowledge" + mock_list_result = mocker.MagicMock() + mock_list_result.data = [mock_vector_store] + mock_client.vector_stores.list.return_value = mock_list_result + + # Response with file_search tool call + mock_response = mocker.MagicMock(spec=OpenAIResponseObject) + mock_response.id = "response-tool-rag" + + mock_tool_output = mocker.MagicMock() + mock_tool_output.type = "file_search_call" + mock_tool_output.id = "call-fs-1" + mock_tool_output.queries = ["What is OpenShift?"] + mock_tool_output.status = "completed" + + mock_result = 
mocker.MagicMock() + mock_result.file_id = "doc-ocp-1" + mock_result.filename = "openshift-docs.txt" + mock_result.score = 0.92 + mock_result.text = "OpenShift is a Kubernetes distribution by Red Hat." + mock_result.attributes = { + "doc_url": "https://docs.redhat.com/ocp/overview", + "link": "https://docs.redhat.com/ocp/overview", + } + mock_result.model_dump = mocker.Mock( + return_value={ + "file_id": "doc-ocp-1", + "filename": "openshift-docs.txt", + "score": 0.92, + "text": "OpenShift is a Kubernetes distribution by Red Hat.", + "attributes": { + "doc_url": "https://docs.redhat.com/ocp/overview", + }, + } + ) + mock_tool_output.results = [mock_result] + + mock_message = mocker.MagicMock() + mock_message.type = "message" + mock_message.role = "assistant" + mock_message.content = ( + "Based on the documentation, OpenShift is a Kubernetes distribution." + ) + mock_message.refusal = None + + mock_response.output = [mock_tool_output, mock_message] + mock_response.stop_reason = "end_turn" + mock_response.tool_calls = [] + mock_usage = mocker.MagicMock() + mock_usage.input_tokens = 60 + mock_usage.output_tokens = 25 + mock_response.usage = mock_usage + mock_client.responses.create.return_value = mock_response + + mock_holder_class.return_value.get_client.return_value = mock_client + yield mock_client + + +@pytest.fixture(name="patch_db_session", autouse=True) +def patch_db_session_fixture( + test_db_session: Session, + test_db_engine: Engine, +) -> Generator[Session, None, None]: + """Patch global database session to use in-memory test database.""" + original_engine = app.database.engine + original_session_local = app.database.session_local + + app.database.engine = test_db_engine + app.database.session_local = sessionmaker(bind=test_db_engine) + + yield test_db_session + + app.database.engine = original_engine + app.database.session_local = original_session_local + + +@pytest.fixture(name="byok_config") +def byok_config_fixture(test_config: AppConfig, mocker: 
MockerFixture) -> AppConfig: + """Load test config and patch BYOK RAG configuration. + + Adds a BYOK RAG entry and inline RAG strategy so that inline RAG + code paths are exercised with real configuration logic. + """ + byok_entry = mocker.MagicMock() + byok_entry.rag_id = "test-knowledge" + byok_entry.vector_db_id = "vs-byok-knowledge" + byok_entry.score_multiplier = 1.0 + byok_entry.model_dump.return_value = { + "rag_id": "test-knowledge", + "rag_type": "inline::faiss", + "embedding_model": "sentence-transformers/all-mpnet-base-v2", + "embedding_dimension": 768, + "vector_db_id": "vs-byok-knowledge", + "db_path": "/tmp/test-db", + "score_multiplier": 1.0, + } + + # Patch the loaded configuration's byok_rag and rag.inline + test_config.configuration.byok_rag = [byok_entry] + test_config.configuration.rag.inline = ["test-knowledge"] + + return test_config + + +@pytest.fixture(name="byok_tool_config") +def byok_tool_config_fixture( + test_config: AppConfig, mocker: MockerFixture +) -> AppConfig: + """Load test config with BYOK RAG configured for tool-based (file_search) usage. + + Sets rag.inline to empty and rag.tool to include the BYOK store, + so only tool-based RAG is active. 
+ """ + byok_entry = mocker.MagicMock() + byok_entry.rag_id = "test-knowledge" + byok_entry.vector_db_id = "vs-byok-knowledge" + byok_entry.score_multiplier = 1.0 + byok_entry.model_dump.return_value = { + "rag_id": "test-knowledge", + "rag_type": "inline::faiss", + "embedding_model": "sentence-transformers/all-mpnet-base-v2", + "embedding_dimension": 768, + "vector_db_id": "vs-byok-knowledge", + "db_path": "/tmp/test-db", + "score_multiplier": 1.0, + } + + test_config.configuration.byok_rag = [byok_entry] + test_config.configuration.rag.inline = [] + test_config.configuration.rag.tool = ["test-knowledge"] + + return test_config + + +# ============================================================================== +# Inline BYOK RAG Tests +# ============================================================================== + + +@pytest.mark.asyncio +async def test_query_byok_inline_rag_injects_context( + byok_config: AppConfig, + mock_byok_client: AsyncMockType, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that inline BYOK RAG fetches chunks and injects context into the query. + + Verifies: + - vector_io.query is called for BYOK inline RAG + - RAG context is injected into the responses.create input + - Response includes RAG chunks from inline sources + """ + _ = byok_config + + query_request = QueryRequest(query="What is OpenShift?") + + response = await query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert response.response is not None + + # Verify vector_io.query was called for inline RAG + mock_byok_client.vector_io.query.assert_called() + # call_args.kwargs holds the keyword arguments of the most recent call to vector_io.query. + # e.g. "vector_store_id" is the store queried, "query" is the search text. + call_kwargs = mock_byok_client.vector_io.query.call_args.kwargs + assert call_kwargs["query"] == "What is OpenShift?" 
+ + # Verify RAG context was injected into responses.create input + # Use call_args_list[0] — the first call is the main query; + # a second call may follow for topic summary generation. + create_kwargs = mock_byok_client.responses.create.call_args_list[0].kwargs + input_text = create_kwargs["input"] + assert "file_search found" in input_text + assert "OpenShift is a Kubernetes distribution" in input_text + + # Verify RAG chunks are included in the response + assert response.rag_chunks is not None + assert len(response.rag_chunks) > 0 + + +@pytest.mark.asyncio +async def test_query_byok_inline_rag_returns_referenced_documents( + byok_config: AppConfig, + mock_byok_client: AsyncMockType, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that inline BYOK RAG extracts referenced documents from chunks. + + Verifies: + - Referenced documents are extracted from BYOK RAG chunk metadata + - Documents include URLs from chunk metadata + """ + _ = byok_config + _ = mock_byok_client + + query_request = QueryRequest(query="What is OpenShift?") + + response = await query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert response.referenced_documents is not None + assert len(response.referenced_documents) == 2 + + # Verify known document metadata propagated from mock chunks + doc_urls = [ + str(doc.doc_url) for doc in response.referenced_documents if doc.doc_url + ] + assert any( + "docs.redhat.com/ocp/overview" in url for url in doc_urls + ), f"Expected ocp/overview URL in {doc_urls}" + assert any( + "docs.redhat.com/k8s/pods" in url for url in doc_urls + ), f"Expected k8s/pods URL in {doc_urls}" + + doc_titles = [ + doc.doc_title for doc in response.referenced_documents if doc.doc_title + ] + assert "OpenShift Overview" in doc_titles + assert "Kubernetes Pods" in doc_titles + + +@pytest.mark.asyncio +async def test_query_byok_inline_rag_with_request_vector_store_ids( + test_config: 
AppConfig, + mocker: MockerFixture, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that per-request vector_store_ids override config-based inline RAG. + + Config has rag.inline = ["source-a"] (resolves to vs-source-a). + Request passes vector_store_ids = ["vs-source-b"]. + Only vs-source-b should be queried, proving the override works. + (passing vector_store_ids overrides config) + + Verifies: + - vector_io.query is called with the request-specified store, not config + - The config-based store is NOT queried + """ + entry_a = mocker.MagicMock() + entry_a.rag_id = "source-a" + entry_a.vector_db_id = "vs-source-a" + entry_a.score_multiplier = 1.0 + + entry_b = mocker.MagicMock() + entry_b.rag_id = "source-b" + entry_b.vector_db_id = "vs-source-b" + entry_b.score_multiplier = 1.0 + + test_config.configuration.byok_rag = [entry_a, entry_b] + test_config.configuration.rag.inline = ["source-a"] + + mock_holder_class = mocker.patch("app.endpoints.query.AsyncLlamaStackClientHolder") + mock_client = _build_base_mock_client(mocker) + + mock_client.vector_io.query = mocker.AsyncMock( + return_value=_make_byok_vector_io_response(mocker) + ) + + mock_vs_resp = mocker.MagicMock() + mock_vs_resp.data = [] + mock_client.vector_stores.list.return_value = mock_vs_resp + + mock_holder_class.return_value.get_client.return_value = mock_client + + # Override: request specifies vs-source-b, not the config's vs-source-a + query_request = QueryRequest( + query="What is OpenShift?", + vector_store_ids=["vs-source-b"], + ) + + await query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + # Verify only vs-source-b was queried (not the config's vs-source-a) + assert mock_client.vector_io.query.call_count == 1 + # call_args.kwargs holds the keyword arguments of the most recent call to vector_io.query. + # e.g. "vector_store_id" is the store queried, "query" is the search text. 
+ call_kwargs = mock_client.vector_io.query.call_args.kwargs + assert call_kwargs["vector_store_id"] == "vs-source-b" + + +@pytest.mark.asyncio +async def test_query_byok_request_vector_store_ids_filters_configured_stores( + test_config: AppConfig, + mocker: MockerFixture, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that request vector_store_ids selects a subset of stores configured in rag.inline. + + Both source-a and source-b are registered in byok_rag and listed in rag.inline. + The request passes vector_store_ids = ["vs-source-a"] to select only one. + + Verifies: + - vector_io.query is called exactly once (for vs-source-a only) + - vs-source-b is NOT queried despite being in rag.inline + - Returned chunks only reference source-a + """ + entry_a = mocker.MagicMock() + entry_a.rag_id = "source-a" + entry_a.vector_db_id = "vs-source-a" + entry_a.score_multiplier = 1.0 + + entry_b = mocker.MagicMock() + entry_b.rag_id = "source-b" + entry_b.vector_db_id = "vs-source-b" + entry_b.score_multiplier = 1.0 + + # Both sources are in config + test_config.configuration.byok_rag = [entry_a, entry_b] + test_config.configuration.rag.inline = ["source-a", "source-b"] + + mock_holder_class = mocker.patch("app.endpoints.query.AsyncLlamaStackClientHolder") + mock_client = _build_base_mock_client(mocker) + + mock_client.vector_io.query = mocker.AsyncMock( + return_value=_make_byok_vector_io_response(mocker) + ) + + mock_vs_resp = mocker.MagicMock() + mock_vs_resp.data = [] + mock_client.vector_stores.list.return_value = mock_vs_resp + + mock_holder_class.return_value.get_client.return_value = mock_client + + # Request narrows down to only vs-source-a + query_request = QueryRequest( + query="What is OpenShift?", + vector_store_ids=["vs-source-a"], + ) + + response = await query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + # Only vs-source-a should have been queried + assert 
mock_client.vector_io.query.call_count == 1 + # call_args.kwargs holds the keyword arguments of the most recent call to vector_io.query. + # e.g. "vector_store_id" is the store queried, "query" is the search text. + call_kwargs = mock_client.vector_io.query.call_args.kwargs + assert call_kwargs["vector_store_id"] == "vs-source-a" + + # Chunks should only come from source-a + assert response.rag_chunks is not None + assert len(response.rag_chunks) == 2 + assert all(chunk.source == "source-a" for chunk in response.rag_chunks) + + +@pytest.mark.asyncio +async def test_query_byok_inline_rag_empty_vector_store_ids_returns_no_chunks( + byok_config: AppConfig, + mock_byok_client: AsyncMockType, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that passing an empty vector_store_ids list produces no RAG chunks. + + Verifies: + - vector_io.query is never called when vector_store_ids=[] + - Response contains no RAG chunks + - Response still succeeds + """ + _ = byok_config + + query_request = QueryRequest(query="What is OpenShift?", vector_store_ids=[]) + + response = await query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert response.response is not None + mock_byok_client.vector_io.query.assert_not_called() + assert not response.rag_chunks + + +@pytest.mark.asyncio +async def test_query_byok_inline_rag_error_is_handled_gracefully( + byok_config: AppConfig, + mock_byok_client: AsyncMockType, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that BYOK RAG search failures are handled gracefully. 
+ + Verifies: + - When vector_io.query raises an exception, the query still succeeds + - The error is silently handled (BYOK search errors are non-fatal) + """ + _ = byok_config + + mock_byok_client.vector_io.query.side_effect = Exception("Connection refused") + + query_request = QueryRequest(query="What is OpenShift?") + + response = await query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + # Query should succeed despite BYOK RAG failure, but with no chunks + assert isinstance(response, QueryResponse) + assert not response.rag_chunks + + +# ============================================================================== +# Tool-based BYOK RAG Tests +# ============================================================================== + + +@pytest.mark.asyncio +async def test_query_byok_tool_rag_returns_tool_calls( + byok_tool_config: AppConfig, + mock_byok_tool_rag_client: AsyncMockType, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that BYOK tool RAG results include file_search tool calls. + + Verifies: + - Response includes tool_calls from file_search_call output + - Tool call name is file_search + """ + _ = byok_tool_config + _ = mock_byok_tool_rag_client + + query_request = QueryRequest(query="What is OpenShift?") + + response = await query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert response.tool_calls is not None + assert len(response.tool_calls) > 0 + assert response.tool_calls[0].name == "file_search" + + +@pytest.mark.asyncio +async def test_query_byok_tool_rag_referenced_documents( + byok_tool_config: AppConfig, + mock_byok_tool_rag_client: AsyncMockType, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that BYOK tool RAG extracts referenced documents from file_search results. 
+ + Verifies: + - Referenced documents are extracted from file_search_call results + - Documents include proper metadata + """ + _ = byok_tool_config + _ = mock_byok_tool_rag_client + + query_request = QueryRequest(query="What is OpenShift?") + + response = await query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert response.referenced_documents is not None + assert len(response.referenced_documents) >= 1 + + # Verify known values from the mock file_search result propagated + doc_urls = [ + str(doc.doc_url) for doc in response.referenced_documents if doc.doc_url + ] + assert any( + "docs.redhat.com/ocp/overview" in url for url in doc_urls + ), f"Expected ocp/overview URL in {doc_urls}" + + +# ============================================================================== +# Combined Inline + Tool RAG Tests +# ============================================================================== + + +@pytest.mark.asyncio +async def test_query_byok_combined_inline_and_tool_rag( # pylint: disable=too-many-locals,too-many-statements + test_config: AppConfig, + mocker: MockerFixture, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that inline and tool-based BYOK RAG results are combined. 
+ + Verifies: + - Both inline RAG chunks and tool RAG chunks appear in response + - RAG chunks from both sources are merged + """ + # Configure both inline and tool RAG + byok_entry = mocker.MagicMock() + byok_entry.rag_id = "test-knowledge" + byok_entry.vector_db_id = "vs-byok-knowledge" + byok_entry.score_multiplier = 1.0 + test_config.configuration.byok_rag = [byok_entry] + test_config.configuration.rag.inline = ["test-knowledge"] + test_config.configuration.rag.tool = ["test-knowledge"] + + # Mock Llama Stack client + mock_holder_class = mocker.patch("app.endpoints.query.AsyncLlamaStackClientHolder") + mock_client = _build_base_mock_client(mocker) + + # Inline RAG returns chunks via vector_io + mock_client.vector_io.query = mocker.AsyncMock( + return_value=_make_byok_vector_io_response(mocker) + ) + + # Tool RAG vector stores + mock_vector_store = mocker.MagicMock() + mock_vector_store.id = "vs-byok-knowledge" + mock_list_result = mocker.MagicMock() + mock_list_result.data = [mock_vector_store] + mock_client.vector_stores.list.return_value = mock_list_result + + # Response includes file_search_call (tool RAG result) + mock_response = mocker.MagicMock(spec=OpenAIResponseObject) + mock_response.id = "response-combined" + + mock_tool_output = mocker.MagicMock() + mock_tool_output.type = "file_search_call" + mock_tool_output.id = "call-fs-combined" + mock_tool_output.queries = ["What is OpenShift?"] + mock_tool_output.status = "completed" + + mock_result = mocker.MagicMock() + mock_result.file_id = "doc-tool-1" + mock_result.filename = "tool-doc.txt" + mock_result.score = 0.90 + mock_result.text = "Tool-based RAG result about OpenShift." 
+ mock_result.attributes = {"doc_url": "https://example.com/tool-doc"} + mock_result.model_dump = mocker.Mock( + return_value={ + "file_id": "doc-tool-1", + "filename": "tool-doc.txt", + "score": 0.90, + "text": "Tool-based RAG result about OpenShift.", + "attributes": {"doc_url": "https://example.com/tool-doc"}, + } + ) + mock_tool_output.results = [mock_result] + + mock_message = mocker.MagicMock() + mock_message.type = "message" + mock_message.role = "assistant" + mock_message.content = "Combined answer from inline and tool RAG." + mock_message.refusal = None + + mock_response.output = [mock_tool_output, mock_message] + mock_response.stop_reason = "end_turn" + mock_response.tool_calls = [] + mock_usage = mocker.MagicMock() + mock_usage.input_tokens = 80 + mock_usage.output_tokens = 30 + mock_response.usage = mock_usage + mock_client.responses.create.return_value = mock_response + + mock_holder_class.return_value.get_client.return_value = mock_client + + query_request = QueryRequest(query="What is OpenShift?") + + response = await query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + # Verify both inline and tool RAG chunks are present + assert response.rag_chunks is not None + assert len(response.rag_chunks) == 3 + + # Verify tool calls are present (from tool RAG) + assert response.tool_calls is not None + assert len(response.tool_calls) == 1 + + +# ============================================================================== +# Inline RAG rag_id Resolution Tests +# ============================================================================== + + +@pytest.mark.asyncio +async def test_query_byok_inline_rag_only_configured_rag_id_is_queried( + test_config: AppConfig, + mocker: MockerFixture, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that only the rag_id listed in rag.inline triggers retrieval. 
+ + Two BYOK sources are registered (source-a and source-b) but only + source-a is listed in rag.inline. Only the vector_db_id for + source-a should be queried and only its chunks should appear in the response. + + Verifies: + - vector_io.query is called exactly once (for the configured source) + - The call targets the correct vector_db_id + - Returned chunks only reference source-a + - source-b chunks are absent + """ + entry_a = mocker.MagicMock() + entry_a.rag_id = "source-a" + entry_a.vector_db_id = "vs-source-a" + entry_a.score_multiplier = 1.0 + + entry_b = mocker.MagicMock() + entry_b.rag_id = "source-b" + entry_b.vector_db_id = "vs-source-b" + entry_b.score_multiplier = 1.0 + + test_config.configuration.byok_rag = [entry_a, entry_b] + test_config.configuration.rag.inline = ["source-a"] + + mock_holder_class = mocker.patch("app.endpoints.query.AsyncLlamaStackClientHolder") + mock_client = _build_base_mock_client(mocker) + + mock_client.vector_io.query = mocker.AsyncMock( + return_value=_make_byok_vector_io_response(mocker) + ) + + mock_vs_resp = mocker.MagicMock() + mock_vs_resp.data = [] + mock_client.vector_stores.list.return_value = mock_vs_resp + + mock_holder_class.return_value.get_client.return_value = mock_client + + query_request = QueryRequest(query="What is OpenShift?") + + response = await query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert mock_client.vector_io.query.call_count == 1 + # call_args.kwargs holds the keyword arguments of the most recent call to vector_io.query. + # e.g. "vector_store_id" is the store queried, "query" is the search text. 
+ call_kwargs = mock_client.vector_io.query.call_args.kwargs + assert call_kwargs["vector_store_id"] == "vs-source-a" + + assert response.rag_chunks is not None + assert len(response.rag_chunks) == 2 + sources = {chunk.source for chunk in response.rag_chunks} + assert "source-a" in sources + assert "source-b" not in sources + + +# ============================================================================== +# Score Multiplier Priority Tests +# ============================================================================== + + +@pytest.mark.asyncio +async def test_query_byok_score_multiplier_shifts_chunk_priority( # pylint: disable=too-many-locals + test_config: AppConfig, + mocker: MockerFixture, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that score_multiplier can shift chunk priority across sources. + + Doc A (source-a) has high base similarity (0.90) with multiplier 1.0. + Doc B (source-b) has low base similarity (0.40) with multiplier 5.0. + After weighting: Doc A = 0.90, Doc B = 2.00. + Doc B should appear above Doc A in the final chunks. 
+ + Verifies: + - The chunk with the higher weighted score appears first + - score_multiplier correctly influences ranking + """ + entry_a = mocker.MagicMock() + entry_a.rag_id = "source-a" + entry_a.vector_db_id = "vs-source-a" + entry_a.score_multiplier = 1.0 + + entry_b = mocker.MagicMock() + entry_b.rag_id = "source-b" + entry_b.vector_db_id = "vs-source-b" + entry_b.score_multiplier = 5.0 + + test_config.configuration.byok_rag = [entry_a, entry_b] + test_config.configuration.rag.inline = ["source-a", "source-b"] + + mock_holder_class = mocker.patch("app.endpoints.query.AsyncLlamaStackClientHolder") + mock_client = _build_base_mock_client(mocker) + + # Source A: high base similarity + resp_a = _make_vector_io_response( + mocker, + [ + ("Doc A content - high similarity", "doc-a", 0.90), + ], + ) + # Source B: low base similarity + resp_b = _make_vector_io_response( + mocker, + [ + ("Doc B content - low similarity", "doc-b", 0.40), + ], + ) + + # Return different results per vector store + async def _side_effect(**kwargs: Any) -> Any: + if kwargs["vector_store_id"] == "vs-source-a": + return resp_a + return resp_b + + mock_client.vector_io.query = mocker.AsyncMock(side_effect=_side_effect) + + mock_vs_resp = mocker.MagicMock() + mock_vs_resp.data = [] + mock_client.vector_stores.list.return_value = mock_vs_resp + + mock_holder_class.return_value.get_client.return_value = mock_client + + query_request = QueryRequest(query="test query") + + response = await query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert response.rag_chunks is not None + assert len(response.rag_chunks) == 2 + + # Doc B (weighted 2.0) should rank above Doc A (weighted 0.9) + first_chunk = response.rag_chunks[0] + second_chunk = response.rag_chunks[1] + assert first_chunk.source == "source-b" + assert second_chunk.source == "source-a" + assert first_chunk.score > second_chunk.score + + +# 
============================================================================== +# BYOK_RAG_MAX_CHUNKS Capping Tests +# ============================================================================== + + +@pytest.mark.asyncio +async def test_query_byok_max_chunks_caps_retrieved_results( # pylint: disable=too-many-locals + test_config: AppConfig, + mocker: MockerFixture, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that BYOK_RAG_MAX_CHUNKS caps the number of returned chunks. + + A single source returns more chunks than BYOK_RAG_MAX_CHUNKS allows. + The response should contain at most BYOK_RAG_MAX_CHUNKS chunks and + they should be the highest-scored ones. + + Verifies: + - Number of RAG chunks does not exceed BYOK_RAG_MAX_CHUNKS + - Returned chunks are the top-scoring ones + """ + entry = mocker.MagicMock() + entry.rag_id = "big-source" + entry.vector_db_id = "vs-big-source" + entry.score_multiplier = 1.0 + + test_config.configuration.byok_rag = [entry] + test_config.configuration.rag.inline = ["big-source"] + + mock_holder_class = mocker.patch("app.endpoints.query.AsyncLlamaStackClientHolder") + mock_client = _build_base_mock_client(mocker) + + # Generate more chunks than BYOK_RAG_MAX_CHUNKS + num_chunks = constants.BYOK_RAG_MAX_CHUNKS + 1 + chunks_data = [ + (f"Chunk content {i}", f"chunk-{i}", round(0.50 + i * 0.03, 2)) + for i in range(num_chunks) + ] + # Scores increase with index (step 0.03 from 0.50), so the last chunk scores highest + mock_client.vector_io.query = mocker.AsyncMock( + return_value=_make_vector_io_response(mocker, chunks_data) + ) + + mock_vs_resp = mocker.MagicMock() + mock_vs_resp.data = [] + mock_client.vector_stores.list.return_value = mock_vs_resp + + mock_holder_class.return_value.get_client.return_value = mock_client + + query_request = QueryRequest(query="test query") + + response = await query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert
response.rag_chunks is not None + assert len(response.rag_chunks) == constants.BYOK_RAG_MAX_CHUNKS + + # Verify chunks are sorted by score descending (highest first) + scores = [chunk.score for chunk in response.rag_chunks] + assert scores == sorted(scores, reverse=True) + + # The lowest-scored chunks from the original set should be excluded + # The highest score in the original set is at the last index + highest_original_score = chunks_data[-1][2] # score of the last chunk + assert response.rag_chunks[0].score == highest_original_score + + +@pytest.mark.asyncio +async def test_query_byok_max_chunks_caps_across_multiple_sources( # pylint: disable=too-many-locals + test_config: AppConfig, + mocker: MockerFixture, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that BYOK_RAG_MAX_CHUNKS caps chunks across multiple sources. + + Two sources each return several chunks. The combined result should + not exceed BYOK_RAG_MAX_CHUNKS and should contain the globally + highest-scored chunks regardless of source. 
+ + Verifies: + - Total chunks across sources are capped at BYOK_RAG_MAX_CHUNKS + - Top-scoring chunks from both sources are included + """ + entry_a = mocker.MagicMock() + entry_a.rag_id = "source-a" + entry_a.vector_db_id = "vs-source-a" + entry_a.score_multiplier = 1.0 + + entry_b = mocker.MagicMock() + entry_b.rag_id = "source-b" + entry_b.vector_db_id = "vs-source-b" + entry_b.score_multiplier = 1.0 + + test_config.configuration.byok_rag = [entry_a, entry_b] + test_config.configuration.rag.inline = ["source-a", "source-b"] + + mock_holder_class = mocker.patch("app.endpoints.query.AsyncLlamaStackClientHolder") + mock_client = _build_base_mock_client(mocker) + + # Overlapping score bands so top-k must pick from both sources + n = constants.BYOK_RAG_MAX_CHUNKS + resp_a = _make_vector_io_response( + mocker, + [ + (f"Source A chunk {i}", f"a-chunk-{i}", round(0.70 + i * 0.05, 2)) + for i in range(n) + ], + ) + resp_b = _make_vector_io_response( + mocker, + [ + (f"Source B chunk {i}", f"b-chunk-{i}", round(0.72 + i * 0.05, 2)) + for i in range(n) + ], + ) + + async def _side_effect(**kwargs: Any) -> Any: + if kwargs["vector_store_id"] == "vs-source-a": + return resp_a + return resp_b + + mock_client.vector_io.query = mocker.AsyncMock(side_effect=_side_effect) + + mock_vs_resp = mocker.MagicMock() + mock_vs_resp.data = [] + mock_client.vector_stores.list.return_value = mock_vs_resp + + mock_holder_class.return_value.get_client.return_value = mock_client + + query_request = QueryRequest(query="test query") + + response = await query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert response.rag_chunks is not None + assert len(response.rag_chunks) == constants.BYOK_RAG_MAX_CHUNKS + + scores = [chunk.score for chunk in response.rag_chunks] + assert scores == sorted(scores, reverse=True) + + # Both sources must survive the cap + sources = {chunk.source for chunk in response.rag_chunks} + assert 
"source-a" in sources + assert "source-b" in sources + + # Lowest-scoring chunks from each source must be dropped + chunk_contents = {chunk.content for chunk in response.rag_chunks} + assert "Source A chunk 0" not in chunk_contents + assert "Source B chunk 0" not in chunk_contents diff --git a/tests/integration/endpoints/test_streaming_query_byok_integration.py b/tests/integration/endpoints/test_streaming_query_byok_integration.py new file mode 100644 index 000000000..5f58f6036 --- /dev/null +++ b/tests/integration/endpoints/test_streaming_query_byok_integration.py @@ -0,0 +1,1099 @@ +"""Integration tests for /streaming_query endpoint BYOK inline and tool RAG functionality.""" + +# pylint: disable=too-many-lines + +import json +from collections.abc import AsyncIterator, Generator +from typing import Any + +import pytest +from fastapi import Request, status +from fastapi.responses import StreamingResponse +from llama_stack_api.openai_responses import OpenAIResponseObject +from pytest_mock import AsyncMockType, MockerFixture +from sqlalchemy.engine import Engine +from sqlalchemy.orm import Session, sessionmaker + +import app.database +import constants +from app.endpoints.streaming_query import streaming_query_endpoint_handler +from authentication.interface import AuthTuple +from configuration import AppConfig +from models.requests import QueryRequest +from tests.integration.endpoints.test_query_byok_integration import ( + _build_base_mock_client, + _make_byok_vector_io_response, + _make_vector_io_response, +) + + +async def _collect_sse_events(response: StreamingResponse) -> list[dict[str, Any]]: + """Consume a StreamingResponse and parse SSE events into dicts. + + Parameters: + response: The StreamingResponse to consume. + + Returns: + List of parsed JSON event dicts from ``data:`` lines. 
+ """ + events: list[dict[str, Any]] = [] + async for chunk in response.body_iterator: + text = chunk if isinstance(chunk, str) else bytes(chunk).decode() + for line in text.strip().splitlines(): + if line.startswith("data: "): + try: + events.append(json.loads(line[6:])) + except json.JSONDecodeError: + pass + return events + + +def _build_base_streaming_mock_client(mocker: MockerFixture) -> Any: + """Build a base mock Llama Stack client configured for streaming responses. + + Extends the base query mock client with streaming-specific stubs: + conversations.items.create and a streaming responses.create. + """ + mock_client = _build_base_mock_client(mocker) + + # Streaming additions + mock_client.conversations.items.create = mocker.AsyncMock() + + async def _mock_stream() -> AsyncIterator[Any]: + chunk = mocker.MagicMock() + chunk.type = "response.output_text.done" + chunk.text = ( + "Based on the documentation, OpenShift is a Kubernetes distribution." + ) + yield chunk + + # Emit response.completed so referenced_documents propagate to end event + completed_chunk = mocker.MagicMock() + completed_chunk.type = "response.completed" + mock_final = mocker.MagicMock(spec=OpenAIResponseObject) + mock_final.id = "response-inline-stream" + mock_final.error = None + mock_usage = mocker.MagicMock() + mock_usage.input_tokens = 50 + mock_usage.output_tokens = 20 + mock_final.usage = mock_usage + mock_final.output = [] + completed_chunk.response = mock_final + yield completed_chunk + + async def _responses_create(**kwargs: Any) -> Any: + if kwargs.get("stream", True): + return _mock_stream() + mock_resp = mocker.MagicMock() + mock_resp.output = [mocker.MagicMock(content="topic summary")] + return mock_resp + + mock_client.responses.create = mocker.AsyncMock(side_effect=_responses_create) + + return mock_client + + +# --------------------------------------------------------------------------- +# Fixtures +# 
--------------------------------------------------------------------------- + + +@pytest.fixture(name="patch_db_session", autouse=True) +def patch_db_session_fixture( + test_db_session: Session, + test_db_engine: Engine, +) -> Generator[Session, None, None]: + """Patch global database session to use in-memory test database.""" + original_engine = app.database.engine + original_session_local = app.database.session_local + + app.database.engine = test_db_engine + app.database.session_local = sessionmaker(bind=test_db_engine) + + yield test_db_session + + app.database.engine = original_engine + app.database.session_local = original_session_local + + +@pytest.fixture(name="mock_streaming_byok_client") +def mock_streaming_byok_client_fixture( + mocker: MockerFixture, +) -> Generator[Any, None, None]: + """Mock Llama Stack client with BYOK inline RAG configured for streaming. + + Configures vector_io.query to return BYOK RAG chunks and sets + vector_stores.list to empty (no tool-based vector stores). + """ + mock_holder_class = mocker.patch( + "app.endpoints.streaming_query.AsyncLlamaStackClientHolder" + ) + mock_client = _build_base_streaming_mock_client(mocker) + + # BYOK vector_io returns results + mock_client.vector_io.query = mocker.AsyncMock( + return_value=_make_byok_vector_io_response(mocker) + ) + + # No tool-based vector stores + mock_vector_stores_response = mocker.MagicMock() + mock_vector_stores_response.data = [] + mock_client.vector_stores.list.return_value = mock_vector_stores_response + + mock_holder_class.return_value.get_client.return_value = mock_client + yield mock_client + + +@pytest.fixture(name="mock_streaming_byok_tool_client") +def mock_streaming_byok_tool_client_fixture( # pylint: disable=too-many-statements + mocker: MockerFixture, +) -> Generator[Any, None, None]: + """Mock Llama Stack client with BYOK tool RAG (file_search) for streaming. 
+ + Configures vector_stores.list with a BYOK store and responses.create + to stream file_search_call output items alongside the assistant message. + """ + mock_holder_class = mocker.patch( + "app.endpoints.streaming_query.AsyncLlamaStackClientHolder" + ) + mock_client = _build_base_streaming_mock_client(mocker) + + # vector_io returns empty (no inline RAG) + mock_empty_vector_io = mocker.MagicMock() + mock_empty_vector_io.chunks = [] + mock_empty_vector_io.scores = [] + mock_client.vector_io.query = mocker.AsyncMock(return_value=mock_empty_vector_io) + + # Tool-based vector stores available + mock_vector_store = mocker.MagicMock() + mock_vector_store.id = "vs-byok-knowledge" + mock_list_result = mocker.MagicMock() + mock_list_result.data = [mock_vector_store] + mock_client.vector_stores.list.return_value = mock_list_result + + # Build a streaming response with file_search and completion events + async def _mock_tool_stream() -> AsyncIterator[Any]: + # file_search output item done + item_done_chunk = mocker.MagicMock() + item_done_chunk.type = "response.output_item.done" + item_done_chunk.output_index = 0 + + mock_item = mocker.MagicMock() + mock_item.type = "file_search_call" + mock_item.id = "call-fs-stream-1" + mock_item.queries = ["What is OpenShift?"] + mock_item.status = "completed" + + mock_result = mocker.MagicMock() + mock_result.file_id = "doc-ocp-1" + mock_result.filename = "openshift-docs.txt" + mock_result.score = 0.92 + mock_result.text = "OpenShift is a Kubernetes distribution by Red Hat." 
+ mock_result.attributes = { + "doc_url": "https://docs.redhat.com/ocp/overview", + } + mock_result.model_dump = mocker.Mock( + return_value={ + "file_id": "doc-ocp-1", + "filename": "openshift-docs.txt", + "score": 0.92, + "text": "OpenShift is a Kubernetes distribution.", + "attributes": {"doc_url": "https://docs.redhat.com/ocp/overview"}, + } + ) + mock_item.results = [mock_result] + item_done_chunk.item = mock_item + yield item_done_chunk + + # Text done + text_done_chunk = mocker.MagicMock() + text_done_chunk.type = "response.output_text.done" + text_done_chunk.text = ( + "Based on the documentation, OpenShift is a Kubernetes distribution." + ) + yield text_done_chunk + + # Response completed + completed_chunk = mocker.MagicMock() + completed_chunk.type = "response.completed" + mock_final_response = mocker.MagicMock(spec=OpenAIResponseObject) + mock_final_response.id = "response-tool-stream" + mock_final_response.error = None + + mock_usage = mocker.MagicMock() + mock_usage.input_tokens = 60 + mock_usage.output_tokens = 25 + mock_final_response.usage = mock_usage + + # file_search results in the final response output + mock_fs_output = mocker.MagicMock() + mock_fs_output.type = "file_search_call" + mock_fs_output.id = "call-fs-stream-1" + mock_fs_output.results = [mock_result] + mock_final_response.output = [mock_fs_output] + + completed_chunk.response = mock_final_response + yield completed_chunk + + async def _responses_create(**kwargs: Any) -> Any: + if kwargs.get("stream", True): + return _mock_tool_stream() + mock_resp = mocker.MagicMock() + mock_resp.output = [mocker.MagicMock(content="topic summary")] + return mock_resp + + mock_client.responses.create = mocker.AsyncMock(side_effect=_responses_create) + + mock_holder_class.return_value.get_client.return_value = mock_client + yield mock_client + + +@pytest.fixture(name="byok_config") +def byok_config_fixture(test_config: AppConfig, mocker: MockerFixture) -> AppConfig: + """Load test config and patch BYOK 
RAG configuration for inline RAG.""" + byok_entry = mocker.MagicMock() + byok_entry.rag_id = "test-knowledge" + byok_entry.vector_db_id = "vs-byok-knowledge" + byok_entry.score_multiplier = 1.0 + byok_entry.model_dump.return_value = { + "rag_id": "test-knowledge", + "rag_type": "inline::faiss", + "embedding_model": "sentence-transformers/all-mpnet-base-v2", + "embedding_dimension": 768, + "vector_db_id": "vs-byok-knowledge", + "db_path": "/tmp/test-db", + "score_multiplier": 1.0, + } + + test_config.configuration.byok_rag = [byok_entry] + test_config.configuration.rag.inline = ["test-knowledge"] + + return test_config + + +@pytest.fixture(name="byok_tool_config") +def byok_tool_config_fixture( + test_config: AppConfig, mocker: MockerFixture +) -> AppConfig: + """Load test config with BYOK RAG configured for tool-based (file_search) usage.""" + byok_entry = mocker.MagicMock() + byok_entry.rag_id = "test-knowledge" + byok_entry.vector_db_id = "vs-byok-knowledge" + byok_entry.score_multiplier = 1.0 + byok_entry.model_dump.return_value = { + "rag_id": "test-knowledge", + "rag_type": "inline::faiss", + "embedding_model": "sentence-transformers/all-mpnet-base-v2", + "embedding_dimension": 768, + "vector_db_id": "vs-byok-knowledge", + "db_path": "/tmp/test-db", + "score_multiplier": 1.0, + } + + test_config.configuration.byok_rag = [byok_entry] + test_config.configuration.rag.inline = [] + test_config.configuration.rag.tool = ["test-knowledge"] + + return test_config + + +# ============================================================================== +# Inline BYOK RAG Streaming Tests +# ============================================================================== + + +@pytest.mark.asyncio +async def test_streaming_query_byok_inline_rag_injects_context( + byok_config: AppConfig, + mock_streaming_byok_client: AsyncMockType, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that inline BYOK RAG context is injected into streaming query input. 
+ + Verifies: + - RAG context from vector_io.query is injected into responses.create input + - Input contains formatted file_search results + """ + _ = byok_config + + query_request = QueryRequest(query="What is OpenShift?") + + response = await streaming_query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert isinstance(response, StreamingResponse) + + # Verify RAG context was injected into responses.create input + # responses.create is the mock for the OpenAI-compatible LLM API call. + # .kwargs holds its keyword arguments, e.g. "input" is the full prompt text sent to the model. + create_call = mock_streaming_byok_client.responses.create.call_args_list[0] + call_kwargs = create_call.kwargs + input_text = call_kwargs["input"] + assert "file_search found" in input_text + assert "OpenShift is a Kubernetes distribution" in input_text + + +@pytest.mark.asyncio +async def test_streaming_query_byok_inline_rag_with_request_vector_store_ids( + test_config: AppConfig, + mocker: MockerFixture, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that per-request vector_store_ids override config for streaming query. + + Config has rag.inline = ["source-a"] (resolves to vs-source-a). + Request passes vector_store_ids = ["vs-source-b"]. + Only vs-source-b should be queried, proving the override works. 
+ (passing vector_store_ids overrides config) + + Verifies: + - vector_io.query is called with the request-specified store, not config + - The config-based store is NOT queried + """ + entry_a = mocker.MagicMock() + entry_a.rag_id = "source-a" + entry_a.vector_db_id = "vs-source-a" + entry_a.score_multiplier = 1.0 + + entry_b = mocker.MagicMock() + entry_b.rag_id = "source-b" + entry_b.vector_db_id = "vs-source-b" + entry_b.score_multiplier = 1.0 + + test_config.configuration.byok_rag = [entry_a, entry_b] + test_config.configuration.rag.inline = ["source-a"] + + mock_holder_class = mocker.patch( + "app.endpoints.streaming_query.AsyncLlamaStackClientHolder" + ) + mock_client = _build_base_streaming_mock_client(mocker) + + mock_client.vector_io.query = mocker.AsyncMock( + return_value=_make_byok_vector_io_response(mocker) + ) + + mock_vs_resp = mocker.MagicMock() + mock_vs_resp.data = [] + mock_client.vector_stores.list.return_value = mock_vs_resp + + mock_holder_class.return_value.get_client.return_value = mock_client + + # Override: request specifies vs-source-b, not the config's vs-source-a + query_request = QueryRequest( + query="What is OpenShift?", + vector_store_ids=["vs-source-b"], + ) + + response = await streaming_query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert isinstance(response, StreamingResponse) + + # Verify only vs-source-b was queried (not the config's vs-source-a) + assert mock_client.vector_io.query.call_count == 1 + # call_args.kwargs holds the keyword arguments of the most recent call to vector_io.query. + # e.g. "vector_store_id" is the store queried, "query" is the search text. 
+ call_kwargs = mock_client.vector_io.query.call_args.kwargs + assert call_kwargs["vector_store_id"] == "vs-source-b" + + +@pytest.mark.asyncio +async def test_streaming_query_byok_request_vector_store_ids_filters_configured_stores( + test_config: AppConfig, + mocker: MockerFixture, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that request vector_store_ids selects a subset of stores configured in rag.inline. + + Both source-a and source-b are registered in byok_rag and listed in rag.inline. + The request passes vector_store_ids = ["vs-source-a"] to select only one. + + Verifies: + - vector_io.query is called exactly once (for vs-source-a only) + - vs-source-b is NOT queried despite being in rag.inline + - Injected context contains only source-a content + """ + entry_a = mocker.MagicMock() + entry_a.rag_id = "source-a" + entry_a.vector_db_id = "vs-source-a" + entry_a.score_multiplier = 1.0 + + entry_b = mocker.MagicMock() + entry_b.rag_id = "source-b" + entry_b.vector_db_id = "vs-source-b" + entry_b.score_multiplier = 1.0 + + test_config.configuration.byok_rag = [entry_a, entry_b] + test_config.configuration.rag.inline = ["source-a", "source-b"] + + mock_holder_class = mocker.patch( + "app.endpoints.streaming_query.AsyncLlamaStackClientHolder" + ) + mock_client = _build_base_streaming_mock_client(mocker) + + mock_client.vector_io.query = mocker.AsyncMock( + return_value=_make_byok_vector_io_response(mocker) + ) + + mock_vs_resp = mocker.MagicMock() + mock_vs_resp.data = [] + mock_client.vector_stores.list.return_value = mock_vs_resp + + mock_holder_class.return_value.get_client.return_value = mock_client + + query_request = QueryRequest( + query="What is OpenShift?", + vector_store_ids=["vs-source-a"], + ) + + response = await streaming_query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert isinstance(response, StreamingResponse) + + # Only vs-source-a should have 
been queried + assert mock_client.vector_io.query.call_count == 1 + # call_args.kwargs holds the keyword arguments of the most recent call to vector_io.query. + # e.g. "vector_store_id" is the store queried, "query" is the search text. + call_kwargs = mock_client.vector_io.query.call_args.kwargs + assert call_kwargs["vector_store_id"] == "vs-source-a" + + # Verify source-a context was injected into the LLM input + # responses.create is the mock for the OpenAI-compatible LLM API call. + # .kwargs holds its keyword arguments, e.g. "input" is the full prompt text sent to the model. + create_call = mock_client.responses.create.call_args_list[0] + input_text = create_call.kwargs["input"] + assert "file_search found" in input_text + + +@pytest.mark.asyncio +async def test_streaming_query_byok_inline_rag_empty_vector_store_ids_no_context( + byok_config: AppConfig, + mock_streaming_byok_client: AsyncMockType, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that passing an empty vector_store_ids list produces no inline context. + + Verifies: + - vector_io.query is never called when vector_store_ids=[] + - No RAG context is injected into the streaming input + - Streaming response still succeeds + """ + _ = byok_config + + query_request = QueryRequest(query="What is OpenShift?", vector_store_ids=[]) + + response = await streaming_query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert isinstance(response, StreamingResponse) + mock_streaming_byok_client.vector_io.query.assert_not_called() + + # responses.create is the mock for the OpenAI-compatible LLM API call. + # .kwargs holds its keyword arguments, e.g. "input" is the full prompt text sent to the model. 
+ create_call = mock_streaming_byok_client.responses.create.call_args_list[0] + input_text = create_call.kwargs["input"] + assert "file_search found" not in input_text + + +@pytest.mark.asyncio +async def test_streaming_query_byok_inline_rag_error_handled_gracefully( + byok_config: AppConfig, + mock_streaming_byok_client: AsyncMockType, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that BYOK RAG search failures are handled gracefully in streaming. + + Verifies: + - When vector_io.query raises an exception, streaming query still succeeds + - The error is silently handled (BYOK search errors are non-fatal) + - No inline RAG context is injected into the prompt when search fails + """ + _ = byok_config + + mock_streaming_byok_client.vector_io.query.side_effect = Exception( + "Connection refused" + ) + + query_request = QueryRequest(query="What is OpenShift?") + + response = await streaming_query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + # Streaming query should succeed despite BYOK RAG failure + assert response.status_code == status.HTTP_200_OK + assert isinstance(response, StreamingResponse) + + # No inline RAG context should be injected when the search fails. + # "file_search found" is the header added by _format_rag_context when chunks are present. + # responses.create is the mock for the OpenAI-compatible LLM API call. + # .kwargs holds its keyword arguments, e.g. "input" is the full prompt text sent to the model. 
+ create_call = mock_streaming_byok_client.responses.create.call_args_list[0] + input_text = create_call.kwargs["input"] + assert "file_search found" not in input_text + + +@pytest.mark.asyncio +async def test_streaming_query_byok_inline_rag_returns_referenced_documents( + byok_config: AppConfig, + mock_streaming_byok_client: AsyncMockType, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that inline BYOK RAG emits referenced documents in the end event. + + Verifies: + - Injected context references documents from BYOK RAG chunk metadata + - The SSE end event includes referenced_documents with known URLs/titles + """ + _ = byok_config + _ = mock_streaming_byok_client + + query_request = QueryRequest(query="What is OpenShift?") + + response = await streaming_query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert isinstance(response, StreamingResponse) + + # Consume the stream and verify the end event carries referenced documents + events = await _collect_sse_events(response) + end_events = [e for e in events if e.get("event") == "end"] + assert len(end_events) == 1 + + ref_docs = end_events[0]["data"].get("referenced_documents", []) + assert len(ref_docs) == 2, f"Expected 2 referenced docs, got {ref_docs}" + + doc_urls = [str(doc.get("doc_url", "")) for doc in ref_docs if doc.get("doc_url")] + assert any( + "docs.redhat.com/ocp/overview" in url for url in doc_urls + ), f"Expected ocp/overview URL in {doc_urls}" + assert any( + "docs.redhat.com/k8s/pods" in url for url in doc_urls + ), f"Expected k8s/pods URL in {doc_urls}" + + doc_titles = [doc.get("doc_title") for doc in ref_docs if doc.get("doc_title")] + assert "OpenShift Overview" in doc_titles + assert "Kubernetes Pods" in doc_titles + + +# ============================================================================== +# Tool-based BYOK RAG Streaming Tests +# 
============================================================================== + + +@pytest.mark.asyncio +async def test_streaming_query_byok_tool_rag_emits_tool_call_events( + byok_tool_config: AppConfig, + mock_streaming_byok_tool_client: AsyncMockType, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that BYOK tool RAG emits tool call SSE events during streaming. + + Verifies: + - Stream contains tool_call events from file_search_call output + - Tool call event references file_search / knowledge_search + """ + _ = byok_tool_config + _ = mock_streaming_byok_tool_client + + query_request = QueryRequest(query="What is OpenShift?") + + response = await streaming_query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert isinstance(response, StreamingResponse) + + events = await _collect_sse_events(response) + tool_call_events = [e for e in events if e.get("event") == "tool_call"] + assert len(tool_call_events) > 0 + + tool_names = [e["data"].get("name", "") for e in tool_call_events] + assert any( + "file_search" in name or "knowledge_search" in name for name in tool_names + ) + + +@pytest.mark.asyncio +async def test_streaming_query_byok_tool_rag_emits_referenced_documents( + byok_tool_config: AppConfig, + mock_streaming_byok_tool_client: AsyncMockType, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that BYOK tool RAG streaming emits referenced documents in end event. 
+ + Verifies: + - End event includes referenced_documents list + - Documents include URLs from file_search results + """ + _ = byok_tool_config + _ = mock_streaming_byok_tool_client + + query_request = QueryRequest(query="What is OpenShift?") + + response = await streaming_query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert isinstance(response, StreamingResponse) + + events = await _collect_sse_events(response) + end_events = [e for e in events if e.get("event") == "end"] + assert len(end_events) == 1 + + ref_docs = end_events[0]["data"].get("referenced_documents", []) + assert isinstance(ref_docs, list) + assert len(ref_docs) >= 1, "Expected at least one referenced document" + + # Verify known URL from the mock file_search result propagated + doc_urls = [str(doc.get("doc_url", "")) for doc in ref_docs if doc.get("doc_url")] + assert any( + "docs.redhat.com/ocp/overview" in url for url in doc_urls + ), f"Expected ocp/overview URL in {doc_urls}" + + +# ============================================================================== +# Combined Inline + Tool RAG Streaming Tests +# ============================================================================== + + +@pytest.mark.asyncio +async def test_streaming_query_byok_combined_inline_and_tool_rag( + test_config: AppConfig, + mocker: MockerFixture, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that inline and tool-based BYOK RAG both work in streaming. 
+ + Verifies: + - Inline RAG context is injected into the input + - Tool RAG file_search is passed as a tool + - Streaming response succeeds + """ + # Configure both inline and tool RAG + byok_entry = mocker.MagicMock() + byok_entry.rag_id = "test-knowledge" + byok_entry.vector_db_id = "vs-byok-knowledge" + byok_entry.score_multiplier = 1.0 + test_config.configuration.byok_rag = [byok_entry] + test_config.configuration.rag.inline = ["test-knowledge"] + test_config.configuration.rag.tool = ["test-knowledge"] + + # Mock Llama Stack client + mock_holder_class = mocker.patch( + "app.endpoints.streaming_query.AsyncLlamaStackClientHolder" + ) + mock_client = _build_base_streaming_mock_client(mocker) + + # Inline RAG returns chunks via vector_io + mock_client.vector_io.query = mocker.AsyncMock( + return_value=_make_byok_vector_io_response(mocker) + ) + + # Tool RAG vector stores + mock_vector_store = mocker.MagicMock() + mock_vector_store.id = "vs-byok-knowledge" + mock_list_result = mocker.MagicMock() + mock_list_result.data = [mock_vector_store] + mock_client.vector_stores.list.return_value = mock_list_result + + mock_holder_class.return_value.get_client.return_value = mock_client + + query_request = QueryRequest(query="What is OpenShift?") + + response = await streaming_query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert isinstance(response, StreamingResponse) + assert response.status_code == status.HTTP_200_OK + + # Verify inline RAG context was injected + # responses.create is the mock for the OpenAI-compatible LLM API call. + # .kwargs holds its keyword arguments, e.g. "input" is the full prompt text sent to the model. 
+ create_call = mock_client.responses.create.call_args_list[0] + call_kwargs = create_call.kwargs + input_text = call_kwargs["input"] + assert "file_search found" in input_text + + # Verify tool RAG file_search was passed + assert call_kwargs.get("tools") is not None + assert any(tool.get("type") == "file_search" for tool in call_kwargs["tools"]) + + +# ============================================================================== +# Inline RAG rag_id Resolution Streaming Tests +# ============================================================================== + + +@pytest.mark.asyncio +async def test_streaming_query_byok_only_configured_rag_id_is_queried( + test_config: AppConfig, + mocker: MockerFixture, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that only the rag_id listed in rag.inline triggers retrieval in streaming. + + Two BYOK sources are registered (source-a and source-b) but only + source-a is listed in rag.inline. Only vs-source-a should be queried + and only its content should appear in the injected context. 
+ + Verifies: + - vector_io.query is called exactly once (for the configured source) + - The call targets the correct vector_db_id + - vs-source-b is NOT queried + - Injected context contains source-a content + """ + entry_a = mocker.MagicMock() + entry_a.rag_id = "source-a" + entry_a.vector_db_id = "vs-source-a" + entry_a.score_multiplier = 1.0 + + entry_b = mocker.MagicMock() + entry_b.rag_id = "source-b" + entry_b.vector_db_id = "vs-source-b" + entry_b.score_multiplier = 1.0 + + test_config.configuration.byok_rag = [entry_a, entry_b] + test_config.configuration.rag.inline = ["source-a"] + + mock_holder_class = mocker.patch( + "app.endpoints.streaming_query.AsyncLlamaStackClientHolder" + ) + mock_client = _build_base_streaming_mock_client(mocker) + + mock_client.vector_io.query = mocker.AsyncMock( + return_value=_make_byok_vector_io_response(mocker) + ) + + mock_vs_resp = mocker.MagicMock() + mock_vs_resp.data = [] + mock_client.vector_stores.list.return_value = mock_vs_resp + + mock_holder_class.return_value.get_client.return_value = mock_client + + query_request = QueryRequest(query="What is OpenShift?") + + response = await streaming_query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert isinstance(response, StreamingResponse) + + assert mock_client.vector_io.query.call_count == 1 + # call_args.kwargs holds the keyword arguments of the most recent call to vector_io.query. + # e.g. "vector_store_id" is the store queried, "query" is the search text. + call_kwargs = mock_client.vector_io.query.call_args.kwargs + assert call_kwargs["vector_store_id"] == "vs-source-a" + + queried_stores = [ + c.kwargs["vector_store_id"] for c in mock_client.vector_io.query.call_args_list + ] + assert "vs-source-b" not in queried_stores + + # responses.create is the mock for the OpenAI-compatible LLM API call. + # .kwargs holds its keyword arguments, e.g. "input" is the full prompt text sent to the model. 
+ create_call = mock_client.responses.create.call_args_list[0] + input_text = create_call.kwargs["input"] + assert "file_search found" in input_text + + +# ============================================================================== +# Score Multiplier Priority Streaming Tests +# ============================================================================== + + +@pytest.mark.asyncio +async def test_streaming_query_byok_score_multiplier_shifts_priority( # pylint: disable=too-many-locals + test_config: AppConfig, + mocker: MockerFixture, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that score_multiplier shifts chunk priority in streaming query. + + Doc A (source-a) has high base similarity (0.90) with multiplier 1.0. + Doc B (source-b) has low base similarity (0.40) with multiplier 5.0. + After weighting: Doc A = 0.90, Doc B = 2.00. + The injected context should list Doc B content before Doc A. + + Verifies: + - The higher-weighted chunk content appears first in the injected context + """ + entry_a = mocker.MagicMock() + entry_a.rag_id = "source-a" + entry_a.vector_db_id = "vs-source-a" + entry_a.score_multiplier = 1.0 + + entry_b = mocker.MagicMock() + entry_b.rag_id = "source-b" + entry_b.vector_db_id = "vs-source-b" + entry_b.score_multiplier = 5.0 + + test_config.configuration.byok_rag = [entry_a, entry_b] + test_config.configuration.rag.inline = ["source-a", "source-b"] + + mock_holder_class = mocker.patch( + "app.endpoints.streaming_query.AsyncLlamaStackClientHolder" + ) + mock_client = _build_base_streaming_mock_client(mocker) + + resp_a = _make_vector_io_response( + mocker, + [ + ("Doc A high similarity", "doc-a", 0.90), + ], + ) + resp_b = _make_vector_io_response( + mocker, + [ + ("Doc B low similarity boosted", "doc-b", 0.40), + ], + ) + + async def _side_effect(**kwargs: Any) -> Any: + if kwargs["vector_store_id"] == "vs-source-a": + return resp_a + return resp_b + + mock_client.vector_io.query = 
mocker.AsyncMock(side_effect=_side_effect) + + mock_vs_resp = mocker.MagicMock() + mock_vs_resp.data = [] + mock_client.vector_stores.list.return_value = mock_vs_resp + + mock_holder_class.return_value.get_client.return_value = mock_client + + query_request = QueryRequest(query="test query") + + response = await streaming_query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert isinstance(response, StreamingResponse) + + # Verify Doc B (weighted 2.0) appears before Doc A (weighted 0.9) in context + # responses.create is the mock for the OpenAI-compatible LLM API call. + # .kwargs holds its keyword arguments, e.g. "input" is the full prompt text sent to the model. + create_call = mock_client.responses.create.call_args_list[0] + input_text = create_call.kwargs["input"] + pos_b = input_text.find("Doc B low similarity boosted") + pos_a = input_text.find("Doc A high similarity") + assert pos_b != -1 and pos_a != -1 + assert pos_b < pos_a + + +# ============================================================================== +# BYOK_RAG_MAX_CHUNKS Capping Streaming Tests +# ============================================================================== + + +@pytest.mark.asyncio +async def test_streaming_query_byok_max_chunks_caps_context( # pylint: disable=too-many-locals + test_config: AppConfig, + mocker: MockerFixture, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that BYOK_RAG_MAX_CHUNKS caps chunks in streaming query context. + + A source returns more chunks than BYOK_RAG_MAX_CHUNKS. The injected + context should contain at most BYOK_RAG_MAX_CHUNKS chunk entries. 
+ + Verifies: + - Context chunk count does not exceed BYOK_RAG_MAX_CHUNKS + - Only the highest-scored chunks appear in the context + """ + entry = mocker.MagicMock() + entry.rag_id = "big-source" + entry.vector_db_id = "vs-big-source" + entry.score_multiplier = 1.0 + + test_config.configuration.byok_rag = [entry] + test_config.configuration.rag.inline = ["big-source"] + + mock_holder_class = mocker.patch( + "app.endpoints.streaming_query.AsyncLlamaStackClientHolder" + ) + mock_client = _build_base_streaming_mock_client(mocker) + + # Generate more chunks than BYOK_RAG_MAX_CHUNKS + num_chunks = constants.BYOK_RAG_MAX_CHUNKS + 5 + chunks_data = [ + (f"Chunk content {i}", f"chunk-{i}", round(0.50 + i * 0.03, 2)) + for i in range(num_chunks) + ] + mock_client.vector_io.query = mocker.AsyncMock( + return_value=_make_vector_io_response(mocker, chunks_data) + ) + + mock_vs_resp = mocker.MagicMock() + mock_vs_resp.data = [] + mock_client.vector_stores.list.return_value = mock_vs_resp + + mock_holder_class.return_value.get_client.return_value = mock_client + + query_request = QueryRequest(query="test query") + + response = await streaming_query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert isinstance(response, StreamingResponse) + + # Verify the context header reports the capped count + # responses.create is the mock for the OpenAI-compatible LLM API call. + # .kwargs holds its keyword arguments, e.g. "input" is the full prompt text sent to the model. 
+ create_call = mock_client.responses.create.call_args_list[0] + input_text = create_call.kwargs["input"] + expected_header = f"file_search found {constants.BYOK_RAG_MAX_CHUNKS} chunks:" + assert expected_header in input_text + + # The lowest-scoring chunk should NOT be in the context + assert "Chunk content 0" not in input_text + # The highest-scoring chunk should be in the context + assert f"Chunk content {num_chunks - 1}" in input_text + + +@pytest.mark.asyncio +async def test_streaming_query_byok_max_chunks_caps_across_multiple_sources( # pylint: disable=too-many-locals + test_config: AppConfig, + mocker: MockerFixture, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that BYOK_RAG_MAX_CHUNKS caps chunks across multiple sources in streaming. + + Two sources each return several chunks. The combined context should + not exceed BYOK_RAG_MAX_CHUNKS and should contain the globally + highest-scored chunks regardless of source. + + Verifies: + - Total chunks across sources are capped at BYOK_RAG_MAX_CHUNKS + - Only the highest-scored chunks appear in the context + """ + entry_a = mocker.MagicMock() + entry_a.rag_id = "source-a" + entry_a.vector_db_id = "vs-source-a" + entry_a.score_multiplier = 1.0 + + entry_b = mocker.MagicMock() + entry_b.rag_id = "source-b" + entry_b.vector_db_id = "vs-source-b" + entry_b.score_multiplier = 1.0 + + test_config.configuration.byok_rag = [entry_a, entry_b] + test_config.configuration.rag.inline = ["source-a", "source-b"] + + mock_holder_class = mocker.patch( + "app.endpoints.streaming_query.AsyncLlamaStackClientHolder" + ) + mock_client = _build_base_streaming_mock_client(mocker) + + # Overlapping score bands so top-k must pick from both sources + n = constants.BYOK_RAG_MAX_CHUNKS + resp_a = _make_vector_io_response( + mocker, + [ + (f"Source A chunk {i}", f"a-chunk-{i}", round(0.70 + i * 0.05, 2)) + for i in range(n) + ], + ) + resp_b = _make_vector_io_response( + mocker, + [ + (f"Source B chunk {i}", 
f"b-chunk-{i}", round(0.72 + i * 0.05, 2)) + for i in range(n) + ], + ) + + async def _side_effect(**kwargs: Any) -> Any: + if kwargs["vector_store_id"] == "vs-source-a": + return resp_a + return resp_b + + mock_client.vector_io.query = mocker.AsyncMock(side_effect=_side_effect) + + mock_vs_resp = mocker.MagicMock() + mock_vs_resp.data = [] + mock_client.vector_stores.list.return_value = mock_vs_resp + + mock_holder_class.return_value.get_client.return_value = mock_client + + query_request = QueryRequest(query="test query") + + response = await streaming_query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert isinstance(response, StreamingResponse) + + # responses.create is the mock for the OpenAI-compatible LLM API call. + # .kwargs holds its keyword arguments, e.g. "input" is the full prompt text sent to the model. + create_call = mock_client.responses.create.call_args_list[0] + input_text = create_call.kwargs["input"] + expected_header = f"file_search found {constants.BYOK_RAG_MAX_CHUNKS} chunks:" + assert expected_header in input_text + + # Both sources must appear in the context (overlapping scores guarantee this) + assert "Source A chunk" in input_text + assert "Source B chunk" in input_text + + # Lowest-scoring chunks from each source must be dropped + assert "Source A chunk 0" not in input_text + assert "Source B chunk 0" not in input_text diff --git a/tests/integration/test_openapi_json.py b/tests/integration/test_openapi_json.py index 17ff8ac66..05ccc83f8 100644 --- a/tests/integration/test_openapi_json.py +++ b/tests/integration/test_openapi_json.py @@ -231,6 +231,11 @@ def test_servers_section_present_from_url(spec_from_url: dict[str, Any]) -> None "post", {"200", "401", "403", "404"}, ), + ( + "/v1/responses", + "post", + {"200", "401", "403", "404", "413", "422", "429", "500", "503"}, + ), ("/v1/config", "get", {"200", "401", "403", "500"}), ("/v1/feedback", "post", {"200", 
"401", "403", "404", "500"}), ("/v1/feedback/status", "get", {"200"}), @@ -318,6 +323,11 @@ def test_paths_and_responses_exist_from_file( "post", {"200", "401", "403", "404"}, ), + ( + "/v1/responses", + "post", + {"200", "401", "403", "404", "413", "422", "429", "500", "503"}, + ), ("/v1/config", "get", {"200", "401", "403", "500"}), ("/v1/feedback", "post", {"200", "401", "403", "404", "500"}), ("/v1/feedback/status", "get", {"200"}), diff --git a/tests/unit/app/endpoints/test_conversations.py b/tests/unit/app/endpoints/test_conversations.py index 5ca4faf0b..9c75f0d2f 100644 --- a/tests/unit/app/endpoints/test_conversations.py +++ b/tests/unit/app/endpoints/test_conversations.py @@ -552,12 +552,8 @@ async def test_llama_stack_not_found_error( ) -> None: """Test the endpoint when LlamaStack returns NotFoundError. - Verify the GET /conversations/{conversation_id} handler raises an HTTP - 404 when the Llama Stack client reports the session as not found. - - Asserts that the raised HTTPException contains a response message - indicating the conversation was not found and a cause that includes - "does not exist" and the conversation ID. + When the Llama Stack client reports the session as not found, + get_all_conversation_items maps it to HTTP 500 (InternalServerError). """ mock_authorization_resolvers(mocker) mocker.patch( @@ -589,13 +585,13 @@ async def test_llama_stack_not_found_error( auth=MOCK_AUTH, ) - assert exc_info.value.status_code == status.HTTP_404_NOT_FOUND - + assert exc_info.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR detail = exc_info.value.detail assert isinstance(detail, dict) - assert "Conversation not found" in detail["response"] # type: ignore - assert "does not exist" in detail["cause"] # type: ignore - assert VALID_CONVERSATION_ID in detail["cause"] # type: ignore + assert detail["response"] == "Internal server error" + assert detail["cause"] == ( + "An unexpected error occurred while processing the request." 
+ ) @pytest.mark.asyncio async def test_get_conversation_forbidden( @@ -679,7 +675,10 @@ async def test_get_others_conversations_allowed_for_authorized_user( mock_item2.role = "assistant" mock_item2.content = "Hi there!" mock_items_response.data = [mock_item1, mock_item2] - mock_client.conversations.items.list.return_value = mock_items_response + mock_items_response.has_next_page.return_value = False + mock_client.conversations.items.list = mocker.AsyncMock( + return_value=mock_items_response + ) mock_client_holder = mocker.patch( "app.endpoints.conversations_v1.AsyncLlamaStackClientHolder" @@ -732,7 +731,8 @@ async def test_successful_conversation_retrieval( type="message", role="assistant", content="I'm doing well, thanks!" ), ] - mock_client.conversations.items.list.return_value = mock_items + mock_items.has_next_page.return_value = False + mock_client.conversations.items.list = mocker.AsyncMock(return_value=mock_items) mock_client_holder = mocker.patch( "app.endpoints.conversations_v1.AsyncLlamaStackClientHolder" @@ -806,7 +806,10 @@ async def test_no_items_found_in_get_conversation( mock_client = mocker.AsyncMock() mock_items_response = mocker.Mock() mock_items_response.data = [] - mock_client.conversations.items.list.return_value = mock_items_response + mock_items_response.has_next_page.return_value = False + mock_client.conversations.items.list = mocker.AsyncMock( + return_value=mock_items_response + ) mock_client_holder = mocker.patch( "app.endpoints.conversations_v1.AsyncLlamaStackClientHolder" ) @@ -832,7 +835,10 @@ async def test_api_status_error_in_get_conversation( dummy_request: Request, mock_conversation: MockType, ) -> None: - """Test when APIStatusError is raised during conversation retrieval.""" + """Test when APIStatusError is raised during conversation retrieval. + + get_all_conversation_items maps APIStatusError to HTTP 500. 
+ """ mock_authorization_resolvers(mocker) mocker.patch( "app.endpoints.conversations_v1.configuration", setup_configuration @@ -863,10 +869,10 @@ async def test_api_status_error_in_get_conversation( auth=MOCK_AUTH, ) - assert exc_info.value.status_code == status.HTTP_404_NOT_FOUND + assert exc_info.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR detail = exc_info.value.detail assert isinstance(detail, dict) - assert "Conversation not found" in detail["response"] # type: ignore + assert "response" in detail @pytest.mark.asyncio async def test_sqlalchemy_error_in_get_conversation( @@ -940,7 +946,7 @@ def query_side_effect(model_class: type[Any]) -> Any: mock_session_context.__enter__.return_value = mock_session mock_session_context.__exit__.return_value = None mocker.patch( - "app.endpoints.conversations_v1.get_session", + "utils.endpoints.get_session", return_value=mock_session_context, ) diff --git a/tests/unit/app/endpoints/test_query.py b/tests/unit/app/endpoints/test_query.py index 044fb5bf2..06ee69926 100644 --- a/tests/unit/app/endpoints/test_query.py +++ b/tests/unit/app/endpoints/test_query.py @@ -17,7 +17,11 @@ from models.responses import QueryResponse from utils.token_counter import TokenCounter from utils.types import ( + RAGChunk, + RAGContext, + ReferencedDocument, ResponsesApiParams, + ShieldModerationPassed, ToolCallSummary, ToolResultSummary, TurnSummary, @@ -42,8 +46,7 @@ def create_dummy_request() -> Request: request (fastapi.Request): A Request constructed with a bare HTTP scope (type "http") for use in tests. 
""" - req = Request(scope={"type": "http", "headers": []}) - return req + return Request(scope={"type": "http", "headers": []}) @pytest.fixture(name="setup_configuration") @@ -126,6 +129,10 @@ async def test_successful_query_no_conversation( "app.endpoints.query.get_topic_summary", new=mocker.AsyncMock(return_value=None), ) + mocker.patch( + "app.endpoints.query.run_shield_moderation", + new=mocker.AsyncMock(return_value=ShieldModerationPassed()), + ) mock_responses_params = mocker.Mock(spec=ResponsesApiParams) mock_responses_params.model = "provider1/model1" @@ -170,6 +177,93 @@ async def mock_retrieve_response(*_args: Any, **_kwargs: Any) -> TurnSummary: assert response.conversation_id == "123" assert response.response == "Kubernetes is a container orchestration platform" + @pytest.mark.asyncio + async def test_query_merges_inline_and_tool_rag_chunks_and_documents( + self, + dummy_request: Request, + setup_configuration: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that inline RAG and tool-based RAG chunks/docs are correctly merged.""" + query_request = QueryRequest( + query="What is Kubernetes?" 
+ ) # pyright: ignore[reportCallIssue] + + mocker.patch("app.endpoints.query.configuration", setup_configuration) + mocker.patch("app.endpoints.query.check_configuration_loaded") + mocker.patch("app.endpoints.query.check_tokens_available") + mocker.patch("app.endpoints.query.validate_model_provider_override") + + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_response_obj = mocker.Mock() + mock_response_obj.output = [] + mock_client.responses = mocker.Mock() + mock_client.responses.create = mocker.AsyncMock(return_value=mock_response_obj) + mock_client_holder = mocker.Mock() + mock_client_holder.get_client.return_value = mock_client + mocker.patch( + "app.endpoints.query.AsyncLlamaStackClientHolder", + return_value=mock_client_holder, + ) + mocker.patch( + "app.endpoints.query.run_shield_moderation", + new=mocker.AsyncMock(return_value=ShieldModerationPassed()), + ) + + inline_chunk = RAGChunk(content="inline chunk content", source="byok") + inline_doc = ReferencedDocument(doc_title="Inline Doc") + inline_rag = RAGContext( + context_text="", + rag_chunks=[inline_chunk], + referenced_documents=[inline_doc], + ) + mocker.patch( + "app.endpoints.query.build_rag_context", + new=mocker.AsyncMock(return_value=inline_rag), + ) + + mock_responses_params = mocker.Mock(spec=ResponsesApiParams) + mock_responses_params.model = "provider1/model1" + mock_responses_params.conversation = "conv_123" + mock_responses_params.tools = None + mock_responses_params.model_dump.return_value = { + "input": "test", + "model": "provider1/model1", + } + mocker.patch( + "app.endpoints.query.prepare_responses_params", + new=mocker.AsyncMock(return_value=mock_responses_params), + ) + + tool_chunk = RAGChunk(content="tool chunk content", source="vs-1") + tool_doc = ReferencedDocument(doc_title="Tool Doc") + mock_turn_summary = TurnSummary() + mock_turn_summary.rag_chunks = [tool_chunk] + mock_turn_summary.referenced_documents = [tool_doc] + + mocker.patch( + 
"app.endpoints.query.retrieve_response", + new=mocker.AsyncMock(return_value=mock_turn_summary), + ) + mocker.patch("app.endpoints.query.store_query_results") + mocker.patch("app.endpoints.query.consume_query_tokens") + mocker.patch("app.endpoints.query.get_available_quotas", return_value={}) + + response = await query_endpoint_handler( + request=dummy_request, + query_request=query_request, + auth=MOCK_AUTH, + mcp_headers={}, + ) + + assert isinstance(response, QueryResponse) + assert len(response.rag_chunks) == 2 + assert response.rag_chunks[0].content == "inline chunk content" + assert response.rag_chunks[1].content == "tool chunk content" + assert len(response.referenced_documents) == 2 + assert response.referenced_documents[0].doc_title == "Inline Doc" + assert response.referenced_documents[1].doc_title == "Tool Doc" + @pytest.mark.asyncio async def test_successful_query_with_conversation( self, @@ -215,7 +309,10 @@ async def test_successful_query_with_conversation( "app.endpoints.query.prepare_responses_params", new=mocker.AsyncMock(return_value=mock_responses_params), ) - + mocker.patch( + "app.endpoints.query.run_shield_moderation", + new=mocker.AsyncMock(return_value=ShieldModerationPassed()), + ) mocker.patch( "app.endpoints.query.retrieve_response", new=mocker.AsyncMock(return_value=TurnSummary()), @@ -276,6 +373,10 @@ async def test_query_with_attachments( "app.endpoints.query.get_topic_summary", new=mocker.AsyncMock(return_value=None), ) + mocker.patch( + "app.endpoints.query.run_shield_moderation", + new=mocker.AsyncMock(return_value=ShieldModerationPassed()), + ) mock_responses_params = mocker.Mock(spec=ResponsesApiParams) mock_responses_params.model = "provider1/model1" @@ -336,6 +437,10 @@ async def test_query_with_topic_summary( "app.endpoints.query.AsyncLlamaStackClientHolder", return_value=mock_client_holder, ) + mocker.patch( + "app.endpoints.query.run_shield_moderation", + new=mocker.AsyncMock(return_value=ShieldModerationPassed()), + ) 
mock_responses_params = mocker.Mock(spec=ResponsesApiParams) mock_responses_params.model = "provider1/model1" @@ -406,6 +511,10 @@ async def test_query_azure_token_refresh( "app.endpoints.query.get_topic_summary", new=mocker.AsyncMock(return_value=None), ) + mocker.patch( + "app.endpoints.query.run_shield_moderation", + new=mocker.AsyncMock(return_value=ShieldModerationPassed()), + ) mock_responses_params = mocker.Mock(spec=ResponsesApiParams) mock_responses_params.model = "azure/model1" @@ -477,6 +586,7 @@ async def test_retrieve_response_success(self, mocker: MockerFixture) -> None: mock_responses_params = mocker.Mock(spec=ResponsesApiParams) mock_responses_params.input = "test query" mock_responses_params.model = "provider1/model1" + mock_responses_params.tools = None mock_responses_params.model_dump.return_value = { "input": "test query", "model": "provider1/model1", @@ -493,10 +603,6 @@ async def test_retrieve_response_success(self, mocker: MockerFixture) -> None: mock_response.output = [mock_output_item] mock_response.usage = mock_usage - mocker.patch( - "app.endpoints.query.run_shield_moderation", - return_value=mocker.Mock(decision="passed"), - ) mock_client.responses.create = mocker.AsyncMock(return_value=mock_response) mock_summary = TurnSummary() @@ -507,7 +613,9 @@ async def test_retrieve_response_success(self, mocker: MockerFixture) -> None: return_value=mock_summary, ) - result = await retrieve_response(mock_client, mock_responses_params) + result = await retrieve_response( + mock_client, mock_responses_params, ShieldModerationPassed() + ) assert isinstance(result, TurnSummary) assert result.llm_response == "Response text" @@ -528,19 +636,20 @@ async def test_retrieve_response_shield_blocked( "model": "provider1/model1", } + mock_refusal = mocker.Mock() mock_moderation_result = mocker.Mock() mock_moderation_result.decision = "blocked" mock_moderation_result.message = "Content blocked by moderation" - mocker.patch( - 
"app.endpoints.query.run_shield_moderation", - new=mocker.AsyncMock(return_value=mock_moderation_result), - ) + mock_moderation_result.moderation_id = "mod_123" + mock_moderation_result.refusal_response = mock_refusal mock_append = mocker.patch( - "app.endpoints.query.append_turn_to_conversation", + "app.endpoints.query.append_turn_items_to_conversation", new=mocker.AsyncMock(), ) - result = await retrieve_response(mock_client, mock_responses_params) + result = await retrieve_response( + mock_client, mock_responses_params, mock_moderation_result + ) assert isinstance(result, TurnSummary) assert result.llm_response == "Content blocked by moderation" @@ -559,10 +668,6 @@ async def test_retrieve_response_connection_error( "model": "provider1/model1", } - mocker.patch( - "app.endpoints.query.run_shield_moderation", - return_value=mocker.Mock(decision="passed"), - ) mock_client.responses.create = mocker.AsyncMock( side_effect=APIConnectionError( message="Connection failed", request=mocker.Mock() @@ -570,7 +675,9 @@ async def test_retrieve_response_connection_error( ) with pytest.raises(HTTPException) as exc_info: - await retrieve_response(mock_client, mock_responses_params) + await retrieve_response( + mock_client, mock_responses_params, ShieldModerationPassed() + ) assert exc_info.value.status_code == 503 @@ -588,10 +695,6 @@ async def test_retrieve_response_api_status_error( "model": "provider1/model1", } - mocker.patch( - "app.endpoints.query.run_shield_moderation", - return_value=mocker.Mock(decision="passed"), - ) mock_client.responses.create = mocker.AsyncMock( side_effect=APIStatusError( message="API error", response=mocker.Mock(request=None), body=None @@ -608,7 +711,9 @@ async def test_retrieve_response_api_status_error( ) with pytest.raises(HTTPException): - await retrieve_response(mock_client, mock_responses_params) + await retrieve_response( + mock_client, mock_responses_params, ShieldModerationPassed() + ) @pytest.mark.asyncio async def 
test_retrieve_response_runtime_error_context_length( @@ -624,16 +729,14 @@ async def test_retrieve_response_runtime_error_context_length( "model": "provider1/model1", } - mocker.patch( - "app.endpoints.query.run_shield_moderation", - return_value=mocker.Mock(decision="passed"), - ) mock_client.responses.create = mocker.AsyncMock( side_effect=RuntimeError("context_length exceeded") ) with pytest.raises(HTTPException) as exc_info: - await retrieve_response(mock_client, mock_responses_params) + await retrieve_response( + mock_client, mock_responses_params, ShieldModerationPassed() + ) assert exc_info.value.status_code == 413 @@ -651,16 +754,14 @@ async def test_retrieve_response_runtime_error_other( "model": "provider1/model1", } - mocker.patch( - "app.endpoints.query.run_shield_moderation", - return_value=mocker.Mock(decision="passed"), - ) mock_client.responses.create = mocker.AsyncMock( side_effect=RuntimeError("Some other error") ) with pytest.raises(RuntimeError): - await retrieve_response(mock_client, mock_responses_params) + await retrieve_response( + mock_client, mock_responses_params, ShieldModerationPassed() + ) @pytest.mark.asyncio async def test_retrieve_response_with_tool_calls( @@ -671,6 +772,7 @@ async def test_retrieve_response_with_tool_calls( mock_responses_params = mocker.Mock(spec=ResponsesApiParams) mock_responses_params.input = "test query" mock_responses_params.model = "provider1/model1" + mock_responses_params.tools = None mock_responses_params.model_dump.return_value = { "input": "test query", "model": "provider1/model1", @@ -683,10 +785,6 @@ async def test_retrieve_response_with_tool_calls( mock_response.output = [mocker.Mock(type="message")] mock_response.usage = mock_usage - mocker.patch( - "app.endpoints.query.run_shield_moderation", - return_value=mocker.Mock(decision="passed"), - ) mock_client.responses.create = mocker.AsyncMock(return_value=mock_response) mock_tool_call = ToolCallSummary(id="1", name="test", args={}) @@ -703,7 +801,9 @@ 
async def test_retrieve_response_with_tool_calls( return_value=mock_summary, ) - result = await retrieve_response(mock_client, mock_responses_params) + result = await retrieve_response( + mock_client, mock_responses_params, ShieldModerationPassed() + ) assert result.llm_response == "Response text" assert len(result.tool_calls) == 1 @@ -712,4 +812,3 @@ async def test_retrieve_response_with_tool_calls( assert result.token_usage.output_tokens == 5 assert result.rag_chunks == [] assert result.referenced_documents == [] - assert result.inline_rag_documents == [] diff --git a/tests/unit/app/endpoints/test_responses.py b/tests/unit/app/endpoints/test_responses.py new file mode 100644 index 000000000..725e43a07 --- /dev/null +++ b/tests/unit/app/endpoints/test_responses.py @@ -0,0 +1,1374 @@ +# pylint: disable=redefined-outer-name, too-many-locals, too-many-lines +"""Unit tests for the /responses REST API endpoint (LCORE Responses API).""" + +from datetime import UTC, datetime +from typing import Any, cast + +import pytest +from fastapi import HTTPException, Request +from fastapi.responses import StreamingResponse +from llama_stack_api import OpenAIResponseObject +from llama_stack_api.openai_responses import OpenAIResponseMessage +from llama_stack_client import APIConnectionError, APIStatusError, AsyncLlamaStackClient +from pytest_mock import MockerFixture + +from app.endpoints.responses import ( + handle_non_streaming_response, + handle_streaming_response, + responses_endpoint_handler, +) +from configuration import AppConfig +from models.config import Action +from models.database.conversations import UserConversation +from models.requests import ResponsesRequest +from models.responses import ResponsesResponse +from utils.types import RAGContext, ResponsesConversationContext, TurnSummary + +MOCK_AUTH = ( + "00000001-0001-0001-0001-000000000001", + "mock_username", + False, + "mock_token", +) +VALID_CONV_ID = "conv_e6afd7aaa97b49ce8f4f96a801b07893d9cb784d72e53e3c" 
+VALID_CONV_ID_NORMALIZED = "e6afd7aaa97b49ce8f4f96a801b07893d9cb784d72e53e3c" +MODULE = "app.endpoints.responses" +ENDPOINTS_MODULE = "utils.endpoints" +UTILS_RESPONSES_MODULE = "utils.responses" + + +def _patch_base(mocker: MockerFixture, config: AppConfig) -> None: + """Patch configuration and mandatory checks for responses endpoint.""" + mocker.patch(f"{MODULE}.configuration", config) + mocker.patch(f"{MODULE}.check_configuration_loaded") + mocker.patch(f"{MODULE}.check_tokens_available") + mocker.patch(f"{MODULE}.validate_model_provider_override") + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mocker.Mock() + mocker.patch( + f"{UTILS_RESPONSES_MODULE}.AsyncLlamaStackClientHolder", + return_value=mock_holder, + ) + mocker.patch( + f"{UTILS_RESPONSES_MODULE}.prepare_tools", + new=mocker.AsyncMock(return_value=None), + ) + + +def _patch_client(mocker: MockerFixture) -> Any: + """Patch AsyncLlamaStackClientHolder; return (mock_client, mock_holder).""" + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_vector_stores = mocker.Mock() + mock_vector_stores.list = mocker.AsyncMock(return_value=mocker.Mock(data=[])) + mock_client.vector_stores = mock_vector_stores + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mock_client + mocker.patch(f"{MODULE}.AsyncLlamaStackClientHolder", return_value=mock_holder) + return mock_client, mock_holder + + +def _patch_resolve_response_context( + mocker: MockerFixture, + *, + conversation: str = "conv_new", + user_conversation: UserConversation | None = None, + generate_topic_summary: bool = False, +) -> None: + """Patch resolve_response_context to return the given conversation context.""" + mocker.patch( + f"{MODULE}.resolve_response_context", + new=mocker.AsyncMock( + return_value=ResponsesConversationContext( + conversation=conversation, + user_conversation=user_conversation, + generate_topic_summary=generate_topic_summary, + ) + ), + ) + + +def _patch_rag( + mocker: 
MockerFixture, + *, + rag_context: str = "", +) -> None: + """Patch RAG for responses endpoint by mocking build_rag_context.""" + mocker.patch( + f"{MODULE}.build_rag_context", + new=mocker.AsyncMock( + return_value=RAGContext( + context_text=rag_context, + referenced_documents=[], + ), + ), + ) + + +def _patch_moderation(mocker: MockerFixture, decision: str = "passed") -> Any: + """Patch run_shield_moderation; return mock moderation result.""" + mock_moderation = mocker.Mock() + mock_moderation.decision = decision + mocker.patch( + f"{MODULE}.run_shield_moderation", + new=mocker.AsyncMock(return_value=mock_moderation), + ) + return mock_moderation + + +def _make_responses_response( + *, + output_text: str = "", + conversation: str = "", + model: str = "provider/model1", + **kwargs: Any, +) -> ResponsesResponse: + """Build a minimal valid ResponsesResponse for tests.""" + defaults = { + "id": "resp_1", + "object": "response", + "created_at": 0, + "status": "completed", + "model": model, + "output": [], + "conversation": conversation, + "completed_at": 0, + "output_text": output_text, + "available_quotas": {}, + } + defaults.update(kwargs) + return ResponsesResponse(**defaults) + + +def _patch_handle_non_streaming_common( + mocker: MockerFixture, config: AppConfig +) -> None: + """Patch deps used by handle_non_streaming_response (blocked and success).""" + mocker.patch(f"{MODULE}.configuration", config) + mocker.patch(f"{MODULE}.get_available_quotas", return_value={}) + mocker.patch( + f"{MODULE}.get_topic_summary", + new=mocker.AsyncMock(return_value=None), + ) + mocker.patch(f"{MODULE}.store_query_results") + + +@pytest.fixture(name="dummy_request") +def dummy_request_fixture() -> Request: + """Minimal FastAPI Request with authorized_actions for responses endpoint.""" + req = Request(scope={"type": "http", "headers": []}) + req.state.authorized_actions = {Action.QUERY, Action.READ_OTHERS_CONVERSATIONS} + return req + + +@pytest.fixture(name="minimal_config") +def 
minimal_config_fixture() -> AppConfig: + """Minimal AppConfig for responses endpoint tests.""" + cfg = AppConfig() + cfg.init_from_dict( + { + "name": "test", + "service": {"host": "localhost", "port": 8080}, + "llama_stack": { + "api_key": "test-key", + "url": "http://test.com:1234", + "use_as_library_client": False, + }, + "user_data_collection": {}, + "authentication": {"module": "noop"}, + "authorization": {"access_rules": []}, + } + ) + return cfg + + +def _request_with_model_and_conv( + input_text: str = "Hello", model: str = "provider/model1" +) -> ResponsesRequest: + """Build request with model and conversation set (as handler does).""" + return ResponsesRequest( + input=input_text, + model=model, + conversation=VALID_CONV_ID, + ) + + +def _request_with_previous_response_id( + input_text: str = "Hello", + model: str = "provider/model1", + previous_response_id: str = "resp_prev_123", + store: bool = True, +) -> ResponsesRequest: + """Build request with previous_response_id (conversation set by handler).""" + request = ResponsesRequest( + input=input_text, + model=model, + previous_response_id=previous_response_id, + store=store, + ) + request.conversation = VALID_CONV_ID + return request + + +class TestResponsesEndpointHandler: + """Unit tests for responses_endpoint_handler.""" + + @pytest.mark.asyncio + async def test_successful_responses_string_input_non_streaming( + self, + dummy_request: Request, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test successful responses request with string input returns ResponsesResponse.""" + responses_request = ResponsesRequest(input="What is Kubernetes?") + _patch_base(mocker, minimal_config) + _patch_client(mocker) + _patch_resolve_response_context(mocker, conversation="conv_new_123") + mocker.patch( + f"{MODULE}.select_model_for_responses", + new=mocker.AsyncMock(return_value="provider/model1"), + ) + mocker.patch( + f"{MODULE}.check_model_configured", + 
new=mocker.AsyncMock(return_value=True), + ) + _patch_rag(mocker) + _patch_moderation(mocker, decision="passed") + + mock_response = _make_responses_response( + output_text="Kubernetes is a container orchestration platform.", + conversation="conv_new_123", + ) + mocker.patch( + f"{MODULE}.handle_non_streaming_response", + new=mocker.AsyncMock(return_value=mock_response), + ) + + response = await responses_endpoint_handler( + request=dummy_request, + responses_request=responses_request, + auth=MOCK_AUTH, + mcp_headers={}, + ) + assert isinstance(response, ResponsesResponse) + assert ( + response.output_text == "Kubernetes is a container orchestration platform." + ) + assert response.conversation == "conv_new_123" + + @pytest.mark.asyncio + async def test_responses_with_conversation_validates_and_retrieves( + self, + dummy_request: Request, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that providing conversation ID calls validate_and_retrieve_conversation.""" + responses_request = ResponsesRequest( + input="Follow-up question", + conversation=VALID_CONV_ID, + ) + _patch_base(mocker, minimal_config) + mock_user_conv = mocker.Mock(spec=UserConversation) + mock_user_conv.id = VALID_CONV_ID_NORMALIZED + mock_validate = mocker.patch( + f"{ENDPOINTS_MODULE}.validate_and_retrieve_conversation", + return_value=mock_user_conv, + ) + _, mock_holder = _patch_client(mocker) + mocker.patch( + f"{ENDPOINTS_MODULE}.AsyncLlamaStackClientHolder", + return_value=mock_holder, + ) + mocker.patch( + f"{ENDPOINTS_MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + mocker.patch( + f"{ENDPOINTS_MODULE}.to_llama_stack_conversation_id", + return_value=VALID_CONV_ID, + ) + mocker.patch( + f"{MODULE}.select_model_for_responses", + new=mocker.AsyncMock(return_value="provider/model1"), + ) + mocker.patch( + f"{MODULE}.check_model_configured", + new=mocker.AsyncMock(return_value=True), + ) + _patch_rag(mocker) + 
_patch_moderation(mocker, decision="passed") + mocker.patch( + f"{MODULE}.handle_non_streaming_response", + new=mocker.AsyncMock( + return_value=_make_responses_response( + output_text="Answer", + conversation=VALID_CONV_ID_NORMALIZED, + ) + ), + ) + + await responses_endpoint_handler( + request=dummy_request, + responses_request=responses_request, + auth=MOCK_AUTH, + mcp_headers={}, + ) + + mock_validate.assert_called_once() + + @pytest.mark.asyncio + async def test_responses_model_not_configured_raises_404( + self, + dummy_request: Request, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that unconfigured model leads to 404 HTTPException.""" + responses_request = ResponsesRequest(input="Hello", model="provider/unknown") + _patch_base(mocker, minimal_config) + _patch_client(mocker) + _patch_resolve_response_context(mocker) + mocker.patch( + f"{MODULE}.select_model_for_responses", + new=mocker.AsyncMock(return_value="provider/unknown"), + ) + mocker.patch( + f"{MODULE}.check_model_configured", + new=mocker.AsyncMock(return_value=False), + ) + mocker.patch( + f"{MODULE}.extract_provider_and_model_from_model_id", + return_value=("provider", "unknown"), + ) + + with pytest.raises(HTTPException) as exc_info: + await responses_endpoint_handler( + request=dummy_request, + responses_request=responses_request, + auth=MOCK_AUTH, + mcp_headers={}, + ) + assert exc_info.value.status_code == 404 + + @pytest.mark.asyncio + async def test_responses_streaming_returns_streaming_response( + self, + dummy_request: Request, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that stream=True delegates to handle_streaming_response.""" + responses_request = ResponsesRequest(input="Stream this", stream=True) + _patch_base(mocker, minimal_config) + _patch_client(mocker) + _patch_resolve_response_context(mocker) + mocker.patch( + f"{MODULE}.select_model_for_responses", + new=mocker.AsyncMock(return_value="provider/model1"), + ) + 
mocker.patch( + f"{MODULE}.check_model_configured", + new=mocker.AsyncMock(return_value=True), + ) + _patch_rag(mocker) + _patch_moderation(mocker, decision="passed") + mock_streaming = mocker.Mock(spec=StreamingResponse) + mocker.patch( + f"{MODULE}.handle_streaming_response", + new=mocker.AsyncMock(return_value=mock_streaming), + ) + + response = await responses_endpoint_handler( + request=dummy_request, + responses_request=responses_request, + auth=MOCK_AUTH, + mcp_headers={}, + ) + assert response is mock_streaming + + @pytest.mark.asyncio + async def test_responses_azure_token_refresh( + self, + dummy_request: Request, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that Azure token refresh is called when model starts with azure.""" + responses_request = ResponsesRequest(input="Hi", model="azure/some-model") + _patch_base(mocker, minimal_config) + _patch_client(mocker) + _patch_resolve_response_context(mocker) + mocker.patch( + f"{MODULE}.select_model_for_responses", + new=mocker.AsyncMock(return_value="azure/some-model"), + ) + mocker.patch( + f"{MODULE}.check_model_configured", + new=mocker.AsyncMock(return_value=True), + ) + mock_azure = mocker.Mock() + mock_azure.is_entra_id_configured = True + mock_azure.is_token_expired = True + mock_azure.refresh_token.return_value = True + mocker.patch(f"{MODULE}.AzureEntraIDManager", return_value=mock_azure) + updated_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_update_token = mocker.patch( + f"{MODULE}.update_azure_token", + new=mocker.AsyncMock(return_value=updated_client), + ) + _patch_rag(mocker) + _patch_moderation(mocker, decision="passed") + mocker.patch( + f"{MODULE}.handle_non_streaming_response", + new=mocker.AsyncMock( + return_value=_make_responses_response( + output_text="Ok", + conversation="conv_new", + model="azure/some-model", + ) + ), + ) + + await responses_endpoint_handler( + request=dummy_request, + responses_request=responses_request, + auth=MOCK_AUTH, 
+ mcp_headers={}, + ) + mock_update_token.assert_called_once() + + @pytest.mark.asyncio + async def test_responses_structured_input_appends_rag_message( + self, + dummy_request: Request, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that non-string input uses extract_text and appends RAG message.""" + structured_input: list[Any] = [ + OpenAIResponseMessage(role="user", content="What is K8s?"), + ] + responses_request = ResponsesRequest( + input=cast(Any, structured_input), + ) + _patch_base(mocker, minimal_config) + _patch_client(mocker) + _patch_resolve_response_context(mocker) + mocker.patch( + f"{MODULE}.select_model_for_responses", + new=mocker.AsyncMock(return_value="provider/model1"), + ) + mocker.patch( + f"{MODULE}.check_model_configured", + new=mocker.AsyncMock(return_value=True), + ) + mock_build_rag = mocker.patch( + f"{MODULE}.build_rag_context", + new=mocker.AsyncMock( + return_value=RAGContext( + context_text="\n\nRelevant documentation:\nDoc1", + referenced_documents=[], + ), + ), + ) + _patch_moderation(mocker, decision="passed") + mocker.patch( + f"{MODULE}.handle_non_streaming_response", + new=mocker.AsyncMock( + return_value=_make_responses_response( + output_text="K8s is Kubernetes.", + conversation="conv_new", + ) + ), + ) + + await responses_endpoint_handler( + request=dummy_request, + responses_request=responses_request, + auth=MOCK_AUTH, + mcp_headers={}, + ) + + mock_build_rag.assert_called_once() + call_args = mock_build_rag.call_args[0] + assert ( + call_args[2] == "What is K8s?" 
+ ) # input_text (3rd arg to build_rag_context) + + @pytest.mark.asyncio + async def test_responses_blocked_with_conversation_appends_refusal( + self, + dummy_request: Request, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Blocked moderation with conversation calls append_turn_items_to_conversation.""" + responses_request = ResponsesRequest( + input="Bad", + conversation=VALID_CONV_ID, + stream=False, + model="provider/model1", + ) + _patch_base(mocker, minimal_config) + mock_user_conv = mocker.Mock(spec=UserConversation) + mock_user_conv.id = VALID_CONV_ID_NORMALIZED + mocker.patch( + f"{ENDPOINTS_MODULE}.validate_and_retrieve_conversation", + return_value=mock_user_conv, + ) + mock_client, mock_holder = _patch_client(mocker) + mocker.patch( + f"{ENDPOINTS_MODULE}.AsyncLlamaStackClientHolder", + return_value=mock_holder, + ) + mocker.patch( + f"{ENDPOINTS_MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + mocker.patch( + f"{ENDPOINTS_MODULE}.to_llama_stack_conversation_id", + return_value=VALID_CONV_ID, + ) + mocker.patch( + f"{MODULE}.select_model_for_responses", + new=mocker.AsyncMock(return_value="provider/model1"), + ) + mocker.patch( + f"{MODULE}.check_model_configured", + new=mocker.AsyncMock(return_value=True), + ) + _patch_rag(mocker) + mock_moderation = _patch_moderation(mocker, decision="blocked") + mock_moderation.message = "Blocked" + mock_moderation.moderation_id = "resp_blocked_123" + mock_moderation.refusal_response = OpenAIResponseMessage( + type="message", role="assistant", content="Blocked" + ) + mock_append = mocker.patch( + f"{MODULE}.append_turn_items_to_conversation", + new=mocker.AsyncMock(), + ) + mocker.patch(f"{MODULE}.store_query_results") + + response = await responses_endpoint_handler( + request=dummy_request, + responses_request=responses_request, + auth=MOCK_AUTH, + mcp_headers={}, + ) + + mock_append.assert_awaited_once_with( + client=mock_client, + 
conversation_id=VALID_CONV_ID, + user_input=responses_request.input, + llm_output=[mock_moderation.refusal_response], + ) + assert isinstance(response, ResponsesResponse) + payload = response.model_dump() + assert "model" in payload, "Handler must set model on the response payload" + ResponsesResponse.model_validate(payload) + + +class TestHandleNonStreamingResponse: + """Unit tests for handle_non_streaming_response.""" + + @pytest.mark.asyncio + async def test_handle_non_streaming_blocked_returns_refusal( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that blocked moderation returns response with refusal message.""" + request = _request_with_model_and_conv("Bad input") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_moderation = mocker.Mock() + mock_moderation.decision = "blocked" + mock_moderation.message = "Content blocked" + mock_refusal = mocker.Mock(spec=OpenAIResponseMessage) + mock_refusal.type = "message" + mock_refusal.role = "assistant" + mock_refusal.content = "Content blocked" + mock_moderation.refusal_response = mock_refusal + + _patch_handle_non_streaming_common(mocker, minimal_config) + mock_client.conversations.items.create = mocker.AsyncMock() + mock_api_response = mocker.Mock() + mock_api_response.output = [mock_refusal] + mock_api_response.model_dump.return_value = { + "id": "resp_blocked", + "object": "response", + "created_at": 0, + "status": "completed", + "model": "provider/model1", + "output": [mock_refusal], + "usage": { + "input_tokens": 0, + "output_tokens": 0, + "total_tokens": 0, + "input_tokens_details": {"cached_tokens": 0}, + "output_tokens_details": {"reasoning_tokens": 0}, + }, + } + mocker.patch( + f"{MODULE}.OpenAIResponseObject.model_construct", + return_value=mock_api_response, + ) + + response = await handle_non_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Bad input", + started_at=datetime.now(UTC), + 
moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + assert isinstance(response, ResponsesResponse) + assert response.output_text == "Content blocked" + mock_client.responses.create.assert_not_called() + + @pytest.mark.asyncio + async def test_handle_non_streaming_success_returns_response( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test successful handle_non_streaming_response returns ResponsesResponse.""" + request = _request_with_model_and_conv("Hello") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + mock_api_response = mocker.Mock(spec=OpenAIResponseObject) + mock_api_response.output = [] + mock_api_response.usage = mocker.Mock( + input_tokens=1, output_tokens=2, total_tokens=3 + ) + mock_api_response.model_dump.return_value = { + "id": "resp_1", + "object": "response", + "created_at": 0, + "status": "completed", + "model": "provider/model1", + "output": [], + "usage": { + "input_tokens": 1, + "output_tokens": 2, + "total_tokens": 3, + "input_tokens_details": {"cached_tokens": 0}, + "output_tokens_details": {"reasoning_tokens": 0}, + }, + } + mock_client.responses.create = mocker.AsyncMock(return_value=mock_api_response) + + _patch_handle_non_streaming_common(mocker, minimal_config) + mocker.patch( + f"{MODULE}.extract_token_usage", + return_value=mocker.Mock(input_tokens=1, output_tokens=2), + ) + mocker.patch(f"{MODULE}.consume_query_tokens") + mocker.patch( + f"{MODULE}.build_turn_summary", + return_value=mocker.Mock(referenced_documents=[]), + ) + mocker.patch( + f"{MODULE}.extract_text_from_response_items", + return_value="Model reply", + ) + mocker.patch( + f"{MODULE}.extract_vector_store_ids_from_tools", + return_value=[], + ) + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + + response = await handle_non_streaming_response( + client=mock_client, + 
request=request, + auth=MOCK_AUTH, + input_text="Hello", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + + assert isinstance(response, ResponsesResponse) + assert response.output_text == "Model reply" + mock_client.responses.create.assert_called_once() + + @pytest.mark.asyncio + async def test_handle_non_streaming_with_previous_response_id_appends_turn( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test append_turn_items_to_conversation triggers with store and previous_response_id.""" + request = _request_with_previous_response_id("Hi", previous_response_id="r1") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + mock_api_response = mocker.Mock(spec=OpenAIResponseObject) + mock_api_response.output = [] + mock_api_response.id = "resp_1" + mock_api_response.usage = mocker.Mock( + input_tokens=1, output_tokens=2, total_tokens=3 + ) + mock_api_response.model_dump.return_value = { + "id": "resp_1", + "object": "response", + "created_at": 0, + "status": "completed", + "model": "provider/model1", + "output": [], + "usage": { + "input_tokens": 1, + "output_tokens": 2, + "total_tokens": 3, + "input_tokens_details": {"cached_tokens": 0}, + "output_tokens_details": {"reasoning_tokens": 0}, + }, + } + mock_client.responses.create = mocker.AsyncMock(return_value=mock_api_response) + + _patch_handle_non_streaming_common(mocker, minimal_config) + mocker.patch( + f"{MODULE}.extract_token_usage", + return_value=mocker.Mock(input_tokens=1, output_tokens=2), + ) + mocker.patch(f"{MODULE}.consume_query_tokens") + mocker.patch( + f"{MODULE}.build_turn_summary", + return_value=mocker.Mock(referenced_documents=[]), + ) + mocker.patch( + f"{MODULE}.extract_text_from_response_items", + return_value="Reply", + ) + mocker.patch( + f"{MODULE}.extract_vector_store_ids_from_tools", + return_value=[], + ) + 
mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + mock_append = mocker.patch( + f"{MODULE}.append_turn_items_to_conversation", + new=mocker.AsyncMock(), + ) + + await handle_non_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Hi", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + + mock_append.assert_awaited_once() + call_args = mock_append.call_args[0] + assert call_args[1] == VALID_CONV_ID + assert call_args[3] == [] + + @pytest.mark.asyncio + async def test_handle_non_streaming_context_length_raises_413( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that RuntimeError with context_length raises 413.""" + request = _request_with_model_and_conv("Long input") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_client.responses.create = mocker.AsyncMock( + side_effect=RuntimeError("context_length exceeded") + ) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + _patch_handle_non_streaming_common(mocker, minimal_config) + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + + with pytest.raises(HTTPException) as exc_info: + await handle_non_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Long input", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + + assert exc_info.value.status_code == 413 + + @pytest.mark.asyncio + async def test_handle_non_streaming_connection_error_raises_503( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that APIConnectionError raises 503.""" + request = _request_with_model_and_conv("Hi") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_client.responses.create = mocker.AsyncMock( + 
side_effect=APIConnectionError( + message="Connection failed", + request=mocker.Mock(), + ) + ) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + _patch_handle_non_streaming_common(mocker, minimal_config) + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + + with pytest.raises(HTTPException) as exc_info: + await handle_non_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Hi", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + + assert exc_info.value.status_code == 503 + + @pytest.mark.asyncio + async def test_handle_non_streaming_api_status_error_raises_http( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that APIStatusError is handled and re-raised as HTTPException.""" + request = _request_with_model_and_conv("Hi") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_client.responses.create = mocker.AsyncMock( + side_effect=APIStatusError( + message="API error", + response=mocker.Mock(request=None), + body=None, + ) + ) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + _patch_handle_non_streaming_common(mocker, minimal_config) + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + mocker.patch( + f"{MODULE}.handle_known_apistatus_errors", + return_value=mocker.Mock( + model_dump=lambda: { + "status_code": 500, + "detail": {"response": "Error", "cause": "API error"}, + } + ), + ) + + with pytest.raises(HTTPException) as exc_info: + await handle_non_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Hi", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + + assert exc_info.value.status_code == 500 + + @pytest.mark.asyncio + async def 
test_handle_non_streaming_runtime_error_without_context_reraises( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that RuntimeError without context_length is re-raised.""" + request = _request_with_model_and_conv("Hi") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_client.responses.create = mocker.AsyncMock( + side_effect=RuntimeError("Some other error") + ) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + _patch_handle_non_streaming_common(mocker, minimal_config) + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + + with pytest.raises(RuntimeError, match="Some other error"): + await handle_non_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Hi", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + + +class TestHandleStreamingResponse: + """Unit tests for handle_streaming_response and streaming generators.""" + + @pytest.mark.asyncio + async def test_handle_streaming_blocked_returns_sse_consumes_shield_generator( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test streaming with blocked moderation yields SSE from shield_violation_generator.""" + request = _request_with_model_and_conv("Bad", model="provider/model1") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_moderation = mocker.Mock() + mock_moderation.decision = "blocked" + mock_moderation.message = "Blocked" + mock_moderation.moderation_id = "mod_123" + mock_refusal = OpenAIResponseMessage( + role="assistant", content="Blocked", type="message" + ) + mock_moderation.refusal_response = mock_refusal + + mocker.patch(f"{MODULE}.configuration", minimal_config) + mocker.patch(f"{MODULE}.get_available_quotas", return_value={}) + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + 
mocker.patch( + f"{MODULE}.get_topic_summary", + new=mocker.AsyncMock(return_value=None), + ) + mocker.patch(f"{MODULE}.store_query_results") + + mock_client.conversations.items.create = mocker.AsyncMock() + response = await handle_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Bad", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + + assert isinstance(response, StreamingResponse) + assert response.media_type == "text/event-stream" + collected: list[str] = [] + async for part in response.body_iterator: + chunk_str = ( + part.decode("utf-8") + if isinstance(part, bytes) + else (part if isinstance(part, str) else bytes(part).decode("utf-8")) + ) + collected.append(chunk_str) + body = "".join(collected) + assert "event: response.created" in body + assert "event: response.output_item.added" in body + assert "event: response.output_item.done" in body + assert "event: response.completed" in body + assert "[DONE]" in body + mock_client.responses.create.assert_not_called() + + @pytest.mark.asyncio + async def test_handle_streaming_success_returns_sse_consumes_response_generator( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test streaming with passed moderation yields SSE from response_generator.""" + request = _request_with_model_and_conv("Hi", model="provider/model1") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + mock_chunk = mocker.Mock() + mock_chunk.type = "response.completed" + mock_chunk.response = mocker.Mock() + mock_chunk.response.id = "r1" + mock_chunk.response.output = [] + mock_chunk.response.usage = mocker.Mock( + input_tokens=1, output_tokens=2, total_tokens=3 + ) + mock_chunk.model_dump.return_value = { + "type": "response.completed", + "response": {"id": "r1", "usage": {"input_tokens": 1}}, + } + + async def mock_stream() -> Any: 
+ yield mock_chunk + + mock_client.responses.create = mocker.AsyncMock(return_value=mock_stream()) + + mocker.patch(f"{MODULE}.configuration", minimal_config) + mocker.patch(f"{MODULE}.get_available_quotas", return_value={}) + mocker.patch(f"{MODULE}.extract_token_usage", return_value=mocker.Mock()) + mocker.patch(f"{MODULE}.consume_query_tokens") + mocker.patch(f"{MODULE}.extract_vector_store_ids_from_tools", return_value=[]) + mocker.patch( + f"{MODULE}.build_turn_summary", + return_value=TurnSummary(referenced_documents=[]), + ) + mocker.patch( + f"{MODULE}.get_topic_summary", + new=mocker.AsyncMock(return_value=None), + ) + mocker.patch(f"{MODULE}.store_query_results") + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mock_client + mocker.patch(f"{MODULE}.AsyncLlamaStackClientHolder", return_value=mock_holder) + response = await handle_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Hi", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + assert isinstance(response, StreamingResponse) + collected: list[str] = [] + async for part in response.body_iterator: + chunk_str = ( + part.decode("utf-8") + if isinstance(part, bytes) + else (part if isinstance(part, str) else bytes(part).decode("utf-8")) + ) + collected.append(chunk_str) + body = "".join(collected) + assert "response.completed" in body or "event:" in body + assert "[DONE]" in body + mock_client.responses.create.assert_called_once() + + @pytest.mark.asyncio + async def test_handle_streaming_in_progress_chunk_sets_quotas_and_output_text( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test in_progress chunk includes available_quotas and output_text.""" + request = _request_with_model_and_conv("Hi", model="provider/model1") + mock_client = 
mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + in_progress_chunk = mocker.Mock() + in_progress_chunk.type = "response.in_progress" + in_progress_chunk.model_dump.return_value = { + "type": "response.in_progress", + "response": {"id": "r0"}, + } + + completed_chunk = mocker.Mock() + completed_chunk.type = "response.completed" + completed_chunk.response = mocker.Mock() + completed_chunk.response.id = "r1" + completed_chunk.response.output = [] + completed_chunk.response.usage = mocker.Mock( + input_tokens=1, output_tokens=2, total_tokens=3 + ) + completed_chunk.model_dump.return_value = { + "type": "response.completed", + "response": {"id": "r1", "usage": {"input_tokens": 1}}, + } + + async def mock_stream() -> Any: + yield in_progress_chunk + yield completed_chunk + + mock_client.responses.create = mocker.AsyncMock(return_value=mock_stream()) + + mocker.patch(f"{MODULE}.configuration", minimal_config) + mocker.patch(f"{MODULE}.get_available_quotas", return_value={}) + mocker.patch(f"{MODULE}.extract_token_usage", return_value=mocker.Mock()) + mocker.patch(f"{MODULE}.consume_query_tokens") + mocker.patch(f"{MODULE}.extract_vector_store_ids_from_tools", return_value=[]) + mocker.patch( + f"{MODULE}.build_turn_summary", + return_value=TurnSummary(referenced_documents=[]), + ) + mocker.patch( + f"{MODULE}.get_topic_summary", + new=mocker.AsyncMock(return_value=None), + ) + mocker.patch(f"{MODULE}.store_query_results") + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mock_client + mocker.patch(f"{MODULE}.AsyncLlamaStackClientHolder", return_value=mock_holder) + + response = await handle_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Hi", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + 
inline_rag_context=RAGContext(), + ) + collected: list[str] = [] + async for part in response.body_iterator: + chunk_str = ( + part.decode("utf-8") + if isinstance(part, bytes) + else (part if isinstance(part, str) else bytes(part).decode("utf-8")) + ) + collected.append(chunk_str) + body = "".join(collected) + assert "response.in_progress" in body + assert '"available_quotas":{}' in body or '"available_quotas": {}' in body + assert "[DONE]" in body + + @pytest.mark.asyncio + async def test_handle_streaming_builds_tool_call_summary_from_output( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that response output items are passed to build_tool_call_summary.""" + request = _request_with_model_and_conv("Hi", model="provider/model1") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + mock_output_item = mocker.Mock() + completed_chunk = mocker.Mock() + completed_chunk.type = "response.completed" + completed_chunk.response = mocker.Mock() + completed_chunk.response.id = "r1" + completed_chunk.response.output = [mock_output_item] + completed_chunk.response.usage = mocker.Mock( + input_tokens=1, output_tokens=2, total_tokens=3 + ) + completed_chunk.model_dump.return_value = { + "type": "response.completed", + "response": {"id": "r1", "usage": {"input_tokens": 1}}, + } + + async def mock_stream() -> Any: + yield completed_chunk + + mock_client.responses.create = mocker.AsyncMock(return_value=mock_stream()) + + mocker.patch(f"{MODULE}.configuration", minimal_config) + mocker.patch(f"{MODULE}.get_available_quotas", return_value={}) + mocker.patch(f"{MODULE}.extract_token_usage", return_value=mocker.Mock()) + mocker.patch(f"{MODULE}.consume_query_tokens") + mocker.patch(f"{MODULE}.extract_vector_store_ids_from_tools", return_value=[]) + mocker.patch( + f"{MODULE}.build_turn_summary", + return_value=TurnSummary(referenced_documents=[]), + ) + 
mock_build_tool_call = mocker.patch( + f"{MODULE}.build_tool_call_summary", + return_value=(mocker.Mock(), mocker.Mock()), + ) + mocker.patch( + f"{MODULE}.get_topic_summary", + new=mocker.AsyncMock(return_value=None), + ) + mocker.patch(f"{MODULE}.store_query_results") + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + mocker.patch(f"{MODULE}.parse_referenced_documents", return_value=[]) + mocker.patch( + f"{MODULE}.deduplicate_referenced_documents", side_effect=lambda x: x + ) + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mock_client + mocker.patch(f"{MODULE}.AsyncLlamaStackClientHolder", return_value=mock_holder) + + response = await handle_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Hi", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + collected: list[str] = [] + async for part in response.body_iterator: + chunk_str = ( + part.decode("utf-8") + if isinstance(part, bytes) + else (part if isinstance(part, str) else bytes(part).decode("utf-8")) + ) + collected.append(chunk_str) + mock_build_tool_call.assert_called_once() + + @pytest.mark.asyncio + async def test_handle_streaming_with_previous_response_id_appends_turn( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that store=True and previous_response_id in streaming triggers append_turn_items.""" + request = _request_with_previous_response_id( + "Hi", previous_response_id="r_prev" + ) + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + completed_chunk = mocker.Mock() + completed_chunk.type = "response.completed" + completed_chunk.response = mocker.Mock() + completed_chunk.response.id = "r1" + completed_chunk.response.output = [] + completed_chunk.response.usage = mocker.Mock( + input_tokens=1, 
output_tokens=2, total_tokens=3 + ) + completed_chunk.model_dump.return_value = { + "type": "response.completed", + "response": {"id": "r1", "usage": {"input_tokens": 1}}, + } + + async def mock_stream() -> Any: + yield completed_chunk + + mock_client.responses.create = mocker.AsyncMock(return_value=mock_stream()) + + mocker.patch(f"{MODULE}.configuration", minimal_config) + mocker.patch(f"{MODULE}.get_available_quotas", return_value={}) + mocker.patch(f"{MODULE}.extract_token_usage", return_value=mocker.Mock()) + mocker.patch(f"{MODULE}.consume_query_tokens") + mocker.patch(f"{MODULE}.extract_vector_store_ids_from_tools", return_value=[]) + mocker.patch( + f"{MODULE}.build_turn_summary", + return_value=TurnSummary(referenced_documents=[]), + ) + mocker.patch( + f"{MODULE}.get_topic_summary", + new=mocker.AsyncMock(return_value=None), + ) + mocker.patch(f"{MODULE}.store_query_results") + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + mock_append = mocker.patch( + f"{MODULE}.append_turn_items_to_conversation", + new=mocker.AsyncMock(), + ) + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mock_client + mocker.patch(f"{MODULE}.AsyncLlamaStackClientHolder", return_value=mock_holder) + + response = await handle_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Hi", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + collected: list[str] = [] + async for part in response.body_iterator: + chunk_str = ( + part.decode("utf-8") + if isinstance(part, bytes) + else (part if isinstance(part, str) else bytes(part).decode("utf-8")) + ) + collected.append(chunk_str) + mock_append.assert_called_once() + call_args = mock_append.call_args[0] + assert call_args[1] == VALID_CONV_ID + assert call_args[3] == [] + + @pytest.mark.asyncio + async def test_handle_streaming_context_length_raises_413( + self, + 
minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test streaming raises 413 when create raises RuntimeError context_length.""" + request = _request_with_model_and_conv("Long", model="provider/model1") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_client.responses.create = mocker.AsyncMock( + side_effect=RuntimeError("context_length exceeded") + ) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + mocker.patch(f"{MODULE}.configuration", minimal_config) + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + with pytest.raises(HTTPException) as exc_info: + await handle_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Long", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + assert exc_info.value.status_code == 413 + + @pytest.mark.asyncio + async def test_handle_streaming_connection_error_raises_503( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test streaming raises 503 when create raises APIConnectionError.""" + request = _request_with_model_and_conv("Hi", model="provider/model1") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_client.responses.create = mocker.AsyncMock( + side_effect=APIConnectionError( + message="Connection failed", + request=mocker.Mock(), + ) + ) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + mocker.patch(f"{MODULE}.configuration", minimal_config) + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + with pytest.raises(HTTPException) as exc_info: + await handle_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Hi", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + + assert exc_info.value.status_code 
== 503 diff --git a/tests/unit/app/endpoints/test_streaming_query.py b/tests/unit/app/endpoints/test_streaming_query.py index 8107a387c..3e0670e94 100644 --- a/tests/unit/app/endpoints/test_streaming_query.py +++ b/tests/unit/app/endpoints/test_streaming_query.py @@ -52,7 +52,14 @@ from models.responses import InternalServerErrorResponse from utils.token_counter import TokenCounter from utils.stream_interrupts import StreamInterruptRegistry -from utils.types import RAGContext, ReferencedDocument, ResponsesApiParams, TurnSummary +from utils.types import ( + RAGChunk, + RAGContext, + ReferencedDocument, + ResponsesApiParams, + ShieldModerationPassed, + TurnSummary, +) MOCK_AUTH_STREAMING = ( "00000001-0001-0001-0001-000000000001", @@ -354,6 +361,10 @@ async def test_successful_streaming_query( "app.endpoints.streaming_query.prepare_responses_params", new=mocker.AsyncMock(return_value=mock_responses_params), ) + mocker.patch( + "app.endpoints.streaming_query.run_shield_moderation", + new=mocker.AsyncMock(return_value=ShieldModerationPassed()), + ) mocker.patch("app.endpoints.streaming_query.AzureEntraIDManager") mocker.patch( @@ -437,6 +448,10 @@ async def test_streaming_query_text_media_type_header( "app.endpoints.streaming_query.prepare_responses_params", new=mocker.AsyncMock(return_value=mock_responses_params), ) + mocker.patch( + "app.endpoints.streaming_query.run_shield_moderation", + new=mocker.AsyncMock(return_value=ShieldModerationPassed()), + ) mocker.patch("app.endpoints.streaming_query.AzureEntraIDManager") mocker.patch( @@ -531,6 +546,10 @@ async def test_streaming_query_with_conversation( "app.endpoints.streaming_query.prepare_responses_params", new=mocker.AsyncMock(return_value=mock_responses_params), ) + mocker.patch( + "app.endpoints.streaming_query.run_shield_moderation", + new=mocker.AsyncMock(return_value=ShieldModerationPassed()), + ) mocker.patch("app.endpoints.streaming_query.AzureEntraIDManager") mocker.patch( @@ -623,6 +642,10 @@ async def 
test_streaming_query_with_attachments( "app.endpoints.streaming_query.prepare_responses_params", new=mocker.AsyncMock(return_value=mock_responses_params), ) + mocker.patch( + "app.endpoints.streaming_query.run_shield_moderation", + new=mocker.AsyncMock(return_value=ShieldModerationPassed()), + ) mocker.patch("app.endpoints.streaming_query.AzureEntraIDManager") mocker.patch( @@ -725,6 +748,10 @@ async def test_streaming_query_azure_token_refresh( "app.endpoints.streaming_query.extract_provider_and_model_from_model_id", return_value=("azure", "model1"), ) + mocker.patch( + "app.endpoints.streaming_query.run_shield_moderation", + new=mocker.AsyncMock(return_value=ShieldModerationPassed()), + ) mocker.patch("app.endpoints.streaming_query.metrics.llm_calls_total") async def mock_generator() -> AsyncIterator[str]: @@ -784,17 +811,15 @@ async def test_retrieve_response_generator_success( mock_context.client = mock_client mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_context.query_request = QueryRequest( query="test" ) # pyright: ignore[reportCallIssue] + mock_context.moderation_result = ShieldModerationPassed() async def mock_response_gen() -> AsyncIterator[str]: yield "test" - mocker.patch( - "app.endpoints.streaming_query.run_shield_moderation", - new=mocker.AsyncMock(return_value=mocker.Mock(blocked=False)), - ) mock_client.responses = mocker.Mock() mock_client.responses.create = mocker.AsyncMock( return_value=mock_response_gen() @@ -812,7 +837,7 @@ async def mock_response_generator( ) generator, turn_summary = await retrieve_response_generator( - mock_responses_params, mock_context, [] + mock_responses_params, mock_context ) assert isinstance(turn_summary, TurnSummary) @@ -834,6 +859,7 @@ async def test_retrieve_response_generator_shield_blocked( mock_context.client = mock_client mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = 
RAGContext() mock_context.query_request = QueryRequest( query="test", media_type=MEDIA_TYPE_TEXT ) # pyright: ignore[reportCallIssue] @@ -841,17 +867,16 @@ async def test_retrieve_response_generator_shield_blocked( mock_moderation_result = mocker.Mock() mock_moderation_result.decision = "blocked" mock_moderation_result.message = "Content blocked" + mock_moderation_result.moderation_id = "mod_123" + mock_moderation_result.refusal_response = mocker.Mock() + mock_context.moderation_result = mock_moderation_result mocker.patch( - "app.endpoints.streaming_query.run_shield_moderation", - new=mocker.AsyncMock(return_value=mock_moderation_result), - ) - mocker.patch( - "app.endpoints.streaming_query.append_turn_to_conversation", + "app.endpoints.streaming_query.append_turn_items_to_conversation", new=mocker.AsyncMock(), ) _generator, turn_summary = await retrieve_response_generator( - mock_responses_params, mock_context, [] + mock_responses_params, mock_context ) assert isinstance(turn_summary, TurnSummary) @@ -878,14 +903,12 @@ async def test_retrieve_response_generator_connection_error( mock_context.client = mock_client mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_context.query_request = QueryRequest( query="test" ) # pyright: ignore[reportCallIssue] + mock_context.moderation_result = ShieldModerationPassed() - mocker.patch( - "app.endpoints.streaming_query.run_shield_moderation", - new=mocker.AsyncMock(return_value=mocker.Mock(blocked=False)), - ) mock_request_obj = mocker.Mock() mock_client.responses = mocker.Mock() mock_client.responses.create = mocker.AsyncMock( @@ -908,7 +931,7 @@ async def test_retrieve_response_generator_connection_error( ) with pytest.raises(HTTPException) as exc_info: - await retrieve_response_generator(mock_responses_params, mock_context, []) + await retrieve_response_generator(mock_responses_params, mock_context) assert exc_info.value.status_code == 503 @@ -933,14 
+956,12 @@ async def test_retrieve_response_generator_api_status_error( mock_context.client = mock_client mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_context.query_request = QueryRequest( query="test" ) # pyright: ignore[reportCallIssue] + mock_context.moderation_result = ShieldModerationPassed() - mocker.patch( - "app.endpoints.streaming_query.run_shield_moderation", - new=mocker.AsyncMock(return_value=mocker.Mock(blocked=False)), - ) mock_request_obj = mocker.Mock() mock_client.responses = mocker.Mock() mock_client.responses.create = mocker.AsyncMock( @@ -960,7 +981,7 @@ async def test_retrieve_response_generator_api_status_error( ) with pytest.raises(HTTPException) as exc_info: - await retrieve_response_generator(mock_responses_params, mock_context, []) + await retrieve_response_generator(mock_responses_params, mock_context) assert exc_info.value.status_code == 500 @@ -985,14 +1006,12 @@ async def test_retrieve_response_generator_runtime_error_context_length( mock_context.client = mock_client mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_context.query_request = QueryRequest( query="test" ) # pyright: ignore[reportCallIssue] + mock_context.moderation_result = ShieldModerationPassed() - mocker.patch( - "app.endpoints.streaming_query.run_shield_moderation", - new=mocker.AsyncMock(return_value=mocker.Mock(blocked=False)), - ) mock_client.responses = mocker.Mock() mock_client.responses.create = mocker.AsyncMock( side_effect=RuntimeError("context_length exceeded") @@ -1009,7 +1028,7 @@ async def test_retrieve_response_generator_runtime_error_context_length( ) with pytest.raises(HTTPException) as exc_info: - await retrieve_response_generator(mock_responses_params, mock_context, []) + await retrieve_response_generator(mock_responses_params, mock_context) assert exc_info.value.status_code == 413 @@ -1034,21 +1053,19 @@ 
async def test_retrieve_response_generator_runtime_error_other( mock_context.client = mock_client mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_context.query_request = QueryRequest( query="test" ) # pyright: ignore[reportCallIssue] + mock_context.moderation_result = ShieldModerationPassed() - mocker.patch( - "app.endpoints.streaming_query.run_shield_moderation", - new=mocker.AsyncMock(return_value=mocker.Mock(blocked=False)), - ) mock_client.responses = mocker.Mock() mock_client.responses.create = mocker.AsyncMock( side_effect=RuntimeError("Some other error") ) with pytest.raises(RuntimeError): - await retrieve_response_generator(mock_responses_params, mock_context, []) + await retrieve_response_generator(mock_responses_params, mock_context) class TestGenerateResponse: @@ -1077,6 +1094,7 @@ async def mock_generator() -> AsyncIterator[str]: mock_context.user_id = "user_123" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_context.query_request = QueryRequest( query="test" ) # pyright: ignore[reportCallIssue] @@ -1134,6 +1152,7 @@ async def mock_generator() -> AsyncIterator[str]: mock_context.user_id = "user_123" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_context.query_request = QueryRequest( query="test", generate_topic_summary=True ) # pyright: ignore[reportCallIssue] @@ -1186,6 +1205,7 @@ async def mock_generator() -> AsyncIterator[str]: mock_context.conversation_id = "conv_123" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_context.user_id = "user_123" mock_context.query_request = QueryRequest( query="test" @@ -1228,6 +1248,7 @@ async def mock_generator() -> AsyncIterator[str]: mock_context.conversation_id = "conv_123" mock_context.vector_store_ids = [] 
mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_context.user_id = "user_123" mock_context.query_request = QueryRequest( query="test" @@ -1273,6 +1294,7 @@ async def mock_generator() -> AsyncIterator[str]: mock_context.conversation_id = "conv_123" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_context.user_id = "user_123" mock_context.query_request = QueryRequest( query="test", media_type=MEDIA_TYPE_JSON @@ -1320,6 +1342,7 @@ async def mock_generator() -> AsyncIterator[str]: mock_context.conversation_id = "conv_123" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_context.user_id = "user_123" mock_context.query_request = QueryRequest( query="test", media_type=MEDIA_TYPE_JSON @@ -1608,6 +1631,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: mock_context.model_id = "provider1/model1" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_turn_summary = TurnSummary() @@ -1637,6 +1661,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: mock_context.model_id = "provider1/model1" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_turn_summary = TurnSummary() @@ -1667,6 +1692,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: mock_context.model_id = "provider1/model1" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_turn_summary = TurnSummary() @@ -1707,6 +1733,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: mock_context.model_id = "provider1/model1" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = 
RAGContext() mock_turn_summary = TurnSummary() @@ -1748,6 +1775,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: mock_context.model_id = "provider1/model1" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_turn_summary = TurnSummary() @@ -1796,6 +1824,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: mock_context.model_id = "provider1/model1" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_turn_summary = TurnSummary() @@ -1846,6 +1875,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: mock_context.model_id = "provider1/model1" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_turn_summary = TurnSummary() mock_turn_summary.llm_response = "Response" @@ -1893,6 +1923,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: mock_context.model_id = "provider1/model1" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_turn_summary = TurnSummary() @@ -1938,6 +1969,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: mock_context.model_id = "provider1/model1" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_turn_summary = TurnSummary() @@ -1982,6 +2014,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: mock_context.model_id = "provider1/model1" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_turn_summary = TurnSummary() @@ -2024,6 +2057,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: mock_context.model_id = "provider1/model1" 
mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_turn_summary = TurnSummary() @@ -2067,6 +2101,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: mock_context.model_id = "provider1/model1" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_turn_summary = TurnSummary() @@ -2108,6 +2143,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: mock_context.model_id = "provider1/model1" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_turn_summary = TurnSummary() @@ -2128,6 +2164,61 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: assert len(result) > 0 assert any("error" in item for item in result) + @pytest.mark.asyncio + async def test_response_generator_merges_inline_and_tool_rag_chunks_and_documents( + self, mocker: MockerFixture + ) -> None: + """Test that inline RAG and tool-based RAG chunks/docs are correctly merged.""" + inline_chunk = RAGChunk(content="inline chunk content", source="byok") + inline_doc = ReferencedDocument(doc_title="Inline Doc") + inline_rag = RAGContext( + context_text="", + rag_chunks=[inline_chunk], + referenced_documents=[inline_doc], + ) + + tool_chunk = RAGChunk(content="tool chunk content", source="vs-1") + tool_ref_doc = ReferencedDocument(doc_title="Tool Doc") + + mock_response_obj = mocker.Mock(spec=OpenAIResponseObject) + mock_response_obj.usage = mocker.Mock() + mock_response_obj.output = [] + + async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: + completed_chunk = mocker.Mock(spec=CompletedChunk) + completed_chunk.type = "response.completed" + completed_chunk.response = mock_response_obj + yield completed_chunk + + mock_context = mocker.Mock(spec=ResponseGeneratorContext) + mock_context.query_request = 
QueryRequest( + query="test", media_type=MEDIA_TYPE_JSON + ) # pyright: ignore[reportCallIssue] + mock_context.model_id = "provider1/model1" + mock_context.vector_store_ids = [] + mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = inline_rag + + mock_turn_summary = TurnSummary() + mock_turn_summary.rag_chunks = [tool_chunk] + mock_turn_summary.referenced_documents = [tool_ref_doc] + mocker.patch( + "app.endpoints.streaming_query.parse_referenced_documents", + return_value=[tool_ref_doc], + ) + + async for _ in response_generator( + mock_turn_response(), mock_context, mock_turn_summary + ): + pass + + assert len(mock_turn_summary.rag_chunks) == 2 + assert mock_turn_summary.rag_chunks[0].content == "inline chunk content" + assert mock_turn_summary.rag_chunks[1].content == "tool chunk content" + assert len(mock_turn_summary.referenced_documents) == 2 + assert mock_turn_summary.referenced_documents[0].doc_title == "Inline Doc" + assert mock_turn_summary.referenced_documents[1].doc_title == "Tool Doc" + class TestStreamHttpErrorEvent: """Tests for stream_http_error_event function.""" @@ -2230,6 +2321,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: mock_context.model_id = "provider1/model1" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_turn_summary = TurnSummary() @@ -2282,6 +2374,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: mock_context.model_id = "provider1/model1" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_turn_summary = TurnSummary() @@ -2354,6 +2447,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: mock_context.model_id = "provider1/model1" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_turn_summary = TurnSummary() @@ 
-2369,8 +2463,7 @@ def build_mcp_tool_call_side_effect( # Remove item from dict to simulate real behavior # arguments parameter is required by function signature but unused here _ = arguments - if output_index in mcp_call_items: - del mcp_call_items[output_index] + mcp_call_items.pop(output_index, None) return mock_tool_call mocker.patch( @@ -2442,6 +2535,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: mock_context.model_id = "provider1/model1" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_turn_summary = TurnSummary() diff --git a/tests/unit/app/test_routers.py b/tests/unit/app/test_routers.py index 754e3fdb2..668943a50 100644 --- a/tests/unit/app/test_routers.py +++ b/tests/unit/app/test_routers.py @@ -28,6 +28,7 @@ rlsapi_v1, a2a, query, + responses, ) @@ -53,7 +54,7 @@ def include_router( # pylint: disable=too-many-arguments prefix: str = "", tags: Optional[list] = None, dependencies: Optional[Sequence] = None, - responses: Optional[dict] = None, + responses: Optional[dict] = None, # pylint: disable=redefined-outer-name deprecated: Optional[bool] = None, include_in_schema: Optional[bool] = None, default_response_class: Optional[Any] = None, @@ -108,7 +109,7 @@ def test_include_routers() -> None: include_routers(app) # are all routers added? - assert len(app.routers) == 20 + assert len(app.routers) == 21 assert root.router in app.get_routers() assert info.router in app.get_routers() assert models.router in app.get_routers() @@ -129,6 +130,7 @@ def test_include_routers() -> None: assert rlsapi_v1.router in app.get_routers() assert a2a.router in app.get_routers() assert stream_interrupt.router in app.get_routers() + assert responses.router in app.get_routers() def test_check_prefixes() -> None: @@ -136,7 +138,7 @@ def test_check_prefixes() -> None: Verify that include_routers registers the expected routers with their configured URL prefixes. 
- Asserts that 16 routers are registered on a MockFastAPI instance and that + Asserts that 21 routers are registered on a MockFastAPI instance and that each router's prefix matches the expected value (e.g., root, health, authorized, metrics use an empty prefix; most API routers use "/v1"; conversations_v2 uses "/v2"). @@ -145,7 +147,7 @@ def test_check_prefixes() -> None: include_routers(app) # are all routers added? - assert len(app.routers) == 20 + assert len(app.routers) == 21 assert app.get_router_prefix(root.router) == "" assert app.get_router_prefix(info.router) == "/v1" assert app.get_router_prefix(models.router) == "/v1" @@ -167,3 +169,4 @@ def test_check_prefixes() -> None: assert app.get_router_prefix(rlsapi_v1.router) == "/v1" assert app.get_router_prefix(a2a.router) == "" assert app.get_router_prefix(stream_interrupt.router) == "/v1" + assert app.get_router_prefix(responses.router) == "/v1" diff --git a/tests/unit/test_configuration.py b/tests/unit/test_configuration.py index acd1ca5af..49565eab6 100644 --- a/tests/unit/test_configuration.py +++ b/tests/unit/test_configuration.py @@ -2,16 +2,18 @@ # pylint: disable=too-many-lines +from collections.abc import Generator from pathlib import Path from typing import Any -from collections.abc import Generator -from pydantic import ValidationError import pytest +from pydantic import ValidationError + +import constants +from cache.in_memory_cache import InMemoryCache +from cache.sqlite_cache import SQLiteCache from configuration import AppConfig, LogicError from models.config import CustomProfile, ModelContextProtocolServer -from cache.sqlite_cache import SQLiteCache -from cache.in_memory_cache import InMemoryCache # pylint: disable=broad-exception-caught,protected-access @@ -446,49 +448,56 @@ def test_configuration_not_loaded() -> None: """Test that accessing configuration before loading raises an error.""" cfg = AppConfig() with pytest.raises(LogicError, match="logic error: configuration is not loaded"): - 
cfg.configuration # pylint: disable=pointless-statement + c = cfg.configuration + assert c is not None def test_service_configuration_not_loaded() -> None: """Test that accessing service_configuration before loading raises an error.""" cfg = AppConfig() with pytest.raises(LogicError, match="logic error: configuration is not loaded"): - cfg.service_configuration # pylint: disable=pointless-statement + c = cfg.service_configuration + assert c is not None def test_llama_stack_configuration_not_loaded() -> None: """Test that accessing llama_stack_configuration before loading raises an error.""" cfg = AppConfig() with pytest.raises(LogicError, match="logic error: configuration is not loaded"): - cfg.llama_stack_configuration # pylint: disable=pointless-statement + c = cfg.llama_stack_configuration + assert c is not None def test_user_data_collection_configuration_not_loaded() -> None: """Test that accessing user_data_collection_configuration before loading raises an error.""" cfg = AppConfig() with pytest.raises(LogicError, match="logic error: configuration is not loaded"): - cfg.user_data_collection_configuration # pylint: disable=pointless-statement + c = cfg.user_data_collection_configuration + assert c is not None def test_mcp_servers_not_loaded() -> None: """Test that accessing mcp_servers before loading raises an error.""" cfg = AppConfig() with pytest.raises(LogicError, match="logic error: configuration is not loaded"): - cfg.mcp_servers # pylint: disable=pointless-statement + c = cfg.mcp_servers + assert c is not None def test_authentication_configuration_not_loaded() -> None: """Test that accessing authentication_configuration before loading raises an error.""" cfg = AppConfig() with pytest.raises(LogicError, match="logic error: configuration is not loaded"): - cfg.authentication_configuration # pylint: disable=pointless-statement + c = cfg.authentication_configuration + assert c is not None def test_customization_not_loaded() -> None: """Test that accessing 
customization before loading raises an error.""" cfg = AppConfig() with pytest.raises(LogicError, match="logic error: configuration is not loaded"): - cfg.customization # pylint: disable=pointless-statement + c = cfg.customization + assert c is not None def test_load_configuration_with_customization_system_prompt_path(tmpdir: Path) -> None: @@ -947,11 +956,61 @@ def test_load_configuration_with_incomplete_azure_entra_id_raises(tmpdir: Path) cfg.load_configuration(str(cfg_filename)) -def test_rag_id_mapping_empty_when_no_byok(minimal_config: AppConfig) -> None: - """Test that rag_id_mapping returns empty dict when no BYOK RAG configured.""" +def test_rag_id_mapping_excludes_solr_when_okp_not_configured( + minimal_config: AppConfig, +) -> None: + """Test that rag_id_mapping does not include OKP/Solr when OKP is not in rag config.""" assert minimal_config.rag_id_mapping == {} +def test_rag_id_mapping_includes_solr_when_okp_in_inline() -> None: + """Test that rag_id_mapping includes OKP/Solr mapping when OKP is in rag.inline.""" + cfg = AppConfig() + cfg.init_from_dict( + { + "name": "test", + "service": {"host": "localhost", "port": 8080}, + "llama_stack": { + "api_key": "k", + "url": "http://test.com:1234", + "use_as_library_client": False, + }, + "user_data_collection": {}, + "authentication": {"module": "noop"}, + "rag": {"inline": [constants.OKP_RAG_ID]}, + } + ) + assert constants.SOLR_DEFAULT_VECTOR_STORE_ID in cfg.rag_id_mapping + assert ( + cfg.rag_id_mapping[constants.SOLR_DEFAULT_VECTOR_STORE_ID] + == constants.OKP_RAG_ID + ) + + +def test_rag_id_mapping_includes_solr_when_okp_in_tool() -> None: + """Test that rag_id_mapping includes OKP/Solr mapping when OKP is in rag.tool.""" + cfg = AppConfig() + cfg.init_from_dict( + { + "name": "test", + "service": {"host": "localhost", "port": 8080}, + "llama_stack": { + "api_key": "k", + "url": "http://test.com:1234", + "use_as_library_client": False, + }, + "user_data_collection": {}, + "authentication": {"module": 
"noop"}, + "rag": {"tool": [constants.OKP_RAG_ID]}, + } + ) + assert constants.SOLR_DEFAULT_VECTOR_STORE_ID in cfg.rag_id_mapping + assert ( + cfg.rag_id_mapping[constants.SOLR_DEFAULT_VECTOR_STORE_ID] + == constants.OKP_RAG_ID + ) + + def test_rag_id_mapping_with_byok(tmp_path: Path) -> None: """Test that rag_id_mapping builds correct mapping from BYOK config.""" db_file = tmp_path / "test.db" @@ -980,6 +1039,41 @@ def test_rag_id_mapping_with_byok(tmp_path: Path) -> None: assert cfg.rag_id_mapping == {"vs-001": "my-kb"} +def test_rag_id_mapping_with_byok_and_okp(tmp_path: Path) -> None: + """Test that rag_id_mapping includes both BYOK and OKP entries when OKP is configured.""" + db_file = tmp_path / "test.db" + db_file.touch() + cfg = AppConfig() + cfg.init_from_dict( + { + "name": "test", + "service": {"host": "localhost", "port": 8080}, + "llama_stack": { + "api_key": "k", + "url": "http://test.com:1234", + "use_as_library_client": False, + }, + "user_data_collection": {}, + "authentication": {"module": "noop"}, + "rag": {"inline": [constants.OKP_RAG_ID]}, + "byok_rag": [ + { + "rag_id": "my-kb", + "vector_db_id": "vs-001", + "db_path": str(db_file), + }, + ], + } + ) + assert "vs-001" in cfg.rag_id_mapping + assert cfg.rag_id_mapping["vs-001"] == "my-kb" + assert constants.SOLR_DEFAULT_VECTOR_STORE_ID in cfg.rag_id_mapping + assert ( + cfg.rag_id_mapping[constants.SOLR_DEFAULT_VECTOR_STORE_ID] + == constants.OKP_RAG_ID + ) + + def test_resolve_index_name_with_mapping(minimal_config: AppConfig) -> None: """Test resolve_index_name uses mapping when available.""" mapping = {"vs-x": "user-friendly-name"} diff --git a/tests/unit/utils/test_conversations.py b/tests/unit/utils/test_conversations.py index e4120f145..389793055 100644 --- a/tests/unit/utils/test_conversations.py +++ b/tests/unit/utils/test_conversations.py @@ -3,6 +3,9 @@ from datetime import datetime, UTC from typing import Any +from fastapi import HTTPException +from llama_stack_api import 
OpenAIResponseMessage +from llama_stack_client import APIConnectionError, APIStatusError import pytest from pytest_mock import MockerFixture @@ -11,7 +14,9 @@ from utils.conversations import ( _build_tool_call_summary_from_item, _extract_text_from_content, + append_turn_items_to_conversation, build_conversation_turns_from_items, + get_all_conversation_items, ) from utils.types import ToolCallSummary @@ -720,3 +725,133 @@ def test_legacy_conversation_without_metadata(self, mocker: MockerFixture) -> No # Timestamps should match conversation start time assert turn.started_at == "2024-01-01T10:00:00Z" assert turn.completed_at == "2024-01-01T10:00:00Z" + + +class TestAppendTurnItemsToConversation: # pylint: disable=too-few-public-methods + """Tests for append_turn_items_to_conversation function.""" + + @pytest.mark.asyncio + async def test_appends_user_input_and_llm_output( + self, mocker: MockerFixture + ) -> None: + """Test that append_turn_items_to_conversation creates conversation items correctly.""" + mock_client = mocker.Mock() + mock_client.conversations.items.create = mocker.AsyncMock(return_value=None) + assistant_msg = OpenAIResponseMessage( + type="message", + role="assistant", + content="I cannot help with that", + ) + + await append_turn_items_to_conversation( + mock_client, + conversation_id="conv-123", + user_input="Hello", + llm_output=[assistant_msg], + ) + + mock_client.conversations.items.create.assert_called_once() + call_args = mock_client.conversations.items.create.call_args + assert call_args[0][0] == "conv-123" + items = call_args[1]["items"] + assert len(items) == 2 + assert items[0]["type"] == "message" and items[0]["role"] == "user" + assert items[0]["content"] == "Hello" + assert items[1]["type"] == "message" and items[1]["role"] == "assistant" + assert items[1]["content"] == "I cannot help with that" + + +class TestGetAllConversationItems: + """Tests for get_all_conversation_items function.""" + + @pytest.mark.asyncio + async def 
test_returns_single_page_items(self, mocker: MockerFixture) -> None: + """Test that a single page of items is returned.""" + mock_client = mocker.Mock() + item_a = mocker.Mock(type="message", role="user", content="Hello") + item_b = mocker.Mock(type="message", role="assistant", content="Hi") + mock_page = mocker.Mock() + mock_page.data = [item_a, item_b] + mock_page.has_next_page.return_value = False + + mock_client.conversations.items.list = mocker.AsyncMock(return_value=mock_page) + + result = await get_all_conversation_items( + mock_client, "conv_0d21ba731f21f798dc9680125d5d6f49" + ) + + assert result == [item_a, item_b] + mock_client.conversations.items.list.assert_called_once_with( + conversation_id="conv_0d21ba731f21f798dc9680125d5d6f49", + order="asc", + ) + + @pytest.mark.asyncio + async def test_returns_all_items_across_pages(self, mocker: MockerFixture) -> None: + """Test that items from multiple pages are concatenated.""" + mock_client = mocker.Mock() + item_1 = mocker.Mock(type="message", role="user", content="First") + item_2 = mocker.Mock(type="message", role="assistant", content="Second") + item_3 = mocker.Mock(type="message", role="user", content="Third") + + first_page = mocker.Mock() + first_page.data = [item_1] + first_page.has_next_page.return_value = True + second_page = mocker.Mock() + second_page.data = [item_2, item_3] + second_page.has_next_page.return_value = False + + first_page.get_next_page = mocker.AsyncMock(return_value=second_page) + + mock_client.conversations.items.list = mocker.AsyncMock(return_value=first_page) + + result = await get_all_conversation_items(mock_client, "conv_abc") + + assert result == [item_1, item_2, item_3] + + @pytest.mark.asyncio + async def test_handles_empty_data(self, mocker: MockerFixture) -> None: + """Test that None or empty page data is handled.""" + mock_client = mocker.Mock() + mock_page = mocker.Mock() + mock_page.data = None + mock_page.has_next_page.return_value = False + + 
mock_client.conversations.items.list = mocker.AsyncMock(return_value=mock_page) + + result = await get_all_conversation_items(mock_client, "conv_empty") + + assert result == [] + + @pytest.mark.asyncio + async def test_handles_connection_error(self, mocker: MockerFixture) -> None: + """Test that APIConnectionError is converted to HTTPException 503.""" + mock_client = mocker.Mock() + mock_client.conversations.items.list = mocker.AsyncMock( + side_effect=APIConnectionError( + message="connection refused", request=mocker.Mock() + ) + ) + + with pytest.raises(HTTPException) as exc_info: + await get_all_conversation_items(mock_client, "conv_xyz") + + assert exc_info.value.status_code == 503 + assert "Llama Stack" in str(exc_info.value.detail) + + @pytest.mark.asyncio + async def test_handles_api_status_error(self, mocker: MockerFixture) -> None: + """Test that APIStatusError is converted to HTTPException 500.""" + mock_client = mocker.Mock() + mock_client.conversations.items.list = mocker.AsyncMock( + side_effect=APIStatusError( + message="internal error", + response=mocker.Mock(request=None), + body=None, + ) + ) + + with pytest.raises(HTTPException) as exc_info: + await get_all_conversation_items(mock_client, "conv_xyz") + + assert exc_info.value.status_code == 500 diff --git a/tests/unit/utils/test_endpoints.py b/tests/unit/utils/test_endpoints.py index cc092a1ed..cf8b46568 100644 --- a/tests/unit/utils/test_endpoints.py +++ b/tests/unit/utils/test_endpoints.py @@ -11,9 +11,9 @@ from pytest_mock import MockerFixture from sqlalchemy.exc import SQLAlchemyError -from models.database.conversations import UserConversation +from models.database.conversations import UserConversation, UserTurn from utils import endpoints -from utils.types import ReferencedDocument +from utils.types import ReferencedDocument, ResponsesConversationContext @pytest.fixture(name="input_file") @@ -451,3 +451,266 @@ def test_default_others_allowed_false(self, mocker: MockerFixture) -> None: 
mock_query.filter_by.assert_called_once_with( id=conversation_id, user_id=user_id ) + + +class TestResolveResponseContext: + """Tests for resolve_response_context function.""" + + @pytest.mark.asyncio + async def test_conversation_id_returns_context_with_existing_conversation( + self, mocker: MockerFixture + ) -> None: + """When conversation_id is set, validate and return context with it.""" + mock_holder = mocker.Mock() + mock_client = mocker.Mock() + mock_holder.get_client.return_value = mock_client + mocker.patch( + "utils.endpoints.AsyncLlamaStackClientHolder", + return_value=mock_holder, + ) + + mock_conv = mocker.Mock(spec=UserConversation) + mock_conv.id = "conv-normalized-123" + mocker.patch( + "utils.endpoints.normalize_conversation_id", + return_value="conv-normalized-123", + ) + mocker.patch( + "utils.endpoints.to_llama_stack_conversation_id", + return_value="conv_conv-normalized-123", + ) + mocker.patch( + "utils.endpoints.validate_and_retrieve_conversation", + return_value=mock_conv, + ) + + result = await endpoints.resolve_response_context( + user_id="user-1", + others_allowed=False, + conversation_id="conv-raw", + previous_response_id=None, + generate_topic_summary=None, + ) + + assert isinstance(result, ResponsesConversationContext) + assert result.conversation == "conv_conv-normalized-123" + assert result.user_conversation is mock_conv + assert result.generate_topic_summary is False + + @pytest.mark.asyncio + async def test_previous_response_id_turn_not_found_raises_404( + self, mocker: MockerFixture + ) -> None: + """When previous_response_id is set but turn does not exist, raise 404.""" + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mocker.Mock() + mocker.patch( + "utils.endpoints.AsyncLlamaStackClientHolder", + return_value=mock_holder, + ) + mocker.patch("utils.endpoints.check_turn_existence", return_value=False) + + with pytest.raises(HTTPException) as exc_info: + await endpoints.resolve_response_context( + 
user_id="user-1", + others_allowed=False, + conversation_id=None, + previous_response_id="resp-missing", + generate_topic_summary=None, + ) + + assert exc_info.value.status_code == 404 + assert isinstance(exc_info.value.detail, dict) + assert "resp-missing" in str(exc_info.value.detail["cause"]) + + @pytest.mark.asyncio + async def test_previous_response_id_same_as_last_returns_existing_conversation( + self, mocker: MockerFixture + ) -> None: + """When previous_response_id equals last_response_id, use existing conv.""" + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mocker.Mock() + mocker.patch( + "utils.endpoints.AsyncLlamaStackClientHolder", + return_value=mock_holder, + ) + mocker.patch("utils.endpoints.check_turn_existence", return_value=True) + + mock_turn = mocker.Mock(spec=UserTurn) + mock_turn.conversation_id = "conv-existing" + mocker.patch( + "utils.endpoints.retrieve_turn_by_response_id", + return_value=mock_turn, + ) + + mock_conv = mocker.Mock(spec=UserConversation) + mock_conv.id = "conv-existing" + mock_conv.last_response_id = "resp-123" # same as previous_response_id + mocker.patch( + "utils.endpoints.validate_and_retrieve_conversation", + return_value=mock_conv, + ) + mocker.patch( + "utils.endpoints.to_llama_stack_conversation_id", + return_value="conv_conv-existing", + ) + mock_create = mocker.patch( + "utils.endpoints.create_new_conversation", + new=mocker.AsyncMock(), + ) + + result = await endpoints.resolve_response_context( + user_id="user-1", + others_allowed=False, + conversation_id=None, + previous_response_id="resp-123", + generate_topic_summary=None, + ) + + assert result.conversation == "conv_conv-existing" + assert result.user_conversation is mock_conv + assert result.generate_topic_summary is False + mock_create.assert_not_called() + + @pytest.mark.asyncio + async def test_previous_response_id_fork_creates_new_conversation( + self, mocker: MockerFixture + ) -> None: + """When last_response_id differs from 
previous_response_id, fork to new conv.""" + mock_client = mocker.Mock() + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mock_client + mocker.patch( + "utils.endpoints.AsyncLlamaStackClientHolder", + return_value=mock_holder, + ) + mocker.patch("utils.endpoints.check_turn_existence", return_value=True) + + mock_turn = mocker.Mock(spec=UserTurn) + mock_turn.conversation_id = "conv-existing" + mocker.patch( + "utils.endpoints.retrieve_turn_by_response_id", + return_value=mock_turn, + ) + + mock_conv = mocker.Mock(spec=UserConversation) + mock_conv.id = "conv-existing" + mock_conv.last_response_id = "resp-latest" # fork: different from prev + mocker.patch( + "utils.endpoints.validate_and_retrieve_conversation", + return_value=mock_conv, + ) + mocker.patch( + "utils.endpoints.create_new_conversation", + new=mocker.AsyncMock(return_value="conv_new_fork"), + ) + + result = await endpoints.resolve_response_context( + user_id="user-1", + others_allowed=False, + conversation_id=None, + previous_response_id="resp-old", + generate_topic_summary=None, + ) + + assert result.conversation == "conv_new_fork" + assert result.user_conversation is mock_conv + assert result.generate_topic_summary is True + + @pytest.mark.asyncio + async def test_previous_response_id_fork_respects_generate_topic_summary( + self, mocker: MockerFixture + ) -> None: + """Fork path uses request generate_topic_summary when provided.""" + mock_client = mocker.Mock() + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mock_client + mocker.patch( + "utils.endpoints.AsyncLlamaStackClientHolder", + return_value=mock_holder, + ) + mocker.patch("utils.endpoints.check_turn_existence", return_value=True) + + mock_turn = mocker.Mock(spec=UserTurn) + mock_turn.conversation_id = "conv-existing" + mocker.patch( + "utils.endpoints.retrieve_turn_by_response_id", + return_value=mock_turn, + ) + + mock_conv = mocker.Mock(spec=UserConversation) + mock_conv.id = "conv-existing" + 
mock_conv.last_response_id = "resp-latest" + mocker.patch( + "utils.endpoints.validate_and_retrieve_conversation", + return_value=mock_conv, + ) + mocker.patch( + "utils.endpoints.create_new_conversation", + new=mocker.AsyncMock(return_value="conv_new"), + ) + + result = await endpoints.resolve_response_context( + user_id="user-1", + others_allowed=False, + conversation_id=None, + previous_response_id="resp-old", + generate_topic_summary=False, + ) + + assert result.generate_topic_summary is False + + @pytest.mark.asyncio + async def test_no_context_creates_new_conversation( + self, mocker: MockerFixture + ) -> None: + """When neither conversation_id nor previous_response_id set, create new.""" + mock_client = mocker.Mock() + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mock_client + mocker.patch( + "utils.endpoints.AsyncLlamaStackClientHolder", + return_value=mock_holder, + ) + mocker.patch( + "utils.endpoints.create_new_conversation", + new=mocker.AsyncMock(return_value="conv_brand_new"), + ) + + result = await endpoints.resolve_response_context( + user_id="user-1", + others_allowed=False, + conversation_id=None, + previous_response_id=None, + generate_topic_summary=None, + ) + + assert result.conversation == "conv_brand_new" + assert result.user_conversation is None + assert result.generate_topic_summary is True + + @pytest.mark.asyncio + async def test_no_context_respects_generate_topic_summary( + self, mocker: MockerFixture + ) -> None: + """New conversation path uses generate_topic_summary when provided.""" + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mocker.Mock() + mocker.patch( + "utils.endpoints.AsyncLlamaStackClientHolder", + return_value=mock_holder, + ) + mocker.patch( + "utils.endpoints.create_new_conversation", + new=mocker.AsyncMock(return_value="conv_new"), + ) + + result = await endpoints.resolve_response_context( + user_id="user-1", + others_allowed=False, + conversation_id=None, + 
previous_response_id=None, + generate_topic_summary=False, + ) + + assert result.generate_topic_summary is False diff --git a/tests/unit/utils/test_query.py b/tests/unit/utils/test_query.py index 867a5c6c6..5c13a3a2d 100644 --- a/tests/unit/utils/test_query.py +++ b/tests/unit/utils/test_query.py @@ -407,6 +407,7 @@ def query_side_effect(*args: Any) -> Any: model_id="model1", provider_id="provider1", topic_summary="Topic", + response_id="resp_1", ) mock_session.add.assert_called() @@ -454,6 +455,7 @@ def query_side_effect(*args: Any) -> Any: model_id="new_model", provider_id="new_provider", topic_summary=None, + response_id="resp_1", ) assert existing_conv.last_used_model == "new_model" @@ -497,6 +499,7 @@ def query_side_effect(*args: Any) -> Any: model_id="model1", provider_id="provider1", topic_summary="Topic", + response_id="resp_1", ) # Verify that the turn number is incremented correctly diff --git a/tests/unit/utils/test_responses.py b/tests/unit/utils/test_responses.py index 54ae158aa..79c8209e0 100644 --- a/tests/unit/utils/test_responses.py +++ b/tests/unit/utils/test_responses.py @@ -98,8 +98,7 @@ def make_output_item( Returns: MockOutputItem: Mock object with type, role, and content attributes """ - mock_item = MockOutputItem(item_type=item_type, role=role, content=content) - return mock_item + return MockOutputItem(item_type=item_type, role=role, content=content) def make_content_part( @@ -127,9 +126,8 @@ def make_content_part( ("function_call", "assistant", "some text", ""), ("file_search_call", "assistant", "some text", ""), (None, "assistant", "some text", ""), - # User role messages are filtered out - return empty string - ("message", "user", "some text", ""), - # Valid assistant message with string content + # Message type extracts content regardless of role (input or output) + ("message", "user", "some text", "some text"), ("message", "assistant", "Hello, world!", "Hello, world!"), ("message", "assistant", "", ""), ], @@ -137,7 +135,7 @@ def 
make_content_part( "function_call_type_returns_empty", "file_search_call_type_returns_empty", "none_type_returns_empty", - "user_role_returns_empty", + "user_message_extracts_content", "valid_string_content", "empty_string_content", ], @@ -147,11 +145,7 @@ def test_extract_text_basic_cases( ) -> None: """Test basic extraction cases for different types, roles, and simple content. - Args: - item_type: Type of the output item - role: Role of the message - content: Content of the message - expected: Expected extracted text + Extraction works for both input and output items; role is not filtered. """ output_item = make_output_item(item_type=item_type, role=role, content=content) result = extract_text_from_response_item(output_item) # type: ignore[arg-type] @@ -307,8 +301,8 @@ def test_extract_text_from_response_items_filters_non_messages(self) -> None: result = extract_text_from_response_items([item1, item2]) # type: ignore[arg-type] assert result == "Valid message" - def test_extract_text_from_response_items_filters_user_messages(self) -> None: - """Test extract_text_from_response_items filters out user role messages.""" + def test_extract_text_from_response_items_includes_all_roles(self) -> None: + """Test extract_text_from_response_items extracts from all message roles.""" item1 = make_output_item( item_type="message", role="assistant", content="Assistant message" ) @@ -316,8 +310,8 @@ def test_extract_text_from_response_items_filters_user_messages(self) -> None: item_type="message", role="user", content="User message" ) result = extract_text_from_response_items([item1, item2]) # type: ignore[arg-type] - # User messages are filtered out - only assistant message is included - assert result == "Assistant message" + # All message items are included (generalizes for input and output) + assert result == "Assistant message User message" def test_extract_text_from_response_items_with_list_content(self) -> None: """Test extract_text_from_response_items with list-based 
content.""" @@ -2044,6 +2038,69 @@ def test_multiple_stores_attribute_not_in_mapping( ) assert source == "vs-unknown" + def test_multiple_stores_source_attribute_fallback( + self, mocker: MockerFixture + ) -> None: + """Test resolution falls back to source attribute when no vector_store_id.""" + mock_result = mocker.Mock() + mock_result.filename = "file-abc123" + mock_result.attributes = {"source": "ocp-documentation"} + + source = _resolve_source_for_result( + mock_result, + ["vs-001", "vs-002"], + {"vs-001": "ocp-4.18-docs"}, + ) + assert source == "ocp-documentation" + + def test_multiple_stores_source_attribute_ignores_mapping( + self, mocker: MockerFixture + ) -> None: + """Test source attribute is returned directly without rag_id_mapping lookup.""" + mock_result = mocker.Mock() + mock_result.filename = "file-abc123" + mock_result.attributes = {"source": "custom-index"} + + source = _resolve_source_for_result( + mock_result, + ["vs-001", "vs-002"], + {"custom-index": "should-not-be-used"}, + ) + assert source == "custom-index" + + def test_multiple_stores_source_preferred_over_vector_store_id( + self, mocker: MockerFixture + ) -> None: + """Test source attribute takes precedence over vector_store_id.""" + mock_result = mocker.Mock() + mock_result.filename = "file-abc123" + mock_result.attributes = { + "vector_store_id": "vs-002", + "source": "ocp-documentation", + } + + source = _resolve_source_for_result( + mock_result, + ["vs-001", "vs-002"], + {"vs-002": "rhel-9-docs"}, + ) + assert source == "ocp-documentation" + + def test_multiple_stores_no_vector_store_id_no_source( + self, mocker: MockerFixture + ) -> None: + """Test resolution returns None when neither vector_store_id nor source present.""" + mock_result = mocker.Mock() + mock_result.filename = "file-abc123" + mock_result.attributes = {"title": "some doc"} + + source = _resolve_source_for_result( + mock_result, + ["vs-001", "vs-002"], + {"vs-001": "ocp-docs"}, + ) + assert source is None + class 
TestBuildChunkAttributes: """Tests for _build_chunk_attributes function.""" diff --git a/tests/unit/utils/test_shields.py b/tests/unit/utils/test_shields.py index 55ee56886..5d68a73a2 100644 --- a/tests/unit/utils/test_shields.py +++ b/tests/unit/utils/test_shields.py @@ -1,5 +1,6 @@ """Unit tests for utils/shields.py functions.""" +from llama_stack_client import APIConnectionError, APIStatusError import pytest from fastapi import HTTPException, status from pytest_mock import MockerFixture @@ -9,6 +10,7 @@ append_turn_to_conversation, detect_shield_violations, get_available_shields, + get_shields_for_request, run_shield_moderation, validate_shield_ids_override, ) @@ -305,60 +307,25 @@ async def test_raises_http_exception_when_shield_has_no_provider_resource_id( assert exc_info.value.status_code == status.HTTP_404_NOT_FOUND @pytest.mark.asyncio - async def test_returns_blocked_on_bad_request_error( + async def test_shield_ids_empty_list_runs_no_shields_returns_passed( self, mocker: MockerFixture ) -> None: - """Test that run_shield_moderation returns blocked when ValueError is raised.""" - mock_metric = mocker.patch( - "utils.shields.metrics.llm_calls_validation_errors_total" - ) - mock_client = mocker.Mock() - - # Setup shield - shield = mocker.Mock() - shield.identifier = "test-shield" - shield.provider_resource_id = "moderation-model" - mock_client.shields.list = mocker.AsyncMock(return_value=[shield]) - - # Setup model - model = mocker.Mock() - model.id = "moderation-model" - mock_client.models.list = mocker.AsyncMock(return_value=[model]) - - # Setup moderation to raise ValueError (known Llama Stack bug) - mock_client.moderations.create = mocker.AsyncMock( - side_effect=ValueError("Bad request") - ) - - result = await run_shield_moderation(mock_client, "test input") - - assert result.decision == "blocked" - assert result.message == DEFAULT_VIOLATION_MESSAGE - mock_metric.inc.assert_called_once() - - @pytest.mark.asyncio - async def 
test_shield_ids_empty_list_raises_422( - self, mocker: MockerFixture - ) -> None: - """Test that shield_ids=[] raises HTTPException 422 (prevents bypass).""" + """Test that shield_ids=[] runs no shields and returns passed.""" mock_client = mocker.Mock() shield = mocker.Mock() shield.identifier = "shield-1" mock_client.shields.list = mocker.AsyncMock(return_value=[shield]) + mock_client.models.list = mocker.AsyncMock(return_value=[]) - with pytest.raises(HTTPException) as exc_info: - await run_shield_moderation(mock_client, "test input", shield_ids=[]) + result = await run_shield_moderation(mock_client, "test input", shield_ids=[]) - assert exc_info.value.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY - assert "shield_ids provided but no shields selected" in str( - exc_info.value.detail - ) + assert result.decision == "passed" @pytest.mark.asyncio - async def test_shield_ids_raises_exception_when_no_shields_found( + async def test_shield_ids_raises_404_when_no_shields_found( self, mocker: MockerFixture ) -> None: - """Test shield_ids raises HTTPException when no requested shields exist.""" + """Test shield_ids raises HTTPException 404 when requested shield not configured.""" mock_client = mocker.Mock() shield = mocker.Mock() shield.identifier = "shield-1" @@ -369,8 +336,8 @@ async def test_shield_ids_raises_exception_when_no_shields_found( mock_client, "test input", shield_ids=["typo-shield"] ) - assert exc_info.value.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY - assert "Invalid shield configuration" in exc_info.value.detail["response"] # type: ignore + assert exc_info.value.status_code == status.HTTP_404_NOT_FOUND + assert "Shield" in exc_info.value.detail["response"] # type: ignore assert "typo-shield" in exc_info.value.detail["cause"] # type: ignore @pytest.mark.asyncio @@ -518,3 +485,132 @@ def test_raises_422_when_empty_list_shield_ids_and_override_disabled( validate_shield_ids_override(query_request, mock_config) assert exc_info.value.status_code 
== status.HTTP_422_UNPROCESSABLE_ENTITY + + +class TestGetShieldsForRequest: + """Tests for get_shields_for_request function.""" + + @pytest.mark.asyncio + async def test_returns_all_shields_when_shield_ids_none( + self, mocker: MockerFixture + ) -> None: + """Return all configured shields when shield_ids is None.""" + mock_client = mocker.Mock() + shield1 = mocker.Mock() + shield1.identifier = "shield-1" + shield2 = mocker.Mock() + shield2.identifier = "shield-2" + mock_client.shields.list = mocker.AsyncMock(return_value=[shield1, shield2]) + + result = await get_shields_for_request(mock_client, shield_ids=None) + + assert len(result) == 2 + assert result[0].identifier == "shield-1" + assert result[1].identifier == "shield-2" + mock_client.shields.list.assert_called_once() + + @pytest.mark.asyncio + async def test_returns_empty_list_when_no_shields_configured( + self, mocker: MockerFixture + ) -> None: + """Test that get_shields_for_request returns empty list when no shields configured.""" + mock_client = mocker.Mock() + mock_client.shields.list = mocker.AsyncMock(return_value=[]) + + result = await get_shields_for_request(mock_client, shield_ids=None) + + assert result == [] + + @pytest.mark.asyncio + async def test_filters_to_requested_shields_when_all_exist( + self, mocker: MockerFixture + ) -> None: + """Test that get_shields_for_request returns only requested shields when all exist.""" + mock_client = mocker.Mock() + shield1 = mocker.Mock() + shield1.identifier = "shield-1" + shield2 = mocker.Mock() + shield2.identifier = "shield-2" + shield3 = mocker.Mock() + shield3.identifier = "shield-3" + mock_client.shields.list = mocker.AsyncMock( + return_value=[shield1, shield2, shield3] + ) + + result = await get_shields_for_request( + mock_client, shield_ids=["shield-1", "shield-3"] + ) + + assert len(result) == 2 + assert result[0].identifier == "shield-1" + assert result[1].identifier == "shield-3" + + @pytest.mark.asyncio + async def 
test_raises_404_when_requested_shield_not_configured( + self, mocker: MockerFixture + ) -> None: + """Raise 404 when a requested shield is not configured.""" + mock_client = mocker.Mock() + shield = mocker.Mock() + shield.identifier = "shield-1" + mock_client.shields.list = mocker.AsyncMock(return_value=[shield]) + + with pytest.raises(HTTPException) as exc_info: + await get_shields_for_request( + mock_client, shield_ids=["shield-1", "missing-shield"] + ) + + assert exc_info.value.status_code == status.HTTP_404_NOT_FOUND + assert "Shield" in exc_info.value.detail["response"] # type: ignore + assert "missing-shield" in exc_info.value.detail["cause"] # type: ignore + + @pytest.mark.asyncio + async def test_raises_404_when_multiple_requested_shields_not_configured( + self, mocker: MockerFixture + ) -> None: + """Raise 404 with all missing ids when multiple shields not configured.""" + mock_client = mocker.Mock() + mock_client.shields.list = mocker.AsyncMock(return_value=[]) + + with pytest.raises(HTTPException) as exc_info: + await get_shields_for_request( + mock_client, shield_ids=["missing-1", "missing-2"] + ) + + assert exc_info.value.status_code == status.HTTP_404_NOT_FOUND + assert "Shields" in exc_info.value.detail["response"] # type: ignore + cause = exc_info.value.detail["cause"] # type: ignore + assert "missing-1" in cause + assert "missing-2" in cause + + @pytest.mark.asyncio + async def test_raises_503_on_connection_error(self, mocker: MockerFixture) -> None: + """Raise 503 on APIConnectionError.""" + mock_client = mocker.Mock() + mock_client.shields.list = mocker.AsyncMock( + side_effect=APIConnectionError( + message="Connection failed", request=mocker.Mock() + ) + ) + + with pytest.raises(HTTPException) as exc_info: + await get_shields_for_request(mock_client, shield_ids=None) + + assert exc_info.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE + + @pytest.mark.asyncio + async def test_raises_500_on_api_status_error(self, mocker: MockerFixture) -> 
None: + """Raise 500 on APIStatusError.""" + mock_client = mocker.Mock() + mock_client.shields.list = mocker.AsyncMock( + side_effect=APIStatusError( + message="Server error", + response=mocker.Mock(request=None), + body=None, + ) + ) + + with pytest.raises(HTTPException) as exc_info: + await get_shields_for_request(mock_client, shield_ids=None) + + assert exc_info.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR diff --git a/tests/unit/utils/test_vector_search.py b/tests/unit/utils/test_vector_search.py index 4930cb846..930f59d36 100644 --- a/tests/unit/utils/test_vector_search.py +++ b/tests/unit/utils/test_vector_search.py @@ -462,7 +462,7 @@ async def test_both_sources_disabled(self, mocker) -> None: # type: ignore[no-u mocker.patch("utils.vector_search.configuration", config_mock) client_mock = mocker.AsyncMock() - context = await build_rag_context(client_mock, "test query", None) + context = await build_rag_context(client_mock, "passed", "test query", None) assert context.context_text == "" assert context.rag_chunks == [] @@ -497,7 +497,7 @@ async def test_byok_enabled_only(self, mocker) -> None: # type: ignore[no-untyp client_mock = mocker.AsyncMock() client_mock.vector_io.query.return_value = search_response - context = await build_rag_context(client_mock, "test query", None) + context = await build_rag_context(client_mock, "passed", "test query", None) assert len(context.rag_chunks) > 0 assert "BYOK content" in context.context_text diff --git a/uv.lock b/uv.lock index e0d1cbcc0..d594f2a32 100644 --- a/uv.lock +++ b/uv.lock @@ -14,7 +14,7 @@ resolution-markers = [ [[package]] name = "a2a-sdk" -version = "0.3.24" +version = "0.3.25" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "google-api-core" }, @@ -23,9 +23,9 @@ dependencies = [ { name = "protobuf" }, { name = "pydantic" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/ad/76/cefa956fb2d3911cb91552a1da8ce2dbb339f1759cb475e2982f0ae2332b/a2a_sdk-0.3.24.tar.gz", hash = "sha256:3581e6e8a854cd725808f5732f90b7978e661b6d4e227a4755a8f063a3c1599d", size = 255550, upload-time = "2026-02-20T10:05:43.423Z" } +sdist = { url = "https://files.pythonhosted.org/packages/55/83/3c99b276d09656cce039464509f05bf385e5600d6dc046a131bbcf686930/a2a_sdk-0.3.25.tar.gz", hash = "sha256:afda85bab8d6af0c5d15e82f326c94190f6be8a901ce562d045a338b7127242f", size = 270638, upload-time = "2026-03-10T13:08:46.417Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/10/6e/cae5f0caea527b39c0abd7204d9416768764573c76649ca03cc345a372be/a2a_sdk-0.3.24-py3-none-any.whl", hash = "sha256:7b248767096bb55311f57deebf6b767349388d94c1b376c60cb8f6b715e053f6", size = 145752, upload-time = "2026-02-20T10:05:41.729Z" }, + { url = "https://files.pythonhosted.org/packages/bd/f9/6a62520b7ecb945188a6e1192275f4732ff9341cd4629bc975a6c146aeab/a2a_sdk-0.3.25-py3-none-any.whl", hash = "sha256:2fce38faea82eb0b6f9f9c2bcf761b0d78612c80ef0e599b50d566db1b2654b5", size = 149609, upload-time = "2026-03-10T13:08:44.7Z" }, ] [[package]] @@ -231,20 +231,20 @@ wheels = [ [[package]] name = "azure-core" -version = "1.38.2" +version = "1.38.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "requests" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/00/fe/5c7710bc611a4070d06ba801de9a935cc87c3d4b689c644958047bdf2cba/azure_core-1.38.2.tar.gz", hash = "sha256:67562857cb979217e48dc60980243b61ea115b77326fa93d83b729e7ff0482e7", size = 363734, upload-time = "2026-02-18T19:33:05.6Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c8/29/9641b73248745774a52c7ce7f965ed1febbdea787ec21caad3ae6891d18a/azure_core-1.38.3.tar.gz", hash = "sha256:a7931fd445cb4af8802c6f39c6a326bbd1e34b115846550a8245fa656ead6f8e", size = 367267, upload-time = "2026-03-12T20:28:21.122Z" } wheels = [ - { url 
= "https://files.pythonhosted.org/packages/42/23/6371a551800d3812d6019cd813acd985f9fac0fedc1290129211a73da4ae/azure_core-1.38.2-py3-none-any.whl", hash = "sha256:074806c75cf239ea284a33a66827695ef7aeddac0b4e19dda266a93e4665ead9", size = 217957, upload-time = "2026-02-18T19:33:07.696Z" }, + { url = "https://files.pythonhosted.org/packages/9a/3d/ac86083efa45a439d0bbfb7947615227813d368b9e1e93d23fd30de6fec0/azure_core-1.38.3-py3-none-any.whl", hash = "sha256:bf59d29765bf4748ab9edf25f98a30b7ea9797f43e367c06d846a30b29c1f845", size = 218231, upload-time = "2026-03-12T20:28:22.462Z" }, ] [[package]] name = "azure-identity" -version = "1.25.2" +version = "1.25.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "azure-core" }, @@ -253,9 +253,9 @@ dependencies = [ { name = "msal-extensions" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c2/3a/439a32a5e23e45f6a91f0405949dc66cfe6834aba15a430aebfc063a81e7/azure_identity-1.25.2.tar.gz", hash = "sha256:030dbaa720266c796221c6cdbd1999b408c079032c919fef725fcc348a540fe9", size = 284709, upload-time = "2026-02-11T01:55:42.323Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c5/0e/3a63efb48aa4a5ae2cfca61ee152fbcb668092134d3eb8bfda472dd5c617/azure_identity-1.25.3.tar.gz", hash = "sha256:ab23c0d63015f50b630ef6c6cf395e7262f439ce06e5d07a64e874c724f8d9e6", size = 286304, upload-time = "2026-03-13T01:12:20.892Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9b/77/f658c76f9e9a52c784bd836aaca6fd5b9aae176f1f53273e758a2bcda695/azure_identity-1.25.2-py3-none-any.whl", hash = "sha256:1b40060553d01a72ba0d708b9a46d0f61f56312e215d8896d836653ffdc6753d", size = 191423, upload-time = "2026-02-11T01:55:44.245Z" }, + { url = "https://files.pythonhosted.org/packages/49/9a/417b3a533e01953a7c618884df2cb05a71e7b68bdbce4fbdb62349d2a2e8/azure_identity-1.25.3-py3-none-any.whl", hash = "sha256:f4d0b956a8146f30333e071374171f3cfa7bdb8073adb8c3814b65567aa7447c", 
size = 192138, upload-time = "2026-03-13T01:12:22.951Z" }, ] [[package]] @@ -292,7 +292,7 @@ wheels = [ [[package]] name = "black" -version = "26.3.0" +version = "26.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, @@ -302,19 +302,19 @@ dependencies = [ { name = "platformdirs" }, { name = "pytokens" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/11/5f/25b7b149b8b7d3b958efa4faa56446560408c0f2651108a517526de0320a/black-26.3.0.tar.gz", hash = "sha256:4d438dfdba1c807c6c7c63c4f15794dda0820d2222e7c4105042ac9ddfc5dd0b", size = 664127, upload-time = "2026-03-06T17:42:33.7Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e1/c5/61175d618685d42b005847464b8fb4743a67b1b8fdb75e50e5a96c31a27a/black-26.3.1.tar.gz", hash = "sha256:2c50f5063a9641c7eed7795014ba37b0f5fa227f3d408b968936e24bc0566b07", size = 666155, upload-time = "2026-03-12T03:36:03.593Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1d/76/b21711045b7f4c4f1774048d0b34dd10a265c42255658b251ce3303ae3c7/black-26.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c2b1e5eec220b419e3591a0aaa6351bd3a9c01fe6291fbaf76d84308eb7a2ede", size = 1895944, upload-time = "2026-03-06T17:46:24.841Z" }, - { url = "https://files.pythonhosted.org/packages/f2/c3/8c56e73283326bc92a36101c660228fff09a2403a57a03cacf3f7f84cf62/black-26.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1bab64de70bccc992432bee56cdffbe004ceeaa07352127c386faa87e81f9261", size = 1718669, upload-time = "2026-03-06T17:46:26.639Z" }, - { url = "https://files.pythonhosted.org/packages/7b/8b/712a3ae8f17c1f3cd6f9ac2fffb167a27192f5c7aba68724e8c4ab8474ad/black-26.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5b6c5f734290803b7b26493ffd734b02b72e6c90d82d45ac4d5b862b9bdf7720", size = 1794844, upload-time = "2026-03-06T17:46:28.334Z" }, - { url = 
"https://files.pythonhosted.org/packages/ba/5b/ee955040e446df86473287dd24dc69c80dd05e02cc358bca90e22059f7b1/black-26.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:7c767396af15b54e1a6aae99ddf241ae97e589f666b1d22c4b6618282a04e4ca", size = 1420461, upload-time = "2026-03-06T17:46:29.965Z" }, - { url = "https://files.pythonhosted.org/packages/12/77/40b8bd44f032bb34c9ebf47ffc5bb47a2520d29e0a4b8a780ab515223b5a/black-26.3.0-cp312-cp312-win_arm64.whl", hash = "sha256:765fd6ddd00f35c55250fdc6b790c272d54ac3f44da719cc42df428269b45980", size = 1229667, upload-time = "2026-03-06T17:46:31.654Z" }, - { url = "https://files.pythonhosted.org/packages/28/c3/21a834ce3de02c64221243f2adac63fa3c3f441efdb3adbf4136b33dfeb0/black-26.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:59754fd8f43ef457be190594c07a52c999e22cb1534dc5344bff1d46fdf1027d", size = 1895195, upload-time = "2026-03-06T17:46:33.12Z" }, - { url = "https://files.pythonhosted.org/packages/1c/f9/212d9697dd78362dadb778d4616b74c8c2cf7f2e4a55aac2adeb0576f2e9/black-26.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1fd94cfee67b8d336761a0b08629a25938e4a491c440951ce517a7209c99b5ff", size = 1718472, upload-time = "2026-03-06T17:46:34.576Z" }, - { url = "https://files.pythonhosted.org/packages/a2/dd/da980b2f512441375b73cb511f38a2c3db4be83ccaa1302b8d39c9fa2dff/black-26.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6f7b3e653a90ca1ef4e821c20f8edaee80b649c38d2532ed2e9073a9534b14a7", size = 1793741, upload-time = "2026-03-06T17:46:36.261Z" }, - { url = "https://files.pythonhosted.org/packages/93/11/cd69ae8826fe3bc6eaf525c8c557266d522b258154a2968eb46d6d25fac7/black-26.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:f8fb9d7c2496adc83614856e1f6e55a9ce4b7ae7fc7f45b46af9189ddb493464", size = 1422522, upload-time = "2026-03-06T17:46:37.607Z" }, - { url = 
"https://files.pythonhosted.org/packages/75/f5/647cf50255203eb286be197925e86eedc101d5409147505db3e463229228/black-26.3.0-cp313-cp313-win_arm64.whl", hash = "sha256:e8618c1d06838f56afbcb3ffa1aa16436cec62b86b38c7b32ca86f53948ffb91", size = 1231807, upload-time = "2026-03-06T17:46:39.072Z" }, - { url = "https://files.pythonhosted.org/packages/39/d7/7360654ba4f8b41afcaeb5aca973cfea5591da75aff79b0a8ae0bb8883f6/black-26.3.0-py3-none-any.whl", hash = "sha256:e825d6b121910dff6f04d7691f826d2449327e8e71c26254c030c4f3d2311985", size = 206848, upload-time = "2026-03-06T17:42:31.133Z" }, + { url = "https://files.pythonhosted.org/packages/dc/f8/da5eae4fc75e78e6dceb60624e1b9662ab00d6b452996046dfa9b8a6025b/black-26.3.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b5e6f89631eb88a7302d416594a32faeee9fb8fb848290da9d0a5f2903519fc1", size = 1895920, upload-time = "2026-03-12T03:40:13.921Z" }, + { url = "https://files.pythonhosted.org/packages/2c/9f/04e6f26534da2e1629b2b48255c264cabf5eedc5141d04516d9d68a24111/black-26.3.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:41cd2012d35b47d589cb8a16faf8a32ef7a336f56356babd9fcf70939ad1897f", size = 1718499, upload-time = "2026-03-12T03:40:15.239Z" }, + { url = "https://files.pythonhosted.org/packages/04/91/a5935b2a63e31b331060c4a9fdb5a6c725840858c599032a6f3aac94055f/black-26.3.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f76ff19ec5297dd8e66eb64deda23631e642c9393ab592826fd4bdc97a4bce7", size = 1794994, upload-time = "2026-03-12T03:40:17.124Z" }, + { url = "https://files.pythonhosted.org/packages/e7/0a/86e462cdd311a3c2a8ece708d22aba17d0b2a0d5348ca34b40cdcbea512e/black-26.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:ddb113db38838eb9f043623ba274cfaf7d51d5b0c22ecb30afe58b1bb8322983", size = 1420867, upload-time = "2026-03-12T03:40:18.83Z" }, + { url = 
"https://files.pythonhosted.org/packages/5b/e5/22515a19cb7eaee3440325a6b0d95d2c0e88dd180cb011b12ae488e031d1/black-26.3.1-cp312-cp312-win_arm64.whl", hash = "sha256:dfdd51fc3e64ea4f35873d1b3fb25326773d55d2329ff8449139ebaad7357efb", size = 1230124, upload-time = "2026-03-12T03:40:20.425Z" }, + { url = "https://files.pythonhosted.org/packages/f5/77/5728052a3c0450c53d9bb3945c4c46b91baa62b2cafab6801411b6271e45/black-26.3.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:855822d90f884905362f602880ed8b5df1b7e3ee7d0db2502d4388a954cc8c54", size = 1895034, upload-time = "2026-03-12T03:40:21.813Z" }, + { url = "https://files.pythonhosted.org/packages/52/73/7cae55fdfdfbe9d19e9a8d25d145018965fe2079fa908101c3733b0c55a0/black-26.3.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8a33d657f3276328ce00e4d37fe70361e1ec7614da5d7b6e78de5426cb56332f", size = 1718503, upload-time = "2026-03-12T03:40:23.666Z" }, + { url = "https://files.pythonhosted.org/packages/e1/87/af89ad449e8254fdbc74654e6467e3c9381b61472cc532ee350d28cfdafb/black-26.3.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f1cd08e99d2f9317292a311dfe578fd2a24b15dbce97792f9c4d752275c1fa56", size = 1793557, upload-time = "2026-03-12T03:40:25.497Z" }, + { url = "https://files.pythonhosted.org/packages/43/10/d6c06a791d8124b843bf325ab4ac7d2f5b98731dff84d6064eafd687ded1/black-26.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:c7e72339f841b5a237ff14f7d3880ddd0fc7f98a1199e8c4327f9a4f478c1839", size = 1422766, upload-time = "2026-03-12T03:40:27.14Z" }, + { url = "https://files.pythonhosted.org/packages/59/4f/40a582c015f2d841ac24fed6390bd68f0fc896069ff3a886317959c9daf8/black-26.3.1-cp313-cp313-win_arm64.whl", hash = "sha256:afc622538b430aa4c8c853f7f63bc582b3b8030fd8c80b70fb5fa5b834e575c2", size = 1232140, upload-time = "2026-03-12T03:40:28.882Z" }, + { url = 
"https://files.pythonhosted.org/packages/8e/0d/52d98722666d6fc6c3dd4c76df339501d6efd40e0ff95e6186a7b7f0befd/black-26.3.1-py3-none-any.whl", hash = "sha256:2bd5aa94fc267d38bb21a70d7410a89f1a1d318841855f698746f8e7f51acd1b", size = 207542, upload-time = "2026-03-12T03:36:01.668Z" }, ] [[package]] @@ -348,11 +348,11 @@ wheels = [ [[package]] name = "cachetools" -version = "7.0.3" +version = "7.0.5" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/48/5c/3b882b82e9af737906539a2eafb62f96a229f1fa80255bede0c7b554cbc4/cachetools-7.0.3.tar.gz", hash = "sha256:8c246313b95849964e54a909c03b327a87ab0428b068fac10da7b105ca275ef6", size = 37187, upload-time = "2026-03-05T21:00:57.918Z" } +sdist = { url = "https://files.pythonhosted.org/packages/af/dd/57fe3fdb6e65b25a5987fd2cdc7e22db0aef508b91634d2e57d22928d41b/cachetools-7.0.5.tar.gz", hash = "sha256:0cd042c24377200c1dcd225f8b7b12b0ca53cc2c961b43757e774ebe190fd990", size = 37367, upload-time = "2026-03-09T20:51:29.451Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/05/4a/573185481c50a8841331f54ddae44e4a3469c46aa0b397731c53a004369a/cachetools-7.0.3-py3-none-any.whl", hash = "sha256:c128ffca156eef344c25fcd08a96a5952803786fa33097f5f2d49edf76f79d53", size = 13907, upload-time = "2026-03-05T21:00:56.486Z" }, + { url = "https://files.pythonhosted.org/packages/06/f3/39cf3367b8107baa44f861dc802cbf16263c945b62d8265d36034fc07bea/cachetools-7.0.5-py3-none-any.whl", hash = "sha256:46bc8ebefbe485407621d0a4264b23c080cedd913921bad7ac3ed2f26c183114", size = 13918, upload-time = "2026-03-09T20:51:27.33Z" }, ] [[package]] @@ -401,21 +401,21 @@ wheels = [ [[package]] name = "chardet" -version = "7.0.1" +version = "7.1.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6c/80/4684035f1a2a3096506bc377276a815ccf0be3c3316eab35d589e82d9f3c/chardet-7.0.1.tar.gz", hash = 
"sha256:6fce895c12c5495bb598e59ae3cd89306969b4464ec7b6dd609b9c86e3397fe3", size = 490240, upload-time = "2026-03-04T21:25:26.97Z" } +sdist = { url = "https://files.pythonhosted.org/packages/0d/84/e72ea5c06e687db591283474b8442ab95665fc6bae7b06043b2a6f0eaf6c/chardet-7.1.0.tar.gz", hash = "sha256:8f47bc4accac17bd9accbb4acc1d563acc024a783806c0a43c3a583f5285690b", size = 505743, upload-time = "2026-03-11T21:39:37.603Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f6/88/4c6fe7dcd5d36a2cfd7030084fbd79264083f329faaf96038c23888a8e05/chardet-7.0.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f661edbfa77b8683a503043ddc9b9fe9036cf28af13064200e11fa1844ded79c", size = 541828, upload-time = "2026-03-04T21:24:58.726Z" }, - { url = "https://files.pythonhosted.org/packages/f9/fb/3b92a2433eadef83ae131fa720a17857cfbf7687c5f188bfb2f9eee2d3dd/chardet-7.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:169951fa88d449e72e0c6194cec1c5e405fd36a6cfbe74c7dab5494cc35f1700", size = 533571, upload-time = "2026-03-04T21:25:00.703Z" }, - { url = "https://files.pythonhosted.org/packages/d9/75/37bee6900183ea08a3a0ae04b9f018f9e64c6b10716e1f7b423db0c4356c/chardet-7.0.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dd6db7505556ae8f9e2a3bf6d689c2b86aa6b459cf39552645d2c4d3fdbf489c", size = 554182, upload-time = "2026-03-04T21:25:02.168Z" }, - { url = "https://files.pythonhosted.org/packages/e8/ed/2fe5ea435ae480bd3a76be1415920ce52b3ff6e188d8eab6a635d6a2a1d1/chardet-7.0.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6f907962b18df78d5ca87a7484e4034354408d2c97cec6f53634b0ea0424c594", size = 557933, upload-time = "2026-03-04T21:25:03.694Z" }, - { url = "https://files.pythonhosted.org/packages/07/ba/7ca89301e492ac4184ba7f4736565d954ba3125acf6bf02c66a38a802bda/chardet-7.0.1-cp312-cp312-win_amd64.whl", hash = 
"sha256:302798e1e62008ca34a216dd04ecc5e240993b2090628e2a35d4c0754313ea9a", size = 524256, upload-time = "2026-03-04T21:25:05.581Z" }, - { url = "https://files.pythonhosted.org/packages/56/26/1a22b9a19b4ca167ca462eaf91d0fc31285874d80b0381c55fdc5bc5f066/chardet-7.0.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:67fe3f453416ed9343057dcf06583b36aae6d8bdb013370b3ff46bc37b7e30ac", size = 541652, upload-time = "2026-03-04T21:25:07.041Z" }, - { url = "https://files.pythonhosted.org/packages/24/fe/2f2425f3b0801e897653723ee827bc87e5a0feacf826ab268a9216680615/chardet-7.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:63bc210ce73f8a1b87430b949f84d086cb326d67eb259305862e7c8861b73374", size = 533333, upload-time = "2026-03-04T21:25:08.886Z" }, - { url = "https://files.pythonhosted.org/packages/b2/8c/6b5f4b49c471b396bdbddad55b569e05d686ea65d91795dae6c774b285f0/chardet-7.0.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:11f51985946b49739968b6dc2fa70e7d8f490bb15574377c5ee114f33d19ef7e", size = 553815, upload-time = "2026-03-04T21:25:10.861Z" }, - { url = "https://files.pythonhosted.org/packages/b9/45/860a82d618e5c3930faef0a0fe205b752323e5d10ce0c18fe5016fd4f8d2/chardet-7.0.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8714f0013c208452a98e23595d99cef53c5364565454425f431446eb586e2591", size = 557506, upload-time = "2026-03-04T21:25:14.081Z" }, - { url = "https://files.pythonhosted.org/packages/ed/44/7acb8f84fc7b5ad3c977ac31865b308881da1c0a6ca58be35554d2473dd7/chardet-7.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:c12abc65830068ad05bd257fb953aaaf63a551446688e03e145522086be5738c", size = 524145, upload-time = "2026-03-04T21:25:15.696Z" }, - { url = "https://files.pythonhosted.org/packages/a3/1f/c1a089db6333b1283409cad3714b8935e7e56722c9c60f9299726a1e57c2/chardet-7.0.1-py3-none-any.whl", hash = 
"sha256:e51e1ff2c51b2d622d97c9737bd5ee9d9b9038f05b7dd8f9ea10b9e2d9674c24", size = 408292, upload-time = "2026-03-04T21:25:25.214Z" }, + { url = "https://files.pythonhosted.org/packages/f0/b8/415efba024c5d6a3d81609de51598a11a99b9f2ffb916c42b72190da1973/chardet-7.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:43c1e3cba6c41d8958ee4acdab94c151dbe256d7ef8df4ae032dc62a892f294f", size = 542358, upload-time = "2026-03-11T21:39:11.023Z" }, + { url = "https://files.pythonhosted.org/packages/7f/d7/9517de8b58b487d5d05e957efacc8c9af180cb2cc97103b1a1c67120d8c0/chardet-7.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1a3c22672c9502af99e0433b47421d0d72c8803efce2cd4a91a3ae1ab5972243", size = 534566, upload-time = "2026-03-11T21:39:12.462Z" }, + { url = "https://files.pythonhosted.org/packages/c3/33/1286f2a05935a80eaadcc13fc70fb0eaa00805acc756363f0f4aca2ed936/chardet-7.1.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fdfc42dfc44ccd569b84fe6a1fdea1df66dc0c48461bc3899dea5efea8d507f6", size = 556240, upload-time = "2026-03-11T21:39:14.388Z" }, + { url = "https://files.pythonhosted.org/packages/c7/cc/556aeffb4768b258cc461bc1063d3592e411e1744223da8c7fbbf524438e/chardet-7.1.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e096d9c211050fff40e22748e1d09d0cec8348fc13ee6e2e0a1da079345b8a86", size = 559737, upload-time = "2026-03-11T21:39:16.382Z" }, + { url = "https://files.pythonhosted.org/packages/af/4a/147151940ad5ac8bf9f8728a1e46bc63502cd95e93c3a9796f01914188f9/chardet-7.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:a6492bebaba8882afb3e14c786fb69ed767326b6f514b8e093dcdf6e2a094d33", size = 526574, upload-time = "2026-03-11T21:39:18.311Z" }, + { url = "https://files.pythonhosted.org/packages/b9/79/2c61f33c87d3698f15ca01b0882fbd2fcb95911a783cc615d31adfae025a/chardet-7.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = 
"sha256:cc8c7520a9736da766f5794bbabb1c6cdfe446676429a5cf691af878631a80bf", size = 542249, upload-time = "2026-03-11T21:39:20.133Z" }, + { url = "https://files.pythonhosted.org/packages/eb/0c/2d0c4897e43f1bb1b68dad840551cda224696eda9951524db50721d3bc18/chardet-7.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6f806f325825325e0682226269a2a4859993344cccca14f2463855d4f5a93272", size = 534544, upload-time = "2026-03-11T21:39:21.844Z" }, + { url = "https://files.pythonhosted.org/packages/17/cb/a568eea24adc1a023da266854e9fc9e0eaffa72580d43c45b47f1b62dd2e/chardet-7.1.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bacc8f862998c59e9ee7fe4960538300d1cc3fe2c293b9cc99bbbc7bf3bedf51", size = 555894, upload-time = "2026-03-11T21:39:23.649Z" }, + { url = "https://files.pythonhosted.org/packages/f3/e7/958975ca18c7b5be9b94354c302a7f3d757c02e7c14e88e0c85af1e16c70/chardet-7.1.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c35d17822fc94467b7951adebd897cb01c0e37ac694be18d2cbd2b676d61df4f", size = 559286, upload-time = "2026-03-11T21:39:25.289Z" }, + { url = "https://files.pythonhosted.org/packages/84/0b/1eddfd650e98bb80ec9f74c0bb98fa60cc36f63d9209214cd069b2a27340/chardet-7.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:b951107b254cdc766e52f4b8339dcfa97c7b45ca9f5509075308db2497e7f3af", size = 526406, upload-time = "2026-03-11T21:39:27.103Z" }, + { url = "https://files.pythonhosted.org/packages/87/13/6aa6c9118ce153a806bb0472e27e8f8c24e6925db8a5b9fe99e03e45af15/chardet-7.1.0-py3-none-any.whl", hash = "sha256:7f677725333bf53f84b7f57458f44669a8a5eb2ac4092ac699cdfa9b1af08a5f", size = 411334, upload-time = "2026-03-11T21:39:36.198Z" }, ] [[package]] @@ -468,6 +468,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/52/93/342cc62a70ab727e093ed98e02a725d85b746345f05d2b5e5034649f4ec8/chevron-0.14.0-py3-none-any.whl", hash = 
"sha256:fbf996a709f8da2e745ef763f482ce2d311aa817d287593a5b990d6d6e4f0443", size = 11595, upload-time = "2021-01-02T22:47:57.847Z" }, ] +[[package]] +name = "circuitbreaker" +version = "2.1.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/df/ac/de7a92c4ed39cba31fe5ad9203b76a25ca67c530797f6bb420fff5f65ccb/circuitbreaker-2.1.3.tar.gz", hash = "sha256:1a4baee510f7bea3c91b194dcce7c07805fe96c4423ed5594b75af438531d084", size = 10787, upload-time = "2025-03-31T08:12:08.963Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ae/34/15f08edd4628f65217de1fc3c1a27c82e46fe357d60c217fc9881e12ebcc/circuitbreaker-2.1.3-py3-none-any.whl", hash = "sha256:87ba6a3ed03fdc7032bc175561c2b04d52ade9d5faf94ca2b035fbdc5e6b1dd1", size = 7737, upload-time = "2025-03-31T08:12:07.802Z" }, +] + [[package]] name = "click" version = "8.3.1" @@ -602,7 +611,7 @@ wheels = [ [[package]] name = "datasets" -version = "4.6.1" +version = "4.7.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "dill" }, @@ -620,9 +629,9 @@ dependencies = [ { name = "tqdm" }, { name = "xxhash" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d7/94/eb81c6fe32e9b6ef92223141b5a553aeff2e9456968424a8533cbe88f476/datasets-4.6.1.tar.gz", hash = "sha256:140ce500bc41939ff6ce995702d66b1f4b2ee7f117bb9b07512fab6804d4070a", size = 593865, upload-time = "2026-02-27T23:26:49.482Z" } +sdist = { url = "https://files.pythonhosted.org/packages/1c/9c/ba18de0b70858533e422ed6cfe0e46789473cef7fc7fc3653e23fa494730/datasets-4.7.0.tar.gz", hash = "sha256:4984cdfc65d04464da7f95205a55cb50515fd94ae3176caacb50a1b7273792e2", size = 602008, upload-time = "2026-03-09T19:01:49.298Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/37/f0/99fe6eb530c7ee9ee1faee48059eb8a6437f80c893a496b98a78864e0fc6/datasets-4.6.1-py3-none-any.whl", hash = "sha256:f53228e6dadc9f837037b1bf3051d7d8c054abbb3eb29f1f022926e08090e0da", size = 520667, 
upload-time = "2026-02-27T23:26:46.855Z" }, + { url = "https://files.pythonhosted.org/packages/1e/03/c6d9c3119cf712f638fe763e887ecaac6acbb62bf1e2acc3cbde0df340fd/datasets-4.7.0-py3-none-any.whl", hash = "sha256:d5fe3025ec6acc3b5649f10d5576dff5e054134927604e6913c1467a04adc3c2", size = 527530, upload-time = "2026-03-09T19:01:47.443Z" }, ] [[package]] @@ -779,11 +788,11 @@ wheels = [ [[package]] name = "filelock" -version = "3.25.0" +version = "3.25.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/77/18/a1fd2231c679dcb9726204645721b12498aeac28e1ad0601038f94b42556/filelock-3.25.0.tar.gz", hash = "sha256:8f00faf3abf9dc730a1ffe9c354ae5c04e079ab7d3a683b7c32da5dd05f26af3", size = 40158, upload-time = "2026-03-01T15:08:45.916Z" } +sdist = { url = "https://files.pythonhosted.org/packages/94/b8/00651a0f559862f3bb7d6f7477b192afe3f583cc5e26403b44e59a55ab34/filelock-3.25.2.tar.gz", hash = "sha256:b64ece2b38f4ca29dd3e810287aa8c48182bbecd1ae6e9ae126c9b35f1382694", size = 40480, upload-time = "2026-03-11T20:45:38.487Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f9/0b/de6f54d4a8bedfe8645c41497f3c18d749f0bd3218170c667bf4b81d0cdd/filelock-3.25.0-py3-none-any.whl", hash = "sha256:5ccf8069f7948f494968fc0713c10e5c182a9c9d9eef3a636307a20c2490f047", size = 26427, upload-time = "2026-03-01T15:08:44.593Z" }, + { url = "https://files.pythonhosted.org/packages/a4/a5/842ae8f0c08b61d6484b52f99a03510a3a72d23141942d216ebe81fefbce/filelock-3.25.2-py3-none-any.whl", hash = "sha256:ca8afb0da15f229774c9ad1b455ed96e85a81373065fb10446672f64444ddf70", size = 26759, upload-time = "2026-03-11T20:45:37.437Z" }, ] [[package]] @@ -893,16 +902,15 @@ grpc = [ [[package]] name = "google-auth" -version = "2.49.0" +version = "2.49.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cryptography" }, { name = "pyasn1-modules" }, - { name = "rsa" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/7d/59/7371175bfd949abfb1170aa076352131d7281bd9449c0f978604fc4431c3/google_auth-2.49.0.tar.gz", hash = "sha256:9cc2d9259d3700d7a257681f81052db6737495a1a46b610597f4b8bafe5286ae", size = 333444, upload-time = "2026-03-06T21:53:06.07Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ea/80/6a696a07d3d3b0a92488933532f03dbefa4a24ab80fb231395b9a2a1be77/google_auth-2.49.1.tar.gz", hash = "sha256:16d40da1c3c5a0533f57d268fe72e0ebb0ae1cc3b567024122651c045d879b64", size = 333825, upload-time = "2026-03-12T19:30:58.135Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/37/45/de64b823b639103de4b63dd193480dce99526bd36be6530c2dba85bf7817/google_auth-2.49.0-py3-none-any.whl", hash = "sha256:f893ef7307f19cf53700b7e2f61b5a6affe3aa0edf9943b13788920ab92d8d87", size = 240676, upload-time = "2026-03-06T21:52:38.304Z" }, + { url = "https://files.pythonhosted.org/packages/e9/eb/c6c2478d8a8d633460be40e2a8a6f8f429171997a35a96f81d3b680dec83/google_auth-2.49.1-py3-none-any.whl", hash = "sha256:195ebe3dca18eddd1b3db5edc5189b76c13e96f29e73043b923ebcf3f1a860f7", size = 240737, upload-time = "2026-03-12T19:30:53.159Z" }, ] [package.optional-dependencies] @@ -912,7 +920,7 @@ requests = [ [[package]] name = "google-cloud-aiplatform" -version = "1.140.0" +version = "1.141.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "docstring-parser" }, @@ -928,9 +936,9 @@ dependencies = [ { name = "pydantic" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/1b/14/1c223faf986afffdd61c994a10c30a04985ed5ba072201058af2c6e1e572/google_cloud_aiplatform-1.140.0.tar.gz", hash = "sha256:ea7eb1870b4cf600f8c2472102e21c3a1bcaf723d6e49f00ed51bc6b88d54fff", size = 10146640, upload-time = "2026-03-04T00:56:38.95Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ac/dc/1209c7aab43bd7233cf631165a3b1b4284d22fc7fe7387c66228d07868ab/google_cloud_aiplatform-1.141.0.tar.gz", hash = 
"sha256:e3b1cdb28865dd862aac9c685dfc5ac076488705aba0a5354016efadcddd59c6", size = 10152688, upload-time = "2026-03-10T22:20:08.692Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c3/5c/bb64aee2da24895d57611eed00fac54739bfa34f98ab344020a6605875bf/google_cloud_aiplatform-1.140.0-py2.py3-none-any.whl", hash = "sha256:e94493a2682b9d17efa7146a53bb3665bf1595c3394fd3d0f45d18f71623fddc", size = 8355660, upload-time = "2026-03-04T00:56:34.441Z" }, + { url = "https://files.pythonhosted.org/packages/6a/fc/428af69a69ff2e477e7f5e12d227b31fe5790f1a8234aacd54297f49c836/google_cloud_aiplatform-1.141.0-py2.py3-none-any.whl", hash = "sha256:6bd25b4d514c40b8181ca703e1b313ad6d0454ab8006fc9907fb3e9f672f31d1", size = 8358409, upload-time = "2026-03-10T22:20:04.871Z" }, ] [[package]] @@ -1018,7 +1026,7 @@ wheels = [ [[package]] name = "google-genai" -version = "1.66.0" +version = "1.67.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -1032,9 +1040,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "websockets" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/9b/ba/0b343b0770d4710ad2979fd9301d7caa56c940174d5361ed4a7cc4979241/google_genai-1.66.0.tar.gz", hash = "sha256:ffc01647b65046bca6387320057aa51db0ad64bcc72c8e3e914062acfa5f7c49", size = 504386, upload-time = "2026-03-04T22:15:28.156Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/07/59a498f81f2c7b0649eacda2ea470b7fd8bd7149f20caba22962081bdd51/google_genai-1.67.0.tar.gz", hash = "sha256:897195a6a9742deb6de240b99227189ada8b2d901d61bdfba836c3092021eab6", size = 506972, upload-time = "2026-03-12T20:39:16.241Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d1/dd/403949d922d4e261b08b64aaa132af4e456c3b15c8e2a2d9e6ef693f66e2/google_genai-1.66.0-py3-none-any.whl", hash = "sha256:7f127a39cf695277104ce4091bb26e417c59bb46e952ff3699c3a982d9c474ee", size = 732174, upload-time = "2026-03-04T22:15:26.63Z" }, + { url = 
"https://files.pythonhosted.org/packages/6e/c2/562aa1f086e53529ffbeb5b43d5d8bc42c1b968102b5e2163fad005ce298/google_genai-1.67.0-py3-none-any.whl", hash = "sha256:58b0484ff2d4335fa53c724b489e9f807fcca8115d9cdbd8fdf341121fbd6d2d", size = 733542, upload-time = "2026-03-12T20:39:14.615Z" }, ] [[package]] @@ -1162,26 +1170,26 @@ wheels = [ [[package]] name = "hf-xet" -version = "1.3.2" +version = "1.4.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/8b/cb/9bb543bd987ffa1ee48202cc96a756951b734b79a542335c566148ade36c/hf_xet-1.3.2.tar.gz", hash = "sha256:e130ee08984783d12717444e538587fa2119385e5bd8fc2bb9f930419b73a7af", size = 643646, upload-time = "2026-02-27T17:26:08.051Z" } +sdist = { url = "https://files.pythonhosted.org/packages/09/08/23c84a26716382c89151b5b447b4beb19e3345f3a93d3b73009a71a57ad3/hf_xet-1.4.2.tar.gz", hash = "sha256:b7457b6b482d9e0743bd116363239b1fa904a5e65deede350fbc0c4ea67c71ea", size = 672357, upload-time = "2026-03-13T06:58:51.077Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/49/75/462285971954269432aad2e7938c5c7ff9ec7d60129cec542ab37121e3d6/hf_xet-1.3.2-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:335a8f36c55fd35a92d0062f4e9201b4015057e62747b7e7001ffb203c0ee1d2", size = 3761019, upload-time = "2026-02-27T17:25:49.441Z" }, - { url = "https://files.pythonhosted.org/packages/35/56/987b0537ddaf88e17192ea09afa8eca853e55f39a4721578be436f8409df/hf_xet-1.3.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:c1ae4d3a716afc774e66922f3cac8206bfa707db13f6a7e62dfff74bfc95c9a8", size = 3521565, upload-time = "2026-02-27T17:25:47.469Z" }, - { url = "https://files.pythonhosted.org/packages/a8/5c/7e4a33a3d689f77761156cc34558047569e54af92e4d15a8f493229f6767/hf_xet-1.3.2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d6dbdf231efac0b9b39adcf12a07f0c030498f9212a18e8c50224d0e84ab803d", size = 4176494, upload-time = "2026-02-27T17:25:40.247Z" }, - { 
url = "https://files.pythonhosted.org/packages/6b/b3/71e856bf9d9a69b3931837e8bf22e095775f268c8edcd4a9e8c355f92484/hf_xet-1.3.2-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:c1980abfb68ecf6c1c7983379ed7b1e2b49a1aaf1a5aca9acc7d48e5e2e0a961", size = 3955601, upload-time = "2026-02-27T17:25:38.376Z" }, - { url = "https://files.pythonhosted.org/packages/63/d7/aecf97b3f0a981600a67ff4db15e2d433389d698a284bb0ea5d8fcdd6f7f/hf_xet-1.3.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:1c88fbd90ad0d27c46b77a445f0a436ebaa94e14965c581123b68b1c52f5fd30", size = 4154770, upload-time = "2026-02-27T17:25:56.756Z" }, - { url = "https://files.pythonhosted.org/packages/e2/e1/3af961f71a40e09bf5ee909842127b6b00f5ab4ee3817599dc0771b79893/hf_xet-1.3.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:35b855024ca37f2dd113ac1c08993e997fbe167b9d61f9ef66d3d4f84015e508", size = 4394161, upload-time = "2026-02-27T17:25:58.111Z" }, - { url = "https://files.pythonhosted.org/packages/a1/c3/859509bade9178e21b8b1db867b8e10e9f817ab9ac1de77cb9f461ced765/hf_xet-1.3.2-cp313-cp313t-win_amd64.whl", hash = "sha256:31612ba0629046e425ba50375685a2586e11fb9144270ebabd75878c3eaf6378", size = 3637377, upload-time = "2026-02-27T17:26:10.611Z" }, - { url = "https://files.pythonhosted.org/packages/05/7f/724cfbef4da92d577b71f68bf832961c8919f36c60d28d289a9fc9d024d4/hf_xet-1.3.2-cp313-cp313t-win_arm64.whl", hash = "sha256:433c77c9f4e132b562f37d66c9b22c05b5479f243a1f06a120c1c06ce8b1502a", size = 3497875, upload-time = "2026-02-27T17:26:09.034Z" }, - { url = "https://files.pythonhosted.org/packages/d8/28/dbb024e2e3907f6f3052847ca7d1a2f7a3972fafcd53ff79018977fcb3e4/hf_xet-1.3.2-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:f93b7595f1d8fefddfede775c18b5c9256757824f7f6832930b49858483cd56f", size = 3763961, upload-time = "2026-02-27T17:25:52.537Z" }, - { url = 
"https://files.pythonhosted.org/packages/e4/71/b99aed3823c9d1795e4865cf437d651097356a3f38c7d5877e4ac544b8e4/hf_xet-1.3.2-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:a85d3d43743174393afe27835bde0cd146e652b5fcfdbcd624602daef2ef3259", size = 3526171, upload-time = "2026-02-27T17:25:50.968Z" }, - { url = "https://files.pythonhosted.org/packages/9d/ca/907890ce6ef5598b5920514f255ed0a65f558f820515b18db75a51b2f878/hf_xet-1.3.2-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7c2a054a97c44e136b1f7f5a78f12b3efffdf2eed3abc6746fc5ea4b39511633", size = 4180750, upload-time = "2026-02-27T17:25:43.125Z" }, - { url = "https://files.pythonhosted.org/packages/8c/ad/bc7f41f87173d51d0bce497b171c4ee0cbde1eed2d7b4216db5d0ada9f50/hf_xet-1.3.2-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:06b724a361f670ae557836e57801b82c75b534812e351a87a2c739f77d1e0635", size = 3961035, upload-time = "2026-02-27T17:25:41.837Z" }, - { url = "https://files.pythonhosted.org/packages/73/38/600f4dda40c4a33133404d9fe644f1d35ff2d9babb4d0435c646c63dd107/hf_xet-1.3.2-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:305f5489d7241a47e0458ef49334be02411d1d0f480846363c1c8084ed9916f7", size = 4161378, upload-time = "2026-02-27T17:26:00.365Z" }, - { url = "https://files.pythonhosted.org/packages/00/b3/7bc1ff91d1ac18420b7ad1e169b618b27c00001b96310a89f8a9294fe509/hf_xet-1.3.2-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:06cdbde243c85f39a63b28e9034321399c507bcd5e7befdd17ed2ccc06dfe14e", size = 4398020, upload-time = "2026-02-27T17:26:03.977Z" }, - { url = "https://files.pythonhosted.org/packages/2b/0b/99bfd948a3ed3620ab709276df3ad3710dcea61976918cce8706502927af/hf_xet-1.3.2-cp37-abi3-win_amd64.whl", hash = "sha256:9298b47cce6037b7045ae41482e703c471ce36b52e73e49f71226d2e8e5685a1", size = 3641624, upload-time = "2026-02-27T17:26:13.542Z" }, - { url = 
"https://files.pythonhosted.org/packages/cc/02/9a6e4ca1f3f73a164c0cd48e41b3cc56585dcc37e809250de443d673266f/hf_xet-1.3.2-cp37-abi3-win_arm64.whl", hash = "sha256:83d8ec273136171431833a6957e8f3af496bee227a0fe47c7b8b39c106d1749a", size = 3503976, upload-time = "2026-02-27T17:26:12.123Z" }, + { url = "https://files.pythonhosted.org/packages/18/06/e8cf74c3c48e5485c7acc5a990d0d8516cdfb5fdf80f799174f1287cc1b5/hf_xet-1.4.2-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:ac8202ae1e664b2c15cdfc7298cbb25e80301ae596d602ef7870099a126fcad4", size = 3796125, upload-time = "2026-03-13T06:58:33.177Z" }, + { url = "https://files.pythonhosted.org/packages/66/d4/b73ebab01cbf60777323b7de9ef05550790451eb5172a220d6b9845385ec/hf_xet-1.4.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:6d2f8ee39fa9fba9af929f8c0d0482f8ee6e209179ad14a909b6ad78ffcb7c81", size = 3555985, upload-time = "2026-03-13T06:58:31.797Z" }, + { url = "https://files.pythonhosted.org/packages/ff/e7/ded6d1bd041c3f2bca9e913a0091adfe32371988e047dd3a68a2463c15a2/hf_xet-1.4.2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4642a6cf249c09da8c1f87fe50b24b2a3450b235bf8adb55700b52f0ea6e2eb6", size = 4212085, upload-time = "2026-03-13T06:58:24.323Z" }, + { url = "https://files.pythonhosted.org/packages/97/c1/a0a44d1f98934f7bdf17f7a915b934f9fca44bb826628c553589900f6df8/hf_xet-1.4.2-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:769431385e746c92dc05492dde6f687d304584b89c33d79def8367ace06cb555", size = 3988266, upload-time = "2026-03-13T06:58:22.887Z" }, + { url = "https://files.pythonhosted.org/packages/7a/82/be713b439060e7d1f1d93543c8053d4ef2fe7e6922c5b31642eaa26f3c4b/hf_xet-1.4.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c9dd1c1bc4cc56168f81939b0e05b4c36dd2d28c13dc1364b17af89aa0082496", size = 4188513, upload-time = "2026-03-13T06:58:40.858Z" }, + { url = 
"https://files.pythonhosted.org/packages/21/a6/cbd4188b22abd80ebd0edbb2b3e87f2633e958983519980815fb8314eae5/hf_xet-1.4.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:fca58a2ae4e6f6755cc971ac6fcdf777ea9284d7e540e350bb000813b9a3008d", size = 4428287, upload-time = "2026-03-13T06:58:42.601Z" }, + { url = "https://files.pythonhosted.org/packages/b2/4e/84e45b25e2e3e903ed3db68d7eafa96dae9a1d1f6d0e7fc85120347a852f/hf_xet-1.4.2-cp313-cp313t-win_amd64.whl", hash = "sha256:163aab46854ccae0ab6a786f8edecbbfbaa38fcaa0184db6feceebf7000c93c0", size = 3665574, upload-time = "2026-03-13T06:58:53.881Z" }, + { url = "https://files.pythonhosted.org/packages/ee/71/c5ac2b9a7ae39c14e91973035286e73911c31980fe44e7b1d03730c00adc/hf_xet-1.4.2-cp313-cp313t-win_arm64.whl", hash = "sha256:09b138422ecbe50fd0c84d4da5ff537d27d487d3607183cd10e3e53f05188e82", size = 3528760, upload-time = "2026-03-13T06:58:52.187Z" }, + { url = "https://files.pythonhosted.org/packages/b4/86/b40b83a2ff03ef05c4478d2672b1fc2b9683ff870e2b25f4f3af240f2e7b/hf_xet-1.4.2-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:71f02d6e4cdd07f344f6844845d78518cc7186bd2bc52d37c3b73dc26a3b0bc5", size = 3800339, upload-time = "2026-03-13T06:58:36.245Z" }, + { url = "https://files.pythonhosted.org/packages/64/2e/af4475c32b4378b0e92a587adb1aa3ec53e3450fd3e5fe0372a874531c00/hf_xet-1.4.2-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:e9b38d876e94d4bdcf650778d6ebbaa791dd28de08db9736c43faff06ede1b5a", size = 3559664, upload-time = "2026-03-13T06:58:34.787Z" }, + { url = "https://files.pythonhosted.org/packages/3c/4c/781267da3188db679e601de18112021a5cb16506fe86b246e22c5401a9c4/hf_xet-1.4.2-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:77e8c180b7ef12d8a96739a4e1e558847002afe9ea63b6f6358b2271a8bdda1c", size = 4217422, upload-time = "2026-03-13T06:58:27.472Z" }, + { url = 
"https://files.pythonhosted.org/packages/68/47/d6cf4a39ecf6c7705f887a46f6ef5c8455b44ad9eb0d391aa7e8a2ff7fea/hf_xet-1.4.2-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:c3b3c6a882016b94b6c210957502ff7877802d0dbda8ad142c8595db8b944271", size = 3992847, upload-time = "2026-03-13T06:58:25.989Z" }, + { url = "https://files.pythonhosted.org/packages/2d/ef/e80815061abff54697239803948abc665c6b1d237102c174f4f7a9a5ffc5/hf_xet-1.4.2-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9d9a634cc929cfbaf2e1a50c0e532ae8c78fa98618426769480c58501e8c8ac2", size = 4193843, upload-time = "2026-03-13T06:58:44.59Z" }, + { url = "https://files.pythonhosted.org/packages/54/75/07f6aa680575d9646c4167db6407c41340cbe2357f5654c4e72a1b01ca14/hf_xet-1.4.2-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6b0932eb8b10317ea78b7da6bab172b17be03bbcd7809383d8d5abd6a2233e04", size = 4432751, upload-time = "2026-03-13T06:58:46.533Z" }, + { url = "https://files.pythonhosted.org/packages/cd/71/193eabd7e7d4b903c4aa983a215509c6114915a5a237525ec562baddb868/hf_xet-1.4.2-cp37-abi3-win_amd64.whl", hash = "sha256:ad185719fb2e8ac26f88c8100562dbf9dbdcc3d9d2add00faa94b5f106aea53f", size = 3671149, upload-time = "2026-03-13T06:58:57.07Z" }, + { url = "https://files.pythonhosted.org/packages/b4/7e/ccf239da366b37ba7f0b36095450efae4a64980bdc7ec2f51354205fdf39/hf_xet-1.4.2-cp37-abi3-win_arm64.whl", hash = "sha256:32c012286b581f783653e718c1862aea5b9eb140631685bb0c5e7012c8719a87", size = 3533426, upload-time = "2026-03-13T06:58:55.46Z" }, ] [[package]] @@ -1223,7 +1231,7 @@ wheels = [ [[package]] name = "huggingface-hub" -version = "1.6.0" +version = "1.7.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, @@ -1236,9 +1244,9 @@ dependencies = [ { name = "typer" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d5/7a/304cec37112382c4fe29a43bcb0d5891f922785d18745883d2aa4eb74e4b/huggingface_hub-1.6.0.tar.gz", hash = 
"sha256:d931ddad8ba8dfc1e816bf254810eb6f38e5c32f60d4184b5885662a3b167325", size = 717071, upload-time = "2026-03-06T14:19:18.524Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b4/a8/94ccc0aec97b996a3a68f3e1fa06a4bd7185dd02bf22bfba794a0ade8440/huggingface_hub-1.7.1.tar.gz", hash = "sha256:be38fe66e9b03c027ad755cb9e4b87ff0303c98acf515b5d579690beb0bf3048", size = 722097, upload-time = "2026-03-13T09:36:07.758Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/92/e3/e3a44f54c8e2f28983fcf07f13d4260b37bd6a0d3a081041bc60b91d230e/huggingface_hub-1.6.0-py3-none-any.whl", hash = "sha256:ef40e2d5cb85e48b2c067020fa5142168342d5108a1b267478ed384ecbf18961", size = 612874, upload-time = "2026-03-06T14:19:16.844Z" }, + { url = "https://files.pythonhosted.org/packages/6f/75/ca21955d6117a394a482c7862ce96216239d0e3a53133ae8510727a8bcfa/huggingface_hub-1.7.1-py3-none-any.whl", hash = "sha256:38c6cce7419bbde8caac26a45ed22b0cea24152a8961565d70ec21f88752bfaa", size = 616308, upload-time = "2026-03-13T09:36:06.062Z" }, ] [[package]] @@ -1628,9 +1636,9 @@ requires-dist = [ { name = "jsonpath-ng", specifier = ">=1.6.1" }, { name = "kubernetes", specifier = ">=30.1.0" }, { name = "litellm", specifier = ">=1.75.5.post1" }, - { name = "llama-stack", specifier = "==0.4.3" }, - { name = "llama-stack-api", specifier = "==0.4.4" }, - { name = "llama-stack-client", specifier = "==0.4.3" }, + { name = "llama-stack", specifier = "==0.5.2" }, + { name = "llama-stack-api", specifier = "==0.5.2" }, + { name = "llama-stack-client", specifier = "==0.5.2" }, { name = "openai", specifier = ">=1.99.9" }, { name = "prometheus-client", specifier = ">=0.22.1" }, { name = "psycopg2-binary", specifier = ">=2.9.10" }, @@ -1707,7 +1715,7 @@ llslibdev = [ [[package]] name = "litellm" -version = "1.82.0" +version = "1.82.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, @@ -1723,14 +1731,14 @@ dependencies = [ { name = "tiktoken" }, { name = 
"tokenizers" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/6c/00/49bb5c28e0dea0f5086229a2a08d5fdc6c8dc0d8e2acb2a2d1f7dd9f4b70/litellm-1.82.0.tar.gz", hash = "sha256:d388f52447daccbcaafa19a3e68d17b75f1374b5bf2cde680d65e1cd86e50d22", size = 16800355, upload-time = "2026-03-01T02:35:30.363Z" } +sdist = { url = "https://files.pythonhosted.org/packages/60/12/010a86643f12ac0b004032d5927c260094299a84ed38b5ed20a8f8c7e3c4/litellm-1.82.2.tar.gz", hash = "sha256:f5f4c4049f344a88bf80b2e421bb927807687c99624515d7ff4152d533ec9dcb", size = 17353218, upload-time = "2026-03-13T21:24:24.5Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/28/89/eb28bfcf97d6b045c400e72eb047c381594467048c237dbb6c227764084c/litellm-1.82.0-py3-none-any.whl", hash = "sha256:5496b5d4532cccdc7a095c21cbac4042f7662021c57bc1d17be4e39838929e80", size = 14911978, upload-time = "2026-03-01T02:35:26.844Z" }, + { url = "https://files.pythonhosted.org/packages/96/e4/87e3ca82a8bf6e6bfffb42a539a1350dd6ced1b7169397bd439ba56fde10/litellm-1.82.2-py3-none-any.whl", hash = "sha256:641ed024774fa3d5b4dd9347f0efb1e31fa422fba2a6500aabedee085d1194cb", size = 15524224, upload-time = "2026-03-13T21:24:21.288Z" }, ] [[package]] name = "llama-stack" -version = "0.4.3" +version = "0.5.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, @@ -1743,9 +1751,14 @@ dependencies = [ { name = "jinja2" }, { name = "jsonschema" }, { name = "llama-stack-api" }, + { name = "mcp" }, + { name = "numpy" }, + { name = "oci" }, { name = "openai" }, + { name = "opentelemetry-distro" }, { name = "opentelemetry-exporter-otlp-proto-http" }, { name = "opentelemetry-sdk" }, + { name = "oracledb" }, { name = "pillow" }, { name = "prompt-toolkit" }, { name = "psycopg2-binary" }, @@ -1763,14 +1776,14 @@ dependencies = [ { name = "urllib3" }, { name = "uvicorn" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/57/f8/b46c825c7d4050524ca4da9ff7f2622b101044f65cf50f708cf5b6ac935d/llama_stack-0.4.3.tar.gz", hash = "sha256:70d379ae9dbb5b1d0693f14054d9817aba183ffcd805133f0a4442baee132c6d", size = 3357773, upload-time = "2026-01-26T21:46:01.588Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ce/a8/3724d0c06a06578a639345f5086b93ba234a0ac247ec4ed7854d0d5e5ca6/llama_stack-0.5.2.tar.gz", hash = "sha256:9334c781e4ded6520aa60c3301a9087e9fb8fdaea8e5f30f8e21d85b17231d8d", size = 16035748, upload-time = "2026-03-06T13:25:59.356Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f2/eb/a4c6c6e6391e13b7d71a116df847b13c334355e9ec18441635140b8fbe1f/llama_stack-0.4.3-py3-none-any.whl", hash = "sha256:423207eae2b640894992a9075ff9dd6300ff904ab06a49fe38cfe0bb809d4669", size = 3695786, upload-time = "2026-01-26T21:45:59.607Z" }, + { url = "https://files.pythonhosted.org/packages/62/4c/fea3f2ffeead47a934704f1527685106766c5ea69dd99c0a83e872b22aa7/llama_stack-0.5.2-py3-none-any.whl", hash = "sha256:581fda638088ee029aab20afe3c42ba8f7f6ef21c80bd9ebcae20bb13c3409d3", size = 3979442, upload-time = "2026-03-06T13:25:56.581Z" }, ] [[package]] name = "llama-stack-api" -version = "0.4.4" +version = "0.5.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "fastapi" }, @@ -1780,14 +1793,14 @@ dependencies = [ { name = "opentelemetry-sdk" }, { name = "pydantic" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a9/84/075d5e4b2419777f7dc92a1153c683d82180739754c39ccb3ae01a9dc535/llama_stack_api-0.4.4.tar.gz", hash = "sha256:3973ca3bacf86916e04e521f77e7909533eec7364d32c3eabc35dc2976dbfe7d", size = 106579, upload-time = "2026-01-30T16:28:38.051Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b2/3d/ecc5cba3613a37887439f08bf202b455ad1d5411818c91833acfaaeee569/llama_stack_api-0.5.2.tar.gz", hash = "sha256:a272e4b803fe24a8ba7d22e6d904bf88abd118ba0b6610a20ff5dedb09f38ad7", size = 126436, upload-time 
= "2026-03-06T13:25:14.169Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/2d/a0/e32f5b39a029c6fd120216d122a5333e8e4889103da56ff9efd6601eb987/llama_stack_api-0.4.4-py3-none-any.whl", hash = "sha256:7bbc63330ed186502dcd48f65cae014dbeb788ba5690be738c98693cfcd2f599", size = 107030, upload-time = "2026-01-30T16:28:36.725Z" }, + { url = "https://files.pythonhosted.org/packages/06/a7/caa050e0beb93147593766e8ea58a0aeab0de59d747ed74ec928c75ab113/llama_stack_api-0.5.2-py3-none-any.whl", hash = "sha256:6531556dd8bb6555d778360ecfcd850aad7a49a8172b68146995d538e71641f0", size = 151603, upload-time = "2026-03-06T13:25:12.876Z" }, ] [[package]] name = "llama-stack-client" -version = "0.4.3" +version = "0.5.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -1806,9 +1819,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/db/3d/2aaeeef910e821ef7d3e65f3d773ba183cc84b7852f877396f64619a250c/llama_stack_client-0.4.3.tar.gz", hash = "sha256:cb807be258206e8fedeb5e5ceba7be7108d3badb31d74199406808c3d1679c35", size = 352952, upload-time = "2026-01-26T21:45:09.725Z" } +sdist = { url = "https://files.pythonhosted.org/packages/99/8a/8742475db7cedc2d452a3a7677da7f24aa84bdd262bc97543029c62df772/llama_stack_client-0.5.2.tar.gz", hash = "sha256:17c1bbad90f7699da4eb3cae256e8823caa4d2be945512a45c8c6f89ab899f28", size = 368612, upload-time = "2026-03-06T13:24:22.252Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1e/39/193aad0d49d834539fdc04c5f774fda22283267aff2400b68ffeb307474c/llama_stack_client-0.4.3-py3-none-any.whl", hash = "sha256:97b8cc5032bad4f0cdd1b0ae992cf44f5554679d315b7c40f46deb358c041f50", size = 375940, upload-time = "2026-01-26T21:45:08.067Z" }, + { url = "https://files.pythonhosted.org/packages/4d/f9/f6224b8819748358a573e3a2b8e299c0b6ba5f9cedf2942188c361c8e555/llama_stack_client-0.5.2-py3-none-any.whl", hash = 
"sha256:473f4d67ac0b243b0fc29555a0203a742615d31bea606b4332d9e2f193f73d6a", size = 391951, upload-time = "2026-03-06T13:24:20.559Z" }, ] [[package]] @@ -2218,9 +2231,26 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df838baa98fa71844d84283302f7604ed565dafe5a6b5041a/oauthlib-3.3.1-py3-none-any.whl", hash = "sha256:88119c938d2b8fb88561af5f6ee0eec8cc8d552b7bb1f712743136eb7523b7a1", size = 160065, upload-time = "2025-06-19T22:48:06.508Z" }, ] +[[package]] +name = "oci" +version = "2.168.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "circuitbreaker" }, + { name = "cryptography" }, + { name = "pyopenssl" }, + { name = "python-dateutil" }, + { name = "pytz" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/eb/68/edf8ffbb42e97ad44d64fce85be00818d979b472dd4377dc948155f811e9/oci-2.168.1.tar.gz", hash = "sha256:b941674171b41e999b8e3adb38d4797d7b42d2bb5ff40d17c26e8ce2a7d4b605", size = 16751235, upload-time = "2026-03-10T10:50:16.244Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/3e/29e05b4f8bed3b4a89b52fc57e76ac86669fc43a59e128eb526e395eda7b/oci-2.168.1-py3-none-any.whl", hash = "sha256:d106cfffc9153b5c9de628877c967ed87bbbfbbc9d411c97feee0eba8f2e4eab", size = 34033119, upload-time = "2026-03-10T10:50:08.501Z" }, +] + [[package]] name = "openai" -version = "2.26.0" +version = "2.28.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -2232,9 +2262,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d7/91/2a06c4e9597c338cac1e5e5a8dd6f29e1836fc229c4c523529dca387fda8/openai-2.26.0.tar.gz", hash = "sha256:b41f37c140ae0034a6e92b0c509376d907f3a66109935fba2c1b471a7c05a8fb", size = 666702, upload-time = "2026-03-05T23:17:35.874Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/56/87/eb0abb4ef88ddb95b3c13149384c4c288f584f3be17d6a4f63f8c3e3c226/openai-2.28.0.tar.gz", hash = "sha256:bb7fdff384d2a787fa82e8822d1dd3c02e8cf901d60f1df523b7da03cbb6d48d", size = 670334, upload-time = "2026-03-13T19:56:27.306Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c6/2e/3f73e8ca53718952222cacd0cf7eecc9db439d020f0c1fe7ae717e4e199a/openai-2.26.0-py3-none-any.whl", hash = "sha256:6151bf8f83802f036117f06cc8a57b3a4da60da9926826cc96747888b57f394f", size = 1136409, upload-time = "2026-03-05T23:17:34.072Z" }, + { url = "https://files.pythonhosted.org/packages/c0/5a/df122348638885526e53140e9c6b0d844af7312682b3bde9587eebc28b47/openai-2.28.0-py3-none-any.whl", hash = "sha256:79aa5c45dba7fef84085701c235cf13ba88485e1ef4f8dfcedc44fc2a698fc1d", size = 1141218, upload-time = "2026-03-13T19:56:25.46Z" }, ] [[package]] @@ -2264,6 +2294,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5f/bf/93795954016c522008da367da292adceed71cca6ee1717e1d64c83089099/opentelemetry_api-1.40.0-py3-none-any.whl", hash = "sha256:82dd69331ae74b06f6a874704be0cfaa49a1650e1537d4a813b86ecef7d0ecf9", size = 68676, upload-time = "2026-03-04T14:17:01.24Z" }, ] +[[package]] +name = "opentelemetry-distro" +version = "0.61b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-sdk" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f5/00/1f8acc51326956a596fefaf67751380001af36029132a7a07d4debce3c06/opentelemetry_distro-0.61b0.tar.gz", hash = "sha256:975b845f50181ad53753becf4fd4b123b54fa04df5a9d78812264436d6518981", size = 2590, upload-time = "2026-03-04T14:20:12.453Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/56/2c/efcc995cd7484e6e55b1d26bd7fa6c55ca96bd415ff94310b52c19f330b0/opentelemetry_distro-0.61b0-py3-none-any.whl", hash = 
"sha256:f21d1ac0627549795d75e332006dd068877f00e461b1b2e8fe4568d6eb7b9590", size = 3349, upload-time = "2026-03-04T14:18:57.788Z" }, +] + [[package]] name = "opentelemetry-exporter-otlp" version = "1.40.0" @@ -2379,6 +2423,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b2/37/cc6a55e448deaa9b27377d087da8615a3416d8ad523d5960b78dbeadd02a/opentelemetry_semantic_conventions-0.61b0-py3-none-any.whl", hash = "sha256:fa530a96be229795f8cef353739b618148b0fe2b4b3f005e60e262926c4d38e2", size = 231621, upload-time = "2026-03-04T14:17:19.33Z" }, ] +[[package]] +name = "oracledb" +version = "3.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f7/02/70a872d1a4a739b4f7371ab8d3d5ed8c6e57e142e2503531aafcb220893c/oracledb-3.4.2.tar.gz", hash = "sha256:46e0f2278ff1fe83fbc33a3b93c72d429323ec7eed47bc9484e217776cd437e5", size = 855467, upload-time = "2026-01-28T17:25:39.91Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/81/2e6154f34b71cd93b4946c73ea13b69d54b8d45a5f6bbffe271793240d21/oracledb-3.4.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:a7396664e592881225ba66385ee83ce339d864f39003d6e4ca31a894a7e7c552", size = 4220806, upload-time = "2026-01-28T17:26:04.322Z" }, + { url = "https://files.pythonhosted.org/packages/ab/a9/a1d59aaac77d8f727156ec6a3b03399917c90b7da4f02d057f92e5601f56/oracledb-3.4.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0f04a2d62073407672f114d02529921de0677c6883ed7c64d8d1a3c04caa3238", size = 2233795, upload-time = "2026-01-28T17:26:05.877Z" }, + { url = "https://files.pythonhosted.org/packages/94/ec/8c4a38020cd251572bd406ddcbde98ca052ec94b5684f9aa9ef1ddfcc68c/oracledb-3.4.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:d8d75e4f879b908be66cce05ba6c05791a5dbb4a15e39abc01aa25c8a2492bd9", size = 2424756, upload-time = "2026-01-28T17:26:07.35Z" }, + { url = "https://files.pythonhosted.org/packages/fa/7d/c251c2a8567151ccfcfbe3467ea9a60fb5480dc4719342e2e6b7a9679e5d/oracledb-3.4.2-cp312-cp312-win32.whl", hash = "sha256:31b7ee83c23d0439778303de8a675717f805f7e8edb5556d48c4d8343bcf14f5", size = 1453486, upload-time = "2026-01-28T17:26:08.869Z" }, + { url = "https://files.pythonhosted.org/packages/4c/78/c939f3c16fb39400c4734d5a3340db5659ba4e9dce23032d7b33ccfd3fe5/oracledb-3.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:ac25a0448fc830fb7029ad50cd136cdbfcd06975d53967e269772cc5cb8c203a", size = 1794445, upload-time = "2026-01-28T17:26:10.66Z" }, + { url = "https://files.pythonhosted.org/packages/22/68/f7126f5d911c295b57720c6b1a0609a5a2667b4546946433552a4de46333/oracledb-3.4.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:643c25d301a289a371e37fcedb59e5fa5e54fb321708e5c12821c4b55bdd8a4d", size = 4205176, upload-time = "2026-01-28T17:26:12.463Z" }, + { url = "https://files.pythonhosted.org/packages/5d/93/2fced60f92dc82e66980a8a3ba5c1ea48110bf1dd81d030edb69d88f992e/oracledb-3.4.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:55397e7eb43bb7017c03a981c736c25724182f5210951181dfe3fab0e5d457fb", size = 2231298, upload-time = "2026-01-28T17:26:14.497Z" }, + { url = "https://files.pythonhosted.org/packages/75/a7/4dd286f3a6348d786fef9e6ab2e6c9b74ca9195d9a756f2a67e45743cdf0/oracledb-3.4.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b26a10f9c790bd141ffc8af68520803ed4a44a9258bf7d1eea9bfdd36bd6df7f", size = 2439430, upload-time = "2026-01-28T17:26:16.044Z" }, + { url = "https://files.pythonhosted.org/packages/19/28/94bc753e5e969c60ee5d9c914e2b4ef79999eaca8e91bcab2fbf0586b80b/oracledb-3.4.2-cp313-cp313-win32.whl", hash = 
"sha256:b974caec2c330c22bbe765705a5ac7d98ec3022811dec2042d561a3c65cb991b", size = 1458209, upload-time = "2026-01-28T17:26:17.652Z" }, + { url = "https://files.pythonhosted.org/packages/cb/2b/593a9b2d4c12c9de3289e67d84fe023336d99f36ba51442a5a0f5ce6acf7/oracledb-3.4.2-cp313-cp313-win_amd64.whl", hash = "sha256:3df8eee1410d25360599968b1625b000f10c5ae0e47274031a7842a9dc418890", size = 1793558, upload-time = "2026-01-28T17:26:19.914Z" }, +] + [[package]] name = "packaging" version = "26.0" @@ -2965,11 +3031,11 @@ wheels = [ [[package]] name = "pyjwt" -version = "2.11.0" +version = "2.12.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/5c/5a/b46fa56bf322901eee5b0454a34343cdbdae202cd421775a8ee4e42fd519/pyjwt-2.11.0.tar.gz", hash = "sha256:35f95c1f0fbe5d5ba6e43f00271c275f7a1a4db1dab27bf708073b75318ea623", size = 98019, upload-time = "2026-01-30T19:59:55.694Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c2/27/a3b6e5bf6ff856d2509292e95c8f57f0df7017cf5394921fc4e4ef40308a/pyjwt-2.12.1.tar.gz", hash = "sha256:c74a7a2adf861c04d002db713dd85f84beb242228e671280bf709d765b03672b", size = 102564, upload-time = "2026-03-13T19:27:37.25Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6f/01/c26ce75ba460d5cd503da9e13b21a33804d38c2165dec7b716d06b13010c/pyjwt-2.11.0-py3-none-any.whl", hash = "sha256:94a6bde30eb5c8e04fee991062b534071fd1439ef58d2adc9ccb823e7bcd0469", size = 28224, upload-time = "2026-01-30T19:59:54.539Z" }, + { url = "https://files.pythonhosted.org/packages/e5/7a/8dd906bd22e79e47397a61742927f6747fe93242ef86645ee9092e610244/pyjwt-2.12.1-py3-none-any.whl", hash = "sha256:28ca37c070cad8ba8cd9790cd940535d40274d22f80ab87f3ac6a713e6e8454c", size = 29726, upload-time = "2026-03-13T19:27:35.677Z" }, ] [package.optional-dependencies] @@ -2995,6 +3061,19 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/d5/6f/9ac2548e290764781f9e7e2aaf0685b086379dabfb29ca38536985471eaf/pylint-4.0.5-py3-none-any.whl", hash = "sha256:00f51c9b14a3b3ae08cff6b2cdd43f28165c78b165b628692e428fb1f8dc2cf2", size = 536694, upload-time = "2026-02-20T09:07:31.028Z" }, ] +[[package]] +name = "pyopenssl" +version = "25.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/80/be/97b83a464498a79103036bc74d1038df4a7ef0e402cfaf4d5e113fb14759/pyopenssl-25.3.0.tar.gz", hash = "sha256:c981cb0a3fd84e8602d7afc209522773b94c1c2446a3c710a75b06fe1beae329", size = 184073, upload-time = "2025-09-17T00:32:21.037Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/81/ef2b1dfd1862567d573a4fdbc9f969067621764fbb74338496840a1d2977/pyopenssl-25.3.0-py3-none-any.whl", hash = "sha256:1fda6fc034d5e3d179d39e59c1895c9faeaf40a79de5fc4cbbfbe0d36f4a77b6", size = 57268, upload-time = "2025-09-17T00:32:19.474Z" }, +] + [[package]] name = "pyproject-hooks" version = "1.2.0" @@ -3100,18 +3179,14 @@ wheels = [ [[package]] name = "pythainlp" -version = "5.2.0" +version = "5.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy" }, - { name = "pandas" }, - { name = "pyyaml" }, - { name = "requests" }, { name = "tzdata", marker = "sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/60/af/839446c681079c288d3734a007e7bc083e91e2c9bea17165647f0e12c63c/pythainlp-5.2.0.tar.gz", hash = "sha256:04c6e4bdd806204be742f139b1f2e666411c4509c270dfff1a8b5afa69d36d2b", size = 18719875, upload-time = "2025-12-20T12:55:14.163Z" } +sdist = { url = "https://files.pythonhosted.org/packages/40/18/dfaad6a9fa546c3cdf37d7930acb117d206575e639559a1aa0ee84ad457c/pythainlp-5.3.1.tar.gz", hash = 
"sha256:516c34d22689c2b469dd74bb18221eb9336e42f5137aa32940008293f1895de4", size = 19294788, upload-time = "2026-03-14T07:10:02.208Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/30/57/b29402fbabd8df3120b8319d731cab88881dda0ac9147b308ecb01d49ed3/pythainlp-5.2.0-py3-none-any.whl", hash = "sha256:fd64d6b3d33973782390822e74b8e2c9b867760eeed19d0d218945165b431e35", size = 19263794, upload-time = "2025-12-20T12:55:11.414Z" }, + { url = "https://files.pythonhosted.org/packages/5c/4f/3cec6cc70da44b6d684441732f709750edf1259357f9a15fbc0848c20f0f/pythainlp-5.3.1-py3-none-any.whl", hash = "sha256:f33fb134fcfbd281fb64494c924fddb5e7cc27e053f7a73f18b6b5acbb7a4e2d", size = 19843902, upload-time = "2026-03-14T07:09:58.552Z" }, ] [[package]] @@ -3163,6 +3238,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c6/78/397db326746f0a342855b81216ae1f0a32965deccfd7c830a2dbc66d2483/pytokens-0.4.1-py3-none-any.whl", hash = "sha256:26cef14744a8385f35d0e095dc8b3a7583f6c953c2e3d269c7f82484bf5ad2de", size = 13729, upload-time = "2026-01-30T01:03:45.029Z" }, ] +[[package]] +name = "pytz" +version = "2026.1.post1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/56/db/b8721d71d945e6a8ac63c0fc900b2067181dbb50805958d4d4661cf7d277/pytz-2026.1.post1.tar.gz", hash = "sha256:3378dde6a0c3d26719182142c56e60c7f9af7e968076f31aae569d72a0358ee1", size = 321088, upload-time = "2026-03-03T07:47:50.683Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/10/99/781fe0c827be2742bcc775efefccb3b048a3a9c6ce9aec0cbf4a101677e5/pytz-2026.1.post1-py2.py3-none-any.whl", hash = "sha256:f2fd16142fda348286a75e1a524be810bb05d444e5a081f37f7affc635035f7a", size = 510489, upload-time = "2026-03-03T07:47:49.167Z" }, +] + [[package]] name = "pywin32" version = "311" @@ -3411,41 +3495,29 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/0a/4d/222ef0b46443cf4cf46764d9c630f3fe4abaa7245be9417e56e9f52b8f65/rpds_py-0.30.0-cp313-cp313t-win_amd64.whl", hash = "sha256:f14fc5df50a716f7ece6a80b6c78bb35ea2ca47c499e422aa4463455dd96d56d", size = 225768, upload-time = "2025-11-30T20:23:25.908Z" }, ] -[[package]] -name = "rsa" -version = "4.9.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pyasn1" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/da/8a/22b7beea3ee0d44b1916c0c1cb0ee3af23b700b6da9f04991899d0c555d4/rsa-4.9.1.tar.gz", hash = "sha256:e7bdbfdb5497da4c07dfd35530e1a902659db6ff241e39d9953cad06ebd0ae75", size = 29034, upload-time = "2025-04-16T09:51:18.218Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762", size = 34696, upload-time = "2025-04-16T09:51:17.142Z" }, -] - [[package]] name = "ruff" -version = "0.15.5" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/77/9b/840e0039e65fcf12758adf684d2289024d6140cde9268cc59887dc55189c/ruff-0.15.5.tar.gz", hash = "sha256:7c3601d3b6d76dce18c5c824fc8d06f4eef33d6df0c21ec7799510cde0f159a2", size = 4574214, upload-time = "2026-03-05T20:06:34.946Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/47/20/5369c3ce21588c708bcbe517a8fbe1a8dfdb5dfd5137e14790b1da71612c/ruff-0.15.5-py3-none-linux_armv6l.whl", hash = "sha256:4ae44c42281f42e3b06b988e442d344a5b9b72450ff3c892e30d11b29a96a57c", size = 10478185, upload-time = "2026-03-05T20:06:29.093Z" }, - { url = "https://files.pythonhosted.org/packages/44/ed/e81dd668547da281e5dce710cf0bc60193f8d3d43833e8241d006720e42b/ruff-0.15.5-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6edd3792d408ebcf61adabc01822da687579a1a023f297618ac27a5b51ef0080", size = 10859201, upload-time = 
"2026-03-05T20:06:32.632Z" }, - { url = "https://files.pythonhosted.org/packages/c4/8f/533075f00aaf19b07c5cd6aa6e5d89424b06b3b3f4583bfa9c640a079059/ruff-0.15.5-py3-none-macosx_11_0_arm64.whl", hash = "sha256:89f463f7c8205a9f8dea9d658d59eff49db05f88f89cc3047fb1a02d9f344010", size = 10184752, upload-time = "2026-03-05T20:06:40.312Z" }, - { url = "https://files.pythonhosted.org/packages/66/0e/ba49e2c3fa0395b3152bad634c7432f7edfc509c133b8f4529053ff024fb/ruff-0.15.5-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba786a8295c6574c1116704cf0b9e6563de3432ac888d8f83685654fe528fd65", size = 10534857, upload-time = "2026-03-05T20:06:19.581Z" }, - { url = "https://files.pythonhosted.org/packages/59/71/39234440f27a226475a0659561adb0d784b4d247dfe7f43ffc12dd02e288/ruff-0.15.5-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fd4b801e57955fe9f02b31d20375ab3a5c4415f2e5105b79fb94cf2642c91440", size = 10309120, upload-time = "2026-03-05T20:06:00.435Z" }, - { url = "https://files.pythonhosted.org/packages/f5/87/4140aa86a93df032156982b726f4952aaec4a883bb98cb6ef73c347da253/ruff-0.15.5-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:391f7c73388f3d8c11b794dbbc2959a5b5afe66642c142a6effa90b45f6f5204", size = 11047428, upload-time = "2026-03-05T20:05:51.867Z" }, - { url = "https://files.pythonhosted.org/packages/5a/f7/4953e7e3287676f78fbe85e3a0ca414c5ca81237b7575bdadc00229ac240/ruff-0.15.5-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8dc18f30302e379fe1e998548b0f5e9f4dff907f52f73ad6da419ea9c19d66c8", size = 11914251, upload-time = "2026-03-05T20:06:22.887Z" }, - { url = "https://files.pythonhosted.org/packages/77/46/0f7c865c10cf896ccf5a939c3e84e1cfaeed608ff5249584799a74d33835/ruff-0.15.5-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1cc6e7f90087e2d27f98dc34ed1b3ab7c8f0d273cc5431415454e22c0bd2a681", size = 11333801, upload-time = "2026-03-05T20:05:57.168Z" }, - { url = 
"https://files.pythonhosted.org/packages/d3/01/a10fe54b653061585e655f5286c2662ebddb68831ed3eaebfb0eb08c0a16/ruff-0.15.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1cb7169f53c1ddb06e71a9aebd7e98fc0fea936b39afb36d8e86d36ecc2636a", size = 11206821, upload-time = "2026-03-05T20:06:03.441Z" }, - { url = "https://files.pythonhosted.org/packages/7a/0d/2132ceaf20c5e8699aa83da2706ecb5c5dcdf78b453f77edca7fb70f8a93/ruff-0.15.5-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:9b037924500a31ee17389b5c8c4d88874cc6ea8e42f12e9c61a3d754ff72f1ca", size = 11133326, upload-time = "2026-03-05T20:06:25.655Z" }, - { url = "https://files.pythonhosted.org/packages/72/cb/2e5259a7eb2a0f87c08c0fe5bf5825a1e4b90883a52685524596bfc93072/ruff-0.15.5-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:65bb414e5b4eadd95a8c1e4804f6772bbe8995889f203a01f77ddf2d790929dd", size = 10510820, upload-time = "2026-03-05T20:06:37.79Z" }, - { url = "https://files.pythonhosted.org/packages/ff/20/b67ce78f9e6c59ffbdb5b4503d0090e749b5f2d31b599b554698a80d861c/ruff-0.15.5-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:d20aa469ae3b57033519c559e9bc9cd9e782842e39be05b50e852c7c981fa01d", size = 10302395, upload-time = "2026-03-05T20:05:54.504Z" }, - { url = "https://files.pythonhosted.org/packages/5f/e5/719f1acccd31b720d477751558ed74e9c88134adcc377e5e886af89d3072/ruff-0.15.5-py3-none-musllinux_1_2_i686.whl", hash = "sha256:15388dd28c9161cdb8eda68993533acc870aa4e646a0a277aa166de9ad5a8752", size = 10754069, upload-time = "2026-03-05T20:06:06.422Z" }, - { url = "https://files.pythonhosted.org/packages/c3/9c/d1db14469e32d98f3ca27079dbd30b7b44dbb5317d06ab36718dee3baf03/ruff-0.15.5-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:b30da330cbd03bed0c21420b6b953158f60c74c54c5f4c1dabbdf3a57bf355d2", size = 11304315, upload-time = "2026-03-05T20:06:10.867Z" }, - { url = 
"https://files.pythonhosted.org/packages/28/3a/950367aee7c69027f4f422059227b290ed780366b6aecee5de5039d50fa8/ruff-0.15.5-py3-none-win32.whl", hash = "sha256:732e5ee1f98ba5b3679029989a06ca39a950cced52143a0ea82a2102cb592b74", size = 10551676, upload-time = "2026-03-05T20:06:13.705Z" }, - { url = "https://files.pythonhosted.org/packages/b8/00/bf077a505b4e649bdd3c47ff8ec967735ce2544c8e4a43aba42ee9bf935d/ruff-0.15.5-py3-none-win_amd64.whl", hash = "sha256:821d41c5fa9e19117616c35eaa3f4b75046ec76c65e7ae20a333e9a8696bc7fe", size = 11678972, upload-time = "2026-03-05T20:06:45.379Z" }, - { url = "https://files.pythonhosted.org/packages/fe/4e/cd76eca6db6115604b7626668e891c9dd03330384082e33662fb0f113614/ruff-0.15.5-py3-none-win_arm64.whl", hash = "sha256:b498d1c60d2fe5c10c45ec3f698901065772730b411f164ae270bb6bfcc4740b", size = 10965572, upload-time = "2026-03-05T20:06:16.984Z" }, +version = "0.15.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/51/df/f8629c19c5318601d3121e230f74cbee7a3732339c52b21daa2b82ef9c7d/ruff-0.15.6.tar.gz", hash = "sha256:8394c7bb153a4e3811a4ecdacd4a8e6a4fa8097028119160dffecdcdf9b56ae4", size = 4597916, upload-time = "2026-03-12T23:05:47.51Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/2f/4e03a7e5ce99b517e98d3b4951f411de2b0fa8348d39cf446671adcce9a2/ruff-0.15.6-py3-none-linux_armv6l.whl", hash = "sha256:7c98c3b16407b2cf3d0f2b80c80187384bc92c6774d85fefa913ecd941256fff", size = 10508953, upload-time = "2026-03-12T23:05:17.246Z" }, + { url = "https://files.pythonhosted.org/packages/70/60/55bcdc3e9f80bcf39edf0cd272da6fa511a3d94d5a0dd9e0adf76ceebdb4/ruff-0.15.6-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:ee7dcfaad8b282a284df4aa6ddc2741b3f4a18b0555d626805555a820ea181c3", size = 10942257, upload-time = "2026-03-12T23:05:23.076Z" }, + { url = 
"https://files.pythonhosted.org/packages/e7/f9/005c29bd1726c0f492bfa215e95154cf480574140cb5f867c797c18c790b/ruff-0.15.6-py3-none-macosx_11_0_arm64.whl", hash = "sha256:3bd9967851a25f038fc8b9ae88a7fbd1b609f30349231dffaa37b6804923c4bb", size = 10322683, upload-time = "2026-03-12T23:05:33.738Z" }, + { url = "https://files.pythonhosted.org/packages/5f/74/2f861f5fd7cbb2146bddb5501450300ce41562da36d21868c69b7a828169/ruff-0.15.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:13f4594b04e42cd24a41da653886b04d2ff87adbf57497ed4f728b0e8a4866f8", size = 10660986, upload-time = "2026-03-12T23:05:53.245Z" }, + { url = "https://files.pythonhosted.org/packages/c1/a1/309f2364a424eccb763cdafc49df843c282609f47fe53aa83f38272389e0/ruff-0.15.6-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e2ed8aea2f3fe57886d3f00ea5b8aae5bf68d5e195f487f037a955ff9fbaac9e", size = 10332177, upload-time = "2026-03-12T23:05:56.145Z" }, + { url = "https://files.pythonhosted.org/packages/30/41/7ebf1d32658b4bab20f8ac80972fb19cd4e2c6b78552be263a680edc55ac/ruff-0.15.6-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:70789d3e7830b848b548aae96766431c0dc01a6c78c13381f423bf7076c66d15", size = 11170783, upload-time = "2026-03-12T23:06:01.742Z" }, + { url = "https://files.pythonhosted.org/packages/76/be/6d488f6adca047df82cd62c304638bcb00821c36bd4881cfca221561fdfc/ruff-0.15.6-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:542aaf1de3154cea088ced5a819ce872611256ffe2498e750bbae5247a8114e9", size = 12044201, upload-time = "2026-03-12T23:05:28.697Z" }, + { url = "https://files.pythonhosted.org/packages/71/68/e6f125df4af7e6d0b498f8d373274794bc5156b324e8ab4bf5c1b4fc0ec7/ruff-0.15.6-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c22e6f02c16cfac3888aa636e9eba857254d15bbacc9906c9689fdecb1953ab", size = 11421561, upload-time = "2026-03-12T23:05:31.236Z" }, + { url = 
"https://files.pythonhosted.org/packages/f1/9f/f85ef5fd01a52e0b472b26dc1b4bd228b8f6f0435975442ffa4741278703/ruff-0.15.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98893c4c0aadc8e448cfa315bd0cc343a5323d740fe5f28ef8a3f9e21b381f7e", size = 11310928, upload-time = "2026-03-12T23:05:45.288Z" }, + { url = "https://files.pythonhosted.org/packages/8c/26/b75f8c421f5654304b89471ed384ae8c7f42b4dff58fa6ce1626d7f2b59a/ruff-0.15.6-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:70d263770d234912374493e8cc1e7385c5d49376e41dfa51c5c3453169dc581c", size = 11235186, upload-time = "2026-03-12T23:05:50.677Z" }, + { url = "https://files.pythonhosted.org/packages/fc/d4/d5a6d065962ff7a68a86c9b4f5500f7d101a0792078de636526c0edd40da/ruff-0.15.6-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:55a1ad63c5a6e54b1f21b7514dfadc0c7fb40093fa22e95143cf3f64ebdcd512", size = 10635231, upload-time = "2026-03-12T23:05:37.044Z" }, + { url = "https://files.pythonhosted.org/packages/d6/56/7c3acf3d50910375349016cf33de24be021532042afbed87942858992491/ruff-0.15.6-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:8dc473ba093c5ec238bb1e7429ee676dca24643c471e11fbaa8a857925b061c0", size = 10340357, upload-time = "2026-03-12T23:06:04.748Z" }, + { url = "https://files.pythonhosted.org/packages/06/54/6faa39e9c1033ff6a3b6e76b5df536931cd30caf64988e112bbf91ef5ce5/ruff-0.15.6-py3-none-musllinux_1_2_i686.whl", hash = "sha256:85b042377c2a5561131767974617006f99f7e13c63c111b998f29fc1e58a4cfb", size = 10860583, upload-time = "2026-03-12T23:05:58.978Z" }, + { url = "https://files.pythonhosted.org/packages/cb/1e/509a201b843b4dfb0b32acdedf68d951d3377988cae43949ba4c4133a96a/ruff-0.15.6-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:cef49e30bc5a86a6a92098a7fbf6e467a234d90b63305d6f3ec01225a9d092e0", size = 11410976, upload-time = "2026-03-12T23:05:39.955Z" }, + { url = 
"https://files.pythonhosted.org/packages/6c/25/3fc9114abf979a41673ce877c08016f8e660ad6cf508c3957f537d2e9fa9/ruff-0.15.6-py3-none-win32.whl", hash = "sha256:bbf67d39832404812a2d23020dda68fee7f18ce15654e96fb1d3ad21a5fe436c", size = 10616872, upload-time = "2026-03-12T23:05:42.451Z" }, + { url = "https://files.pythonhosted.org/packages/89/7a/09ece68445ceac348df06e08bf75db72d0e8427765b96c9c0ffabc1be1d9/ruff-0.15.6-py3-none-win_amd64.whl", hash = "sha256:aee25bc84c2f1007ecb5037dff75cef00414fdf17c23f07dc13e577883dca406", size = 11787271, upload-time = "2026-03-12T23:05:20.168Z" }, + { url = "https://files.pythonhosted.org/packages/7f/d0/578c47dd68152ddddddf31cd7fc67dc30b7cdf639a86275fda821b0d9d98/ruff-0.15.6-py3-none-win_arm64.whl", hash = "sha256:c34de3dd0b0ba203be50ae70f5910b17188556630e2178fd7d79fc030eb0d837", size = 11060497, upload-time = "2026-03-12T23:05:25.968Z" }, ] [[package]] @@ -3567,7 +3639,7 @@ wheels = [ [[package]] name = "sentence-transformers" -version = "5.2.3" +version = "5.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "huggingface-hub" }, @@ -3580,18 +3652,18 @@ dependencies = [ { name = "transformers" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5b/30/21664028fc0776eb1ca024879480bbbab36f02923a8ff9e4cae5a150fa35/sentence_transformers-5.2.3.tar.gz", hash = "sha256:3cd3044e1f3fe859b6a1b66336aac502eaae5d3dd7d5c8fc237f37fbf58137c7", size = 381623, upload-time = "2026-02-17T14:05:20.238Z" } +sdist = { url = "https://files.pythonhosted.org/packages/fe/26/448453925b6ce0c29d8b54327caa71ee4835511aef02070467402273079c/sentence_transformers-5.3.0.tar.gz", hash = "sha256:414a0a881f53a4df0e6cbace75f823bfcb6b94d674c42a384b498959b7c065e2", size = 403330, upload-time = "2026-03-12T14:53:40.778Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/46/9f/dba4b3e18ebbe1eaa29d9f1764fbc7da0cd91937b83f2b7928d15c5d2d36/sentence_transformers-5.2.3-py3-none-any.whl", hash = 
"sha256:6437c62d4112b615ddebda362dfc16a4308d604c5b68125ed586e3e95d5b2e30", size = 494225, upload-time = "2026-02-17T14:05:18.596Z" }, + { url = "https://files.pythonhosted.org/packages/e2/9c/2fa7224058cad8df68d84bafee21716f30892cecc7ad1ad73bde61d23754/sentence_transformers-5.3.0-py3-none-any.whl", hash = "sha256:dca6b98db790274a68185d27a65801b58b4caf653a4e556b5f62827509347c7d", size = 512390, upload-time = "2026-03-12T14:53:39.035Z" }, ] [[package]] name = "setuptools" -version = "82.0.0" +version = "82.0.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/82/f3/748f4d6f65d1756b9ae577f329c951cda23fb900e4de9f70900ced962085/setuptools-82.0.0.tar.gz", hash = "sha256:22e0a2d69474c6ae4feb01951cb69d515ed23728cf96d05513d36e42b62b37cb", size = 1144893, upload-time = "2026-02-08T15:08:40.206Z" } +sdist = { url = "https://files.pythonhosted.org/packages/4f/db/cfac1baf10650ab4d1c111714410d2fbb77ac5a616db26775db562c8fab2/setuptools-82.0.1.tar.gz", hash = "sha256:7d872682c5d01cfde07da7bccc7b65469d3dca203318515ada1de5eda35efbf9", size = 1152316, upload-time = "2026-03-09T12:47:17.221Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e1/c6/76dc613121b793286a3f91621d7b75a2b493e0390ddca50f11993eadf192/setuptools-82.0.0-py3-none-any.whl", hash = "sha256:70b18734b607bd1da571d097d236cfcfacaf01de45717d59e6e04b96877532e0", size = 1003468, upload-time = "2026-02-08T15:08:38.723Z" }, + { url = "https://files.pythonhosted.org/packages/9d/76/f789f7a86709c6b087c5a2f52f911838cad707cc613162401badc665acfe/setuptools-82.0.1-py3-none-any.whl", hash = "sha256:a59e362652f08dcd477c78bb6e7bd9d80a7995bc73ce773050228a348ce2e5bb", size = 1006223, upload-time = "2026-03-09T12:47:15.026Z" }, ] [[package]] @@ -3870,21 +3942,19 @@ wheels = [ [[package]] name = "tornado" -version = "6.5.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/37/1d/0a336abf618272d53f62ebe274f712e213f5a03c0b2339575430b8362ef2/tornado-6.5.4.tar.gz", hash = "sha256:a22fa9047405d03260b483980635f0b041989d8bcc9a313f8fe18b411d84b1d7", size = 513632, upload-time = "2025-12-15T19:21:03.836Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ab/a9/e94a9d5224107d7ce3cc1fab8d5dc97f5ea351ccc6322ee4fb661da94e35/tornado-6.5.4-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d6241c1a16b1c9e4cc28148b1cda97dd1c6cb4fb7068ac1bedc610768dff0ba9", size = 443909, upload-time = "2025-12-15T19:20:48.382Z" }, - { url = "https://files.pythonhosted.org/packages/db/7e/f7b8d8c4453f305a51f80dbb49014257bb7d28ccb4bbb8dd328ea995ecad/tornado-6.5.4-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:2d50f63dda1d2cac3ae1fa23d254e16b5e38153758470e9956cbc3d813d40843", size = 442163, upload-time = "2025-12-15T19:20:49.791Z" }, - { url = "https://files.pythonhosted.org/packages/ba/b5/206f82d51e1bfa940ba366a8d2f83904b15942c45a78dd978b599870ab44/tornado-6.5.4-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d1cf66105dc6acb5af613c054955b8137e34a03698aa53272dbda4afe252be17", size = 445746, upload-time = "2025-12-15T19:20:51.491Z" }, - { url = "https://files.pythonhosted.org/packages/8e/9d/1a3338e0bd30ada6ad4356c13a0a6c35fbc859063fa7eddb309183364ac1/tornado-6.5.4-cp39-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:50ff0a58b0dc97939d29da29cd624da010e7f804746621c78d14b80238669335", size = 445083, upload-time = "2025-12-15T19:20:52.778Z" }, - { url = "https://files.pythonhosted.org/packages/50/d4/e51d52047e7eb9a582da59f32125d17c0482d065afd5d3bc435ff2120dc5/tornado-6.5.4-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e5fb5e04efa54cf0baabdd10061eb4148e0be137166146fff835745f59ab9f7f", size = 445315, upload-time = "2025-12-15T19:20:53.996Z" }, - { url = 
"https://files.pythonhosted.org/packages/27/07/2273972f69ca63dbc139694a3fc4684edec3ea3f9efabf77ed32483b875c/tornado-6.5.4-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9c86b1643b33a4cd415f8d0fe53045f913bf07b4a3ef646b735a6a86047dda84", size = 446003, upload-time = "2025-12-15T19:20:56.101Z" }, - { url = "https://files.pythonhosted.org/packages/d1/83/41c52e47502bf7260044413b6770d1a48dda2f0246f95ee1384a3cd9c44a/tornado-6.5.4-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:6eb82872335a53dd063a4f10917b3efd28270b56a33db69009606a0312660a6f", size = 445412, upload-time = "2025-12-15T19:20:57.398Z" }, - { url = "https://files.pythonhosted.org/packages/10/c7/bc96917f06cbee182d44735d4ecde9c432e25b84f4c2086143013e7b9e52/tornado-6.5.4-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6076d5dda368c9328ff41ab5d9dd3608e695e8225d1cd0fd1e006f05da3635a8", size = 445392, upload-time = "2025-12-15T19:20:58.692Z" }, - { url = "https://files.pythonhosted.org/packages/0c/1a/d7592328d037d36f2d2462f4bc1fbb383eec9278bc786c1b111cbbd44cfa/tornado-6.5.4-cp39-abi3-win32.whl", hash = "sha256:1768110f2411d5cd281bac0a090f707223ce77fd110424361092859e089b38d1", size = 446481, upload-time = "2025-12-15T19:21:00.008Z" }, - { url = "https://files.pythonhosted.org/packages/d6/6d/c69be695a0a64fd37a97db12355a035a6d90f79067a3cf936ec2b1dc38cd/tornado-6.5.4-cp39-abi3-win_amd64.whl", hash = "sha256:fa07d31e0cd85c60713f2b995da613588aa03e1303d75705dca6af8babc18ddc", size = 446886, upload-time = "2025-12-15T19:21:01.287Z" }, - { url = "https://files.pythonhosted.org/packages/50/49/8dc3fd90902f70084bd2cd059d576ddb4f8bb44c2c7c0e33a11422acb17e/tornado-6.5.4-cp39-abi3-win_arm64.whl", hash = "sha256:053e6e16701eb6cbe641f308f4c1a9541f91b6261991160391bfc342e8a551a1", size = 445910, upload-time = "2025-12-15T19:21:02.571Z" }, +version = "6.5.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/f8/f1/3173dfa4a18db4a9b03e5d55325559dab51ee653763bb8745a75af491286/tornado-6.5.5.tar.gz", hash = "sha256:192b8f3ea91bd7f1f50c06955416ed76c6b72f96779b962f07f911b91e8d30e9", size = 516006, upload-time = "2026-03-10T21:31:02.067Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/59/8c/77f5097695f4dd8255ecbd08b2a1ed8ba8b953d337804dd7080f199e12bf/tornado-6.5.5-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:487dc9cc380e29f58c7ab88f9e27cdeef04b2140862e5076a66fb6bb68bb1bfa", size = 445983, upload-time = "2026-03-10T21:30:44.28Z" }, + { url = "https://files.pythonhosted.org/packages/ab/5e/7625b76cd10f98f1516c36ce0346de62061156352353ef2da44e5c21523c/tornado-6.5.5-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:65a7f1d46d4bb41df1ac99f5fcb685fb25c7e61613742d5108b010975a9a6521", size = 444246, upload-time = "2026-03-10T21:30:46.571Z" }, + { url = "https://files.pythonhosted.org/packages/b2/04/7b5705d5b3c0fab088f434f9c83edac1573830ca49ccf29fb83bf7178eec/tornado-6.5.5-cp39-abi3-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:e74c92e8e65086b338fd56333fb9a68b9f6f2fe7ad532645a290a464bcf46be5", size = 447229, upload-time = "2026-03-10T21:30:48.273Z" }, + { url = "https://files.pythonhosted.org/packages/34/01/74e034a30ef59afb4097ef8659515e96a39d910b712a89af76f5e4e1f93c/tornado-6.5.5-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:435319e9e340276428bbdb4e7fa732c2d399386d1de5686cb331ec8eee754f07", size = 448192, upload-time = "2026-03-10T21:30:51.22Z" }, + { url = "https://files.pythonhosted.org/packages/be/00/fe9e02c5a96429fce1a1d15a517f5d8444f9c412e0bb9eadfbe3b0fc55bf/tornado-6.5.5-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:3f54aa540bdbfee7b9eb268ead60e7d199de5021facd276819c193c0fb28ea4e", size = 448039, upload-time = "2026-03-10T21:30:53.52Z" }, + { url = 
"https://files.pythonhosted.org/packages/82/9e/656ee4cec0398b1d18d0f1eb6372c41c6b889722641d84948351ae19556d/tornado-6.5.5-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:36abed1754faeb80fbd6e64db2758091e1320f6bba74a4cf8c09cd18ccce8aca", size = 447445, upload-time = "2026-03-10T21:30:55.541Z" }, + { url = "https://files.pythonhosted.org/packages/5a/76/4921c00511f88af86a33de770d64141170f1cfd9c00311aea689949e274e/tornado-6.5.5-cp39-abi3-win32.whl", hash = "sha256:dd3eafaaeec1c7f2f8fdcd5f964e8907ad788fe8a5a32c4426fbbdda621223b7", size = 448582, upload-time = "2026-03-10T21:30:57.142Z" }, + { url = "https://files.pythonhosted.org/packages/2c/23/f6c6112a04d28eed765e374435fb1a9198f73e1ec4b4024184f21faeb1ad/tornado-6.5.5-cp39-abi3-win_amd64.whl", hash = "sha256:6443a794ba961a9f619b1ae926a2e900ac20c34483eea67be4ed8f1e58d3ef7b", size = 448990, upload-time = "2026-03-10T21:30:58.857Z" }, + { url = "https://files.pythonhosted.org/packages/b7/c8/876602cbc96469911f0939f703453c1157b0c826ecb05bdd32e023397d4e/tornado-6.5.5-cp39-abi3-win_arm64.whl", hash = "sha256:2c9a876e094109333f888539ddb2de4361743e5d21eece20688e3e351e4990a6", size = 448016, upload-time = "2026-03-10T21:31:00.43Z" }, ] [[package]]