diff --git a/.github/workflows/e2e_tests_lightspeed_evaluation.yaml b/.github/workflows/e2e_tests_lightspeed_evaluation.yaml new file mode 100644 index 000000000..366e414d0 --- /dev/null +++ b/.github/workflows/e2e_tests_lightspeed_evaluation.yaml @@ -0,0 +1,143 @@ +name: E2E Tests for Lightspeed Evaluation + +on: [push, pull_request_target] + +jobs: + e2e_tests: + runs-on: ubuntu-latest + strategy: + fail-fast: false + + name: "E2E Tests for Lightspeed Evaluation job" + + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + E2E_OPENAI_MODEL: ${{ vars.E2E_OPENAI_MODEL }} + FAISS_VECTOR_STORE_ID: ${{ vars.FAISS_VECTOR_STORE_ID }} + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + # On PR_TARGET → the fork (or same repo) that opened the PR. + # On push → falls back to the current repository. + repository: ${{ github.event.pull_request.head.repo.full_name || github.repository }} + + # On PR_TARGET → the PR head *commit* (reproducible). + # On push → the pushed commit that triggered the workflow. + ref: ${{ github.event.pull_request.head.ref || github.sha }} + + # Don’t keep credentials when running untrusted PR code under PR_TARGET. + persist-credentials: ${{ github.event_name != 'pull_request_target' }} + + - name: Verify actual git checkout result + run: | + echo "=== Git Status After Checkout ===" + echo "Remote URLs:" + git remote -v + echo "" + echo "Current branch: $(git branch --show-current 2>/dev/null || echo 'detached HEAD')" + echo "Current commit: $(git rev-parse HEAD)" + echo "Current commit message: $(git log -1 --oneline)" + echo "" + echo "=== Recent commits ===" + git log --oneline -5 + + - name: Checkout lightspeed-Evaluation + uses: actions/checkout@v4 + with: + repository: lightspeed-core/lightspeed-evaluation + path: lightspeed-evaluation + + - name: Load lightspeed-stack.yaml configuration + run: | + CONFIG_FILE="./lightspeed-evaluation/tests/integration/lightspeed-stack.yaml" + + if [ ! 
-f "${CONFIG_FILE}" ]; then + echo "❌ Configuration file not found: ${CONFIG_FILE}" + exit 1 + fi + + cp "${CONFIG_FILE}" lightspeed-stack.yaml + echo "✅ Configuration loaded successfully" + + - name: Select and configure run.yaml + run: | + CONFIG_FILE="./lightspeed-evaluation/tests/integration/run.yaml" + + if [ ! -f "${CONFIG_FILE}" ]; then + echo "❌ Configuration file not found: ${CONFIG_FILE}" + exit 1 + fi + + cp "$CONFIG_FILE" run.yaml + + - name: Show final configuration + run: | + echo "=== Configuration Preview ===" + echo "Providers: $(grep -c "provider_id:" run.yaml)" + echo "Models: $(grep -c "model_id:" run.yaml)" + echo "" + echo "=== lightspeed-stack.yaml ===" + grep -A 3 "llama_stack:" lightspeed-stack.yaml + + - name: Run services (Library Mode) + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: | + echo "Starting service in library mode (1 container)" + docker compose -f docker-compose-library.yaml up -d + + if docker compose -f docker-compose-library.yaml ps | grep -E 'Exit|exited|stopped'; then + echo "Service failed to start - showing logs:" + docker compose -f docker-compose-library.yaml logs + exit 1 + else + echo "Service started successfully" + fi + + - name: Wait for the LSC + run: | + echo "Waiting for service on port 8080..." + for i in {1..30}; do + if curl --output /dev/null --fail http://localhost:8080/v1/models ; then + echo "Service is up!" + exit 0 + fi + docker compose -f docker-compose-library.yaml logs --tail=30 + echo "Still waiting..." + sleep 2 + done + + echo "Service did not start in time" + exit 1 + + - name: Run lightspeed evaluation e2e tests + env: + TERM: xterm-256color + FORCE_COLOR: 1 + run: | + cd lightspeed-evaluation + echo "Installing e2e tests dependencies" + pip install --break-system-packages uv + uv sync + + echo "Running e2e test suite..." 
+ make e2e_tests_lcore + + - name: Show logs on failure + if: failure() + run: | + echo "=== Test failure logs ===" + echo "=== lightspeed-stack (library mode) logs ===" + docker compose -f docker-compose-library.yaml logs lightspeed-stack + + + # Cleanup + - name: Stop the LSC if in local devel + if: ${{ always() && env.ACT }} + run: | + echo "Stopping containers" + echo "++++++++++++++++++++++" + sleep 2 + docker compose -f docker-compose-library.yaml down --rmi all diff --git a/.tekton/lightspeed-stack-pull-request.yaml b/.tekton/lightspeed-stack-pull-request.yaml index b6406f96c..59ef02f04 100644 --- a/.tekton/lightspeed-stack-pull-request.yaml +++ b/.tekton/lightspeed-stack-pull-request.yaml @@ -54,7 +54,7 @@ spec: ], "requirements_build_files": ["requirements-build.txt"], "binary": { - "packages": "aiohappyeyeballs,aiohttp,aiosignal,aiosqlite,annotated-doc,annotated-types,anyio,asyncpg,attrs,cffi,charset-normalizer,chevron,click,cryptography,datasets,dill,distro,dnspython,docstring-parser,durationpy,einops,email-validator,faiss-cpu,fire,frozenlist,fsspec,google-cloud-core,google-cloud-resource-manager,google-crc32c,google-genai,google-resumable-media,googleapis-common-protos,grpc-google-iam-v1,grpcio,grpcio-status,h11,hf-xet,httpcore,httpx,httpx-sse,idna,importlib-metadata,jinja2,jiter,joblib,jsonschema,jsonschema-specifications,kubernetes,lxml,markdown-it-py,mcp,mdurl,mpmath,multidict,networkx,numpy,oauthlib,packaging,pandas,peft,pillow,prometheus-client,prompt-toolkit,propcache,psycopg2-binary,pyarrow,pyasn1,pyasn1-modules,pycparser,pydantic,pydantic-core,pygments,python-dateutil,python-multipart,pyyaml,referencing,requests,requests-oauthlib,rpds-py,rsa,safetensors,scikit-learn,scipy,setuptools,six,sniffio,sqlalchemy,starlette,sympy,termcolor,threadpoolctl,tiktoken,tokenizers,torch,tornado,tqdm,transformers,tree-sitter,triton,typing-extensions,typing-inspection,tzdata,urllib3,websocket-client,websockets,wrapt,xxhash,yarl,zipp,uv,pip,maturin", + "packages": 
"aiohappyeyeballs,aiohttp,aiosignal,aiosqlite,annotated-doc,annotated-types,anyio,asyncpg,attrs,cffi,chevron,click,cryptography,datasets,dill,distro,dnspython,docstring-parser,durationpy,einops,email-validator,faiss-cpu,fire,frozenlist,fsspec,google-cloud-core,google-cloud-resource-manager,google-crc32c,google-genai,google-resumable-media,grpc-google-iam-v1,grpcio,grpcio-status,h11,hf-xet,httpcore,httpx,httpx-sse,idna,importlib-metadata,jinja2,jiter,joblib,jsonschema,jsonschema-specifications,kubernetes,lxml,markdown-it-py,mcp,mdurl,mpmath,multidict,networkx,numpy,oauthlib,packaging,pandas,peft,pillow,prometheus-client,prompt-toolkit,propcache,psycopg2-binary,pyarrow,pyasn1,pyasn1-modules,pycparser,pydantic,pydantic-core,pygments,python-dateutil,python-multipart,pyyaml,referencing,requests,requests-oauthlib,rpds-py,safetensors,scikit-learn,scipy,setuptools,six,sniffio,sqlalchemy,starlette,sympy,termcolor,threadpoolctl,tiktoken,tokenizers,torch,tqdm,transformers,tree-sitter,triton,typing-extensions,typing-inspection,tzdata,urllib3,websocket-client,websockets,wrapt,xxhash,yarl,zipp,uv,pip,maturin", "os": "linux", "arch": "x86_64,aarch64", "py_version": 312 @@ -209,7 +209,7 @@ spec: - name: name value: prefetch-dependencies-oci-ta - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-prefetch-dependencies-oci-ta:0.2@sha256:22612d629796a29ddd177d6e29c18a4319875d4e2348286ea01d16427cec0dc1 + value: quay.io/konflux-ci/tekton-catalog/task-prefetch-dependencies-oci-ta:0.3@sha256:a579d00fe370b6d9a1cb1751c883ecd0ec9f663604344e2fd61e1f6d5bf4e990 - name: kind value: task resolver: bundles @@ -267,7 +267,7 @@ spec: - name: name value: buildah-remote-oci-ta - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-buildah-remote-oci-ta:0.9@sha256:1302dbf65547d9ce065b4947f6217b7d3daa06dfd4542cbaa3e42438c1a08b0e + value: quay.io/konflux-ci/tekton-catalog/task-buildah-remote-oci-ta:0.9@sha256:a9ca472e297388d6ef8d1f51ee205abee6076aed7c5356ec0df84f14a2e78ad8 - name: kind 
value: task resolver: bundles @@ -293,7 +293,7 @@ spec: - name: name value: build-image-index - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-build-image-index:0.2@sha256:ac4f8b58ade5000f6e47d287b72832f0d89a91651849467be73e05da639cff7d + value: quay.io/konflux-ci/tekton-catalog/task-build-image-index:0.2@sha256:c7b0f7e1f743040d99a3532abbdfddc9484f80fd559a75171c97499c3eb5d163 - name: kind value: task resolver: bundles @@ -314,7 +314,7 @@ spec: - name: name value: source-build-oci-ta - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-source-build-oci-ta:0.3@sha256:eb620d137d2dfa9966d991ac210ad14f391cfa9cfc501e3cc1eb24e3332c6986 + value: quay.io/konflux-ci/tekton-catalog/task-source-build-oci-ta:0.3@sha256:362f0475df00e7dfb5f15dea0481d1b68b287f60411718d70a23da3c059a5613 - name: kind value: task resolver: bundles @@ -336,7 +336,7 @@ spec: - name: name value: deprecated-image-check - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-deprecated-image-check:0.5@sha256:516ea66977bc4cdad1da61d9273a31540f0d419270f8c8c4b6b3a6aaa4002d96 + value: quay.io/konflux-ci/tekton-catalog/task-deprecated-image-check:0.5@sha256:3457a4ca93f8d55f14ebd407532b1223c689eacc34f0abb3003db4111667bdae - name: kind value: task resolver: bundles @@ -416,7 +416,7 @@ spec: - name: name value: sast-snyk-check-oci-ta - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-sast-snyk-check-oci-ta:0.4@sha256:129e089094f472c7a147f1b1591ad003bb509d2ade553ff54fcf2f99d8af5ef3 + value: quay.io/konflux-ci/tekton-catalog/task-sast-snyk-check-oci-ta:0.4@sha256:6045ed6f2d37cfdf75cb3f2bf88706839c276a59f892ae027a315456c2914cf3 - name: kind value: task resolver: bundles @@ -443,7 +443,7 @@ spec: - name: name value: clamav-scan - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-clamav-scan:0.3@sha256:657d2704299777e90bc177ea012f4b13c80199ae77fa5d4b5e5b524993411e86 + value: 
quay.io/konflux-ci/tekton-catalog/task-clamav-scan:0.3@sha256:9f18b216ce71a66909e7cb17d9b34526c02d73cf12884ba32d1f10614f7b9f5a - name: kind value: task resolver: bundles @@ -488,7 +488,7 @@ spec: - name: name value: sast-coverity-check-oci-ta - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-sast-coverity-check-oci-ta:0.3@sha256:e8c63570f1d01d70b2a21b22a2a4aad9ca7d5c0327d8b2a4058a6e616cce17ca + value: quay.io/konflux-ci/tekton-catalog/task-sast-coverity-check-oci-ta:0.3@sha256:ab60e90de028036be823e75343fdc205418edcfa7c4de569bb5f8ab833bc2037 - name: kind value: task resolver: bundles @@ -509,7 +509,7 @@ spec: - name: name value: coverity-availability-check - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-coverity-availability-check:0.2@sha256:c9b9301c442830eca3ad7d9d5287b082b94c38d406442f391447484147afd006 + value: quay.io/konflux-ci/tekton-catalog/task-coverity-availability-check:0.2@sha256:de35caf2f090e3275cfd1019ea50d9662422e904fb4aebd6ea29fb53a1ad57f5 - name: kind value: task resolver: bundles @@ -535,7 +535,7 @@ spec: - name: name value: sast-shell-check-oci-ta - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-sast-shell-check-oci-ta:0.1@sha256:a2fa9231978362bdc5d244eb179167fba727044a18a981ebac806847845aced8 + value: quay.io/konflux-ci/tekton-catalog/task-sast-shell-check-oci-ta:0.1@sha256:c314b4d5369d7961af51c865be28cd792d5f233aef94ecf035b3f84acde398bf - name: kind value: task resolver: bundles @@ -561,7 +561,7 @@ spec: - name: name value: sast-unicode-check-oci-ta - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-sast-unicode-check-oci-ta:0.4@sha256:8f93c716782b68a71e314d3eb037edfc07255d1a4d531afcf612409ef62233c7 + value: quay.io/konflux-ci/tekton-catalog/task-sast-unicode-check-oci-ta:0.4@sha256:3d8a6902ab7c5c2125be07263f395426342c5032b3abfd0140162ad838437bab - name: kind value: task resolver: bundles @@ -606,7 +606,7 @@ spec: - name: name value: push-dockerfile-oci-ta - name: bundle - value: 
quay.io/konflux-ci/tekton-catalog/task-push-dockerfile-oci-ta:0.3@sha256:322d515ca66d92188067344761733d1e5c64d4b7bb790d10f35540da5e6289f1 + value: quay.io/konflux-ci/tekton-catalog/task-push-dockerfile-oci-ta:0.3@sha256:1bc2d0f26b89259db090a47bb38217c82c05e335d626653d184adf1d196ca131 - name: kind value: task resolver: bundles @@ -623,7 +623,7 @@ spec: - name: name value: rpms-signature-scan - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-rpms-signature-scan:0.2@sha256:e920854293f9917e628d8c68a0ee3b003dabe0a67a5a2fc865a354030d4f93e2 + value: quay.io/konflux-ci/tekton-catalog/task-rpms-signature-scan:0.2@sha256:637fcb11066e2248d901c8f5fcbf713836bb9bf6ef6eff869b9891acd4d32398 - name: kind value: task resolver: bundles diff --git a/.tekton/lightspeed-stack-push.yaml b/.tekton/lightspeed-stack-push.yaml index a46f29153..fb722b51e 100644 --- a/.tekton/lightspeed-stack-push.yaml +++ b/.tekton/lightspeed-stack-push.yaml @@ -46,7 +46,7 @@ spec: ], "requirements_build_files": ["requirements-build.txt"], "binary": { - "packages": 
"aiohappyeyeballs,aiohttp,aiosignal,aiosqlite,annotated-doc,annotated-types,anyio,asyncpg,attrs,cffi,charset-normalizer,chevron,click,cryptography,datasets,dill,distro,dnspython,docstring-parser,durationpy,einops,email-validator,faiss-cpu,fire,frozenlist,fsspec,google-cloud-core,google-cloud-resource-manager,google-crc32c,google-genai,google-resumable-media,googleapis-common-protos,grpc-google-iam-v1,grpcio,grpcio-status,h11,hf-xet,httpcore,httpx,httpx-sse,idna,importlib-metadata,jinja2,jiter,joblib,jsonschema,jsonschema-specifications,kubernetes,lxml,markdown-it-py,mcp,mdurl,mpmath,multidict,networkx,numpy,oauthlib,packaging,pandas,peft,pillow,prometheus-client,prompt-toolkit,propcache,psycopg2-binary,pyarrow,pyasn1,pyasn1-modules,pycparser,pydantic,pydantic-core,pygments,python-dateutil,python-multipart,pyyaml,referencing,requests,requests-oauthlib,rpds-py,rsa,safetensors,scikit-learn,scipy,setuptools,six,sniffio,sqlalchemy,starlette,sympy,termcolor,threadpoolctl,tiktoken,tokenizers,torch,tornado,tqdm,transformers,tree-sitter,triton,typing-extensions,typing-inspection,tzdata,urllib3,websocket-client,websockets,wrapt,xxhash,yarl,zipp,uv,pip,maturin", + "packages": 
"aiohappyeyeballs,aiohttp,aiosignal,aiosqlite,annotated-doc,annotated-types,anyio,asyncpg,attrs,cffi,chevron,click,cryptography,datasets,dill,distro,dnspython,docstring-parser,durationpy,einops,email-validator,faiss-cpu,fire,frozenlist,fsspec,google-cloud-core,google-cloud-resource-manager,google-crc32c,google-genai,google-resumable-media,grpc-google-iam-v1,grpcio,grpcio-status,h11,hf-xet,httpcore,httpx,httpx-sse,idna,importlib-metadata,jinja2,jiter,joblib,jsonschema,jsonschema-specifications,kubernetes,lxml,markdown-it-py,mcp,mdurl,mpmath,multidict,networkx,numpy,oauthlib,packaging,pandas,peft,pillow,prometheus-client,prompt-toolkit,propcache,psycopg2-binary,pyarrow,pyasn1,pyasn1-modules,pycparser,pydantic,pydantic-core,pygments,python-dateutil,python-multipart,pyyaml,referencing,requests,requests-oauthlib,rpds-py,safetensors,scikit-learn,scipy,setuptools,six,sniffio,sqlalchemy,starlette,sympy,termcolor,threadpoolctl,tiktoken,tokenizers,torch,tqdm,transformers,tree-sitter,triton,typing-extensions,typing-inspection,tzdata,urllib3,websocket-client,websockets,wrapt,xxhash,yarl,zipp,uv,pip,maturin", "os": "linux", "arch": "x86_64,aarch64", "py_version": 312 @@ -194,7 +194,7 @@ spec: - name: name value: prefetch-dependencies-oci-ta - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-prefetch-dependencies-oci-ta:0.2@sha256:22612d629796a29ddd177d6e29c18a4319875d4e2348286ea01d16427cec0dc1 + value: quay.io/konflux-ci/tekton-catalog/task-prefetch-dependencies-oci-ta:0.3@sha256:a579d00fe370b6d9a1cb1751c883ecd0ec9f663604344e2fd61e1f6d5bf4e990 - name: kind value: task resolver: bundles @@ -248,7 +248,7 @@ spec: - name: name value: buildah-remote-oci-ta - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-buildah-remote-oci-ta:0.9@sha256:1302dbf65547d9ce065b4947f6217b7d3daa06dfd4542cbaa3e42438c1a08b0e + value: quay.io/konflux-ci/tekton-catalog/task-buildah-remote-oci-ta:0.9@sha256:a9ca472e297388d6ef8d1f51ee205abee6076aed7c5356ec0df84f14a2e78ad8 - name: kind 
value: task resolver: bundles @@ -274,7 +274,7 @@ spec: - name: name value: build-image-index - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-build-image-index:0.2@sha256:ac4f8b58ade5000f6e47d287b72832f0d89a91651849467be73e05da639cff7d + value: quay.io/konflux-ci/tekton-catalog/task-build-image-index:0.2@sha256:c7b0f7e1f743040d99a3532abbdfddc9484f80fd559a75171c97499c3eb5d163 - name: kind value: task resolver: bundles @@ -295,7 +295,7 @@ spec: - name: name value: source-build-oci-ta - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-source-build-oci-ta:0.3@sha256:eb620d137d2dfa9966d991ac210ad14f391cfa9cfc501e3cc1eb24e3332c6986 + value: quay.io/konflux-ci/tekton-catalog/task-source-build-oci-ta:0.3@sha256:362f0475df00e7dfb5f15dea0481d1b68b287f60411718d70a23da3c059a5613 - name: kind value: task resolver: bundles @@ -317,7 +317,7 @@ spec: - name: name value: deprecated-image-check - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-deprecated-image-check:0.5@sha256:516ea66977bc4cdad1da61d9273a31540f0d419270f8c8c4b6b3a6aaa4002d96 + value: quay.io/konflux-ci/tekton-catalog/task-deprecated-image-check:0.5@sha256:3457a4ca93f8d55f14ebd407532b1223c689eacc34f0abb3003db4111667bdae - name: kind value: task resolver: bundles @@ -397,7 +397,7 @@ spec: - name: name value: sast-snyk-check-oci-ta - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-sast-snyk-check-oci-ta:0.4@sha256:129e089094f472c7a147f1b1591ad003bb509d2ade553ff54fcf2f99d8af5ef3 + value: quay.io/konflux-ci/tekton-catalog/task-sast-snyk-check-oci-ta:0.4@sha256:6045ed6f2d37cfdf75cb3f2bf88706839c276a59f892ae027a315456c2914cf3 - name: kind value: task resolver: bundles @@ -424,7 +424,7 @@ spec: - name: name value: clamav-scan - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-clamav-scan:0.3@sha256:657d2704299777e90bc177ea012f4b13c80199ae77fa5d4b5e5b524993411e86 + value: 
quay.io/konflux-ci/tekton-catalog/task-clamav-scan:0.3@sha256:9f18b216ce71a66909e7cb17d9b34526c02d73cf12884ba32d1f10614f7b9f5a - name: kind value: task resolver: bundles @@ -469,7 +469,7 @@ spec: - name: name value: sast-coverity-check-oci-ta - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-sast-coverity-check-oci-ta:0.3@sha256:e8c63570f1d01d70b2a21b22a2a4aad9ca7d5c0327d8b2a4058a6e616cce17ca + value: quay.io/konflux-ci/tekton-catalog/task-sast-coverity-check-oci-ta:0.3@sha256:ab60e90de028036be823e75343fdc205418edcfa7c4de569bb5f8ab833bc2037 - name: kind value: task resolver: bundles @@ -490,7 +490,7 @@ spec: - name: name value: coverity-availability-check - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-coverity-availability-check:0.2@sha256:c9b9301c442830eca3ad7d9d5287b082b94c38d406442f391447484147afd006 + value: quay.io/konflux-ci/tekton-catalog/task-coverity-availability-check:0.2@sha256:de35caf2f090e3275cfd1019ea50d9662422e904fb4aebd6ea29fb53a1ad57f5 - name: kind value: task resolver: bundles @@ -516,7 +516,7 @@ spec: - name: name value: sast-shell-check-oci-ta - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-sast-shell-check-oci-ta:0.1@sha256:a2fa9231978362bdc5d244eb179167fba727044a18a981ebac806847845aced8 + value: quay.io/konflux-ci/tekton-catalog/task-sast-shell-check-oci-ta:0.1@sha256:c314b4d5369d7961af51c865be28cd792d5f233aef94ecf035b3f84acde398bf - name: kind value: task resolver: bundles @@ -542,7 +542,7 @@ spec: - name: name value: sast-unicode-check-oci-ta - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-sast-unicode-check-oci-ta:0.4@sha256:8f93c716782b68a71e314d3eb037edfc07255d1a4d531afcf612409ef62233c7 + value: quay.io/konflux-ci/tekton-catalog/task-sast-unicode-check-oci-ta:0.4@sha256:3d8a6902ab7c5c2125be07263f395426342c5032b3abfd0140162ad838437bab - name: kind value: task resolver: bundles @@ -590,7 +590,7 @@ spec: - name: name value: push-dockerfile-oci-ta - name: bundle - value: 
quay.io/konflux-ci/tekton-catalog/task-push-dockerfile-oci-ta:0.3@sha256:322d515ca66d92188067344761733d1e5c64d4b7bb790d10f35540da5e6289f1 + value: quay.io/konflux-ci/tekton-catalog/task-push-dockerfile-oci-ta:0.3@sha256:1bc2d0f26b89259db090a47bb38217c82c05e335d626653d184adf1d196ca131 - name: kind value: task resolver: bundles @@ -607,7 +607,7 @@ spec: - name: name value: rpms-signature-scan - name: bundle - value: quay.io/konflux-ci/tekton-catalog/task-rpms-signature-scan:0.2@sha256:e920854293f9917e628d8c68a0ee3b003dabe0a67a5a2fc865a354030d4f93e2 + value: quay.io/konflux-ci/tekton-catalog/task-rpms-signature-scan:0.2@sha256:637fcb11066e2248d901c8f5fcbf713836bb9bf6ef6eff869b9891acd4d32398 - name: kind value: task resolver: bundles diff --git a/dev-tools/mcp-mock-server/server.py b/dev-tools/mcp-mock-server/server.py index fbee23c96..7f087622c 100644 --- a/dev-tools/mcp-mock-server/server.py +++ b/dev-tools/mcp-mock-server/server.py @@ -24,7 +24,7 @@ import sys import threading from http.server import HTTPServer, BaseHTTPRequestHandler -from datetime import datetime +from datetime import datetime, UTC from pathlib import Path from typing import Any @@ -38,7 +38,7 @@ class MCPMockHandler(BaseHTTPRequestHandler): def log_message(self, format: str, *args: Any) -> None: """Log requests with timestamp.""" # pylint: disable=redefined-builtin - timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + timestamp = datetime.now(tz=UTC).strftime("%Y-%m-%d %H:%M:%S") print(f"[{timestamp}] {format % args}") def _capture_headers(self) -> None: @@ -46,13 +46,12 @@ def _capture_headers(self) -> None: last_headers.clear() # Capture all headers for debugging - for header_name, value in self.headers.items(): - last_headers[header_name] = value + last_headers.update(dict(self.headers.items())) # Log the request request_log.append( { - "timestamp": datetime.now().isoformat(), + "timestamp": datetime.now(tz=UTC).isoformat(), "method": self.command, "path": self.path, "headers": 
dict(last_headers), diff --git a/docker-compose.yaml b/docker-compose.yaml index 03bd6450d..4ee0d30c1 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -82,6 +82,7 @@ services: volumes: - ./lightspeed-stack.yaml:/app-root/lightspeed-stack.yaml:z - ./tests/e2e/secrets/mcp-token:/tmp/mcp-secret-token:ro + - ./tests/e2e/secrets/invalid-mcp-token:/tmp/invalid-mcp-secret-token:ro environment: - OPENAI_API_KEY=${OPENAI_API_KEY} # Azure Entra ID credentials (AZURE_API_KEY is obtained dynamically) diff --git a/docs/config.html b/docs/config.html index 423649442..b31888ed5 100644 --- a/docs/config.html +++ b/docs/config.html @@ -376,6 +376,11 @@

AzureEntraIdConfiguration

ByokRag

BYOK (Bring Your Own Knowledge) RAG configuration.

+ + + + + @@ -414,6 +419,13 @@

ByokRag

+ + + + +
Field string Path to RAG database.
score_multipliernumberMultiplier applied to relevance scores from this vector store. Used +to weight results when querying multiple knowledge sources. Values > +1 boost this store’s results; values < 1 reduce them.

CORSConfiguration

@@ -603,9 +615,15 @@

Configuration

‘production’). Used in telemetry events. - solr + rag - Configuration for Solr vector search operations. + Configuration for all RAG strategies (inline and tool-based). + + + okp + + OKP provider settings. Only used when ‘okp’ is listed in rag.inline +or rag.tool. @@ -1089,6 +1107,41 @@

ModelContextProtocolServer

+

OkpConfiguration

+

OKP (Offline Knowledge Portal) provider configuration.

+

Controls provider-specific behaviour for the OKP vector store. Only +relevant when "okp" is listed in rag.inline or +rag.tool.

+ + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
offlinebooleanWhen True, use parent_id for OKP chunk source URLs. When False, use +reference_url for chunk source URLs.
chunk_filter_querystringOKP filter query applied to every OKP search request. Defaults to +‘is_chunk:true’ to restrict results to chunk documents. To add extra +constraints, extend the expression using boolean syntax, +e.g. ‘is_chunk:true AND product:openshift’.

PostgreSQLDatabaseConfiguration

PostgreSQL database configuration.

PostgreSQL database is used by Lightspeed Core Stack service for @@ -1337,6 +1390,48 @@

RHIdentityConfiguration

+

RagConfiguration

+

RAG strategy configuration.

+

Controls which RAG sources are used for inline and tool-based +retrieval.

+

Each strategy lists RAG IDs to include. The special ID +"okp" defined in constants, activates the OKP provider; all +other IDs refer to entries in byok_rag.

+

Backward compatibility: - inline defaults to +[] (no inline RAG). - tool defaults to +None which means all registered vector stores are used +(identical to the previous tool.byok.enabled = True +default).

+ + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
inlinearrayRAG IDs whose sources are injected as context before the LLM call. +Use ‘okp’ to enable OKP inline RAG. Empty by default (no inline +RAG).
toolarrayRAG IDs made available to the LLM as a file_search tool. Use ‘okp’ +to include the OKP vector store. When omitted, all registered BYOK +vector stores are used (backward compatibility).

SQLiteDatabaseConfiguration

SQLite database configuration.

@@ -1429,39 +1524,6 @@

ServiceConfiguration

-

SolrConfiguration

-

Solr configuration for vector search queries.

-

Controls whether to use offline or online mode when building document -URLs from vector search results, and enables/disables Solr vector IO -functionality.

- - - - - - - - - - - - - - - - - - - - - - - - - -
FieldTypeDescription
enabledbooleanWhen True, enables Solr vector IO functionality for vector search -queries. When False, disables Solr vector search processing.
offlinebooleanWhen True, use parent_id for chunk source URLs. When False, use -reference_url for chunk source URLs.

SplunkConfiguration

Splunk HEC (HTTP Event Collector) configuration.

Splunk HEC allows sending events directly to Splunk over HTTP/HTTPS. diff --git a/docs/config.md b/docs/config.md index 8ba10ad7e..00c0ae747 100644 --- a/docs/config.md +++ b/docs/config.md @@ -130,12 +130,12 @@ byok_rag: | Field | Type | Description | |-------|------|-------------| | rag_id | string | Unique RAG ID | -| rag_type | string | Type of RAG database (e.g. `inline::faiss`). | +| rag_type | string | Type of RAG database. | | embedding_model | string | Embedding model identification | | embedding_dimension | integer | Dimensionality of embedding vectors. | | vector_db_id | string | Vector database identification. | | db_path | string | Path to RAG database. | -| score_multiplier | number | Multiplier applied to relevance scores from this vector store when querying multiple sources. Values > 1 boost results; values < 1 reduce them. Default: 1.0. | +| score_multiplier | number | Multiplier applied to relevance scores from this vector store. Used to weight results when querying multiple knowledge sources. Values > 1 boost this store's results; values < 1 reduce them. | ## CORSConfiguration @@ -187,7 +187,8 @@ Global service configuration. | azure_entra_id | | | | splunk | | Splunk HEC configuration for sending telemetry events. | | deployment_environment | string | Deployment environment name (e.g., 'development', 'staging', 'production'). Used in telemetry events. | -| rag | | RAG strategy configuration (OKP and BYOK). Controls pre-query (Inline RAG) and tool-based (Tool RAG) retrieval. | +| rag | | Configuration for all RAG strategies (inline and tool-based). | +| okp | | OKP provider settings. Only used when 'okp' is listed in rag.inline or rag.tool. | ## ConversationHistoryConfiguration @@ -395,6 +396,21 @@ Useful resources: | timeout | integer | Timeout in seconds for requests to the MCP server. If not specified, the default timeout from Llama Stack will be used. Note: This field is reserved for future use when Llama Stack adds timeout support. 
| +## OkpConfiguration + + +OKP (Offline Knowledge Portal) provider configuration. + +Controls provider-specific behaviour for the OKP vector store. +Only relevant when ``"okp"`` is listed in ``rag.inline`` or ``rag.tool``. + + +| Field | Type | Description | +|-------|------|-------------| +| offline | boolean | When True, use parent_id for OKP chunk source URLs. When False, use reference_url for chunk source URLs. | +| chunk_filter_query | string | OKP filter query applied to every OKP search request. Defaults to 'is_chunk:true' to restrict results to chunk documents. To add extra constraints, extend the expression using boolean syntax, e.g. 'is_chunk:true AND product:*openshift*'. | + + ## PostgreSQLDatabaseConfiguration @@ -501,6 +517,28 @@ Red Hat Identity authentication configuration. | required_entitlements | array | List of all required entitlements. | +## RagConfiguration + + +RAG strategy configuration. + +Controls which RAG sources are used for inline and tool-based retrieval. + +Each strategy lists RAG IDs to include. The special ID ``"okp"`` defined in constants, +activates the OKP provider; all other IDs refer to entries in ``byok_rag``. + +Backward compatibility: + - ``inline`` defaults to ``[]`` (no inline RAG). + - ``tool`` defaults to ``None`` which means all registered vector stores + are used (identical to the previous ``tool.byok.enabled = True`` default). + + +| Field | Type | Description | +|-------|------|-------------| +| inline | array | RAG IDs whose sources are injected as context before the LLM call. Use 'okp' to enable OKP inline RAG. Empty by default (no inline RAG). | +| tool | array | RAG IDs made available to the LLM as a file_search tool. Use 'okp' to include the OKP vector store. When omitted, all registered BYOK vector stores are used (backward compatibility). | + + ## SQLiteDatabaseConfiguration @@ -537,62 +575,6 @@ the service can handle requests concurrently. 
| cors | | Cross-Origin Resource Sharing configuration for cross-domain requests | -## RagConfiguration - - -Top-level RAG strategy configuration. Controls two complementary retrieval modes: - -- **Inline RAG**: context is fetched from the listed sources and injected before the - LLM request. -- **Tool RAG**: the LLM can call the `file_search` tool during generation to retrieve - context on demand from the listed vector stores. Supports both BYOK and OKP. - -Each strategy is configured as a list of RAG IDs referencing entries in `byok_rag`. -The special ID `okp` activates the OKP provider (no `byok_rag` entry needed). - -**Backward compatibility**: omitting `tool` uses all registered BYOK vector stores -(equivalent to the old `tool.byok.enabled = True`). Omitting `inline` means no -context is injected before the LLM request. - -Example: - -```yaml -rag: - inline: - - my-docs # inject context from my-docs before the LLM request - tool: - - okp # LLM can search OKP as a tool - - my-docs # LLM can also search my-docs as a tool - -okp: - offline: true # use parent_id for OKP URL construction -``` - - -| Field | Type | Description | -|-------|------|-------------| -| inline | list[string] | RAG IDs whose content is injected before the LLM request. Use `okp` for OKP. Empty by default (no inline RAG). | -| tool | list[string] or null | RAG IDs exposed as a `file_search` tool the LLM can invoke. Use `okp` to include OKP. When omitted, all registered BYOK vector stores are used (backward compatibility). | - - -## OkpConfiguration - -OKP (Offline Knowledge Portal) provider settings. Only used when `okp` is listed in `rag.inline` or `rag.tool`. - -Example: - -```yaml -okp: - offline: true # use parent_id for OKP URL construction - chunk_filter_query: "is_chunk:true" -``` - -| Field | Type | Description | -|-------|------|-------------| -| offline | boolean | When `true` (default), use `parent_id` for OKP chunk source URLs. When `false`, use `reference_url`. 
| -| chunk_filter_query | string | OKP filter query (`fq`) applied to every OKP search request. Defaults to `"is_chunk:true"`. Extend with `AND` for extra constraints. | - - ## SplunkConfiguration diff --git a/docs/config.puml b/docs/config.puml index cc35f5862..69497edcc 100644 --- a/docs/config.puml +++ b/docs/config.puml @@ -41,11 +41,12 @@ class "AzureEntraIdConfiguration" as src.models.config.AzureEntraIdConfiguration tenant_id } class "ByokRag" as src.models.config.ByokRag { - db_path + db_path : str embedding_dimension embedding_model : str rag_id : str rag_type : str + score_multiplier : float vector_db_id : str } class "CORSConfiguration" as src.models.config.CORSConfiguration { @@ -69,9 +70,10 @@ class "Configuration" as src.models.config.Configuration { llama_stack mcp_servers : list[ModelContextProtocolServer] name : str + okp quota_handlers + rag service - solr : Optional[SolrConfiguration] splunk : Optional[SplunkConfiguration] user_data_collection dump(filename: str | Path) -> None @@ -160,6 +162,10 @@ class "ModelContextProtocolServer" as src.models.config.ModelContextProtocolServ resolve_auth_headers() -> Self validate_headers(value: list[str]) -> list[str] } +class "OkpConfiguration" as src.models.config.OkpConfiguration { + chunk_filter_query : str + offline : bool +} class "PostgreSQLDatabaseConfiguration" as src.models.config.PostgreSQLDatabaseConfiguration { ca_cert_path : Optional[FilePath] db : str @@ -194,6 +200,10 @@ class "QuotaSchedulerConfiguration" as src.models.config.QuotaSchedulerConfigura class "RHIdentityConfiguration" as src.models.config.RHIdentityConfiguration { required_entitlements : Optional[list[str]] } +class "RagConfiguration" as src.models.config.RagConfiguration { + inline : list[str] + tool : Optional[list[str]] +} class "SQLiteDatabaseConfiguration" as src.models.config.SQLiteDatabaseConfiguration { db_path : str } @@ -211,10 +221,6 @@ class "ServiceConfiguration" as src.models.config.ServiceConfiguration { 
check_service_configuration() -> Self validate_root_path(value: str) -> str } -class "SolrConfiguration" as src.models.config.SolrConfiguration { - enabled : bool - offline : bool -} class "SplunkConfiguration" as src.models.config.SplunkConfiguration { enabled : bool index : Optional[str] @@ -257,14 +263,15 @@ src.models.config.JwtConfiguration --|> src.models.config.ConfigurationBase src.models.config.JwtRoleRule --|> src.models.config.ConfigurationBase src.models.config.LlamaStackConfiguration --|> src.models.config.ConfigurationBase src.models.config.ModelContextProtocolServer --|> src.models.config.ConfigurationBase +src.models.config.OkpConfiguration --|> src.models.config.ConfigurationBase src.models.config.PostgreSQLDatabaseConfiguration --|> src.models.config.ConfigurationBase src.models.config.QuotaHandlersConfiguration --|> src.models.config.ConfigurationBase src.models.config.QuotaLimiterConfiguration --|> src.models.config.ConfigurationBase src.models.config.QuotaSchedulerConfiguration --|> src.models.config.ConfigurationBase src.models.config.RHIdentityConfiguration --|> src.models.config.ConfigurationBase +src.models.config.RagConfiguration --|> src.models.config.ConfigurationBase src.models.config.SQLiteDatabaseConfiguration --|> src.models.config.ConfigurationBase src.models.config.ServiceConfiguration --|> src.models.config.ConfigurationBase -src.models.config.SolrConfiguration --|> src.models.config.ConfigurationBase src.models.config.SplunkConfiguration --|> src.models.config.ConfigurationBase src.models.config.TLSConfiguration --|> src.models.config.ConfigurationBase src.models.config.UserDataCollection --|> src.models.config.ConfigurationBase @@ -278,8 +285,10 @@ src.models.config.InferenceConfiguration --* src.models.config.Configuration : i src.models.config.JsonPathOperator --* src.models.config.JwtRoleRule : operator src.models.config.JwtConfiguration --* src.models.config.JwkConfiguration : jwt_configuration 
src.models.config.LlamaStackConfiguration --* src.models.config.Configuration : llama_stack +src.models.config.OkpConfiguration --* src.models.config.Configuration : okp src.models.config.QuotaHandlersConfiguration --* src.models.config.Configuration : quota_handlers src.models.config.QuotaSchedulerConfiguration --* src.models.config.QuotaHandlersConfiguration : scheduler +src.models.config.RagConfiguration --* src.models.config.Configuration : rag src.models.config.SQLiteDatabaseConfiguration --* src.models.config.DatabaseConfiguration : sqlite src.models.config.ServiceConfiguration --* src.models.config.Configuration : service src.models.config.TLSConfiguration --* src.models.config.ServiceConfiguration : tls_config diff --git a/docs/config.svg b/docs/config.svg index c0d342d41..816cbd1e5 100644 --- a/docs/config.svg +++ b/docs/config.svg @@ -1,729 +1,759 @@ - + - - - - A2AStateConfiguration - - config - postgres : Optional[PostgreSQLDatabaseConfiguration] - sqlite : Optional[SQLiteDatabaseConfiguration] - storage_type - - check_a2a_state_configuration() -> Self + + + + A2AStateConfiguration + + config + postgres : Optional[PostgreSQLDatabaseConfiguration] + sqlite : Optional[SQLiteDatabaseConfiguration] + storage_type + + check_a2a_state_configuration() -> Self - - - - APIKeyTokenConfiguration - - api_key - + + + + APIKeyTokenConfiguration + + api_key + - - - - AccessRule - - actions : list[Action] - role : str - + + + + AccessRule + + actions : list[Action] + role : str + - - - - Action - - name - + + + + Action + + name + - - - - AuthenticationConfiguration - - api_key_config : Optional[APIKeyTokenConfiguration] - api_key_configuration - jwk_config : Optional[JwkConfiguration] - jwk_configuration - k8s_ca_cert_path : Optional[FilePath] - k8s_cluster_api : Optional[AnyHttpUrl] - module : str - rh_identity_config : Optional[RHIdentityConfiguration] - rh_identity_configuration - skip_for_health_probes : bool - skip_tls_verification : bool - - 
check_authentication_model() -> Self + + + + AuthenticationConfiguration + + api_key_config : Optional[APIKeyTokenConfiguration] + api_key_configuration + jwk_config : Optional[JwkConfiguration] + jwk_configuration + k8s_ca_cert_path : Optional[FilePath] + k8s_cluster_api : Optional[AnyHttpUrl] + module : str + rh_identity_config : Optional[RHIdentityConfiguration] + rh_identity_configuration + skip_for_health_probes : bool + skip_tls_verification : bool + + check_authentication_model() -> Self - - - - AuthorizationConfiguration - - access_rules : list[AccessRule] - + + + + AuthorizationConfiguration + + access_rules : list[AccessRule] + - - - - AzureEntraIdConfiguration - - client_id - client_secret - scope : str - tenant_id - + + + + AzureEntraIdConfiguration + + client_id + client_secret + scope : str + tenant_id + - - - - ByokRag - - db_path - embedding_dimension - embedding_model : str - rag_id : str - rag_type : str - vector_db_id : str - + + + + ByokRag + + db_path : str + embedding_dimension + embedding_model : str + rag_id : str + rag_type : str + score_multiplier : float + vector_db_id : str + - - - - CORSConfiguration - - allow_credentials : bool - allow_headers : list[str] - allow_methods : list[str] - allow_origins : list[str] - - check_cors_configuration() -> Self + + + + CORSConfiguration + + allow_credentials : bool + allow_headers : list[str] + allow_methods : list[str] + allow_origins : list[str] + + check_cors_configuration() -> Self - - - - Configuration - - a2a_state - authentication - authorization : Optional[AuthorizationConfiguration] - azure_entra_id : Optional[AzureEntraIdConfiguration] - byok_rag : list[ByokRag] - conversation_cache - customization : Optional[Customization] - database - deployment_environment : str - inference - llama_stack - mcp_servers : list[ModelContextProtocolServer] - name : str - quota_handlers - service - solr : Optional[SolrConfiguration] - splunk : Optional[SplunkConfiguration] - user_data_collection - - 
dump(filename: str | Path) -> None - validate_mcp_auth_headers() -> Self + + + + Configuration + + a2a_state + authentication + authorization : Optional[AuthorizationConfiguration] + azure_entra_id : Optional[AzureEntraIdConfiguration] + byok_rag : list[ByokRag] + conversation_cache + customization : Optional[Customization] + database + deployment_environment : str + inference + llama_stack + mcp_servers : list[ModelContextProtocolServer] + name : str + okp + quota_handlers + rag + service + splunk : Optional[SplunkConfiguration] + user_data_collection + + dump(filename: str | Path) -> None + validate_mcp_auth_headers() -> Self - - - - ConfigurationBase - - model_config - + + + + ConfigurationBase + + model_config + - - - - ConversationHistoryConfiguration - - memory : Optional[InMemoryCacheConfig] - postgres : Optional[PostgreSQLDatabaseConfiguration] - sqlite : Optional[SQLiteDatabaseConfiguration] - type : Optional[Literal['noop', 'memory', 'sqlite', 'postgres']] - - check_cache_configuration() -> Self + + + + ConversationHistoryConfiguration + + memory : Optional[InMemoryCacheConfig] + postgres : Optional[PostgreSQLDatabaseConfiguration] + sqlite : Optional[SQLiteDatabaseConfiguration] + type : Optional[Literal['noop', 'memory', 'sqlite', 'postgres']] + + check_cache_configuration() -> Self - - - - CustomProfile - - path : str - prompts : dict[str, str] - - get_prompts() -> dict[str, str] + + + + CustomProfile + + path : str + prompts : dict[str, str] + + get_prompts() -> dict[str, str] - - - - Customization - - agent_card_config : Optional[dict[str, Any]] - agent_card_path : Optional[FilePath] - custom_profile : Optional[CustomProfile] - disable_query_system_prompt : bool - disable_shield_ids_override : bool - profile_path : Optional[str] - system_prompt : Optional[str] - system_prompt_path : Optional[FilePath] - - check_customization_model() -> Self + + + + Customization + + agent_card_config : Optional[dict[str, Any]] + agent_card_path : Optional[FilePath] + 
custom_profile : Optional[CustomProfile] + disable_query_system_prompt : bool + disable_shield_ids_override : bool + profile_path : Optional[str] + system_prompt : Optional[str] + system_prompt_path : Optional[FilePath] + + check_customization_model() -> Self - - - - DatabaseConfiguration - - config - db_type - postgres : Optional[PostgreSQLDatabaseConfiguration] - sqlite : Optional[SQLiteDatabaseConfiguration] - - check_database_configuration() -> Self + + + + DatabaseConfiguration + + config + db_type + postgres : Optional[PostgreSQLDatabaseConfiguration] + sqlite : Optional[SQLiteDatabaseConfiguration] + + check_database_configuration() -> Self - - - - InMemoryCacheConfig - - max_entries - + + + + InMemoryCacheConfig + + max_entries + - - - - InferenceConfiguration - - default_model : Optional[str] - default_provider : Optional[str] - - check_default_model_and_provider() -> Self + + + + InferenceConfiguration + + default_model : Optional[str] + default_provider : Optional[str] + + check_default_model_and_provider() -> Self - - - - JsonPathOperator - - name - + + + + JsonPathOperator + + name + - - - - JwkConfiguration - - jwt_configuration - url - + + + + JwkConfiguration + + jwt_configuration + url + - - - - JwtConfiguration - - role_rules : list[JwtRoleRule] - user_id_claim : str - username_claim : str - + + + + JwtConfiguration + + role_rules : list[JwtRoleRule] + user_id_claim : str + username_claim : str + - - - - JwtRoleRule - - compiled_regex - jsonpath : str - negate : bool - operator - roles : list[str] - value : Any - - check_jsonpath() -> Self - check_regex_pattern() -> Self - check_roles() -> Self + + + + JwtRoleRule + + compiled_regex + jsonpath : str + negate : bool + operator + roles : list[str] + value : Any + + check_jsonpath() -> Self + check_regex_pattern() -> Self + check_roles() -> Self - - - - LlamaStackConfiguration - - api_key : Optional[SecretStr] - library_client_config_path : Optional[str] - timeout - url : Optional[AnyHttpUrl] - 
use_as_library_client : Optional[bool] - - check_llama_stack_model() -> Self + + + + LlamaStackConfiguration + + api_key : Optional[SecretStr] + library_client_config_path : Optional[str] + timeout + url : Optional[AnyHttpUrl] + use_as_library_client : Optional[bool] + + check_llama_stack_model() -> Self - - - - ModelContextProtocolServer - - authorization_headers : dict[str, str] - headers : list[str] - name : str - provider_id : str - resolved_authorization_headers - timeout : Optional[PositiveInt] - url : str - - resolve_auth_headers() -> Self - validate_headers(value: list[str]) -> list[str] + + + + ModelContextProtocolServer + + authorization_headers : dict[str, str] + headers : list[str] + name : str + provider_id : str + resolved_authorization_headers + timeout : Optional[PositiveInt] + url : str + + resolve_auth_headers() -> Self + validate_headers(value: list[str]) -> list[str] + + + + + + + OkpConfiguration + + chunk_filter_query : str + offline : bool + - - - - PostgreSQLDatabaseConfiguration - - ca_cert_path : Optional[FilePath] - db : str - gss_encmode : str - host : str - namespace : Optional[str] - password - port - ssl_mode : str - user : str - - check_postgres_configuration() -> Self + + + + PostgreSQLDatabaseConfiguration + + ca_cert_path : Optional[FilePath] + db : str + gss_encmode : str + host : str + namespace : Optional[str] + password + port + ssl_mode : str + user : str + + check_postgres_configuration() -> Self - - - - QuotaHandlersConfiguration - - enable_token_history : bool - limiters : list[QuotaLimiterConfiguration] - postgres : Optional[PostgreSQLDatabaseConfiguration] - scheduler - sqlite : Optional[SQLiteDatabaseConfiguration] - + + + + QuotaHandlersConfiguration + + enable_token_history : bool + limiters : list[QuotaLimiterConfiguration] + postgres : Optional[PostgreSQLDatabaseConfiguration] + scheduler + sqlite : Optional[SQLiteDatabaseConfiguration] + - - - - QuotaLimiterConfiguration - - initial_quota - name : str - period : 
str - quota_increase - type : Literal['user_limiter', 'cluster_limiter'] - + + + + QuotaLimiterConfiguration + + initial_quota + name : str + period : str + quota_increase + type : Literal['user_limiter', 'cluster_limiter'] + - - - - QuotaSchedulerConfiguration - - database_reconnection_count - database_reconnection_delay - period - + + + + QuotaSchedulerConfiguration + + database_reconnection_count + database_reconnection_delay + period + - - - - RHIdentityConfiguration - - required_entitlements : Optional[list[str]] - + + + + RHIdentityConfiguration + + required_entitlements : Optional[list[str]] + + + + + + + + RagConfiguration + + inline : list[str] + tool : Optional[list[str]] + - - - - SQLiteDatabaseConfiguration - - db_path : str - + + + + SQLiteDatabaseConfiguration + + db_path : str + - - - - ServiceConfiguration - - access_log : bool - auth_enabled : bool - base_url : Optional[str] - color_log : bool - cors - host : str - port - root_path : str - tls_config - workers - - check_service_configuration() -> Self - validate_root_path(value: str) -> str - - - - - - - SolrConfiguration - - enabled : bool - offline : bool - + + + + ServiceConfiguration + + access_log : bool + auth_enabled : bool + base_url : Optional[str] + color_log : bool + cors + host : str + port + root_path : str + tls_config + workers + + check_service_configuration() -> Self + validate_root_path(value: str) -> str - - - - SplunkConfiguration - - enabled : bool - index : Optional[str] - source : str - timeout - token_path : Optional[FilePath] - url : Optional[str] - verify_ssl : bool - - check_splunk_configuration() -> Self + + + + SplunkConfiguration + + enabled : bool + index : Optional[str] + source : str + timeout + token_path : Optional[FilePath] + url : Optional[str] + verify_ssl : bool + + check_splunk_configuration() -> Self - - - - TLSConfiguration - - tls_certificate_path : Optional[FilePath] - tls_key_password : Optional[FilePath] - tls_key_path : Optional[FilePath] - - 
check_tls_configuration() -> Self + + + + TLSConfiguration + + tls_certificate_path : Optional[FilePath] + tls_key_password : Optional[FilePath] + tls_key_path : Optional[FilePath] + + check_tls_configuration() -> Self - - - - UserDataCollection - - feedback_enabled : bool - feedback_storage : Optional[str] - transcripts_enabled : bool - transcripts_storage : Optional[str] - - check_storage_location_is_set_when_needed() -> Self + + + + UserDataCollection + + feedback_enabled : bool + feedback_storage : Optional[str] + transcripts_enabled : bool + transcripts_storage : Optional[str] + + check_storage_location_is_set_when_needed() -> Self - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + + + + + - - + + - - + + - - + + - - + + - - + + + + + + + - - + + - - - - - - - + + - - + + - - + + - - + + - - - a2a_state + + + a2a_state - - - authentication + + + authentication - - - cors + + + cors - - - conversation_cache + + + conversation_cache - - - custom_profile + + + custom_profile - - - database + + + database - - - inference + + + inference - - - operator + + + operator - - - jwt_configuration + + + jwt_configuration - - - llama_stack + + + llama_stack + + + + + + okp - - - quota_handlers + + + quota_handlers - - - scheduler + + + scheduler + + + + + + rag - - - sqlite + + + sqlite - - - service + + + service - - - tls_config + + + tls_config - - - user_data_collection + + + user_data_collection - + diff --git a/docs/e2e_testing.md b/docs/e2e_testing.md index 4035a17d0..64eff79b2 100644 --- a/docs/e2e_testing.md +++ b/docs/e2e_testing.md @@ -58,7 +58,7 @@ tests/e2e/ ├── utils/ │ ├── utils.py # restart_container, switch_config, wait_for_container_health, etc. │ ├── prow_utils.py # Prow/OpenShift helpers (restore_llama_stack_pod, etc.) 
-│ └── llama_stack_shields.py # Shield unregister/register (server mode, optional) +│ └── llama_stack_utils.py # Toolgroups + shield unregister/register (server mode, optional) ├── mock_mcp_server/ # Mock MCP server for MCP tests └── rag/ # RAG test data (e.g. for FAISS) ``` diff --git a/docs/openapi.json b/docs/openapi.json index bff858bf2..80873199d 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -3795,19 +3795,19 @@ } } }, - "/v1/infer": { + "/v1/responses": { "post": { "tags": [ - "rlsapi-v1" + "responses" ], - "summary": "Infer Endpoint", - "description": "Handle rlsapi v1 /infer requests for stateless inference.\n\nThis endpoint serves requests from the RHEL Lightspeed Command Line Assistant (CLA).\n\nAccepts a question with optional context (stdin, attachments, terminal output,\nsystem info) and returns an LLM-generated response.\n\nArgs:\n infer_request: The inference request containing question and context.\n request: The FastAPI request object for accessing headers and state.\n background_tasks: FastAPI background tasks for async Splunk event sending.\n auth: Authentication tuple from the configured auth provider.\n\nReturns:\n RlsapiV1InferResponse containing the generated response text and request ID.\n\nRaises:\n HTTPException: 503 if the LLM service is unavailable.", - "operationId": "infer_endpoint_v1_infer_post", + "summary": "Responses Endpoint Handler", + "description": "Handle request to the /responses endpoint using Responses API (LCORE specification).\n\nProcesses a POST request to the responses endpoint, forwarding the\nuser's request to a selected Llama Stack LLM and returning the generated response\nfollowing the LCORE OpenAPI specification.\n\nReturns:\n ResponsesResponse: Contains the response following LCORE specification (non-streaming).\n StreamingResponse: SSE-formatted streaming response with enriched events (streaming).\n - response.created event includes conversation attribute\n - response.completed event includes 
available_quotas attribute\n\nRaises:\n HTTPException:\n - 401: Unauthorized - Missing or invalid credentials\n - 403: Forbidden - Insufficient permissions or model override not allowed\n - 404: Not Found - Conversation, model, or provider not found\n - 413: Prompt too long - Prompt exceeded model's context window size\n - 422: Unprocessable Entity - Request validation failed\n - 429: Quota limit exceeded - The token quota for model or user has been exceeded\n - 500: Internal Server Error - Configuration not loaded or other server errors\n - 503: Service Unavailable - Unable to connect to Llama Stack backend", + "operationId": "responses_endpoint_handler_v1_responses_post", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/RlsapiV1InferRequest" + "$ref": "#/components/schemas/ResponsesRequest" } } }, @@ -3819,14 +3819,59 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/RlsapiV1InferResponse" + "$ref": "#/components/schemas/ResponsesResponse" }, "example": { - "data": { - "request_id": "01JDKR8N7QW9ZMXVGK3PB5TQWZ", - "text": "To list files in Linux, use the `ls` command." 
+ "available_quotas": { + "daily": 1000, + "monthly": 50000 + }, + "completed_at": 1704067250, + "conversation": "0d21ba731f21f798dc9680125d5d6f493e4a7ab79f25670e", + "created_at": 1704067200, + "id": "resp_abc123", + "instructions": "You are a helpful assistant", + "model": "openai/gpt-4-turbo", + "object": "response", + "output": [ + { + "content": [ + { + "text": "Kubernetes is an open-source container orchestration system...", + "type": "output_text" + } + ], + "role": "assistant", + "type": "message" + } + ], + "output_text": "Kubernetes is an open-source container orchestration system...", + "parallel_tool_calls": true, + "status": "completed", + "store": true, + "temperature": 0.7, + "text": { + "format": { + "type": "text" + } + }, + "usage": { + "input_tokens": 100, + "output_tokens": 50, + "total_tokens": 150 } } + }, + "text/event-stream": { + "schema": { + "type": "string" + }, + "examples": { + "stream": { + "value": "event: response.created\ndata: {\"type\":\"response.created\",\"sequence_number\":0,\"response\":{\"id\":\"resp_abc\",\"created_at\":1704067200,\"status\":\"in_progress\",\"output\":[],\"conversation\":\"0d21ba731f21f798dc9680125d5d6f49\",\"available_quotas\":{},\"output_text\":\"\"}}\n\nevent: response.output_item.added\ndata: {\"response_id\":\"resp_abc\",\"item\":{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"output_text\",\"text\":\"Hello! How can I help?\"}]},\"output_index\":0,\"sequence_number\":1}\n\nevent: response.output_item.done\ndata: {\"response_id\":\"resp_abc\",\"item\":{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"output_text\",\"text\":\"Hello! 
How can I help?\"}]},\"output_index\":0,\"sequence_number\":2}\n\nevent: response.completed\ndata: {\"type\":\"response.completed\",\"sequence_number\":3,\"response\":{\"id\":\"resp_abc\",\"created_at\":1704067200,\"completed_at\":1704067250,\"status\":\"completed\",\"output\":[{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"output_text\",\"text\":\"Hello! How can I help?\"}]}],\"usage\":{\"input_tokens\":10,\"output_tokens\":6,\"total_tokens\":16},\"conversation\":\"0d21ba731f21f798dc9680125d5d6f49\",\"available_quotas\":{\"daily\":1000,\"monthly\":50000},\"output_text\":\"Hello! How can I help?\"}}\n\ndata: [DONE]\n\n" + } + }, + "description": "SSE stream of events" } } }, @@ -3866,6 +3911,14 @@ "$ref": "#/components/schemas/ForbiddenResponse" }, "examples": { + "conversation read": { + "value": { + "detail": { + "cause": "User 6789 does not have permission to read conversation with ID 123e4567-e89b-12d3-a456-426614174000", + "response": "User does not have permission to perform this action" + } + } + }, "endpoint": { "value": { "detail": { @@ -3873,6 +3926,50 @@ "response": "User does not have permission to access this endpoint" } } + }, + "model override": { + "value": { + "detail": { + "cause": "User lacks model_override permission required to override model/provider.", + "response": "This instance does not permit overriding model/provider in the query request (missing permission: MODEL_OVERRIDE). Please remove the model and provider fields from your request." 
+ } + } + } + } + } + } + }, + "404": { + "description": "Resource not found", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/NotFoundResponse" + }, + "examples": { + "conversation": { + "value": { + "detail": { + "cause": "Conversation with ID 123e4567-e89b-12d3-a456-426614174000 does not exist", + "response": "Conversation not found" + } + } + }, + "provider": { + "value": { + "detail": { + "cause": "Provider with ID openai does not exist", + "response": "Provider not found" + } + } + }, + "model": { + "value": { + "detail": { + "cause": "Model with ID gpt-4-turbo is not configured", + "response": "Model not found" + } + } } } } @@ -4008,6 +4105,16 @@ "application/json": { "schema": { "$ref": "#/components/schemas/InternalServerErrorResponse" + }, + "examples": { + "configuration": { + "value": { + "detail": { + "cause": "Lightspeed Stack configuration has not been initialized.", + "response": "Configuration is not loaded" + } + } + } } } } @@ -4035,26 +4142,37 @@ } } }, - "/readiness": { - "get": { + "/v1/infer": { + "post": { "tags": [ - "health" + "rlsapi-v1" ], - "summary": "Readiness Probe Get Method", - "description": "Handle the readiness probe endpoint, returning service readiness.\n\nIf any provider reports an error status, responds with HTTP 503\nand details of unhealthy providers; otherwise, indicates the\nservice is ready.\n\nReturns:\n ReadinessResponse: Object with `ready` indicating overall readiness,\n `reason` explaining the outcome, and `providers` containing the list of\n unhealthy ProviderHealthStatus entries (empty when ready).", - "operationId": "readiness_probe_get_method_readiness_get", + "summary": "Infer Endpoint", + "description": "Handle rlsapi v1 /infer requests for stateless inference.\n\nThis endpoint serves requests from the RHEL Lightspeed Command Line Assistant (CLA).\n\nAccepts a question with optional context (stdin, attachments, terminal output,\nsystem info) and returns an LLM-generated 
response.\n\nArgs:\n infer_request: The inference request containing question and context.\n request: The FastAPI request object for accessing headers and state.\n background_tasks: FastAPI background tasks for async Splunk event sending.\n auth: Authentication tuple from the configured auth provider.\n\nReturns:\n RlsapiV1InferResponse containing the generated response text and request ID.\n\nRaises:\n HTTPException: 503 if the LLM service is unavailable.", + "operationId": "infer_endpoint_v1_infer_post", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RlsapiV1InferRequest" + } + } + }, + "required": true + }, "responses": { "200": { "description": "Successful response", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ReadinessResponse" + "$ref": "#/components/schemas/RlsapiV1InferResponse" }, "example": { - "providers": [], - "ready": true, - "reason": "Service is ready" + "data": { + "request_id": "01JDKR8N7QW9ZMXVGK3PB5TQWZ", + "text": "To list files in Linux, use the `ls` command." 
+ } } } } @@ -4107,167 +4225,153 @@ } } }, - "503": { - "description": "Service unavailable", + "413": { + "description": "Prompt is too long", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ServiceUnavailableResponse" + "$ref": "#/components/schemas/PromptTooLongResponse" }, "examples": { - "llama stack": { + "prompt too long": { "value": { "detail": { - "cause": "Connection error while trying to reach backend service.", - "response": "Unable to connect to Llama Stack" + "cause": "The prompt exceeds the maximum allowed length.", + "response": "Prompt is too long" } } } } } } - } - } - } - }, - "/liveness": { - "get": { - "tags": [ - "health" - ], - "summary": "Liveness Probe Get Method", - "description": "Return the liveness status of the service.\n\nReturns:\n LivenessResponse: Indicates that the service is alive.", - "operationId": "liveness_probe_get_method_liveness_get", - "responses": { - "200": { - "description": "Successful response", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/LivenessResponse" - }, - "example": { - "alive": true - } - } - } }, - "401": { - "description": "Unauthorized", + "422": { + "description": "Request validation failed", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/UnauthorizedResponse" + "$ref": "#/components/schemas/UnprocessableEntityResponse" }, "examples": { - "missing header": { + "invalid format": { "value": { "detail": { - "cause": "No Authorization header found", - "response": "Missing or invalid credentials provided by client" + "cause": "Invalid request format. 
The request body could not be parsed.", + "response": "Invalid request format" } } }, - "missing token": { + "missing attributes": { "value": { "detail": { - "cause": "No token found in Authorization header", - "response": "Missing or invalid credentials provided by client" + "cause": "Missing required attributes: ['query', 'model', 'provider']", + "response": "Missing required attributes" } } - } - } - } - } - }, - "403": { - "description": "Permission denied", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ForbiddenResponse" - }, - "examples": { - "endpoint": { + }, + "invalid value": { "value": { "detail": { - "cause": "User 6789 is not authorized to access this endpoint.", - "response": "User does not have permission to access this endpoint" + "cause": "Invalid attachment type: must be one of ['text/plain', 'application/json', 'application/yaml', 'application/xml']", + "response": "Invalid attribute value" } } } } } } - } - } - } - }, - "/authorized": { - "post": { - "tags": [ - "authorized" - ], - "summary": "Authorized Endpoint Handler", - "description": "Handle request to the /authorized endpoint.\n\nProcess POST requests to the /authorized endpoint, returning\nthe authenticated user's ID and username.\n\nThe response intentionally omits any authentication token.\n\nReturns:\n AuthorizedResponse: Contains the user ID and username of the authenticated user.", - "operationId": "authorized_endpoint_handler_authorized_post", - "responses": { - "200": { - "description": "Successful response", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/AuthorizedResponse" - }, - "example": { - "skip_userid_check": false, - "user_id": "123e4567-e89b-12d3-a456-426614174000", - "username": "user1" - } - } - } }, - "401": { - "description": "Unauthorized", + "429": { + "description": "Quota limit exceeded", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/UnauthorizedResponse" 
+ "$ref": "#/components/schemas/QuotaExceededResponse" }, "examples": { - "missing header": { + "model": { "value": { "detail": { - "cause": "No Authorization header found", - "response": "Missing or invalid credentials provided by client" + "cause": "The token quota for model gpt-4-turbo has been exceeded.", + "response": "The model quota has been exceeded" } } }, - "missing token": { + "user none": { "value": { "detail": { - "cause": "No token found in Authorization header", - "response": "Missing or invalid credentials provided by client" + "cause": "User 123 has no available tokens.", + "response": "The quota has been exceeded" } } - } - } - } - } - }, - "403": { - "description": "Permission denied", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ForbiddenResponse" - }, - "examples": { - "endpoint": { + }, + "cluster none": { "value": { "detail": { - "cause": "User 6789 is not authorized to access this endpoint.", - "response": "User does not have permission to access this endpoint" + "cause": "Cluster has no available tokens.", + "response": "The quota has been exceeded" + } + } + }, + "subject none": { + "value": { + "detail": { + "cause": "Unknown subject 999 has no available tokens.", + "response": "The quota has been exceeded" + } + } + }, + "user insufficient": { + "value": { + "detail": { + "cause": "User 123 has 5 tokens, but 10 tokens are needed.", + "response": "The quota has been exceeded" + } + } + }, + "cluster insufficient": { + "value": { + "detail": { + "cause": "Cluster has 500 tokens, but 900 tokens are needed.", + "response": "The quota has been exceeded" + } + } + }, + "subject insufficient": { + "value": { + "detail": { + "cause": "Unknown subject 999 has 3 tokens, but 6 tokens are needed.", + "response": "The quota has been exceeded" + } + } + } + } + } + } + }, + "500": { + "description": "Internal server error", + "content": { + "application/json": { + "schema": { + "$ref": 
"#/components/schemas/InternalServerErrorResponse" + } + } + } + }, + "503": { + "description": "Service unavailable", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ServiceUnavailableResponse" + }, + "examples": { + "llama stack": { + "value": { + "detail": { + "cause": "Connection error while trying to reach backend service.", + "response": "Unable to connect to Llama Stack" } } } @@ -4278,21 +4382,26 @@ } } }, - "/metrics": { + "/readiness": { "get": { "tags": [ - "metrics" + "health" ], - "summary": "Metrics Endpoint Handler", - "description": "Handle request to the /metrics endpoint.\n\nProcess GET requests to the /metrics endpoint, returning the\nlatest Prometheus metrics in form of a plain text.\n\nInitializes model metrics on the first request if not already\nset up, then responds with the current metrics snapshot in\nPrometheus format.\n\nReturns:\n PlainTextResponse: Response body containing the Prometheus metrics text\n and the Prometheus content type.", - "operationId": "metrics_endpoint_handler_metrics_get", + "summary": "Readiness Probe Get Method", + "description": "Handle the readiness probe endpoint, returning service readiness.\n\nIf any provider reports an error status, responds with HTTP 503\nand details of unhealthy providers; otherwise, indicates the\nservice is ready.\n\nReturns:\n ReadinessResponse: Object with `ready` indicating overall readiness,\n `reason` explaining the outcome, and `providers` containing the list of\n unhealthy ProviderHealthStatus entries (empty when ready).", + "operationId": "readiness_probe_get_method_readiness_get", "responses": { "200": { - "description": "Successful Response", + "description": "Successful response", "content": { - "text/plain": { + "application/json": { "schema": { - "type": "string" + "$ref": "#/components/schemas/ReadinessResponse" + }, + "example": { + "providers": [], + "ready": true, + "reason": "Service is ready" } } } @@ -4301,6 +4410,9 @@ 
"description": "Unauthorized", "content": { "application/json": { + "schema": { + "$ref": "#/components/schemas/UnauthorizedResponse" + }, "examples": { "missing header": { "value": { @@ -4319,11 +4431,6 @@ } } } - }, - "text/plain": { - "schema": { - "$ref": "#/components/schemas/UnauthorizedResponse" - } } } }, @@ -4331,6 +4438,9 @@ "description": "Permission denied", "content": { "application/json": { + "schema": { + "$ref": "#/components/schemas/ForbiddenResponse" + }, "examples": { "endpoint": { "value": { @@ -4341,33 +4451,6 @@ } } } - }, - "text/plain": { - "schema": { - "$ref": "#/components/schemas/ForbiddenResponse" - } - } - } - }, - "500": { - "description": "Internal server error", - "content": { - "application/json": { - "examples": { - "configuration": { - "value": { - "detail": { - "cause": "Lightspeed Stack configuration has not been initialized.", - "response": "Configuration is not loaded" - } - } - } - } - }, - "text/plain": { - "schema": { - "$ref": "#/components/schemas/InternalServerErrorResponse" - } } } }, @@ -4375,6 +4458,9 @@ "description": "Service unavailable", "content": { "application/json": { + "schema": { + "$ref": "#/components/schemas/ServiceUnavailableResponse" + }, "examples": { "llama stack": { "value": { @@ -4385,162 +4471,2725 @@ } } } - }, - "text/plain": { - "schema": { - "$ref": "#/components/schemas/ServiceUnavailableResponse" - } } } } } } }, - "/.well-known/agent-card.json": { + "/liveness": { "get": { "tags": [ - "a2a" + "health" ], - "summary": "Get Agent Card", - "description": "Serve the A2A Agent Card at the well-known location.\n\nThis endpoint provides the agent card that describes Lightspeed's\ncapabilities according to the A2A protocol specification.\n\nReturns:\n AgentCard: The agent card describing this agent's capabilities.", - "operationId": "get_agent_card__well_known_agent_card_json_get", + "summary": "Liveness Probe Get Method", + "description": "Return the liveness status of the service.\n\nReturns:\n 
LivenessResponse: Indicates that the service is alive.", + "operationId": "liveness_probe_get_method_liveness_get", "responses": { "200": { - "description": "Successful Response", + "description": "Successful response", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/AgentCard" + "$ref": "#/components/schemas/LivenessResponse" + }, + "example": { + "alive": true } } } - } - } - } - }, - "/.well-known/agent.json": { - "get": { - "tags": [ - "a2a" - ], - "summary": "Get Agent Card", - "description": "Serve the A2A Agent Card at the well-known location.\n\nThis endpoint provides the agent card that describes Lightspeed's\ncapabilities according to the A2A protocol specification.\n\nReturns:\n AgentCard: The agent card describing this agent's capabilities.", - "operationId": "get_agent_card__well_known_agent_json_get", - "responses": { - "200": { - "description": "Successful Response", + }, + "401": { + "description": "Unauthorized", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/AgentCard" + "$ref": "#/components/schemas/UnauthorizedResponse" + }, + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } } } } - } - } - } - }, - "/a2a": { - "get": { - "tags": [ - "a2a" - ], - "summary": "Handle A2A Jsonrpc", - "description": "Handle A2A JSON-RPC requests following the A2A protocol specification.\n\nThis endpoint uses the DefaultRequestHandler from the A2A SDK to handle\nall JSON-RPC requests including message/send, message/stream, etc.\n\nThe A2A SDK application is created per-request to include authentication\ncontext while still leveraging FastAPI's authorization middleware.\n\nAutomatically detects 
streaming requests (message/stream JSON-RPC method)\nand returns a StreamingResponse to enable real-time chunk delivery.\n\nArgs:\n request: FastAPI request object\n auth: Authentication tuple\n mcp_headers: MCP headers for context propagation\n\nReturns:\n JSON-RPC response or streaming response", - "operationId": "handle_a2a_jsonrpc_a2a_get", - "responses": { - "200": { - "description": "Successful Response", - "content": { - "application/json": { - "schema": {} - } - } - } - } - }, - "post": { - "tags": [ - "a2a" - ], - "summary": "Handle A2A Jsonrpc", - "description": "Handle A2A JSON-RPC requests following the A2A protocol specification.\n\nThis endpoint uses the DefaultRequestHandler from the A2A SDK to handle\nall JSON-RPC requests including message/send, message/stream, etc.\n\nThe A2A SDK application is created per-request to include authentication\ncontext while still leveraging FastAPI's authorization middleware.\n\nAutomatically detects streaming requests (message/stream JSON-RPC method)\nand returns a StreamingResponse to enable real-time chunk delivery.\n\nArgs:\n request: FastAPI request object\n auth: Authentication tuple\n mcp_headers: MCP headers for context propagation\n\nReturns:\n JSON-RPC response or streaming response", - "operationId": "handle_a2a_jsonrpc_a2a_get", - "responses": { - "200": { - "description": "Successful Response", + }, + "403": { + "description": "Permission denied", "content": { "application/json": { - "schema": {} + "schema": { + "$ref": "#/components/schemas/ForbiddenResponse" + }, + "examples": { + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + } + } } } } } } }, - "/a2a/health": { - "get": { + "/authorized": { + "post": { "tags": [ - "a2a" + "authorized" ], - "summary": "A2A Health Check", - "description": "Health check endpoint for A2A service.\n\nReturns:\n Dict with health 
status information.", - "operationId": "a2a_health_check_a2a_health_get", + "summary": "Authorized Endpoint Handler", + "description": "Handle request to the /authorized endpoint.\n\nProcess POST requests to the /authorized endpoint, returning\nthe authenticated user's ID and username.\n\nThe response intentionally omits any authentication token.\n\nReturns:\n AuthorizedResponse: Contains the user ID and username of the authenticated user.", + "operationId": "authorized_endpoint_handler_authorized_post", "responses": { "200": { - "description": "Successful Response", + "description": "Successful response", "content": { "application/json": { "schema": { - "additionalProperties": { - "type": "string" + "$ref": "#/components/schemas/AuthorizedResponse" + }, + "example": { + "skip_userid_check": false, + "user_id": "123e4567-e89b-12d3-a456-426614174000", + "username": "user1" + } + } + } + }, + "401": { + "description": "Unauthorized", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UnauthorizedResponse" + }, + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } }, - "type": "object", - "title": "Response A2A Health Check A2A Health Get" + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } + } + } + } + }, + "403": { + "description": "Permission denied", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ForbiddenResponse" + }, + "examples": { + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + } + } + } + } + } + } + } + }, + "/metrics": { + "get": { + "tags": [ + "metrics" + ], + "summary": "Metrics Endpoint 
Handler", + "description": "Handle request to the /metrics endpoint.\n\nProcess GET requests to the /metrics endpoint, returning the\nlatest Prometheus metrics in form of a plain text.\n\nInitializes model metrics on the first request if not already\nset up, then responds with the current metrics snapshot in\nPrometheus format.\n\nReturns:\n PlainTextResponse: Response body containing the Prometheus metrics text\n and the Prometheus content type.", + "operationId": "metrics_endpoint_handler_metrics_get", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "text/plain": { + "schema": { + "type": "string" + } + } + } + }, + "401": { + "description": "Unauthorized", + "content": { + "application/json": { + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } + } + }, + "text/plain": { + "schema": { + "$ref": "#/components/schemas/UnauthorizedResponse" + } + } + } + }, + "403": { + "description": "Permission denied", + "content": { + "application/json": { + "examples": { + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + } + } + }, + "text/plain": { + "schema": { + "$ref": "#/components/schemas/ForbiddenResponse" + } + } + } + }, + "500": { + "description": "Internal server error", + "content": { + "application/json": { + "examples": { + "configuration": { + "value": { + "detail": { + "cause": "Lightspeed Stack configuration has not been initialized.", + "response": "Configuration is not loaded" + } + } + } + } + }, + "text/plain": { + "schema": { + "$ref": 
"#/components/schemas/InternalServerErrorResponse" + } + } + } + }, + "503": { + "description": "Service unavailable", + "content": { + "application/json": { + "examples": { + "llama stack": { + "value": { + "detail": { + "cause": "Connection error while trying to reach backend service.", + "response": "Unable to connect to Llama Stack" + } + } + } + } + }, + "text/plain": { + "schema": { + "$ref": "#/components/schemas/ServiceUnavailableResponse" + } + } + } + } + } + } + }, + "/.well-known/agent-card.json": { + "get": { + "tags": [ + "a2a" + ], + "summary": "Get Agent Card", + "description": "Serve the A2A Agent Card at the well-known location.\n\nThis endpoint provides the agent card that describes Lightspeed's\ncapabilities according to the A2A protocol specification.\n\nReturns:\n AgentCard: The agent card describing this agent's capabilities.", + "operationId": "get_agent_card__well_known_agent_card_json_get", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/AgentCard" + } + } + } + } + } + } + }, + "/.well-known/agent.json": { + "get": { + "tags": [ + "a2a" + ], + "summary": "Get Agent Card", + "description": "Serve the A2A Agent Card at the well-known location.\n\nThis endpoint provides the agent card that describes Lightspeed's\ncapabilities according to the A2A protocol specification.\n\nReturns:\n AgentCard: The agent card describing this agent's capabilities.", + "operationId": "get_agent_card__well_known_agent_json_get", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/AgentCard" } } } } - } - } - } - }, - "components": { - "schemas": { - "A2AStateConfiguration": { + } + } + }, + "/a2a": { + "get": { + "tags": [ + "a2a" + ], + "summary": "Handle A2A Jsonrpc", + "description": "Handle A2A JSON-RPC requests following the A2A protocol 
specification.\n\nThis endpoint uses the DefaultRequestHandler from the A2A SDK to handle\nall JSON-RPC requests including message/send, message/stream, etc.\n\nThe A2A SDK application is created per-request to include authentication\ncontext while still leveraging FastAPI's authorization middleware.\n\nAutomatically detects streaming requests (message/stream JSON-RPC method)\nand returns a StreamingResponse to enable real-time chunk delivery.\n\nArgs:\n request: FastAPI request object\n auth: Authentication tuple\n mcp_headers: MCP headers for context propagation\n\nReturns:\n JSON-RPC response or streaming response", + "operationId": "handle_a2a_jsonrpc_a2a_get", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + } + } + }, + "post": { + "tags": [ + "a2a" + ], + "summary": "Handle A2A Jsonrpc", + "description": "Handle A2A JSON-RPC requests following the A2A protocol specification.\n\nThis endpoint uses the DefaultRequestHandler from the A2A SDK to handle\nall JSON-RPC requests including message/send, message/stream, etc.\n\nThe A2A SDK application is created per-request to include authentication\ncontext while still leveraging FastAPI's authorization middleware.\n\nAutomatically detects streaming requests (message/stream JSON-RPC method)\nand returns a StreamingResponse to enable real-time chunk delivery.\n\nArgs:\n request: FastAPI request object\n auth: Authentication tuple\n mcp_headers: MCP headers for context propagation\n\nReturns:\n JSON-RPC response or streaming response", + "operationId": "handle_a2a_jsonrpc_a2a_post", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + } + } + } + }, + "/a2a/health": { + "get": { + "tags": [ + "a2a" + ], + "summary": "A2A Health Check", + "description": "Health check endpoint for A2A service.\n\nReturns:\n Dict with health status information.", + "operationId":
"a2a_health_check_a2a_health_get", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "title": "Response A2A Health Check A2A Health Get" + } + } + } + } + } + } + } + }, + "components": { + "schemas": { + "A2AStateConfiguration": { + "properties": { + "sqlite": { + "anyOf": [ + { + "$ref": "#/components/schemas/SQLiteDatabaseConfiguration" + }, + { + "type": "null" + } + ], + "title": "SQLite configuration", + "description": "SQLite database configuration for A2A state storage." + }, + "postgres": { + "anyOf": [ + { + "$ref": "#/components/schemas/PostgreSQLDatabaseConfiguration" + }, + { + "type": "null" + } + ], + "title": "PostgreSQL configuration", + "description": "PostgreSQL database configuration for A2A state storage." + } + }, + "additionalProperties": false, + "type": "object", + "title": "A2AStateConfiguration", + "description": "A2A protocol persistent state configuration.\n\nConfigures how A2A task state and context-to-conversation mappings are\nstored. For multi-worker deployments, use SQLite or PostgreSQL to ensure\nstate is shared across all workers.\n\nIf no configuration is provided, in-memory storage is used (default).\nThis is suitable for single-worker deployments but state will be lost\non restarts and not shared across workers.\n\nAttributes:\n sqlite: SQLite database configuration for A2A state storage.\n postgres: PostgreSQL database configuration for A2A state storage." 
+ }, + "APIKeySecurityScheme": { + "properties": { + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description" + }, + "in": { + "$ref": "#/components/schemas/In" + }, + "name": { + "type": "string", + "title": "Name" + }, + "type": { + "type": "string", + "const": "apiKey", + "title": "Type", + "default": "apiKey" + } + }, + "type": "object", + "required": [ + "in", + "name" + ], + "title": "APIKeySecurityScheme", + "description": "Defines a security scheme using an API key." + }, + "APIKeyTokenConfiguration": { + "properties": { + "api_key": { + "type": "string", + "minLength": 1, + "format": "password", + "title": "API key", + "writeOnly": true, + "examples": [ + "some-api-key" + ] + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "api_key" + ], + "title": "APIKeyTokenConfiguration", + "description": "API Key Token configuration." + }, + "AccessRule": { + "properties": { + "role": { + "type": "string", + "title": "Role name", + "description": "Name of the role" + }, + "actions": { + "items": { + "$ref": "#/components/schemas/Action" + }, + "type": "array", + "title": "Allowed actions", + "description": "Allowed actions for this role" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "role", + "actions" + ], + "title": "AccessRule", + "description": "Rule defining what actions a role can perform." 
+ }, + "Action": { + "type": "string", + "enum": [ + "admin", + "list_other_conversations", + "read_other_conversations", + "query_other_conversations", + "delete_other_conversations", + "query", + "streaming_query", + "get_conversation", + "list_conversations", + "delete_conversation", + "update_conversation", + "feedback", + "get_models", + "get_tools", + "get_shields", + "list_providers", + "get_provider", + "list_rags", + "get_rag", + "get_metrics", + "get_config", + "info", + "model_override", + "rlsapi_v1_infer", + "a2a_agent_card", + "a2a_task_execution", + "a2a_message", + "a2a_jsonrpc" + ], + "title": "Action", + "description": "Available actions in the system.\n\nNote: this is not a real model, just an enumeration of all action names." + }, + "AgentCapabilities": { + "properties": { + "extensions": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/AgentExtension" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Extensions" + }, + "pushNotifications": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Pushnotifications" + }, + "stateTransitionHistory": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Statetransitionhistory" + }, + "streaming": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Streaming" + } + }, + "type": "object", + "title": "AgentCapabilities", + "description": "Defines optional capabilities supported by an agent." 
+ }, + "AgentCard": { + "properties": { + "additionalInterfaces": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/AgentInterface" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Additionalinterfaces" + }, + "capabilities": { + "$ref": "#/components/schemas/AgentCapabilities" + }, + "defaultInputModes": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Defaultinputmodes" + }, + "defaultOutputModes": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Defaultoutputmodes" + }, + "description": { + "type": "string", + "title": "Description", + "examples": [ + "Agent that helps users with recipes and cooking." + ] + }, + "documentationUrl": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Documentationurl" + }, + "iconUrl": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Iconurl" + }, + "name": { + "type": "string", + "title": "Name", + "examples": [ + "Recipe Agent" + ] + }, + "preferredTransport": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Preferredtransport", + "default": "JSONRPC", + "examples": [ + "JSONRPC", + "GRPC", + "HTTP+JSON" + ] + }, + "protocolVersion": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Protocolversion", + "default": "0.3.0" + }, + "provider": { + "anyOf": [ + { + "$ref": "#/components/schemas/AgentProvider" + }, + { + "type": "null" + } + ] + }, + "security": { + "anyOf": [ + { + "items": { + "additionalProperties": { + "items": { + "type": "string" + }, + "type": "array" + }, + "type": "object" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Security", + "examples": [ + [ + { + "oauth": [ + "read" + ] + }, + { + "api-key": [], + "mtls": [] + } + ] + ] + }, + "securitySchemes": { + "anyOf": [ + { + "additionalProperties": { + "$ref": "#/components/schemas/SecurityScheme" + }, + "type": "object" + 
}, + { + "type": "null" + } + ], + "title": "Securityschemes" + }, + "signatures": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/AgentCardSignature" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Signatures" + }, + "skills": { + "items": { + "$ref": "#/components/schemas/AgentSkill" + }, + "type": "array", + "title": "Skills" + }, + "supportsAuthenticatedExtendedCard": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Supportsauthenticatedextendedcard" + }, + "url": { + "type": "string", + "title": "Url", + "examples": [ + "https://api.example.com/a2a/v1" + ] + }, + "version": { + "type": "string", + "title": "Version", + "examples": [ + "1.0.0" + ] + } + }, + "type": "object", + "required": [ + "capabilities", + "defaultInputModes", + "defaultOutputModes", + "description", + "name", + "skills", + "url", + "version" + ], + "title": "AgentCard", + "description": "The AgentCard is a self-describing manifest for an agent. It provides essential\nmetadata including the agent's identity, capabilities, skills, supported\ncommunication methods, and security requirements." + }, + "AgentCardSignature": { + "properties": { + "header": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Header" + }, + "protected": { + "type": "string", + "title": "Protected" + }, + "signature": { + "type": "string", + "title": "Signature" + } + }, + "type": "object", + "required": [ + "protected", + "signature" + ], + "title": "AgentCardSignature", + "description": "AgentCardSignature represents a JWS signature of an AgentCard.\nThis follows the JSON format of an RFC 7515 JSON Web Signature (JWS)." 
+ }, + "AgentExtension": { + "properties": { + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description" + }, + "params": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Params" + }, + "required": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Required" + }, + "uri": { + "type": "string", + "title": "Uri" + } + }, + "type": "object", + "required": [ + "uri" + ], + "title": "AgentExtension", + "description": "A declaration of a protocol extension supported by an Agent." + }, + "AgentInterface": { + "properties": { + "transport": { + "type": "string", + "title": "Transport", + "examples": [ + "JSONRPC", + "GRPC", + "HTTP+JSON" + ] + }, + "url": { + "type": "string", + "title": "Url", + "examples": [ + "https://api.example.com/a2a/v1", + "https://grpc.example.com/a2a", + "https://rest.example.com/v1" + ] + } + }, + "type": "object", + "required": [ + "transport", + "url" + ], + "title": "AgentInterface", + "description": "Declares a combination of a target URL and a transport protocol for interacting with the agent.\nThis allows agents to expose the same functionality over multiple transport mechanisms." + }, + "AgentProvider": { + "properties": { + "organization": { + "type": "string", + "title": "Organization" + }, + "url": { + "type": "string", + "title": "Url" + } + }, + "type": "object", + "required": [ + "organization", + "url" + ], + "title": "AgentProvider", + "description": "Represents the service provider of an agent." 
+ }, + "AgentSkill": { + "properties": { + "description": { + "type": "string", + "title": "Description" + }, + "examples": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Examples", + "examples": [ + [ + "I need a recipe for bread" + ] + ] + }, + "id": { + "type": "string", + "title": "Id" + }, + "inputModes": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Inputmodes" + }, + "name": { + "type": "string", + "title": "Name" + }, + "outputModes": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Outputmodes" + }, + "security": { + "anyOf": [ + { + "items": { + "additionalProperties": { + "items": { + "type": "string" + }, + "type": "array" + }, + "type": "object" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Security", + "examples": [ + [ + { + "google": [ + "oidc" + ] + } + ] + ] + }, + "tags": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Tags", + "examples": [ + [ + "cooking", + "customer support", + "billing" + ] + ] + } + }, + "type": "object", + "required": [ + "description", + "id", + "name", + "tags" + ], + "title": "AgentSkill", + "description": "Represents a distinct capability or function that an agent can perform." 
+ }, + "AllowedToolsFilter": { + "properties": { + "tool_names": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tool Names" + } + }, + "type": "object", + "title": "AllowedToolsFilter", + "description": "Filter configuration for restricting which MCP tools can be used.\n\n:param tool_names: (Optional) List of specific tool names that are allowed" + }, + "ApprovalFilter": { + "properties": { + "always": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Always" + }, + "never": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Never" + } + }, + "type": "object", + "title": "ApprovalFilter", + "description": "Filter configuration for MCP tool approval requirements.\n\n:param always: (Optional) List of tool names that always require approval\n:param never: (Optional) List of tool names that never require approval" + }, + "Attachment": { + "properties": { + "attachment_type": { + "type": "string", + "title": "Attachment Type", + "description": "The attachment type, like 'log', 'configuration' etc.", + "examples": [ + "log" + ] + }, + "content_type": { + "type": "string", + "title": "Content Type", + "description": "The content type as defined in MIME standard", + "examples": [ + "text/plain" + ] + }, + "content": { + "type": "string", + "title": "Content", + "description": "The actual attachment content", + "examples": [ + "warning: quota exceeded" + ] + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "attachment_type", + "content_type", + "content" + ], + "title": "Attachment", + "description": "Model representing an attachment that can be send from the UI as part of query.\n\nA list of attachments can be an optional part of 'query' request.\n\nAttributes:\n attachment_type: The attachment type, like \"log\", \"configuration\" 
etc.\n content_type: The content type as defined in MIME standard\n content: The actual attachment content\n\nYAML attachments with **kind** and **metadata/name** attributes will\nbe handled as resources with the specified name:\n```\nkind: Pod\nmetadata:\n name: private-reg\n```", + "examples": [ + { + "attachment_type": "log", + "content": "this is attachment", + "content_type": "text/plain" + }, + { + "attachment_type": "configuration", + "content": "kind: Pod\n metadata:\n name: private-reg", + "content_type": "application/yaml" + }, + { + "attachment_type": "configuration", + "content": "foo: bar", + "content_type": "application/yaml" + } + ] + }, + "AuthenticationConfiguration": { + "properties": { + "module": { + "type": "string", + "title": "Module", + "default": "noop" + }, + "skip_tls_verification": { + "type": "boolean", + "title": "Skip Tls Verification", + "default": false + }, + "skip_for_health_probes": { + "type": "boolean", + "title": "Skip authorization for probes", + "description": "Skip authorization for readiness and liveness probes", + "default": false + }, + "k8s_cluster_api": { + "anyOf": [ + { + "type": "string", + "minLength": 1, + "format": "uri" + }, + { + "type": "null" + } + ], + "title": "K8S Cluster Api" + }, + "k8s_ca_cert_path": { + "anyOf": [ + { + "type": "string", + "format": "file-path" + }, + { + "type": "null" + } + ], + "title": "K8S Ca Cert Path" + }, + "jwk_config": { + "anyOf": [ + { + "$ref": "#/components/schemas/JwkConfiguration" + }, + { + "type": "null" + } + ] + }, + "api_key_config": { + "anyOf": [ + { + "$ref": "#/components/schemas/APIKeyTokenConfiguration" + }, + { + "type": "null" + } + ] + }, + "rh_identity_config": { + "anyOf": [ + { + "$ref": "#/components/schemas/RHIdentityConfiguration" + }, + { + "type": "null" + } + ] + } + }, + "additionalProperties": false, + "type": "object", + "title": "AuthenticationConfiguration", + "description": "Authentication configuration." 
+ }, + "AuthorizationCodeOAuthFlow": { + "properties": { + "authorizationUrl": { + "type": "string", + "title": "Authorizationurl" + }, + "refreshUrl": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Refreshurl" + }, + "scopes": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "title": "Scopes" + }, + "tokenUrl": { + "type": "string", + "title": "Tokenurl" + } + }, + "type": "object", + "required": [ + "authorizationUrl", + "scopes", + "tokenUrl" + ], + "title": "AuthorizationCodeOAuthFlow", + "description": "Defines configuration details for the OAuth 2.0 Authorization Code flow." + }, + "AuthorizationConfiguration": { + "properties": { + "access_rules": { + "items": { + "$ref": "#/components/schemas/AccessRule" + }, + "type": "array", + "title": "Access rules", + "description": "Rules for role-based access control" + } + }, + "additionalProperties": false, + "type": "object", + "title": "AuthorizationConfiguration", + "description": "Authorization configuration." 
+ }, + "AuthorizedResponse": { + "properties": { + "user_id": { + "type": "string", + "title": "User Id", + "description": "User ID, for example UUID", + "examples": [ + "c5260aec-4d82-4370-9fdf-05cf908b3f16" + ] + }, + "username": { + "type": "string", + "title": "Username", + "description": "User name", + "examples": [ + "John Doe", + "Adam Smith" + ] + }, + "skip_userid_check": { + "type": "boolean", + "title": "Skip Userid Check", + "description": "Whether to skip the user ID check", + "examples": [ + true, + false + ] + } + }, + "type": "object", + "required": [ + "user_id", + "username", + "skip_userid_check" + ], + "title": "AuthorizedResponse", + "description": "Model representing a response to an authorization request.\n\nAttributes:\n user_id: The ID of the logged in user.\n username: The name of the logged in user.\n skip_userid_check: Whether to skip the user ID check.", + "examples": [ + { + "skip_userid_check": false, + "user_id": "123e4567-e89b-12d3-a456-426614174000", + "username": "user1" + } + ] + }, + "AzureEntraIdConfiguration": { + "properties": { + "tenant_id": { + "type": "string", + "format": "password", + "title": "Tenant Id", + "writeOnly": true + }, + "client_id": { + "type": "string", + "format": "password", + "title": "Client Id", + "writeOnly": true + }, + "client_secret": { + "type": "string", + "format": "password", + "title": "Client Secret", + "writeOnly": true + }, + "scope": { + "type": "string", + "title": "Token scope", + "description": "Azure Cognitive Services scope for token requests. Override only if using a different Azure service.", + "default": "https://cognitiveservices.azure.com/.default" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "tenant_id", + "client_id", + "client_secret" + ], + "title": "AzureEntraIdConfiguration", + "description": "Microsoft Entra ID authentication attributes for Azure." 
+ }, + "BadRequestResponse": { + "properties": { + "status_code": { + "type": "integer", + "title": "Status Code" + }, + "detail": { + "$ref": "#/components/schemas/DetailModel" + } + }, + "type": "object", + "required": [ + "status_code", + "detail" + ], + "title": "BadRequestResponse", + "description": "400 Bad Request. Invalid resource identifier.", + "examples": [ + { + "detail": { + "cause": "The conversation ID 123e4567-e89b-12d3-a456-426614174000 has invalid format.", + "response": "Invalid conversation ID format" + }, + "label": "conversation_id" + } + ] + }, + "ByokRag": { + "properties": { + "rag_id": { + "type": "string", + "minLength": 1, + "title": "RAG ID", + "description": "Unique RAG ID" + }, + "rag_type": { + "type": "string", + "minLength": 1, + "title": "RAG type", + "description": "Type of RAG database.", + "default": "inline::faiss" + }, + "embedding_model": { + "type": "string", + "minLength": 1, + "title": "Embedding model", + "description": "Embedding model identification", + "default": "sentence-transformers/all-mpnet-base-v2" + }, + "embedding_dimension": { + "type": "integer", + "exclusiveMinimum": 0.0, + "title": "Embedding dimension", + "description": "Dimensionality of embedding vectors.", + "default": 768 + }, + "vector_db_id": { + "type": "string", + "minLength": 1, + "title": "Vector DB ID", + "description": "Vector database identification." + }, + "db_path": { + "type": "string", + "title": "DB path", + "description": "Path to RAG database." + }, + "score_multiplier": { + "type": "number", + "exclusiveMinimum": 0.0, + "title": "Score multiplier", + "description": "Multiplier applied to relevance scores from this vector store. Used to weight results when querying multiple knowledge sources. 
Values > 1 boost this store's results; values < 1 reduce them.", + "default": 1.0 + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "rag_id", + "vector_db_id", + "db_path" + ], + "title": "ByokRag", + "description": "BYOK (Bring Your Own Knowledge) RAG configuration." + }, + "CORSConfiguration": { + "properties": { + "allow_origins": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Allow origins", + "description": "A list of origins allowed for cross-origin requests. An origin is the combination of protocol (http, https), domain (myapp.com, localhost, localhost.tiangolo.com), and port (80, 443, 8080). Use ['*'] to allow all origins.", + "default": [ + "*" + ] + }, + "allow_credentials": { + "type": "boolean", + "title": "Allow credentials", + "description": "Indicate that cookies should be supported for cross-origin requests", + "default": false + }, + "allow_methods": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Allow methods", + "description": "A list of HTTP methods that should be allowed for cross-origin requests. You can use ['*'] to allow all standard methods.", + "default": [ + "*" + ] + }, + "allow_headers": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Allow headers", + "description": "A list of HTTP request headers that should be supported for cross-origin requests. You can use ['*'] to allow all headers. 
The Accept, Accept-Language, Content-Language and Content-Type headers are always allowed for simple CORS requests.", + "default": [ + "*" + ] + } + }, + "additionalProperties": false, + "type": "object", + "title": "CORSConfiguration", + "description": "CORS configuration.\n\nCORS or 'Cross-Origin Resource Sharing' refers to the situations when a\nfrontend running in a browser has JavaScript code that communicates with a\nbackend, and the backend is in a different 'origin' than the frontend.\n\nUseful resources:\n\n - [CORS in FastAPI](https://fastapi.tiangolo.com/tutorial/cors/)\n - [Wikipedia article](https://en.wikipedia.org/wiki/Cross-origin_resource_sharing)\n - [What is CORS?](https://dev.to/akshay_chauhan/what-is-cors-explained-8f1)" + }, + "ClientCredentialsOAuthFlow": { + "properties": { + "refreshUrl": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Refreshurl" + }, + "scopes": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "title": "Scopes" + }, + "tokenUrl": { + "type": "string", + "title": "Tokenurl" + } + }, + "type": "object", + "required": [ + "scopes", + "tokenUrl" + ], + "title": "ClientCredentialsOAuthFlow", + "description": "Defines configuration details for the OAuth 2.0 Client Credentials flow." + }, + "Configuration": { + "properties": { + "name": { + "type": "string", + "title": "Service name", + "description": "Name of the service. That value will be used in REST API endpoints." + }, + "service": { + "$ref": "#/components/schemas/ServiceConfiguration", + "title": "Service configuration", + "description": "This section contains Lightspeed Core Stack service configuration." + }, + "llama_stack": { + "$ref": "#/components/schemas/LlamaStackConfiguration", + "title": "Llama Stack configuration", + "description": "This section contains Llama Stack configuration. Lightspeed Core Stack service can call Llama Stack in library mode or in server mode." 
+ }, + "user_data_collection": { + "$ref": "#/components/schemas/UserDataCollection", + "title": "User data collection configuration", + "description": "This section contains configuration for subsystem that collects user data(transcription history and feedbacks)." + }, + "database": { + "$ref": "#/components/schemas/DatabaseConfiguration", + "title": "Database Configuration", + "description": "Configuration for database to store conversation IDs and other runtime data" + }, + "mcp_servers": { + "items": { + "$ref": "#/components/schemas/ModelContextProtocolServer" + }, + "type": "array", + "title": "Model Context Protocol Server and tools configuration", + "description": "MCP (Model Context Protocol) servers provide tools and capabilities to the AI agents. These are configured in this section. Only MCP servers defined in the lightspeed-stack.yaml configuration are available to the agents. Tools configured in the llama-stack run.yaml are not accessible to lightspeed-core agents." + }, + "authentication": { + "$ref": "#/components/schemas/AuthenticationConfiguration", + "title": "Authentication configuration", + "description": "Authentication configuration" + }, + "authorization": { + "anyOf": [ + { + "$ref": "#/components/schemas/AuthorizationConfiguration" + }, + { + "type": "null" + } + ], + "title": "Authorization configuration", + "description": "Lightspeed Core Stack implements a modular authentication and authorization system with multiple authentication methods. Authorization is configurable through role-based access control. Authentication is handled through selectable modules configured via the module field in the authentication configuration." + }, + "customization": { + "anyOf": [ + { + "$ref": "#/components/schemas/Customization" + }, + { + "type": "null" + } + ], + "title": "Custom profile configuration", + "description": "It is possible to customize Lightspeed Core Stack via this section. 
System prompt can be customized and also different parts of the service can be replaced by custom Python modules." + }, + "inference": { + "$ref": "#/components/schemas/InferenceConfiguration", + "title": "Inference configuration", + "description": "One LLM provider and one of its models might be selected as the default ones. When no provider+model pair is specified in REST API calls (query endpoints), the default provider and model are used." + }, + "conversation_cache": { + "$ref": "#/components/schemas/ConversationHistoryConfiguration", + "title": "Conversation history configuration" + }, + "byok_rag": { + "items": { + "$ref": "#/components/schemas/ByokRag" + }, + "type": "array", + "title": "BYOK RAG configuration", + "description": "BYOK RAG configuration. This configuration can be used to reconfigure Llama Stack through its run.yaml configuration file" + }, + "a2a_state": { + "$ref": "#/components/schemas/A2AStateConfiguration", + "title": "A2A state configuration", + "description": "Configuration for A2A protocol persistent state storage." + }, + "quota_handlers": { + "$ref": "#/components/schemas/QuotaHandlersConfiguration", + "title": "Quota handlers", + "description": "Quota handlers configuration" + }, + "azure_entra_id": { + "anyOf": [ + { + "$ref": "#/components/schemas/AzureEntraIdConfiguration" + }, + { + "type": "null" + } + ] + }, + "splunk": { + "anyOf": [ + { + "$ref": "#/components/schemas/SplunkConfiguration" + }, + { + "type": "null" + } + ], + "title": "Splunk configuration", + "description": "Splunk HEC configuration for sending telemetry events." + }, + "deployment_environment": { + "type": "string", + "title": "Deployment environment", + "description": "Deployment environment name (e.g., 'development', 'staging', 'production'). 
Used in telemetry events.", + "default": "development" + }, + "rag": { + "$ref": "#/components/schemas/RagConfiguration", + "title": "RAG configuration", + "description": "Configuration for all RAG strategies (inline and tool-based)." + }, + "okp": { + "$ref": "#/components/schemas/OkpConfiguration", + "title": "OKP configuration", + "description": "OKP provider settings. Only used when 'okp' is listed in rag.inline or rag.tool." + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "name", + "service", + "llama_stack", + "user_data_collection" + ], + "title": "Configuration", + "description": "Global service configuration." + }, + "ConfigurationResponse": { + "properties": { + "configuration": { + "$ref": "#/components/schemas/Configuration" + } + }, + "type": "object", + "required": [ + "configuration" + ], + "title": "ConfigurationResponse", + "description": "Success response model for the config endpoint.", + "examples": [ + { + "configuration": { + "authentication": { + "module": "noop", + "skip_tls_verification": false + }, + "authorization": { + "access_rules": [] + }, + "byok_rag": [], + "conversation_cache": {}, + "database": { + "sqlite": { + "db_path": "/tmp/lightspeed-stack.db" + } + }, + "inference": { + "default_model": "gpt-4-turbo", + "default_provider": "openai" + }, + "llama_stack": { + "api_key": "*****", + "url": "http://localhost:8321", + "use_as_library_client": false + }, + "mcp_servers": [ + { + "name": "server1", + "provider_id": "provider1", + "url": "http://url.com:1" + } + ], + "name": "lightspeed-stack", + "quota_handlers": { + "enable_token_history": false, + "limiters": [], + "scheduler": { + "period": 1 + } + }, + "service": { + "access_log": true, + "auth_enabled": false, + "color_log": true, + "cors": { + "allow_credentials": false, + "allow_headers": [ + "*" + ], + "allow_methods": [ + "*" + ], + "allow_origins": [ + "*" + ] + }, + "host": "localhost", + "port": 8080, + "tls_config": {}, + "workers": 1 + 
}, + "user_data_collection": { + "feedback_enabled": true, + "feedback_storage": "/tmp/data/feedback", + "transcripts_enabled": false, + "transcripts_storage": "/tmp/data/transcripts" + } + } + } + ] + }, + "ConversationData": { + "properties": { + "conversation_id": { + "type": "string", + "title": "Conversation Id" + }, + "topic_summary": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Topic Summary" + }, + "last_message_timestamp": { + "type": "number", + "title": "Last Message Timestamp" + } + }, + "type": "object", + "required": [ + "conversation_id", + "topic_summary", + "last_message_timestamp" + ], + "title": "ConversationData", + "description": "Model representing conversation data returned by cache list operations.\n\nAttributes:\n conversation_id: The conversation ID\n topic_summary: The topic summary for the conversation (can be None)\n last_message_timestamp: The timestamp of the last message in the conversation" + }, + "ConversationDeleteResponse": { + "properties": { + "conversation_id": { + "type": "string", + "title": "Conversation Id", + "description": "The conversation ID (UUID) that was deleted.", + "examples": [ + "123e4567-e89b-12d3-a456-426614174000" + ] + }, + "success": { + "type": "boolean", + "title": "Success", + "description": "Whether the deletion was successful.", + "examples": [ + true, + false + ] + }, + "response": { + "type": "string", + "title": "Response", + "description": "A message about the deletion result.", + "examples": [ + "Conversation deleted successfully", + "Conversation cannot be deleted" + ] + } + }, + "type": "object", + "required": [ + "conversation_id", + "success", + "response" + ], + "title": "ConversationDeleteResponse", + "description": "Model representing a response for deleting a conversation.\n\nAttributes:\n conversation_id: The conversation ID (UUID) that was deleted.\n success: Whether the deletion was successful.\n response: A message about the deletion result.", + 
"examples": [ + { + "label": "deleted", + "value": { + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "response": "Conversation deleted successfully", + "success": true + } + }, + { + "label": "not found", + "value": { + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "response": "Conversation can not be deleted", + "success": true + } + } + ] + }, + "ConversationDetails": { + "properties": { + "conversation_id": { + "type": "string", + "title": "Conversation Id", + "description": "Conversation ID (UUID)", + "examples": [ + "c5260aec-4d82-4370-9fdf-05cf908b3f16" + ] + }, + "created_at": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Created At", + "description": "When the conversation was created", + "examples": [ + "2024-01-01T01:00:00Z" + ] + }, + "last_message_at": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Last Message At", + "description": "When the last message was sent", + "examples": [ + "2024-01-01T01:00:00Z" + ] + }, + "message_count": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Message Count", + "description": "Number of user messages in the conversation", + "examples": [ + 42 + ] + }, + "last_used_model": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Last Used Model", + "description": "Identification of the last model used for the conversation", + "examples": [ + "gpt-4-turbo", + "gpt-3.5-turbo-0125" + ] + }, + "last_used_provider": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Last Used Provider", + "description": "Identification of the last provider used for the conversation", + "examples": [ + "openai", + "gemini" + ] + }, + "topic_summary": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Topic Summary", + "description": "Topic summary for the conversation", + "examples": [ + "Openshift 
Microservices Deployment Strategies" + ] + } + }, + "type": "object", + "required": [ + "conversation_id" + ], + "title": "ConversationDetails", + "description": "Model representing the details of a user conversation.\n\nAttributes:\n conversation_id: The conversation ID (UUID).\n created_at: When the conversation was created.\n last_message_at: When the last message was sent.\n message_count: Number of user messages in the conversation.\n last_used_model: The last model used for the conversation.\n last_used_provider: The provider of the last used model.\n topic_summary: The topic summary for the conversation.\n\nExample:\n ```python\n conversation = ConversationDetails(\n conversation_id=\"123e4567-e89b-12d3-a456-426614174000\",\n created_at=\"2024-01-01T00:00:00Z\",\n last_message_at=\"2024-01-01T00:05:00Z\",\n message_count=5,\n last_used_model=\"gemini/gemini-2.0-flash\",\n last_used_provider=\"gemini\",\n topic_summary=\"Openshift Microservices Deployment Strategies\",\n )\n ```" + }, + "ConversationHistoryConfiguration": { + "properties": { + "type": { + "anyOf": [ + { + "type": "string", + "enum": [ + "noop", + "memory", + "sqlite", + "postgres" + ] + }, + { + "type": "null" + } + ], + "title": "Conversation history database type", + "description": "Type of database where the conversation history is to be stored." 
+ }, + "memory": { + "anyOf": [ + { + "$ref": "#/components/schemas/InMemoryCacheConfig" + }, + { + "type": "null" + } + ], + "title": "In-memory cache configuration", + "description": "In-memory cache configuration" + }, + "sqlite": { + "anyOf": [ + { + "$ref": "#/components/schemas/SQLiteDatabaseConfiguration" + }, + { + "type": "null" + } + ], + "title": "SQLite configuration", + "description": "SQLite database configuration" + }, + "postgres": { + "anyOf": [ + { + "$ref": "#/components/schemas/PostgreSQLDatabaseConfiguration" + }, + { + "type": "null" + } + ], + "title": "PostgreSQL configuration", + "description": "PostgreSQL database configuration" + } + }, + "additionalProperties": false, + "type": "object", + "title": "ConversationHistoryConfiguration", + "description": "Conversation history configuration." + }, + "ConversationResponse": { + "properties": { + "conversation_id": { + "type": "string", + "title": "Conversation Id", + "description": "Conversation ID (UUID)", + "examples": [ + "c5260aec-4d82-4370-9fdf-05cf908b3f16" + ] + }, + "chat_history": { + "items": { + "$ref": "#/components/schemas/ConversationTurn" + }, + "type": "array", + "title": "Chat History", + "description": "The simplified chat history as a list of conversation turns", + "examples": [ + { + "completed_at": "2024-01-01T00:01:05Z", + "messages": [ + { + "content": "Hello", + "type": "user" + }, + { + "content": "Hi there!", + "type": "assistant" + } + ], + "model": "gpt-4o-mini", + "provider": "openai", + "started_at": "2024-01-01T00:01:00Z", + "tool_calls": [], + "tool_results": [] + } + ] + } + }, + "type": "object", + "required": [ + "conversation_id", + "chat_history" + ], + "title": "ConversationResponse", + "description": "Model representing a response for retrieving a conversation.\n\nAttributes:\n conversation_id: The conversation ID (UUID).\n chat_history: The chat history as a list of conversation turns.", + "examples": [ + { + "chat_history": [ + { + "completed_at": 
"2024-01-01T00:01:05Z", + "messages": [ + { + "content": "Hello", + "type": "user" + }, + { + "content": "Hi there!", + "type": "assistant" + } + ], + "model": "gpt-4o-mini", + "provider": "openai", + "started_at": "2024-01-01T00:01:00Z", + "tool_calls": [], + "tool_results": [] + } + ], + "conversation_id": "123e4567-e89b-12d3-a456-426614174000" + } + ] + }, + "ConversationTurn": { + "properties": { + "messages": { + "items": { + "$ref": "#/components/schemas/Message" + }, + "type": "array", + "title": "Messages", + "description": "List of messages in this turn" + }, + "tool_calls": { + "items": { + "$ref": "#/components/schemas/ToolCallSummary" + }, + "type": "array", + "title": "Tool Calls", + "description": "List of tool calls made in this turn" + }, + "tool_results": { + "items": { + "$ref": "#/components/schemas/ToolResultSummary" + }, + "type": "array", + "title": "Tool Results", + "description": "List of tool results from this turn" + }, + "provider": { + "type": "string", + "title": "Provider", + "description": "Provider identifier used for this turn", + "examples": [ + "openai" + ] + }, + "model": { + "type": "string", + "title": "Model", + "description": "Model identifier used for this turn", + "examples": [ + "gpt-4o-mini" + ] + }, + "started_at": { + "type": "string", + "title": "Started At", + "description": "ISO 8601 timestamp when the turn started", + "examples": [ + "2024-01-01T00:01:00Z" + ] + }, + "completed_at": { + "type": "string", + "title": "Completed At", + "description": "ISO 8601 timestamp when the turn completed", + "examples": [ + "2024-01-01T00:01:05Z" + ] + } + }, + "type": "object", + "required": [ + "provider", + "model", + "started_at", + "completed_at" + ], + "title": "ConversationTurn", + "description": "Model representing a single conversation turn.\n\nAttributes:\n messages: List of messages in this turn.\n tool_calls: List of tool calls made in this turn.\n tool_results: List of tool results from this turn.\n provider: 
Provider identifier used for this turn.\n model: Model identifier used for this turn.\n started_at: ISO 8601 timestamp when the turn started.\n completed_at: ISO 8601 timestamp when the turn completed." + }, + "ConversationUpdateRequest": { + "properties": { + "topic_summary": { + "type": "string", + "maxLength": 1000, + "minLength": 1, + "title": "Topic Summary", + "description": "The new topic summary for the conversation", + "examples": [ + "Discussion about machine learning algorithms" + ] + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "topic_summary" + ], + "title": "ConversationUpdateRequest", + "description": "Model representing a request to update a conversation topic summary.\n\nAttributes:\n topic_summary: The new topic summary for the conversation.\n\nExample:\n ```python\n update_request = ConversationUpdateRequest(\n topic_summary=\"Discussion about machine learning algorithms\"\n )\n ```" + }, + "ConversationUpdateResponse": { + "properties": { + "conversation_id": { + "type": "string", + "title": "Conversation Id", + "description": "The conversation ID (UUID) that was updated", + "examples": [ + "123e4567-e89b-12d3-a456-426614174000" + ] + }, + "success": { + "type": "boolean", + "title": "Success", + "description": "Whether the update was successful", + "examples": [ + true + ] + }, + "message": { + "type": "string", + "title": "Message", + "description": "A message about the update result", + "examples": [ + "Topic summary updated successfully" + ] + } + }, + "type": "object", + "required": [ + "conversation_id", + "success", + "message" + ], + "title": "ConversationUpdateResponse", + "description": "Model representing a response for updating a conversation topic summary.\n\nAttributes:\n conversation_id: The conversation ID (UUID) that was updated.\n success: Whether the update was successful.\n message: A message about the update result.\n\nExample:\n ```python\n update_response = ConversationUpdateResponse(\n 
conversation_id=\"123e4567-e89b-12d3-a456-426614174000\",\n success=True,\n message=\"Topic summary updated successfully\",\n )\n ```", + "examples": [ + { + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "message": "Topic summary updated successfully", + "success": true + } + ] + }, + "ConversationsListResponse": { + "properties": { + "conversations": { + "items": { + "$ref": "#/components/schemas/ConversationDetails" + }, + "type": "array", + "title": "Conversations" + } + }, + "type": "object", + "required": [ + "conversations" + ], + "title": "ConversationsListResponse", + "description": "Model representing a response for listing conversations of a user.\n\nAttributes:\n conversations: List of conversation details associated with the user.", + "examples": [ + { + "conversations": [ + { + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "created_at": "2024-01-01T00:00:00Z", + "last_message_at": "2024-01-01T00:05:00Z", + "last_used_model": "gemini/gemini-2.0-flash", + "last_used_provider": "gemini", + "message_count": 5, + "topic_summary": "Openshift Microservices Deployment Strategies" + }, + { + "conversation_id": "456e7890-e12b-34d5-a678-901234567890", + "created_at": "2024-01-01T01:00:00Z", + "last_used_model": "gemini/gemini-2.5-flash", + "last_used_provider": "gemini", + "message_count": 2, + "topic_summary": "RHDH Purpose Summary" + } + ] + } + ] + }, + "ConversationsListResponseV2": { + "properties": { + "conversations": { + "items": { + "$ref": "#/components/schemas/ConversationData" + }, + "type": "array", + "title": "Conversations" + } + }, + "type": "object", + "required": [ + "conversations" + ], + "title": "ConversationsListResponseV2", + "description": "Model representing a response for listing conversations of a user.\n\nAttributes:\n conversations: List of conversation data associated with the user.", + "examples": [ + { + "conversations": [ + { + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + 
"last_message_timestamp": 1704067200.0, + "topic_summary": "Openshift Microservices Deployment Strategies" + } + ] + } + ] + }, + "CustomProfile": { + "properties": { + "path": { + "type": "string", + "title": "Path to custom profile", + "description": "Path to Python modules containing custom profile." + }, + "prompts": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "title": "System prompts", + "description": "Dictionary containing map of system prompts", + "default": {} + } + }, + "type": "object", + "required": [ + "path" + ], + "title": "CustomProfile", + "description": "Custom profile customization for prompts and validation." + }, + "Customization": { + "properties": { + "profile_path": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Profile Path" + }, + "disable_query_system_prompt": { + "type": "boolean", + "title": "Disable Query System Prompt", + "default": false + }, + "disable_shield_ids_override": { + "type": "boolean", + "title": "Disable Shield Ids Override", + "default": false + }, + "system_prompt_path": { + "anyOf": [ + { + "type": "string", + "format": "file-path" + }, + { + "type": "null" + } + ], + "title": "System Prompt Path" + }, + "system_prompt": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "System Prompt" + }, + "agent_card_path": { + "anyOf": [ + { + "type": "string", + "format": "file-path" + }, + { + "type": "null" + } + ], + "title": "Agent Card Path" + }, + "agent_card_config": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Agent Card Config" + }, + "custom_profile": { + "anyOf": [ + { + "$ref": "#/components/schemas/CustomProfile" + }, + { + "type": "null" + } + ] + } + }, + "additionalProperties": false, + "type": "object", + "title": "Customization", + "description": "Service customization." 
+ }, + "DatabaseConfiguration": { + "properties": { + "sqlite": { + "anyOf": [ + { + "$ref": "#/components/schemas/SQLiteDatabaseConfiguration" + }, + { + "type": "null" + } + ], + "title": "SQLite configuration", + "description": "SQLite database configuration" + }, + "postgres": { + "anyOf": [ + { + "$ref": "#/components/schemas/PostgreSQLDatabaseConfiguration" + }, + { + "type": "null" + } + ], + "title": "PostgreSQL configuration", + "description": "PostgreSQL database configuration" + } + }, + "additionalProperties": false, + "type": "object", + "title": "DatabaseConfiguration", + "description": "Database configuration." + }, + "DetailModel": { + "properties": { + "response": { + "type": "string", + "title": "Response", + "description": "Short summary of the error" + }, + "cause": { + "type": "string", + "title": "Cause", + "description": "Detailed explanation of what caused the error" + } + }, + "type": "object", + "required": [ + "response", + "cause" + ], + "title": "DetailModel", + "description": "Nested detail model for error responses." + }, + "FeedbackCategory": { + "type": "string", + "enum": [ + "incorrect", + "not_relevant", + "incomplete", + "outdated_information", + "unsafe", + "other" + ], + "title": "FeedbackCategory", + "description": "Enum representing predefined feedback categories for AI responses.\n\nThese categories help provide structured feedback about AI inference quality\nwhen users provide negative feedback (thumbs down). Multiple categories can\nbe selected to provide comprehensive feedback about response issues." + }, + "FeedbackRequest": { + "properties": { + "conversation_id": { + "type": "string", + "title": "Conversation Id", + "description": "The required conversation ID (UUID)", + "examples": [ + "c5260aec-4d82-4370-9fdf-05cf908b3f16" + ] + }, + "user_question": { + "type": "string", + "title": "User Question", + "description": "User question (the query string)", + "examples": [ + "What is Kubernetes?" 
+ ] + }, + "llm_response": { + "type": "string", + "title": "Llm Response", + "description": "Response from LLM", + "examples": [ + "Kubernetes is an open-source container orchestration system for automating ..." + ] + }, + "sentiment": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Sentiment", + "description": "User sentiment, if provided must be -1 or 1", + "examples": [ + -1, + 1 + ] + }, + "user_feedback": { + "anyOf": [ + { + "type": "string", + "maxLength": 4096 + }, + { + "type": "null" + } + ], + "title": "User Feedback", + "description": "Feedback on the LLM response.", + "examples": [ + "I'm not satisfied with the response because it is too vague." + ] + }, + "categories": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/FeedbackCategory" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Categories", + "description": "List of feedback categories that describe issues with the LLM response (for negative feedback).", + "examples": [ + [ + "incorrect", + "incomplete" + ] + ] + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "conversation_id", + "user_question", + "llm_response" + ], + "title": "FeedbackRequest", + "description": "Model representing a feedback request.\n\nAttributes:\n conversation_id: The required conversation ID (UUID).\n user_question: The required user question.\n llm_response: The required LLM response.\n sentiment: The optional sentiment.\n user_feedback: The optional user feedback.\n categories: The optional list of feedback categories (multi-select for negative feedback).\n\nExample:\n ```python\n feedback_request = FeedbackRequest(\n conversation_id=\"12345678-abcd-0000-0123-456789abcdef\",\n user_question=\"what are you doing?\",\n user_feedback=\"This response is not helpful\",\n llm_response=\"I don't know\",\n sentiment=-1,\n categories=[FeedbackCategory.INCORRECT, FeedbackCategory.INCOMPLETE]\n )\n ```", + "examples": [ + { + 
"conversation_id": "12345678-abcd-0000-0123-456789abcdef", + "llm_response": "bar", + "sentiment": -1, + "user_feedback": "Not satisfied with the response quality.", + "user_question": "foo" + }, + { + "categories": [ + "incorrect" + ], + "conversation_id": "12345678-abcd-0000-0123-456789abcdef", + "llm_response": "The capital of France is Berlin.", + "sentiment": -1, + "user_question": "What is the capital of France?" + }, + { + "categories": [ + "incomplete", + "not_relevant" + ], + "conversation_id": "12345678-abcd-0000-0123-456789abcdef", + "llm_response": "Use Docker.", + "sentiment": -1, + "user_feedback": "This response is too general and doesn't provide specific steps.", + "user_question": "How do I deploy a web app?" + } + ] + }, + "FeedbackResponse": { + "properties": { + "response": { + "type": "string", + "title": "Response", + "description": "The response of the feedback request.", + "examples": [ + "feedback received" + ] + } + }, + "type": "object", + "required": [ + "response" + ], + "title": "FeedbackResponse", + "description": "Model representing a response to a feedback request.\n\nAttributes:\n response: The response of the feedback request.\n\nExample:\n ```python\n feedback_response = FeedbackResponse(response=\"feedback received\")\n ```", + "examples": [ + { + "response": "feedback received" + } + ] + }, + "FeedbackStatusUpdateRequest": { + "properties": { + "status": { + "type": "boolean", + "title": "Status", + "description": "Desired state of feedback enablement, must be False or True", + "default": false, + "examples": [ + true, + false + ] + } + }, + "additionalProperties": false, + "type": "object", + "title": "FeedbackStatusUpdateRequest", + "description": "Model representing a feedback status update request.\n\nAttributes:\n status: Value of the desired feedback enabled state.\n\nExample:\n ```python\n feedback_request = FeedbackRequest(\n status=false\n )\n ```" + }, + "FeedbackStatusUpdateResponse": { + "properties": { + "status": 
{ + "additionalProperties": true, + "type": "object", + "title": "Status" + } + }, + "type": "object", + "required": [ + "status" + ], + "title": "FeedbackStatusUpdateResponse", + "description": "Model representing a response to a feedback status update request.\n\nAttributes:\n status: The previous and current status of the service and who updated it.\n\nExample:\n ```python\n status_response = StatusResponse(\n status={\n \"previous_status\": true,\n \"updated_status\": false,\n \"updated_by\": \"user/test\",\n \"timestamp\": \"2023-03-15 12:34:56\"\n },\n )\n ```", + "examples": [ + { + "status": { + "previous_status": true, + "timestamp": "2023-03-15 12:34:56", + "updated_by": "user/test", + "updated_status": false + } + } + ] + }, + "ForbiddenResponse": { "properties": { - "sqlite": { - "anyOf": [ - { - "$ref": "#/components/schemas/SQLiteDatabaseConfiguration" - }, - { - "type": "null" - } - ], - "title": "SQLite configuration", - "description": "SQLite database configuration for A2A state storage." + "status_code": { + "type": "integer", + "title": "Status Code" }, - "postgres": { + "detail": { + "$ref": "#/components/schemas/DetailModel" + } + }, + "type": "object", + "required": [ + "status_code", + "detail" + ], + "title": "ForbiddenResponse", + "description": "403 Forbidden. 
Access denied.", + "examples": [ + { + "detail": { + "cause": "User 6789 does not have permission to read conversation with ID 123e4567-e89b-12d3-a456-426614174000", + "response": "User does not have permission to perform this action" + }, + "label": "conversation read" + }, + { + "detail": { + "cause": "User 6789 does not have permission to delete conversation with ID 123e4567-e89b-12d3-a456-426614174000", + "response": "User does not have permission to perform this action" + }, + "label": "conversation delete" + }, + { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + }, + "label": "endpoint" + }, + { + "detail": { + "cause": "Storing feedback is disabled.", + "response": "Storing feedback is disabled" + }, + "label": "feedback" + }, + { + "detail": { + "cause": "User lacks model_override permission required to override model/provider.", + "response": "This instance does not permit overriding model/provider in the query request (missing permission: MODEL_OVERRIDE). Please remove the model and provider fields from your request." + }, + "label": "model override" + } + ] + }, + "HTTPAuthSecurityScheme": { + "properties": { + "bearerFormat": { "anyOf": [ { - "$ref": "#/components/schemas/PostgreSQLDatabaseConfiguration" + "type": "string" }, { "type": "null" } ], - "title": "PostgreSQL configuration", - "description": "PostgreSQL database configuration for A2A state storage." - } - }, - "additionalProperties": false, - "type": "object", - "title": "A2AStateConfiguration", - "description": "A2A protocol persistent state configuration.\n\nConfigures how A2A task state and context-to-conversation mappings are\nstored. 
For multi-worker deployments, use SQLite or PostgreSQL to ensure\nstate is shared across all workers.\n\nIf no configuration is provided, in-memory storage is used (default).\nThis is suitable for single-worker deployments but state will be lost\non restarts and not shared across workers.\n\nAttributes:\n sqlite: SQLite database configuration for A2A state storage.\n postgres: PostgreSQL database configuration for A2A state storage." - }, - "APIKeySecurityScheme": { - "properties": { + "title": "Bearerformat" + }, "description": { "anyOf": [ { @@ -4552,340 +7201,411 @@ ], "title": "Description" }, - "in": { - "$ref": "#/components/schemas/In" - }, - "name": { + "scheme": { "type": "string", - "title": "Name" + "title": "Scheme" }, "type": { "type": "string", - "const": "apiKey", + "const": "http", "title": "Type", - "default": "apiKey" + "default": "http" } }, "type": "object", "required": [ - "in", - "name" + "scheme" ], - "title": "APIKeySecurityScheme", - "description": "Defines a security scheme using an API key." + "title": "HTTPAuthSecurityScheme", + "description": "Defines a security scheme using HTTP authentication." }, - "APIKeyTokenConfiguration": { + "HTTPValidationError": { "properties": { - "api_key": { - "type": "string", - "minLength": 1, - "format": "password", - "title": "API key", - "writeOnly": true, - "examples": [ - "some-api-key" - ] + "detail": { + "items": { + "$ref": "#/components/schemas/ValidationError" + }, + "type": "array", + "title": "Detail" } }, - "additionalProperties": false, "type": "object", - "required": [ - "api_key" - ], - "title": "APIKeyTokenConfiguration", - "description": "API Key Token configuration." 
+ "title": "HTTPValidationError" }, - "AccessRule": { + "ImplicitOAuthFlow": { "properties": { - "role": { + "authorizationUrl": { "type": "string", - "title": "Role name", - "description": "Name of the role" + "title": "Authorizationurl" }, - "actions": { - "items": { - "$ref": "#/components/schemas/Action" + "refreshUrl": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Refreshurl" + }, + "scopes": { + "additionalProperties": { + "type": "string" }, - "type": "array", - "title": "Allowed actions", - "description": "Allowed actions for this role" + "type": "object", + "title": "Scopes" } }, - "additionalProperties": false, "type": "object", "required": [ - "role", - "actions" + "authorizationUrl", + "scopes" ], - "title": "AccessRule", - "description": "Rule defining what actions a role can perform." + "title": "ImplicitOAuthFlow", + "description": "Defines configuration details for the OAuth 2.0 Implicit flow." }, - "Action": { + "In": { "type": "string", "enum": [ - "admin", - "list_other_conversations", - "read_other_conversations", - "query_other_conversations", - "delete_other_conversations", - "query", - "streaming_query", - "get_conversation", - "list_conversations", - "delete_conversation", - "update_conversation", - "feedback", - "get_models", - "get_tools", - "get_shields", - "list_providers", - "get_provider", - "list_rags", - "get_rag", - "get_metrics", - "get_config", - "info", - "model_override", - "rlsapi_v1_infer", - "a2a_agent_card", - "a2a_task_execution", - "a2a_message", - "a2a_jsonrpc" + "cookie", + "header", + "query" ], - "title": "Action", - "description": "Available actions in the system.\n\nNote: this is not a real model, just an enumeration of all action names." + "title": "In", + "description": "The location of the API key." 
}, - "AgentCapabilities": { + "InMemoryCacheConfig": { "properties": { - "extensions": { - "anyOf": [ - { - "items": { - "$ref": "#/components/schemas/AgentExtension" - }, - "type": "array" - }, - { - "type": "null" - } - ], - "title": "Extensions" - }, - "pushNotifications": { + "max_entries": { + "type": "integer", + "exclusiveMinimum": 0.0, + "title": "Max entries", + "description": "Maximum number of entries stored in the in-memory cache" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "max_entries" + ], + "title": "InMemoryCacheConfig", + "description": "In-memory cache configuration." + }, + "IncludeParameter": { + "type": "string", + "enum": [ + "web_search_call.action.sources", + "code_interpreter_call.outputs", + "computer_call_output.output.image_url", + "file_search_call.results", + "message.input_image.image_url", + "message.output_text.logprobs", + "reasoning.encrypted_content" + ] + }, + "InferenceConfiguration": { + "properties": { + "default_model": { "anyOf": [ { - "type": "boolean" + "type": "string" }, { "type": "null" } ], - "title": "Pushnotifications" + "title": "Default model", + "description": "Identification of default model used when no other model is specified." }, - "stateTransitionHistory": { + "default_provider": { "anyOf": [ { - "type": "boolean" + "type": "string" }, { "type": "null" } ], - "title": "Statetransitionhistory" + "title": "Default provider", + "description": "Identification of default provider used when no other model is specified." + } + }, + "additionalProperties": false, + "type": "object", + "title": "InferenceConfiguration", + "description": "Inference configuration." 
+ }, + "InfoResponse": { + "properties": { + "name": { + "type": "string", + "title": "Name", + "description": "Service name", + "examples": [ + "Lightspeed Stack" + ] }, - "streaming": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "null" - } - ], - "title": "Streaming" + "service_version": { + "type": "string", + "title": "Service Version", + "description": "Service version", + "examples": [ + "0.1.0", + "0.2.0", + "1.0.0" + ] + }, + "llama_stack_version": { + "type": "string", + "title": "Llama Stack Version", + "description": "Llama Stack version", + "examples": [ + "0.2.1", + "0.2.2", + "0.2.18", + "0.2.21", + "0.2.22" + ] } }, "type": "object", - "title": "AgentCapabilities", - "description": "Defines optional capabilities supported by an agent." + "required": [ + "name", + "service_version", + "llama_stack_version" + ], + "title": "InfoResponse", + "description": "Model representing a response to an info request.\n\nAttributes:\n name: Service name.\n service_version: Service version.\n llama_stack_version: Llama Stack version.\n\nExample:\n ```python\n info_response = InfoResponse(\n name=\"Lightspeed Stack\",\n service_version=\"1.0.0\",\n llama_stack_version=\"0.2.22\",\n )\n ```", + "examples": [ + { + "llama_stack_version": "1.0.0", + "name": "Lightspeed Stack", + "service_version": "1.0.0" + } + ] }, - "AgentCard": { + "InternalServerErrorResponse": { "properties": { - "additionalInterfaces": { - "anyOf": [ - { - "items": { - "$ref": "#/components/schemas/AgentInterface" - }, - "type": "array" - }, - { - "type": "null" - } - ], - "title": "Additionalinterfaces" + "status_code": { + "type": "integer", + "title": "Status Code" }, - "capabilities": { - "$ref": "#/components/schemas/AgentCapabilities" + "detail": { + "$ref": "#/components/schemas/DetailModel" + } + }, + "type": "object", + "required": [ + "status_code", + "detail" + ], + "title": "InternalServerErrorResponse", + "description": "500 Internal Server Error.", + "examples": [ + { + 
"detail": { + "cause": "An unexpected error occurred while processing the request.", + "response": "Internal server error" + }, + "label": "internal" }, - "defaultInputModes": { - "items": { - "type": "string" + { + "detail": { + "cause": "Lightspeed Stack configuration has not been initialized.", + "response": "Configuration is not loaded" }, - "type": "array", - "title": "Defaultinputmodes" + "label": "configuration" }, - "defaultOutputModes": { - "items": { - "type": "string" + { + "detail": { + "cause": "Failed to store feedback at directory: /path/example", + "response": "Failed to store feedback" }, - "type": "array", - "title": "Defaultoutputmodes" + "label": "feedback storage" }, - "description": { + { + "detail": { + "cause": "Failed to call backend API", + "response": "Error while processing query" + }, + "label": "query" + }, + { + "detail": { + "cause": "Conversation cache is not configured or unavailable.", + "response": "Conversation cache not configured" + }, + "label": "conversation cache" + }, + { + "detail": { + "cause": "Failed to query the database", + "response": "Database query failed" + }, + "label": "database" + } + ] + }, + "JsonPathOperator": { + "type": "string", + "enum": [ + "equals", + "contains", + "in", + "match" + ], + "title": "JsonPathOperator", + "description": "Supported operators for JSONPath evaluation.\n\nNote: this is not a real model, just an enumeration of all supported JSONPath operators." + }, + "JwkConfiguration": { + "properties": { + "url": { "type": "string", - "title": "Description", - "examples": [ - "Agent that helps users with recipes and cooking." - ] + "minLength": 1, + "format": "uri", + "title": "URL", + "description": "HTTPS URL of the JWK (JSON Web Key) set used to validate JWTs." 
}, - "documentationUrl": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "title": "Documentationurl" + "jwt_configuration": { + "$ref": "#/components/schemas/JwtConfiguration", + "title": "JWT configuration", + "description": "JWT (JSON Web Token) configuration" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "url" + ], + "title": "JwkConfiguration", + "description": "JWK (JSON Web Key) configuration.\n\nA JSON Web Key (JWK) is a JavaScript Object Notation (JSON) data structure\nthat represents a cryptographic key.\n\nUseful resources:\n\n - [JSON Web Key](https://openid.net/specs/draft-jones-json-web-key-03.html)\n - [RFC 7517](https://www.rfc-editor.org/rfc/rfc7517)" + }, + "JwtConfiguration": { + "properties": { + "user_id_claim": { + "type": "string", + "title": "User ID claim", + "description": "JWT claim name that uniquely identifies the user (subject ID).", + "default": "user_id" }, - "iconUrl": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "title": "Iconurl" + "username_claim": { + "type": "string", + "title": "Username claim", + "description": "JWT claim name that provides the human-readable username.", + "default": "username" }, - "name": { + "role_rules": { + "items": { + "$ref": "#/components/schemas/JwtRoleRule" + }, + "type": "array", + "title": "Role rules", + "description": "Rules for extracting roles from JWT claims" + } + }, + "additionalProperties": false, + "type": "object", + "title": "JwtConfiguration", + "description": "JWT (JSON Web Token) configuration.\n\nJSON Web Token (JWT) is a compact, URL-safe means of representing\nclaims to be transferred between two parties. 
The claims in a JWT\nare encoded as a JSON object that is used as the payload of a JSON\nWeb Signature (JWS) structure or as the plaintext of a JSON Web\nEncryption (JWE) structure, enabling the claims to be digitally\nsigned or integrity protected with a Message Authentication Code\n(MAC) and/or encrypted.\n\nUseful resources:\n\n - [JSON Web Token](https://en.wikipedia.org/wiki/JSON_Web_Token)\n - [RFC 7519](https://datatracker.ietf.org/doc/html/rfc7519)\n - [JSON Web Tokens](https://auth0.com/docs/secure/tokens/json-web-tokens)" + }, + "JwtRoleRule": { + "properties": { + "jsonpath": { "type": "string", - "title": "Name", - "examples": [ - "Recipe Agent" - ] + "title": "JSON path", + "description": "JSONPath expression to evaluate against the JWT payload" }, - "preferredTransport": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "title": "Preferredtransport", - "default": "JSONRPC", - "examples": [ - "JSONRPC", - "GRPC", - "HTTP+JSON" - ] + "operator": { + "$ref": "#/components/schemas/JsonPathOperator", + "title": "Operator", + "description": "JSON path comparison operator" }, - "protocolVersion": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "title": "Protocolversion", - "default": "0.3.0" + "negate": { + "type": "boolean", + "title": "Negate rule", + "description": "If set to true, the meaning of the rule is negated", + "default": false }, - "provider": { - "anyOf": [ - { - "$ref": "#/components/schemas/AgentProvider" - }, - { - "type": "null" - } - ] + "value": { + "title": "Value", + "description": "Value to compare against" }, - "security": { - "anyOf": [ - { - "items": { - "additionalProperties": { - "items": { - "type": "string" - }, - "type": "array" - }, - "type": "object" - }, - "type": "array" - }, - { - "type": "null" - } - ], - "title": "Security", + "roles": { + "items": { + "type": "string" + }, + "type": "array", + "title": "List of roles", + "description": "Roles to be assigned if the rule 
matches" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "jsonpath", + "operator", + "value", + "roles" + ], + "title": "JwtRoleRule", + "description": "Rule for extracting roles from JWT claims." + }, + "LivenessResponse": { + "properties": { + "alive": { + "type": "boolean", + "title": "Alive", + "description": "Flag indicating that the app is alive", "examples": [ - [ - { - "oauth": [ - "read" - ] - }, - { - "api-key": [], - "mtls": [] - } - ] + true, + false ] - }, - "securitySchemes": { + } + }, + "type": "object", + "required": [ + "alive" + ], + "title": "LivenessResponse", + "description": "Model representing a response to a liveness request.\n\nAttributes:\n alive: If app is alive.\n\nExample:\n ```python\n liveness_response = LivenessResponse(alive=True)\n ```", + "examples": [ + { + "alive": true + } + ] + }, + "LlamaStackConfiguration": { + "properties": { + "url": { "anyOf": [ { - "additionalProperties": { - "$ref": "#/components/schemas/SecurityScheme" - }, - "type": "object" + "type": "string", + "minLength": 1, + "format": "uri" }, { "type": "null" } ], - "title": "Securityschemes" + "title": "Llama Stack URL", + "description": "URL to Llama Stack service; used when library mode is disabled. Must be a valid HTTP or HTTPS URL." 
}, - "signatures": { + "api_key": { "anyOf": [ { - "items": { - "$ref": "#/components/schemas/AgentCardSignature" - }, - "type": "array" + "type": "string", + "format": "password", + "writeOnly": true }, { "type": "null" } ], - "title": "Signatures" - }, - "skills": { - "items": { - "$ref": "#/components/schemas/AgentSkill" - }, - "type": "array", - "title": "Skills" + "title": "API key", + "description": "API key to access Llama Stack service" }, - "supportsAuthenticatedExtendedCard": { + "use_as_library_client": { "anyOf": [ { "type": "boolean" @@ -4894,198 +7614,155 @@ "type": "null" } ], - "title": "Supportsauthenticatedextendedcard" - }, - "url": { - "type": "string", - "title": "Url", - "examples": [ - "https://api.example.com/a2a/v1" - ] + "title": "Use as library", + "description": "When set to true Llama Stack will be used in library mode, not in server mode (default)" }, - "version": { - "type": "string", - "title": "Version", - "examples": [ - "1.0.0" - ] - } - }, - "type": "object", - "required": [ - "capabilities", - "defaultInputModes", - "defaultOutputModes", - "description", - "name", - "skills", - "url", - "version" - ], - "title": "AgentCard", - "description": "The AgentCard is a self-describing manifest for an agent. It provides essential\nmetadata including the agent's identity, capabilities, skills, supported\ncommunication methods, and security requirements." 
- }, - "AgentCardSignature": { - "properties": { - "header": { + "library_client_config_path": { "anyOf": [ { - "additionalProperties": true, - "type": "object" + "type": "string" }, { "type": "null" } ], - "title": "Header" - }, - "protected": { - "type": "string", - "title": "Protected" + "title": "Llama Stack configuration path", + "description": "Path to configuration file used when Llama Stack is run in library mode" }, - "signature": { - "type": "string", - "title": "Signature" + "timeout": { + "type": "integer", + "exclusiveMinimum": 0.0, + "title": "Request timeout", + "description": "Timeout in seconds for requests to Llama Stack service. Default is 180 seconds (3 minutes) to accommodate long-running RAG queries.", + "default": 180 } }, + "additionalProperties": false, "type": "object", - "required": [ - "protected", - "signature" - ], - "title": "AgentCardSignature", - "description": "AgentCardSignature represents a JWS signature of an AgentCard.\nThis follows the JSON format of an RFC 7515 JSON Web Signature (JWS)." 
+ "title": "LlamaStackConfiguration", + "description": "Llama stack configuration.\n\nLlama Stack is a comprehensive system that provides a uniform set of tools\nfor building, scaling, and deploying generative AI applications, enabling\ndevelopers to create, integrate, and orchestrate multiple AI services and\ncapabilities into an adaptable setup.\n\nUseful resources:\n\n - [Llama Stack](https://www.llama.com/products/llama-stack/)\n - [Python Llama Stack client](https://github.com/llamastack/llama-stack-client-python)\n - [Build AI Applications with Llama Stack](https://llamastack.github.io/)" }, - "AgentExtension": { + "MCPClientAuthOptionsResponse": { "properties": { - "description": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "title": "Description" - }, - "params": { - "anyOf": [ + "servers": { + "items": { + "$ref": "#/components/schemas/MCPServerAuthInfo" + }, + "type": "array", + "title": "Servers", + "description": "List of MCP servers that accept client-provided authorization" + } + }, + "type": "object", + "title": "MCPClientAuthOptionsResponse", + "description": "Response containing MCP servers that accept client-provided authorization.", + "examples": [ + { + "servers": [ { - "additionalProperties": true, - "type": "object" + "client_auth_headers": [ + "Authorization" + ], + "name": "github" }, { - "type": "null" + "client_auth_headers": [ + "Authorization", + "X-API-Key" + ], + "name": "gitlab" } - ], - "title": "Params" + ] + } + ] + }, + "MCPListToolsTool": { + "properties": { + "input_schema": { + "additionalProperties": true, + "type": "object", + "title": "Input Schema" }, - "required": { + "name": { + "type": "string", + "title": "Name" + }, + "description": { "anyOf": [ { - "type": "boolean" + "type": "string" }, { "type": "null" } ], - "title": "Required" - }, - "uri": { - "type": "string", - "title": "Uri" + "title": "Description" } }, "type": "object", "required": [ - "uri" + "input_schema", + "name" ], - 
"title": "AgentExtension", - "description": "A declaration of a protocol extension supported by an Agent." + "title": "MCPListToolsTool", + "description": "Tool definition returned by MCP list tools operation.\n\n:param input_schema: JSON schema defining the tool's input parameters\n:param name: Name of the tool\n:param description: (Optional) Description of what the tool does" }, - "AgentInterface": { + "MCPServerAuthInfo": { "properties": { - "transport": { + "name": { "type": "string", - "title": "Transport", - "examples": [ - "JSONRPC", - "GRPC", - "HTTP+JSON" - ] + "title": "Name", + "description": "MCP server name" }, - "url": { - "type": "string", - "title": "Url", - "examples": [ - "https://api.example.com/a2a/v1", - "https://grpc.example.com/a2a", - "https://rest.example.com/v1" - ] + "client_auth_headers": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Client Auth Headers", + "description": "List of authentication header names for client-provided tokens" } }, "type": "object", "required": [ - "transport", - "url" + "name", + "client_auth_headers" ], - "title": "AgentInterface", - "description": "Declares a combination of a target URL and a transport protocol for interacting with the agent.\nThis allows agents to expose the same functionality over multiple transport mechanisms." + "title": "MCPServerAuthInfo", + "description": "Information about MCP server client authentication options." }, - "AgentProvider": { + "Message": { "properties": { - "organization": { + "content": { "type": "string", - "title": "Organization" + "title": "Content", + "description": "The message content", + "examples": [ + "Hello, how can I help you?" + ] }, - "url": { - "type": "string", - "title": "Url" - } - }, - "type": "object", - "required": [ - "organization", - "url" - ], - "title": "AgentProvider", - "description": "Represents the service provider of an agent." 
- }, - "AgentSkill": { - "properties": { - "description": { + "type": { "type": "string", - "title": "Description" - }, - "examples": { - "anyOf": [ - { - "items": { - "type": "string" - }, - "type": "array" - }, - { - "type": "null" - } + "enum": [ + "user", + "assistant", + "system", + "developer" ], - "title": "Examples", + "title": "Type", + "description": "The type of message", "examples": [ - [ - "I need a recipe for bread" - ] + "user", + "assistant", + "system", + "developer" ] }, - "id": { - "type": "string", - "title": "Id" - }, - "inputModes": { + "referenced_documents": { "anyOf": [ { "items": { - "type": "string" + "$ref": "#/components/schemas/ReferencedDocument" }, "type": "array" }, @@ -5093,838 +7770,924 @@ "type": "null" } ], - "title": "Inputmodes" - }, + "title": "Referenced Documents", + "description": "List of documents referenced in the response (assistant messages only)" + } + }, + "type": "object", + "required": [ + "content", + "type" + ], + "title": "Message", + "description": "Model representing a message in a conversation turn.\n\nAttributes:\n content: The message content.\n type: The type of message.\n referenced_documents: Optional list of documents referenced in an assistant response." + }, + "ModelContextProtocolServer": { + "properties": { "name": { "type": "string", - "title": "Name" + "title": "MCP name", + "description": "MCP server name that must be unique" }, - "outputModes": { + "provider_id": { + "type": "string", + "title": "Provider ID", + "description": "MCP provider identification", + "default": "model-context-protocol" + }, + "url": { + "type": "string", + "title": "MCP server URL", + "description": "URL of the MCP server" + }, + "authorization_headers": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "title": "Authorization headers", + "description": "Headers to send to the MCP server. The map contains the header name and the path to a file containing the header value (secret). 
There are 3 special cases: 1. Usage of the kubernetes token in the header. To specify this use a string 'kubernetes' instead of the file path. 2. Usage of the client-provided token in the header. To specify this use a string 'client' instead of the file path. 3. Usage of the oauth token in the header. To specify this use a string 'oauth' instead of the file path. " + }, + "headers": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Propagated headers", + "description": "List of HTTP header names to automatically forward from the incoming request to this MCP server. Headers listed here are extracted from the original client request and included when calling the MCP server. This is useful when infrastructure components (e.g. API gateways) inject headers that MCP servers need, such as x-rh-identity in HCC. Header matching is case-insensitive. These headers are additive with authorization_headers and MCP-HEADERS." + }, + "timeout": { "anyOf": [ { - "items": { - "type": "string" - }, - "type": "array" + "type": "integer", + "exclusiveMinimum": 0.0 }, { "type": "null" } ], - "title": "Outputmodes" - }, - "security": { + "title": "Request timeout", + "description": "Timeout in seconds for requests to the MCP server. If not specified, the default timeout from Llama Stack will be used. Note: This field is reserved for future use when Llama Stack adds timeout support." + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "name", + "url" + ], + "title": "ModelContextProtocolServer", + "description": "Model context protocol server configuration.\n\nMCP (Model Context Protocol) servers provide tools and capabilities to the\nAI agents. These are configured by this structure. Only MCP servers\ndefined in the lightspeed-stack.yaml configuration are available to the\nagents. 
Tools configured in the llama-stack run.yaml are not accessible to\nlightspeed-core agents.\n\nUseful resources:\n\n- [Model Context Protocol](https://modelcontextprotocol.io/docs/getting-started/intro)\n- [MCP FAQs](https://modelcontextprotocol.io/faqs)\n- [Wikipedia article](https://en.wikipedia.org/wiki/Model_Context_Protocol)" + }, + "ModelsResponse": { + "properties": { + "models": { + "items": { + "additionalProperties": true, + "type": "object" + }, + "type": "array", + "title": "Models", + "description": "List of models available" + } + }, + "type": "object", + "required": [ + "models" + ], + "title": "ModelsResponse", + "description": "Model representing a response to models request.", + "examples": [ + { + "models": [ + { + "api_model_type": "llm", + "identifier": "openai/gpt-4-turbo", + "metadata": {}, + "model_type": "llm", + "provider_id": "openai", + "provider_resource_id": "gpt-4-turbo", + "type": "model" + } + ] + } + ] + }, + "MutualTLSSecurityScheme": { + "properties": { + "description": { "anyOf": [ { - "items": { - "additionalProperties": { - "items": { - "type": "string" - }, - "type": "array" - }, - "type": "object" - }, - "type": "array" + "type": "string" }, { "type": "null" } ], - "title": "Security", - "examples": [ - [ - { - "google": [ - "oidc" - ] - } - ] - ] + "title": "Description" }, - "tags": { - "items": { - "type": "string" - }, - "type": "array", - "title": "Tags", - "examples": [ - [ - "cooking", - "customer support", - "billing" - ] - ] + "type": { + "type": "string", + "const": "mutualTLS", + "title": "Type", + "default": "mutualTLS" } }, "type": "object", - "required": [ - "description", - "id", - "name", - "tags" - ], - "title": "AgentSkill", - "description": "Represents a distinct capability or function that an agent can perform." + "title": "MutualTLSSecurityScheme", + "description": "Defines a security scheme using mTLS authentication." 
}, - "Attachment": { + "NotFoundResponse": { "properties": { - "attachment_type": { - "type": "string", - "title": "Attachment Type", - "description": "The attachment type, like 'log', 'configuration' etc.", - "examples": [ - "log" - ] - }, - "content_type": { - "type": "string", - "title": "Content Type", - "description": "The content type as defined in MIME standard", - "examples": [ - "text/plain" - ] + "status_code": { + "type": "integer", + "title": "Status Code" }, - "content": { - "type": "string", - "title": "Content", - "description": "The actual attachment content", - "examples": [ - "warning: quota exceeded" - ] + "detail": { + "$ref": "#/components/schemas/DetailModel" } }, - "additionalProperties": false, "type": "object", "required": [ - "attachment_type", - "content_type", - "content" + "status_code", + "detail" ], - "title": "Attachment", - "description": "Model representing an attachment that can be send from the UI as part of query.\n\nA list of attachments can be an optional part of 'query' request.\n\nAttributes:\n attachment_type: The attachment type, like \"log\", \"configuration\" etc.\n content_type: The content type as defined in MIME standard\n content: The actual attachment content\n\nYAML attachments with **kind** and **metadata/name** attributes will\nbe handled as resources with the specified name:\n```\nkind: Pod\nmetadata:\n name: private-reg\n```", + "title": "NotFoundResponse", + "description": "404 Not Found - Resource does not exist.", "examples": [ { - "attachment_type": "log", - "content": "this is attachment", - "content_type": "text/plain" + "detail": { + "cause": "Conversation with ID 123e4567-e89b-12d3-a456-426614174000 does not exist", + "response": "Conversation not found" + }, + "label": "conversation" }, { - "attachment_type": "configuration", - "content": "kind: Pod\n metadata:\n name: private-reg", - "content_type": "application/yaml" + "detail": { + "cause": "Provider with ID openai does not exist", + "response": 
"Provider not found" + }, + "label": "provider" }, { - "attachment_type": "configuration", - "content": "foo: bar", - "content_type": "application/yaml" + "detail": { + "cause": "Model with ID gpt-4-turbo is not configured", + "response": "Model not found" + }, + "label": "model" + }, + { + "detail": { + "cause": "Rag with ID vs_7b52a8cf-0fa3-489c-beab-27e061d102f3 does not exist", + "response": "Rag not found" + }, + "label": "rag" + }, + { + "detail": { + "cause": "Streaming Request with ID 123e4567-e89b-12d3-a456-426614174000 does not exist", + "response": "Streaming Request not found" + }, + "label": "streaming request" } ] }, - "AuthenticationConfiguration": { + "OAuth2SecurityScheme": { "properties": { - "module": { - "type": "string", - "title": "Module", - "default": "noop" - }, - "skip_tls_verification": { - "type": "boolean", - "title": "Skip Tls Verification", - "default": false - }, - "skip_for_health_probes": { - "type": "boolean", - "title": "Skip authorization for probes", - "description": "Skip authorization for readiness and liveness probes", - "default": false - }, - "k8s_cluster_api": { + "description": { "anyOf": [ { - "type": "string", - "minLength": 1, - "format": "uri" + "type": "string" }, { "type": "null" } ], - "title": "K8S Cluster Api" + "title": "Description" }, - "k8s_ca_cert_path": { + "flows": { + "$ref": "#/components/schemas/OAuthFlows" + }, + "oauth2MetadataUrl": { "anyOf": [ { - "type": "string", - "format": "file-path" + "type": "string" }, { "type": "null" } ], - "title": "K8S Ca Cert Path" + "title": "Oauth2Metadataurl" }, - "jwk_config": { + "type": { + "type": "string", + "const": "oauth2", + "title": "Type", + "default": "oauth2" + } + }, + "type": "object", + "required": [ + "flows" + ], + "title": "OAuth2SecurityScheme", + "description": "Defines a security scheme using OAuth 2.0." 
+ }, + "OAuthFlows": { + "properties": { + "authorizationCode": { "anyOf": [ { - "$ref": "#/components/schemas/JwkConfiguration" + "$ref": "#/components/schemas/AuthorizationCodeOAuthFlow" }, { "type": "null" } ] }, - "api_key_config": { + "clientCredentials": { "anyOf": [ { - "$ref": "#/components/schemas/APIKeyTokenConfiguration" + "$ref": "#/components/schemas/ClientCredentialsOAuthFlow" }, { "type": "null" } ] }, - "rh_identity_config": { + "implicit": { "anyOf": [ { - "$ref": "#/components/schemas/RHIdentityConfiguration" + "$ref": "#/components/schemas/ImplicitOAuthFlow" }, { "type": "null" } ] - } - }, - "additionalProperties": false, - "type": "object", - "title": "AuthenticationConfiguration", - "description": "Authentication configuration." - }, - "AuthorizationCodeOAuthFlow": { - "properties": { - "authorizationUrl": { - "type": "string", - "title": "Authorizationurl" }, - "refreshUrl": { + "password": { "anyOf": [ { - "type": "string" + "$ref": "#/components/schemas/PasswordOAuthFlow" }, { "type": "null" } - ], - "title": "Refreshurl" - }, - "scopes": { - "additionalProperties": { - "type": "string" - }, - "type": "object", - "title": "Scopes" - }, - "tokenUrl": { - "type": "string", - "title": "Tokenurl" - } - }, - "type": "object", - "required": [ - "authorizationUrl", - "scopes", - "tokenUrl" - ], - "title": "AuthorizationCodeOAuthFlow", - "description": "Defines configuration details for the OAuth 2.0 Authorization Code flow." - }, - "AuthorizationConfiguration": { - "properties": { - "access_rules": { - "items": { - "$ref": "#/components/schemas/AccessRule" - }, - "type": "array", - "title": "Access rules", - "description": "Rules for role-based access control" + ] } }, - "additionalProperties": false, "type": "object", - "title": "AuthorizationConfiguration", - "description": "Authorization configuration." + "title": "OAuthFlows", + "description": "Defines the configuration for the supported OAuth 2.0 flows." 
}, - "AuthorizedResponse": { + "OkpConfiguration": { "properties": { - "user_id": { - "type": "string", - "title": "User Id", - "description": "User ID, for example UUID", - "examples": [ - "c5260aec-4d82-4370-9fdf-05cf908b3f16" - ] + "offline": { + "type": "boolean", + "title": "OKP offline mode", + "description": "When True, use parent_id for OKP chunk source URLs. When False, use reference_url for chunk source URLs.", + "default": true }, - "username": { + "chunk_filter_query": { "type": "string", - "title": "Username", - "description": "User name", - "examples": [ - "John Doe", - "Adam Smith" - ] - }, - "skip_userid_check": { - "type": "boolean", - "title": "Skip Userid Check", - "description": "Whether to skip the user ID check", - "examples": [ - true, - false - ] + "title": "OKP chunk filter query", + "description": "OKP filter query applied to every OKP search request. Defaults to 'is_chunk:true' to restrict results to chunk documents. To add extra constraints, extend the expression using boolean syntax, e.g. 'is_chunk:true AND product:*openshift*'.", + "default": "is_chunk:true" } }, + "additionalProperties": false, "type": "object", - "required": [ - "user_id", - "username", - "skip_userid_check" - ], - "title": "AuthorizedResponse", - "description": "Model representing a response to an authorization request.\n\nAttributes:\n user_id: The ID of the logged in user.\n username: The name of the logged in user.\n skip_userid_check: Whether to skip the user ID check.", - "examples": [ - { - "skip_userid_check": false, - "user_id": "123e4567-e89b-12d3-a456-426614174000", - "username": "user1" - } - ] + "title": "OkpConfiguration", + "description": "OKP (Offline Knowledge Portal) provider configuration.\n\nControls provider-specific behaviour for the OKP vector store.\nOnly relevant when ``\"okp\"`` is listed in ``rag.inline`` or ``rag.tool``." 
}, - "AzureEntraIdConfiguration": { + "OpenAIResponseAnnotationCitation": { "properties": { - "tenant_id": { + "type": { "type": "string", - "format": "password", - "title": "Tenant Id", - "writeOnly": true + "const": "url_citation", + "title": "Type", + "default": "url_citation" }, - "client_id": { - "type": "string", - "format": "password", - "title": "Client Id", - "writeOnly": true + "end_index": { + "type": "integer", + "title": "End Index" }, - "client_secret": { + "start_index": { + "type": "integer", + "title": "Start Index" + }, + "title": { "type": "string", - "format": "password", - "title": "Client Secret", - "writeOnly": true + "title": "Title" }, - "scope": { + "url": { "type": "string", - "title": "Token scope", - "description": "Azure Cognitive Services scope for token requests. Override only if using a different Azure service.", - "default": "https://cognitiveservices.azure.com/.default" + "title": "Url" } }, - "additionalProperties": false, "type": "object", "required": [ - "tenant_id", - "client_id", - "client_secret" + "end_index", + "start_index", + "title", + "url" ], - "title": "AzureEntraIdConfiguration", - "description": "Microsoft Entra ID authentication attributes for Azure." 
+ "title": "OpenAIResponseAnnotationCitation", + "description": "URL citation annotation for referencing external web resources.\n\n:param type: Annotation type identifier, always \"url_citation\"\n:param end_index: End position of the citation span in the content\n:param start_index: Start position of the citation span in the content\n:param title: Title of the referenced web resource\n:param url: URL of the referenced web resource" }, - "BadRequestResponse": { + "OpenAIResponseAnnotationContainerFileCitation": { "properties": { - "status_code": { + "type": { + "type": "string", + "const": "container_file_citation", + "title": "Type", + "default": "container_file_citation" + }, + "container_id": { + "type": "string", + "title": "Container Id" + }, + "end_index": { "type": "integer", - "title": "Status Code" + "title": "End Index" }, - "detail": { - "$ref": "#/components/schemas/DetailModel" + "file_id": { + "type": "string", + "title": "File Id" + }, + "filename": { + "type": "string", + "title": "Filename" + }, + "start_index": { + "type": "integer", + "title": "Start Index" } }, "type": "object", "required": [ - "status_code", - "detail" + "container_id", + "end_index", + "file_id", + "filename", + "start_index" ], - "title": "BadRequestResponse", - "description": "400 Bad Request. 
Invalid resource identifier.", - "examples": [ - { - "detail": { - "cause": "The conversation ID 123e4567-e89b-12d3-a456-426614174000 has invalid format.", - "response": "Invalid conversation ID format" - }, - "label": "conversation_id" - } - ] + "title": "OpenAIResponseAnnotationContainerFileCitation" }, - "ByokRag": { + "OpenAIResponseAnnotationFileCitation": { "properties": { - "rag_id": { + "type": { "type": "string", - "minLength": 1, - "title": "RAG ID", - "description": "Unique RAG ID" + "const": "file_citation", + "title": "Type", + "default": "file_citation" }, - "rag_type": { + "file_id": { "type": "string", - "minLength": 1, - "title": "RAG type", - "description": "Type of RAG database.", - "default": "inline::faiss" + "title": "File Id" }, - "embedding_model": { + "filename": { "type": "string", - "minLength": 1, - "title": "Embedding model", - "description": "Embedding model identification", - "default": "sentence-transformers/all-mpnet-base-v2" + "title": "Filename" }, - "embedding_dimension": { + "index": { "type": "integer", - "exclusiveMinimum": 0.0, - "title": "Embedding dimension", - "description": "Dimensionality of embedding vectors.", - "default": 768 - }, - "vector_db_id": { + "title": "Index" + } + }, + "type": "object", + "required": [ + "file_id", + "filename", + "index" + ], + "title": "OpenAIResponseAnnotationFileCitation", + "description": "File citation annotation for referencing specific files in response content.\n\n:param type: Annotation type identifier, always \"file_citation\"\n:param file_id: Unique identifier of the referenced file\n:param filename: Name of the referenced file\n:param index: Position index of the citation within the content" + }, + "OpenAIResponseAnnotationFilePath": { + "properties": { + "type": { "type": "string", - "minLength": 1, - "title": "Vector DB ID", - "description": "Vector database identification." 
+ "const": "file_path", + "title": "Type", + "default": "file_path" }, - "db_path": { + "file_id": { "type": "string", - "format": "file-path", - "title": "DB path", - "description": "Path to RAG database." + "title": "File Id" }, - "score_multiplier": { - "type": "number", - "exclusiveMinimum": 0.0, - "title": "Score multiplier", - "description": "Multiplier applied to relevance scores from this vector store. Used to weight results when querying multiple knowledge sources. Values > 1 boost this store's results; values < 1 reduce them.", - "default": 1.0 + "index": { + "type": "integer", + "title": "Index" } }, - "additionalProperties": false, "type": "object", "required": [ - "rag_id", - "vector_db_id", - "db_path" + "file_id", + "index" ], - "title": "ByokRag", - "description": "BYOK (Bring Your Own Knowledge) RAG configuration." + "title": "OpenAIResponseAnnotationFilePath" }, - "CORSConfiguration": { + "OpenAIResponseContentPartRefusal": { "properties": { - "allow_origins": { - "items": { - "type": "string" - }, - "type": "array", - "title": "Allow origins", - "description": "A list of origins allowed for cross-origin requests. An origin is the combination of protocol (http, https), domain (myapp.com, localhost, localhost.tiangolo.com), and port (80, 443, 8080). Use ['*'] to allow all origins.", - "default": [ - "*" - ] - }, - "allow_credentials": { - "type": "boolean", - "title": "Allow credentials", - "description": "Indicate that cookies should be supported for cross-origin requests", - "default": false - }, - "allow_methods": { - "items": { - "type": "string" - }, - "type": "array", - "title": "Allow methods", - "description": "A list of HTTP methods that should be allowed for cross-origin requests. 
You can use ['*'] to allow all standard methods.", - "default": [ - "*" - ] + "type": { + "type": "string", + "const": "refusal", + "title": "Type", + "default": "refusal" }, - "allow_headers": { - "items": { - "type": "string" - }, - "type": "array", - "title": "Allow headers", - "description": "A list of HTTP request headers that should be supported for cross-origin requests. You can use ['*'] to allow all headers. The Accept, Accept-Language, Content-Language and Content-Type headers are always allowed for simple CORS requests.", - "default": [ - "*" - ] + "refusal": { + "type": "string", + "title": "Refusal" } }, - "additionalProperties": false, "type": "object", - "title": "CORSConfiguration", - "description": "CORS configuration.\n\nCORS or 'Cross-Origin Resource Sharing' refers to the situations when a\nfrontend running in a browser has JavaScript code that communicates with a\nbackend, and the backend is in a different 'origin' than the frontend.\n\nUseful resources:\n\n - [CORS in FastAPI](https://fastapi.tiangolo.com/tutorial/cors/)\n - [Wikipedia article](https://en.wikipedia.org/wiki/Cross-origin_resource_sharing)\n - [What is CORS?](https://dev.to/akshay_chauhan/what-is-cors-explained-8f1)" + "required": [ + "refusal" + ], + "title": "OpenAIResponseContentPartRefusal", + "description": "Refusal content within a streamed response part.\n\n:param type: Content part type identifier, always \"refusal\"\n:param refusal: Refusal text supplied by the model" }, - "ClientCredentialsOAuthFlow": { + "OpenAIResponseError": { "properties": { - "refreshUrl": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "title": "Refreshurl" - }, - "scopes": { - "additionalProperties": { - "type": "string" - }, - "type": "object", - "title": "Scopes" + "code": { + "type": "string", + "title": "Code" }, - "tokenUrl": { + "message": { "type": "string", - "title": "Tokenurl" + "title": "Message" } }, "type": "object", "required": [ - "scopes", - "tokenUrl" 
+ "code", + "message" ], - "title": "ClientCredentialsOAuthFlow", - "description": "Defines configuration details for the OAuth 2.0 Client Credentials flow." + "title": "OpenAIResponseError", + "description": "Error details for failed OpenAI response requests.\n\n:param code: Error code identifying the type of failure\n:param message: Human-readable error message describing the failure" }, - "Configuration": { + "OpenAIResponseInputFunctionToolCallOutput": { "properties": { - "name": { + "call_id": { "type": "string", - "title": "Service name", - "description": "Name of the service. That value will be used in REST API endpoints." - }, - "service": { - "$ref": "#/components/schemas/ServiceConfiguration", - "title": "Service configuration", - "description": "This section contains Lightspeed Core Stack service configuration." - }, - "llama_stack": { - "$ref": "#/components/schemas/LlamaStackConfiguration", - "title": "Llama Stack configuration", - "description": "This section contains Llama Stack configuration. Lightspeed Core Stack service can call Llama Stack in library mode or in server mode." + "title": "Call Id" }, - "user_data_collection": { - "$ref": "#/components/schemas/UserDataCollection", - "title": "User data collection configuration", - "description": "This section contains configuration for subsystem that collects user data(transcription history and feedbacks)." - }, - "database": { - "$ref": "#/components/schemas/DatabaseConfiguration", - "title": "Database Configuration", - "description": "Configuration for database to store conversation IDs and other runtime data" + "output": { + "type": "string", + "title": "Output" }, - "mcp_servers": { - "items": { - "$ref": "#/components/schemas/ModelContextProtocolServer" - }, - "type": "array", - "title": "Model Context Protocol Server and tools configuration", - "description": "MCP (Model Context Protocol) servers provide tools and capabilities to the AI agents. These are configured in this section. 
Only MCP servers defined in the lightspeed-stack.yaml configuration are available to the agents. Tools configured in the llama-stack run.yaml are not accessible to lightspeed-core agents." + "type": { + "type": "string", + "const": "function_call_output", + "title": "Type", + "default": "function_call_output" }, - "authentication": { - "$ref": "#/components/schemas/AuthenticationConfiguration", - "title": "Authentication configuration", - "description": "Authentication configuration" + "id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Id" }, - "authorization": { + "status": { "anyOf": [ { - "$ref": "#/components/schemas/AuthorizationConfiguration" + "type": "string" }, { "type": "null" } ], - "title": "Authorization configuration", - "description": "Lightspeed Core Stack implements a modular authentication and authorization system with multiple authentication methods. Authorization is configurable through role-based access control. Authentication is handled through selectable modules configured via the module field in the authentication configuration." + "title": "Status" + } + }, + "type": "object", + "required": [ + "call_id", + "output" + ], + "title": "OpenAIResponseInputFunctionToolCallOutput", + "description": "This represents the output of a function call that gets passed back to the model." + }, + "OpenAIResponseInputMessageContentFile": { + "properties": { + "type": { + "type": "string", + "const": "input_file", + "title": "Type", + "default": "input_file" }, - "customization": { + "file_data": { "anyOf": [ { - "$ref": "#/components/schemas/Customization" + "type": "string" }, { "type": "null" } ], - "title": "Custom profile configuration", - "description": "It is possible to customize Lightspeed Core Stack via this section. System prompt can be customized and also different parts of the service can be replaced by custom Python modules." 
- }, - "inference": { - "$ref": "#/components/schemas/InferenceConfiguration", - "title": "Inference configuration", - "description": "One LLM provider and one its model might be selected as default ones. When no provider+model pair is specified in REST API calls (query endpoints), the default provider and model are used." + "title": "File Data" }, - "conversation_cache": { - "$ref": "#/components/schemas/ConversationHistoryConfiguration", - "title": "Conversation history configuration" + "file_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "File Id" }, - "byok_rag": { - "items": { - "$ref": "#/components/schemas/ByokRag" - }, - "type": "array", - "title": "BYOK RAG configuration", - "description": "BYOK RAG configuration. This configuration can be used to reconfigure Llama Stack through its run.yaml configuration file" + "file_url": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "File Url" }, - "a2a_state": { - "$ref": "#/components/schemas/A2AStateConfiguration", - "title": "A2A state configuration", - "description": "Configuration for A2A protocol persistent state storage." + "filename": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Filename" + } + }, + "type": "object", + "title": "OpenAIResponseInputMessageContentFile", + "description": "File content for input messages in OpenAI response format.\n\n:param type: The type of the input item. Always `input_file`.\n:param file_data: The data of the file to be sent to the model.\n:param file_id: (Optional) The ID of the file to be sent to the model.\n:param file_url: The URL of the file to be sent to the model.\n:param filename: The name of the file to be sent to the model." 
+ }, + "OpenAIResponseInputMessageContentImage": { + "properties": { + "detail": { + "anyOf": [ + { + "type": "string", + "const": "low" + }, + { + "type": "string", + "const": "high" + }, + { + "type": "string", + "const": "auto" + } + ], + "title": "Detail", + "default": "auto" }, - "quota_handlers": { - "$ref": "#/components/schemas/QuotaHandlersConfiguration", - "title": "Quota handlers", - "description": "Quota handlers configuration" + "type": { + "type": "string", + "const": "input_image", + "title": "Type", + "default": "input_image" }, - "azure_entra_id": { + "file_id": { "anyOf": [ { - "$ref": "#/components/schemas/AzureEntraIdConfiguration" + "type": "string" }, { "type": "null" } - ] + ], + "title": "File Id" }, - "splunk": { + "image_url": { "anyOf": [ { - "$ref": "#/components/schemas/SplunkConfiguration" + "type": "string" }, { "type": "null" } ], - "title": "Splunk configuration", - "description": "Splunk HEC configuration for sending telemetry events." + "title": "Image Url" + } + }, + "type": "object", + "title": "OpenAIResponseInputMessageContentImage", + "description": "Image content for input messages in OpenAI response format.\n\n:param detail: Level of detail for image processing, can be \"low\", \"high\", or \"auto\"\n:param type: Content type identifier, always \"input_image\"\n:param file_id: (Optional) The ID of the file to be sent to the model.\n:param image_url: (Optional) URL of the image content" + }, + "OpenAIResponseInputMessageContentText": { + "properties": { + "text": { + "type": "string", + "title": "Text" }, - "deployment_environment": { + "type": { "type": "string", - "title": "Deployment environment", - "description": "Deployment environment name (e.g., 'development', 'staging', 'production'). 
Used in telemetry events.", - "default": "development" + "const": "input_text", + "title": "Type", + "default": "input_text" + } + }, + "type": "object", + "required": [ + "text" + ], + "title": "OpenAIResponseInputMessageContentText", + "description": "Text content for input messages in OpenAI response format.\n\n:param text: The text content of the input message\n:param type: Content type identifier, always \"input_text\"" + }, + "OpenAIResponseInputToolChoiceAllowedTools": { + "properties": { + "mode": { + "type": "string", + "enum": [ + "auto", + "required" + ], + "title": "Mode", + "default": "auto" }, - "rag": { - "$ref": "#/components/schemas/RagConfiguration", - "title": "RAG configuration", - "description": "Configuration for all RAG strategies (inline and tool-based)." + "tools": { + "items": { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + "type": "array", + "title": "Tools" }, - "okp": { - "$ref": "#/components/schemas/OkpConfiguration", - "title": "OKP configuration", - "description": "OKP provider settings. Only used when 'okp' is listed in rag.inline or rag.tool." + "type": { + "type": "string", + "const": "allowed_tools", + "title": "Type", + "default": "allowed_tools" } }, - "additionalProperties": false, "type": "object", "required": [ - "name", - "service", - "llama_stack", - "user_data_collection" + "tools" ], - "title": "Configuration", - "description": "Global service configuration." 
+ "title": "OpenAIResponseInputToolChoiceAllowedTools", + "description": "Constrains the tools available to the model to a pre-defined set.\n\n:param mode: Constrains the tools available to the model to a pre-defined set\n:param tools: A list of tool definitions that the model should be allowed to call\n:param type: Tool choice type identifier, always \"allowed_tools\"" }, - "ConfigurationResponse": { + "OpenAIResponseInputToolChoiceCustomTool": { "properties": { - "configuration": { - "$ref": "#/components/schemas/Configuration" + "type": { + "type": "string", + "const": "custom", + "title": "Type", + "default": "custom" + }, + "name": { + "type": "string", + "title": "Name" } }, "type": "object", "required": [ - "configuration" + "name" ], - "title": "ConfigurationResponse", - "description": "Success response model for the config endpoint.", - "examples": [ - { - "configuration": { - "authentication": { - "module": "noop", - "skip_tls_verification": false - }, - "authorization": { - "access_rules": [] - }, - "byok_rag": [], - "conversation_cache": {}, - "database": { - "sqlite": { - "db_path": "/tmp/lightspeed-stack.db" - } - }, - "inference": { - "default_model": "gpt-4-turbo", - "default_provider": "openai" - }, - "llama_stack": { - "api_key": "*****", - "url": "http://localhost:8321", - "use_as_library_client": false + "title": "OpenAIResponseInputToolChoiceCustomTool", + "description": "Forces the model to call a custom tool.\n\n:param type: Tool choice type identifier, always \"custom\"\n:param name: The name of the custom tool to call." 
+ }, + "OpenAIResponseInputToolChoiceFileSearch": { + "properties": { + "type": { + "type": "string", + "const": "file_search", + "title": "Type", + "default": "file_search" + } + }, + "type": "object", + "title": "OpenAIResponseInputToolChoiceFileSearch", + "description": "Indicates that the model should use file search to generate a response.\n\n:param type: Tool choice type identifier, always \"file_search\"" + }, + "OpenAIResponseInputToolChoiceFunctionTool": { + "properties": { + "name": { + "type": "string", + "title": "Name" + }, + "type": { + "type": "string", + "const": "function", + "title": "Type", + "default": "function" + } + }, + "type": "object", + "required": [ + "name" + ], + "title": "OpenAIResponseInputToolChoiceFunctionTool", + "description": "Forces the model to call a specific function.\n\n:param name: The name of the function to call\n:param type: Tool choice type identifier, always \"function\"" + }, + "OpenAIResponseInputToolChoiceMCPTool": { + "properties": { + "server_label": { + "type": "string", + "title": "Server Label" + }, + "type": { + "type": "string", + "const": "mcp", + "title": "Type", + "default": "mcp" + }, + "name": { + "anyOf": [ + { + "type": "string" }, - "mcp_servers": [ - { - "name": "server1", - "provider_id": "provider1", - "url": "http://url.com:1" - } - ], - "name": "lightspeed-stack", - "quota_handlers": { - "enable_token_history": false, - "limiters": [], - "scheduler": { - "period": 1 - } + { + "type": "null" + } + ], + "title": "Name" + } + }, + "type": "object", + "required": [ + "server_label" + ], + "title": "OpenAIResponseInputToolChoiceMCPTool", + "description": "Forces the model to call a specific tool on a remote MCP server\n\n:param server_label: The label of the MCP server to use.\n:param type: Tool choice type identifier, always \"mcp\"\n:param name: (Optional) The name of the tool to call on the server." 
+ }, + "OpenAIResponseInputToolChoiceMode": { + "type": "string", + "enum": [ + "auto", + "required", + "none" + ], + "title": "OpenAIResponseInputToolChoiceMode" + }, + "OpenAIResponseInputToolChoiceWebSearch": { + "properties": { + "type": { + "anyOf": [ + { + "type": "string", + "const": "web_search" }, - "service": { - "access_log": true, - "auth_enabled": false, - "color_log": true, - "cors": { - "allow_credentials": false, - "allow_headers": [ - "*" - ], - "allow_methods": [ - "*" - ], - "allow_origins": [ - "*" - ] - }, - "host": "localhost", - "port": 8080, - "tls_config": {}, - "workers": 1 + { + "type": "string", + "const": "web_search_preview" }, - "user_data_collection": { - "feedback_enabled": true, - "feedback_storage": "/tmp/data/feedback", - "transcripts_enabled": false, - "transcripts_storage": "/tmp/data/transcripts" + { + "type": "string", + "const": "web_search_preview_2025_03_11" + }, + { + "type": "string", + "const": "web_search_2025_08_26" } - } + ], + "title": "Type", + "default": "web_search" } - ] + }, + "type": "object", + "title": "OpenAIResponseInputToolChoiceWebSearch", + "description": "Indicates that the model should use web search to generate a response\n\n:param type: Web search tool type variant to use" }, - "ConversationData": { + "OpenAIResponseInputToolFileSearch": { "properties": { - "conversation_id": { + "type": { "type": "string", - "title": "Conversation Id" + "const": "file_search", + "title": "Type", + "default": "file_search" }, - "topic_summary": { + "vector_store_ids": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Vector Store Ids" + }, + "filters": { "anyOf": [ { - "type": "string" + "additionalProperties": true, + "type": "object" }, { "type": "null" } ], - "title": "Topic Summary" + "title": "Filters" }, - "last_message_timestamp": { - "type": "number", - "title": "Last Message Timestamp" + "max_num_results": { + "anyOf": [ + { + "type": "integer", + "maximum": 50.0, + "minimum": 1.0 + }, + 
{ + "type": "null" + } + ], + "title": "Max Num Results", + "default": 10 + }, + "ranking_options": { + "anyOf": [ + { + "$ref": "#/components/schemas/SearchRankingOptions" + }, + { + "type": "null" + } + ] } }, "type": "object", "required": [ - "conversation_id", - "topic_summary", - "last_message_timestamp" + "vector_store_ids" ], - "title": "ConversationData", - "description": "Model representing conversation data returned by cache list operations.\n\nAttributes:\n conversation_id: The conversation ID\n topic_summary: The topic summary for the conversation (can be None)\n last_message_timestamp: The timestamp of the last message in the conversation" + "title": "OpenAIResponseInputToolFileSearch", + "description": "File search tool configuration for OpenAI response inputs.\n\n:param type: Tool type identifier, always \"file_search\"\n:param vector_store_ids: List of vector store identifiers to search within\n:param filters: (Optional) Additional filters to apply to the search\n:param max_num_results: (Optional) Maximum number of search results to return (1-50)\n:param ranking_options: (Optional) Options for ranking and scoring search results" }, - "ConversationDeleteResponse": { + "OpenAIResponseInputToolFunction": { "properties": { - "conversation_id": { + "type": { "type": "string", - "title": "Conversation Id", - "description": "The conversation ID (UUID) that was deleted.", - "examples": [ - "123e4567-e89b-12d3-a456-426614174000" - ] - }, - "success": { - "type": "boolean", - "title": "Success", - "description": "Whether the deletion was successful.", - "examples": [ - true, - false - ] + "const": "function", + "title": "Type", + "default": "function" }, - "response": { + "name": { "type": "string", - "title": "Response", - "description": "A message about the deletion result.", - "examples": [ - "Conversation deleted successfully", - "Conversation cannot be deleted" - ] + "title": "Name" + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + 
"type": "null" + } + ], + "title": "Description" + }, + "parameters": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Parameters" + }, + "strict": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Strict" } }, "type": "object", "required": [ - "conversation_id", - "success", - "response" + "name", + "parameters" ], - "title": "ConversationDeleteResponse", - "description": "Model representing a response for deleting a conversation.\n\nAttributes:\n conversation_id: The conversation ID (UUID) that was deleted.\n success: Whether the deletion was successful.\n response: A message about the deletion result.", - "examples": [ - { - "label": "deleted", - "value": { - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "response": "Conversation deleted successfully", - "success": true - } - }, - { - "label": "not found", - "value": { - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "response": "Conversation can not be deleted", - "success": true - } - } - ] + "title": "OpenAIResponseInputToolFunction", + "description": "Function tool configuration for OpenAI response inputs.\n\n:param type: Tool type identifier, always \"function\"\n:param name: Name of the function that can be called\n:param description: (Optional) Description of what the function does\n:param parameters: (Optional) JSON schema defining the function's parameters\n:param strict: (Optional) Whether to enforce strict parameter validation" }, - "ConversationDetails": { + "OpenAIResponseInputToolMCP": { "properties": { - "conversation_id": { + "type": { "type": "string", - "title": "Conversation Id", - "description": "Conversation ID (UUID)", - "examples": [ - "c5260aec-4d82-4370-9fdf-05cf908b3f16" - ] + "const": "mcp", + "title": "Type", + "default": "mcp" }, - "created_at": { + "server_label": { + "type": "string", + "title": "Server Label" + }, + "connector_id": { "anyOf": [ { 
"type": "string" @@ -5933,13 +8696,9 @@ "type": "null" } ], - "title": "Created At", - "description": "When the conversation was created", - "examples": [ - "2024-01-01T01:00:00Z" - ] + "title": "Connector Id" }, - "last_message_at": { + "server_url": { "anyOf": [ { "type": "string" @@ -5948,44 +8707,167 @@ "type": "null" } ], - "title": "Last Message At", - "description": "When the last message was sent", - "examples": [ - "2024-01-01T01:00:00Z" - ] + "title": "Server Url" }, - "message_count": { + "headers": { "anyOf": [ { - "type": "integer" + "additionalProperties": true, + "type": "object" }, { "type": "null" } ], - "title": "Message Count", - "description": "Number of user messages in the conversation", - "examples": [ - 42 - ] + "title": "Headers" }, - "last_used_model": { + "authorization": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Authorization" + }, + "require_approval": { + "anyOf": [ + { + "type": "string", + "const": "always" + }, + { + "type": "string", + "const": "never" + }, + { + "$ref": "#/components/schemas/ApprovalFilter" + } + ], + "title": "Require Approval", + "default": "never" + }, + "allowed_tools": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "$ref": "#/components/schemas/AllowedToolsFilter" + }, + { + "type": "null" + } + ], + "title": "Allowed Tools" + } + }, + "type": "object", + "required": [ + "server_label" + ], + "title": "OpenAIResponseInputToolMCP", + "description": "Model Context Protocol (MCP) tool configuration for OpenAI response inputs.\n\n:param type: Tool type identifier, always \"mcp\"\n:param server_label: Label to identify this MCP server\n:param connector_id: (Optional) ID of the connector to use for this MCP server\n:param server_url: (Optional) URL endpoint of the MCP server\n:param headers: (Optional) HTTP headers to include when connecting to the server\n:param authorization: (Optional) OAuth access token for authenticating with the 
MCP server\n:param require_approval: Approval requirement for tool calls (\"always\", \"never\", or filter)\n:param allowed_tools: (Optional) Restriction on which tools can be used from this server" + }, + "OpenAIResponseInputToolWebSearch": { + "properties": { + "type": { + "anyOf": [ + { + "type": "string", + "const": "web_search" + }, + { + "type": "string", + "const": "web_search_preview" + }, + { + "type": "string", + "const": "web_search_preview_2025_03_11" + }, + { + "type": "string", + "const": "web_search_2025_08_26" + } + ], + "title": "Type", + "default": "web_search" + }, + "search_context_size": { "anyOf": [ { - "type": "string" + "type": "string", + "pattern": "^low|medium|high$" }, { "type": "null" } ], - "title": "Last Used Model", - "description": "Identification of the last model used for the conversation", - "examples": [ - "gpt-4-turbo", - "gpt-3.5-turbo-0125" - ] + "title": "Search Context Size", + "default": "medium" + } + }, + "type": "object", + "title": "OpenAIResponseInputToolWebSearch", + "description": "Web search tool configuration for OpenAI response inputs.\n\n:param type: Web search tool type variant to use\n:param search_context_size: (Optional) Size of search context, must be \"low\", \"medium\", or \"high\"" + }, + "OpenAIResponseMCPApprovalRequest": { + "properties": { + "arguments": { + "type": "string", + "title": "Arguments" }, - "last_used_provider": { + "id": { + "type": "string", + "title": "Id" + }, + "name": { + "type": "string", + "title": "Name" + }, + "server_label": { + "type": "string", + "title": "Server Label" + }, + "type": { + "type": "string", + "const": "mcp_approval_request", + "title": "Type", + "default": "mcp_approval_request" + } + }, + "type": "object", + "required": [ + "arguments", + "id", + "name", + "server_label" + ], + "title": "OpenAIResponseMCPApprovalRequest", + "description": "A request for human approval of a tool invocation." 
+ }, + "OpenAIResponseMCPApprovalResponse": { + "properties": { + "approval_request_id": { + "type": "string", + "title": "Approval Request Id" + }, + "approve": { + "type": "boolean", + "title": "Approve" + }, + "type": { + "type": "string", + "const": "mcp_approval_response", + "title": "Type", + "default": "mcp_approval_response" + }, + "id": { "anyOf": [ { "type": "string" @@ -5994,14 +8876,9 @@ "type": "null" } ], - "title": "Last Used Provider", - "description": "Identification of the last provider used for the conversation", - "examples": [ - "openai", - "gemini" - ] + "title": "Id" }, - "topic_summary": { + "reason": { "anyOf": [ { "type": "string" @@ -6010,382 +8887,468 @@ "type": "null" } ], - "title": "Topic Summary", - "description": "Topic summary for the conversation", - "examples": [ - "Openshift Microservices Deployment Strategies" - ] + "title": "Reason" } }, "type": "object", "required": [ - "conversation_id" + "approval_request_id", + "approve" ], - "title": "ConversationDetails", - "description": "Model representing the details of a user conversation.\n\nAttributes:\n conversation_id: The conversation ID (UUID).\n created_at: When the conversation was created.\n last_message_at: When the last message was sent.\n message_count: Number of user messages in the conversation.\n last_used_model: The last model used for the conversation.\n last_used_provider: The provider of the last used model.\n topic_summary: The topic summary for the conversation.\n\nExample:\n ```python\n conversation = ConversationDetails(\n conversation_id=\"123e4567-e89b-12d3-a456-426614174000\",\n created_at=\"2024-01-01T00:00:00Z\",\n last_message_at=\"2024-01-01T00:05:00Z\",\n message_count=5,\n last_used_model=\"gemini/gemini-2.0-flash\",\n last_used_provider=\"gemini\",\n topic_summary=\"Openshift Microservices Deployment Strategies\",\n )\n ```" + "title": "OpenAIResponseMCPApprovalResponse", + "description": "A response to an MCP approval request." 
}, - "ConversationHistoryConfiguration": { + "OpenAIResponseMessage-Input": { "properties": { - "type": { + "content": { "anyOf": [ { - "type": "string", - "enum": [ - "noop", - "memory", - "sqlite", - "postgres" - ] + "type": "string" }, { - "type": "null" + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContentText" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContentFile" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "input_file": "#/components/schemas/OpenAIResponseInputMessageContentFile", + "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage", + "input_text": "#/components/schemas/OpenAIResponseInputMessageContentText" + } + } + }, + "type": "array" + }, + { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageContentOutputText-Input" + }, + { + "$ref": "#/components/schemas/OpenAIResponseContentPartRefusal" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "output_text": "#/components/schemas/OpenAIResponseOutputMessageContentOutputText-Input", + "refusal": "#/components/schemas/OpenAIResponseContentPartRefusal" + } + } + }, + "type": "array" } ], - "title": "Conversation history database type", - "description": "Type of database where the conversation history is to be stored." 
+ "title": "Content" }, - "memory": { + "role": { "anyOf": [ { - "$ref": "#/components/schemas/InMemoryCacheConfig" + "type": "string", + "const": "system" }, { - "type": "null" + "type": "string", + "const": "developer" + }, + { + "type": "string", + "const": "user" + }, + { + "type": "string", + "const": "assistant" } ], - "title": "In-memory cache configuration", - "description": "In-memory cache configuration" + "title": "Role" }, - "sqlite": { + "type": { + "type": "string", + "const": "message", + "title": "Type", + "default": "message" + }, + "id": { "anyOf": [ { - "$ref": "#/components/schemas/SQLiteDatabaseConfiguration" + "type": "string" }, { "type": "null" } ], - "title": "SQLite configuration", - "description": "SQLite database configuration" + "title": "Id" }, - "postgres": { + "status": { "anyOf": [ { - "$ref": "#/components/schemas/PostgreSQLDatabaseConfiguration" + "type": "string" }, { "type": "null" } ], - "title": "PostgreSQL configuration", - "description": "PostgreSQL database configuration" + "title": "Status" } }, - "additionalProperties": false, "type": "object", - "title": "ConversationHistoryConfiguration", - "description": "Conversation history configuration." + "required": [ + "content", + "role" + ], + "title": "OpenAIResponseMessage", + "description": "Corresponds to the various Message types in the Responses API.\nThey are all under one type because the Responses API gives them all\nthe same \"type\" value, and there is no way to tell them apart in certain\nscenarios." 
}, - "ConversationResponse": { + "OpenAIResponseMessage-Output": { "properties": { - "conversation_id": { - "type": "string", - "title": "Conversation Id", - "description": "Conversation ID (UUID)", - "examples": [ - "c5260aec-4d82-4370-9fdf-05cf908b3f16" - ] - }, - "chat_history": { - "items": { - "$ref": "#/components/schemas/ConversationTurn" - }, - "type": "array", - "title": "Chat History", - "description": "The simplified chat history as a list of conversation turns", - "examples": [ + "content": { + "anyOf": [ { - "completed_at": "2024-01-01T00:01:05Z", - "messages": [ - { - "content": "Hello", - "type": "user" - }, - { - "content": "Hi there!", - "type": "assistant" + "type": "string" + }, + { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContentText" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContentFile" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "input_file": "#/components/schemas/OpenAIResponseInputMessageContentFile", + "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage", + "input_text": "#/components/schemas/OpenAIResponseInputMessageContentText" + } } - ], - "model": "gpt-4o-mini", - "provider": "openai", - "started_at": "2024-01-01T00:01:00Z", - "tool_calls": [], - "tool_results": [] - } - ] - } - }, - "type": "object", - "required": [ - "conversation_id", - "chat_history" - ], - "title": "ConversationResponse", - "description": "Model representing a response for retrieving a conversation.\n\nAttributes:\n conversation_id: The conversation ID (UUID).\n chat_history: The chat history as a list of conversation turns.", - "examples": [ - { - "chat_history": [ + }, + "type": "array" + }, { - "completed_at": "2024-01-01T00:01:05Z", - "messages": [ - { - "content": "Hello", - "type": "user" - }, - { - "content": "Hi there!", - "type": "assistant" + "items": 
{ + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageContentOutputText-Output" + }, + { + "$ref": "#/components/schemas/OpenAIResponseContentPartRefusal" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "output_text": "#/components/schemas/OpenAIResponseOutputMessageContentOutputText-Output", + "refusal": "#/components/schemas/OpenAIResponseContentPartRefusal" + } } - ], - "model": "gpt-4o-mini", - "provider": "openai", - "started_at": "2024-01-01T00:01:00Z", - "tool_calls": [], - "tool_results": [] + }, + "type": "array" } - ], - "conversation_id": "123e4567-e89b-12d3-a456-426614174000" - } - ] - }, - "ConversationTurn": { - "properties": { - "messages": { - "items": { - "$ref": "#/components/schemas/Message" - }, - "type": "array", - "title": "Messages", - "description": "List of messages in this turn" - }, - "tool_calls": { - "items": { - "$ref": "#/components/schemas/ToolCallSummary" - }, - "type": "array", - "title": "Tool Calls", - "description": "List of tool calls made in this turn" - }, - "tool_results": { - "items": { - "$ref": "#/components/schemas/ToolResultSummary" - }, - "type": "array", - "title": "Tool Results", - "description": "List of tool results from this turn" + ], + "title": "Content" }, - "provider": { - "type": "string", - "title": "Provider", - "description": "Provider identifier used for this turn", - "examples": [ - "openai" - ] + "role": { + "anyOf": [ + { + "type": "string", + "const": "system" + }, + { + "type": "string", + "const": "developer" + }, + { + "type": "string", + "const": "user" + }, + { + "type": "string", + "const": "assistant" + } + ], + "title": "Role" }, - "model": { + "type": { "type": "string", - "title": "Model", - "description": "Model identifier used for this turn", - "examples": [ - "gpt-4o-mini" - ] + "const": "message", + "title": "Type", + "default": "message" }, - "started_at": { - "type": "string", - "title": "Started At", - "description": "ISO 8601 timestamp 
when the turn started", - "examples": [ - "2024-01-01T00:01:00Z" - ] + "id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Id" }, - "completed_at": { - "type": "string", - "title": "Completed At", - "description": "ISO 8601 timestamp when the turn completed", - "examples": [ - "2024-01-01T00:01:05Z" - ] + "status": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Status" } }, "type": "object", "required": [ - "provider", - "model", - "started_at", - "completed_at" + "content", + "role" ], - "title": "ConversationTurn", - "description": "Model representing a single conversation turn.\n\nAttributes:\n messages: List of messages in this turn.\n tool_calls: List of tool calls made in this turn.\n tool_results: List of tool results from this turn.\n provider: Provider identifier used for this turn.\n model: Model identifier used for this turn.\n started_at: ISO 8601 timestamp when the turn started.\n completed_at: ISO 8601 timestamp when the turn completed." + "title": "OpenAIResponseMessage", + "description": "Corresponds to the various Message types in the Responses API.\nThey are all under one type because the Responses API gives them all\nthe same \"type\" value, and there is no way to tell them apart in certain\nscenarios." 
}, - "ConversationUpdateRequest": { + "OpenAIResponseOutputMessageContentOutputText-Input": { "properties": { - "topic_summary": { + "text": { "type": "string", - "maxLength": 1000, - "minLength": 1, - "title": "Topic Summary", - "description": "The new topic summary for the conversation", - "examples": [ - "Discussion about machine learning algorithms" - ] + "title": "Text" + }, + "type": { + "type": "string", + "const": "output_text", + "title": "Type", + "default": "output_text" + }, + "annotations": { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationFileCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationFilePath" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "container_file_citation": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation", + "file_citation": "#/components/schemas/OpenAIResponseAnnotationFileCitation", + "file_path": "#/components/schemas/OpenAIResponseAnnotationFilePath", + "url_citation": "#/components/schemas/OpenAIResponseAnnotationCitation" + } + } + }, + "type": "array", + "title": "Annotations" + }, + "logprobs": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/OpenAITokenLogProb" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Logprobs" } }, - "additionalProperties": false, "type": "object", "required": [ - "topic_summary" + "text" ], - "title": "ConversationUpdateRequest", - "description": "Model representing a request to update a conversation topic summary.\n\nAttributes:\n topic_summary: The new topic summary for the conversation.\n\nExample:\n ```python\n update_request = ConversationUpdateRequest(\n topic_summary=\"Discussion about machine learning algorithms\"\n )\n ```" + "title": "OpenAIResponseOutputMessageContentOutputText" }, - 
"ConversationUpdateResponse": { + "OpenAIResponseOutputMessageContentOutputText-Output": { "properties": { - "conversation_id": { + "text": { "type": "string", - "title": "Conversation Id", - "description": "The conversation ID (UUID) that was updated", - "examples": [ - "123e4567-e89b-12d3-a456-426614174000" - ] - }, - "success": { - "type": "boolean", - "title": "Success", - "description": "Whether the update was successful", - "examples": [ - true - ] + "title": "Text" }, - "message": { + "type": { "type": "string", - "title": "Message", - "description": "A message about the update result", - "examples": [ - "Topic summary updated successfully" - ] + "const": "output_text", + "title": "Type", + "default": "output_text" + }, + "annotations": { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationFileCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationFilePath" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "container_file_citation": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation", + "file_citation": "#/components/schemas/OpenAIResponseAnnotationFileCitation", + "file_path": "#/components/schemas/OpenAIResponseAnnotationFilePath", + "url_citation": "#/components/schemas/OpenAIResponseAnnotationCitation" + } + } + }, + "type": "array", + "title": "Annotations" + }, + "logprobs": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/OpenAITokenLogProb" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Logprobs" } }, "type": "object", "required": [ - "conversation_id", - "success", - "message" + "text" ], - "title": "ConversationUpdateResponse", - "description": "Model representing a response for updating a conversation topic summary.\n\nAttributes:\n conversation_id: The 
conversation ID (UUID) that was updated.\n success: Whether the update was successful.\n message: A message about the update result.\n\nExample:\n ```python\n update_response = ConversationUpdateResponse(\n conversation_id=\"123e4567-e89b-12d3-a456-426614174000\",\n success=True,\n message=\"Topic summary updated successfully\",\n )\n ```", - "examples": [ - { - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "message": "Topic summary updated successfully", - "success": true - } - ] + "title": "OpenAIResponseOutputMessageContentOutputText" }, - "ConversationsListResponse": { + "OpenAIResponseOutputMessageFileSearchToolCall": { "properties": { - "conversations": { + "id": { + "type": "string", + "title": "Id" + }, + "queries": { "items": { - "$ref": "#/components/schemas/ConversationDetails" + "type": "string" }, "type": "array", - "title": "Conversations" - } - }, - "type": "object", - "required": [ - "conversations" - ], - "title": "ConversationsListResponse", - "description": "Model representing a response for listing conversations of a user.\n\nAttributes:\n conversations: List of conversation details associated with the user.", - "examples": [ - { - "conversations": [ + "title": "Queries" + }, + "status": { + "type": "string", + "title": "Status" + }, + "type": { + "type": "string", + "const": "file_search_call", + "title": "Type", + "default": "file_search_call" + }, + "results": { + "anyOf": [ { - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "created_at": "2024-01-01T00:00:00Z", - "last_message_at": "2024-01-01T00:05:00Z", - "last_used_model": "gemini/gemini-2.0-flash", - "last_used_provider": "gemini", - "message_count": 5, - "topic_summary": "Openshift Microservices Deployment Strategies" + "items": { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCallResults" + }, + "type": "array" }, { - "conversation_id": "456e7890-e12b-34d5-a678-901234567890", - "created_at": "2024-01-01T01:00:00Z", - 
"last_used_model": "gemini/gemini-2.5-flash", - "last_used_provider": "gemini", - "message_count": 2, - "topic_summary": "RHDH Purpose Summary" + "type": "null" } - ] - } - ] - }, - "ConversationsListResponseV2": { - "properties": { - "conversations": { - "items": { - "$ref": "#/components/schemas/ConversationData" - }, - "type": "array", - "title": "Conversations" + ], + "title": "Results" } }, "type": "object", - "required": [ - "conversations" - ], - "title": "ConversationsListResponseV2", - "description": "Model representing a response for listing conversations of a user.\n\nAttributes:\n conversations: List of conversation data associated with the user.", - "examples": [ - { - "conversations": [ - { - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "last_message_timestamp": 1704067200.0, - "topic_summary": "Openshift Microservices Deployment Strategies" - } - ] - } - ] + "required": [ + "id", + "queries", + "status" + ], + "title": "OpenAIResponseOutputMessageFileSearchToolCall", + "description": "File search tool call output message for OpenAI responses.\n\n:param id: Unique identifier for this tool call\n:param queries: List of search queries executed\n:param status: Current status of the file search operation\n:param type: Tool call type identifier, always \"file_search_call\"\n:param results: (Optional) Search results returned by the file search operation" }, - "CustomProfile": { + "OpenAIResponseOutputMessageFileSearchToolCallResults": { "properties": { - "path": { + "attributes": { + "additionalProperties": true, + "type": "object", + "title": "Attributes" + }, + "file_id": { "type": "string", - "title": "Path to custom profile", - "description": "Path to Python modules containing custom profile." 
+ "title": "File Id" }, - "prompts": { - "additionalProperties": { - "type": "string" - }, - "type": "object", - "title": "System prompts", - "description": "Dictionary containing map of system prompts", - "default": {} + "filename": { + "type": "string", + "title": "Filename" + }, + "score": { + "type": "number", + "title": "Score" + }, + "text": { + "type": "string", + "title": "Text" } }, "type": "object", "required": [ - "path" + "attributes", + "file_id", + "filename", + "score", + "text" ], - "title": "CustomProfile", - "description": "Custom profile customization for prompts and validation." + "title": "OpenAIResponseOutputMessageFileSearchToolCallResults", + "description": "Search results returned by the file search operation.\n\n:param attributes: (Optional) Key-value attributes associated with the file\n:param file_id: Unique identifier of the file containing the result\n:param filename: Name of the file containing the result\n:param score: Relevance score for this search result (between 0 and 1)\n:param text: Text content of the search result" }, - "Customization": { + "OpenAIResponseOutputMessageFunctionToolCall": { "properties": { - "profile_path": { + "call_id": { + "type": "string", + "title": "Call Id" + }, + "name": { + "type": "string", + "title": "Name" + }, + "arguments": { + "type": "string", + "title": "Arguments" + }, + "type": { + "type": "string", + "const": "function_call", + "title": "Type", + "default": "function_call" + }, + "id": { "anyOf": [ { "type": "string" @@ -6394,31 +9357,54 @@ "type": "null" } ], - "title": "Profile Path" - }, - "disable_query_system_prompt": { - "type": "boolean", - "title": "Disable Query System Prompt", - "default": false - }, - "disable_shield_ids_override": { - "type": "boolean", - "title": "Disable Shield Ids Override", - "default": false + "title": "Id" }, - "system_prompt_path": { + "status": { "anyOf": [ { - "type": "string", - "format": "file-path" + "type": "string" }, { "type": "null" } ], - 
"title": "System Prompt Path" + "title": "Status" + } + }, + "type": "object", + "required": [ + "call_id", + "name", + "arguments" + ], + "title": "OpenAIResponseOutputMessageFunctionToolCall", + "description": "Function tool call output message for OpenAI responses.\n\n:param call_id: Unique identifier for the function call\n:param name: Name of the function being called\n:param arguments: JSON string containing the function arguments\n:param type: Tool call type identifier, always \"function_call\"\n:param id: (Optional) Additional identifier for the tool call\n:param status: (Optional) Current status of the function call execution" + }, + "OpenAIResponseOutputMessageMCPCall": { + "properties": { + "id": { + "type": "string", + "title": "Id" }, - "system_prompt": { + "type": { + "type": "string", + "const": "mcp_call", + "title": "Type", + "default": "mcp_call" + }, + "arguments": { + "type": "string", + "title": "Arguments" + }, + "name": { + "type": "string", + "title": "Name" + }, + "server_label": { + "type": "string", + "title": "Server Label" + }, + "error": { "anyOf": [ { "type": "string" @@ -6427,367 +9413,443 @@ "type": "null" } ], - "title": "System Prompt" + "title": "Error" }, - "agent_card_path": { + "output": { "anyOf": [ { - "type": "string", - "format": "file-path" + "type": "string" }, { "type": "null" } ], - "title": "Agent Card Path" + "title": "Output" + } + }, + "type": "object", + "required": [ + "id", + "arguments", + "name", + "server_label" + ], + "title": "OpenAIResponseOutputMessageMCPCall", + "description": "Model Context Protocol (MCP) call output message for OpenAI responses.\n\n:param id: Unique identifier for this MCP call\n:param type: Tool call type identifier, always \"mcp_call\"\n:param arguments: JSON string containing the MCP call arguments\n:param name: Name of the MCP method being called\n:param server_label: Label identifying the MCP server handling the call\n:param error: (Optional) Error message if the MCP call 
failed\n:param output: (Optional) Output result from the successful MCP call" + }, + "OpenAIResponseOutputMessageMCPListTools": { + "properties": { + "id": { + "type": "string", + "title": "Id" }, - "agent_card_config": { + "type": { + "type": "string", + "const": "mcp_list_tools", + "title": "Type", + "default": "mcp_list_tools" + }, + "server_label": { + "type": "string", + "title": "Server Label" + }, + "tools": { + "items": { + "$ref": "#/components/schemas/MCPListToolsTool" + }, + "type": "array", + "title": "Tools" + } + }, + "type": "object", + "required": [ + "id", + "server_label", + "tools" + ], + "title": "OpenAIResponseOutputMessageMCPListTools", + "description": "MCP list tools output message containing available tools from an MCP server.\n\n:param id: Unique identifier for this MCP list tools operation\n:param type: Tool call type identifier, always \"mcp_list_tools\"\n:param server_label: Label identifying the MCP server providing the tools\n:param tools: List of available tools provided by the MCP server" + }, + "OpenAIResponseOutputMessageWebSearchToolCall": { + "properties": { + "id": { + "type": "string", + "title": "Id" + }, + "status": { + "type": "string", + "title": "Status" + }, + "type": { + "type": "string", + "const": "web_search_call", + "title": "Type", + "default": "web_search_call" + } + }, + "type": "object", + "required": [ + "id", + "status" + ], + "title": "OpenAIResponseOutputMessageWebSearchToolCall", + "description": "Web search tool call output message for OpenAI responses.\n\n:param id: Unique identifier for this tool call\n:param status: Current status of the web search operation\n:param type: Tool call type identifier, always \"web_search_call\"" + }, + "OpenAIResponsePrompt": { + "properties": { + "id": { + "type": "string", + "title": "Id" + }, + "variables": { "anyOf": [ { - "additionalProperties": true, + "additionalProperties": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContentText" + 
}, + { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContentFile" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "input_file": "#/components/schemas/OpenAIResponseInputMessageContentFile", + "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage", + "input_text": "#/components/schemas/OpenAIResponseInputMessageContentText" + } + } + }, "type": "object" }, { "type": "null" } ], - "title": "Agent Card Config" + "title": "Variables" }, - "custom_profile": { + "version": { "anyOf": [ { - "$ref": "#/components/schemas/CustomProfile" + "type": "string" }, { "type": "null" } - ] + ], + "title": "Version" } }, - "additionalProperties": false, "type": "object", - "title": "Customization", - "description": "Service customization." + "required": [ + "id" + ], + "title": "OpenAIResponsePrompt", + "description": "OpenAI compatible Prompt object that is used in OpenAI responses.\n\n:param id: Unique identifier of the prompt template\n:param variables: Dictionary of variable names to OpenAIResponseInputMessageContent structure for template substitution. 
The substitution values can either be strings, or other Response input types\nlike images or files.\n:param version: Version number of the prompt to use (defaults to latest if not specified)" }, - "DatabaseConfiguration": { + "OpenAIResponseReasoning": { "properties": { - "sqlite": { + "effort": { "anyOf": [ { - "$ref": "#/components/schemas/SQLiteDatabaseConfiguration" + "type": "string", + "enum": [ + "none", + "minimal", + "low", + "medium", + "high", + "xhigh" + ] }, { "type": "null" } ], - "title": "SQLite configuration", - "description": "SQLite database configuration" - }, - "postgres": { + "title": "Effort" + } + }, + "type": "object", + "title": "OpenAIResponseReasoning", + "description": "Configuration for reasoning effort in OpenAI responses.\n\nControls how much reasoning the model performs before generating a response.\n\n:param effort: The effort level for reasoning. \"low\" favors speed and economical token usage,\n \"high\" favors more complete reasoning, \"medium\" is a balance between the two." + }, + "OpenAIResponseText": { + "properties": { + "format": { "anyOf": [ { - "$ref": "#/components/schemas/PostgreSQLDatabaseConfiguration" + "$ref": "#/components/schemas/OpenAIResponseTextFormat" }, { "type": "null" } - ], - "title": "PostgreSQL configuration", - "description": "PostgreSQL database configuration" - } - }, - "additionalProperties": false, - "type": "object", - "title": "DatabaseConfiguration", - "description": "Database configuration." - }, - "DetailModel": { - "properties": { - "response": { - "type": "string", - "title": "Response", - "description": "Short summary of the error" - }, - "cause": { - "type": "string", - "title": "Cause", - "description": "Detailed explanation of what caused the error" + ] } }, "type": "object", - "required": [ - "response", - "cause" - ], - "title": "DetailModel", - "description": "Nested detail model for error responses." 
- }, - "FeedbackCategory": { - "type": "string", - "enum": [ - "incorrect", - "not_relevant", - "incomplete", - "outdated_information", - "unsafe", - "other" - ], - "title": "FeedbackCategory", - "description": "Enum representing predefined feedback categories for AI responses.\n\nThese categories help provide structured feedback about AI inference quality\nwhen users provide negative feedback (thumbs down). Multiple categories can\nbe selected to provide comprehensive feedback about response issues." + "title": "OpenAIResponseText", + "description": "Text response configuration for OpenAI responses.\n\n:param format: (Optional) Text format configuration specifying output format requirements" }, - "FeedbackRequest": { + "OpenAIResponseTextFormat": { "properties": { - "conversation_id": { - "type": "string", - "title": "Conversation Id", - "description": "The required conversation ID (UUID)", - "examples": [ - "c5260aec-4d82-4370-9fdf-05cf908b3f16" - ] - }, - "user_question": { - "type": "string", - "title": "User Question", - "description": "User question (the query string)", - "examples": [ - "What is Kubernetes?" - ] + "type": { + "anyOf": [ + { + "type": "string", + "const": "text" + }, + { + "type": "string", + "const": "json_schema" + }, + { + "type": "string", + "const": "json_object" + } + ], + "title": "Type" }, - "llm_response": { - "type": "string", - "title": "Llm Response", - "description": "Response from LLM", - "examples": [ - "Kubernetes is an open-source container orchestration system for automating ..." 
- ] + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Name" }, - "sentiment": { + "schema": { "anyOf": [ { - "type": "integer" + "additionalProperties": true, + "type": "object" }, { "type": "null" } ], - "title": "Sentiment", - "description": "User sentiment, if provided must be -1 or 1", - "examples": [ - -1, - 1 - ] + "title": "Schema" }, - "user_feedback": { + "description": { "anyOf": [ { - "type": "string", - "maxLength": 4096 + "type": "string" }, { "type": "null" } ], - "title": "User Feedback", - "description": "Feedback on the LLM response.", - "examples": [ - "I'm not satisfied with the response because it is too vague." - ] + "title": "Description" }, - "categories": { + "strict": { "anyOf": [ { - "items": { - "$ref": "#/components/schemas/FeedbackCategory" - }, - "type": "array" + "type": "boolean" }, { "type": "null" } ], - "title": "Categories", - "description": "List of feedback categories that describe issues with the LLM response (for negative feedback).", - "examples": [ - [ - "incorrect", - "incomplete" - ] - ] + "title": "Strict" } }, - "additionalProperties": false, "type": "object", - "required": [ - "conversation_id", - "user_question", - "llm_response" - ], - "title": "FeedbackRequest", - "description": "Model representing a feedback request.\n\nAttributes:\n conversation_id: The required conversation ID (UUID).\n user_question: The required user question.\n llm_response: The required LLM response.\n sentiment: The optional sentiment.\n user_feedback: The optional user feedback.\n categories: The optional list of feedback categories (multi-select for negative feedback).\n\nExample:\n ```python\n feedback_request = FeedbackRequest(\n conversation_id=\"12345678-abcd-0000-0123-456789abcdef\",\n user_question=\"what are you doing?\",\n user_feedback=\"This response is not helpful\",\n llm_response=\"I don't know\",\n sentiment=-1,\n categories=[FeedbackCategory.INCORRECT, 
FeedbackCategory.INCOMPLETE]\n )\n ```", - "examples": [ - { - "conversation_id": "12345678-abcd-0000-0123-456789abcdef", - "llm_response": "bar", - "sentiment": -1, - "user_feedback": "Not satisfied with the response quality.", - "user_question": "foo" - }, - { - "categories": [ - "incorrect" - ], - "conversation_id": "12345678-abcd-0000-0123-456789abcdef", - "llm_response": "The capital of France is Berlin.", - "sentiment": -1, - "user_question": "What is the capital of France?" - }, - { - "categories": [ - "incomplete", - "not_relevant" - ], - "conversation_id": "12345678-abcd-0000-0123-456789abcdef", - "llm_response": "Use Docker.", - "sentiment": -1, - "user_feedback": "This response is too general and doesn't provide specific steps.", - "user_question": "How do I deploy a web app?" - } - ] + "title": "OpenAIResponseTextFormat", + "description": "Configuration for Responses API text format.\n\n:param type: Must be \"text\", \"json_schema\", or \"json_object\" to identify the format type\n:param name: The name of the response format. Only used for json_schema.\n:param schema: The JSON schema the response should conform to. In a Python SDK, this is often a `pydantic` model. Only used for json_schema.\n:param description: (Optional) A description of the response format. Only used for json_schema.\n:param strict: (Optional) Whether to strictly enforce the JSON schema. If true, the response must match the schema exactly. Only used for json_schema." 
}, - "FeedbackResponse": { + "OpenAIResponseToolMCP": { "properties": { - "response": { + "type": { "type": "string", - "title": "Response", - "description": "The response of the feedback request.", - "examples": [ - "feedback received" - ] + "const": "mcp", + "title": "Type", + "default": "mcp" + }, + "server_label": { + "type": "string", + "title": "Server Label" + }, + "allowed_tools": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "$ref": "#/components/schemas/AllowedToolsFilter" + }, + { + "type": "null" + } + ], + "title": "Allowed Tools" } }, "type": "object", "required": [ - "response" + "server_label" ], - "title": "FeedbackResponse", - "description": "Model representing a response to a feedback request.\n\nAttributes:\n response: The response of the feedback request.\n\nExample:\n ```python\n feedback_response = FeedbackResponse(response=\"feedback received\")\n ```", - "examples": [ - { - "response": "feedback received" - } - ] + "title": "OpenAIResponseToolMCP", + "description": "Model Context Protocol (MCP) tool configuration for OpenAI response object.\n\n:param type: Tool type identifier, always \"mcp\"\n:param server_label: Label to identify this MCP server\n:param allowed_tools: (Optional) Restriction on which tools can be used from this server" }, - "FeedbackStatusUpdateRequest": { + "OpenAIResponseUsage": { "properties": { - "status": { - "type": "boolean", - "title": "Status", - "description": "Desired state of feedback enablement, must be False or True", - "default": false, - "examples": [ - true, - false - ] + "input_tokens": { + "type": "integer", + "title": "Input Tokens" + }, + "output_tokens": { + "type": "integer", + "title": "Output Tokens" + }, + "total_tokens": { + "type": "integer", + "title": "Total Tokens" + }, + "input_tokens_details": { + "$ref": "#/components/schemas/OpenAIResponseUsageInputTokensDetails" + }, + "output_tokens_details": { + "$ref": 
"#/components/schemas/OpenAIResponseUsageOutputTokensDetails" } }, - "additionalProperties": false, "type": "object", - "title": "FeedbackStatusUpdateRequest", - "description": "Model representing a feedback status update request.\n\nAttributes:\n status: Value of the desired feedback enabled state.\n\nExample:\n ```python\n feedback_request = FeedbackRequest(\n status=false\n )\n ```" + "required": [ + "input_tokens", + "output_tokens", + "total_tokens", + "input_tokens_details", + "output_tokens_details" + ], + "title": "OpenAIResponseUsage", + "description": "Usage information for OpenAI response.\n\n:param input_tokens: Number of tokens in the input\n:param output_tokens: Number of tokens in the output\n:param total_tokens: Total tokens used (input + output)\n:param input_tokens_details: Detailed breakdown of input token usage\n:param output_tokens_details: Detailed breakdown of output token usage" }, - "FeedbackStatusUpdateResponse": { + "OpenAIResponseUsageInputTokensDetails": { "properties": { - "status": { - "additionalProperties": true, - "type": "object", - "title": "Status" + "cached_tokens": { + "type": "integer", + "title": "Cached Tokens" } }, "type": "object", "required": [ - "status" + "cached_tokens" ], - "title": "FeedbackStatusUpdateResponse", - "description": "Model representing a response to a feedback status update request.\n\nAttributes:\n status: The previous and current status of the service and who updated it.\n\nExample:\n ```python\n status_response = StatusResponse(\n status={\n \"previous_status\": true,\n \"updated_status\": false,\n \"updated_by\": \"user/test\",\n \"timestamp\": \"2023-03-15 12:34:56\"\n },\n )\n ```", - "examples": [ - { - "status": { - "previous_status": true, - "timestamp": "2023-03-15 12:34:56", - "updated_by": "user/test", - "updated_status": false - } - } - ] + "title": "OpenAIResponseUsageInputTokensDetails", + "description": "Token details for input tokens in OpenAI response usage.\n\n:param cached_tokens: 
Number of tokens retrieved from cache" }, - "ForbiddenResponse": { + "OpenAIResponseUsageOutputTokensDetails": { "properties": { - "status_code": { + "reasoning_tokens": { "type": "integer", - "title": "Status Code" - }, - "detail": { - "$ref": "#/components/schemas/DetailModel" + "title": "Reasoning Tokens" } }, "type": "object", "required": [ - "status_code", - "detail" + "reasoning_tokens" ], - "title": "ForbiddenResponse", - "description": "403 Forbidden. Access denied.", - "examples": [ - { - "detail": { - "cause": "User 6789 does not have permission to read conversation with ID 123e4567-e89b-12d3-a456-426614174000", - "response": "User does not have permission to perform this action" - }, - "label": "conversation read" - }, - { - "detail": { - "cause": "User 6789 does not have permission to delete conversation with ID 123e4567-e89b-12d3-a456-426614174000", - "response": "User does not have permission to perform this action" - }, - "label": "conversation delete" + "title": "OpenAIResponseUsageOutputTokensDetails", + "description": "Token details for output tokens in OpenAI response usage.\n\n:param reasoning_tokens: Number of tokens used for reasoning (o1/o3 models)" + }, + "OpenAITokenLogProb": { + "properties": { + "token": { + "type": "string", + "title": "Token", + "description": "The token." }, - { - "detail": { - "cause": "User 6789 is not authorized to access this endpoint.", - "response": "User does not have permission to access this endpoint" - }, - "label": "endpoint" + "bytes": { + "anyOf": [ + { + "items": { + "type": "integer" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Bytes", + "description": "The bytes for the token." }, - { - "detail": { - "cause": "Storing feedback is disabled.", - "response": "Storing feedback is disabled" - }, - "label": "feedback" + "logprob": { + "type": "number", + "title": "Logprob", + "description": "The log probability of the token." 
}, - { - "detail": { - "cause": "User lacks model_override permission required to override model/provider.", - "response": "This instance does not permit overriding model/provider in the query request (missing permission: MODEL_OVERRIDE). Please remove the model and provider fields from your request." - }, - "label": "model override" + "top_logprobs": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/OpenAITopLogProb" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Top Logprobs", + "description": "The top log probabilities for the token." } - ] + }, + "type": "object", + "required": [ + "token", + "logprob" + ], + "title": "OpenAITokenLogProb", + "description": "The log probability for a token from an OpenAI-compatible chat completion response." }, - "HTTPAuthSecurityScheme": { + "OpenAITopLogProb": { "properties": { - "bearerFormat": { + "token": { + "type": "string", + "title": "Token", + "description": "The token." + }, + "bytes": { "anyOf": [ { - "type": "string" + "items": { + "type": "integer" + }, + "type": "array" }, { "type": "null" } ], - "title": "Bearerformat" + "title": "Bytes", + "description": "The bytes for the token." }, + "logprob": { + "type": "number", + "title": "Logprob", + "description": "The log probability of the token." + } + }, + "type": "object", + "required": [ + "token", + "logprob" + ], + "title": "OpenAITopLogProb", + "description": "The top log probability for a token from an OpenAI-compatible chat completion response." 
+ }, + "OpenIdConnectSecurityScheme": { + "properties": { "description": { "anyOf": [ { @@ -6799,43 +9861,26 @@ ], "title": "Description" }, - "scheme": { + "openIdConnectUrl": { "type": "string", - "title": "Scheme" + "title": "Openidconnecturl" }, "type": { "type": "string", - "const": "http", + "const": "openIdConnect", "title": "Type", - "default": "http" + "default": "openIdConnect" } }, "type": "object", "required": [ - "scheme" + "openIdConnectUrl" ], - "title": "HTTPAuthSecurityScheme", - "description": "Defines a security scheme using HTTP authentication." - }, - "HTTPValidationError": { - "properties": { - "detail": { - "items": { - "$ref": "#/components/schemas/ValidationError" - }, - "type": "array", - "title": "Detail" - } - }, - "type": "object", - "title": "HTTPValidationError" + "title": "OpenIdConnectSecurityScheme", + "description": "Defines a security scheme using OpenID Connect." }, - "ImplicitOAuthFlow": { + "PasswordOAuthFlow": { "properties": { - "authorizationUrl": { - "type": "string", - "title": "Authorizationurl" - }, "refreshUrl": { "anyOf": [ { @@ -6853,46 +9898,53 @@ }, "type": "object", "title": "Scopes" + }, + "tokenUrl": { + "type": "string", + "title": "Tokenurl" } }, "type": "object", "required": [ - "authorizationUrl", - "scopes" - ], - "title": "ImplicitOAuthFlow", - "description": "Defines configuration details for the OAuth 2.0 Implicit flow." - }, - "In": { - "type": "string", - "enum": [ - "cookie", - "header", - "query" + "scopes", + "tokenUrl" ], - "title": "In", - "description": "The location of the API key." + "title": "PasswordOAuthFlow", + "description": "Defines configuration details for the OAuth 2.0 Resource Owner Password flow." 
}, - "InMemoryCacheConfig": { + "PostgreSQLDatabaseConfiguration": { "properties": { - "max_entries": { + "host": { + "type": "string", + "title": "Hostname", + "description": "Database server host or socket directory", + "default": "localhost" + }, + "port": { "type": "integer", "exclusiveMinimum": 0.0, - "title": "Max entries", - "description": "Maximum number of entries stored in the in-memory cache" - } - }, - "additionalProperties": false, - "type": "object", - "required": [ - "max_entries" - ], - "title": "InMemoryCacheConfig", - "description": "In-memory cache configuration." - }, - "InferenceConfiguration": { - "properties": { - "default_model": { + "title": "Port", + "description": "Database server port", + "default": 5432 + }, + "db": { + "type": "string", + "title": "Database name", + "description": "Database name to connect to" + }, + "user": { + "type": "string", + "title": "User name", + "description": "Database user name used to authenticate" + }, + "password": { + "type": "string", + "format": "password", + "title": "Password", + "description": "Password used to authenticate", + "writeOnly": true + }, + "namespace": { "anyOf": [ { "type": "string" @@ -6901,543 +9953,665 @@ "type": "null" } ], - "title": "Default model", - "description": "Identification of default model used when no other model is specified." 
+ "title": "Name space", + "description": "Database namespace", + "default": "public" }, - "default_provider": { + "ssl_mode": { + "type": "string", + "title": "SSL mode", + "description": "SSL mode", + "default": "prefer" + }, + "gss_encmode": { + "type": "string", + "title": "GSS encmode", + "description": "This option determines whether or with what priority a secure GSS TCP/IP connection will be negotiated with the server.", + "default": "prefer" + }, + "ca_cert_path": { "anyOf": [ { - "type": "string" + "type": "string", + "format": "file-path" }, { "type": "null" } ], - "title": "Default provider", - "description": "Identification of default provider used when no other model is specified." + "title": "CA certificate path", + "description": "Path to CA certificate" } }, "additionalProperties": false, "type": "object", - "title": "InferenceConfiguration", - "description": "Inference configuration." + "required": [ + "db", + "user", + "password" + ], + "title": "PostgreSQLDatabaseConfiguration", + "description": "PostgreSQL database configuration.\n\nPostgreSQL database is used by Lightspeed Core Stack service for storing\ninformation about conversation IDs. 
It can also be leveraged to store\nconversation history and information about quota usage.\n\nUseful resources:\n\n- [Psycopg: connection classes](https://www.psycopg.org/psycopg3/docs/api/connections.html)\n- [PostgreSQL connection strings](https://www.connectionstrings.com/postgresql/)\n- [How to Use PostgreSQL in Python](https://www.freecodecamp.org/news/postgresql-in-python/)" }, - "InfoResponse": { + "PromptTooLongResponse": { "properties": { - "name": { + "status_code": { + "type": "integer", + "title": "Status Code" + }, + "detail": { + "$ref": "#/components/schemas/DetailModel" + } + }, + "type": "object", + "required": [ + "status_code", + "detail" + ], + "title": "PromptTooLongResponse", + "description": "413 Payload Too Large - Prompt is too long.", + "examples": [ + { + "detail": { + "cause": "The prompt exceeds the maximum allowed length.", + "response": "Prompt is too long" + }, + "label": "prompt too long" + } + ] + }, + "ProviderHealthStatus": { + "properties": { + "provider_id": { "type": "string", - "title": "Name", - "description": "Service name", + "title": "Provider Id", + "description": "The ID of the provider" + }, + "status": { + "type": "string", + "title": "Status", + "description": "The health status", "examples": [ - "Lightspeed Stack" + "ok", + "unhealthy", + "not_implemented" + ] + }, + "message": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Message", + "description": "Optional message about the health status", + "examples": [ + "All systems operational", + "Llama Stack is unavailable" ] + } + }, + "type": "object", + "required": [ + "provider_id", + "status" + ], + "title": "ProviderHealthStatus", + "description": "Model representing the health status of a provider.\n\nAttributes:\n provider_id: The ID of the provider.\n status: The health status ('ok', 'unhealthy', 'not_implemented').\n message: Optional message about the health status." 
+ }, + "ProviderResponse": { + "properties": { + "api": { + "type": "string", + "title": "Api", + "description": "The API this provider implements" + }, + "config": { + "additionalProperties": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "items": {}, + "type": "array" + }, + {}, + { + "type": "null" + } + ] + }, + "type": "object", + "title": "Config", + "description": "Provider configuration parameters" + }, + "health": { + "additionalProperties": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "items": {}, + "type": "array" + }, + {}, + { + "type": "null" + } + ] + }, + "type": "object", + "title": "Health", + "description": "Current health status of the provider" }, - "service_version": { + "provider_id": { "type": "string", - "title": "Service Version", - "description": "Service version", - "examples": [ - "0.1.0", - "0.2.0", - "1.0.0" - ] + "title": "Provider Id", + "description": "Unique provider identifier" }, - "llama_stack_version": { + "provider_type": { "type": "string", - "title": "Llama Stack Version", - "description": "Llama Stack version", - "examples": [ - "0.2.1", - "0.2.2", - "0.2.18", - "0.2.21", - "0.2.22" - ] + "title": "Provider Type", + "description": "Provider implementation type" } }, "type": "object", "required": [ - "name", - "service_version", - "llama_stack_version" + "api", + "config", + "health", + "provider_id", + "provider_type" ], - "title": "InfoResponse", - "description": "Model representing a response to an info request.\n\nAttributes:\n name: Service name.\n service_version: Service version.\n llama_stack_version: Llama Stack version.\n\nExample:\n ```python\n info_response = InfoResponse(\n name=\"Lightspeed Stack\",\n service_version=\"1.0.0\",\n llama_stack_version=\"0.2.22\",\n )\n ```", + "title": "ProviderResponse", + "description": "Model representing a response to get specific provider request.", 
"examples": [ { - "llama_stack_version": "1.0.0", - "name": "Lightspeed Stack", - "service_version": "1.0.0" + "api": "inference", + "config": { + "api_key": "********" + }, + "health": { + "message": "Healthy", + "status": "OK" + }, + "provider_id": "openai", + "provider_type": "remote::openai" } ] }, - "InternalServerErrorResponse": { + "ProvidersListResponse": { "properties": { - "status_code": { - "type": "integer", - "title": "Status Code" - }, - "detail": { - "$ref": "#/components/schemas/DetailModel" + "providers": { + "additionalProperties": { + "items": { + "additionalProperties": true, + "type": "object" + }, + "type": "array" + }, + "type": "object", + "title": "Providers", + "description": "List of available API types and their corresponding providers" } }, "type": "object", "required": [ - "status_code", - "detail" + "providers" ], - "title": "InternalServerErrorResponse", - "description": "500 Internal Server Error.", + "title": "ProvidersListResponse", + "description": "Model representing a response to providers request.", "examples": [ { - "detail": { - "cause": "An unexpected error occurred while processing the request.", - "response": "Internal server error" - }, - "label": "internal" - }, - { - "detail": { - "cause": "Lightspeed Stack configuration has not been initialized.", - "response": "Configuration is not loaded" - }, - "label": "configuration" - }, - { - "detail": { - "cause": "Failed to store feedback at directory: /path/example", - "response": "Failed to store feedback" - }, - "label": "feedback storage" - }, - { - "detail": { - "cause": "Failed to call backend API", - "response": "Error while processing query" - }, - "label": "query" - }, - { - "detail": { - "cause": "Conversation cache is not configured or unavailable.", - "response": "Conversation cache not configured" - }, - "label": "conversation cache" - }, - { - "detail": { - "cause": "Failed to query the database", - "response": "Database query failed" - }, - "label": "database" 
+ "providers": { + "agents": [ + { + "provider_id": "meta-reference", + "provider_type": "inline::meta-reference" + } + ], + "inference": [ + { + "provider_id": "sentence-transformers", + "provider_type": "inline::sentence-transformers" + }, + { + "provider_id": "openai", + "provider_type": "remote::openai" + } + ] + } } ] }, - "JsonPathOperator": { - "type": "string", - "enum": [ - "equals", - "contains", - "in", - "match" - ], - "title": "JsonPathOperator", - "description": "Supported operators for JSONPath evaluation.\n\nNote: this is not a real model, just an enumeration of all supported JSONPath operators." - }, - "JwkConfiguration": { - "properties": { - "url": { - "type": "string", - "minLength": 1, - "format": "uri", - "title": "URL", - "description": "HTTPS URL of the JWK (JSON Web Key) set used to validate JWTs." - }, - "jwt_configuration": { - "$ref": "#/components/schemas/JwtConfiguration", - "title": "JWT configuration", - "description": "JWT (JSON Web Token) configuration" - } - }, - "additionalProperties": false, - "type": "object", - "required": [ - "url" - ], - "title": "JwkConfiguration", - "description": "JWK (JSON Web Key) configuration.\n\nA JSON Web Key (JWK) is a JavaScript Object Notation (JSON) data structure\nthat represents a cryptographic key.\n\nUseful resources:\n\n - [JSON Web Key](https://openid.net/specs/draft-jones-json-web-key-03.html)\n - [RFC 7517](https://www.rfc-editor.org/rfc/rfc7517)" - }, - "JwtConfiguration": { + "QueryRequest": { "properties": { - "user_id_claim": { + "query": { "type": "string", - "title": "User ID claim", - "description": "JWT claim name that uniquely identifies the user (subject ID).", - "default": "user_id" + "title": "Query", + "description": "The query string", + "examples": [ + "What is Kubernetes?" 
+ ] }, - "username_claim": { - "type": "string", - "title": "Username claim", - "description": "JWT claim name that provides the human-readable username.", - "default": "username" + "conversation_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Conversation Id", + "description": "The optional conversation ID (UUID)", + "examples": [ + "c5260aec-4d82-4370-9fdf-05cf908b3f16" + ] }, - "role_rules": { - "items": { - "$ref": "#/components/schemas/JwtRoleRule" - }, - "type": "array", - "title": "Role rules", - "description": "Rules for extracting roles from JWT claims" - } - }, - "additionalProperties": false, - "type": "object", - "title": "JwtConfiguration", - "description": "JWT (JSON Web Token) configuration.\n\nJSON Web Token (JWT) is a compact, URL-safe means of representing\nclaims to be transferred between two parties. The claims in a JWT\nare encoded as a JSON object that is used as the payload of a JSON\nWeb Signature (JWS) structure or as the plaintext of a JSON Web\nEncryption (JWE) structure, enabling the claims to be digitally\nsigned or integrity protected with a Message Authentication Code\n(MAC) and/or encrypted.\n\nUseful resources:\n\n - [JSON Web Token](https://en.wikipedia.org/wiki/JSON_Web_Token)\n - [RFC 7519](https://datatracker.ietf.org/doc/html/rfc7519)\n - [JSON Web Tokens](https://auth0.com/docs/secure/tokens/json-web-tokens)" - }, - "JwtRoleRule": { - "properties": { - "jsonpath": { - "type": "string", - "title": "JSON path", - "description": "JSONPath expression to evaluate against the JWT payload" + "provider": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Provider", + "description": "The optional provider", + "examples": [ + "openai", + "watsonx" + ] }, - "operator": { - "$ref": "#/components/schemas/JsonPathOperator", - "title": "Operator", - "description": "JSON path comparison operator" + "model": { + "anyOf": [ + { + "type": "string" + }, + { + "type": 
"null" + } + ], + "title": "Model", + "description": "The optional model", + "examples": [ + "gpt4mini" + ] }, - "negate": { - "type": "boolean", - "title": "Negate rule", - "description": "If set to true, the meaning of the rule is negated", - "default": false + "system_prompt": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "System Prompt", + "description": "The optional system prompt.", + "examples": [ + "You are OpenShift assistant.", + "You are Ansible assistant." + ] }, - "value": { - "title": "Value", - "description": "Value to compare against" + "attachments": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/Attachment" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Attachments", + "description": "The optional list of attachments.", + "examples": [ + { + "attachment_type": "log", + "content": "this is attachment", + "content_type": "text/plain" + }, + { + "attachment_type": "configuration", + "content": "kind: Pod\n metadata:\n name: private-reg", + "content_type": "application/yaml" + }, + { + "attachment_type": "configuration", + "content": "foo: bar", + "content_type": "application/yaml" + } + ] }, - "roles": { - "items": { - "type": "string" - }, - "type": "array", - "title": "List of roles", - "description": "Roles to be assigned if the rule matches" - } - }, - "additionalProperties": false, - "type": "object", - "required": [ - "jsonpath", - "operator", - "value", - "roles" - ], - "title": "JwtRoleRule", - "description": "Rule for extracting roles from JWT claims." 
- }, - "LivenessResponse": { - "properties": { - "alive": { - "type": "boolean", - "title": "Alive", - "description": "Flag indicating that the app is alive", + "no_tools": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "No Tools", + "description": "Whether to bypass all tools and MCP servers", + "default": false, "examples": [ true, false ] - } - }, - "type": "object", - "required": [ - "alive" - ], - "title": "LivenessResponse", - "description": "Model representing a response to a liveness request.\n\nAttributes:\n alive: If app is alive.\n\nExample:\n ```python\n liveness_response = LivenessResponse(alive=True)\n ```", - "examples": [ - { - "alive": true - } - ] - }, - "LlamaStackConfiguration": { - "properties": { - "url": { + }, + "generate_topic_summary": { "anyOf": [ { - "type": "string", - "minLength": 1, - "format": "uri" + "type": "boolean" }, { "type": "null" } ], - "title": "Llama Stack URL", - "description": "URL to Llama Stack service; used when library mode is disabled. Must be a valid HTTP or HTTPS URL." 
+ "title": "Generate Topic Summary", + "description": "Whether to generate topic summary for new conversations", + "default": true, + "examples": [ + true, + false + ] }, - "api_key": { + "media_type": { "anyOf": [ { - "type": "string", - "format": "password", - "writeOnly": true + "type": "string" }, { "type": "null" } ], - "title": "API key", - "description": "API key to access Llama Stack service" + "title": "Media Type", + "description": "Media type for the response format", + "examples": [ + "application/json", + "text/plain" + ] }, - "use_as_library_client": { + "vector_store_ids": { "anyOf": [ { - "type": "boolean" + "items": { + "type": "string" + }, + "type": "array" }, { "type": "null" } ], - "title": "Use as library", - "description": "When set to true Llama Stack will be used in library mode, not in server mode (default)" + "title": "Vector Store Ids", + "description": "Optional list of specific vector store IDs to query for RAG. If not provided, all available vector stores will be queried.", + "examples": [ + "ocp_docs", + "knowledge_base", + "vector_db_1" + ] }, - "library_client_config_path": { + "shield_ids": { "anyOf": [ { - "type": "string" + "items": { + "type": "string" + }, + "type": "array" }, { "type": "null" } ], - "title": "Llama Stack configuration path", - "description": "Path to configuration file used when Llama Stack is run in library mode" + "title": "Shield Ids", + "description": "Optional list of safety shield IDs to apply. If None, all configured shields are used. ", + "examples": [ + "llama-guard", + "custom-shield" + ] }, - "timeout": { - "type": "integer", - "exclusiveMinimum": 0.0, - "title": "Request timeout", - "description": "Timeout in seconds for requests to Llama Stack service. 
Default is 180 seconds (3 minutes) to accommodate long-running RAG queries.", - "default": 180 + "solr": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Solr", + "description": "Solr-specific query parameters including filter queries", + "examples": [ + { + "fq": [ + "product:*openshift*", + "product_version:*4.16*" + ] + } + ] } }, "additionalProperties": false, "type": "object", - "title": "LlamaStackConfiguration", - "description": "Llama stack configuration.\n\nLlama Stack is a comprehensive system that provides a uniform set of tools\nfor building, scaling, and deploying generative AI applications, enabling\ndevelopers to create, integrate, and orchestrate multiple AI services and\ncapabilities into an adaptable setup.\n\nUseful resources:\n\n - [Llama Stack](https://www.llama.com/products/llama-stack/)\n - [Python Llama Stack client](https://github.com/llamastack/llama-stack-client-python)\n - [Build AI Applications with Llama Stack](https://llamastack.github.io/)" - }, - "MCPClientAuthOptionsResponse": { - "properties": { - "servers": { - "items": { - "$ref": "#/components/schemas/MCPServerAuthInfo" - }, - "type": "array", - "title": "Servers", - "description": "List of MCP servers that accept client-provided authorization" - } - }, - "type": "object", - "title": "MCPClientAuthOptionsResponse", - "description": "Response containing MCP servers that accept client-provided authorization.", + "required": [ + "query" + ], + "title": "QueryRequest", + "description": "Model representing a request for the LLM (Language Model).\n\nAttributes:\n query: The query string.\n conversation_id: The optional conversation ID (UUID).\n provider: The optional provider.\n model: The optional model.\n system_prompt: The optional system prompt.\n attachments: The optional attachments.\n no_tools: Whether to bypass all tools and MCP servers (default: False).\n generate_topic_summary: Whether to generate topic 
summary for new conversations.\n media_type: The optional media type for response format (application/json or text/plain).\n vector_store_ids: The optional list of specific vector store IDs to query for RAG.\n shield_ids: The optional list of safety shield IDs to apply.\n\nExample:\n ```python\n query_request = QueryRequest(query=\"Tell me about Kubernetes\")\n ```", "examples": [ { - "servers": [ + "attachments": [ { - "client_auth_headers": [ - "Authorization" - ], - "name": "github" + "attachment_type": "log", + "content": "this is attachment", + "content_type": "text/plain" }, { - "client_auth_headers": [ - "Authorization", - "X-API-Key" - ], - "name": "gitlab" + "attachment_type": "configuration", + "content": "kind: Pod\n metadata:\n name: private-reg", + "content_type": "application/yaml" + }, + { + "attachment_type": "configuration", + "content": "foo: bar", + "content_type": "application/yaml" } + ], + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "generate_topic_summary": true, + "model": "model-name", + "no_tools": false, + "provider": "openai", + "query": "write a deployment yaml for the mongodb image", + "system_prompt": "You are a helpful assistant", + "vector_store_ids": [ + "ocp_docs", + "knowledge_base" ] } ] }, - "MCPServerAuthInfo": { + "QueryResponse": { "properties": { - "name": { + "conversation_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Conversation Id", + "description": "The optional conversation ID (UUID)", + "examples": [ + "c5260aec-4d82-4370-9fdf-05cf908b3f16" + ] + }, + "response": { "type": "string", - "title": "Name", - "description": "MCP server name" + "title": "Response", + "description": "Response from LLM", + "examples": [ + "Kubernetes is an open-source container orchestration system for automating ..." 
+ ] }, - "client_auth_headers": { + "rag_chunks": { "items": { - "type": "string" + "$ref": "#/components/schemas/RAGChunk" }, "type": "array", - "title": "Client Auth Headers", - "description": "List of authentication header names for client-provided tokens" - } - }, - "type": "object", - "required": [ - "name", - "client_auth_headers" - ], - "title": "MCPServerAuthInfo", - "description": "Information about MCP server client authentication options." - }, - "Message": { - "properties": { - "content": { - "type": "string", - "title": "Content", - "description": "The message content", + "title": "Rag Chunks", + "description": "Deprecated: List of RAG chunks used to generate the response." + }, + "referenced_documents": { + "items": { + "$ref": "#/components/schemas/ReferencedDocument" + }, + "type": "array", + "title": "Referenced Documents", + "description": "List of documents referenced in generating the response", "examples": [ - "Hello, how can I help you?" + [ + { + "doc_title": "Operator Lifecycle Manager (OLM)", + "doc_url": "https://docs.openshift.com/container-platform/4.15/operators/olm/index.html" + } + ] ] }, - "type": { - "type": "string", - "enum": [ - "user", - "assistant", - "system", - "developer" - ], - "title": "Type", - "description": "The type of message", + "truncated": { + "type": "boolean", + "title": "Truncated", + "description": "Deprecated:Whether conversation history was truncated", + "default": false, "examples": [ - "user", - "assistant", - "system", - "developer" + false, + true ] - } - }, - "type": "object", - "required": [ - "content", - "type" - ], - "title": "Message", - "description": "Model representing a message in a conversation turn.\n\nAttributes:\n content: The message content.\n type: The type of message." 
- }, - "ModelContextProtocolServer": { - "properties": { - "name": { - "type": "string", - "title": "MCP name", - "description": "MCP server name that must be unique" }, - "provider_id": { - "type": "string", - "title": "Provider ID", - "description": "MCP provider identification", - "default": "model-context-protocol" + "input_tokens": { + "type": "integer", + "title": "Input Tokens", + "description": "Number of tokens sent to LLM", + "default": 0, + "examples": [ + 150, + 250, + 500 + ] }, - "url": { - "type": "string", - "title": "MCP server URL", - "description": "URL of the MCP server" + "output_tokens": { + "type": "integer", + "title": "Output Tokens", + "description": "Number of tokens received from LLM", + "default": 0, + "examples": [ + 50, + 100, + 200 + ] }, - "authorization_headers": { + "available_quotas": { "additionalProperties": { - "type": "string" + "type": "integer" }, "type": "object", - "title": "Authorization headers", - "description": "Headers to send to the MCP server. The map contains the header name and the path to a file containing the header value (secret). There are 3 special cases: 1. Usage of the kubernetes token in the header. To specify this use a string 'kubernetes' instead of the file path. 2. Usage of the client-provided token in the header. To specify this use a string 'client' instead of the file path. 3. Usage of the oauth token in the header. To specify this use a string 'oauth' instead of the file path. " + "title": "Available Quotas", + "description": "Quota available as measured by all configured quota limiters", + "examples": [ + { + "daily": 1000, + "monthly": 50000 + } + ] }, - "headers": { + "tool_calls": { "items": { - "type": "string" + "$ref": "#/components/schemas/ToolCallSummary" }, "type": "array", - "title": "Propagated headers", - "description": "List of HTTP header names to automatically forward from the incoming request to this MCP server. 
Headers listed here are extracted from the original client request and included when calling the MCP server. This is useful when infrastructure components (e.g. API gateways) inject headers that MCP servers need, such as x-rh-identity in HCC. Header matching is case-insensitive. These headers are additive with authorization_headers and MCP-HEADERS." + "title": "Tool Calls", + "description": "List of tool calls made during response generation" }, - "timeout": { - "anyOf": [ - { - "type": "integer", - "exclusiveMinimum": 0.0 - }, - { - "type": "null" - } - ], - "title": "Request timeout", - "description": "Timeout in seconds for requests to the MCP server. If not specified, the default timeout from Llama Stack will be used. Note: This field is reserved for future use when Llama Stack adds timeout support." - } - }, - "additionalProperties": false, - "type": "object", - "required": [ - "name", - "url" - ], - "title": "ModelContextProtocolServer", - "description": "Model context protocol server configuration.\n\nMCP (Model Context Protocol) servers provide tools and capabilities to the\nAI agents. These are configured by this structure. Only MCP servers\ndefined in the lightspeed-stack.yaml configuration are available to the\nagents. 
Tools configured in the llama-stack run.yaml are not accessible to\nlightspeed-core agents.\n\nUseful resources:\n\n- [Model Context Protocol](https://modelcontextprotocol.io/docs/getting-started/intro)\n- [MCP FAQs](https://modelcontextprotocol.io/faqs)\n- [Wikipedia article](https://en.wikipedia.org/wiki/Model_Context_Protocol)" - }, - "ModelsResponse": { - "properties": { - "models": { + "tool_results": { "items": { - "additionalProperties": true, - "type": "object" + "$ref": "#/components/schemas/ToolResultSummary" }, "type": "array", - "title": "Models", - "description": "List of models available" + "title": "Tool Results", + "description": "List of tool results" } }, "type": "object", "required": [ - "models" + "response" ], - "title": "ModelsResponse", - "description": "Model representing a response to models request.", + "title": "QueryResponse", + "description": "Model representing LLM response to a query.\n\nAttributes:\n conversation_id: The optional conversation ID (UUID).\n response: The response.\n rag_chunks: Deprecated. 
List of RAG chunks used to generate the response.\n This information is now available in tool_results under file_search_call type.\n referenced_documents: The URLs and titles for the documents used to generate the response.\n tool_calls: List of tool calls made during response generation.\n tool_results: List of tool results.\n truncated: Whether conversation history was truncated.\n input_tokens: Number of tokens sent to LLM.\n output_tokens: Number of tokens received from LLM.\n available_quotas: Quota available as measured by all configured quota limiters.", "examples": [ { - "models": [ + "available_quotas": { + "ClusterQuotaLimiter": 998911, + "UserQuotaLimiter": 998911 + }, + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "input_tokens": 123, + "output_tokens": 456, + "referenced_documents": [ { - "api_model_type": "llm", - "identifier": "openai/gpt-4-turbo", - "metadata": {}, - "model_type": "llm", - "provider_id": "openai", - "provider_resource_id": "gpt-4-turbo", - "type": "model" + "doc_title": "Operator Lifecycle Manager concepts and resources", + "doc_url": "https://docs.openshift.com/container-platform/4.15/operators/understanding/olm/olm-understanding-olm.html" } - ] - } - ] - }, - "MutualTLSSecurityScheme": { - "properties": { - "description": { - "anyOf": [ + ], + "response": "Operator Lifecycle Manager (OLM) helps users install...", + "tool_calls": [ { - "type": "string" - }, + "args": {}, + "id": "1", + "name": "tool1", + "type": "tool_call" + } + ], + "tool_results": [ { - "type": "null" + "content": "bla", + "id": "1", + "round": 1, + "status": "success", + "type": "tool_result" } ], - "title": "Description" - }, - "type": { - "type": "string", - "const": "mutualTLS", - "title": "Type", - "default": "mutualTLS" + "truncated": false } - }, - "type": "object", - "title": "MutualTLSSecurityScheme", - "description": "Defines a security scheme using mTLS authentication." 
+ ] }, - "NotFoundResponse": { + "QuotaExceededResponse": { "properties": { "status_code": { "type": "integer", @@ -7451,158 +10625,195 @@ "required": [ "status_code", "detail" - ], - "title": "NotFoundResponse", - "description": "404 Not Found - Resource does not exist.", + ], + "title": "QuotaExceededResponse", + "description": "429 Too Many Requests - Quota limit exceeded.", "examples": [ { "detail": { - "cause": "Conversation with ID 123e4567-e89b-12d3-a456-426614174000 does not exist", - "response": "Conversation not found" + "cause": "The token quota for model gpt-4-turbo has been exceeded.", + "response": "The model quota has been exceeded" }, - "label": "conversation" + "label": "model" }, { "detail": { - "cause": "Provider with ID openai does not exist", - "response": "Provider not found" + "cause": "User 123 has no available tokens.", + "response": "The quota has been exceeded" }, - "label": "provider" + "label": "user none" }, { "detail": { - "cause": "Model with ID gpt-4-turbo is not configured", - "response": "Model not found" + "cause": "Cluster has no available tokens.", + "response": "The quota has been exceeded" }, - "label": "model" + "label": "cluster none" }, { "detail": { - "cause": "Rag with ID vs_7b52a8cf-0fa3-489c-beab-27e061d102f3 does not exist", - "response": "Rag not found" + "cause": "Unknown subject 999 has no available tokens.", + "response": "The quota has been exceeded" }, - "label": "rag" + "label": "subject none" }, { "detail": { - "cause": "Streaming Request with ID 123e4567-e89b-12d3-a456-426614174000 does not exist", - "response": "Streaming Request not found" + "cause": "User 123 has 5 tokens, but 10 tokens are needed.", + "response": "The quota has been exceeded" }, - "label": "streaming request" + "label": "user insufficient" + }, + { + "detail": { + "cause": "Cluster has 500 tokens, but 900 tokens are needed.", + "response": "The quota has been exceeded" + }, + "label": "cluster insufficient" + }, + { + "detail": { + 
"cause": "Unknown subject 999 has 3 tokens, but 6 tokens are needed.", + "response": "The quota has been exceeded" + }, + "label": "subject insufficient" } ] }, - "OAuth2SecurityScheme": { + "QuotaHandlersConfiguration": { "properties": { - "description": { + "sqlite": { "anyOf": [ { - "type": "string" + "$ref": "#/components/schemas/SQLiteDatabaseConfiguration" }, { "type": "null" } ], - "title": "Description" - }, - "flows": { - "$ref": "#/components/schemas/OAuthFlows" + "title": "SQLite configuration", + "description": "SQLite database configuration" }, - "oauth2MetadataUrl": { + "postgres": { "anyOf": [ { - "type": "string" + "$ref": "#/components/schemas/PostgreSQLDatabaseConfiguration" }, { "type": "null" } ], - "title": "Oauth2Metadataurl" + "title": "PostgreSQL configuration", + "description": "PostgreSQL database configuration" }, - "type": { - "type": "string", - "const": "oauth2", - "title": "Type", - "default": "oauth2" + "limiters": { + "items": { + "$ref": "#/components/schemas/QuotaLimiterConfiguration" + }, + "type": "array", + "title": "Quota limiters", + "description": "Quota limiters configuration" + }, + "scheduler": { + "$ref": "#/components/schemas/QuotaSchedulerConfiguration", + "title": "Quota scheduler", + "description": "Quota scheduler configuration" + }, + "enable_token_history": { + "type": "boolean", + "title": "Enable token history", + "description": "Enables storing information about token usage history", + "default": false } }, + "additionalProperties": false, "type": "object", - "required": [ - "flows" - ], - "title": "OAuth2SecurityScheme", - "description": "Defines a security scheme using OAuth 2.0." + "title": "QuotaHandlersConfiguration", + "description": "Quota limiter configuration.\n\nIt is possible to limit quota usage per user or per service or services\n(that typically run in one cluster). Each limit is configured as a separate\n_quota limiter_. 
It can be of type `user_limiter` or `cluster_limiter`\n(which is name that makes sense in OpenShift deployment)." }, - "OAuthFlows": { + "QuotaLimiterConfiguration": { "properties": { - "authorizationCode": { - "anyOf": [ - { - "$ref": "#/components/schemas/AuthorizationCodeOAuthFlow" - }, - { - "type": "null" - } - ] + "type": { + "type": "string", + "enum": [ + "user_limiter", + "cluster_limiter" + ], + "title": "Quota limiter type", + "description": "Quota limiter type, either user_limiter or cluster_limiter" }, - "clientCredentials": { - "anyOf": [ - { - "$ref": "#/components/schemas/ClientCredentialsOAuthFlow" - }, - { - "type": "null" - } - ] + "name": { + "type": "string", + "title": "Quota limiter name", + "description": "Human readable quota limiter name" }, - "implicit": { - "anyOf": [ - { - "$ref": "#/components/schemas/ImplicitOAuthFlow" - }, - { - "type": "null" - } - ] + "initial_quota": { + "type": "integer", + "minimum": 0.0, + "title": "Initial quota", + "description": "Quota set at beginning of the period" }, - "password": { - "anyOf": [ - { - "$ref": "#/components/schemas/PasswordOAuthFlow" - }, - { - "type": "null" - } - ] + "quota_increase": { + "type": "integer", + "minimum": 0.0, + "title": "Quota increase", + "description": "Delta value used to increase quota when period is reached" + }, + "period": { + "type": "string", + "title": "Period", + "description": "Period specified in human readable form" } }, + "additionalProperties": false, "type": "object", - "title": "OAuthFlows", - "description": "Defines the configuration for the supported OAuth 2.0 flows." + "required": [ + "type", + "name", + "initial_quota", + "quota_increase", + "period" + ], + "title": "QuotaLimiterConfiguration", + "description": "Configuration for one quota limiter.\n\nThere are three configuration options for each limiter:\n\n1. 
``period`` is specified in a human-readable form, see\n https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-INTERVAL-INPUT\n for all possible options. When the end of the period is reached, the\n quota is reset or increased.\n2. ``initial_quota`` is the value set at the beginning of the period.\n3. ``quota_increase`` is the value (if specified) used to increase the\n quota when the period is reached.\n\nThere are two basic use cases:\n\n1. When the quota needs to be reset to a specific value periodically (for\n example on a weekly or monthly basis), set ``initial_quota`` to the\n required value.\n2. When the quota needs to be increased by a specific value periodically\n (for example on a daily basis), set ``quota_increase``." }, - "OkpConfiguration": { + "QuotaSchedulerConfiguration": { "properties": { - "offline": { - "type": "boolean", - "title": "OKP offline mode", - "description": "When True, use parent_id for OKP chunk source URLs. When False, use reference_url for chunk source URLs.", - "default": true + "period": { + "type": "integer", + "exclusiveMinimum": 0.0, + "title": "Period", + "description": "Quota scheduler period specified in seconds", + "default": 1 }, - "chunk_filter_query": { - "type": "string", - "title": "OKP chunk filter query", - "description": "OKP filter query applied to every OKP search request. Defaults to 'is_chunk:true' to restrict results to chunk documents. To add extra constraints, extend the expression using boolean syntax, e.g. 'is_chunk:true AND product:*openshift*'.", - "default": "is_chunk:true" + "database_reconnection_count": { + "type": "integer", + "exclusiveMinimum": 0.0, + "title": "Database reconnection count on startup", + "description": "Database reconnection count on startup. 
When database for quota is not available on startup, the service tries to reconnect N times with specified delay.", + "default": 10 + }, + "database_reconnection_delay": { + "type": "integer", + "exclusiveMinimum": 0.0, + "title": "Database reconnection delay", + "description": "Database reconnection delay specified in seconds. When database for quota is not available on startup, the service tries to reconnect N times with specified delay.", + "default": 1 } }, "additionalProperties": false, "type": "object", - "title": "OkpConfiguration", - "description": "OKP (Offline Knowledge Portal) provider configuration.\n\nControls provider-specific behaviour for the OKP vector store.\nOnly relevant when ``\"okp\"`` is listed in ``rag.inline`` or ``rag.tool``." + "title": "QuotaSchedulerConfiguration", + "description": "Quota scheduler configuration." }, - "OpenIdConnectSecurityScheme": { + "RAGChunk": { "properties": { - "description": { + "content": { + "type": "string", + "title": "Content", + "description": "The content of the chunk" + }, + "source": { "anyOf": [ { "type": "string" @@ -7611,92 +10822,53 @@ "type": "null" } ], - "title": "Description" + "title": "Source", + "description": "Index name identifying the knowledge source from configuration" }, - "openIdConnectUrl": { - "type": "string", - "title": "Openidconnecturl" + "score": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "title": "Score", + "description": "Relevance score" }, - "type": { - "type": "string", - "const": "openIdConnect", - "title": "Type", - "default": "openIdConnect" - } - }, - "type": "object", - "required": [ - "openIdConnectUrl" - ], - "title": "OpenIdConnectSecurityScheme", - "description": "Defines a security scheme using OpenID Connect." 
- }, - "PasswordOAuthFlow": { - "properties": { - "refreshUrl": { + "attributes": { "anyOf": [ { - "type": "string" + "additionalProperties": true, + "type": "object" }, { "type": "null" } ], - "title": "Refreshurl" - }, - "scopes": { - "additionalProperties": { - "type": "string" - }, - "type": "object", - "title": "Scopes" - }, - "tokenUrl": { - "type": "string", - "title": "Tokenurl" + "title": "Attributes", + "description": "Document metadata from the RAG provider (e.g., url, title, author)" } }, "type": "object", "required": [ - "scopes", - "tokenUrl" + "content" ], - "title": "PasswordOAuthFlow", - "description": "Defines configuration details for the OAuth 2.0 Resource Owner Password flow." + "title": "RAGChunk", + "description": "Model representing a RAG chunk used in the response." }, - "PostgreSQLDatabaseConfiguration": { + "RAGInfoResponse": { "properties": { - "host": { - "type": "string", - "title": "Hostname", - "description": "Database server host or socket directory", - "default": "localhost" - }, - "port": { - "type": "integer", - "exclusiveMinimum": 0.0, - "title": "Port", - "description": "Database server port", - "default": 5432 - }, - "db": { - "type": "string", - "title": "Database name", - "description": "Database name to connect to" - }, - "user": { - "type": "string", - "title": "User name", - "description": "Database user name used to authenticate" - }, - "password": { + "id": { "type": "string", - "format": "password", - "title": "Password", - "description": "Password used to authenticate", - "writeOnly": true + "title": "Id", + "description": "Vector DB unique ID", + "examples": [ + "vs_00000000_0000_0000" + ] }, - "namespace": { + "name": { "anyOf": [ { "type": "string" @@ -7705,296 +10877,244 @@ "type": "null" } ], - "title": "Name space", - "description": "Database namespace", - "default": "public" - }, - "ssl_mode": { - "type": "string", - "title": "SSL mode", - "description": "SSL mode", - "default": "prefer" + "title": "Name", + 
"description": "Human readable vector DB name", + "examples": [ + "Faiss Store with Knowledge base" + ] }, - "gss_encmode": { - "type": "string", - "title": "GSS encmode", - "description": "This option determines whether or with what priority a secure GSS TCP/IP connection will be negotiated with the server.", - "default": "prefer" + "created_at": { + "type": "integer", + "title": "Created At", + "description": "When the vector store was created, represented as Unix time", + "examples": [ + 1763391371 + ] }, - "ca_cert_path": { + "last_active_at": { "anyOf": [ { - "type": "string", - "format": "file-path" + "type": "integer" }, { "type": "null" } ], - "title": "CA certificate path", - "description": "Path to CA certificate" - } - }, - "additionalProperties": false, - "type": "object", - "required": [ - "db", - "user", - "password" - ], - "title": "PostgreSQLDatabaseConfiguration", - "description": "PostgreSQL database configuration.\n\nPostgreSQL database is used by Lightspeed Core Stack service for storing\ninformation about conversation IDs. 
It can also be leveraged to store\nconversation history and information about quota usage.\n\nUseful resources:\n\n- [Psycopg: connection classes](https://www.psycopg.org/psycopg3/docs/api/connections.html)\n- [PostgreSQL connection strings](https://www.connectionstrings.com/postgresql/)\n- [How to Use PostgreSQL in Python](https://www.freecodecamp.org/news/postgresql-in-python/)" - }, - "PromptTooLongResponse": { - "properties": { - "status_code": { - "type": "integer", - "title": "Status Code" - }, - "detail": { - "$ref": "#/components/schemas/DetailModel" - } - }, - "type": "object", - "required": [ - "status_code", - "detail" - ], - "title": "PromptTooLongResponse", - "description": "413 Payload Too Large - Prompt is too long.", - "examples": [ - { - "detail": { - "cause": "The prompt exceeds the maximum allowed length.", - "response": "Prompt is too long" - }, - "label": "prompt too long" - } - ] - }, - "ProviderHealthStatus": { - "properties": { - "provider_id": { - "type": "string", - "title": "Provider Id", - "description": "The ID of the provider" + "title": "Last Active At", + "description": "When the vector store was last active, represented as Unix time", + "examples": [ + 1763391371 + ] }, - "status": { - "type": "string", - "title": "Status", - "description": "The health status", + "usage_bytes": { + "type": "integer", + "title": "Usage Bytes", + "description": "Storage byte(s) used by this vector DB", "examples": [ - "ok", - "unhealthy", - "not_implemented" + 0 ] }, - "message": { + "expires_at": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "null" } ], - "title": "Message", - "description": "Optional message about the health status", + "title": "Expires At", + "description": "When the vector store expires, represented as Unix time", "examples": [ - "All systems operational", - "Llama Stack is unavailable" + 1763391371 ] - } - }, - "type": "object", - "required": [ - "provider_id", - "status" - ], - "title": 
"ProviderHealthStatus", - "description": "Model representing the health status of a provider.\n\nAttributes:\n provider_id: The ID of the provider.\n status: The health status ('ok', 'unhealthy', 'not_implemented').\n message: Optional message about the health status." - }, - "ProviderResponse": { - "properties": { - "api": { - "type": "string", - "title": "Api", - "description": "The API this provider implements" - }, - "config": { - "additionalProperties": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "items": {}, - "type": "array" - }, - {}, - { - "type": "null" - } - ] - }, - "type": "object", - "title": "Config", - "description": "Provider configuration parameters" - }, - "health": { - "additionalProperties": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "items": {}, - "type": "array" - }, - {}, - { - "type": "null" - } - ] - }, - "type": "object", - "title": "Health", - "description": "Current health status of the provider" }, - "provider_id": { + "object": { "type": "string", - "title": "Provider Id", - "description": "Unique provider identifier" + "title": "Object", + "description": "Object type", + "examples": [ + "vector_store" + ] }, - "provider_type": { + "status": { "type": "string", - "title": "Provider Type", - "description": "Provider implementation type" + "title": "Status", + "description": "Vector DB status", + "examples": [ + "completed" + ] } }, "type": "object", "required": [ - "api", - "config", - "health", - "provider_id", - "provider_type" + "id", + "created_at", + "usage_bytes", + "object", + "status" ], - "title": "ProviderResponse", - "description": "Model representing a response to get specific provider request.", + "title": "RAGInfoResponse", + "description": "Model representing a response with information about RAG DB.", "examples": [ { - "api": "inference", - "config": { - "api_key": "********" - }, - "health": { - 
"message": "Healthy", - "status": "OK" - }, - "provider_id": "openai", - "provider_type": "remote::openai" + "created_at": 1763391371, + "id": "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3", + "last_active_at": 1763391371, + "name": "Faiss Store with Knowledge base", + "object": "vector_store", + "status": "completed", + "usage_bytes": 1024000 } ] }, - "ProvidersListResponse": { + "RAGListResponse": { "properties": { - "providers": { - "additionalProperties": { - "items": { - "additionalProperties": true, - "type": "object" - }, - "type": "array" + "rags": { + "items": { + "type": "string" }, - "type": "object", - "title": "Providers", - "description": "List of available API types and their corresponding providers" + "type": "array", + "title": "RAG list response", + "description": "List of RAG identifiers", + "examples": [ + "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3", + "vs_7b52a8cf-0fa3-489c-cafe-27e061d102f3" + ] } }, "type": "object", "required": [ - "providers" + "rags" ], - "title": "ProvidersListResponse", - "description": "Model representing a response to providers request.", + "title": "RAGListResponse", + "description": "Model representing a response to list RAGs request.", "examples": [ { - "providers": { - "agents": [ - { - "provider_id": "meta-reference", - "provider_type": "inline::meta-reference" - } - ], - "inference": [ - { - "provider_id": "sentence-transformers", - "provider_type": "inline::sentence-transformers" - }, - { - "provider_id": "openai", - "provider_type": "remote::openai" - } - ] - } + "rags": [ + "vs_00000000-cafe-babe-0000-000000000000", + "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3", + "vs_7b52a8cf-0fa3-489c-cafe-27e061d102f3" + ] } ] }, - "QueryRequest": { + "RHIdentityConfiguration": { "properties": { - "query": { - "type": "string", - "title": "Query", - "description": "The query string", - "examples": [ - "What is Kubernetes?" 
- ] + "required_entitlements": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Required entitlements", + "description": "List of all required entitlements." + } + }, + "additionalProperties": false, + "type": "object", + "title": "RHIdentityConfiguration", + "description": "Red Hat Identity authentication configuration." + }, + "RagConfiguration": { + "properties": { + "inline": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Inline RAG IDs", + "description": "RAG IDs whose sources are injected as context before the LLM call. Use 'okp' to enable OKP inline RAG. Empty by default (no inline RAG)." }, - "conversation_id": { + "tool": { "anyOf": [ { - "type": "string" + "items": { + "type": "string" + }, + "type": "array" }, { "type": "null" } ], - "title": "Conversation Id", - "description": "The optional conversation ID (UUID)", + "title": "Tool RAG IDs", + "description": "RAG IDs made available to the LLM as a file_search tool. Use 'okp' to include the OKP vector store. When omitted, all registered BYOK vector stores are used (backward compatibility)." + } + }, + "additionalProperties": false, + "type": "object", + "title": "RagConfiguration", + "description": "RAG strategy configuration.\n\nControls which RAG sources are used for inline and tool-based retrieval.\n\nEach strategy lists RAG IDs to include. The special ID ``\"okp\"`` defined in constants,\nactivates the OKP provider; all other IDs refer to entries in ``byok_rag``.\n\nBackward compatibility:\n - ``inline`` defaults to ``[]`` (no inline RAG).\n - ``tool`` defaults to ``None`` which means all registered vector stores\n are used (identical to the previous ``tool.byok.enabled = True`` default)." 
+ }, + "ReadinessResponse": { + "properties": { + "ready": { + "type": "boolean", + "title": "Ready", + "description": "Flag indicating if service is ready", "examples": [ - "c5260aec-4d82-4370-9fdf-05cf908b3f16" + true, + false ] }, - "provider": { + "reason": { + "type": "string", + "title": "Reason", + "description": "The reason for the readiness", + "examples": [ + "Service is ready" + ] + }, + "providers": { + "items": { + "$ref": "#/components/schemas/ProviderHealthStatus" + }, + "type": "array", + "title": "Providers", + "description": "List of unhealthy providers in case of readiness failure.", + "examples": [] + } + }, + "type": "object", + "required": [ + "ready", + "reason", + "providers" + ], + "title": "ReadinessResponse", + "description": "Model representing response to a readiness request.\n\nAttributes:\n ready: If service is ready.\n reason: The reason for the readiness.\n providers: List of unhealthy providers in case of readiness failure.\n\nExample:\n ```python\n readiness_response = ReadinessResponse(\n ready=False,\n reason=\"Service is not ready\",\n providers=[\n ProviderHealthStatus(\n provider_id=\"ollama\",\n status=\"unhealthy\",\n message=\"Server is unavailable\"\n )\n ]\n )\n ```", + "examples": [ + { + "providers": [], + "ready": true, + "reason": "Service is ready" + } + ] + }, + "ReferencedDocument": { + "properties": { + "doc_url": { "anyOf": [ { - "type": "string" + "type": "string", + "minLength": 1, + "format": "uri" }, { "type": "null" } ], - "title": "Provider", - "description": "The optional provider", - "examples": [ - "openai", - "watsonx" - ] + "title": "Doc Url", + "description": "URL of the referenced document" }, - "model": { + "doc_title": { "anyOf": [ { "type": "string" @@ -8003,13 +11123,10 @@ "type": "null" } ], - "title": "Model", - "description": "The optional model", - "examples": [ - "gpt4mini" - ] + "title": "Doc Title", + "description": "Title of the referenced document" }, - "system_prompt": { + "source": { 
"anyOf": [ { "type": "string" @@ -8018,80 +11135,100 @@ "type": "null" } ], - "title": "System Prompt", - "description": "The optional system prompt.", - "examples": [ - "You are OpenShift assistant.", - "You are Ansible assistant." - ] + "title": "Source", + "description": "Index name identifying the knowledge source from configuration" + } + }, + "type": "object", + "title": "ReferencedDocument", + "description": "Model representing a document referenced in generating a response.\n\nAttributes:\n doc_url: Url to the referenced doc.\n doc_title: Title of the referenced doc." + }, + "ResponseInput": { + "anyOf": [ + { + "type": "string" }, - "attachments": { + { + "items": { + "$ref": "#/components/schemas/ResponseItem" + }, + "type": "array" + } + ] + }, + "ResponseItem": { + "anyOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseMessage-Input" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools" + }, + { + "$ref": "#/components/schemas/OpenAIResponseMCPApprovalRequest" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseMCPApprovalResponse" + } + ] + }, + "ResponsesRequest": { + "properties": { + "input": { + "$ref": "#/components/schemas/ResponseInput" + }, + "model": { "anyOf": [ { - "items": { - "$ref": "#/components/schemas/Attachment" - }, - "type": "array" + "type": "string" }, { "type": "null" } ], - "title": "Attachments", - "description": "The optional list of attachments.", - "examples": [ - { - "attachment_type": "log", - "content": "this is attachment", - "content_type": "text/plain" - }, - { - 
"attachment_type": "configuration", - "content": "kind: Pod\n metadata:\n name: private-reg", - "content_type": "application/yaml" - }, - { - "attachment_type": "configuration", - "content": "foo: bar", - "content_type": "application/yaml" - } - ] + "title": "Model" }, - "no_tools": { + "conversation": { "anyOf": [ { - "type": "boolean" + "type": "string" }, { "type": "null" } ], - "title": "No Tools", - "description": "Whether to bypass all tools and MCP servers", - "default": false, - "examples": [ - true, - false - ] + "title": "Conversation" }, - "generate_topic_summary": { + "include": { "anyOf": [ { - "type": "boolean" + "items": { + "$ref": "#/components/schemas/IncludeParameter" + }, + "type": "array" }, { "type": "null" } ], - "title": "Generate Topic Summary", - "description": "Whether to generate topic summary for new conversations", - "default": true, - "examples": [ - true, - false - ] + "title": "Include" }, - "media_type": { + "instructions": { "anyOf": [ { "type": "string" @@ -8100,496 +11237,252 @@ "type": "null" } ], - "title": "Media Type", - "description": "Media type for the response format", - "examples": [ - "application/json", - "text/plain" - ] + "title": "Instructions" }, - "vector_store_ids": { + "max_infer_iters": { "anyOf": [ { - "items": { - "type": "string" - }, - "type": "array" + "type": "integer" }, { "type": "null" } ], - "title": "Vector Store Ids", - "description": "Optional list of specific vector store IDs to query for RAG. If not provided, all available vector stores will be queried.", - "examples": [ - "ocp_docs", - "knowledge_base", - "vector_db_1" - ] + "title": "Max Infer Iters" }, - "shield_ids": { + "max_output_tokens": { "anyOf": [ { - "items": { - "type": "string" - }, - "type": "array" + "type": "integer" }, { "type": "null" } ], - "title": "Shield Ids", - "description": "Optional list of safety shield IDs to apply. If None, all configured shields are used. 
If provided, must contain at least one valid shield ID (empty list raises 422 error).", - "examples": [ - "llama-guard", - "custom-shield" - ] + "title": "Max Output Tokens" }, - "solr": { + "max_tool_calls": { "anyOf": [ { - "additionalProperties": true, - "type": "object" + "type": "integer" }, { "type": "null" } ], - "title": "Solr", - "description": "Solr-specific query parameters including filter queries", - "examples": [ - { - "fq": [ - "product:*openshift*", - "product_version:*4.16*" - ] - } - ] - } - }, - "additionalProperties": false, - "type": "object", - "required": [ - "query" - ], - "title": "QueryRequest", - "description": "Model representing a request for the LLM (Language Model).\n\nAttributes:\n query: The query string.\n conversation_id: The optional conversation ID (UUID).\n provider: The optional provider.\n model: The optional model.\n system_prompt: The optional system prompt.\n attachments: The optional attachments.\n no_tools: Whether to bypass all tools and MCP servers (default: False).\n generate_topic_summary: Whether to generate topic summary for new conversations.\n media_type: The optional media type for response format (application/json or text/plain).\n vector_store_ids: The optional list of specific vector store IDs to query for RAG.\n shield_ids: The optional list of safety shield IDs to apply.\n\nExample:\n ```python\n query_request = QueryRequest(query=\"Tell me about Kubernetes\")\n ```", - "examples": [ - { - "attachments": [ - { - "attachment_type": "log", - "content": "this is attachment", - "content_type": "text/plain" - }, - { - "attachment_type": "configuration", - "content": "kind: Pod\n metadata:\n name: private-reg", - "content_type": "application/yaml" - }, - { - "attachment_type": "configuration", - "content": "foo: bar", - "content_type": "application/yaml" - } - ], - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "generate_topic_summary": true, - "model": "model-name", - "no_tools": false, - 
"provider": "openai", - "query": "write a deployment yaml for the mongodb image", - "system_prompt": "You are a helpful assistant", - "vector_store_ids": [ - "ocp_docs", - "knowledge_base" - ] - } - ] - }, - "QueryResponse": { - "properties": { - "conversation_id": { + "title": "Max Tool Calls" + }, + "metadata": { "anyOf": [ { - "type": "string" + "additionalProperties": { + "type": "string" + }, + "type": "object" }, { "type": "null" } ], - "title": "Conversation Id", - "description": "The optional conversation ID (UUID)", - "examples": [ - "c5260aec-4d82-4370-9fdf-05cf908b3f16" - ] - }, - "response": { - "type": "string", - "title": "Response", - "description": "Response from LLM", - "examples": [ - "Kubernetes is an open-source container orchestration system for automating ..." - ] - }, - "rag_chunks": { - "items": { - "$ref": "#/components/schemas/RAGChunk" - }, - "type": "array", - "title": "Rag Chunks", - "description": "Deprecated: List of RAG chunks used to generate the response." 
- }, - "referenced_documents": { - "items": { - "$ref": "#/components/schemas/ReferencedDocument" - }, - "type": "array", - "title": "Referenced Documents", - "description": "List of documents referenced in generating the response", - "examples": [ - [ - { - "doc_title": "Operator Lifecycle Manager (OLM)", - "doc_url": "https://docs.openshift.com/container-platform/4.15/operators/olm/index.html" - } - ] - ] - }, - "truncated": { - "type": "boolean", - "title": "Truncated", - "description": "Deprecated:Whether conversation history was truncated", - "default": false, - "examples": [ - false, - true - ] - }, - "input_tokens": { - "type": "integer", - "title": "Input Tokens", - "description": "Number of tokens sent to LLM", - "default": 0, - "examples": [ - 150, - 250, - 500 - ] - }, - "output_tokens": { - "type": "integer", - "title": "Output Tokens", - "description": "Number of tokens received from LLM", - "default": 0, - "examples": [ - 50, - 100, - 200 - ] + "title": "Metadata" }, - "available_quotas": { - "additionalProperties": { - "type": "integer" - }, - "type": "object", - "title": "Available Quotas", - "description": "Quota available as measured by all configured quota limiters", - "examples": [ + "parallel_tool_calls": { + "anyOf": [ { - "daily": 1000, - "monthly": 50000 - } - ] - }, - "tool_calls": { - "items": { - "$ref": "#/components/schemas/ToolCallSummary" - }, - "type": "array", - "title": "Tool Calls", - "description": "List of tool calls made during response generation" - }, - "tool_results": { - "items": { - "$ref": "#/components/schemas/ToolResultSummary" - }, - "type": "array", - "title": "Tool Results", - "description": "List of tool results" - } - }, - "type": "object", - "required": [ - "response" - ], - "title": "QueryResponse", - "description": "Model representing LLM response to a query.\n\nAttributes:\n conversation_id: The optional conversation ID (UUID).\n response: The response.\n rag_chunks: Deprecated. 
List of RAG chunks used to generate the response.\n This information is now available in tool_results under file_search_call type.\n referenced_documents: The URLs and titles for the documents used to generate the response.\n tool_calls: List of tool calls made during response generation.\n tool_results: List of tool results.\n truncated: Whether conversation history was truncated.\n input_tokens: Number of tokens sent to LLM.\n output_tokens: Number of tokens received from LLM.\n available_quotas: Quota available as measured by all configured quota limiters.", - "examples": [ - { - "available_quotas": { - "ClusterQuotaLimiter": 998911, - "UserQuotaLimiter": 998911 - }, - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "input_tokens": 123, - "output_tokens": 456, - "referenced_documents": [ + "type": "boolean" + }, { - "doc_title": "Operator Lifecycle Manager concepts and resources", - "doc_url": "https://docs.openshift.com/container-platform/4.15/operators/understanding/olm/olm-understanding-olm.html" + "type": "null" } ], - "response": "Operator Lifecycle Manager (OLM) helps users install...", - "tool_calls": [ + "title": "Parallel Tool Calls" + }, + "previous_response_id": { + "anyOf": [ { - "args": {}, - "id": "1", - "name": "tool1", - "type": "tool_call" - } - ], - "tool_results": [ + "type": "string" + }, { - "content": "bla", - "id": "1", - "round": 1, - "status": "success", - "type": "tool_result" + "type": "null" } ], - "truncated": false - } - ] - }, - "QuotaExceededResponse": { - "properties": { - "status_code": { - "type": "integer", - "title": "Status Code" - }, - "detail": { - "$ref": "#/components/schemas/DetailModel" - } - }, - "type": "object", - "required": [ - "status_code", - "detail" - ], - "title": "QuotaExceededResponse", - "description": "429 Too Many Requests - Quota limit exceeded.", - "examples": [ - { - "detail": { - "cause": "The token quota for model gpt-4-turbo has been exceeded.", - "response": "The model quota has been 
exceeded" - }, - "label": "model" - }, - { - "detail": { - "cause": "User 123 has no available tokens.", - "response": "The quota has been exceeded" - }, - "label": "user none" - }, - { - "detail": { - "cause": "Cluster has no available tokens.", - "response": "The quota has been exceeded" - }, - "label": "cluster none" + "title": "Previous Response Id" }, - { - "detail": { - "cause": "Unknown subject 999 has no available tokens.", - "response": "The quota has been exceeded" - }, - "label": "subject none" - }, - { - "detail": { - "cause": "User 123 has 5 tokens, but 10 tokens are needed.", - "response": "The quota has been exceeded" - }, - "label": "user insufficient" - }, - { - "detail": { - "cause": "Cluster has 500 tokens, but 900 tokens are needed.", - "response": "The quota has been exceeded" - }, - "label": "cluster insufficient" + "prompt": { + "anyOf": [ + { + "$ref": "#/components/schemas/OpenAIResponsePrompt" + }, + { + "type": "null" + } + ] }, - { - "detail": { - "cause": "Unknown subject 999 has 3 tokens, but 6 tokens are needed.", - "response": "The quota has been exceeded" - }, - "label": "subject insufficient" - } - ] - }, - "QuotaHandlersConfiguration": { - "properties": { - "sqlite": { + "reasoning": { "anyOf": [ { - "$ref": "#/components/schemas/SQLiteDatabaseConfiguration" + "$ref": "#/components/schemas/OpenAIResponseReasoning" }, { "type": "null" } - ], - "title": "SQLite configuration", - "description": "SQLite database configuration" + ] }, - "postgres": { + "safety_identifier": { "anyOf": [ { - "$ref": "#/components/schemas/PostgreSQLDatabaseConfiguration" + "type": "string" }, { "type": "null" } ], - "title": "PostgreSQL configuration", - "description": "PostgreSQL database configuration" - }, - "limiters": { - "items": { - "$ref": "#/components/schemas/QuotaLimiterConfiguration" - }, - "type": "array", - "title": "Quota limiters", - "description": "Quota limiters configuration" + "title": "Safety Identifier" }, - "scheduler": { - "$ref": 
"#/components/schemas/QuotaSchedulerConfiguration", - "title": "Quota scheduler", - "description": "Quota scheduler configuration" + "store": { + "type": "boolean", + "title": "Store", + "default": true }, - "enable_token_history": { + "stream": { "type": "boolean", - "title": "Enable token history", - "description": "Enables storing information about token usage history", + "title": "Stream", "default": false - } - }, - "additionalProperties": false, - "type": "object", - "title": "QuotaHandlersConfiguration", - "description": "Quota limiter configuration.\n\nIt is possible to limit quota usage per user or per service or services\n(that typically run in one cluster). Each limit is configured as a separate\n_quota limiter_. It can be of type `user_limiter` or `cluster_limiter`\n(which is name that makes sense in OpenShift deployment)." - }, - "QuotaLimiterConfiguration": { - "properties": { - "type": { - "type": "string", - "enum": [ - "user_limiter", - "cluster_limiter" - ], - "title": "Quota limiter type", - "description": "Quota limiter type, either user_limiter or cluster_limiter" - }, - "name": { - "type": "string", - "title": "Quota limiter name", - "description": "Human readable quota limiter name" }, - "initial_quota": { - "type": "integer", - "minimum": 0.0, - "title": "Initial quota", - "description": "Quota set at beginning of the period" - }, - "quota_increase": { - "type": "integer", - "minimum": 0.0, - "title": "Quota increase", - "description": "Delta value used to increase quota when period is reached" + "temperature": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "title": "Temperature" }, - "period": { - "type": "string", - "title": "Period", - "description": "Period specified in human readable form" - } - }, - "additionalProperties": false, - "type": "object", - "required": [ - "type", - "name", - "initial_quota", - "quota_increase", - "period" - ], - "title": "QuotaLimiterConfiguration", - "description": 
"Configuration for one quota limiter.\n\nThere are three configuration options for each limiter:\n\n1. ``period`` is specified in a human-readable form, see\n https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-INTERVAL-INPUT\n for all possible options. When the end of the period is reached, the\n quota is reset or increased.\n2. ``initial_quota`` is the value set at the beginning of the period.\n3. ``quota_increase`` is the value (if specified) used to increase the\n quota when the period is reached.\n\nThere are two basic use cases:\n\n1. When the quota needs to be reset to a specific value periodically (for\n example on a weekly or monthly basis), set ``initial_quota`` to the\n required value.\n2. When the quota needs to be increased by a specific value periodically\n (for example on a daily basis), set ``quota_increase``." - }, - "QuotaSchedulerConfiguration": { - "properties": { - "period": { - "type": "integer", - "exclusiveMinimum": 0.0, - "title": "Period", - "description": "Quota scheduler period specified in seconds", - "default": 1 + "text": { + "anyOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseText" + }, + { + "type": "null" + } + ] }, - "database_reconnection_count": { - "type": "integer", - "exclusiveMinimum": 0.0, - "title": "Database reconnection count on startup", - "description": "Database reconnection count on startup. 
When database for quota is not available on startup, the service tries to reconnect N times with specified delay.", - "default": 10 + "tool_choice": { + "anyOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceMode" + }, + { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceAllowedTools" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceFileSearch" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceWebSearch" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceFunctionTool" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceMCPTool" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceCustomTool" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "allowed_tools": "#/components/schemas/OpenAIResponseInputToolChoiceAllowedTools", + "custom": "#/components/schemas/OpenAIResponseInputToolChoiceCustomTool", + "file_search": "#/components/schemas/OpenAIResponseInputToolChoiceFileSearch", + "function": "#/components/schemas/OpenAIResponseInputToolChoiceFunctionTool", + "mcp": "#/components/schemas/OpenAIResponseInputToolChoiceMCPTool", + "web_search": "#/components/schemas/OpenAIResponseInputToolChoiceWebSearch", + "web_search_2025_08_26": "#/components/schemas/OpenAIResponseInputToolChoiceWebSearch", + "web_search_preview": "#/components/schemas/OpenAIResponseInputToolChoiceWebSearch", + "web_search_preview_2025_03_11": "#/components/schemas/OpenAIResponseInputToolChoiceWebSearch" + } + } + }, + { + "type": "null" + } + ], + "title": "Tool Choice" }, - "database_reconnection_delay": { - "type": "integer", - "exclusiveMinimum": 0.0, - "title": "Database reconnection delay", - "description": "Database reconnection delay specified in seconds. 
When database for quota is not available on startup, the service tries to reconnect N times with specified delay.", - "default": 1 - } - }, - "additionalProperties": false, - "type": "object", - "title": "QuotaSchedulerConfiguration", - "description": "Quota scheduler configuration." - }, - "RAGChunk": { - "properties": { - "content": { - "type": "string", - "title": "Content", - "description": "The content of the chunk" + "tools": { + "anyOf": [ + { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseInputToolWebSearch" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolFileSearch" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolFunction" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolMCP" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "file_search": "#/components/schemas/OpenAIResponseInputToolFileSearch", + "function": "#/components/schemas/OpenAIResponseInputToolFunction", + "mcp": "#/components/schemas/OpenAIResponseInputToolMCP", + "web_search": "#/components/schemas/OpenAIResponseInputToolWebSearch", + "web_search_2025_08_26": "#/components/schemas/OpenAIResponseInputToolWebSearch", + "web_search_preview": "#/components/schemas/OpenAIResponseInputToolWebSearch", + "web_search_preview_2025_03_11": "#/components/schemas/OpenAIResponseInputToolWebSearch" + } + } + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tools" }, - "source": { + "generate_topic_summary": { "anyOf": [ { - "type": "string" + "type": "boolean" }, { "type": "null" } ], - "title": "Source", - "description": "Index name identifying the knowledge source from configuration" + "title": "Generate Topic Summary", + "default": true }, - "score": { + "shield_ids": { "anyOf": [ { - "type": "number" + "items": { + "type": "string" + }, + "type": "array" }, { "type": "null" } ], - "title": "Score", - "description": "Relevance score" + "title": "Shield Ids" }, - "attributes": { + "solr": { 
"anyOf": [ { "additionalProperties": true, @@ -8599,28 +11492,115 @@ "type": "null" } ], - "title": "Attributes", - "description": "Document metadata from the RAG provider (e.g., url, title, author)" + "title": "Solr" } }, + "additionalProperties": false, "type": "object", "required": [ - "content" + "input" ], - "title": "RAGChunk", - "description": "Model representing a RAG chunk used in the response." + "title": "ResponsesRequest", + "description": "Model representing a request for the Responses API following LCORE specification.\n\nAttributes:\n input: Input text or structured input items containing the query.\n model: Model identifier in format \"provider/model\". Auto-selected if not provided.\n conversation: Conversation ID linking to an existing conversation. Accepts both\n OpenAI and LCORE formats. Mutually exclusive with previous_response_id.\n include: Explicitly specify output item types that are excluded by default but\n should be included in the response.\n instructions: System instructions or guidelines provided to the model (acts as\n the system prompt).\n max_infer_iters: Maximum number of inference iterations the model can perform.\n max_output_tokens: Maximum number of tokens allowed in the response.\n max_tool_calls: Maximum number of tool calls allowed in a single response.\n metadata: Custom metadata dictionary with key-value pairs for tracking or logging.\n parallel_tool_calls: Whether the model can make multiple tool calls in parallel.\n previous_response_id: Identifier of the previous response in a multi-turn\n conversation. Mutually exclusive with conversation.\n prompt: Prompt object containing a template with variables for dynamic\n substitution.\n reasoning: Reasoning configuration for the response.\n safety_identifier: Safety identifier for the response.\n store: Whether to store the response in conversation history. Defaults to True.\n stream: Whether to stream the response as it is generated. 
Defaults to False.\n temperature: Sampling temperature controlling randomness (typically 0.0\u20132.0).\n text: Text response configuration specifying output format constraints (JSON\n schema, JSON object, or plain text).\n tool_choice: Tool selection strategy (\"auto\", \"required\", \"none\", or specific\n tool configuration).\n tools: List of tools available to the model (file search, web search, function\n calls, MCP tools). Defaults to all tools available to the model.\n generate_topic_summary: LCORE-specific flag indicating whether to generate a\n topic summary for new conversations. Defaults to True.\n shield_ids: LCORE-specific list of safety shield IDs to apply. If None, all\n configured shields are used.\n solr: LCORE-specific Solr vector_io provider query parameters (e.g. filter\n queries). Optional.", + "examples": [ + { + "generate_topic_summary": true, + "input": "Hello World!", + "instructions": "You are a helpful assistant", + "model": "openai/gpt-4o-mini", + "store": true, + "stream": false + } + ] }, - "RAGInfoResponse": { + "ResponsesResponse": { "properties": { + "created_at": { + "type": "integer", + "title": "Created At" + }, + "completed_at": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Completed At" + }, + "error": { + "anyOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseError" + }, + { + "type": "null" + } + ] + }, "id": { "type": "string", - "title": "Id", - "description": "Vector DB unique ID", - "examples": [ - "vs_00000000_0000_0000" - ] + "title": "Id" }, - "name": { + "model": { + "type": "string", + "title": "Model" + }, + "object": { + "type": "string", + "const": "response", + "title": "Object", + "default": "response" + }, + "output": { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseMessage-Output" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall" + }, + { + "$ref": 
"#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools" + }, + { + "$ref": "#/components/schemas/OpenAIResponseMCPApprovalRequest" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "file_search_call": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall", + "function_call": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall", + "mcp_approval_request": "#/components/schemas/OpenAIResponseMCPApprovalRequest", + "mcp_call": "#/components/schemas/OpenAIResponseOutputMessageMCPCall", + "mcp_list_tools": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools", + "message": "#/components/schemas/OpenAIResponseMessage-Output", + "web_search_call": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall" + } + } + }, + "type": "array", + "title": "Output" + }, + "parallel_tool_calls": { + "type": "boolean", + "title": "Parallel Tool Calls", + "default": true + }, + "previous_response_id": { "anyOf": [ { "type": "string" @@ -8629,244 +11609,205 @@ "type": "null" } ], - "title": "Name", - "description": "Human readable vector DB name", - "examples": [ - "Faiss Store with Knowledge base" - ] + "title": "Previous Response Id" }, - "created_at": { - "type": "integer", - "title": "Created At", - "description": "When the vector store was created, represented as Unix time", - "examples": [ - 1763391371 + "prompt": { + "anyOf": [ + { + "$ref": "#/components/schemas/OpenAIResponsePrompt" + }, + { + "type": "null" + } ] }, - "last_active_at": { + "status": { + "type": "string", + "title": "Status" + }, + "temperature": { "anyOf": [ { - "type": "integer" + "type": "number" }, { "type": "null" } ], - "title": "Last Active At", - "description": "When the vector 
store was last active, represented as Unix time", - "examples": [ - 1763391371 - ] + "title": "Temperature" }, - "usage_bytes": { - "type": "integer", - "title": "Usage Bytes", - "description": "Storage byte(s) used by this vector DB", - "examples": [ - 0 + "text": { + "anyOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseText" + }, + { + "type": "null" + } ] }, - "expires_at": { + "top_p": { "anyOf": [ { - "type": "integer" + "type": "number" }, { "type": "null" } ], - "title": "Expires At", - "description": "When the vector store expires, represented as Unix time", - "examples": [ - 1763391371 - ] - }, - "object": { - "type": "string", - "title": "Object", - "description": "Object type", - "examples": [ - "vector_store" - ] + "title": "Top P" }, - "status": { - "type": "string", - "title": "Status", - "description": "Vector DB status", - "examples": [ - "completed" - ] - } - }, - "type": "object", - "required": [ - "id", - "created_at", - "usage_bytes", - "object", - "status" - ], - "title": "RAGInfoResponse", - "description": "Model representing a response with information about RAG DB.", - "examples": [ - { - "created_at": 1763391371, - "id": "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3", - "last_active_at": 1763391371, - "name": "Faiss Store with Knowledge base", - "object": "vector_store", - "status": "completed", - "usage_bytes": 1024000 - } - ] - }, - "RAGListResponse": { - "properties": { - "rags": { - "items": { - "type": "string" - }, - "type": "array", - "title": "RAG list response", - "description": "List of RAG identifiers", - "examples": [ - "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3", - "vs_7b52a8cf-0fa3-489c-cafe-27e061d102f3" - ] - } - }, - "type": "object", - "required": [ - "rags" - ], - "title": "RAGListResponse", - "description": "Model representing a response to list RAGs request.", - "examples": [ - { - "rags": [ - "vs_00000000-cafe-babe-0000-000000000000", - "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3", - 
"vs_7b52a8cf-0fa3-489c-cafe-27e061d102f3" - ] - } - ] - }, - "RHIdentityConfiguration": { - "properties": { - "required_entitlements": { + "tools": { + "anyOf": [ + { + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseInputToolWebSearch" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolFileSearch" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolFunction" + }, + { + "$ref": "#/components/schemas/OpenAIResponseToolMCP" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "file_search": "#/components/schemas/OpenAIResponseInputToolFileSearch", + "function": "#/components/schemas/OpenAIResponseInputToolFunction", + "mcp": "#/components/schemas/OpenAIResponseToolMCP", + "web_search": "#/components/schemas/OpenAIResponseInputToolWebSearch", + "web_search_2025_08_26": "#/components/schemas/OpenAIResponseInputToolWebSearch", + "web_search_preview": "#/components/schemas/OpenAIResponseInputToolWebSearch", + "web_search_preview_2025_03_11": "#/components/schemas/OpenAIResponseInputToolWebSearch" + } + } + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tools" + }, + "tool_choice": { "anyOf": [ { - "items": { - "type": "string" - }, - "type": "array" + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceMode" + }, + { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceAllowedTools" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceFileSearch" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceWebSearch" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceFunctionTool" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceMCPTool" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputToolChoiceCustomTool" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "allowed_tools": "#/components/schemas/OpenAIResponseInputToolChoiceAllowedTools", + "custom": 
"#/components/schemas/OpenAIResponseInputToolChoiceCustomTool", + "file_search": "#/components/schemas/OpenAIResponseInputToolChoiceFileSearch", + "function": "#/components/schemas/OpenAIResponseInputToolChoiceFunctionTool", + "mcp": "#/components/schemas/OpenAIResponseInputToolChoiceMCPTool", + "web_search": "#/components/schemas/OpenAIResponseInputToolChoiceWebSearch", + "web_search_2025_08_26": "#/components/schemas/OpenAIResponseInputToolChoiceWebSearch", + "web_search_preview": "#/components/schemas/OpenAIResponseInputToolChoiceWebSearch", + "web_search_preview_2025_03_11": "#/components/schemas/OpenAIResponseInputToolChoiceWebSearch" + } + } }, { "type": "null" } ], - "title": "Required entitlements", - "description": "List of all required entitlements." - } - }, - "additionalProperties": false, - "type": "object", - "title": "RHIdentityConfiguration", - "description": "Red Hat Identity authentication configuration." - }, - "RagConfiguration": { - "properties": { - "inline": { - "items": { - "type": "string" - }, - "type": "array", - "title": "Inline RAG IDs", - "description": "RAG IDs whose sources are injected as context before the LLM call. Use 'okp' to enable OKP inline RAG. Empty by default (no inline RAG)." + "title": "Tool Choice" }, - "tool": { + "truncation": { "anyOf": [ { - "items": { - "type": "string" - }, - "type": "array" + "type": "string" }, { "type": "null" } ], - "title": "Tool RAG IDs", - "description": "RAG IDs made available to the LLM as a file_search tool. Use 'okp' to include the OKP vector store. When omitted, all registered BYOK vector stores are used (backward compatibility)." - } - }, - "additionalProperties": false, - "type": "object", - "title": "RagConfiguration", - "description": "RAG strategy configuration.\n\nControls which RAG sources are used for inline and tool-based retrieval.\n\nEach strategy lists RAG IDs to include. 
The special ID ``\"okp\"`` defined in constants,\nactivates the OKP provider; all other IDs refer to entries in ``byok_rag``.\n\nBackward compatibility:\n - ``inline`` defaults to ``[]`` (no inline RAG).\n - ``tool`` defaults to ``None`` which means all registered vector stores\n are used (identical to the previous ``tool.byok.enabled = True`` default)." - }, - "ReadinessResponse": { - "properties": { - "ready": { - "type": "boolean", - "title": "Ready", - "description": "Flag indicating if service is ready", - "examples": [ - true, - false + "title": "Truncation" + }, + "usage": { + "anyOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseUsage" + }, + { + "type": "null" + } ] }, - "reason": { - "type": "string", - "title": "Reason", - "description": "The reason for the readiness", - "examples": [ - "Service is ready" + "instructions": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Instructions" + }, + "max_tool_calls": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Max Tool Calls" + }, + "reasoning": { + "anyOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseReasoning" + }, + { + "type": "null" + } ] }, - "providers": { - "items": { - "$ref": "#/components/schemas/ProviderHealthStatus" - }, - "type": "array", - "title": "Providers", - "description": "List of unhealthy providers in case of readiness failure.", - "examples": [] - } - }, - "type": "object", - "required": [ - "ready", - "reason", - "providers" - ], - "title": "ReadinessResponse", - "description": "Model representing response to a readiness request.\n\nAttributes:\n ready: If service is ready.\n reason: The reason for the readiness.\n providers: List of unhealthy providers in case of readiness failure.\n\nExample:\n ```python\n readiness_response = ReadinessResponse(\n ready=False,\n reason=\"Service is not ready\",\n providers=[\n ProviderHealthStatus(\n provider_id=\"ollama\",\n status=\"unhealthy\",\n 
message=\"Server is unavailable\"\n )\n ]\n )\n ```", - "examples": [ - { - "providers": [], - "ready": true, - "reason": "Service is ready" - } - ] - }, - "ReferencedDocument": { - "properties": { - "doc_url": { + "max_output_tokens": { "anyOf": [ { - "type": "string", - "minLength": 1, - "format": "uri" + "type": "integer" }, { "type": "null" } ], - "title": "Doc Url", - "description": "URL of the referenced document" + "title": "Max Output Tokens" }, - "doc_title": { + "safety_identifier": { "anyOf": [ { "type": "string" @@ -8875,10 +11816,34 @@ "type": "null" } ], - "title": "Doc Title", - "description": "Title of the referenced document" + "title": "Safety Identifier" }, - "source": { + "metadata": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Metadata" + }, + "store": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Store" + }, + "conversation": { "anyOf": [ { "type": "string" @@ -8887,13 +11852,75 @@ "type": "null" } ], - "title": "Source", - "description": "Index name identifying the knowledge source from configuration" + "title": "Conversation" + }, + "available_quotas": { + "additionalProperties": { + "type": "integer" + }, + "type": "object", + "title": "Available Quotas" + }, + "output_text": { + "type": "string", + "title": "Output Text" } }, "type": "object", - "title": "ReferencedDocument", - "description": "Model representing a document referenced in generating a response.\n\nAttributes:\n doc_url: Url to the referenced doc.\n doc_title: Title of the referenced doc." 
+ "required": [ + "created_at", + "id", + "model", + "output", + "status", + "available_quotas", + "output_text" + ], + "title": "ResponsesResponse", + "description": "Model representing a response from the Responses API following LCORE specification.\n\nAttributes:\n created_at: Unix timestamp when the response was created.\n completed_at: Unix timestamp when the response was completed, if applicable.\n error: Error details if the response failed or was blocked.\n id: Unique identifier for this response.\n model: Model identifier in \"provider/model\" format used for generation.\n object: Object type identifier, always \"response\".\n output: List of structured output items containing messages, tool calls, and\n other content. This is the primary response content.\n parallel_tool_calls: Whether the model can make multiple tool calls in parallel.\n previous_response_id: Identifier of the previous response in a multi-turn\n conversation.\n prompt: The input prompt object that was sent to the model.\n status: Current status of the response (e.g., \"completed\", \"blocked\",\n \"in_progress\").\n temperature: Temperature parameter used for generation (controls randomness).\n text: Text response configuration object used for OpenAI responses.\n top_p: Top-p sampling parameter used for generation.\n tools: List of tools available to the model during generation.\n tool_choice: Tool selection strategy used (e.g., \"auto\", \"required\", \"none\").\n truncation: Strategy used for handling content that exceeds context limits.\n usage: Token usage statistics including input_tokens, output_tokens, and\n total_tokens.\n instructions: System instructions or guidelines provided to the model.\n max_tool_calls: Maximum number of tool calls allowed in a single response.\n reasoning: Reasoning configuration (effort level) used for the response.\n max_output_tokens: Upper bound for tokens generated in the response.\n safety_identifier: Safety/guardrail identifier applied to the 
request.\n metadata: Additional metadata dictionary with custom key-value pairs.\n store: Whether the response was stored.\n conversation: Conversation ID linking this response to a conversation thread\n (LCORE-specific).\n available_quotas: Remaining token quotas for the user (LCORE-specific).\n output_text: Aggregated text output from all output_text items in the\n output array.", + "examples": [ + { + "available_quotas": { + "daily": 1000, + "monthly": 50000 + }, + "completed_at": 1704067250, + "conversation": "0d21ba731f21f798dc9680125d5d6f493e4a7ab79f25670e", + "created_at": 1704067200, + "id": "resp_abc123", + "instructions": "You are a helpful assistant", + "model": "openai/gpt-4-turbo", + "object": "response", + "output": [ + { + "content": [ + { + "text": "Kubernetes is an open-source container orchestration system...", + "type": "output_text" + } + ], + "role": "assistant", + "type": "message" + } + ], + "output_text": "Kubernetes is an open-source container orchestration system...", + "parallel_tool_calls": true, + "status": "completed", + "store": true, + "temperature": 0.7, + "text": { + "format": { + "type": "text" + } + }, + "usage": { + "input_tokens": 100, + "output_tokens": 50, + "total_tokens": 150 + } + } + ], + "sse_example": "event: response.created\ndata: {\"type\":\"response.created\",\"sequence_number\":0,\"response\":{\"id\":\"resp_abc\",\"created_at\":1704067200,\"status\":\"in_progress\",\"output\":[],\"conversation\":\"0d21ba731f21f798dc9680125d5d6f49\",\"available_quotas\":{},\"output_text\":\"\"}}\n\nevent: response.output_item.added\ndata: {\"response_id\":\"resp_abc\",\"item\":{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"output_text\",\"text\":\"Hello! How can I help?\"}]},\"output_index\":0,\"sequence_number\":1}\n\nevent: response.output_item.done\ndata: {\"response_id\":\"resp_abc\",\"item\":{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"output_text\",\"text\":\"Hello! 
How can I help?\"}]},\"output_index\":0,\"sequence_number\":2}\n\nevent: response.completed\ndata: {\"type\":\"response.completed\",\"sequence_number\":3,\"response\":{\"id\":\"resp_abc\",\"created_at\":1704067200,\"completed_at\":1704067250,\"status\":\"completed\",\"output\":[{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"output_text\",\"text\":\"Hello! How can I help?\"}]}],\"usage\":{\"input_tokens\":10,\"output_tokens\":6,\"total_tokens\":16},\"conversation\":\"0d21ba731f21f798dc9680125d5d6f49\",\"available_quotas\":{\"daily\":1000,\"monthly\":50000},\"output_text\":\"Hello! How can I help?\"}}\n\ndata: [DONE]\n\n" }, "RlsapiV1Attachment": { "properties": { @@ -9163,6 +12190,90 @@ "title": "SQLiteDatabaseConfiguration", "description": "SQLite database configuration." }, + "SearchRankingOptions": { + "properties": { + "ranker": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Ranker" + }, + "score_threshold": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "title": "Score Threshold", + "default": 0.0 + }, + "alpha": { + "anyOf": [ + { + "type": "number", + "maximum": 1.0, + "minimum": 0.0 + }, + { + "type": "null" + } + ], + "title": "Alpha", + "description": "Weight factor for weighted ranker" + }, + "impact_factor": { + "anyOf": [ + { + "type": "number", + "exclusiveMinimum": 0.0 + }, + { + "type": "null" + } + ], + "title": "Impact Factor", + "description": "Impact factor for RRF algorithm" + }, + "weights": { + "anyOf": [ + { + "additionalProperties": { + "type": "number" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Weights", + "description": "Weights for combining vector, keyword, and neural scores. 
Keys: 'vector', 'keyword', 'neural'" + }, + "model": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Model", + "description": "Model identifier for neural reranker" + } + }, + "type": "object", + "title": "SearchRankingOptions", + "description": "Options for ranking and filtering search results.\n\nThis class configures how search results are ranked and filtered. You can use algorithm-based\nrerankers (weighted, RRF) or neural rerankers. Defaults from VectorStoresConfig are\nused when parameters are not provided.\n\nExamples:\n # Weighted ranker with custom alpha\n SearchRankingOptions(ranker=\"weighted\", alpha=0.7)\n\n # RRF ranker with custom impact factor\n SearchRankingOptions(ranker=\"rrf\", impact_factor=50.0)\n\n # Use config defaults (just specify ranker type)\n SearchRankingOptions(ranker=\"weighted\") # Uses alpha from VectorStoresConfig\n\n # Score threshold filtering\n SearchRankingOptions(ranker=\"weighted\", score_threshold=0.5)\n\n:param ranker: (Optional) Name of the ranking algorithm to use. Supported values:\n - \"weighted\": Weighted combination of vector and keyword scores\n - \"rrf\": Reciprocal Rank Fusion algorithm\n - \"neural\": Neural reranking model (requires model parameter, Part II)\n Note: For OpenAI API compatibility, any string value is accepted, but only the above values are supported.\n:param score_threshold: (Optional) Minimum relevance score threshold for results. Default: 0.0\n:param alpha: (Optional) Weight factor for weighted ranker (0-1).\n - 0.0 = keyword only\n - 0.5 = equal weight (default)\n - 1.0 = vector only\n Only used when ranker=\"weighted\" and weights is not provided.\n Falls back to VectorStoresConfig.chunk_retrieval_params.weighted_search_alpha if not provided.\n:param impact_factor: (Optional) Impact factor (k) for RRF algorithm.\n Lower values emphasize higher-ranked results. 
Default: 60.0 (optimal from research).\n Only used when ranker=\"rrf\".\n Falls back to VectorStoresConfig.chunk_retrieval_params.rrf_impact_factor if not provided.\n:param weights: (Optional) Dictionary of weights for combining different signal types.\n Keys can be \"vector\", \"keyword\", \"neural\". Values should sum to 1.0.\n Used when combining algorithm-based reranking with neural reranking (Part II).\n Example: {\"vector\": 0.3, \"keyword\": 0.3, \"neural\": 0.4}\n:param model: (Optional) Model identifier for neural reranker (e.g., \"vllm/Qwen3-Reranker-0.6B\").\n Required when ranker=\"neural\" or when weights contains \"neural\" (Part II)." + }, "SecurityScheme": { "anyOf": [ { @@ -9854,4 +12965,4 @@ } } } -} +} \ No newline at end of file diff --git a/docs/openapi.md b/docs/openapi.md index e1df8b8c6..8c4f77d8a 100644 --- a/docs/openapi.md +++ b/docs/openapi.md @@ -3804,6 +3804,7 @@ BYOK (Bring Your Own Knowledge) RAG configuration. | embedding_dimension | integer | Dimensionality of embedding vectors. | | vector_db_id | string | Vector database identification. | | db_path | string | Path to RAG database. | +| score_multiplier | number | Multiplier applied to relevance scores from this vector store. Used to weight results when querying multiple knowledge sources. Values > 1 boost this store's results; values < 1 reduce them. | ## CORSConfiguration @@ -3868,7 +3869,8 @@ Global service configuration. | azure_entra_id | | | | splunk | | Splunk HEC configuration for sending telemetry events. | | deployment_environment | string | Deployment environment name (e.g., 'development', 'staging', 'production'). Used in telemetry events. | -| solr | | Configuration for Solr vector search operations. | +| rag | | Configuration for all RAG strategies (inline and tool-based). | +| okp | | OKP provider settings. Only used when 'okp' is listed in rag.inline or rag.tool. | ## ConfigurationResponse @@ -4523,12 +4525,14 @@ Model representing a message in a conversation turn. 
Attributes: content: The message content. type: The type of message. + referenced_documents: Optional list of documents referenced in an assistant response. | Field | Type | Description | |-------|------|-------------| | content | string | The message content | | type | string | The type of message | +| referenced_documents | | List of documents referenced in the response (assistant messages only) | ## ModelContextProtocolServer @@ -4622,6 +4626,21 @@ Defines the configuration for the supported OAuth 2.0 flows. | password | | | +## OkpConfiguration + + +OKP (Offline Knowledge Portal) provider configuration. + +Controls provider-specific behaviour for the OKP vector store. +Only relevant when ``"okp"`` is listed in ``rag.inline`` or ``rag.tool``. + + +| Field | Type | Description | +|-------|------|-------------| +| offline | boolean | When True, use parent_id for OKP chunk source URLs. When False, use reference_url for chunk source URLs. | +| chunk_filter_query | string | OKP filter query applied to every OKP search request. Defaults to 'is_chunk:true' to restrict results to chunk documents. To add extra constraints, extend the expression using boolean syntax, e.g. 'is_chunk:true AND product:*openshift*'. | + + ## OpenIdConnectSecurityScheme @@ -4769,7 +4788,7 @@ Example: | generate_topic_summary | | Whether to generate topic summary for new conversations | | media_type | | Media type for the response format | | vector_store_ids | | Optional list of specific vector store IDs to query for RAG. If not provided, all available vector stores will be queried. | -| shield_ids | | Optional list of safety shield IDs to apply. If None, all configured shields are used. If provided, must contain at least one valid shield ID (empty list raises 422 error). | +| shield_ids | | Optional list of safety shield IDs to apply. If None, all configured shields are used. 
| | solr | | Solr-specific query parameters including filter queries | @@ -4938,6 +4957,28 @@ Red Hat Identity authentication configuration. | required_entitlements | | List of all required entitlements. | +## RagConfiguration + + +RAG strategy configuration. + +Controls which RAG sources are used for inline and tool-based retrieval. + +Each strategy lists RAG IDs to include. The special ID ``"okp"`` defined in constants, +activates the OKP provider; all other IDs refer to entries in ``byok_rag``. + +Backward compatibility: + - ``inline`` defaults to ``[]`` (no inline RAG). + - ``tool`` defaults to ``None`` which means all registered vector stores + are used (identical to the previous ``tool.byok.enabled = True`` default). + + +| Field | Type | Description | +|-------|------|-------------| +| inline | array | RAG IDs whose sources are injected as context before the LLM call. Use 'okp' to enable OKP inline RAG. Empty by default (no inline RAG). | +| tool | | RAG IDs made available to the LLM as a file_search tool. Use 'okp' to include the OKP vector store. When omitted, all registered BYOK vector stores are used (backward compatibility). | + + ## ReadinessResponse @@ -5200,21 +5241,6 @@ Model representing a response to shields request. | shields | array | List of shields available | -## SolrConfiguration - - -Solr configuration for vector search queries. - -Controls whether to use offline or online mode when building document URLs -from vector search results, and enables/disables Solr vector IO functionality. - - -| Field | Type | Description | -|-------|------|-------------| -| enabled | boolean | When True, enables Solr vector IO functionality for vector search queries. When False, disables Solr vector search processing. | -| offline | boolean | When True, use parent_id for chunk source URLs. When False, use reference_url for chunk source URLs. 
| - - ## SplunkConfiguration diff --git a/docs/rag_guide.md b/docs/rag_guide.md index 1ddbbd96d..a7122b6ea 100644 --- a/docs/rag_guide.md +++ b/docs/rag_guide.md @@ -67,7 +67,7 @@ For users with BYOK or OKP/Solr configurations, you can automatically enrich you ```bash # Enrich run.yaml with BYOK and/or Solr configurations from lightspeed-stack.yaml -python src/llama_stack_configuration.py -c lightspeed-stack.yaml -i run.yaml -o run_enriched.yaml +uv run src/llama_stack_configuration.py -c lightspeed-stack.yaml -i run.yaml -o run_enriched.yaml ``` This script automatically adds the necessary: diff --git a/docs/responses.md b/docs/responses.md index 80131048b..1eafb22ab 100644 --- a/docs/responses.md +++ b/docs/responses.md @@ -1,6 +1,6 @@ # LCORE OpenResponses API Specification -This document describes the LCORE implementation of the OpenResponses API, exposed via the `POST /v1/responses` endpoint. This endpoint follows the OpenResponses specification and is built on top of the Llama Stack Responses API. Since the underlying Llama Stack Responses API is still evolving, the LCORE endpoint provides a standards-aligned interface while documenting a supported subset of OpenResponses fields. In addition, it introduces LCORE-specific extensions to preserve feature parity and defines explicit field mappings to reproduce the functionality of existing `/v1/query` and `/v1/streaming_query` endpoints. +This document describes the LCORE implementation of the OpenResponses API, exposed via the `POST /v1/responses` endpoint. This endpoint follows the OpenResponses specification and is built on top of the Llama Stack Responses API. In addition, it introduces LCORE-specific extensions to preserve feature parity and defines explicit field mappings to reproduce the functionality of existing `/v1/query` and `/v1/streaming_query` endpoints. 
--- @@ -9,7 +9,7 @@ This document describes the LCORE implementation of the OpenResponses API, expos * [Introduction](#introduction) * [Endpoint Overview](#endpoint-overview) * [Request Specification](#request-specification) - * [Inherited LLS OpenAPI Fields](#inherited-lls-openapi-fields) + * [Inherited LLS OpenAPI Fields](#inherited-lls-openapi-attributes) * [LCORE-Specific Extensions](#lcore-specific-extensions) * [Field Mappings](#field-mappings) * [Structured request attributes: variants and usage](#structured-request-attributes-variants-and-usage) @@ -19,7 +19,7 @@ This document describes the LCORE implementation of the OpenResponses API, expos * [LCORE-Specific Extensions](#lcore-specific-extensions-1) * [Field Mappings](#field-mappings-1) * [Streaming Support](#streaming-support) -* [Known Limitations and Behavioral Differences](#known-limitations-and-behavioral-differences) +* [Behavioral Differences](#behavioral-differences) * [Conversation Handling](#conversation-handling) * [Output Representation](#output-representation) * [Tool Configuration Differences](#tool-configuration-differences) @@ -47,9 +47,9 @@ This document describes the LCORE implementation of the OpenResponses API, expos ## Introduction -The LCORE OpenResponses API provides a standards-aligned interface for AI response generation while preserving feature compatibility with existing LCORE workflows. In particular, the endpoint enriches requests and responses with LCORE-specific attributes, adjusts the semantics of some fields for compatibility, and enriches streaming events. +The LCORE OpenResponses API provides a standards-aligned interface for AI response generation while preserving feature compatibility with existing LCORE workflows. In particular, the endpoint enriches requests and responses with LCORE-specific attributes, adjusts the semantics of some fields for compatibility, and enriches content of some streaming events. 
-The endpoint is designed to provide feature parity with existing streaming endpoints while offering a more direct interface to the underlying Responses API. +The endpoint is designed to provide feature parity with existing query endpoints while offering a more direct interface to the underlying Responses API. --- @@ -69,7 +69,7 @@ The endpoint is designed to provide feature parity with existing streaming endpo ## Request Specification -### Inherited LLS OpenAPI Fields +### Inherited LLS OpenAPI Attributes The following request attributes are supported as defined by the underlying Llama Stack Responses API and retain their original OpenResponses semantics unless otherwise stated: @@ -80,12 +80,15 @@ The following request attributes are supported as defined by the underlying Llam | `conversation` | string | Conversation ID (OpenAI or LCORE format). Mutually exclusive with `previous_response_id` | No | | `include` | array[string] | Extra output item types to include | No | | `instructions` | string | System prompt | No | -| `max_infer_iters` | integer | Max inference iterations | No | -| `max_tool_calls` | integer | Max tool calls per response | No | +| `max_infer_iters` | integer | Maximum of inference iterations | No | +| `max_output_tokens` | integer | Maximum of output tokens | No | +| `max_tool_calls` | integer | Maximum of tool calls per response | No | | `metadata` | dictionary | Custom metadata (tracking/logging) | No | | `parallel_tool_calls` | boolean | Allow parallel tool calls | No | | `previous_response_id` | string | Previous response ID for context. 
Mutually exclusive with `conversation` | No | | `prompt` | object | Prompt substitution template | No | +| `reasoning` | object | Reasoning configuration (effort level) used for the response | No | +| `safety_identifier` | string | Safety/guardrail identifier applied to the request | No | | `store` | boolean | Store in conversation history (default: true) | No | | `stream` | boolean | Stream response (default: false) | No | | `temperature` | float | Sampling temperature (0.0–2.0) | No | @@ -93,15 +96,14 @@ The following request attributes are supported as defined by the underlying Llam | `tool_choice` | string or object | Tool selection strategy (auto, required, none, or specific rules). Default: auto | No | | `tools` | array[object] | Tools available for request (file search, web search, functions, MCP). Default: all | No | -**Note:** Only the fields listed above are currently supported. Additional OpenResponses fields may not yet be available due to LLS API incompleteness. - ### LCORE-Specific Extensions The following fields are LCORE-specific request extensions and are not part of the standard LLS OpenAPI specification: | Field | Type | Description | Required | |-------|------|-------------|----------| -| `generate_topic_summary` | boolean | Generate topic summary for new conversations | No | +| `generate_topic_summary` | boolean | Generate topic summary for new conversations. Default: true | No | +| `shield_ids` | array[string] | Shield IDs to apply. 
If omitted, all configured shields in LCORE are used | No | | `solr` | dictionary | Solr vector_io provider query parameters | No | @@ -114,11 +116,12 @@ The following table maps LCORE query request fields to the OpenResponses request | `query` | `input` | The attribute allows to pass string-like input and also structured input of list of input items | | `conversation_id` | `conversation` | Supports OpenAI `conv_*` format or LCORE hex UUID | | `provider` + `model` | `model` | Concatenated as `provider/model` | -| `system_prompt` | `instructions` | Only change in attribute's name | +| `system_prompt` | `instructions` | Same meaning. Only change in attribute's name | | `attachments` | `input` items | Attachments can be passed as input messages with content of type `input_file` | | `no_tools` | `tool_choice` | `no_tools=true` mapped to `tool_choice="none"` | | `vector_store_ids` | `tools` + `tool_choice` | Vector stores can be explicitly specified and restricted by `file_search` tool type's `vector_store_ids` attribute | | `generate_topic_summary` | N/A | Exposed directly (LCORE-specific) | +| `shield_ids` | N/A | Exposed directly (LCORE-specific) | | `solr` | N/A | Exposed directly (LCORE-specific) | **Note:** The `media_type` attribute is not present in the LCORE specification, as downstream logic determines which format to process (structured `output` or textual `output_text` response attributes). @@ -141,7 +144,7 @@ Required. Either a **string** or a list of input items. Each **item** is one of: - [mcp_approval_request](#mcp_approval_request) — request for human approval of an MCP call - [mcp_approval_response](#mcp_approval_response) — human approval or denial -All input item objects have a common `type` attribute that determines their structure. See [Available OpenResponses items](#available-openresponses-items) for detailed descriptions and examples of each item type. 
+All input item objects have a common `type` discriminator that determines the subsequent structure. See [Available OpenResponses items](#available-openresponses-items) for detailed descriptions and examples of each item type. #### `include` @@ -190,6 +193,28 @@ Template with multiple variable types (text, image, file): Here the template `report_template` (version `2.0`) might define placeholders such as `{{title}}`, `{{chart}}`, and `{{data}}`; the backend substitutes them with the provided text, image, and file respectively. +#### `reasoning` + +Optional. **Reasoning effort configuration** that controls how much “thinking” the model does before producing its answer. Supported on models that expose reasoning (e.g. o1/o3-style). Lower effort favors speed and fewer tokens; higher effort favors more thorough reasoning. + +When provided, the object has a single key: + +`effort`: One of `"none"`, `"minimal"`, `"low"`, `"medium"`, `"high"`, or `"xhigh"`. `None` leaves the default behavior to the backend. + +**Examples:** + +```json +{ "reasoning": { "effort": "low" } } +``` + +```json +{ "reasoning": { "effort": "high" } } +``` + +```json +{ "reasoning": { "effort": "medium" } } +``` + #### `text` Optional. Text response configuration that tells the model how to format its main text output. @@ -357,27 +382,31 @@ The following response attributes are inherited directly from the LLS OpenAPI sp | Field | Type | Description | |-------|------|-------------| -| `id` | string | Unique response ID | -| `object` | string | Always `"response"` | | `created_at` | integer | Creation time (Unix) | -| `status` | string | Status (e.g. 
completed, blocked, in_progress) | | `completed_at` | integer | Completion time (Unix), if set | +| `error` | object | Error details if failed or incompleted | +| `id` | string | Unique response ID or moderation ID | | `model` | string | Model ID (provider/model) used | +| `object` | string | Always `"response"` | | `output` | array[object] | Structured output (messages, tool calls, etc.) | -| `error` | object | Error details if failed or blocked | -| `instructions` | string | System instructions used | -| `max_tool_calls` | integer | Max tool calls allowed | -| `metadata` | dictionary | Custom metadata | | `parallel_tool_calls` | boolean | Parallel tool calls allowed | | `previous_response_id` | string | Previous response ID (multi-turn) | -| `prompt` | object | Prompt echoed (id, variables, version) | -| `temperature` | float | Temperature used | -| `text` | object | Text config (format key) | -| `tool_choice` | string or object | Tool selection used | -| `tools` | array[object] | Tools available during generation | +| `prompt` | object | The input prompt object that was sent to the model | +| `status` | string | Status (e.g. 
completed, blocked, in_progress) | +| `temperature` | float | Temperature parameter used for generation | +| `text` | object | Text response configuration object used | | `top_p` | float | Top-p sampling used | +| `tools` | array[object] | Tools available during generation | +| `tool_choice` | string or object | Tool selection used | | `truncation` | string | Truncation strategy applied (`"auto"` or `"disabled"`) | | `usage` | object | Token usage (input_tokens, output_tokens, total_tokens) | +| `instructions` | string | System instructions used | +| `max_tool_calls` | integer | Max tool calls allowed | +| `reasoning` | object | Reasoning configuration applied | +| `max_output_tokens` | integer | Maximum output tokens allowed, if set | +| `safety_identifier` | string | Safety model or identifier used, if set | +| `metadata` | dictionary | Custom metadata specified in request | +| `store` | boolean | Whether the response was stored | | `output_text` | string | Aggregated text from output items | ### Structured response output: object types and examples @@ -394,7 +423,7 @@ The `output` array contains structured items. Each item has a `type`. Each list **Note:** No `mcp_approval_response` nor `function_call_output` here as they can serve only as input items. -All response item objects have a common `type` attribute that determines their structure. See [Available OpenResponses items](#available-openresponses-items) for detailed descriptions and examples of each item type. +All response item objects have a common `type` discriminator that determines subsequent structure. See [Available OpenResponses items](#available-openresponses-items) for detailed descriptions and examples of each item type. 
### LCORE-Specific Extensions @@ -421,7 +450,7 @@ The following mappings are applied when converting from LLS OpenAPI format to LC **Deprecated Fields:** The following fields are not exposed in the LCORE OpenResponses specification: * `rag_chunks` - Part of `output` items of `file_search_call` type * `referenced_documents` - Part of `output` items -* `truncated` - Deprecated; `truncation` field indicates used strategy, not whether the truncation was applied. +* `truncated` - Deprecated; `truncation` field indicates used strategy, not whether the truncation was actually applied. --- @@ -447,15 +476,15 @@ Each streaming event follows the Server-Sent Events (SSE) format: --- -## Known Limitations and Behavioral Differences +## Behavioral Differences -The `/v1/responses` endpoint follows the OpenResponses structure but is currently constrained by the capabilities of the underlying Llama Stack Responses API. As a result, only the documented subset of request and response fields is supported. +The `/v1/responses` endpoint follows the OpenResponses structure but also incorporates LCORE-specific features to maintain full feature compatibility with query endpoints. Several behavioral differences and implementation details should be noted: ### Conversation Handling -The `conversation` field in responses is a LCORE-managed extension. While not natively defined by the Llama Stack specification, it is internally resolved and linked to the request conversation to preserve multi-turn behavior. +The `conversation` field in responses is a LCORE-managed extension. While not natively defined by the Llama Stack specification, it is internally resolved and **always** present in the response to preserve LCORE conversation-based model. 
The endpoint accepts two conversation ID formats: @@ -484,13 +513,14 @@ Fields such as `media_type`, `tool_calls`, `tool_results`, `rag_chunks`, and `re ### Tool Configuration Differences -Vector store IDs are configured within the `tools` array (e.g., as `file_search` tools) rather than through separate parameters. By default all tools that are configured in LCORE are used to support the response. The set of available tools can be maintained per-request by `tool_choice` or `tools` attributes. +Vector store IDs are configured within the `tools` as `file_search` tools rather than through separate parameters. MCP tools are configurable under `mcp` tool type. By default **all** tools that are configured in LCORE are used to support the response. The set of available tools can be maintained per-request by `tool_choice` or `tools` attributes. ### LCORE-Specific Extensions The API introduces extensions that are not part of the OpenResponses specification: - `generate_topic_summary` (request) — When set to `true` and a new conversation is created, a topic summary is automatically generated and stored in conversation metadata. +- `shield_ids` (request) — Optional list of safety shield IDs to apply. If omitted, all configured shields are used. - `solr` (request) — Solr vector_io provider query parameters (e.g. filter queries). - `available_quotas` (response) — Provides real-time quota information from all configured quota limiters. @@ -498,10 +528,33 @@ The API introduces extensions that are not part of the OpenResponses specificati Streaming responses use Server-Sent Events (SSE) and are enriched with LCORE-specific metadata: -- The `conversation` attribute is included in streamed response payloads. +- The `conversation` attribute is included in all streamed payloads that contain `response` attribute. 
- The `available_quotas` attribute is added to final completion events (`response.completed`, `response.incomplete`, or `response.failed`) and also to the intermediate `response.in_progress` with empty object. -This enrichment may differ slightly from standard OpenAI streaming behavior but preserves compatibility with existing LCORE streaming workflows. + +## Implicit Conversation Management + +This implementation introduces **implicit conversation management**, ensuring that every response is associated with a conversation and can be inspected through the Conversations API. + +Users can provide context to the LLM using one of the following **mutually exclusive** strategies: + +- `conversation` — reference an existing conversation by ID +- `previous_response_id` — reference a previous response (for multi-turn continuation or branching) +- **no context** — neither a conversation nor a previous response is provided + +In **LCORE**, a conversation is modeled as a **linear chain of user turns** (request + response), where every turn belongs to exactly one conversation. Supporting `previous_response_id` as a context mechanism introduces **branching semantics**, which would break this linear structure if handled naively. To preserve a consistent conversation model, implicit conversation management applies the following rules: + +- **Context via `conversation`** — All items from the referenced conversation are provided as context for the new response. The new turn is automatically appended to that conversation, provided the conversation exists and the user has permission to access it. + +- **No context provided** — LCORE creates a new, empty conversation and assigns the new turn to it. 
+ +- **Context via `previous_response_id`** — LCORE determines whether the referenced response is the **latest response in its conversation**: + - **If it is the latest successful response** — The request is treated as a normal continuation of that conversation, preserving the linear structure. + - **If it is not the latest response** — The conversation is **forked**. A new conversation is created, and the new turn becomes the starting point of that conversation. + +**Moderation responses** (requests that fail shield moderation) follow the same conversation rules. However, only **valid (successful) responses** can be referenced via `previous_response_id`; moderation responses cannot be used as context for follow-up requests. + +Blocked turns still appear in conversation history via the Conversations API, but they **do not produce a referenceable response** for continuation or forking. They are also **excluded when determining the latest response** in a conversation. ## Examples diff --git a/examples/run.yaml b/examples/run.yaml index e4951ee2b..cf6fcc4df 100644 --- a/examples/run.yaml +++ b/examples/run.yaml @@ -163,6 +163,12 @@ registered_resources: - toolgroup_id: builtin::rag # Register the RAG tool provider_id: rag-runtime vector_stores: + annotation_prompt_params: # Override the default Llama Stack annotation that adds <| file-xyz |> to responses + enable_annotations: true + annotation_instruction_template: > + When appropriate, cite sources at the end of sentences using doc_url and doc_title format. + Citing sources is not always required because citations are handled externally. + Never include any citation that is in the form '<| file-id |>'. 
default_provider_id: faiss default_embedding_model: # Define the default embedding model for RAG provider_id: sentence-transformers diff --git a/pyproject.toml b/pyproject.toml index 5f60d4351..c3025c3fe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,9 +28,9 @@ dependencies = [ # Used by authentication/k8s integration "kubernetes>=30.1.0", # Used to call Llama Stack APIs - "llama-stack==0.4.3", - "llama-stack-client==0.4.3", - "llama-stack-api==0.4.4", + "llama-stack==0.5.2", + "llama-stack-client==0.5.2", + "llama-stack-api==0.5.2", # Used by Logger "rich>=14.0.0", # Used by JWK token auth handler @@ -223,7 +223,7 @@ disable = ["R0801"] extend-exclude = ["tests/profiles/syntax_error.py"] [tool.ruff.lint] -extend-select = ["TID251", "UP006", "UP007", "UP017", "UP035", "RUF100", "B010"] +extend-select = ["TID251", "UP006", "UP007", "UP010", "UP017", "UP035", "RUF100", "B009", "B010", "DTZ005"] [tool.ruff.lint.flake8-tidy-imports.banned-api] unittest = { msg = "use pytest instead of unittest" } diff --git a/requirements-build.txt b/requirements-build.txt index 66b538dc4..e3c84f824 100644 --- a/requirements-build.txt +++ b/requirements-build.txt @@ -1,11 +1,13 @@ # -# This file is autogenerated by pip-compile with Python 3.12 +# This file is autogenerated by pip-compile with Python 3.13 # by the following command: # # pybuild-deps compile --output-file=requirements-build.txt requirements.source.txt # calver==2025.10.20 # via trove-classifiers +cython==3.2.4 + # via oracledb dunamai==1.26.0 # via uv-dynamic-versioning flit-core==3.12.0 @@ -35,6 +37,7 @@ hatchling==1.29.0 # hatch-fancy-pypi-readme # hatch-vcs # opentelemetry-api + # opentelemetry-distro # opentelemetry-exporter-otlp # opentelemetry-exporter-otlp-proto-common # opentelemetry-exporter-otlp-proto-grpc @@ -44,6 +47,7 @@ hatchling==1.29.0 # opentelemetry-sdk # opentelemetry-semantic-conventions # pydantic-settings + # pythainlp # uv-dynamic-versioning # uvicorn # wcwidth @@ -77,6 +81,7 @@ 
setuptools-rust==1.12.0 setuptools-scm==9.2.2 # via # hatch-vcs + # llama-stack # llama-stack-api # pluggy # setuptools-rust @@ -90,15 +95,22 @@ uv-dynamic-versioning==0.13.0 wheel==0.46.3 # via # authlib + # azure-core # azure-identity # cachetools # litellm + # oci + # oracledb # sentence-transformers + # tornado # The following packages are considered to be unsafe in a requirements file: setuptools==82.0.0 + # via charset-normalizer +setuptools==82.0.1 # via # authlib + # azure-core # azure-identity # blobfile # cachetools @@ -107,12 +119,15 @@ setuptools==82.0.0 # emoji # google-api-core # google-cloud-bigquery + # googleapis-common-protos # greenlet # llama-stack # llama-stack-api # markupsafe # maturin # multiprocess + # oci + # oracledb # pathspec # pluggy # polyleven @@ -128,5 +143,6 @@ setuptools==82.0.0 # setuptools-scm # sse-starlette # tenacity + # tornado # trl # trove-classifiers diff --git a/requirements.hashes.source.txt b/requirements.hashes.source.txt index 1845c51e4..2057f95ea 100644 --- a/requirements.hashes.source.txt +++ b/requirements.hashes.source.txt @@ -2,9 +2,9 @@ # uv pip compile requirements.source.txt --refresh --generate-hashes --python-version 3.12 --emit-index-url --no-deps --no-annotate --index-url https://pypi.org/simple -a2a-sdk==0.3.24 \ - --hash=sha256:3581e6e8a854cd725808f5732f90b7978e661b6d4e227a4755a8f063a3c1599d \ - --hash=sha256:7b248767096bb55311f57deebf6b767349388d94c1b376c60cb8f6b715e053f6 +a2a-sdk==0.3.25 \ + --hash=sha256:2fce38faea82eb0b6f9f9c2bcf761b0d78612c80ef0e599b50d566db1b2654b5 \ + --hash=sha256:afda85bab8d6af0c5d15e82f326c94190f6be8a901ce562d045a338b7127242f accelerate==1.13.0 \ --hash=sha256:cf1a3efb96c18f7b152eb0fa7490f3710b19c3f395699358f08decca2b8b62e0 \ --hash=sha256:d631b4e0f5b3de4aff2d7e9e6857d164810dfc3237d54d017f075122d057b236 @@ -14,49 +14,166 @@ authlib==1.6.9 \ autoevals==0.1.0 \ --hash=sha256:573ab490966fd5f2265dc4842d0bfd7b729ee121c86bd72db4440badb7264587 \ 
--hash=sha256:ae884fe6107dbd6e05d840f51c2dba7eccfa01449e5ee5e83b6b4589508b2aca -azure-core==1.38.2 \ - --hash=sha256:074806c75cf239ea284a33a66827695ef7aeddac0b4e19dda266a93e4665ead9 \ - --hash=sha256:67562857cb979217e48dc60980243b61ea115b77326fa93d83b729e7ff0482e7 -azure-identity==1.25.2 \ - --hash=sha256:030dbaa720266c796221c6cdbd1999b408c079032c919fef725fcc348a540fe9 \ - --hash=sha256:1b40060553d01a72ba0d708b9a46d0f61f56312e215d8896d836653ffdc6753d +azure-core==1.38.3 \ + --hash=sha256:a7931fd445cb4af8802c6f39c6a326bbd1e34b115846550a8245fa656ead6f8e \ + --hash=sha256:bf59d29765bf4748ab9edf25f98a30b7ea9797f43e367c06d846a30b29c1f845 +azure-identity==1.25.3 \ + --hash=sha256:ab23c0d63015f50b630ef6c6cf395e7262f439ce06e5d07a64e874c724f8d9e6 \ + --hash=sha256:f4d0b956a8146f30333e071374171f3cfa7bdb8073adb8c3814b65567aa7447c blobfile==3.2.0 \ --hash=sha256:78514a9265b9aa7d4607042dc77c5e6461ab27036450ad8e1f6ef9a7f29bf958 \ --hash=sha256:e5e4095477da9f09e2077f41320c006001b2102a61f07d41ceaaecdf5d9741d8 -cachetools==7.0.3 \ - --hash=sha256:8c246313b95849964e54a909c03b327a87ab0428b068fac10da7b105ca275ef6 \ - --hash=sha256:c128ffca156eef344c25fcd08a96a5952803786fa33097f5f2d49edf76f79d53 +cachetools==7.0.5 \ + --hash=sha256:0cd042c24377200c1dcd225f8b7b12b0ca53cc2c961b43757e774ebe190fd990 \ + --hash=sha256:46bc8ebefbe485407621d0a4264b23c080cedd913921bad7ac3ed2f26c183114 certifi==2026.2.25 \ --hash=sha256:027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa \ --hash=sha256:e887ab5cee78ea814d3472169153c2d12cd43b14bd03329a39a9c6e2e80bfba7 -chardet==7.0.1 \ - --hash=sha256:11f51985946b49739968b6dc2fa70e7d8f490bb15574377c5ee114f33d19ef7e \ - --hash=sha256:1566d0f91990b8f33b53836391d557f779584bd48beabf90efbf7a6efa89179e \ - --hash=sha256:169951fa88d449e72e0c6194cec1c5e405fd36a6cfbe74c7dab5494cc35f1700 \ - --hash=sha256:26186f0ea03c4c1f9be20c088b127c71b0e9d487676930fab77625ddec2a4ef2 \ - --hash=sha256:265cb3b5dafc0411c0949800a0692f07e986fb663b6ae1ecfba32ad193a55a03 \ - 
--hash=sha256:302798e1e62008ca34a216dd04ecc5e240993b2090628e2a35d4c0754313ea9a \ - --hash=sha256:3355a3c8453d673e7c1664fdd24a0c6ef39964c3d41befc4849250f7eb1de3b5 \ - --hash=sha256:33f4132f9781302beff34713fe6c990badd009aa8ea730611aef0931b27f1541 \ - --hash=sha256:44011e3b4fd4a8a15bc94736717414b7ec82880066fb22d9f476c68a4ded2647 \ - --hash=sha256:4af34cf0652a9da44720540c97f11e30781a77900c89547b311984a7272b33f7 \ - --hash=sha256:5333f9967863ea7d8642df0e00cf4d33e8ed7e99fe7b6464b40ba969a2808544 \ - --hash=sha256:54e448fab0c11b27bb908ea0218e2094578c583d05faa5f65b91fa6ccfa45570 \ - --hash=sha256:63bc210ce73f8a1b87430b949f84d086cb326d67eb259305862e7c8861b73374 \ - --hash=sha256:67fe3f453416ed9343057dcf06583b36aae6d8bdb013370b3ff46bc37b7e30ac \ - --hash=sha256:69708a504a43464b60ea16d031250b58206969c9bbd6851266e2f39afef53168 \ - --hash=sha256:6f907962b18df78d5ca87a7484e4034354408d2c97cec6f53634b0ea0424c594 \ - --hash=sha256:6fce895c12c5495bb598e59ae3cd89306969b4464ec7b6dd609b9c86e3397fe3 \ - --hash=sha256:8714f0013c208452a98e23595d99cef53c5364565454425f431446eb586e2591 \ - --hash=sha256:88793aeebb28a5296eea9bdd9b5e74ee4e3582766a6a2cb7f39e4761a96fdd55 \ - --hash=sha256:8a8d87853c7f191029933307094a8896b087c2c436703281cb289a22aa4ae8bd \ - --hash=sha256:9e827211249d8e3cacc1adf6950a7a8cf56920e5e303e56dcab827b71c03df33 \ - --hash=sha256:c12abc65830068ad05bd257fb953aaaf63a551446688e03e145522086be5738c \ - --hash=sha256:c3f59dc3e148b54813ec5c7b4b2e025d37f5dc221ee28a06d1a62f169cfaedf5 \ - --hash=sha256:dd6db7505556ae8f9e2a3bf6d689c2b86aa6b459cf39552645d2c4d3fdbf489c \ - --hash=sha256:e51e1ff2c51b2d622d97c9737bd5ee9d9b9038f05b7dd8f9ea10b9e2d9674c24 \ - --hash=sha256:f661edbfa77b8683a503043ddc9b9fe9036cf28af13064200e11fa1844ded79c \ - --hash=sha256:fb14755377d8de845c69378bbaedc0e35109c21a43824450524fd9c3178792d5 +chardet==7.1.0 \ + --hash=sha256:00c3182f739ae7715641e8c08e0ee8ae21b5db6402b883264aa04511edf428b9 \ + 
--hash=sha256:18afc27681cd9f583fac47282179f8b73f37b1cab171a528e8af89e7e4562b32 \ + --hash=sha256:1a3c22672c9502af99e0433b47421d0d72c8803efce2cd4a91a3ae1ab5972243 \ + --hash=sha256:20b73403b7a21487e31b2810ea9d7182ce5e301a8ebe847b49d91ec6022e214a \ + --hash=sha256:38be4c07e016dac37fb6060094f3a720200e3e49dc14f55924a1c230eeffa59f \ + --hash=sha256:43c1e3cba6c41d8958ee4acdab94c151dbe256d7ef8df4ae032dc62a892f294f \ + --hash=sha256:49b5edd762751735704d1fec38665ee219da28c66f2a4921d615b12be389735b \ + --hash=sha256:5d86d349f768e6d35f6804013f6643d880ec877b94c453fa40a3fd10d16ddb48 \ + --hash=sha256:619d7ef3187ff1691525a7fdbe8c30f5a519885e1de82f6f57e26a29866bf11b \ + --hash=sha256:6f806f325825325e0682226269a2a4859993344cccca14f2463855d4f5a93272 \ + --hash=sha256:70adef4a036d39d9b5f6c2702f773118a9985c0dff23b650e1045f20e33c9467 \ + --hash=sha256:7f677725333bf53f84b7f57458f44669a8a5eb2ac4092ac699cdfa9b1af08a5f \ + --hash=sha256:8e067ee79709ccf9caa5732419df2dbef476bd9b9658ffc929d45cf13fc91ed7 \ + --hash=sha256:8f47bc4accac17bd9accbb4acc1d563acc024a783806c0a43c3a583f5285690b \ + --hash=sha256:96e7fe0770cd77361bec21a1dd8524e77aaa567577fa8372368d5fa8dd0ef00b \ + --hash=sha256:97cdd7a016fbb451a4dc26b3b1173960b3c0071bbe46a46d6b70027a517170ff \ + --hash=sha256:a02197831a4304eed360559e0ffc58deccc9cdda9f9315c6e7ad978f7d8617d3 \ + --hash=sha256:a6492bebaba8882afb3e14c786fb69ed767326b6f514b8e093dcdf6e2a094d33 \ + --hash=sha256:b951107b254cdc766e52f4b8339dcfa97c7b45ca9f5509075308db2497e7f3af \ + --hash=sha256:bacc8f862998c59e9ee7fe4960538300d1cc3fe2c293b9cc99bbbc7bf3bedf51 \ + --hash=sha256:bbd4fccf1cf6d92fdd75a1827a478672abb5685e61e92ce863d9380b18cb813f \ + --hash=sha256:c35d17822fc94467b7951adebd897cb01c0e37ac694be18d2cbd2b676d61df4f \ + --hash=sha256:cc8c7520a9736da766f5794bbabb1c6cdfe446676429a5cf691af878631a80bf \ + --hash=sha256:dff284d0661563e82d235f79f1d410c526b15ef8d50adc0446cba8162db68d22 \ + --hash=sha256:e096d9c211050fff40e22748e1d09d0cec8348fc13ee6e2e0a1da079345b8a86 \ + 
--hash=sha256:eb2a9b4052be006b87a985dbdbb00ab35b4b1b66d2751b0ee12680f8f4e90406 \ + --hash=sha256:fdfc42dfc44ccd569b84fe6a1fdea1df66dc0c48461bc3899dea5efea8d507f6 +charset-normalizer==3.4.5 \ + --hash=sha256:014837af6fabf57121b6254fa8ade10dceabc3528b27b721a64bbc7b8b1d4eb4 \ + --hash=sha256:01a1ed54b953303ca7e310fafe0fe347aab348bd81834a0bcd602eb538f89d66 \ + --hash=sha256:0294916d6ccf2d069727d65973c3a1ca477d68708db25fd758dd28b0827cff54 \ + --hash=sha256:02a9d1b01c1e12c27883b0c9349e0bcd9ae92e727ff1a277207e1a262b1cbf05 \ + --hash=sha256:036c079aa08a6a592b82487f97c60b439428320ed1b2ea0b3912e99d30c77765 \ + --hash=sha256:039215608ac7b358c4da0191d10fc76868567fbf276d54c14721bdedeb6de064 \ + --hash=sha256:0625665e4ebdddb553ab185de5db7054393af8879fb0c87bd5690d14379d6819 \ + --hash=sha256:0a45e504f5e1be0bd385935a8e1507c442349ca36f511a47057a71c9d1d6ea9e \ + --hash=sha256:0b362bcd27819f9c07cbf23db4e0e8cd4b44c5ecd900c2ff907b2b92274a7412 \ + --hash=sha256:0c300cefd9b0970381a46394902cd18eaf2aa00163f999590ace991989dcd0fc \ + --hash=sha256:1088345bcc93c58d8d8f3d783eca4a6e7a7752bbff26c3eee7e73c597c191c2e \ + --hash=sha256:10b473fc8dca1c3ad8559985794815f06ca3fc71942c969129070f2c3cdf7281 \ + --hash=sha256:131716d6786ad5e3dc542f5cc6f397ba3339dc0fb87f87ac30e550e8987756af \ + --hash=sha256:14498a429321de554b140013142abe7608f9d8ccc04d7baf2ad60498374aefa2 \ + --hash=sha256:149ec69866c3d6c2fb6f758dbc014ecb09f30b35a5ca90b6a8a2d4e54e18fdfe \ + --hash=sha256:165c7b21d19365464e8f70e5ce5e12524c58b48c78c1f5a57524603c1ab003f8 \ + --hash=sha256:1827734a5b308b65ac54e86a618de66f935a4f63a8a462ff1e19a6788d6c2262 \ + --hash=sha256:19092dde50335accf365cce21998a1c6dd8eafd42c7b226eb54b2747cdce2fac \ + --hash=sha256:1a374cc0b88aa710e8865dc1bd6edb3743c59f27830f0293ab101e4cf3ce9f85 \ + --hash=sha256:1d1401945cb77787dbd3af2446ff2d75912327c4c3a1526ab7955ecf8600687c \ + --hash=sha256:1f2da5cbb9becfcd607757a169e38fb82aa5fd86fae6653dea716e7b613fe2cf \ + 
--hash=sha256:259cd1ca995ad525f638e131dbcc2353a586564c038fc548a3fe450a91882139 \ + --hash=sha256:2820a98460c83663dd8ec015d9ddfd1e4879f12e06bb7d0500f044fb477d2770 \ + --hash=sha256:28269983f25a4da0425743d0d257a2d6921ea7d9b83599d4039486ec5b9f911d \ + --hash=sha256:2b970382e4a36bed897c19f310f31d7d13489c11b4f468ddfba42d41cddfb918 \ + --hash=sha256:2da4eedcb6338e2321e831a0165759c0c620e37f8cd044a263ff67493be8ffb3 \ + --hash=sha256:30987f4a8ed169983f93e1be8ffeea5214a779e27ed0b059835c7afe96550ad7 \ + --hash=sha256:30a2b1a48478c3428d047ed9690d57c23038dac838a87ad624c85c0a78ebeb39 \ + --hash=sha256:340810d34ef83af92148e96e3e44cb2d3f910d2bf95e5618a5c467d9f102231d \ + --hash=sha256:3f64c6bf8f32f9133b668c7f7a7cbdbc453412bc95ecdbd157f3b1e377a92990 \ + --hash=sha256:4167a621a9a1a986c73777dbc15d4b5eac8ac5c10393374109a343d4013ec765 \ + --hash=sha256:4354e401eb6dab9aed3c7b4030514328a6c748d05e1c3e19175008ca7de84fb1 \ + --hash=sha256:4481e6da1830c8a1cc0b746b47f603b653dadb690bcd851d039ffaefe70533aa \ + --hash=sha256:4b8551b6e6531e156db71193771c93bda78ffc4d1e6372517fe58ad3b91e4659 \ + --hash=sha256:4cd966c2559f501c6fd69294d082c2934c8dd4719deb32c22961a5ac6db0df1d \ + --hash=sha256:50bcbca6603c06a1dcc7b056ed45c37715fb5d2768feb3bcd37d2313c587a5b9 \ + --hash=sha256:530beedcec9b6e027e7a4b6ce26eed36678aa39e17da85e6e03d7bd9e8e9d7c9 \ + --hash=sha256:568e3c34b58422075a1b49575a6abc616d9751b4d61b23f712e12ebb78fe47b2 \ + --hash=sha256:573ef5814c4b7c0d59a7710aa920eaaaef383bd71626aa420fba27b5cab92e8d \ + --hash=sha256:58ad8270cfa5d4bef1bc85bd387217e14ff154d6630e976c6f56f9a040757475 \ + --hash=sha256:597d10dec876923e5c59e48dbd366e852eacb2b806029491d307daea6b917d7c \ + --hash=sha256:5bcb3227c3d9aaf73eaaab1db7ccd80a8995c509ee9941e2aae060ca6e4e5d81 \ + --hash=sha256:5cffde4032a197bd3b42fd0b9509ec60fb70918d6970e4cc773f20fc9180ca67 \ + --hash=sha256:5fea359734b140d0d6741189fea5478c6091b54ffc69d7ce119e0a05637d8c99 \ + --hash=sha256:60d68e820af339df4ae8358c7a2e7596badeb61e544438e489035f9fbf3246a5 \ + 
--hash=sha256:610f72c0ee565dfb8ae1241b666119582fdbfe7c0975c175be719f940e110694 \ + --hash=sha256:65a126fb4b070d05340a84fc709dd9e7c75d9b063b610ece8a60197a291d0adf \ + --hash=sha256:65b3c403a5b6b8034b655e7385de4f72b7b244869a22b32d4030b99a60593eca \ + --hash=sha256:66dee73039277eb35380d1b82cccc69cc82b13a66f9f4a18da32d573acf02b7c \ + --hash=sha256:708c7acde173eedd4bfa4028484426ba689d2103b28588c513b9db2cd5ecde9c \ + --hash=sha256:728c6a963dfab66ef865f49286e45239384249672cd598576765acc2a640a636 \ + --hash=sha256:754f96058e61a5e22e91483f823e07df16416ce76afa4ebf306f8e1d1296d43f \ + --hash=sha256:75dfd1afe0b1647449e852f4fb428195a7ed0588947218f7ba929f6538487f02 \ + --hash=sha256:75ee9c1cce2911581a70a3c0919d8bccf5b1cbc9b0e5171400ec736b4b569497 \ + --hash=sha256:76a9d0de4d0eab387822e7b35d8f89367dd237c72e82ab42b9f7bf5e15ada00f \ + --hash=sha256:77be992288f720306ab4108fe5c74797de327f3248368dfc7e1a916d6ed9e5a2 \ + --hash=sha256:7ad83b8f9379176c841f8865884f3514d905bcd2a9a3b210eaa446e7d2223e4d \ + --hash=sha256:8197abe5ca1ffb7d91e78360f915eef5addff270f8a71c1fc5be24a56f3e4873 \ + --hash=sha256:82cc7c2ad42faec8b574351f8bc2a0c049043893853317bd9bb309f5aba6cb5a \ + --hash=sha256:8a28afb04baa55abf26df544e3e5c6534245d3daa5178bc4a8eeb48202060d0e \ + --hash=sha256:8b78d8a609a4b82c273257ee9d631ded7fac0d875bdcdccc109f3ee8328cfcb1 \ + --hash=sha256:8ce11cd4d62d11166f2b441e30ace226c19a3899a7cf0796f668fba49a9fb123 \ + --hash=sha256:8fff79bf5978c693c9b1a4d71e4a94fddfb5fe744eb062a318e15f4a2f63a550 \ + --hash=sha256:92263f7eca2f4af326cd20de8d16728d2602f7cfea02e790dcde9d83c365d7cc \ + --hash=sha256:93b3b2cc5cf1b8743660ce77a4f45f3f6d1172068207c1defc779a36eea6bb36 \ + --hash=sha256:95adae7b6c42a6c5b5b559b1a99149f090a57128155daeea91732c8d970d8644 \ + --hash=sha256:97ab7787092eb9b50fb47fa04f24c75b768a606af1bcba1957f07f128a7219e4 \ + --hash=sha256:9db5e3fcdcee89a78c04dffb3fe33c79f77bd741a624946db2591c81b2fc85b0 \ + --hash=sha256:a118e2e0b5ae6b0120d5efa5f866e58f2bb826067a646431da4d6a2bdae7950e \ + 
--hash=sha256:a2aecdb364b8a1802afdc7f9327d55dad5366bc97d8502d0f5854e50712dbc5f \ + --hash=sha256:a66aa5022bf81ab4b1bebfb009db4fd68e0c6d4307a1ce5ef6a26e5878dfc9e4 \ + --hash=sha256:a68766a3c58fde7f9aaa22b3786276f62ab2f594efb02d0a1421b6282e852e98 \ + --hash=sha256:aa2f963b4da26daf46231d9b9e0e2c9408a751f8f0d0f44d2de56d3caf51d294 \ + --hash=sha256:aa92ec1102eaff840ccd1021478af176a831f1bccb08e526ce844b7ddda85c22 \ + --hash=sha256:ac59c15e3f1465f722607800c68713f9fbc2f672b9eb649fe831da4019ae9b23 \ + --hash=sha256:ae8b03427410731469c4033934cf473426faff3e04b69d2dfb64a4281a3719f8 \ + --hash=sha256:afca7f78067dd27c2b848f1b234623d26b87529296c6c5652168cc1954f2f3b2 \ + --hash=sha256:b2d37d78297b39a9eb9eb92c0f6df98c706467282055419df141389b23f93362 \ + --hash=sha256:b3e71afc578b98512bfe7bdb822dd6bc57d4b0093b4b6e5487c1e96ad4ace242 \ + --hash=sha256:ba20bdf69bd127f66d0174d6f2a93e69045e0b4036dc1ca78e091bcc765830c4 \ + --hash=sha256:c108f8619e504140569ee7de3f97d234f0fbae338a7f9f360455071ef9855a95 \ + --hash=sha256:c23eb3263356d94858655b3e63f85ac5d50970c6e8febcdde7830209139cc37d \ + --hash=sha256:c5af897b45fa606b12464ccbe0014bbf8c09191e0a66aab6aa9d5cf6e77e0c94 \ + --hash=sha256:c7a80a9242963416bd81f99349d5f3fce1843c303bd404f204918b6d75a75fd6 \ + --hash=sha256:c7e84e0c0005e3bdc1a9211cd4e62c78ba80bc37b2365ef4410cd2007a9047f2 \ + --hash=sha256:cace89841c0599d736d3d74a27bc5821288bb47c5441923277afc6059d7fbcb4 \ + --hash=sha256:cd2d0f0ec9aa977a27731a3209ebbcacebebaf41f902bd453a928bfd281cf7f8 \ + --hash=sha256:d01de5e768328646e6a3fa9e562706f8f6641708c115c62588aef2b941a4f88e \ + --hash=sha256:d1028de43596a315e2720a9849ee79007ab742c06ad8b45a50db8cdb7ed4a82a \ + --hash=sha256:d27ce22ec453564770d29d03a9506d449efbb9fa13c00842262b2f6801c48cce \ + --hash=sha256:d29dd9c016f2078b43d0c357511e87eee5b05108f3dd603423cb389b89813969 \ + --hash=sha256:d31f0d1671e1534e395f9eb84a68e0fb670e1edb1fe819a9d7f564ae3bc4e53f \ + --hash=sha256:d4eb8ac7469b2a5d64b5b8c04f84d8bf3ad340f4514b98523805cbf46e3b3923 \ + 
--hash=sha256:d5e52d127045d6ae01a1e821acfad2f3a1866c54d0e837828538fabe8d9d1bd6 \ + --hash=sha256:d77f97e515688bd615c1d1f795d540f32542d514242067adcb8ef532504cb9ee \ + --hash=sha256:d8ed79b8f6372ca4254955005830fd61c1ccdd8c0fac6603e2c145c61dd95db6 \ + --hash=sha256:dc57a0baa3eeedd99fafaef7511b5a6ef4581494e8168ee086031744e2679467 \ + --hash=sha256:e09f671a54ce70b79a1fc1dc6da3072b7ef7251fadb894ed92d9aa8218465a5f \ + --hash=sha256:e22d1059b951e7ae7c20ef6b06afd10fb95e3c41bf3c4fbc874dba113321c193 \ + --hash=sha256:e37bd100d2c5d3ba35db9c7c5ba5a9228cbcffe5c4778dc824b164e5257813d7 \ + --hash=sha256:e51ae7d81c825761d941962450f50d041db028b7278e7b08930b4541b3e45cb9 \ + --hash=sha256:e545b51da9f9af5c67815ca0eb40676c0f016d0b0381c86f20451e35696c5f95 \ + --hash=sha256:e6302ca4ae283deb0af68d2fbf467474b8b6aedcd3dab4db187e07f94c109763 \ + --hash=sha256:e71bbb595973622b817c042bd943c3f3667e9c9983ce3d205f973f486fec98a7 \ + --hash=sha256:ec56a2266f32bc06ed3c3e2a8f58417ce02f7e0356edc89786e52db13c593c98 \ + --hash=sha256:ed1a9a204f317ef879b32f9af507d47e49cd5e7f8e8d5d96358c98373314fc60 \ + --hash=sha256:ed97c282ee4f994ef814042423a529df9497e3c666dca19be1d4cd1129dc7ade \ + --hash=sha256:ed98364e1c262cf5f9363c3eca8c2df37024f52a8fa1180a3610014f26eac51c \ + --hash=sha256:ee57b926940ba00bca7ba7041e665cc956e55ef482f851b9b65acb20d867e7a2 \ + --hash=sha256:f1d725b754e967e648046f00c4facc42d414840f5ccc670c5670f59f83693e4f \ + --hash=sha256:f8102ae93c0bc863b1d41ea0f4499c20a83229f52ed870850892df555187154a \ + --hash=sha256:fc1c64934b8faf7584924143eb9db4770bbdb16659626e1a1a4d9efbcb68d947 \ + --hash=sha256:ff95a9283de8a457e6b12989de3f9f5193430f375d64297d323a615ea52cbdb3 +circuitbreaker==2.1.3 \ + --hash=sha256:1a4baee510f7bea3c91b194dcce7c07805fe96c4423ed5594b75af438531d084 \ + --hash=sha256:87ba6a3ed03fdc7032bc175561c2b04d52ade9d5faf94ca2b035fbdc5e6b1dd1 emoji==2.15.0 \ --hash=sha256:205296793d66a89d88af4688fa57fd6496732eb48917a87175a023c8138995eb \ 
--hash=sha256:eae4ab7d86456a70a00a985125a03263a5eac54cd55e51d7e184b1ed3b6757e4 @@ -142,24 +259,27 @@ fastuuid==0.14.0 \ --hash=sha256:ec27778c6ca3393ef662e2762dba8af13f4ec1aaa32d08d77f71f2a70ae9feb8 \ --hash=sha256:f54d5b36c56a2d5e1a31e73b950b28a0d83eb0c37b91d10408875a5a29494bad \ --hash=sha256:f74631b8322d2780ebcf2d2d75d58045c3e9378625ec51865fe0b5620800c39d -filelock==3.25.0 \ - --hash=sha256:5ccf8069f7948f494968fc0713c10e5c182a9c9d9eef3a636307a20c2490f047 \ - --hash=sha256:8f00faf3abf9dc730a1ffe9c354ae5c04e079ab7d3a683b7c32da5dd05f26af3 +filelock==3.25.2 \ + --hash=sha256:b64ece2b38f4ca29dd3e810287aa8c48182bbecd1ae6e9ae126c9b35f1382694 \ + --hash=sha256:ca8afb0da15f229774c9ad1b455ed96e85a81373065fb10446672f64444ddf70 google-api-core==2.30.0 \ --hash=sha256:02edfa9fab31e17fc0befb5f161b3bf93c9096d99aed584625f38065c511ad9b \ --hash=sha256:80be49ee937ff9aba0fd79a6eddfde35fe658b9953ab9b79c57dd7061afa8df5 -google-auth==2.48.0 \ - --hash=sha256:2e2a537873d449434252a9632c28bfc268b0adb1e53f9fb62afc5333a975903f \ - --hash=sha256:4f7e706b0cd3208a3d940a19a822c37a476ddba5450156c3e6624a71f7c841ce -google-cloud-aiplatform==1.140.0 \ - --hash=sha256:e94493a2682b9d17efa7146a53bb3665bf1595c3394fd3d0f45d18f71623fddc \ - --hash=sha256:ea7eb1870b4cf600f8c2472102e21c3a1bcaf723d6e49f00ed51bc6b88d54fff +google-auth==2.49.1 \ + --hash=sha256:16d40da1c3c5a0533f57d268fe72e0ebb0ae1cc3b567024122651c045d879b64 \ + --hash=sha256:195ebe3dca18eddd1b3db5edc5189b76c13e96f29e73043b923ebcf3f1a860f7 +google-cloud-aiplatform==1.141.0 \ + --hash=sha256:6bd25b4d514c40b8181ca703e1b313ad6d0454ab8006fc9907fb3e9f672f31d1 \ + --hash=sha256:e3b1cdb28865dd862aac9c685dfc5ac076488705aba0a5354016efadcddd59c6 google-cloud-bigquery==3.40.1 \ --hash=sha256:75afcfb6e007238fe1deefb2182105249321145ff921784fe7b1de2b4ba24506 \ --hash=sha256:9082a6b8193aba87bed6a2c79cf1152b524c99bb7e7ac33a785e333c09eac868 google-cloud-storage==3.9.0 \ --hash=sha256:2dce75a9e8b3387078cbbdad44757d410ecdb916101f8ba308abf202b6968066 \ 
--hash=sha256:f2d8ca7db2f652be757e92573b2196e10fbc09649b5c016f8b422ad593c641cc +googleapis-common-protos==1.73.0 \ + --hash=sha256:778d07cd4fbeff84c6f7c72102f0daf98fa2bfd3fa8bea426edc545588da0b5a \ + --hash=sha256:dfdaaa2e860f242046be561e6d6cb5c5f1541ae02cfbcb034371aadb2942b4e8 greenlet==3.3.2 \ --hash=sha256:02b0a8682aecd4d3c6c18edf52bc8e51eacdd75c8eac52a790a210b06aa295fd \ --hash=sha256:18cb1b7337bca281915b3c5d5ae19f4e76d35e1df80f4ad3c1a7be91fadf1082 \ @@ -223,18 +343,18 @@ jsonpath-ng==1.8.0 \ langdetect==1.0.9 \ --hash=sha256:7cbc0746252f19e76f77c0b1690aadf01963be835ef0cd4b56dddf2a8f1dfc2a \ --hash=sha256:cbc1fef89f8d062739774bd51eda3da3274006b3661d199c2655f6b3f6d605a0 -litellm==1.82.0 \ - --hash=sha256:5496b5d4532cccdc7a095c21cbac4042f7662021c57bc1d17be4e39838929e80 \ - --hash=sha256:d388f52447daccbcaafa19a3e68d17b75f1374b5bf2cde680d65e1cd86e50d22 -llama-stack==0.4.3 \ - --hash=sha256:423207eae2b640894992a9075ff9dd6300ff904ab06a49fe38cfe0bb809d4669 \ - --hash=sha256:70d379ae9dbb5b1d0693f14054d9817aba183ffcd805133f0a4442baee132c6d -llama-stack-api==0.4.4 \ - --hash=sha256:3973ca3bacf86916e04e521f77e7909533eec7364d32c3eabc35dc2976dbfe7d \ - --hash=sha256:7bbc63330ed186502dcd48f65cae014dbeb788ba5690be738c98693cfcd2f599 -llama-stack-client==0.4.3 \ - --hash=sha256:97b8cc5032bad4f0cdd1b0ae992cf44f5554679d315b7c40f46deb358c041f50 \ - --hash=sha256:cb807be258206e8fedeb5e5ceba7be7108d3badb31d74199406808c3d1679c35 +litellm==1.82.2 \ + --hash=sha256:641ed024774fa3d5b4dd9347f0efb1e31fa422fba2a6500aabedee085d1194cb \ + --hash=sha256:f5f4c4049f344a88bf80b2e421bb927807687c99624515d7ff4152d533ec9dcb +llama-stack==0.5.2 \ + --hash=sha256:581fda638088ee029aab20afe3c42ba8f7f6ef21c80bd9ebcae20bb13c3409d3 \ + --hash=sha256:9334c781e4ded6520aa60c3301a9087e9fb8fdaea8e5f30f8e21d85b17231d8d +llama-stack-api==0.5.2 \ + --hash=sha256:6531556dd8bb6555d778360ecfcd850aad7a49a8172b68146995d538e71641f0 \ + --hash=sha256:a272e4b803fe24a8ba7d22e6d904bf88abd118ba0b6610a20ff5dedb09f38ad7 
+llama-stack-client==0.5.2 \ + --hash=sha256:17c1bbad90f7699da4eb3cae256e8823caa4d2be945512a45c8c6f89ab899f28 \ + --hash=sha256:473f4d67ac0b243b0fc29555a0203a742615d31bea606b4332d9e2f193f73d6a markupsafe==3.0.3 \ --hash=sha256:0303439a41979d9e74d18ff5e2dd8c43ed6c6001fd40e5bf2e43f7bd9bbc523f \ --hash=sha256:068f375c472b3e7acbe2d5318dea141359e6900156b5b2ba06a30b169086b91a \ @@ -354,12 +474,18 @@ multiprocess==0.70.18 \ nltk==3.9.3 \ --hash=sha256:60b3db6e9995b3dd976b1f0fa7dec22069b2677e759c28eb69b62ddd44870522 \ --hash=sha256:cb5945d6424a98d694c2b9a0264519fab4363711065a46aa0ae7a2195b92e71f -openai==2.26.0 \ - --hash=sha256:6151bf8f83802f036117f06cc8a57b3a4da60da9926826cc96747888b57f394f \ - --hash=sha256:b41f37c140ae0034a6e92b0c509376d907f3a66109935fba2c1b471a7c05a8fb +oci==2.168.1 \ + --hash=sha256:b941674171b41e999b8e3adb38d4797d7b42d2bb5ff40d17c26e8ce2a7d4b605 \ + --hash=sha256:d106cfffc9153b5c9de628877c967ed87bbbfbbc9d411c97feee0eba8f2e4eab +openai==2.28.0 \ + --hash=sha256:79aa5c45dba7fef84085701c235cf13ba88485e1ef4f8dfcedc44fc2a698fc1d \ + --hash=sha256:bb7fdff384d2a787fa82e8822d1dd3c02e8cf901d60f1df523b7da03cbb6d48d opentelemetry-api==1.40.0 \ --hash=sha256:159be641c0b04d11e9ecd576906462773eb97ae1b657730f0ecf64d32071569f \ --hash=sha256:82dd69331ae74b06f6a874704be0cfaa49a1650e1537d4a813b86ecef7d0ecf9 +opentelemetry-distro==0.61b0 \ + --hash=sha256:975b845f50181ad53753becf4fd4b123b54fa04df5a9d78812264436d6518981 \ + --hash=sha256:f21d1ac0627549795d75e332006dd068877f00e461b1b2e8fe4568d6eb7b9590 opentelemetry-exporter-otlp==1.40.0 \ --hash=sha256:48c87e539ec9afb30dc443775a1334cc5487de2f72a770a4c00b1610bf6c697d \ --hash=sha256:7caa0870b95e2fcb59d64e16e2b639ecffb07771b6cd0000b5d12e5e4fef765a @@ -384,6 +510,38 @@ opentelemetry-sdk==1.40.0 \ opentelemetry-semantic-conventions==0.61b0 \ --hash=sha256:072f65473c5d7c6dc0355b27d6c9d1a679d63b6d4b4b16a9773062cb7e31192a \ --hash=sha256:fa530a96be229795f8cef353739b618148b0fe2b4b3f005e60e262926c4d38e2 +oracledb==3.4.2 \ + 
--hash=sha256:00c79448017f367bb7ab6900efe0706658a53768abea2b4519a4c9b2d5743890 \ + --hash=sha256:0e16fe3d057e0c41a23ad2ae95bfa002401690773376d476be608f79ac74bf05 \ + --hash=sha256:0f04a2d62073407672f114d02529921de0677c6883ed7c64d8d1a3c04caa3238 \ + --hash=sha256:1617a1db020346883455af005efbefd51be2c4d797e43b1b38455a19f8526b48 \ + --hash=sha256:19fa80ef84f85ad74077aa626067bbe697e527bd39604b4209f9d86cb2876b89 \ + --hash=sha256:1e4930d7f6584832dcc15b8ca415a7957b0c45f5aa7c4f88702e070e5c53bf93 \ + --hash=sha256:23aa07c1eaca17ae74c6fdc86b218f58484d56452958aead1aa460c0596a76c1 \ + --hash=sha256:31b7ee83c23d0439778303de8a675717f805f7e8edb5556d48c4d8343bcf14f5 \ + --hash=sha256:3df8eee1410d25360599968b1625b000f10c5ae0e47274031a7842a9dc418890 \ + --hash=sha256:404ec1451d0448653ee074213b87d6c5bd65eaa74b50083ddf2c9c3e11c71c71 \ + --hash=sha256:46e0f2278ff1fe83fbc33a3b93c72d429323ec7eed47bc9484e217776cd437e5 \ + --hash=sha256:55397e7eb43bb7017c03a981c736c25724182f5210951181dfe3fab0e5d457fb \ + --hash=sha256:574c8280d49cbbe21dbe03fc28356d9b9a5b9e300ebcde6c6d106e51453a7e65 \ + --hash=sha256:59ad6438f56a25e8e1a4a3dd1b42235a5d09ab9ba417ff2ad14eae6596f3d06f \ + --hash=sha256:5d7befb014174c5ae11c3a08f5ed6668a25ab2335d8e7104dca70d54d54a5b3a \ + --hash=sha256:5ed78d7e7079a778062744ccf42141ce4806818c3f4dd6463e4a7edd561c9f86 \ + --hash=sha256:643c25d301a289a371e37fcedb59e5fa5e54fb321708e5c12821c4b55bdd8a4d \ + --hash=sha256:6d85622664cc88d5a82bbd7beccb62cd53bd272c550a5e15e7d5f8ae6b86f1f1 \ + --hash=sha256:9f434a739405557bd57cb39b62238142bb27855a524a70dc6d397a2a8c576c9d \ + --hash=sha256:a7396664e592881225ba66385ee83ce339d864f39003d6e4ca31a894a7e7c552 \ + --hash=sha256:ac25a0448fc830fb7029ad50cd136cdbfcd06975d53967e269772cc5cb8c203a \ + --hash=sha256:b1095d95d0c8b37e4d0e17cf1928919cb59222b6344362a1cf6a2f3ca205a28a \ + --hash=sha256:b26a10f9c790bd141ffc8af68520803ed4a44a9258bf7d1eea9bfdd36bd6df7f \ + --hash=sha256:b8e4b8a852251cef09038b75f30fce1227010835f4e19cfbd436027acba2697c \ + 
--hash=sha256:b974caec2c330c22bbe765705a5ac7d98ec3022811dec2042d561a3c65cb991b \ + --hash=sha256:d7ce75c498bff758548ec6e4424ab4271aa257e5887cc436a54bc947fd46199a \ + --hash=sha256:d8d75e4f879b908be66cce05ba6c05791a5dbb4a15e39abc01aa25c8a2492bd9 \ + --hash=sha256:e068ef844a327877bfefbef1bc6fb7284c727bb87af80095f08d95bcaf7b8bb2 \ + --hash=sha256:f8ea989965a4f636a309444bd696ab877bba373d5d67bf744785f9bd8c560865 \ + --hash=sha256:f93cae08e8ed20f2d5b777a8602a71f9418389c661d2c937e84d94863e7e7011 \ + --hash=sha256:ff3c89cecea62af8ca02aa33cab0f2edc0214c747eac7d3364ed6b2640cb55e4 polyleven==0.11.0 \ --hash=sha256:046e90c02c5b8dae2ab71c4fb33772bd6f27b7883b05e2117573bf478b5ced44 \ --hash=sha256:05207bb66da15a2dc5c530e2f5cb5f0588d0a7e79b3bd542965f9e06e3fb14fe \ @@ -541,12 +699,15 @@ pycryptodomex==3.23.0 \ pydantic-settings==2.13.1 \ --hash=sha256:b4c11847b15237fb0171e1462bf540e294affb9b86db4d9aa5c01730bdbe4025 \ --hash=sha256:d56fd801823dbeae7f0975e1f8c8e25c258eb75d278ea7abb5d9cebb01b56237 -pyjwt==2.11.0 \ - --hash=sha256:35f95c1f0fbe5d5ba6e43f00271c275f7a1a4db1dab27bf708073b75318ea623 \ - --hash=sha256:94a6bde30eb5c8e04fee991062b534071fd1439ef58d2adc9ccb823e7bcd0469 -pythainlp==5.2.0 \ - --hash=sha256:04c6e4bdd806204be742f139b1f2e666411c4509c270dfff1a8b5afa69d36d2b \ - --hash=sha256:fd64d6b3d33973782390822e74b8e2c9b867760eeed19d0d218945165b431e35 +pyjwt==2.12.1 \ + --hash=sha256:28ca37c070cad8ba8cd9790cd940535d40274d22f80ab87f3ac6a713e6e8454c \ + --hash=sha256:c74a7a2adf861c04d002db713dd85f84beb242228e671280bf709d765b03672b +pyopenssl==25.3.0 \ + --hash=sha256:1fda6fc034d5e3d179d39e59c1895c9faeaf40a79de5fc4cbbfbe0d36f4a77b6 \ + --hash=sha256:c981cb0a3fd84e8602d7afc209522773b94c1c2446a3c710a75b06fe1beae329 +pythainlp==5.3.1 \ + --hash=sha256:516c34d22689c2b469dd74bb18221eb9336e42f5137aa32940008293f1895de4 \ + --hash=sha256:f33fb134fcfbd281fb64494c924fddb5e7cc27e053f7a73f18b6b5acbb7a4e2d python-dotenv==1.2.2 \ 
--hash=sha256:1d8214789a24de455a8b8bd8ae6fe3c6b69a5e3d64aa8a8e5d68e694bbcb285a \ --hash=sha256:2c371a91fbd7ba082c2c1dc1f8bf89ca22564a087c2c287cd9b662adde799cf3 @@ -674,15 +835,26 @@ rich==14.3.3 \ semver==3.0.4 \ --hash=sha256:9c824d87ba7f7ab4a1890799cec8596f15c1241cb473404ea1cb0c55e4b04746 \ --hash=sha256:afc7d8c584a5ed0a11033af086e8af226a9c0b206f313e0301f8dd7b6b589602 -sentence-transformers==5.2.3 \ - --hash=sha256:3cd3044e1f3fe859b6a1b66336aac502eaae5d3dd7d5c8fc237f37fbf58137c7 \ - --hash=sha256:6437c62d4112b615ddebda362dfc16a4308d604c5b68125ed586e3e95d5b2e30 +sentence-transformers==5.3.0 \ + --hash=sha256:414a0a881f53a4df0e6cbace75f823bfcb6b94d674c42a384b498959b7c065e2 \ + --hash=sha256:dca6b98db790274a68185d27a65801b58b4caf653a4e556b5f62827509347c7d sse-starlette==3.3.2 \ --hash=sha256:5c3ea3dad425c601236726af2f27689b74494643f57017cafcb6f8c9acfbb862 \ --hash=sha256:678fca55a1945c734d8472a6cad186a55ab02840b4f6786f5ee8770970579dcd tenacity==9.1.4 \ --hash=sha256:6095a360c919085f28c6527de529e76a06ad89b23659fa881ae0649b867a9d55 \ --hash=sha256:adb31d4c263f2bd041081ab33b498309a57c77f9acf2db65aadf0898179cf93a +tornado==6.5.5 \ + --hash=sha256:192b8f3ea91bd7f1f50c06955416ed76c6b72f96779b962f07f911b91e8d30e9 \ + --hash=sha256:2c9a876e094109333f888539ddb2de4361743e5d21eece20688e3e351e4990a6 \ + --hash=sha256:36abed1754faeb80fbd6e64db2758091e1320f6bba74a4cf8c09cd18ccce8aca \ + --hash=sha256:3f54aa540bdbfee7b9eb268ead60e7d199de5021facd276819c193c0fb28ea4e \ + --hash=sha256:435319e9e340276428bbdb4e7fa732c2d399386d1de5686cb331ec8eee754f07 \ + --hash=sha256:487dc9cc380e29f58c7ab88f9e27cdeef04b2140862e5076a66fb6bb68bb1bfa \ + --hash=sha256:6443a794ba961a9f619b1ae926a2e900ac20c34483eea67be4ed8f1e58d3ef7b \ + --hash=sha256:65a7f1d46d4bb41df1ac99f5fcb685fb25c7e61613742d5108b010975a9a6521 \ + --hash=sha256:dd3eafaaeec1c7f2f8fdcd5f964e8907ad788fe8a5a32c4426fbbdda621223b7 \ + --hash=sha256:e74c92e8e65086b338fd56333fb9a68b9f6f2fe7ad532645a290a464bcf46be5 trl==0.29.0 \ 
--hash=sha256:7d49cb1526c55cc1d798d921d9d91bb84a35ad5c645d6277441ffb7a30a233aa \ --hash=sha256:b1c9f756a3d73c5457b7025b0c7bb9792873a87a2f3841cccf4f9d4f0e9ab273 diff --git a/requirements.hashes.wheel.txt b/requirements.hashes.wheel.txt index fc084979a..04e79edf7 100644 --- a/requirements.hashes.wheel.txt +++ b/requirements.hashes.wheel.txt @@ -26,19 +26,19 @@ asyncpg==0.31.0 \ attrs==25.4.0 \ --hash=sha256:6d626bc22c041e7c3a84117f6d65a2be8eba3b58a83d3d110c58ee71db234b1e cffi==2.0.0 \ - --hash=sha256:59a31b51151b49adbbcd58c0542ea5b0eddc74fd18e5cb5d4deffc97e4c93388 \ - --hash=sha256:92f0b76faee1feab91508f8c932b54fd9a9203986b156761a5bf266addd23f25 \ - --hash=sha256:d6f66e891be32d0acc7933c7ff9f55285da5b73a1364e8d222bf1e9241b06a6a \ - --hash=sha256:fe96da20ecfe1e84563b411a2996740d2891d126aeef9e19476563c22dca65d8 -charset-normalizer==3.4.4 \ - --hash=sha256:ccdd9a2c5c46da6335354acd6a04ef80e2eb2804a8ed58f932b7f5ae345c5891 + --hash=sha256:257e90f733c1a33b9f5ade4a4f47db6a3984de5c2d2654848feca129888ff9d5 \ + --hash=sha256:34cf2187e399eb7baaa20488d0b78ab20be91060ef9ac531685f37478ca1a12a \ + --hash=sha256:e39d7009b1872abbd91bb17057b48fe24057a1026a20ab06b672fbdd4721d789 \ + --hash=sha256:ffe747cf5e265169ad5bd64b4359368f74e445cf8ea3cfe99045450554fb4e13 chevron==0.14.0 \ --hash=sha256:215f5e3e7ac75d150eadfc0f8c651b3815dc36813e122484b1ed68e142e5adfb click==8.3.1 \ --hash=sha256:a87f0253ce1fb7747cdde1674d73f34241bb4de9fca7a31bc866fb0a8a5f4307 -cryptography==46.0.4 \ - --hash=sha256:26ffa7666abe41b7b75905f7353c43ff4e812cf7f5b2b253ec3bcddd0d29e7a1 \ - --hash=sha256:b1352b1c73e908b825c91e8fe23f24775fcebca7d59f66a6864e4dcf6fdd6323 +cryptography==46.0.5 \ + --hash=sha256:2b44c9fd892f763465b2d7782bf310d65c04dab741b1241f5be203ccf022368d \ + --hash=sha256:661cf199efa488e0c5fb4987d36214e11c1fe2dfb842a1a330b1854ff069f8d3 \ + --hash=sha256:88347ad17f60b60e31e5f2b58e37339d88fa90bfa4f0d35528b3bd18d464427b \ + --hash=sha256:bb1b90386c7b5d3d8c9d8d53b207cfbe5c3e639457ff9ebe84f2131918b785a9 
datasets==4.5.0 \ --hash=sha256:afc7c4ccba966970c71e264f0eb7977bdc7ca8ea9a946e331cf9d3d1d072cb2f dill==0.4.0 \ @@ -81,8 +81,6 @@ google-genai==1.59.0 \ --hash=sha256:388b25b31c0c00307a947690f21528a7b652e329feb3a14d49cbb16765012313 google-resumable-media==2.8.0 \ --hash=sha256:bdf13a0ceec8ba97622165ceeb789e6f0fb7c1614665be246b819811ca661f9c -googleapis-common-protos==1.72.0 \ - --hash=sha256:b530c484aa7341cab24eda0b801abc608c85da52ff9002d8a485e2f9d9d302c5 grpc-google-iam-v1==0.14.3 \ --hash=sha256:b29d3adb3873130b93f289e44449166586cd684bfdaccc8a4c4d35dc0e3ce9df grpcio==1.76.0 \ @@ -147,7 +145,7 @@ numpy==2.3.5 \ oauthlib==3.3.1 \ --hash=sha256:df9219023be7c78d77401950592822d1bbc318225d3cd71d02271c549ef1f980 packaging==26.0 \ - --hash=sha256:4e4d463e366772479222863a8ff3bb307310faee4da6256c6c39641c207b1a44 + --hash=sha256:76081d1703deb79a44f8c44f23e391b98ab21ea54a2c21b0e8890779e0eaecb7 pandas==2.3.3 \ --hash=sha256:308c8ca6262e058136d91a7d4d2bd84dd02e65caf60340fefd20bcf26b97e819 \ --hash=sha256:6ba245cff03afa84a0bc5e715a11caafab1dc51ca8fe4d827017706c4b62f238 \ @@ -178,11 +176,11 @@ pyarrow==23.0.0 \ --hash=sha256:f5af9838bbfefa2535c3ae9bf4fbfaab63367994c1e65243b1830e41b943b366 \ --hash=sha256:f8eea464a5138cfe7b32b5422caedaca6a9fe4b20af1ed917f2a6cf9ec802d94 pyasn1==0.6.2 \ - --hash=sha256:4448ac141114a5dfc4a2e3b6184a48b363541f26b4ca764e2b89651505dc4576 + --hash=sha256:dad757598fe812256782cbf67535fef02cf245bec157dfa1d82ceee62151cd65 pyasn1-modules==0.4.2 \ --hash=sha256:cda22a851735d664e92c8354a30405d88c5a1aa7d6313ba6c583bc3034654937 pycparser==3.0 \ - --hash=sha256:a0a805ba04eb0922ef6f214736f63a5c38dbaebdc4d991f88d64cf3d06af00c5 + --hash=sha256:86072d0cf4bf7e125171d79036c2126a2fa377687520e834987c2de4b4e5a9e9 pydantic==2.12.5 \ --hash=sha256:ba78cfc9d85e40047d67d32828da04ddbe9a27bd0718983938629bfca0d3cc6c pydantic-core==2.41.5 \ @@ -197,10 +195,10 @@ python-dateutil==2.9.0.post0 \ python-multipart==0.0.22 \ 
--hash=sha256:b5fc2a9738209bd168ef0c746ca0ee5eb66f2c69aeeb689e0dbc15b36c779aac pyyaml==6.0.3 \ - --hash=sha256:1ccd350719df6e1794b08d7a28b14e3bed61ef2742cb0ea03ced7c05f1e14cfd \ - --hash=sha256:6ea391cbbbc6abd3412718672301a184bb87f1f877b58081150b5be5942f0915 \ - --hash=sha256:d122b1f987c23385d9600d211475f765fcb59a2fff150c5e6036293196df5b80 \ - --hash=sha256:d4980565b8082efbd9baf9d659cfb37dce47e56e0530bfe1dba00e3287159f69 + --hash=sha256:84a2acfe1e8dfcf8be95fa61611ea7637f8bfcff49761a6a8c396aaa0b6d94a9 \ + --hash=sha256:bcaf1b152ce8cd6ec4ff56f8ca929f176eb01e6aa472fe10b3ea31f19e15fb39 \ + --hash=sha256:c22e4a0d2bb69fddc73b3b52f1540533df1f206a08f52ea8cae49337615b55d2 \ + --hash=sha256:c99b924c5311933fa398cc53c617f99705cf2d96226d023f75e95e15e753f81f referencing==0.37.0 \ --hash=sha256:bd019ff74869f84a893b7e50b84ce3d0db1e465ceb3a336403011c6467ffacb0 requests==2.32.5 \ @@ -212,8 +210,6 @@ rpds-py==0.30.0 \ --hash=sha256:613bc173bc12f35e40c3d1c1e6252555eaa48ea9425ee0b01fb151f3ca8a2a05 \ --hash=sha256:6c94c5f1c2501e50c22b7c993b083aeef4d342974d3058763296fb4646d8a059 \ --hash=sha256:75a565fc839958562ab5ad648c3a4bd0c46874a5556acb48da423f91c47c355b -rsa==4.9.1 \ - --hash=sha256:1c5f55eb95147d5a8d6f4a20869cfb97d680b28e2ea6ef3747b70c518c44f7fa safetensors==0.7.0 \ --hash=sha256:18abcf37ffae5f85a81ca46c440fdc5f38ae0938ff0f4a4de14e37386ed248e7 \ --hash=sha256:6833f019f20c8f0bb790ae3f0fd088f50c9fe4e051106dc3e59df5a953f68532 \ @@ -230,7 +226,7 @@ scipy==1.17.0 \ --hash=sha256:a1b3d25c892207a7626f0634fe768bd71b5f09cbb393be07d82fc44cced109ac \ --hash=sha256:f1c7b6ff095ed94c422bce52aa0ca8c8aea18922877853bbacaf8b7947e02510 setuptools==80.9.0 \ - --hash=sha256:ade450006de562f26341e24f8199338e8aff247cd565ac4f52235478a8dfa815 + --hash=sha256:3a3c26f9000ab213d87be4efa0f1926fb0975237ae9e8e7cc68c9d7fcba6c294 six==1.17.0 \ --hash=sha256:33f558442b372864d53b1813933f5d624876f418521b1b89624ea6e3d79f0e97 sniffio==1.3.1 \ @@ -264,11 +260,6 @@ torch==2.9.1 \ 
--hash=sha256:9bd4844a0cf3f199351830697973a168ad2fd3a99e77b150ca4a1582067dc633 \ --hash=sha256:ccb4a5b3c15819df80d96d2474b053306a2a4eba0301337c4aa56a58cbe45e10 \ --hash=sha256:e42ab849b64444059f5eda352d61c9c3a078f30797e48e2972857182c9a00cf8 -tornado==6.5.4 \ - --hash=sha256:4009891b752e77c47c396ed587fd3abeeeaf53ce0ff180ac5fbd81f3c5522903 \ - --hash=sha256:9532231bbed1fab64200c9f755616e45f4a9d2b52fbefb91dd3b24f9b9ade291 \ - --hash=sha256:be68f2c30d855e21a11c7e5a6cdaf6e7d12b8373e703798ad75f79422732b02a \ - --hash=sha256:d533131fa41fd4b126d33efd3fd1f7be15a3380af7fd107d6cfac4942a47c07c tqdm==4.67.3 \ --hash=sha256:d798b33fcc041b9a42c57f462b9c068a5a15c2dbcef1c87695d80c7074770a4d transformers==4.57.6 \ diff --git a/requirements.overrides.txt b/requirements.overrides.txt index b1f4aaaf2..413782c93 100644 --- a/requirements.overrides.txt +++ b/requirements.overrides.txt @@ -1,11 +1,11 @@ # override these package to the version available on RHOAI wheels index: -# https://console.redhat.com/api/pypi/public-rhai/rhoai/3.2/cpu-ubi9/simple +# https://console.redhat.com/api/pypi/public-rhai/rhoai/3.3/cpu-ubi9/simple transformers==4.57.6 tokenizers==0.22.2 scipy==1.17.0 aiohttp==3.13.3 aiosqlite==0.22.1 -cryptography==46.0.4 +cryptography==46.0.5 anyio==4.12.1 datasets==4.5.0 pandas==2.3.3 diff --git a/scripts/gen_doc.py b/scripts/gen_doc.py index c62e18a44..e23bf6244 100755 --- a/scripts/gen_doc.py +++ b/scripts/gen_doc.py @@ -25,13 +25,13 @@ def generate_docfile(directory): f"# List of source files stored in `{directory}` directory", file=indexfile, ) - print("", file=indexfile) + print(file=indexfile) files = sorted(os.listdir()) for file in files: if file.endswith(".py"): print(f"## [{file}]({file})", file=indexfile) - with open(file, "r", encoding="utf-8") as fin: + with open(file, encoding="utf-8") as fin: source = fin.read() try: mod = ast.parse(source) diff --git a/scripts/konflux_requirements.sh b/scripts/konflux_requirements.sh index 5331bfff2..67cff3e7e 100755 --- 
a/scripts/konflux_requirements.sh +++ b/scripts/konflux_requirements.sh @@ -4,6 +4,8 @@ # Packages from pypi.org go to requirements.source.txt # Packages from console.redhat.com go to requirements.wheel.txt +set -x + RAW_REQ_FILE="requirements.no_hashes.txt" SOURCE_FILE="requirements.source.txt" WHEEL_FILE="requirements.wheel.txt" @@ -87,4 +89,4 @@ echo "Done!" echo "Packages from pypi.org written to: $SOURCE_HASH_FILE ($(wc -l < "$SOURCE_HASH_FILE") packages)" echo "Packages from console.redhat.com written to: $WHEEL_HASH_FILE ($(wc -l < "$WHEEL_HASH_FILE") packages)" echo "Build dependencies written to: $BUILD_FILE ($(wc -l < "$BUILD_FILE") packages)" -echo "Remember to commit $SOURCE_HASH_FILE, $WHEEL_HASH_FILE, $BUILD_FILE, pipeline configurations and push the changes" \ No newline at end of file +echo "Remember to commit $SOURCE_HASH_FILE, $WHEEL_HASH_FILE, $BUILD_FILE, pipeline configurations and push the changes" diff --git a/src/a2a_storage/__init__.py b/src/a2a_storage/__init__.py index 2707019cb..aa7b1fafc 100644 --- a/src/a2a_storage/__init__.py +++ b/src/a2a_storage/__init__.py @@ -16,8 +16,8 @@ __all__ = [ "A2AContextStore", + "A2AStorageFactory", "InMemoryA2AContextStore", - "SQLiteA2AContextStore", "PostgresA2AContextStore", - "A2AStorageFactory", + "SQLiteA2AContextStore", ] diff --git a/src/app/endpoints/a2a.py b/src/app/endpoints/a2a.py index a00ece91e..e5fe2abd1 100644 --- a/src/app/endpoints/a2a.py +++ b/src/app/endpoints/a2a.py @@ -340,7 +340,7 @@ async def _process_task_streaming( # pylint: disable=too-many-locals stream = await client.responses.create(**responses_params.model_dump()) except APIConnectionError as e: error_message = ( - f"Unable to connect to Llama Stack backend service: {str(e)}. " + f"Unable to connect to Llama Stack backend service: {e!s}. " "The service may be temporarily unavailable. Please try again later." 
) logger.error( diff --git a/src/app/endpoints/conversations_v1.py b/src/app/endpoints/conversations_v1.py index 1dc14cc95..158d312b6 100644 --- a/src/app/endpoints/conversations_v1.py +++ b/src/app/endpoints/conversations_v1.py @@ -16,7 +16,6 @@ from configuration import configuration from models.config import Action from models.database.conversations import ( - UserTurn, UserConversation, ) from models.requests import ConversationUpdateRequest @@ -38,6 +37,7 @@ check_configuration_loaded, delete_conversation, retrieve_conversation, + retrieve_conversation_turns, validate_and_retrieve_conversation, ) from utils.suid import ( @@ -45,7 +45,10 @@ normalize_conversation_id, to_llama_stack_conversation_id, ) -from utils.conversations import build_conversation_turns_from_items +from utils.conversations import ( + build_conversation_turns_from_items, + get_all_conversation_items, +) from log import get_logger logger = get_logger(__name__) @@ -236,46 +239,23 @@ async def get_conversation_endpoint_handler( # pylint: disable=too-many-locals, llama_stack_conv_id, ) - # Use Conversations API to retrieve conversation items - conversation_items_response = await client.conversations.items.list( - conversation_id=llama_stack_conv_id, - after=None, - include=None, - limit=None, - order="asc", # oldest first - ) + # Retrieve turns metadata from database (can be empty for legacy conversations) + db_turns = retrieve_conversation_turns(normalized_conv_id) - if not conversation_items_response.data: + # Use Conversations API to retrieve conversation items + items = await get_all_conversation_items(client, llama_stack_conv_id) + if not items: logger.error("No items found for conversation %s", conversation_id) response = NotFoundResponse( resource="conversation", resource_id=normalized_conv_id ).model_dump() raise HTTPException(**response) - items = conversation_items_response.data - logger.info( "Successfully retrieved %d items for conversation %s", len(items), conversation_id, ) - # 
Retrieve turns metadata from database - db_turns: list[UserTurn] = [] - try: - with get_session() as session: - db_turns = ( - session.query(UserTurn) - .filter_by(conversation_id=normalized_conv_id) - .order_by(UserTurn.turn_number) - .all() - ) - except SQLAlchemyError as e: - logger.error( - "Database error occurred while retrieving conversation turns for %s.", - normalized_conv_id, - ) - response = InternalServerErrorResponse.database_error() - raise HTTPException(**response.model_dump()) from e # Build conversation turns from items and populate turns metadata # Use conversation.created_at for legacy conversations without turn metadata diff --git a/src/app/endpoints/health.py b/src/app/endpoints/health.py index 7a8058d79..994103ced 100644 --- a/src/app/endpoints/health.py +++ b/src/app/endpoints/health.py @@ -75,7 +75,7 @@ async def get_providers_health_statuses() -> list[ProviderHealthStatus]: providers = await client.providers.list() logger.debug("Found %d providers", len(providers)) - health_results = [ + return [ ProviderHealthStatus( provider_id=provider.provider_id, status=str(provider.health.get("status", "unknown")), @@ -83,7 +83,6 @@ async def get_providers_health_statuses() -> list[ProviderHealthStatus]: ) for provider in providers ] - return health_results except APIConnectionError as e: logger.error("Failed to check providers health: %s", e) @@ -91,7 +90,7 @@ async def get_providers_health_statuses() -> list[ProviderHealthStatus]: ProviderHealthStatus( provider_id="unknown", status=HealthStatus.ERROR.value, - message=f"Failed to initialize health check: {str(e)}", + message=f"Failed to initialize health check: {e!s}", ) ] diff --git a/src/app/endpoints/models.py b/src/app/endpoints/models.py index a2f519292..348cf954b 100644 --- a/src/app/endpoints/models.py +++ b/src/app/endpoints/models.py @@ -49,7 +49,7 @@ def parse_llama_stack_model(model: Any) -> dict[str, Any]: if k not in ("provider_id", "provider_resource_id", "model_type") } - legacy_model 
= { + return { "identifier": getattr(model, "id", ""), "metadata": metadata, "api_model_type": model_type, @@ -58,7 +58,6 @@ def parse_llama_stack_model(model: Any) -> dict[str, Any]: "provider_resource_id": str(custom_metadata.get("provider_resource_id", "")), "model_type": model_type, } - return legacy_model models_responses: dict[int | str, dict[str, Any]] = { diff --git a/src/app/endpoints/query.py b/src/app/endpoints/query.py index fd123bd21..d9635e791 100644 --- a/src/app/endpoints/query.py +++ b/src/app/endpoints/query.py @@ -1,5 +1,3 @@ -# pylint: disable=too-many-locals,too-many-branches,too-many-nested-blocks - """Handler for REST API call to provide answer to query using Response API.""" import datetime @@ -36,6 +34,7 @@ UnauthorizedResponse, UnprocessableEntityResponse, ) +from utils.conversations import append_turn_items_to_conversation from utils.endpoints import ( check_configuration_loaded, validate_and_retrieve_conversation, @@ -59,14 +58,11 @@ get_topic_summary, prepare_responses_params, ) -from utils.shields import ( - append_turn_to_conversation, - run_shield_moderation, - validate_shield_ids_override, -) +from utils.shields import run_shield_moderation, validate_shield_ids_override from utils.suid import normalize_conversation_id from utils.types import ( ResponsesApiParams, + ShieldModerationResult, TurnSummary, ) from utils.vector_search import build_rag_context @@ -158,14 +154,21 @@ async def query_endpoint_handler( client = AsyncLlamaStackClientHolder().get_client() - # Build RAG context from Inline RAG sources - inline_rag_context = await build_rag_context( - client, query_request.query, query_request.vector_store_ids, query_request.solr - ) - # Moderation input is the raw user content (query + attachments) without injected RAG # context, to avoid false positives from retrieved document content. 
moderation_input = prepare_input(query_request) + moderation_result = await run_shield_moderation( + client, moderation_input, query_request.shield_ids + ) + + # Build RAG context from Inline RAG sources + inline_rag_context = await build_rag_context( + client, + moderation_result.decision, + query_request.query, + query_request.vector_store_ids, + query_request.solr, + ) # Prepare API request parameters responses_params = await prepare_responses_params( @@ -177,7 +180,7 @@ async def query_endpoint_handler( stream=False, store=True, request_headers=request.headers, - inline_rag_context=inline_rag_context.context_text or None, + inline_rag_context=inline_rag_context.context_text, ) # Handle Azure token refresh if needed @@ -189,32 +192,22 @@ async def query_endpoint_handler( ): client = await update_azure_token(client) - # Build index identification mapping for RAG source resolution - vector_store_ids = extract_vector_store_ids_from_tools(responses_params.tools) - rag_id_mapping = configuration.rag_id_mapping - # Retrieve response using Responses API - turn_summary = await retrieve_response( - client, - responses_params, - query_request.shield_ids, - vector_store_ids, - rag_id_mapping, - moderation_input=moderation_input, - ) - - # Combine inline RAG results (BYOK + Solr) with tool-based RAG results for the transcript - rag_chunks = inline_rag_context.rag_chunks - tool_rag_chunks = turn_summary.rag_chunks or [] - logger.info("RAG as a tool retrieved %d chunks", len(tool_rag_chunks)) - turn_summary.rag_chunks = rag_chunks + tool_rag_chunks - - # Add tool-based RAG documents and chunks - rag_documents = inline_rag_context.referenced_documents - tool_rag_documents = turn_summary.referenced_documents or [] - turn_summary.referenced_documents = deduplicate_referenced_documents( - rag_documents + tool_rag_documents - ) + turn_summary = await retrieve_response(client, responses_params, moderation_result) + + if moderation_result.decision == "passed": + # Combine inline RAG 
results (BYOK + Solr) with tool-based RAG results for the transcript + rag_chunks = inline_rag_context.rag_chunks + tool_rag_chunks = turn_summary.rag_chunks + logger.info("RAG as a tool retrieved %d chunks", len(tool_rag_chunks)) + turn_summary.rag_chunks = rag_chunks + tool_rag_chunks + + # Add tool-based RAG documents and chunks + rag_documents = inline_rag_context.referenced_documents + tool_rag_documents = turn_summary.referenced_documents + turn_summary.referenced_documents = deduplicate_referenced_documents( + rag_documents + tool_rag_documents + ) # Get topic summary for new conversation if not user_conversation and query_request.generate_topic_summary: @@ -269,13 +262,10 @@ async def query_endpoint_handler( ) -async def retrieve_response( # pylint: disable=too-many-locals +async def retrieve_response( client: AsyncLlamaStackClient, responses_params: ResponsesApiParams, - shield_ids: Optional[list[str]] = None, - vector_store_ids: Optional[list[str]] = None, - rag_id_mapping: Optional[dict[str, str]] = None, - moderation_input: Optional[str] = None, + moderation_result: ShieldModerationResult, ) -> TurnSummary: """ Retrieve response from LLMs and agents. @@ -286,33 +276,23 @@ async def retrieve_response( # pylint: disable=too-many-locals Parameters: client: The AsyncLlamaStackClient to use for the request. responses_params: The Responses API parameters. - shield_ids: Optional list of shield IDs for moderation. - vector_store_ids: Vector store IDs used in the query for source resolution. - rag_id_mapping: Mapping from vector_db_id to user-facing rag_id. - moderation_input: Text to moderate. Should be the raw user content (query + - attachments) without injected RAG context to avoid false positives. - Falls back to responses_params.input if not provided. + moderation_result: The moderation result. 
Returns: TurnSummary: Summary of the LLM response content """ response: Optional[OpenAIResponseObject] = None - try: - moderation_result = await run_shield_moderation( + if moderation_result.decision == "blocked": + await append_turn_items_to_conversation( client, - moderation_input or cast(str, responses_params.input), - shield_ids, + responses_params.conversation, + responses_params.input, + [moderation_result.refusal_response], + ) + return TurnSummary( + id=moderation_result.moderation_id, llm_response=moderation_result.message ) - if moderation_result.decision == "blocked": - # Handle shield moderation blocking - violation_message = moderation_result.message - await append_turn_to_conversation( - client, - responses_params.conversation, - cast(str, responses_params.input), - violation_message, - ) - return TurnSummary(llm_response=violation_message) + try: response = await client.responses.create( **responses_params.model_dump(exclude_none=True) ) @@ -333,6 +313,8 @@ async def retrieve_response( # pylint: disable=too-many-locals error_response = handle_known_apistatus_errors(e, responses_params.model) raise HTTPException(**error_response.model_dump()) from e + vector_store_ids = extract_vector_store_ids_from_tools(responses_params.tools) + rag_id_mapping = configuration.rag_id_mapping return build_turn_summary( response, responses_params.model, vector_store_ids, rag_id_mapping ) diff --git a/src/app/endpoints/responses.py b/src/app/endpoints/responses.py new file mode 100644 index 000000000..89a475f9a --- /dev/null +++ b/src/app/endpoints/responses.py @@ -0,0 +1,731 @@ +# pylint: disable=too-many-locals,too-many-branches,too-many-nested-blocks, too-many-arguments,too-many-positional-arguments + +"""Handler for REST API call to provide answer using Responses API (LCORE specification).""" + +import json +from datetime import UTC, datetime +from typing import Annotated, Any, Optional, cast +from collections.abc import AsyncIterator + +from fastapi import 
APIRouter, Depends, HTTPException, Request +from fastapi.responses import StreamingResponse +from llama_stack_api import ( + OpenAIResponseObject, + OpenAIResponseObjectStream, + OpenAIResponseObjectStreamResponseOutputItemAdded as OutputItemAddedChunk, + OpenAIResponseObjectStreamResponseOutputItemDone as OutputItemDoneChunk, +) +from llama_stack_client import ( + APIConnectionError, + APIStatusError as LLSApiStatusError, + AsyncLlamaStackClient, +) +from openai._exceptions import ( + APIStatusError as OpenAIAPIStatusError, +) + +from authentication import get_auth_dependency +from authentication.interface import AuthTuple +from authorization.azure_token_manager import AzureEntraIDManager +from authorization.middleware import authorize +from client import AsyncLlamaStackClientHolder +from configuration import configuration +from log import get_logger +from models.config import Action +from models.requests import ResponsesRequest +from models.responses import ( + ForbiddenResponse, + InternalServerErrorResponse, + NotFoundResponse, + PromptTooLongResponse, + QuotaExceededResponse, + ResponsesResponse, + ServiceUnavailableResponse, + UnauthorizedResponse, + UnprocessableEntityResponse, +) + +from utils.conversations import append_turn_items_to_conversation +from utils.endpoints import ( + check_configuration_loaded, + resolve_response_context, +) +from utils.mcp_headers import mcp_headers_dependency +from utils.mcp_oauth_probe import check_mcp_auth +from utils.query import ( + consume_query_tokens, + extract_provider_and_model_from_model_id, + handle_known_apistatus_errors, + store_query_results, + update_azure_token, + validate_model_provider_override, +) +from utils.quota import check_tokens_available, get_available_quotas +from utils.responses import ( + build_tool_call_summary, + build_turn_summary, + check_model_configured, + deduplicate_referenced_documents, + extract_attachments_text, + extract_text_from_response_items, + extract_token_usage, + 
extract_vector_store_ids_from_tools, + get_topic_summary, + get_zero_usage, + parse_referenced_documents, + resolve_tool_choice, + select_model_for_responses, +) +from utils.shields import run_shield_moderation +from utils.suid import ( + normalize_conversation_id, +) +from utils.types import ( + RAGContext, + ResponseInput, + ResponsesApiParams, + ShieldModerationBlocked, + ShieldModerationResult, + TurnSummary, +) +from utils.vector_search import ( + append_inline_rag_context_to_responses_input, + build_rag_context, +) + +logger = get_logger(__name__) +router = APIRouter(tags=["responses"]) + +responses_response: dict[int | str, dict[str, Any]] = { + 200: ResponsesResponse.openapi_response(), + 401: UnauthorizedResponse.openapi_response( + examples=["missing header", "missing token"] + ), + 403: ForbiddenResponse.openapi_response( + examples=["endpoint", "conversation read", "model override"] + ), + 404: NotFoundResponse.openapi_response( + examples=["model", "conversation", "provider"] + ), + 413: PromptTooLongResponse.openapi_response(), + 422: UnprocessableEntityResponse.openapi_response(), + 429: QuotaExceededResponse.openapi_response(), + 500: InternalServerErrorResponse.openapi_response(examples=["configuration"]), + 503: ServiceUnavailableResponse.openapi_response(), +} + + +@router.post( + "/responses", + responses=responses_response, + response_model=None, + summary="Responses Endpoint Handler", +) +@authorize(Action.QUERY) +async def responses_endpoint_handler( + request: Request, + responses_request: ResponsesRequest, + auth: Annotated[AuthTuple, Depends(get_auth_dependency())], + mcp_headers: dict[str, dict[str, str]] = Depends(mcp_headers_dependency), +) -> ResponsesResponse | StreamingResponse: + """ + Handle request to the /responses endpoint using Responses API (LCORE specification). 
+ + Processes a POST request to the responses endpoint, forwarding the + user's request to a selected Llama Stack LLM and returning the generated response + following the LCORE OpenAPI specification. + + Returns: + ResponsesResponse: Contains the response following LCORE specification (non-streaming). + StreamingResponse: SSE-formatted streaming response with enriched events (streaming). + - response.created event includes conversation attribute + - response.completed event includes available_quotas attribute + + Raises: + HTTPException: + - 401: Unauthorized - Missing or invalid credentials + - 403: Forbidden - Insufficient permissions or model override not allowed + - 404: Not Found - Conversation, model, or provider not found + - 413: Prompt too long - Prompt exceeded model's context window size + - 422: Unprocessable Entity - Request validation failed + - 429: Quota limit exceeded - The token quota for model or user has been exceeded + - 500: Internal Server Error - Configuration not loaded or other server errors + - 503: Service Unavailable - Unable to connect to Llama Stack backend + """ + responses_request = responses_request.model_copy(deep=True) + check_configuration_loaded(configuration) + started_at = datetime.now(UTC) + user_id = auth[0] + + await check_mcp_auth(configuration, mcp_headers) + + # Check token availability + check_tokens_available(configuration.quota_limiters, user_id) + + # Enforce RBAC: optionally disallow overriding model in requests + validate_model_provider_override( + responses_request.model, + None, # provider specified as model prefix + request.state.authorized_actions, + ) + + response_context = await resolve_response_context( + user_id=user_id, + others_allowed=( + Action.READ_OTHERS_CONVERSATIONS in request.state.authorized_actions + ), + conversation_id=responses_request.conversation, + previous_response_id=responses_request.previous_response_id, + generate_topic_summary=responses_request.generate_topic_summary, + ) + 
responses_request.conversation = response_context.conversation + responses_request.generate_topic_summary = response_context.generate_topic_summary + client = AsyncLlamaStackClientHolder().get_client() + + # LCORE-specific: Automatically select model if not provided in request + # This extends the base LLS API which requires model to be specified. + if not responses_request.model: + responses_request.model = await select_model_for_responses( + client, response_context.user_conversation + ) + if not await check_model_configured(client, responses_request.model): + _, model_id = extract_provider_and_model_from_model_id(responses_request.model) + error_response = NotFoundResponse(resource="model", resource_id=model_id) + raise HTTPException(**error_response.model_dump()) + + # Handle Azure token refresh if needed + if ( + responses_request.model.startswith("azure") + and AzureEntraIDManager().is_entra_id_configured + and AzureEntraIDManager().is_token_expired + and AzureEntraIDManager().refresh_token() + ): + client = await update_azure_token(client) + + input_text = ( + responses_request.input + if isinstance(responses_request.input, str) + else extract_text_from_response_items(responses_request.input) + ) + attachments_text = extract_attachments_text(responses_request.input) + + moderation_result = await run_shield_moderation( + client, + input_text + "\n\n" + attachments_text, + responses_request.shield_ids, + ) + + ( + responses_request.tools, + responses_request.tool_choice, + vector_store_ids, + ) = await resolve_tool_choice( + responses_request.tools, + responses_request.tool_choice, + auth[1], + mcp_headers, + request.headers, + ) + + # Build RAG context from Inline RAG sources + inline_rag_context = await build_rag_context( + client, + moderation_result.decision, + input_text, + vector_store_ids, + responses_request.solr, + ) + if moderation_result.decision == "passed": + responses_request.input = append_inline_rag_context_to_responses_input( + 
responses_request.input, inline_rag_context.context_text + ) + + response_handler = ( + handle_streaming_response + if responses_request.stream + else handle_non_streaming_response + ) + return await response_handler( + client=client, + request=responses_request, + auth=auth, + input_text=input_text, + started_at=started_at, + moderation_result=moderation_result, + inline_rag_context=inline_rag_context, + ) + + +async def handle_streaming_response( + client: AsyncLlamaStackClient, + request: ResponsesRequest, + auth: AuthTuple, + input_text: str, + started_at: datetime, + moderation_result: ShieldModerationResult, + inline_rag_context: RAGContext, +) -> StreamingResponse: + """Handle streaming response from Responses API. + + Args: + client: The AsyncLlamaStackClient instance + request: ResponsesRequest (LCORE-specific fields e.g. generate_topic_summary) + auth: Authentication tuple + input_text: The extracted input text + started_at: Timestamp when the conversation started + moderation_result: Result of shield moderation check + inline_rag_context: Inline RAG context to be used for the response + Returns: + StreamingResponse with SSE-formatted events + """ + api_params = ResponsesApiParams.model_validate(request.model_dump()) + turn_summary = TurnSummary() + # Handle blocked response + if moderation_result.decision == "blocked": + turn_summary.id = moderation_result.moderation_id + turn_summary.llm_response = moderation_result.message + available_quotas = get_available_quotas( + quota_limiters=configuration.quota_limiters, user_id=auth[0] + ) + generator = shield_violation_generator( + moderation_result, + api_params.conversation, + request.echoed_params(), + started_at, + available_quotas, + ) + if api_params.store: + await append_turn_items_to_conversation( + client=client, + conversation_id=api_params.conversation, + user_input=request.input, + llm_output=[moderation_result.refusal_response], + ) + else: + try: + response = await client.responses.create( + 
**api_params.model_dump(exclude_none=True) + ) + generator = response_generator( + stream=cast(AsyncIterator[OpenAIResponseObjectStream], response), + user_input=request.input, + api_params=api_params, + user_id=auth[0], + turn_summary=turn_summary, + inline_rag_context=inline_rag_context, + ) + except RuntimeError as e: # library mode wraps 413 into runtime error + if "context_length" in str(e).lower(): + error_response = PromptTooLongResponse(model=api_params.model) + raise HTTPException(**error_response.model_dump()) from e + raise e + except APIConnectionError as e: + error_response = ServiceUnavailableResponse( + backend_name="Llama Stack", + cause=str(e), + ) + raise HTTPException(**error_response.model_dump()) from e + except (LLSApiStatusError, OpenAIAPIStatusError) as e: + error_response = handle_known_apistatus_errors(e, api_params.model) + raise HTTPException(**error_response.model_dump()) from e + + return StreamingResponse( + generate_response( + generator=generator, + turn_summary=turn_summary, + client=client, + auth=auth, + input_text=input_text, + started_at=started_at, + api_params=api_params, + generate_topic_summary=request.generate_topic_summary or False, + ), + media_type="text/event-stream", + ) + + +async def shield_violation_generator( + moderation_result: ShieldModerationBlocked, + conversation_id: str, + echoed_params: dict[str, Any], + created_at: datetime, + available_quotas: dict[str, int], +) -> AsyncIterator[str]: + """Generate SSE-formatted streaming response for shield-blocked requests. 
+ + Follows the Open Responses spec: + - Content-Type: text/event-stream + - Each event has 'event:' field matching the type in the event body + - Data objects are JSON-encoded strings + - Terminal event is the literal string [DONE] + - Emits full event sequence: response.created (in_progress), output_item.added, + output_item.done, response.completed (completed) + - Performs topic summary and persistence after [DONE] is emitted + + Args: + moderation_result: The moderation result + conversation_id: The conversation ID to include in the response + echoed_params: Echoed parameters from the request + created_at: Unix timestamp when the response was created + available_quotas: Available quotas dictionary for the user + Yields: + SSE-formatted strings for streaming events, ending with [DONE] + """ + normalized_conv_id = normalize_conversation_id(conversation_id) + + # 1. Send response.created event with status "in_progress" and empty output + created_response_object = ResponsesResponse.model_construct( + id=moderation_result.moderation_id, + created_at=int(created_at.timestamp()), + status="in_progress", + output=[], + conversation=normalized_conv_id, + available_quotas={}, + output_text="", + **echoed_params, + ) + created_response_dict = created_response_object.model_dump(exclude_none=True) + created_event = { + "type": "response.created", + "sequence_number": 0, + "response": created_response_dict, + } + data_json = json.dumps(created_event) + yield f"event: response.created\ndata: {data_json}\n\n" + + # 2. Send response.output_item.added event + item_added_event = OutputItemAddedChunk( + response_id=moderation_result.moderation_id, + item=moderation_result.refusal_response, + output_index=0, + sequence_number=1, + ) + data_json = json.dumps(item_added_event.model_dump(exclude_none=True)) + yield f"event: response.output_item.added\ndata: {data_json}\n\n" + + # 3. 
Send response.output_item.done event + item_done_event = OutputItemDoneChunk( + response_id=moderation_result.moderation_id, + item=moderation_result.refusal_response, + output_index=0, + sequence_number=2, + ) + data_json = json.dumps(item_done_event.model_dump(exclude_none=True)) + yield f"event: response.output_item.done\ndata: {data_json}\n\n" + + # 4. Send response.completed event with status "completed" and output populated + completed_response_object = ResponsesResponse.model_construct( + id=moderation_result.moderation_id, + created_at=int(created_at.timestamp()), + completed_at=int(datetime.now(UTC).timestamp()), + status="completed", + output=[moderation_result.refusal_response], + usage=get_zero_usage(), + conversation=normalized_conv_id, + available_quotas=available_quotas, + output_text=moderation_result.message, + **echoed_params, + ) + completed_response_dict = completed_response_object.model_dump(exclude_none=True) + completed_event = { + "type": "response.completed", + "sequence_number": 3, + "response": completed_response_dict, + } + data_json = json.dumps(completed_event) + yield f"event: response.completed\ndata: {data_json}\n\n" + + yield "data: [DONE]\n\n" + + +async def response_generator( + stream: AsyncIterator[OpenAIResponseObjectStream], + user_input: ResponseInput, + api_params: ResponsesApiParams, + user_id: str, + turn_summary: TurnSummary, + inline_rag_context: RAGContext, +) -> AsyncIterator[str]: + """Generate SSE-formatted streaming response with LCORE-enriched events. 
+ + Args: + stream: The streaming response from Llama Stack + user_input: User input to the response + api_params: ResponsesApiParams + user_id: User ID for quota retrieval + turn_summary: TurnSummary to populate during streaming + inline_rag_context: Inline RAG context to be used for the response + Yields: + SSE-formatted strings for streaming events, ending with [DONE] + """ + normalized_conv_id = normalize_conversation_id(api_params.conversation) + + logger.debug("Starting streaming response (Responses API) processing") + + latest_response_object: Optional[OpenAIResponseObject] = None + sequence_number = 0 + + async for chunk in stream: + event_type = getattr(chunk, "type", None) + logger.debug("Processing streaming chunk, type: %s", event_type) + + chunk_dict = chunk.model_dump(exclude_none=True) + + # Create own sequence number for chunks to maintain order + chunk_dict["sequence_number"] = sequence_number + sequence_number += 1 + + # Add conversation attribute to the response if chunk has it + if "response" in chunk_dict: + chunk_dict["response"]["conversation"] = normalized_conv_id + + # Intermediate response - no quota consumption and text yet + if event_type == "response.in_progress": + chunk_dict["response"]["available_quotas"] = {} + chunk_dict["response"]["output_text"] = "" + + # Handle completion, incomplete, and failed events - only quota handling here + if event_type in ( + "response.completed", + "response.incomplete", + "response.failed", + ): + latest_response_object = cast( + OpenAIResponseObject, cast(Any, chunk).response + ) + + # Extract and consume tokens if any were used + turn_summary.token_usage = extract_token_usage( + latest_response_object.usage, api_params.model + ) + consume_query_tokens( + user_id=user_id, + model_id=api_params.model, + token_usage=turn_summary.token_usage, + ) + + # Get available quotas after token consumption + available_quotas = get_available_quotas( + quota_limiters=configuration.quota_limiters, user_id=user_id + 
) + chunk_dict["response"]["available_quotas"] = available_quotas + turn_summary.llm_response = extract_text_from_response_items( + latest_response_object.output + ) + chunk_dict["response"]["output_text"] = turn_summary.llm_response + + data_json = json.dumps(chunk_dict) + yield f"event: {event_type or 'error'}\ndata: {data_json}\n\n" + + # Extract response metadata from final response object + if latest_response_object: + turn_summary.id = latest_response_object.id + vector_store_ids = extract_vector_store_ids_from_tools(api_params.tools) + tool_rag_docs = parse_referenced_documents( + latest_response_object, vector_store_ids, configuration.rag_id_mapping + ) + turn_summary.referenced_documents = deduplicate_referenced_documents( + inline_rag_context.referenced_documents + tool_rag_docs + ) + for item in latest_response_object.output: + tool_call, tool_result = build_tool_call_summary( + item, + turn_summary.rag_chunks, + vector_store_ids, + configuration.rag_id_mapping, + ) + if tool_call: + turn_summary.tool_calls.append(tool_call) + if tool_result: + turn_summary.tool_results.append(tool_result) + + turn_summary.rag_chunks.extend(inline_rag_context.rag_chunks) + + client = AsyncLlamaStackClientHolder().get_client() + # Explicitly append the turn to conversation if context passed by previous response + if api_params.store and api_params.previous_response_id and latest_response_object: + await append_turn_items_to_conversation( + client, api_params.conversation, user_input, latest_response_object.output + ) + + yield "data: [DONE]\n\n" + + +async def generate_response( + generator: AsyncIterator[str], + turn_summary: TurnSummary, + client: AsyncLlamaStackClient, + auth: AuthTuple, + input_text: str, + started_at: datetime, + api_params: ResponsesApiParams, + generate_topic_summary: bool, +) -> AsyncIterator[str]: + """Stream the response from the generator and persist conversation details. + + After streaming completes, conversation details are persisted. 
+ + Args: + generator: The SSE event generator + turn_summary: TurnSummary populated during streaming + client: The AsyncLlamaStackClient instance + auth: Authentication tuple + input_text: The extracted input text + started_at: Timestamp when the conversation started + api_params: ResponsesApiParams + generate_topic_summary: Whether to generate topic summary for new conversations + Yields: + SSE-formatted strings from the generator + """ + user_id, _, skip_userid_check, _ = auth + async for event in generator: + yield event + + # Get topic summary for new conversation + topic_summary = None + if generate_topic_summary: + logger.debug("Generating topic summary for new conversation") + topic_summary = await get_topic_summary(input_text, client, api_params.model) + + completed_at = datetime.now(UTC) + if api_params.store: + store_query_results( + user_id=user_id, + conversation_id=normalize_conversation_id(api_params.conversation), + model=api_params.model, + started_at=started_at.strftime("%Y-%m-%dT%H:%M:%SZ"), + completed_at=completed_at.strftime("%Y-%m-%dT%H:%M:%SZ"), + summary=turn_summary, + query=input_text, + attachments=[], + skip_userid_check=skip_userid_check, + topic_summary=topic_summary, + ) + + +async def handle_non_streaming_response( + client: AsyncLlamaStackClient, + request: ResponsesRequest, + auth: AuthTuple, + input_text: str, + started_at: datetime, + moderation_result: ShieldModerationResult, + inline_rag_context: RAGContext, +) -> ResponsesResponse: + """Handle non-streaming response from Responses API. 
+ + Args: + client: The AsyncLlamaStackClient instance + request: Request object + auth: Authentication tuple + input_text: The extracted input text + started_at: Timestamp when the conversation started + moderation_result: Result of shield moderation check + inline_rag_context: Inline RAG context to be used for the response + Returns: + ResponsesResponse with the completed response + """ + user_id, _, skip_userid_check, _ = auth + api_params = ResponsesApiParams.model_validate(request.model_dump()) + + # Fork: Get response object (blocked vs normal) + if moderation_result.decision == "blocked": + output_text = moderation_result.message + api_response = OpenAIResponseObject.model_construct( + id=moderation_result.moderation_id, + created_at=int(started_at.timestamp()), + status="completed", + output=[moderation_result.refusal_response], + usage=get_zero_usage(), + **request.echoed_params(), + ) + if api_params.store: + await append_turn_items_to_conversation( + client=client, + conversation_id=api_params.conversation, + user_input=request.input, + llm_output=[moderation_result.refusal_response], + ) + else: + try: + api_response = cast( + OpenAIResponseObject, + await client.responses.create( + **api_params.model_dump(exclude_none=True) + ), + ) + token_usage = extract_token_usage(api_response.usage, api_params.model) + logger.info("Consuming tokens") + consume_query_tokens( + user_id=user_id, + model_id=api_params.model, + token_usage=token_usage, + ) + output_text = extract_text_from_response_items(api_response.output) + # Explicitly append the turn to conversation if context passed by previous response + if api_params.store and api_params.previous_response_id: + await append_turn_items_to_conversation( + client, api_params.conversation, request.input, api_response.output + ) + + except RuntimeError as e: + if "context_length" in str(e).lower(): + error_response = PromptTooLongResponse(model=api_params.model) + raise HTTPException(**error_response.model_dump()) 
from e + raise e + except APIConnectionError as e: + error_response = ServiceUnavailableResponse( + backend_name="Llama Stack", + cause=str(e), + ) + raise HTTPException(**error_response.model_dump()) from e + except (LLSApiStatusError, OpenAIAPIStatusError) as e: + error_response = handle_known_apistatus_errors(e, api_params.model) + raise HTTPException(**error_response.model_dump()) from e + + # Get available quotas + logger.info("Getting available quotas") + available_quotas = get_available_quotas( + quota_limiters=configuration.quota_limiters, user_id=user_id + ) + # Get topic summary for new conversation + topic_summary = None + if request.generate_topic_summary: + logger.debug("Generating topic summary for new conversation") + topic_summary = await get_topic_summary(input_text, client, api_params.model) + + vector_store_ids = extract_vector_store_ids_from_tools(api_params.tools) + turn_summary = build_turn_summary( + api_response, + api_params.model, + vector_store_ids, + configuration.rag_id_mapping, + ) + turn_summary.referenced_documents = deduplicate_referenced_documents( + inline_rag_context.referenced_documents + turn_summary.referenced_documents + ) + turn_summary.rag_chunks.extend(inline_rag_context.rag_chunks) + completed_at = datetime.now(UTC) + if api_params.store: + store_query_results( + user_id=user_id, + conversation_id=normalize_conversation_id(api_params.conversation), + model=api_params.model, + started_at=started_at.strftime("%Y-%m-%dT%H:%M:%SZ"), + completed_at=completed_at.strftime("%Y-%m-%dT%H:%M:%SZ"), + summary=turn_summary, + query=input_text, + attachments=[], + skip_userid_check=skip_userid_check, + topic_summary=topic_summary, + ) + response = ResponsesResponse.model_validate( + { + **api_response.model_dump(exclude_none=True), + "available_quotas": available_quotas, + "conversation": normalize_conversation_id(api_params.conversation), + "completed_at": int(completed_at.timestamp()), + "output_text": output_text, + } + ) + return 
response diff --git a/src/app/endpoints/rlsapi_v1.py b/src/app/endpoints/rlsapi_v1.py index 886d04c12..4f34f3761 100644 --- a/src/app/endpoints/rlsapi_v1.py +++ b/src/app/endpoints/rlsapi_v1.py @@ -6,7 +6,7 @@ import functools import time -from datetime import datetime +from datetime import datetime, UTC from typing import Annotated, Any, Optional, cast import jinja2 @@ -123,7 +123,7 @@ def _build_instructions(systeminfo: RlsapiV1SystemInfo) -> str: Returns: The rendered instructions string for the LLM. """ - date_today = datetime.now().strftime("%B %d, %Y") + date_today = datetime.now(tz=UTC).strftime("%B %d, %Y") return _get_prompt_template().render( date=date_today, diff --git a/src/app/endpoints/streaming_query.py b/src/app/endpoints/streaming_query.py index d77f26807..250a29b89 100644 --- a/src/app/endpoints/streaming_query.py +++ b/src/app/endpoints/streaming_query.py @@ -8,7 +8,7 @@ from fastapi import APIRouter, Depends, HTTPException, Request from fastapi.responses import StreamingResponse -from llama_stack_api.openai_responses import ( +from llama_stack_api import ( OpenAIResponseObject, OpenAIResponseObjectStream, OpenAIResponseObjectStreamResponseMcpCallArgumentsDone as MCPArgsDoneChunk, @@ -57,6 +57,7 @@ UnauthorizedResponse, UnprocessableEntityResponse, ) +from utils.conversations import append_turn_items_to_conversation from utils.endpoints import ( check_configuration_loaded, validate_and_retrieve_conversation, @@ -189,10 +190,22 @@ async def streaming_query_endpoint_handler( # pylint: disable=too-many-locals client = AsyncLlamaStackClientHolder().get_client() + # Moderation input is the raw user content (query + attachments) without injected RAG + # context, to avoid false positives from retrieved document content. 
+ moderation_input = prepare_input(query_request) + moderation_result = await run_shield_moderation( + client, moderation_input, query_request.shield_ids + ) + # Build RAG context from Inline RAG sources inline_rag_context = await build_rag_context( - client, query_request.query, query_request.vector_store_ids, query_request.solr + client, + moderation_result.decision, + query_request.query, + query_request.vector_store_ids, + query_request.solr, ) + # Prepare API request parameters responses_params = await prepare_responses_params( client=client, @@ -203,7 +216,7 @@ async def streaming_query_endpoint_handler( # pylint: disable=too-many-locals stream=True, store=True, request_headers=request.headers, - inline_rag_context=inline_rag_context.context_text or None, + inline_rag_context=inline_rag_context.context_text, ) # Handle Azure token refresh if needed @@ -227,8 +240,10 @@ async def streaming_query_endpoint_handler( # pylint: disable=too-many-locals query_request=query_request, started_at=started_at, client=client, + moderation_result=moderation_result, vector_store_ids=extract_vector_store_ids_from_tools(responses_params.tools), rag_id_mapping=configuration.rag_id_mapping, + inline_rag_context=inline_rag_context, ) # Update metrics for the LLM call @@ -240,9 +255,14 @@ async def streaming_query_endpoint_handler( # pylint: disable=too-many-locals generator, turn_summary = await retrieve_response_generator( responses_params=responses_params, context=context, - inline_rag_documents=inline_rag_context.referenced_documents, ) + # Combine inline RAG results (BYOK + Solr) with tool-based results + if context.moderation_result.decision == "passed": + turn_summary.referenced_documents = deduplicate_referenced_documents( + inline_rag_context.referenced_documents + turn_summary.referenced_documents + ) + response_media_type = ( MEDIA_TYPE_TEXT if query_request.media_type == MEDIA_TYPE_TEXT @@ -263,7 +283,6 @@ async def streaming_query_endpoint_handler( # pylint: 
disable=too-many-locals async def retrieve_response_generator( responses_params: ResponsesApiParams, context: ResponseGeneratorContext, - inline_rag_documents: list[ReferencedDocument], ) -> tuple[AsyncIterator[str], TurnSummary]: """ Retrieve the appropriate response generator. @@ -275,30 +294,27 @@ async def retrieve_response_generator( Args: responses_params: The Responses API parameters context: The response generator context - inline_rag_documents: Referenced documents from inline RAG (BYOK + Solr) - Returns: tuple[AsyncIterator[str], TurnSummary]: The response generator and turn summary """ turn_summary = TurnSummary() try: - moderation_result = await run_shield_moderation( - context.client, - prepare_input(context.query_request), - context.query_request.shield_ids, - ) - if moderation_result.decision == "blocked": - turn_summary.llm_response = moderation_result.message - await append_turn_to_conversation( + if context.moderation_result.decision == "blocked": + turn_summary.llm_response = context.moderation_result.message + turn_summary.id = context.moderation_result.moderation_id + await append_turn_items_to_conversation( context.client, responses_params.conversation, - cast(str, responses_params.input), - moderation_result.message, + responses_params.input, + [context.moderation_result.refusal_response], ) media_type = context.query_request.media_type or MEDIA_TYPE_JSON return ( - shield_violation_generator(moderation_result.message, media_type), + shield_violation_generator( + context.moderation_result.message, + media_type, + ), turn_summary, ) # Retrieve response stream (may raise exceptions) @@ -306,9 +322,14 @@ async def retrieve_response_generator( **responses_params.model_dump(exclude_none=True) ) # Store pre-RAG documents for later merging with tool-based RAG - turn_summary.inline_rag_documents = inline_rag_documents - return response_generator(response, context, turn_summary), turn_summary - + return ( + response_generator( + response, + context, + 
turn_summary, + ), + turn_summary, + ) # Handle know LLS client errors only at stream creation time and shield execution except RuntimeError as e: # library mode wraps 413 into runtime error if "context_length" in str(e).lower(): @@ -698,7 +719,7 @@ async def response_generator( # pylint: disable=too-many-branches,too-many-stat # Completed response - capture final text and response object elif event_type == "response.completed": latest_response_object = cast( - OpenAIResponseObject, getattr(chunk, "response") + OpenAIResponseObject, getattr(chunk, "response") # noqa: B009 ) turn_summary.llm_response = turn_summary.llm_response or "".join(text_parts) yield stream_event( @@ -714,7 +735,7 @@ async def response_generator( # pylint: disable=too-many-branches,too-many-stat # Incomplete or failed response - emit error elif event_type in ("response.incomplete", "response.failed"): latest_response_object = cast( - OpenAIResponseObject, getattr(chunk, "response") + OpenAIResponseObject, getattr(chunk, "response") # noqa: B009 ) error_message = ( latest_response_object.error.message @@ -741,15 +762,19 @@ async def response_generator( # pylint: disable=too-many-branches,too-many-stat turn_summary.token_usage = extract_token_usage( latest_response_object.usage, context.model_id ) - tool_based_documents = parse_referenced_documents( + # Parse tool-based referenced documents from the final response object + tool_rag_docs = parse_referenced_documents( latest_response_object, vector_store_ids=context.vector_store_ids, rag_id_mapping=context.rag_id_mapping, ) - - # Merge pre-RAG documents with tool-based documents and deduplicate + # Combine inline RAG results (BYOK + Solr) with tool-based results turn_summary.referenced_documents = deduplicate_referenced_documents( - turn_summary.inline_rag_documents + tool_based_documents + context.inline_rag_context.referenced_documents + tool_rag_docs + ) + # Combine inline RAG chunks (BYOK + Solr) with tool-based chunks + 
turn_summary.rag_chunks = ( + context.inline_rag_context.rag_chunks + turn_summary.rag_chunks ) diff --git a/src/app/routers.py b/src/app/routers.py index 78663e18f..97e3522f6 100644 --- a/src/app/routers.py +++ b/src/app/routers.py @@ -26,6 +26,7 @@ rlsapi_v1, # A2A (Agent-to-Agent) protocol support a2a, + responses, ) @@ -58,7 +59,7 @@ def include_routers(app: FastAPI) -> None: app.include_router(feedback.router, prefix="/v1") app.include_router(conversations_v1.router, prefix="/v1") app.include_router(conversations_v2.router, prefix="/v2") - + app.include_router(responses.router, prefix="/v1") # RHEL Lightspeed rlsapi v1 compatibility - stateless CLA (Command Line Assistant) endpoint app.include_router(rlsapi_v1.router, prefix="/v1") diff --git a/src/authorization/resolvers.py b/src/authorization/resolvers.py index 359064d4b..3ce553c0c 100644 --- a/src/authorization/resolvers.py +++ b/src/authorization/resolvers.py @@ -148,8 +148,7 @@ def _get_claims(auth: AuthTuple) -> dict[str, Any]: # No claims for guests return {} - jwt_claims = unsafe_get_claims(token) - return jwt_claims + return unsafe_get_claims(token) @staticmethod def _evaluate_operator( diff --git a/src/configuration.py b/src/configuration.py index c9ea8e4af..41cd3deb1 100644 --- a/src/configuration.py +++ b/src/configuration.py @@ -2,39 +2,38 @@ from typing import Any, Optional +import yaml + # We want to support environment variable replacement in the configuration # similarly to how it is done in llama-stack, so we use their function directly from llama_stack.core.stack import replace_env_vars -import yaml import constants +from cache.cache import Cache +from cache.cache_factory import CacheFactory +from log import get_logger from models.config import ( A2AStateConfiguration, + AuthenticationConfiguration, AuthorizationConfiguration, AzureEntraIdConfiguration, Configuration, + ConversationHistoryConfiguration, Customization, + DatabaseConfiguration, + InferenceConfiguration, 
LlamaStackConfiguration, + ModelContextProtocolServer, OkpConfiguration, + QuotaHandlersConfiguration, RagConfiguration, - UserDataCollection, ServiceConfiguration, - ModelContextProtocolServer, - AuthenticationConfiguration, - InferenceConfiguration, - DatabaseConfiguration, - ConversationHistoryConfiguration, - QuotaHandlersConfiguration, SplunkConfiguration, + UserDataCollection, ) - -from cache.cache import Cache -from cache.cache_factory import CacheFactory - from quota.quota_limiter import QuotaLimiter -from quota.token_usage_history import TokenUsageHistory from quota.quota_limiter_factory import QuotaLimiterFactory -from log import get_logger +from quota.token_usage_history import TokenUsageHistory logger = get_logger(__name__) @@ -382,18 +381,28 @@ def okp(self) -> "OkpConfiguration": @property def rag_id_mapping(self) -> dict[str, str]: - """Return mapping from vector_db_id to rag_id from BYOK RAG config. + """Return mapping from vector_db_id to rag_id from BYOK and OKP RAG config. Returns: - dict[str, str]: Mapping where keys are llama-stack vector_db_ids - and values are user-facing rag_ids from configuration. + dict[str, str]: Mapping where keys are llama-stack vector_store_ids + (old vector_db_id) and values are user-facing rag_ids from configuration. Raises: LogicError: If the configuration has not been loaded. 
""" if self._configuration is None: raise LogicError("logic error: configuration is not loaded") - return {brag.vector_db_id: brag.rag_id for brag in self._configuration.byok_rag} + byok_mapping = { + brag.vector_db_id: brag.rag_id for brag in self._configuration.byok_rag + } + + rag = self._configuration.rag + okp_id = constants.OKP_RAG_ID + okp_enabled = okp_id in (rag.inline or []) or okp_id in (rag.tool or []) + okp_mapping = ( + {constants.SOLR_DEFAULT_VECTOR_STORE_ID: okp_id} if okp_enabled else {} + ) + return {**byok_mapping, **okp_mapping} @property def score_multiplier_mapping(self) -> dict[str, float]: diff --git a/src/constants.py b/src/constants.py index 0c5437fb2..20145a812 100644 --- a/src/constants.py +++ b/src/constants.py @@ -2,7 +2,7 @@ # Minimal and maximal supported Llama Stack version MINIMAL_SUPPORTED_LLAMA_STACK_VERSION = "0.2.17" -MAXIMAL_SUPPORTED_LLAMA_STACK_VERSION = "0.4.3" +MAXIMAL_SUPPORTED_LLAMA_STACK_VERSION = "0.5.2" UNABLE_TO_PROCESS_RESPONSE = "Unable to process this request" @@ -214,3 +214,5 @@ # Environment variable to force StreamHandler instead of RichHandler # Set to any non-empty value to disable RichHandler LIGHTSPEED_STACK_DISABLE_RICH_HANDLER_ENV_VAR = "LIGHTSPEED_STACK_DISABLE_RICH_HANDLER" + +DEFAULT_VIOLATION_MESSAGE = "I cannot process this request due to policy restrictions." 
diff --git a/src/llama_stack_configuration.py b/src/llama_stack_configuration.py index 39e026630..a0530b326 100644 --- a/src/llama_stack_configuration.py +++ b/src/llama_stack_configuration.py @@ -251,8 +251,7 @@ def construct_models_section( # Strip sentence-transformers/ prefix if present provider_model_id = embedding_model - if provider_model_id.startswith("sentence-transformers/"): - provider_model_id = provider_model_id[len("sentence-transformers/") :] + provider_model_id = provider_model_id.removeprefix("sentence-transformers/") # Skip if embedding model already registered existing_model_ids = [m.get("provider_model_id") for m in output] @@ -443,6 +442,7 @@ def enrich_solr(ls_config: dict[str, Any], solr_config: dict[str, Any]) -> None: "parent_total_chunks_field": "total_chunks", "parent_total_tokens_field": "total_tokens", "chunk_filter_query": chunk_filter_query, + "chunk_family_fields": ["headings"], }, "persistence": { "namespace": constants.SOLR_DEFAULT_VECTOR_STORE_ID, @@ -489,8 +489,7 @@ def enrich_solr(ls_config: dict[str, Any], solr_config: dict[str, Any]) -> None: # Strip sentence-transformers/ prefix from constant for provider_model_id provider_model_id = constants.SOLR_DEFAULT_EMBEDDING_MODEL - if provider_model_id.startswith("sentence-transformers/"): - provider_model_id = provider_model_id[len("sentence-transformers/") :] + provider_model_id = provider_model_id.removeprefix("sentence-transformers/") # Check if already registered registered_models = ls_config["registered_resources"]["models"] diff --git a/src/models/context.py b/src/models/context.py index 2ef76f36d..9876a1485 100644 --- a/src/models/context.py +++ b/src/models/context.py @@ -4,6 +4,7 @@ from llama_stack_client import AsyncLlamaStackClient from models.requests import QueryRequest +from utils.types import RAGContext, ShieldModerationResult @dataclass @@ -23,6 +24,8 @@ class ResponseGeneratorContext: # pylint: disable=too-many-instance-attributes query_request: The query request 
object started_at: Timestamp when the request started (ISO 8601 format) client: The Llama Stack client for API interactions + moderation_result: The moderation result + inline_rag_context: Inline RAG context vector_store_ids: Vector store IDs used in the query for source resolution. rag_id_mapping: Mapping from vector_db_id to user-facing rag_id. """ @@ -42,7 +45,9 @@ class ResponseGeneratorContext: # pylint: disable=too-many-instance-attributes # Dependencies & State client: AsyncLlamaStackClient + moderation_result: ShieldModerationResult # RAG index identification + inline_rag_context: RAGContext vector_store_ids: list[str] = field(default_factory=list) rag_id_mapping: dict[str, str] = field(default_factory=dict) diff --git a/src/models/database/conversations.py b/src/models/database/conversations.py index b34c9eb53..baebf6aa9 100644 --- a/src/models/database/conversations.py +++ b/src/models/database/conversations.py @@ -31,6 +31,7 @@ class UserConversation(Base): # pylint: disable=too-few-public-methods DateTime(timezone=True), server_default=func.now(), # pylint: disable=not-callable ) + last_response_id: Mapped[str] = mapped_column(nullable=True) # The number of user messages in the conversation message_count: Mapped[int] = mapped_column(default=0) @@ -66,3 +67,7 @@ class UserTurn(Base): # pylint: disable=too-few-public-methods provider: Mapped[str] = mapped_column(nullable=False) model: Mapped[str] = mapped_column(nullable=False) + + # Llama Stack response ID for this turn (1:1); nullable for legacy turns without it. + # Indexed for fast lookup when resolving previous_response_id to conversation. 
+ response_id: Mapped[str] = mapped_column(nullable=True, index=True) diff --git a/src/models/requests.py b/src/models/requests.py index 4027e3772..0e05e61d5 100644 --- a/src/models/requests.py +++ b/src/models/requests.py @@ -6,10 +6,11 @@ from llama_stack_api.openai_responses import ( OpenAIResponseInputToolChoice as ToolChoice, - OpenAIResponseInputToolChoiceMode as ToolChoiceMode, OpenAIResponseInputTool as InputTool, OpenAIResponsePrompt as Prompt, OpenAIResponseText as Text, + OpenAIResponseToolMCP as OutputToolMCP, + OpenAIResponseReasoning as Reasoning, ) from pydantic import BaseModel, Field, field_validator, model_validator @@ -20,6 +21,28 @@ logger = get_logger(__name__) +# Attribute names that are echoed back in the response. +_ECHOED_FIELDS = set( + { + "instructions", + "max_tool_calls", + "max_output_tokens", + "metadata", + "model", + "parallel_tool_calls", + "previous_response_id", + "prompt", + "reasoning", + "safety_identifier", + "temperature", + "top_p", + "truncation", + "text", + "tool_choice", + "store", + } +) + class Attachment(BaseModel): """Model representing an attachment that can be send from the UI as part of query. @@ -179,8 +202,7 @@ class QueryRequest(BaseModel): shield_ids: Optional[list[str]] = Field( None, description="Optional list of safety shield IDs to apply. " - "If None, all configured shields are used. " - "If provided, must contain at least one valid shield ID (empty list raises 422 error).", + "If None, all configured shields are used. 
", examples=["llama-guard", "custom-shield"], ) @@ -503,8 +525,7 @@ def validate_categories( if len(value) == 0: return None # Convert empty list to None for consistency - unique_categories = list(dict.fromkeys(value)) # don't lose ordering - return unique_categories + return list(dict.fromkeys(value)) # don't lose ordering @model_validator(mode="after") def check_feedback_provided(self) -> Self: @@ -616,6 +637,7 @@ class ResponsesRequest(BaseModel): instructions: System instructions or guidelines provided to the model (acts as the system prompt). max_infer_iters: Maximum number of inference iterations the model can perform. + max_output_tokens: Maximum number of tokens allowed in the response. max_tool_calls: Maximum number of tool calls allowed in a single response. metadata: Custom metadata dictionary with key-value pairs for tracking or logging. parallel_tool_calls: Whether the model can make multiple tool calls in parallel. @@ -623,17 +645,21 @@ class ResponsesRequest(BaseModel): conversation. Mutually exclusive with conversation. prompt: Prompt object containing a template with variables for dynamic substitution. + reasoning: Reasoning configuration for the response. + safety_identifier: Safety identifier for the response. store: Whether to store the response in conversation history. Defaults to True. stream: Whether to stream the response as it is generated. Defaults to False. temperature: Sampling temperature controlling randomness (typically 0.0–2.0). text: Text response configuration specifying output format constraints (JSON schema, JSON object, or plain text). tool_choice: Tool selection strategy ("auto", "required", "none", or specific - tool configuration). Defaults to "auto". + tool configuration). tools: List of tools available to the model (file search, web search, function calls, MCP tools). Defaults to all tools available to the model. 
generate_topic_summary: LCORE-specific flag indicating whether to generate a topic summary for new conversations. Defaults to True. + shield_ids: LCORE-specific list of safety shield IDs to apply. If None, all + configured shields are used. solr: LCORE-specific Solr vector_io provider query parameters (e.g. filter queries). Optional. """ @@ -644,18 +670,23 @@ class ResponsesRequest(BaseModel): include: Optional[list[IncludeParameter]] = None instructions: Optional[str] = None max_infer_iters: Optional[int] = None + max_output_tokens: Optional[int] = None max_tool_calls: Optional[int] = None metadata: Optional[dict[str, str]] = None parallel_tool_calls: Optional[bool] = None previous_response_id: Optional[str] = None prompt: Optional[Prompt] = None + reasoning: Optional[Reasoning] = None + safety_identifier: Optional[str] = None store: bool = True stream: bool = False temperature: Optional[float] = None text: Optional[Text] = None - tool_choice: Optional[ToolChoice] = ToolChoiceMode.auto + tool_choice: Optional[ToolChoice] = None tools: Optional[list[InputTool]] = None + # LCORE-specific attributes generate_topic_summary: Optional[bool] = True + shield_ids: Optional[list[str]] = None solr: Optional[dict[str, Any]] = None model_config = { @@ -663,40 +694,11 @@ class ResponsesRequest(BaseModel): "json_schema_extra": { "examples": [ { - "input": "What is Kubernetes?", + "input": "Hello World!", "model": "openai/gpt-4o-mini", - "conversation": "conv_0d21ba731f21f798dc9680125d5d6f493e4a7ab79f25670e", "instructions": "You are a helpful assistant", - "include": ["message.output_text.logprobs"], - "max_tool_calls": 5, - "metadata": {"source": "api"}, - "parallel_tool_calls": True, - "prompt": { - "id": "prompt_123", - "variables": { - "topic": {"type": "input_text", "text": "Kubernetes"} - }, - "version": "1.0", - }, "store": True, "stream": False, - "temperature": 0.7, - "text": { - "format": { - "type": "json_schema", - "schema": { - "type": "object", - "properties": 
{"answer": {"type": "string"}}, - }, - } - }, - "tool_choice": "auto", - "tools": [ - { - "type": "file_search", - "vector_store_ids": ["vs_123"], - } - ], "generate_topic_summary": True, } ] @@ -731,3 +733,32 @@ def check_suid(cls, value: Optional[str]) -> Optional[str]: if value and not suid.check_suid(value): raise ValueError(f"Improper conversation ID '{value}'") return value + + @field_validator("previous_response_id") + @classmethod + def check_previous_response_id(cls, value: Optional[str]) -> Optional[str]: + """Validate that previous_response_id does not start with 'modr'.""" + if value is not None and value.startswith("modr"): + raise ValueError("You cannot provide context by moderation response.") + return value + + def echoed_params(self) -> dict[str, Any]: + """Dump attributes that are echoed back in the response. + + The tools attribute is converted from input tool to output tool model. + + Returns: + Dict of echoed attributes. + """ + data = self.model_dump(include=_ECHOED_FIELDS) + if self.tools is not None: + data["tools"] = [ + ( + OutputToolMCP.model_validate(t.model_dump()).model_dump() + if t.type == "mcp" + else t.model_dump() + ) + for t in self.tools + ] + + return data diff --git a/src/models/responses.py b/src/models/responses.py index 9e87ad2cf..fd5ef955a 100644 --- a/src/models/responses.py +++ b/src/models/responses.py @@ -2,7 +2,7 @@ """Models for REST API responses.""" -from typing import Any, ClassVar, Literal, Optional +from typing import Any, ClassVar, Literal, Optional, cast from fastapi import status from llama_stack_api.openai_responses import ( @@ -13,6 +13,7 @@ OpenAIResponseText as Text, OpenAIResponseTool as OutputTool, OpenAIResponseUsage as Usage, + OpenAIResponseReasoning as Reasoning, ) from pydantic import BaseModel, Field from pydantic_core import SchemaError @@ -1412,31 +1413,35 @@ class ResponsesResponse(AbstractSuccessfulResponse): """Model representing a response from the Responses API following LCORE 
specification. Attributes: - id: Unique identifier for this response. - object: Object type identifier, always "response". created_at: Unix timestamp when the response was created. - status: Current status of the response (e.g., "completed", "blocked", - "in_progress"). completed_at: Unix timestamp when the response was completed, if applicable. + error: Error details if the response failed or was blocked. + id: Unique identifier for this response. model: Model identifier in "provider/model" format used for generation. + object: Object type identifier, always "response". output: List of structured output items containing messages, tool calls, and other content. This is the primary response content. - error: Error details if the response failed or was blocked. - instructions: System instructions or guidelines provided to the model. - max_tool_calls: Maximum number of tool calls allowed in a single response. - metadata: Additional metadata dictionary with custom key-value pairs. parallel_tool_calls: Whether the model can make multiple tool calls in parallel. previous_response_id: Identifier of the previous response in a multi-turn conversation. prompt: The input prompt object that was sent to the model. + status: Current status of the response (e.g., "completed", "blocked", + "in_progress"). temperature: Temperature parameter used for generation (controls randomness). text: Text response configuration object used for OpenAI responses. - tool_choice: Tool selection strategy used (e.g., "auto", "required", "none"). - tools: List of tools available to the model during generation. top_p: Top-p sampling parameter used for generation. + tools: List of tools available to the model during generation. + tool_choice: Tool selection strategy used (e.g., "auto", "required", "none"). truncation: Strategy used for handling content that exceeds context limits. usage: Token usage statistics including input_tokens, output_tokens, and total_tokens. 
+ instructions: System instructions or guidelines provided to the model. + max_tool_calls: Maximum number of tool calls allowed in a single response. + reasoning: Reasoning configuration (effort level) used for the response. + max_output_tokens: Upper bound for tokens generated in the response. + safety_identifier: Safety/guardrail identifier applied to the request. + metadata: Additional metadata dictionary with custom key-value pairs. + store: Whether the response was stored. conversation: Conversation ID linking this response to a conversation thread (LCORE-specific). available_quotas: Remaining token quotas for the user (LCORE-specific). @@ -1444,27 +1449,32 @@ class ResponsesResponse(AbstractSuccessfulResponse): output array. """ - id: str - object: Literal["response"] = "response" created_at: int - status: str completed_at: Optional[int] = None + error: Optional[Error] = None + id: str model: str + object: Literal["response"] = "response" output: list[Output] - error: Optional[Error] = None - instructions: Optional[str] = None - max_tool_calls: Optional[int] = None - metadata: Optional[dict[str, str]] = None parallel_tool_calls: bool = True previous_response_id: Optional[str] = None prompt: Optional[Prompt] = None + status: str temperature: Optional[float] = None text: Optional[Text] = None - tool_choice: Optional[ToolChoice] = None - tools: Optional[list[OutputTool]] = None top_p: Optional[float] = None + tools: Optional[list[OutputTool]] = None + tool_choice: Optional[ToolChoice] = None truncation: Optional[str] = None - usage: Usage + usage: Optional[Usage] = None + instructions: Optional[str] = None + max_tool_calls: Optional[int] = None + reasoning: Optional[Reasoning] = None + max_output_tokens: Optional[int] = None + safety_identifier: Optional[str] = None + metadata: Optional[dict[str, str]] = None + store: Optional[bool] = None + # LCORE-specific attributes conversation: Optional[str] = None available_quotas: dict[str, int] output_text: str @@ 
-1473,12 +1483,11 @@ class ResponsesResponse(AbstractSuccessfulResponse): "json_schema_extra": { "examples": [ { - "id": "resp_abc123", - "object": "response", "created_at": 1704067200, "completed_at": 1704067250, + "id": "resp_abc123", "model": "openai/gpt-4-turbo", - "status": "completed", + "object": "response", "output": [ { "type": "message", @@ -1494,21 +1503,89 @@ class ResponsesResponse(AbstractSuccessfulResponse): ], } ], + "parallel_tool_calls": True, + "status": "completed", + "temperature": 0.7, + "text": {"format": {"type": "text"}}, "usage": { "input_tokens": 100, "output_tokens": 50, "total_tokens": 150, }, "instructions": "You are a helpful assistant", - "temperature": 0.7, - "conversation": "conv_0d21ba731f21f798dc9680125d5d6f493e4a7ab79f25670e", + "store": True, + "conversation": "0d21ba731f21f798dc9680125d5d6f493e4a7ab79f25670e", "available_quotas": {"daily": 1000, "monthly": 50000}, - "output_text": "Kubernetes is an open-source container orchestration system...", + "output_text": ( + "Kubernetes is an open-source container " + "orchestration system..." + ), } - ] + ], + "sse_example": ( + "event: response.created\n" + 'data: {"type":"response.created","sequence_number":0,' + '"response":{"id":"resp_abc","created_at":1704067200,' + '"status":"in_progress","output":[],"conversation":' + '"0d21ba731f21f798dc9680125d5d6f49","available_quotas":{},' + '"output_text":""}}\n\n' + "event: response.output_item.added\n" + 'data: {"response_id":"resp_abc","item":{"type":"message",' + '"role":"assistant","content":[{"type":"output_text",' + '"text":"Hello! How can I help?"}]},"output_index":0,' + '"sequence_number":1}\n\n' + "event: response.output_item.done\n" + 'data: {"response_id":"resp_abc","item":{"type":"message",' + '"role":"assistant","content":[{"type":"output_text",' + '"text":"Hello! 
How can I help?"}]},"output_index":0,' + '"sequence_number":2}\n\n' + "event: response.completed\n" + 'data: {"type":"response.completed","sequence_number":3,' + '"response":{"id":"resp_abc","created_at":1704067200,' + '"completed_at":1704067250,"status":"completed",' + '"output":[{"type":"message","role":"assistant",' + '"content":[{"type":"output_text","text":"Hello! How can I help?"}]}],' + '"usage":{"input_tokens":10,"output_tokens":6,"total_tokens":16},' + '"conversation":"0d21ba731f21f798dc9680125d5d6f49",' + '"available_quotas":{"daily":1000,"monthly":50000},' + '"output_text":"Hello! How can I help?"}}\n\n' + "data: [DONE]\n\n" + ), } } + @classmethod + def openapi_response(cls) -> dict[str, Any]: + """ + Build OpenAPI response dict with application/json and text/event-stream. + + Uses the single JSON example from the model schema and adds + text/event-stream example from json_schema_extra.sse_example. + """ + schema = cls.model_json_schema() + model_examples = schema.get("examples", []) + json_example = model_examples[0] if model_examples else None + + schema_extra = ( + cast(dict[str, Any], dict(cls.model_config)).get("json_schema_extra") or {} + ) + sse_example = schema_extra.get("sse_example", "") + + content: dict[str, Any] = { + "application/json": {"example": json_example} if json_example else {}, + "text/event-stream": { + "schema": {"type": "string"}, + "description": "SSE stream of events", + "examples": {"stream": {"value": sse_example}} if sse_example else {}, + }, + } + + return { + "description": SUCCESSFUL_RESPONSE_DESCRIPTION, + "model": cls, + "content": content, + } + class DetailModel(BaseModel): """Nested detail model for error responses.""" diff --git a/src/observability/__init__.py b/src/observability/__init__.py index df8601281..4a4285e8a 100644 --- a/src/observability/__init__.py +++ b/src/observability/__init__.py @@ -11,4 +11,4 @@ from observability.formats import InferenceEventData, build_inference_event from observability.splunk 
import send_splunk_event -__all__ = ["send_splunk_event", "InferenceEventData", "build_inference_event"] +__all__ = ["InferenceEventData", "build_inference_event", "send_splunk_event"] diff --git a/src/quota/cluster_quota_limiter.py b/src/quota/cluster_quota_limiter.py index 805791b9d..d2170f853 100644 --- a/src/quota/cluster_quota_limiter.py +++ b/src/quota/cluster_quota_limiter.py @@ -1,7 +1,7 @@ """Simple cluster quota limiter where quota is fixed for the whole cluster.""" -from models.config import QuotaHandlersConfiguration from log import get_logger +from models.config import QuotaHandlersConfiguration from quota.revokable_quota_limiter import RevokableQuotaLimiter logger = get_logger(__name__) diff --git a/src/quota/connect_pg.py b/src/quota/connect_pg.py index fbbf95109..e4ba2c9bf 100644 --- a/src/quota/connect_pg.py +++ b/src/quota/connect_pg.py @@ -1,6 +1,7 @@ """PostgreSQL connection handler.""" from typing import Any + import psycopg2 from log import get_logger diff --git a/src/quota/quota_limiter.py b/src/quota/quota_limiter.py index bc0d659f8..14543c590 100644 --- a/src/quota/quota_limiter.py +++ b/src/quota/quota_limiter.py @@ -30,16 +30,15 @@ - reset quota to 10,000,000 tokens each month """ +import datetime +import sqlite3 from abc import ABC, abstractmethod - from typing import Optional -import datetime -import sqlite3 import psycopg2 from log import get_logger -from models.config import SQLiteDatabaseConfiguration, PostgreSQLDatabaseConfiguration +from models.config import PostgreSQLDatabaseConfiguration, SQLiteDatabaseConfiguration from quota.connect_pg import connect_pg from quota.connect_sqlite import connect_sqlite diff --git a/src/quota/quota_limiter_factory.py b/src/quota/quota_limiter_factory.py index 9b8105c4d..3067f7493 100644 --- a/src/quota/quota_limiter_factory.py +++ b/src/quota/quota_limiter_factory.py @@ -1,12 +1,11 @@ """Quota limiter factory class.""" -from log import get_logger import constants +from log import get_logger from 
models.config import QuotaHandlersConfiguration - -from quota.user_quota_limiter import UserQuotaLimiter from quota.cluster_quota_limiter import ClusterQuotaLimiter from quota.quota_limiter import QuotaLimiter +from quota.user_quota_limiter import UserQuotaLimiter logger = get_logger(__name__) diff --git a/src/quota/revokable_quota_limiter.py b/src/quota/revokable_quota_limiter.py index 9bb47ed3c..9f5ff54cb 100644 --- a/src/quota/revokable_quota_limiter.py +++ b/src/quota/revokable_quota_limiter.py @@ -1,24 +1,24 @@ """Simple quota limiter where quota can be revoked.""" -from datetime import datetime +from datetime import UTC, datetime -from models.config import QuotaHandlersConfiguration from log import get_logger -from utils.connection_decorator import connection +from models.config import QuotaHandlersConfiguration from quota.quota_exceed_error import QuotaExceedError from quota.quota_limiter import QuotaLimiter from quota.sql import ( CREATE_QUOTA_TABLE_PG, CREATE_QUOTA_TABLE_SQLITE, - UPDATE_AVAILABLE_QUOTA_PG, - UPDATE_AVAILABLE_QUOTA_SQLITE, + INIT_QUOTA_PG, + INIT_QUOTA_SQLITE, SELECT_QUOTA_PG, SELECT_QUOTA_SQLITE, SET_AVAILABLE_QUOTA_PG, SET_AVAILABLE_QUOTA_SQLITE, - INIT_QUOTA_PG, - INIT_QUOTA_SQLITE, + UPDATE_AVAILABLE_QUOTA_PG, + UPDATE_AVAILABLE_QUOTA_SQLITE, ) +from utils.connection_decorator import connection logger = get_logger(__name__) @@ -140,7 +140,7 @@ def _revoke_quota(self, set_statement: str, subject_id: str) -> None: revoked. """ # timestamp to be used - revoked_at = datetime.now() + revoked_at = datetime.now(tz=UTC) cursor = self.connection.cursor() cursor.execute( @@ -188,7 +188,7 @@ def _increase_quota(self, set_statement: str, subject_id: str) -> None: subject_id (str): Identifier of the subject whose quota will be increased. """ # timestamp to be used - updated_at = datetime.now() + updated_at = datetime.now(tz=UTC) cursor = self.connection.cursor() cursor.execute( @@ -286,7 +286,7 @@ def _consume_tokens( change. 
""" # timestamp to be used - updated_at = datetime.now() + updated_at = datetime.now(tz=UTC) to_be_consumed = input_tokens + output_tokens @@ -329,7 +329,7 @@ def _init_quota(self, subject_id: str = "") -> None: initialize. Defaults to empty string. """ # timestamp to be used - revoked_at = datetime.now() + revoked_at = datetime.now(tz=UTC) if self.sqlite_connection_config is not None: cursor = self.connection.cursor() diff --git a/src/quota/token_usage_history.py b/src/quota/token_usage_history.py index db6134e14..3ea14a452 100644 --- a/src/quota/token_usage_history.py +++ b/src/quota/token_usage_history.py @@ -6,25 +6,23 @@ """ import sqlite3 -from datetime import datetime +from datetime import UTC, datetime from typing import Any, Optional import psycopg2 from log import get_logger - +from models.config import ( + PostgreSQLDatabaseConfiguration, + QuotaHandlersConfiguration, + SQLiteDatabaseConfiguration, +) from quota.connect_pg import connect_pg from quota.connect_sqlite import connect_sqlite from quota.sql import ( - CREATE_TOKEN_USAGE_TABLE, CONSUME_TOKENS_FOR_USER_PG, CONSUME_TOKENS_FOR_USER_SQLITE, -) - -from models.config import ( - QuotaHandlersConfiguration, - SQLiteDatabaseConfiguration, - PostgreSQLDatabaseConfiguration, + CREATE_TOKEN_USAGE_TABLE, ) from utils.connection_decorator import connection @@ -135,7 +133,7 @@ def consume_tokens( # pylint: disable=too-many-arguments,too-many-positional-ar return # timestamp to be used - updated_at = datetime.now() + updated_at = datetime.now(tz=UTC) # it is not possible to use context manager there, because SQLite does # not support it diff --git a/src/quota/user_quota_limiter.py b/src/quota/user_quota_limiter.py index 13901fd05..eee9dd7c2 100644 --- a/src/quota/user_quota_limiter.py +++ b/src/quota/user_quota_limiter.py @@ -1,7 +1,7 @@ """Simple user quota limiter where each user has a fixed quota.""" -from models.config import QuotaHandlersConfiguration from log import get_logger +from models.config import 
QuotaHandlersConfiguration from quota.revokable_quota_limiter import RevokableQuotaLimiter logger = get_logger(__name__) diff --git a/src/utils/conversations.py b/src/utils/conversations.py index 66205594f..ce882402c 100644 --- a/src/utils/conversations.py +++ b/src/utils/conversations.py @@ -3,7 +3,10 @@ import json from datetime import UTC, datetime from typing import Any, Optional, cast +from collections.abc import Sequence +from fastapi import HTTPException +from llama_stack_api import OpenAIResponseMessage, OpenAIResponseOutput from llama_stack_api.openai_responses import ( OpenAIResponseOutputMessageFileSearchToolCall as FileSearchCall, OpenAIResponseOutputMessageFunctionToolCall as FunctionCall, @@ -11,6 +14,8 @@ OpenAIResponseOutputMessageMCPListTools as MCPListTools, OpenAIResponseOutputMessageWebSearchToolCall as WebSearchCall, ) +from llama_stack_client import APIConnectionError, APIStatusError, AsyncLlamaStackClient +from llama_stack_client.types.conversations.item_create_params import Item from llama_stack_client.types.conversations.item_list_response import ( ItemListResponse, OpenAIResponseInputFunctionToolCallOutput as FunctionToolCallOutput, @@ -21,9 +26,14 @@ from constants import DEFAULT_RAG_TOOL from models.database.conversations import UserTurn -from models.responses import ConversationTurn, Message +from models.responses import ( + ConversationTurn, + InternalServerErrorResponse, + Message, + ServiceUnavailableResponse, +) from utils.responses import parse_arguments_string -from utils.types import ToolCallSummary, ToolResultSummary +from utils.types import ResponseInput, ToolCallSummary, ToolResultSummary def _extract_text_from_content(content: str | list[Any]) -> str: @@ -423,3 +433,82 @@ def build_conversation_turns_from_items( ) return chat_history + + +async def append_turn_items_to_conversation( + client: AsyncLlamaStackClient, + conversation_id: str, + user_input: ResponseInput, + llm_output: Sequence[OpenAIResponseOutput], +) -> None: + 
""" + Append a turn (user input + LLM output) to a conversation in LLS database. + + Args: + client: The Llama Stack client. + conversation_id: The Llama Stack conversation ID. + user_input: User input text or list of ResponseItem. + llm_output: Output from the LLM: a list of OpenAIResponseOutput. + """ + if isinstance(user_input, str): + user_message = OpenAIResponseMessage( + role="user", + content=user_input, + ) + user_items = [user_message.model_dump()] + else: + user_items = [item.model_dump() for item in user_input] + + output_items = [item.model_dump() for item in llm_output] + + items = user_items + output_items + try: + await client.conversations.items.create( + conversation_id, + items=cast(list[Item], items), + ) + except APIConnectionError as e: + error_response = ServiceUnavailableResponse( + backend_name="Llama Stack", + cause=str(e), + ) + raise HTTPException(**error_response.model_dump()) from e + except APIStatusError as e: + error_response = InternalServerErrorResponse.generic() + raise HTTPException(**error_response.model_dump()) from e + + +async def get_all_conversation_items( + client: AsyncLlamaStackClient, + conversation_id_llama_stack: str, +) -> list[ItemListResponse]: + """Fetch all items for a conversation (Conversations API), paginating as needed. + + Args: + client: Llama Stack client. + conversation_id_llama_stack: Conversation ID in Llama Stack format. + + Returns: + List of all items in the conversation, oldest first. 
+ """ + try: + paginator = client.conversations.items.list( + conversation_id=conversation_id_llama_stack, + order="asc", + ) + first_page = await paginator + items: list[ItemListResponse] = list(first_page.data or []) + page = first_page + while page.has_next_page(): + page = await page.get_next_page() + items.extend(page.data or []) + return items + except APIConnectionError as e: + error_response = ServiceUnavailableResponse( + backend_name="Llama Stack", + cause=str(e), + ) + raise HTTPException(**error_response.model_dump()) from e + except APIStatusError as e: + error_response = InternalServerErrorResponse.generic() + raise HTTPException(**error_response.model_dump()) from e diff --git a/src/utils/endpoints.py b/src/utils/endpoints.py index 332002eeb..a8ffb9a64 100644 --- a/src/utils/endpoints.py +++ b/src/utils/endpoints.py @@ -6,17 +6,20 @@ from pydantic import AnyUrl, ValidationError from sqlalchemy.exc import SQLAlchemyError +from client import AsyncLlamaStackClientHolder import constants from app.database import get_session from configuration import AppConfig, LogicError from log import get_logger -from models.database.conversations import UserConversation +from models.database.conversations import UserConversation, UserTurn from models.responses import ( ForbiddenResponse, InternalServerErrorResponse, NotFoundResponse, ) -from utils.types import ReferencedDocument, TurnSummary +from utils.responses import create_new_conversation +from utils.suid import normalize_conversation_id, to_llama_stack_conversation_id +from utils.types import ReferencedDocument, ResponsesConversationContext, TurnSummary logger = get_logger(__name__) @@ -59,6 +62,35 @@ def retrieve_conversation(conversation_id: str) -> Optional[UserConversation]: return session.query(UserConversation).filter_by(id=conversation_id).first() +def retrieve_conversation_turns(conversation_id: str) -> list[UserTurn]: + """Retrieve all turns for a conversation from the database, ordered by turn number. 
+ + Args: + conversation_id (str): The normalized conversation ID. + + Returns: + list[UserTurn]: The list of turns for the conversation, ordered by turn_number. + + Raises: + HTTPException: 500 if a database error occurs. + """ + try: + with get_session() as session: + return ( + session.query(UserTurn) + .filter_by(conversation_id=conversation_id) + .order_by(UserTurn.turn_number) + .all() + ) + except SQLAlchemyError as e: + logger.error( + "Database error occurred while retrieving conversation turns for %s.", + conversation_id, + ) + response = InternalServerErrorResponse.database_error() + raise HTTPException(**response.model_dump()) from e + + def validate_conversation_ownership( user_id: str, conversation_id: str, others_allowed: bool = False ) -> Optional[UserConversation]: @@ -179,6 +211,145 @@ def validate_and_retrieve_conversation( return user_conversation +async def resolve_response_context( + user_id: str, + others_allowed: bool, + conversation_id: Optional[str], + previous_response_id: Optional[str], + generate_topic_summary: Optional[bool], +) -> ResponsesConversationContext: + """Resolve conversation context for the responses endpoint without mutating the request. + + Parameters: + user_id: ID of the user making the request. + others_allowed: Whether the user can access conversations owned by others. + conversation_id: Conversation ID from the request, if any. + previous_response_id: Previous response ID from the request, if any. + generate_topic_summary: Resolved value for request.generate_topic_summary. + + Returns: + ResponsesConversationContext: Contains conversation, user_conversation, and + resolved generate_topic_summary to apply to the request. + + Raises: + HTTPException: 404 if previous_response_id is set but the turn does not exist; + other HTTP exceptions from validate_and_retrieve_conversation. 
+ """ + client = AsyncLlamaStackClientHolder().get_client() + # Context for the LLM passed by conversation + if conversation_id: + logger.info("Conversation ID specified in request: %s", conversation_id) + user_conversation = validate_and_retrieve_conversation( + normalized_conv_id=normalize_conversation_id(conversation_id), + user_id=user_id, + others_allowed=others_allowed, + ) + return ResponsesConversationContext( + conversation=to_llama_stack_conversation_id(user_conversation.id), + user_conversation=user_conversation, + generate_topic_summary=False, + ) + + # Context for the LLM passed by previous response id + if previous_response_id: + if not check_turn_existence(previous_response_id): + error_response = NotFoundResponse( + resource="response", resource_id=previous_response_id + ) + raise HTTPException(**error_response.model_dump()) + prev_user_turn = retrieve_turn_by_response_id(previous_response_id) + user_conversation = validate_and_retrieve_conversation( + normalized_conv_id=prev_user_turn.conversation_id, + user_id=user_id, + others_allowed=others_allowed, + ) + if ( + user_conversation.last_response_id is not None + and user_conversation.last_response_id != previous_response_id + ): + new_conv_id = await create_new_conversation(client) + want_topic_summary = ( + generate_topic_summary if generate_topic_summary is not None else True + ) + return ResponsesConversationContext( + conversation=new_conv_id, + user_conversation=user_conversation, + generate_topic_summary=want_topic_summary, + ) + return ResponsesConversationContext( + conversation=to_llama_stack_conversation_id(user_conversation.id), + user_conversation=user_conversation, + generate_topic_summary=False, + ) + + # No context passed, create new conversation + new_conv_id = await create_new_conversation(client) + want_topic_summary = ( + generate_topic_summary if generate_topic_summary is not None else True + ) + return ResponsesConversationContext( + conversation=new_conv_id, + 
user_conversation=None, + generate_topic_summary=want_topic_summary, + ) + + +def retrieve_turn_by_response_id(response_id: str) -> UserTurn: + """Retrieve a response's turn from the database by response ID. + + Looks up the turn that has this response_id to get its conversation. + Used for fork/previous_response_id resolution. + + Args: + response_id: The ID of the response (stored on UserTurn.response_id). + + Returns: + The UserTurn row for that response (has conversation_id). + + Raises: + HTTPException: 404 if no turn has this response_id; 500 on database error. + """ + try: + with get_session() as session: + turn = session.query(UserTurn).filter_by(response_id=response_id).first() + if turn is None: + logger.error("Response %s not found in database.", response_id) + response = NotFoundResponse( + resource="response", resource_id=response_id + ) + raise HTTPException(**response.model_dump()) + return turn + except SQLAlchemyError as e: + logger.exception( + "Database error while retrieving turn by response_id %s", response_id + ) + response = InternalServerErrorResponse.database_error() + raise HTTPException(**response.model_dump()) from e + + +def check_turn_existence(response_id: str) -> bool: + """Check if a turn exists for a given response ID. + + Args: + response_id: The ID of the response to check. + + Returns: + bool: True if the turn exists, False otherwise. + """ + try: + with get_session() as session: + turn = session.query(UserTurn).filter_by(response_id=response_id).first() + return turn is not None + except SQLAlchemyError as e: + logger.exception( + "Database error while checking turn existence for response_id %s", + response_id, + ) + raise HTTPException( + **InternalServerErrorResponse.database_error().model_dump() + ) from e + + def check_configuration_loaded(config: AppConfig) -> None: """ Raise an error if the configuration is not loaded. 
diff --git a/src/utils/query.py b/src/utils/query.py index 8d96b5eb6..5bd96dbf7 100644 --- a/src/utils/query.py +++ b/src/utils/query.py @@ -44,6 +44,7 @@ create_transcript_metadata, store_transcript, ) +from utils.suid import is_moderation_id from utils.types import TurnSummary logger = get_logger(__name__) @@ -290,6 +291,7 @@ def store_query_results( # pylint: disable=too-many-arguments model_id=model_id, provider_id=provider_id, topic_summary=topic_summary, + response_id=summary.id, ) except SQLAlchemyError as e: logger.exception("Error persisting conversation details.") @@ -377,6 +379,7 @@ def persist_user_conversation_details( model_id: str, provider_id: str, topic_summary: Optional[str], + response_id: str, ) -> None: """Associate conversation to user in the database. @@ -388,6 +391,7 @@ def persist_user_conversation_details( model_id: The model identifier provider_id: The provider identifier topic_summary: Optional topic summary for the conversation + response_id: Response ID for the conversation """ # Normalize the conversation ID (strip 'conv_' prefix if present) normalized_id = normalize_conversation_id(conversation_id) @@ -402,7 +406,6 @@ def persist_user_conversation_details( existing_conversation = ( session.query(UserConversation).filter_by(id=normalized_id).first() ) - if not existing_conversation: conversation = UserConversation( id=normalized_id, @@ -411,6 +414,10 @@ def persist_user_conversation_details( last_used_provider=provider_id, topic_summary=topic_summary or "", message_count=1, + # For new conversation either current response or None if moderation-blocked + last_response_id=( + response_id if not is_moderation_id(response_id) else None + ), ) session.add(conversation) logger.debug( @@ -427,6 +434,9 @@ def persist_user_conversation_details( user_id, existing_conversation.message_count, ) + # Update last response id only if not moderation-blocked + if not is_moderation_id(response_id): + existing_conversation.last_response_id = response_id 
max_turn_number = ( session.query(func.max(UserTurn.turn_number)) @@ -441,6 +451,7 @@ def persist_user_conversation_details( completed_at=datetime.fromisoformat(completed_at), provider=provider_id, model=model_id, + response_id=response_id, ) session.add(turn) logger.debug( diff --git a/src/utils/responses.py b/src/utils/responses.py index bdced0f71..04f4c96c1 100644 --- a/src/utils/responses.py +++ b/src/utils/responses.py @@ -7,11 +7,12 @@ from typing import Any, Optional, cast from fastapi import HTTPException +from llama_stack_api import OpenAIResponseObject from llama_stack_api.openai_responses import ( OpenAIResponseContentPartRefusal as ContentPartRefusal, OpenAIResponseInputMessageContent as InputMessageContent, + OpenAIResponseInputMessageContentFile as InputFilePart, OpenAIResponseInputMessageContentText as InputTextPart, - OpenAIResponseInputTool as InputTool, OpenAIResponseInputToolFileSearch as InputToolFileSearch, OpenAIResponseInputToolMCP as InputToolMCP, OpenAIResponseMCPApprovalRequest as MCPApprovalRequest, @@ -27,9 +28,15 @@ OpenAIResponseOutputMessageMCPListTools as MCPListTools, OpenAIResponseOutputMessageWebSearchToolCall as WebSearchCall, OpenAIResponseUsage as ResponseUsage, + OpenAIResponseInputTool as InputTool, + OpenAIResponseUsageInputTokensDetails as UsageInputTokensDetails, + OpenAIResponseUsageOutputTokensDetails as UsageOutputTokensDetails, + OpenAIResponseInputToolChoiceMode as ToolChoiceMode, + OpenAIResponseInputToolChoice as ToolChoice, ) from llama_stack_client import APIConnectionError, APIStatusError, AsyncLlamaStackClient +from client import AsyncLlamaStackClientHolder import constants import metrics from configuration import configuration @@ -55,6 +62,7 @@ from utils.types import ( RAGChunk, ReferencedDocument, + ResponseInput, ResponseItem, ResponsesApiParams, ToolCallSummary, @@ -377,6 +385,29 @@ def resolve_vector_store_ids( return [rag_id_to_vector_db_id.get(vs_id, vs_id) for vs_id in vector_store_ids] +def 
translate_tools_vector_store_ids( + tools: list[InputTool], byok_rags: list[ByokRag] +) -> list[InputTool]: + """Translate user-facing vector_store_ids to llama-stack IDs in each file_search tool. + + Parameters: + tools: List of request tools (may contain file_search with user-facing IDs). + byok_rags: BYOK RAG configuration for ID resolution. + + Returns: + New list of tools with file_search vector_store_ids translated; other tools + unchanged. + """ + result: list[InputTool] = [] + for tool in tools: + if tool.type == "file_search": + resolved_ids = resolve_vector_store_ids(tool.vector_store_ids, byok_rags) + result.append(tool.model_copy(update={"vector_store_ids": resolved_ids})) + else: + result.append(tool) + return result + + def get_rag_tools(vector_store_ids: list[str]) -> Optional[list[InputToolFileSearch]]: """Convert vector store IDs to tools format for Responses API. @@ -842,6 +873,16 @@ def _resolve_source_for_result( if len(vector_store_ids) > 1: attributes = getattr(result, "attributes", {}) or {} + + # Primary: read index name embedded directly by rag-content. + # This value is already the user-facing rag_id, not a vector_db_id, + # so no mapping is needed. + attr_source: Optional[str] = attributes.get("source") + if attr_source: + return attr_source + + # Fallback: if llama-stack ever populates vector_store_id in results, + # use it with the rag_id_mapping. attr_store_id: Optional[str] = attributes.get("vector_store_id") if attr_store_id: return rag_id_mapping.get(attr_store_id, attr_store_id) @@ -997,8 +1038,7 @@ async def select_model_for_responses( and user_conversation.last_used_model and user_conversation.last_used_provider ): - model_id = f"{user_conversation.last_used_provider}/{user_conversation.last_used_model}" - return model_id + return f"{user_conversation.last_used_provider}/{user_conversation.last_used_model}" # 2. 
Select default model from configuration if configuration.inference is not None: @@ -1036,7 +1076,7 @@ async def select_model_for_responses( def build_turn_summary( - response: Optional[ResponseObject], + response: Optional[OpenAIResponseObject], model: str, vector_store_ids: Optional[list[str]] = None, rag_id_mapping: Optional[dict[str, str]] = None, @@ -1058,6 +1098,7 @@ def build_turn_summary( if response is None or response.output is None: return summary + summary.id = response.id # Extract text from output items summary.llm_response = extract_text_from_response_items(response.output) @@ -1109,15 +1150,12 @@ def extract_text_from_response_item(response_item: ResponseItem) -> str: response_item: A single item from request input or response output. Returns: - Extracted text content, or empty string if not a message or role is user. + Extracted text content, or empty string if not a message. """ if response_item.type != "message": return "" message_item = cast(ResponseMessage, response_item) - if message_item.role == "user": - return "" - return _extract_text_from_content(message_item.content) @@ -1139,15 +1177,16 @@ def _extract_text_from_content( text_fragments: list[str] = [] for part in content: - if part.type == "input_text": + part_type = getattr(part, "type", None) + if part_type == "input_text": input_text_part = cast(InputTextPart, part) if input_text_part.text: text_fragments.append(input_text_part.text.strip()) - elif part.type == "output_text": + elif part_type == "output_text": output_text_part = cast(OutputTextPart, part) if output_text_part.text: text_fragments.append(output_text_part.text.strip()) - elif part.type == "refusal": + elif part_type == "refusal": refusal_part = cast(ContentPartRefusal, part) if refusal_part.refusal: text_fragments.append(refusal_part.refusal.strip()) @@ -1168,3 +1207,130 @@ def deduplicate_referenced_documents( seen.add(key) out.append(d) return out + + +async def create_new_conversation( + client: AsyncLlamaStackClient, 
+) -> str: + """Create a new conversation via the Llama Stack Conversations API. + + Args: + client: The Llama Stack client used to create the conversation. + + Returns: + The new conversation's ID (string), as returned by the API. + """ + try: + conversation = await client.conversations.create(metadata={}) + return conversation.id + except APIConnectionError as e: + error_response = ServiceUnavailableResponse( + backend_name="Llama Stack", + cause=str(e), + ) + raise HTTPException(**error_response.model_dump()) from e + except APIStatusError as e: + error_response = InternalServerErrorResponse.generic() + raise HTTPException(**error_response.model_dump()) from e + + +def get_zero_usage() -> ResponseUsage: + """Create a Usage object with zero values for input and output tokens. + + Returns: + Usage object with zero values for input and output tokens. + """ + return ResponseUsage( + input_tokens=0, + input_tokens_details=UsageInputTokensDetails(cached_tokens=0), + output_tokens=0, + output_tokens_details=UsageOutputTokensDetails(reasoning_tokens=0), + total_tokens=0, + ) + + +def extract_attachments_text(response_input: ResponseInput) -> str: + """Extract file_data from input_file parts inside message content. + + Args: + response_input: Response input (string or list of response items). + + Returns: + All present file_data values joined by double newline. 
+ """ + if isinstance(response_input, str): + return "" + file_data_parts: list[str] = [] + for item in response_input: + if item.type != "message": + continue + message = cast(ResponseMessage, item) + content = message.content + if isinstance(content, str): + continue + for part in content: + if part.type == "input_file": + file_part = cast(InputFilePart, part) + if file_part.file_data: + file_data_parts.append(file_part.file_data) + return "\n\n".join(file_data_parts) + + +async def resolve_tool_choice( + tools: Optional[list[InputTool]], + tool_choice: Optional[ToolChoice], + token: str, + mcp_headers: Optional[McpHeaders] = None, + request_headers: Optional[Mapping[str, str]] = None, +) -> tuple[Optional[list[InputTool]], Optional[ToolChoice], Optional[list[str]]]: + """Resolve tools and tool_choice for the Responses API. + + If the request includes tools, uses them as-is and derives vector_store_ids + from tool configs; otherwise loads tools via prepare_tools (using all + configured vector stores) and honors tool_choice "none" via the no_tools + flag. When no tools end up configured, tool_choice is cleared to None. + + Args: + tools: Tools from the request, or None to use LCORE-configured tools. + tool_choice: Requested tool choice (e.g. auto, required, none) or None. + token: User token for MCP/auth. + mcp_headers: Optional MCP headers to propagate. + request_headers: Optional request headers for tool resolution. + + Returns: + A tuple of (prepared_tools, prepared_tool_choice, vector_store_ids): + prepared_tools is the list of tools to use, or None if none configured; + prepared_tool_choice is the resolved tool choice, or None when there + are no tools; vector_store_ids is extracted from tools (in user-facing format) + when provided, otherwise None. 
+ """ + prepared_tools: Optional[list[InputTool]] = None + client = AsyncLlamaStackClientHolder().get_client() + if tools: # explicitly specified in request + # Per-request override of vector stores (user-facing rag_ids) + vector_store_ids = extract_vector_store_ids_from_tools(tools) + # Translate user-facing rag_ids to llama-stack vector_store_ids in each file_search tool + byok_rags = configuration.configuration.byok_rag + prepared_tools = translate_tools_vector_store_ids(tools, byok_rags) + prepared_tool_choice = tool_choice or ToolChoiceMode.auto + else: + # Vector stores were not overwritten in request, use all configured vector stores + vector_store_ids = None + # Get all tools configured in LCORE (returns None or non-empty list) + no_tools = ( + isinstance(tool_choice, ToolChoiceMode) + and tool_choice == ToolChoiceMode.none + ) + # Vector stores are prepared in llama-stack format + prepared_tools = await prepare_tools( + client=client, + vector_store_ids=vector_store_ids, # allow all configured vector stores + no_tools=no_tools, + token=token, + mcp_headers=mcp_headers, + request_headers=request_headers, + ) + # If there are no tools, tool_choice cannot be set at all - LLS implicit behavior + prepared_tool_choice = tool_choice if prepared_tools else None + + return prepared_tools, prepared_tool_choice, vector_store_ids diff --git a/src/utils/shields.py b/src/utils/shields.py index ff99fc3b0..19bd0c214 100644 --- a/src/utils/shields.py +++ b/src/utils/shields.py @@ -3,8 +3,14 @@ from typing import Any, Optional from fastapi import HTTPException -from llama_stack_api import OpenAIResponseContentPartRefusal, OpenAIResponseMessage -from llama_stack_client import APIConnectionError, APIStatusError, AsyncLlamaStackClient +from llama_stack_api import OpenAIResponseMessage +from llama_stack_client import ( + APIConnectionError, + APIStatusError as LLSApiStatusError, + AsyncLlamaStackClient, +) +from llama_stack_client.types import ShieldListResponse +from 
openai._exceptions import APIStatusError as OpenAIAPIStatusError import metrics from configuration import AppConfig @@ -16,17 +22,16 @@ UnprocessableEntityResponse, ServiceUnavailableResponse, ) -from utils.suid import get_suid +from utils.query import handle_known_apistatus_errors from utils.types import ( ShieldModerationBlocked, ShieldModerationPassed, ShieldModerationResult, ) +from constants import DEFAULT_VIOLATION_MESSAGE logger = get_logger(__name__) -DEFAULT_VIOLATION_MESSAGE = "I cannot process this request due to policy restrictions." - async def get_available_shields(client: AsyncLlamaStackClient) -> list[str]: """ @@ -129,47 +134,11 @@ async def run_shield_moderation( Raises: HTTPException: If shield's provider_resource_id is not configured or model not found. """ - all_shields = await client.shields.list() - - # Filter shields based on shield_ids parameter - if shield_ids is not None: - if len(shield_ids) == 0: - response = UnprocessableEntityResponse( - response="Invalid shield configuration", - cause=( - "shield_ids provided but no shields selected. " - "Remove the parameter to use default shields." 
- ), - ) - raise HTTPException(**response.model_dump()) - - shields_to_run = [s for s in all_shields if s.identifier in shield_ids] - - # Log warning if requested shield not found - requested = set(shield_ids) - available = {s.identifier for s in shields_to_run} - missing = requested - available - if missing: - logger.warning("Requested shields not found: %s", missing) - - # Reject if no requested shields were found (prevents accidental bypass) - if not shields_to_run: - response = UnprocessableEntityResponse( - response="Invalid shield configuration", - cause=f"Requested shield_ids not found: {sorted(missing)}", - ) - raise HTTPException(**response.model_dump()) - else: - shields_to_run = list(all_shields) - + shields_to_run = await get_shields_for_request(client, shield_ids) available_models = {model.id for model in await client.models.list()} - for shield in shields_to_run: - # Only validate provider_resource_id against models for llama-guard. - # Llama Stack does not verify that the llama-guard model is registered, - # so we check it here to fail fast with a clear error. - # Custom shield providers (e.g. lightspeed_question_validity) configure - # their model internally, so provider_resource_id is not a model ID. + # Lightspeed safety providers configure their model internally + # so provider_resource_id is not necessarily a valid model ID. if shield.provider_id == "llama-guard" and ( not shield.provider_resource_id or shield.provider_resource_id not in available_models @@ -184,18 +153,17 @@ async def run_shield_moderation( moderation_result = await client.moderations.create( input=input_text, model=shield.provider_resource_id ) - # Known Llama Stack bug: error is raised when violation is present - # in the shield LLM response but has wrong format that cannot be parsed. 
- except ValueError: - logger.warning( - "Shield violation detected, treating as blocked", + except APIConnectionError as e: + error_response = ServiceUnavailableResponse( + backend_name="Llama Stack", + cause=str(e), ) - metrics.llm_calls_validation_errors_total.inc() - return ShieldModerationBlocked( - message=DEFAULT_VIOLATION_MESSAGE, - moderation_id=f"modr_{get_suid()}", - refusal_response=create_refusal_response(DEFAULT_VIOLATION_MESSAGE), + raise HTTPException(**error_response.model_dump()) from e + except (LLSApiStatusError, OpenAIAPIStatusError) as e: + error_response = handle_known_apistatus_errors( + e, shield.provider_resource_id or "" ) + raise HTTPException(**error_response.model_dump()) from e if moderation_result.results and moderation_result.results[0].flagged: result = moderation_result.results[0] @@ -247,7 +215,7 @@ async def append_turn_to_conversation( cause=str(e), ) raise HTTPException(**error_response.model_dump()) from e - except APIStatusError as e: + except LLSApiStatusError as e: error_response = InternalServerErrorResponse.generic() raise HTTPException(**error_response.model_dump()) from e @@ -255,18 +223,60 @@ async def append_turn_to_conversation( def create_refusal_response(refusal_message: str) -> OpenAIResponseMessage: """Create a refusal response message object. - Creates an OpenAIResponseMessage with assistant role containing a refusal - content part. This can be used for both conversation items and response output. - Args: refusal_message: The refusal message text. Returns: - OpenAIResponseMessage with refusal content. + OpenAIResponseMessage with refusal message. 
""" - refusal_content = OpenAIResponseContentPartRefusal(refusal=refusal_message) return OpenAIResponseMessage( - type="message", role="assistant", - content=[refusal_content], + content=refusal_message, ) + + +async def get_shields_for_request( + client: AsyncLlamaStackClient, + shield_ids: Optional[list[str]] = None, +) -> ShieldListResponse: + """Resolve shields for the request: filtered by shield_ids or all configured. + + Args: + client: Llama Stack client. + shield_ids: Optional list of shield IDs. If provided, only shields + with these identifiers are returned; if None, all configured + shields are returned. + + Returns: + ShieldListResponse: List of Shield objects to run for this request. + + Raises: + HTTPException: 404 if shield_ids is provided and any requested + shield is not configured in Llama Stack. + """ + if shield_ids == []: + return [] + try: + configured_shields: ShieldListResponse = await client.shields.list() + if shield_ids is None: + return configured_shields + requested = set(shield_ids) + configured_ids = {s.identifier for s in configured_shields} + missing = requested - configured_ids + if missing: + response = NotFoundResponse( + resource=f"Shield{'s' if len(missing) > 1 else ''}", + resource_id=", ".join(missing), + ) + raise HTTPException(**response.model_dump()) + + return [s for s in configured_shields if s.identifier in requested] + except APIConnectionError as e: + error_response = ServiceUnavailableResponse( + backend_name="Llama Stack", + cause=str(e), + ) + raise HTTPException(**error_response.model_dump()) from e + except LLSApiStatusError as e: + error_response = InternalServerErrorResponse.generic() + raise HTTPException(**error_response.model_dump()) from e diff --git a/src/utils/suid.py b/src/utils/suid.py index aafd64de5..f05fbe701 100644 --- a/src/utils/suid.py +++ b/src/utils/suid.py @@ -40,7 +40,7 @@ def check_suid(suid: str) -> bool: return False # Strip 'conv_' prefix if present - hex_part = suid[5:] if 
suid.startswith("conv_") else suid + hex_part = suid.removeprefix("conv_") # Check for 48-char hex string (llama-stack conversation ID format) if len(hex_part) == 48: @@ -103,3 +103,11 @@ def to_llama_stack_conversation_id(conversation_id: str) -> str: if not conversation_id.startswith("conv_"): return f"conv_{conversation_id}" return conversation_id + + +def is_moderation_id(suid: str) -> bool: + """Check if given string is a moderation ID. + + Returns True if the string starts with 'modr'. + """ + return suid.startswith("modr") diff --git a/src/utils/tool_formatter.py b/src/utils/tool_formatter.py index 8b200ba1d..bde4741be 100644 --- a/src/utils/tool_formatter.py +++ b/src/utils/tool_formatter.py @@ -36,7 +36,7 @@ def format_tool_response(tool_dict: dict[str, Any]) -> dict[str, Any]: description = clean_description # Extract only the required fields - formatted_tool = { + return { "identifier": tool_dict.get("identifier", ""), "description": description, "parameters": tool_dict.get("parameters", []), @@ -46,8 +46,6 @@ def format_tool_response(tool_dict: dict[str, Any]) -> dict[str, Any]: "type": tool_dict.get("type", ""), } - return formatted_tool - def extract_clean_description(description: str) -> str: """ diff --git a/src/utils/types.py b/src/utils/types.py index 8b0e618ca..1ccc77f43 100644 --- a/src/utils/types.py +++ b/src/utils/types.py @@ -1,6 +1,6 @@ """Common types for the project.""" -from typing import Annotated, Any, Literal, Optional, TypeAlias +from typing import Annotated, Any, Literal, Optional from llama_stack_api import ImageContentItem, TextContentItem from llama_stack_api.openai_responses import ( @@ -17,14 +17,16 @@ OpenAIResponseOutputMessageWebSearchToolCall as WebSearchToolCall, OpenAIResponsePrompt as Prompt, OpenAIResponseText as Text, + OpenAIResponseReasoning as Reasoning, ) from llama_stack_client.lib.agents.tool_parser import ToolParser from llama_stack_client.lib.agents.types import ( CompletionMessage as AgentCompletionMessage, 
ToolCall as AgentToolCall, ) -from pydantic import AnyUrl, BaseModel, Field +from pydantic import AnyUrl, BaseModel, ConfigDict, Field +from models.database.conversations import UserConversation from utils.token_counter import TokenCounter @@ -117,6 +119,31 @@ class ShieldModerationPassed(BaseModel): decision: Literal["passed"] = "passed" +class ResponsesConversationContext(BaseModel): + """Result of resolving conversation context for the responses endpoint. + + Holds the conversation ID to use for the LLM, the optional user conversation + record, and the resolved generate_topic_summary flag. Caller assigns these + to the request in outer scope instead of mutating the request inside the + resolver. + + Attributes: + conversation: Conversation ID in llama-stack format to use for the request. + user_conversation: Resolved user conversation record, or None for new ones. + generate_topic_summary: Resolved value for request.generate_topic_summary. + """ + + conversation: str = Field(description="Conversation ID in llama-stack format") + user_conversation: Optional[UserConversation] = Field( + default=None, + description="Resolved user conversation record, or None for new conversations", + ) + generate_topic_summary: bool = Field( + description="Resolved value for request.generate_topic_summary", + ) + model_config = ConfigDict(arbitrary_types_allowed=True) + + class ShieldModerationBlocked(BaseModel): """Shield moderation blocked the content; refusal details are present.""" @@ -131,7 +158,7 @@ class ShieldModerationBlocked(BaseModel): Field(discriminator="decision"), ] -IncludeParameter: TypeAlias = Literal[ +type IncludeParameter = Literal[ "web_search_call.action.sources", "code_interpreter_call.outputs", "computer_call_output.output.image_url", @@ -141,7 +168,7 @@ class ShieldModerationBlocked(BaseModel): "reasoning.encrypted_content", ] -ResponseItem: TypeAlias = ( +type ResponseItem = ( ResponseMessage | WebSearchToolCall | FileSearchToolCall @@ -153,7 +180,7 @@ 
class ShieldModerationBlocked(BaseModel): | McpApprovalResponse ) -ResponseInput: TypeAlias = str | list[ResponseItem] +type ResponseInput = str | list[ResponseItem] class ResponsesApiParams(BaseModel): @@ -177,6 +204,10 @@ class ResponsesApiParams(BaseModel): default=None, description="Maximum number of inference iterations", ) + max_output_tokens: Optional[int] = Field( + default=None, + description="Maximum number of tokens allowed in the response", + ) max_tool_calls: Optional[int] = Field( default=None, description="Maximum tool calls allowed in a single response", @@ -197,6 +228,10 @@ class ResponsesApiParams(BaseModel): default=None, description="Prompt template with variables for dynamic substitution", ) + reasoning: Optional[Reasoning] = Field( + default=None, + description="Reasoning configuration for the response", + ) store: bool = Field(description="Whether to store the response") stream: bool = Field(description="Whether to stream the response") temperature: Optional[float] = Field( @@ -230,6 +265,10 @@ def model_dump(self, *args: Any, **kwargs: Any) -> dict[str, Any]: MCP servers. See LCORE-1414 / GitHub issue #1269. 
""" result = super().model_dump(*args, **kwargs) + # Only one context option is allowed, previous_response_id has priority + # Turn is added to conversation manually if previous_response_id is used + if self.previous_response_id: + result.pop("conversation", None) dumped_tools = result.get("tools") if not self.tools or not isinstance(dumped_tools, list): return result @@ -327,12 +366,12 @@ class RAGContext(BaseModel): class TurnSummary(BaseModel): """Summary of a turn in llama stack.""" + id: str = Field(default="", description="ID of the response") llm_response: str = "" tool_calls: list[ToolCallSummary] = Field(default_factory=list) tool_results: list[ToolResultSummary] = Field(default_factory=list) rag_chunks: list[RAGChunk] = Field(default_factory=list) referenced_documents: list[ReferencedDocument] = Field(default_factory=list) - inline_rag_documents: list[ReferencedDocument] = Field(default_factory=list) token_usage: TokenCounter = Field(default_factory=TokenCounter) diff --git a/src/utils/vector_search.py b/src/utils/vector_search.py index 485914e0b..4a6b58f49 100644 --- a/src/utils/vector_search.py +++ b/src/utils/vector_search.py @@ -6,9 +6,12 @@ import asyncio import traceback -from typing import Any, Optional +from typing import Any, Optional, cast from urllib.parse import urljoin +from llama_stack_api.openai_responses import ( + OpenAIResponseMessage as ResponseMessage, +) from llama_stack_client import AsyncLlamaStackClient from pydantic import AnyUrl @@ -17,7 +20,7 @@ from log import get_logger from models.responses import ReferencedDocument from utils.responses import resolve_vector_store_ids -from utils.types import RAGChunk, RAGContext +from utils.types import RAGChunk, RAGContext, ResponseInput logger = get_logger(__name__) @@ -493,6 +496,7 @@ async def _fetch_solr_rag( async def build_rag_context( client: AsyncLlamaStackClient, + moderation_decision: str, query: str, vector_store_ids: Optional[list[str]], solr: Optional[dict[str, Any]] = None, @@ 
-503,12 +507,17 @@ async def build_rag_context( Args: client: The AsyncLlamaStackClient to use for the request - query_request: The user's query request - configuration: Application configuration + moderation_decision: The moderation decision + query: The user's query + vector_store_ids: The vector store IDs to query + solr: The Solr query parameters Returns: RAGContext containing formatted context text and referenced documents """ + if moderation_decision == "blocked": + return RAGContext() + # Fetch from all enabled RAG sources in parallel byok_chunks_task = _fetch_byok_rag(client, query, vector_store_ids) solr_chunks_task = _fetch_solr_rag(client, query, solr) @@ -625,3 +634,39 @@ def _convert_solr_chunks_to_rag_format( ) return rag_chunks + + +def append_inline_rag_context_to_responses_input( + input_value: ResponseInput, + inline_rag_context_text: str, +) -> ResponseInput: + """Append inline RAG context to Responses API input. + + If input is str, appends the context text. + If input is a sequence of items, appends the context to the text of the first user message. + If there is no user message, returns the input unchanged. + + Parameters: + input_value: The request input (string or list of ResponseItem). + inline_rag_context_text: RAG context string to inject. + + Returns: + The same type as input_value, with context merged in. 
+ """ + if not inline_rag_context_text: + return input_value + if isinstance(input_value, str): + return input_value + "\n\n" + inline_rag_context_text + for item in input_value: + if item.type != "message" or item.role != "user": + continue + message = cast(ResponseMessage, item) + content = message.content + if isinstance(content, str): + message.content = content + "\n\n" + inline_rag_context_text + return input_value + for part in content: + if part.type == "input_text": + part.text = part.text + "\n\n" + inline_rag_context_text + return input_value + return input_value diff --git a/tests/benchmarks/data_generators.py b/tests/benchmarks/data_generators.py index e39f1d6e7..672f09cba 100644 --- a/tests/benchmarks/data_generators.py +++ b/tests/benchmarks/data_generators.py @@ -155,5 +155,4 @@ def generate_topic_summary() -> str: ], ] - summary = " ".join([random.choice(yap) for yap in yaps]) + "." - return summary + return " ".join([random.choice(yap) for yap in yaps]) + "." diff --git a/tests/e2e/configuration/library-mode/lightspeed-stack-invalid-mcp-file-auth.yaml b/tests/e2e/configuration/library-mode/lightspeed-stack-invalid-mcp-file-auth.yaml new file mode 100644 index 000000000..483e32b73 --- /dev/null +++ b/tests/e2e/configuration/library-mode/lightspeed-stack-invalid-mcp-file-auth.yaml @@ -0,0 +1,24 @@ +name: Lightspeed Core Service (LCS) +service: + host: 0.0.0.0 + port: 8080 + auth_enabled: false + workers: 1 + color_log: true + access_log: true +llama_stack: + # Library mode - embeds llama-stack as library + use_as_library_client: true + library_client_config_path: run.yaml +user_data_collection: + feedback_enabled: true + feedback_storage: "/tmp/data/feedback" + transcripts_enabled: true + transcripts_storage: "/tmp/data/transcripts" +authentication: + module: "noop" +mcp_servers: + - name: "mcp-file" + url: "http://mock-mcp:3001" + authorization_headers: + Authorization: "/tmp/invalid-mcp-secret-token" diff --git 
a/tests/e2e/configuration/library-mode/lightspeed-stack-mcp-client-auth.yaml b/tests/e2e/configuration/library-mode/lightspeed-stack-mcp-client-auth.yaml new file mode 100644 index 000000000..05f304a5d --- /dev/null +++ b/tests/e2e/configuration/library-mode/lightspeed-stack-mcp-client-auth.yaml @@ -0,0 +1,24 @@ +name: Lightspeed Core Service (LCS) +service: + host: 0.0.0.0 + port: 8080 + auth_enabled: false + workers: 1 + color_log: true + access_log: true +llama_stack: + # Library mode - embeds llama-stack as library + use_as_library_client: true + library_client_config_path: run.yaml +user_data_collection: + feedback_enabled: true + feedback_storage: "/tmp/data/feedback" + transcripts_enabled: true + transcripts_storage: "/tmp/data/transcripts" +authentication: + module: "noop" +mcp_servers: + - name: "mcp-client" + url: "http://mock-mcp:3001" + authorization_headers: + Authorization: "client" diff --git a/tests/e2e/configuration/library-mode/lightspeed-stack-mcp-file-auth.yaml b/tests/e2e/configuration/library-mode/lightspeed-stack-mcp-file-auth.yaml index 1ff0d425e..79a8807ec 100644 --- a/tests/e2e/configuration/library-mode/lightspeed-stack-mcp-file-auth.yaml +++ b/tests/e2e/configuration/library-mode/lightspeed-stack-mcp-file-auth.yaml @@ -18,8 +18,7 @@ user_data_collection: authentication: module: "noop" mcp_servers: - - name: "mcp-file-auth" - provider_id: "model-context-protocol" + - name: "mcp-file" url: "http://mock-mcp:3001" authorization_headers: Authorization: "/tmp/mcp-secret-token" diff --git a/tests/e2e/configuration/library-mode/lightspeed-stack-mcp-kubernetes-auth.yaml b/tests/e2e/configuration/library-mode/lightspeed-stack-mcp-kubernetes-auth.yaml new file mode 100644 index 000000000..2d79f1f9d --- /dev/null +++ b/tests/e2e/configuration/library-mode/lightspeed-stack-mcp-kubernetes-auth.yaml @@ -0,0 +1,24 @@ +name: Lightspeed Core Service (LCS) +service: + host: 0.0.0.0 + port: 8080 + auth_enabled: false + workers: 1 + color_log: true + 
access_log: true +llama_stack: + # Library mode - embeds llama-stack as library + use_as_library_client: true + library_client_config_path: run.yaml +user_data_collection: + feedback_enabled: true + feedback_storage: "/tmp/data/feedback" + transcripts_enabled: true + transcripts_storage: "/tmp/data/transcripts" +authentication: + module: "noop" +mcp_servers: + - name: "mcp-kubernetes" + url: "http://mock-mcp:3001" + authorization_headers: + Authorization: "kubernetes" diff --git a/tests/e2e/configuration/library-mode/lightspeed-stack-mcp-oauth-auth.yaml b/tests/e2e/configuration/library-mode/lightspeed-stack-mcp-oauth-auth.yaml new file mode 100644 index 000000000..3294ac708 --- /dev/null +++ b/tests/e2e/configuration/library-mode/lightspeed-stack-mcp-oauth-auth.yaml @@ -0,0 +1,24 @@ +name: Lightspeed Core Service (LCS) +service: + host: 0.0.0.0 + port: 8080 + auth_enabled: false + workers: 1 + color_log: true + access_log: true +llama_stack: + # Library mode - embeds llama-stack as library + use_as_library_client: true + library_client_config_path: run.yaml +user_data_collection: + feedback_enabled: true + feedback_storage: "/tmp/data/feedback" + transcripts_enabled: true + transcripts_storage: "/tmp/data/transcripts" +authentication: + module: "noop" +mcp_servers: + - name: "mcp-oauth" + url: "http://mock-mcp:3001" + authorization_headers: + Authorization: "oauth" diff --git a/tests/e2e/configuration/library-mode/lightspeed-stack-mcp.yaml b/tests/e2e/configuration/library-mode/lightspeed-stack-mcp.yaml index 0656aa87c..647a2cae9 100644 --- a/tests/e2e/configuration/library-mode/lightspeed-stack-mcp.yaml +++ b/tests/e2e/configuration/library-mode/lightspeed-stack-mcp.yaml @@ -19,7 +19,18 @@ authentication: module: "noop" mcp_servers: - name: "mcp-oauth" - provider_id: "model-context-protocol" url: "http://mock-mcp:3001" authorization_headers: - Authorization: "oauth" \ No newline at end of file + Authorization: "oauth" + - name: "mcp-kubernetes" + url: 
"http://mock-mcp:3001" + authorization_headers: + Authorization: "kubernetes" + - name: "mcp-file" + url: "http://mock-mcp:3001" + authorization_headers: + Authorization: "/tmp/mcp-secret-token" + - name: "mcp-client" + url: "http://mock-mcp:3001" + authorization_headers: + Authorization: "client" \ No newline at end of file diff --git a/tests/e2e/configuration/server-mode/lightspeed-stack-invalid-mcp-file-auth.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack-invalid-mcp-file-auth.yaml new file mode 100644 index 000000000..05ec86fdf --- /dev/null +++ b/tests/e2e/configuration/server-mode/lightspeed-stack-invalid-mcp-file-auth.yaml @@ -0,0 +1,25 @@ +name: Lightspeed Core Service (LCS) +service: + host: 0.0.0.0 + port: 8080 + auth_enabled: false + workers: 1 + color_log: true + access_log: true +llama_stack: + # Server mode - connects to separate llama-stack service + use_as_library_client: false + url: http://llama-stack:8321 + api_key: xyzzy +user_data_collection: + feedback_enabled: true + feedback_storage: "/tmp/data/feedback" + transcripts_enabled: true + transcripts_storage: "/tmp/data/transcripts" +authentication: + module: "noop" +mcp_servers: + - name: "mcp-file" + url: "http://mock-mcp:3001" + authorization_headers: + Authorization: "/tmp/invalid-mcp-secret-token" diff --git a/tests/e2e/configuration/server-mode/lightspeed-stack-mcp-client-auth.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack-mcp-client-auth.yaml new file mode 100644 index 000000000..e0f952fc3 --- /dev/null +++ b/tests/e2e/configuration/server-mode/lightspeed-stack-mcp-client-auth.yaml @@ -0,0 +1,25 @@ +name: Lightspeed Core Service (LCS) +service: + host: 0.0.0.0 + port: 8080 + auth_enabled: false + workers: 1 + color_log: true + access_log: true +llama_stack: + # Server mode - connects to separate llama-stack service + use_as_library_client: false + url: http://llama-stack:8321 + api_key: xyzzy +user_data_collection: + feedback_enabled: true + feedback_storage: 
"/tmp/data/feedback" + transcripts_enabled: true + transcripts_storage: "/tmp/data/transcripts" +authentication: + module: "noop" +mcp_servers: + - name: "mcp-client" + url: "http://mock-mcp:3001" + authorization_headers: + Authorization: "client" diff --git a/tests/e2e/configuration/server-mode/lightspeed-stack-mcp-file-auth.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack-mcp-file-auth.yaml index d39f55399..aca5c6ef2 100644 --- a/tests/e2e/configuration/server-mode/lightspeed-stack-mcp-file-auth.yaml +++ b/tests/e2e/configuration/server-mode/lightspeed-stack-mcp-file-auth.yaml @@ -19,8 +19,7 @@ user_data_collection: authentication: module: "noop" mcp_servers: - - name: "mcp-file-auth" - provider_id: "model-context-protocol" + - name: "mcp-file" url: "http://mock-mcp:3001" authorization_headers: Authorization: "/tmp/mcp-secret-token" diff --git a/tests/e2e/configuration/server-mode/lightspeed-stack-mcp-kubernetes-auth.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack-mcp-kubernetes-auth.yaml new file mode 100644 index 000000000..66dc7f87b --- /dev/null +++ b/tests/e2e/configuration/server-mode/lightspeed-stack-mcp-kubernetes-auth.yaml @@ -0,0 +1,25 @@ +name: Lightspeed Core Service (LCS) +service: + host: 0.0.0.0 + port: 8080 + auth_enabled: false + workers: 1 + color_log: true + access_log: true +llama_stack: + # Server mode - connects to separate llama-stack service + use_as_library_client: false + url: http://llama-stack:8321 + api_key: xyzzy +user_data_collection: + feedback_enabled: true + feedback_storage: "/tmp/data/feedback" + transcripts_enabled: true + transcripts_storage: "/tmp/data/transcripts" +authentication: + module: "noop" +mcp_servers: + - name: "mcp-kubernetes" + url: "http://mock-mcp:3001" + authorization_headers: + Authorization: "kubernetes" diff --git a/tests/e2e/configuration/server-mode/lightspeed-stack-mcp-oauth-auth.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack-mcp-oauth-auth.yaml new file mode 100644 
index 000000000..b9125de8e --- /dev/null +++ b/tests/e2e/configuration/server-mode/lightspeed-stack-mcp-oauth-auth.yaml @@ -0,0 +1,25 @@ +name: Lightspeed Core Service (LCS) +service: + host: 0.0.0.0 + port: 8080 + auth_enabled: false + workers: 1 + color_log: true + access_log: true +llama_stack: + # Server mode - connects to separate llama-stack service + use_as_library_client: false + url: http://llama-stack:8321 + api_key: xyzzy +user_data_collection: + feedback_enabled: true + feedback_storage: "/tmp/data/feedback" + transcripts_enabled: true + transcripts_storage: "/tmp/data/transcripts" +authentication: + module: "noop" +mcp_servers: + - name: "mcp-oauth" + url: "http://mock-mcp:3001" + authorization_headers: + Authorization: "oauth" diff --git a/tests/e2e/configuration/server-mode/lightspeed-stack-mcp.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack-mcp.yaml index a598ce441..e35535f42 100644 --- a/tests/e2e/configuration/server-mode/lightspeed-stack-mcp.yaml +++ b/tests/e2e/configuration/server-mode/lightspeed-stack-mcp.yaml @@ -20,7 +20,18 @@ authentication: module: "noop" mcp_servers: - name: "mcp-oauth" - provider_id: "model-context-protocol" url: "http://mock-mcp:3001" authorization_headers: - Authorization: "oauth" \ No newline at end of file + Authorization: "oauth" + - name: "mcp-kubernetes" + url: "http://mock-mcp:3001" + authorization_headers: + Authorization: "kubernetes" + - name: "mcp-file" + url: "http://mock-mcp:3001" + authorization_headers: + Authorization: "/tmp/mcp-secret-token" + - name: "mcp-client" + url: "http://mock-mcp:3001" + authorization_headers: + Authorization: "client" \ No newline at end of file diff --git a/tests/e2e/features/environment.py b/tests/e2e/features/environment.py index 0ca8781d0..bc117f46f 100644 --- a/tests/e2e/features/environment.py +++ b/tests/e2e/features/environment.py @@ -16,11 +16,13 @@ from tests.e2e.utils.prow_utils import restore_llama_stack_pod from behave.runner import Context -from 
tests.e2e.utils.llama_stack_shields import ( +from tests.e2e.utils.llama_stack_utils import ( register_shield, + unregister_mcp_toolgroups, unregister_shield, ) from tests.e2e.utils.utils import ( + clear_llama_stack_storage, create_config_backup, is_prow_environment, remove_config_backup, @@ -57,6 +59,22 @@ "tests/e2e/configuration/{mode_dir}/lightspeed-stack-mcp-file-auth.yaml", "tests/e2e-prow/rhoai/configs/lightspeed-stack-mcp-file-auth.yaml", ), + "invalid-mcp-file-auth": ( + "tests/e2e/configuration/{mode_dir}/lightspeed-stack-invalid-mcp-file-auth.yaml", + "tests/e2e-prow/rhoai/configs/lightspeed-stack-invalid-mcp-file-auth.yaml", + ), + "mcp-kubernetes-auth": ( + "tests/e2e/configuration/{mode_dir}/lightspeed-stack-mcp-kubernetes-auth.yaml", + "tests/e2e-prow/rhoai/configs/lightspeed-stack-mcp-kubernetes-auth.yaml", + ), + "mcp-client-auth": ( + "tests/e2e/configuration/{mode_dir}/lightspeed-stack-mcp-client-auth.yaml", + "tests/e2e-prow/rhoai/configs/lightspeed-stack-mcp-client-auth.yaml", + ), + "mcp-oauth-auth": ( + "tests/e2e/configuration/{mode_dir}/lightspeed-stack-mcp-oauth-auth.yaml", + "tests/e2e-prow/rhoai/configs/lightspeed-stack-mcp-oauth-auth.yaml", + ), } @@ -207,6 +225,27 @@ def before_scenario(context: Context, scenario: Scenario) -> None: switch_config(context.scenario_config) restart_container("lightspeed-stack") + config_name: str | None = None + if "MCPFileAuthConfig" in scenario.effective_tags: + config_name = "mcp-file-auth" + elif "InvalidMCPFileAuthConfig" in scenario.effective_tags: + config_name = "invalid-mcp-file-auth" + elif "MCPKubernetesAuthConfig" in scenario.effective_tags: + config_name = "mcp-kubernetes-auth" + elif "MCPClientAuthConfig" in scenario.effective_tags: + config_name = "mcp-client-auth" + elif "MCPOAuthAuthConfig" in scenario.effective_tags: + config_name = "mcp-oauth-auth" + + if config_name is not None: + if not context.is_library_mode: + unregister_mcp_toolgroups() + else: + clear_llama_stack_storage() + 
context.scenario_config = _get_config_path(config_name, mode_dir) + switch_config(context.scenario_config) + restart_container("lightspeed-stack") + def after_scenario(context: Context, scenario: Scenario) -> None: """Run after each scenario is run. @@ -241,7 +280,15 @@ def after_scenario(context: Context, scenario: Scenario) -> None: context.llama_stack_was_running = False # Tags that require config restoration after scenario - config_restore_tags = {"InvalidFeedbackStorageConfig", "NoCacheConfig"} + config_restore_tags = { + "InvalidFeedbackStorageConfig", + "NoCacheConfig", + "MCPFileAuthConfig", + "InvalidMCPFileAuthConfig", + "MCPKubernetesAuthConfig", + "MCPClientAuthConfig", + "MCPOAuthAuthConfig", + } if config_restore_tags & set(scenario.effective_tags): switch_config(context.feature_config) restart_container("lightspeed-stack") diff --git a/tests/e2e/features/info.feature b/tests/e2e/features/info.feature index 7b16933af..e3c2c066c 100644 --- a/tests/e2e/features/info.feature +++ b/tests/e2e/features/info.feature @@ -16,7 +16,7 @@ Feature: Info tests When I access REST API endpoint "info" using HTTP GET method Then The status code of the response is 200 And The body of the response has proper name Lightspeed Core Service (LCS) and version 0.4.2 - And The body of the response has llama-stack version 0.4.3 + And The body of the response has llama-stack version 0.5.2 @skip-in-library-mode Scenario: Check if info endpoint reports error when llama-stack connection is not working diff --git a/tests/e2e/features/mcp.feature b/tests/e2e/features/mcp.feature index 90d6c5cf8..cd8980e9c 100644 --- a/tests/e2e/features/mcp.feature +++ b/tests/e2e/features/mcp.feature @@ -5,7 +5,358 @@ Feature: MCP tests Given The service is started locally And REST API service prefix is /v1 - Scenario: Check if tools endpoint reports error when MCP requires authentication + +# File-based + @skip # will be fixed by LCORE-1461 + @MCPFileAuthConfig + Scenario: Check if tools endpoint 
succeeds when MCP file-based auth token is passed + Given The system is in default state + When I access REST API endpoint "tools" using HTTP GET method + Then The status code of the response is 200 + And The body of the response contains mcp-file + + @skip-in-library-mode # will be fixed in LCORE-1428 + @MCPFileAuthConfig + Scenario: Check if query endpoint succeeds when MCP file-based auth token is passed + Given The system is in default state + And I capture the current token metrics + When I use "query" to ask question + """ + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} + """ + Then The status code of the response is 200 + And The response should contain following fragments + | Fragments in LLM response | + | Hello | + And The token metrics should have increased + + @skip-in-library-mode # will be fixed in LCORE-1428 + @MCPFileAuthConfig + Scenario: Check if streaming_query endpoint succeeds when MCP file-based auth token is passed + Given The system is in default state + And I capture the current token metrics + When I use "streaming_query" to ask question + """ + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} + """ + When I wait for the response to be completed + Then The status code of the response is 200 + And The streamed response should contain following fragments + | Fragments in LLM response | + | Hello | + And The token metrics should have increased + + @skip # will be fixed by LCORE-1461 + @InvalidMCPFileAuthConfig + Scenario: Check if tools endpoint reports error when MCP file-based invalid auth token is passed + Given The system is in default state + When I access REST API endpoint "tools" using HTTP GET method + Then The status code of the response is 401 + And The body of the response is the following + """ + { + "detail": { + "response": "Missing or invalid credentials provided by client", + "cause": "MCP server at http://mock-mcp:3001 requires OAuth" + } + } + """ + + @skip # will be fixed by 
LCORE-1463 + @InvalidMCPFileAuthConfig + Scenario: Check if query endpoint reports error when MCP file-based invalid auth token is passed + Given The system is in default state + When I use "query" to ask question + """ + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} + """ + Then The status code of the response is 401 + And The body of the response is the following + """ + { + "detail": { + "response": "Missing or invalid credentials provided by client", + "cause": "MCP server at http://mock-mcp:3001 requires OAuth" + } + } + """ + + @skip # will be fixed by LCORE-1463 + @InvalidMCPFileAuthConfig + Scenario: Check if streaming_query endpoint reports error when MCP file-based invalid auth token is passed + Given The system is in default state + When I use "streaming_query" to ask question + """ + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} + """ + Then The status code of the response is 401 + And The body of the response is the following + """ + { + "detail": { + "response": "Missing or invalid credentials provided by client", + "cause": "MCP server at http://mock-mcp:3001 requires OAuth" + } + } + """ + +# Kubernetes + @skip # will be fixed by LCORE-1461 + @MCPKubernetesAuthConfig + Scenario: Check if tools endpoint succeeds when MCP kubernetes auth token is passed + Given The system is in default state + And I set the Authorization header to Bearer kubernetes-test-token + When I access REST API endpoint "tools" using HTTP GET method + Then The status code of the response is 200 + And The body of the response contains mcp-kubernetes + + @skip-in-library-mode # will be fixed in LCORE-1428 + @MCPKubernetesAuthConfig + Scenario: Check if query endpoint succeeds when MCP kubernetes auth token is passed + Given The system is in default state + And I set the Authorization header to Bearer kubernetes-test-token + And I capture the current token metrics + When I use "query" to ask question with authorization header + """ + 
{"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} + """ + Then The status code of the response is 200 + And The response should contain following fragments + | Fragments in LLM response | + | Hello | + And The token metrics should have increased + + @skip-in-library-mode # will be fixed in LCORE-1428 + @MCPKubernetesAuthConfig + Scenario: Check if streaming_query endpoint succeeds when MCP kubernetes auth token is passed + Given The system is in default state + And I set the Authorization header to Bearer kubernetes-test-token + And I capture the current token metrics + When I use "streaming_query" to ask question with authorization header + """ + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} + """ + When I wait for the response to be completed + Then The status code of the response is 200 + And The streamed response should contain following fragments + | Fragments in LLM response | + | Hello | + And The token metrics should have increased + + @skip # will be fixed by LCORE-1461 + @MCPKubernetesAuthConfig + Scenario: Check if tools endpoint reports error when MCP kubernetes invalid auth token is passed + Given The system is in default state + And I set the Authorization header to Bearer kubernetes-invalid-token + When I access REST API endpoint "tools" using HTTP GET method + Then The status code of the response is 401 + And The body of the response is the following + """ + { + "detail": { + "response": "Missing or invalid credentials provided by client", + "cause": "MCP server at http://mock-mcp:3001 requires OAuth" + } + } + """ + + @skip # will be fixed by LCORE-1463 + @MCPKubernetesAuthConfig + Scenario: Check if query endpoint reports error when MCP kubernetes invalid auth token is passed + Given The system is in default state + And I set the Authorization header to Bearer kubernetes-invalid-token + When I use "query" to ask question with authorization header + """ + {"query": "Say hello", "model": "{MODEL}", 
"provider": "{PROVIDER}"} + """ + Then The status code of the response is 401 + And The body of the response is the following + """ + { + "detail": { + "response": "Missing or invalid credentials provided by client", + "cause": "MCP server at http://mock-mcp:3001 requires OAuth" + } + } + """ + + @skip # will be fixed by LCORE-1463 + @MCPKubernetesAuthConfig + Scenario: Check if streaming_query endpoint reports error when MCP kubernetes invalid auth token is passed + Given The system is in default state + And I set the Authorization header to Bearer kubernetes-invalid-token + When I use "streaming_query" to ask question with authorization header + """ + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} + """ + Then The status code of the response is 401 + And The body of the response is the following + """ + { + "detail": { + "response": "Missing or invalid credentials provided by client", + "cause": "MCP server at http://mock-mcp:3001 requires OAuth" + } + } + """ + +# Client-provided + @skip # will be fixed by LCORE-1462 + @MCPClientAuthConfig + Scenario: Check if tools endpoint succeeds by skipping when MCP client-provided auth token is omitted + Given The system is in default state + When I access REST API endpoint "tools" using HTTP GET method + Then The status code of the response is 200 + And The body of the response does not contain mcp-client + + @MCPClientAuthConfig + Scenario: Check if query endpoint succeeds by skipping when MCP client-provided auth token is omitted + Given The system is in default state + And I capture the current token metrics + When I use "query" to ask question + """ + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} + """ + Then The status code of the response is 200 + And The body of the response does not contain mcp-client + And The response should contain following fragments + | Fragments in LLM response | + | Hello | + And The token metrics should have increased + + @MCPClientAuthConfig + 
Scenario: Check if streaming_query endpoint succeeds by skipping when MCP client-provided auth token is omitted + Given The system is in default state + And I capture the current token metrics + When I use "streaming_query" to ask question + """ + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} + """ + When I wait for the response to be completed + Then The status code of the response is 200 + And The body of the response does not contain mcp-client + And The streamed response should contain following fragments + | Fragments in LLM response | + | Hello | + And The token metrics should have increased + + @MCPClientAuthConfig + Scenario: Check if tools endpoint succeeds when MCP client-provided auth token is passed + Given The system is in default state + And I set the "MCP-HEADERS" header to + """ + {"mcp-client": {"Authorization": "Bearer client-test-token"}} + """ + When I access REST API endpoint "tools" using HTTP GET method + Then The status code of the response is 200 + And The body of the response contains mcp-client + + @skip-in-library-mode # will be fixed in LCORE-1428 + @MCPClientAuthConfig + Scenario: Check if query endpoint succeeds when MCP client-provided auth token is passed + Given The system is in default state + And I set the "MCP-HEADERS" header to + """ + {"mcp-client": {"Authorization": "Bearer client-test-token"}} + """ + And I capture the current token metrics + When I use "query" to ask question with authorization header + """ + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} + """ + Then The status code of the response is 200 + And The response should contain following fragments + | Fragments in LLM response | + | Hello | + And The token metrics should have increased + + @skip-in-library-mode # will be fixed in LCORE-1428 + @MCPClientAuthConfig + Scenario: Check if streaming_query endpoint succeeds when MCP client-provided auth token is passed + Given The system is in default state + And I set the 
"MCP-HEADERS" header to + """ + {"mcp-client": {"Authorization": "Bearer client-test-token"}} + """ + And I capture the current token metrics + When I use "streaming_query" to ask question with authorization header + """ + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} + """ + When I wait for the response to be completed + Then The status code of the response is 200 + And The streamed response should contain following fragments + | Fragments in LLM response | + | Hello | + And The token metrics should have increased + + @MCPClientAuthConfig + Scenario: Check if tools endpoint reports error when MCP client-provided invalid auth token is passed + Given The system is in default state + And I set the "MCP-HEADERS" header to + """ + {"mcp-client": {"Authorization": "Bearer client-invalid-token"}} + """ + When I access REST API endpoint "tools" using HTTP GET method + Then The status code of the response is 401 + And The body of the response is the following + """ + { + "detail": { + "response": "Missing or invalid credentials provided by client", + "cause": "MCP server at http://mock-mcp:3001 requires OAuth" + } + } + """ + + @MCPClientAuthConfig + Scenario: Check if query endpoint reports error when MCP client-provided invalid auth token is passed + Given The system is in default state + And I set the "MCP-HEADERS" header to + """ + {"mcp-client": {"Authorization": "Bearer client-invalid-token"}} + """ + When I use "query" to ask question with authorization header + """ + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} + """ + Then The status code of the response is 401 + And The body of the response is the following + """ + { + "detail": { + "response": "Missing or invalid credentials provided by client", + "cause": "MCP server at http://mock-mcp:3001 requires OAuth" + } + } + """ + + @MCPClientAuthConfig + Scenario: Check if streaming_query endpoint reports error when MCP client-provided invalid auth token is passed + Given 
The system is in default state + And I set the "MCP-HEADERS" header to + """ + {"mcp-client": {"Authorization": "Bearer client-invalid-token"}} + """ + When I use "streaming_query" to ask question with authorization header + """ + {"query": "Say hello", "model": "{MODEL}", "provider": "{PROVIDER}"} + """ + Then The status code of the response is 401 + And The body of the response is the following + """ + { + "detail": { + "response": "Missing or invalid credentials provided by client", + "cause": "MCP server at http://mock-mcp:3001 requires OAuth" + } + } + """ + +# OAuth + + @MCPOAuthAuthConfig + Scenario: Check if tools endpoint reports error when MCP OAuth requires authentication Given The system is in default state When I access REST API endpoint "tools" using HTTP GET method Then The status code of the response is 401 @@ -20,7 +371,8 @@ Feature: MCP tests """ And The headers of the response contains the following header "www-authenticate" - Scenario: Check if query endpoint reports error when MCP requires authentication + @MCPOAuthAuthConfig + Scenario: Check if query endpoint reports error when MCP OAuth requires authentication Given The system is in default state When I use "query" to ask question """ @@ -38,7 +390,8 @@ Feature: MCP tests """ And The headers of the response contains the following header "www-authenticate" - Scenario: Check if streaming_query endpoint reports error when MCP requires authentication + @MCPOAuthAuthConfig + Scenario: Check if streaming_query endpoint reports error when MCP OAuth requires authentication Given The system is in default state When I use "streaming_query" to ask question """ @@ -56,22 +409,24 @@ Feature: MCP tests """ And The headers of the response contains the following header "www-authenticate" - Scenario: Check if tools endpoint succeeds when MCP auth token is passed + @MCPOAuthAuthConfig + Scenario: Check if tools endpoint succeeds when MCP OAuth auth token is passed Given The system is in default state And I 
set the "MCP-HEADERS" header to """ - {"mcp-oauth": {"Authorization": "Bearer test-token"}} + {"mcp-oauth": {"Authorization": "Bearer oauth-test-token"}} """ When I access REST API endpoint "tools" using HTTP GET method Then The status code of the response is 200 And The body of the response contains mcp-oauth @skip-in-library-mode # will be fixed in LCORE-1428 - Scenario: Check if query endpoint succeeds when MCP auth token is passed + @MCPOAuthAuthConfig + Scenario: Check if query endpoint succeeds when MCP OAuth auth token is passed Given The system is in default state And I set the "MCP-HEADERS" header to """ - {"mcp-oauth": {"Authorization": "Bearer test-token"}} + {"mcp-oauth": {"Authorization": "Bearer oauth-test-token"}} """ And I capture the current token metrics When I use "query" to ask question with authorization header @@ -85,11 +440,12 @@ Feature: MCP tests And The token metrics should have increased @skip-in-library-mode # will be fixed in LCORE-1428 - Scenario: Check if streaming_query endpoint succeeds when MCP auth token is passed + @MCPOAuthAuthConfig + Scenario: Check if streaming_query endpoint succeeds when MCP OAuth auth token is passed Given The system is in default state And I set the "MCP-HEADERS" header to """ - {"mcp-oauth": {"Authorization": "Bearer test-token"}} + {"mcp-oauth": {"Authorization": "Bearer oauth-test-token"}} """ And I capture the current token metrics When I use "streaming_query" to ask question with authorization header @@ -103,11 +459,12 @@ Feature: MCP tests | Hello | And The token metrics should have increased - Scenario: Check if tools endpoint reports error when MCP invalid auth token is passed + @MCPOAuthAuthConfig + Scenario: Check if tools endpoint reports error when MCP OAuth invalid auth token is passed Given The system is in default state And I set the "MCP-HEADERS" header to """ - {"mcp-oauth": {"Authorization": "Bearer invalid-token"}} + {"mcp-oauth": {"Authorization": "Bearer oauth-invalid-token"}} """ 
When I access REST API endpoint "tools" using HTTP GET method Then The status code of the response is 401 @@ -122,12 +479,12 @@ Feature: MCP tests """ And The headers of the response contains the following header "www-authenticate" - @skip # will be fixed in LCORE-1366 - Scenario: Check if query endpoint reports error when MCP invalid auth token is passed + @MCPOAuthAuthConfig + Scenario: Check if query endpoint reports error when MCP OAuth invalid auth token is passed Given The system is in default state And I set the "MCP-HEADERS" header to """ - {"mcp-oauth": {"Authorization": "Bearer invalid-token"}} + {"mcp-oauth": {"Authorization": "Bearer oauth-invalid-token"}} """ When I use "query" to ask question with authorization header """ @@ -145,11 +502,12 @@ Feature: MCP tests """ And The headers of the response contains the following header "www-authenticate" - Scenario: Check if streaming_query endpoint reports error when MCP invalid auth token is passed + @MCPOAuthAuthConfig + Scenario: Check if streaming_query endpoint reports error when MCP OAuth invalid auth token is passed Given The system is in default state And I set the "MCP-HEADERS" header to """ - {"mcp-oauth": {"Authorization": "Bearer invalid-token"}} + {"mcp-oauth": {"Authorization": "Bearer oauth-invalid-token"}} """ When I use "streaming_query" to ask question with authorization header """ diff --git a/tests/e2e/features/mcp_file_auth.feature b/tests/e2e/features/mcp_file_auth.feature deleted file mode 100644 index 455f0740c..000000000 --- a/tests/e2e/features/mcp_file_auth.feature +++ /dev/null @@ -1,20 +0,0 @@ -@MCPFileAuth -Feature: MCP file-based authorization tests - - Regression tests for LCORE-1414: MCP authorization tokens configured via - file-based authorization_headers must survive model_dump() serialization - and reach the MCP server as a valid Bearer token. 
- - Background: - Given The service is started locally - And REST API service prefix is /v1 - - @skip-in-library-mode - Scenario: Query succeeds with file-based MCP authorization - Given The system is in default state - When I use "query" to ask question - """ - {"query": "Use the mock_tool_e2e tool to send the message 'hello'", "model": "{MODEL}", "provider": "{PROVIDER}"} - """ - Then The status code of the response is 200 - And The body of the response contains mock_tool_e2e diff --git a/tests/e2e/features/query.feature b/tests/e2e/features/query.feature index f765257b1..a85aa8139 100644 --- a/tests/e2e/features/query.feature +++ b/tests/e2e/features/query.feature @@ -175,7 +175,7 @@ Scenario: Check if LLM responds for query request with error for missing query {"query": "Say hello", "model": "{MODEL}", "provider":"unknown"} """ Then The status code of the response is 404 - And The body of the response contains Model with ID gpt-4o-mini does not exist + And The body of the response contains Model with ID {MODEL} does not exist @skip-in-library-mode Scenario: Check if LLM responds for query request with error for inability to connect to llama-stack diff --git a/tests/e2e/features/responses.feature b/tests/e2e/features/responses.feature new file mode 100644 index 000000000..e1e0ccd61 --- /dev/null +++ b/tests/e2e/features/responses.feature @@ -0,0 +1,24 @@ +@Authorized +Feature: Responses endpoint API tests + + Background: + Given The service is started locally + And REST API service prefix is /v1 + + Scenario: Check if responses endpoint returns 200 for minimal request + Given The system is in default state + And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva + When I use "responses" to ask question with authorization header + """ + {"input": "Say hello", "model": "{PROVIDER}/{MODEL}", "stream": false} + """ + Then The status code of the response is 200 + + Scenario: Check if responses 
endpoint returns 200 for minimal streaming request + Given The system is in default state + And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva + When I use "responses" to ask question with authorization header + """ + {"input": "Say hello", "model": "{PROVIDER}/{MODEL}", "stream": true} + """ + Then The status code of the response is 200 \ No newline at end of file diff --git a/tests/e2e/features/steps/common_http.py b/tests/e2e/features/steps/common_http.py index 8e64fe5fc..a8dbcf212 100644 --- a/tests/e2e/features/steps/common_http.py +++ b/tests/e2e/features/steps/common_http.py @@ -165,11 +165,26 @@ def check_response_body_schema(context: Context) -> None: @then("The body of the response contains {substring}") def check_response_body_contains(context: Context, substring: str) -> None: - """Check that response body contains a substring.""" + """Check that response body contains a substring. + + Supports {MODEL} and {PROVIDER} placeholders in the substring so + assertions work with any configured provider (e.g. unknown-provider + error message includes the actual model id).
+ """ + assert context.response is not None, "Request needs to be performed first" + expected = replace_placeholders(context, substring) + assert ( + expected in context.response.text + ), f"The response text '{context.response.text}' doesn't contain '{expected}'" + + +@then("The body of the response does not contain {substring}") +def check_response_body_does_not_contain(context: Context, substring: str) -> None: + """Check that response body does not contain a substring.""" assert context.response is not None, "Request needs to be performed first" assert ( - substring in context.response.text - ), f"The response text '{context.response.text}' doesn't contain '{substring}'" + substring not in context.response.text + ), f"The response text '{context.response.text}' contains '{substring}'" @then("The body of the response is the following") diff --git a/tests/e2e/features/streaming_query.feature b/tests/e2e/features/streaming_query.feature index 4e587525e..d4e14b180 100644 --- a/tests/e2e/features/streaming_query.feature +++ b/tests/e2e/features/streaming_query.feature @@ -133,7 +133,7 @@ Feature: streaming_query endpoint API tests {"query": "Say hello", "model": "{MODEL}", "provider":"unknown"} """ Then The status code of the response is 404 - And The body of the response contains Model with ID gpt-4o-mini does not exist + And The body of the response contains Model with ID {MODEL} does not exist And The token metrics should not have changed Scenario: Check if LLM responds properly when XML and JSON attachments are sent diff --git
a/tests/e2e/secrets/invalid-mcp-token b/tests/e2e/secrets/invalid-mcp-token new file mode 100644 index 000000000..3707272a2 --- /dev/null +++ b/tests/e2e/secrets/invalid-mcp-token @@ -0,0 +1 @@ +invalid-token \ No newline at end of file diff --git a/tests/e2e/test_list.txt b/tests/e2e/test_list.txt index 3f94d09e0..0da5cae41 100644 --- a/tests/e2e/test_list.txt +++ b/tests/e2e/test_list.txt @@ -9,11 +9,11 @@ features/conversation_cache_v2.feature features/feedback.feature features/health.feature features/info.feature +features/responses.feature features/query.feature features/rlsapi_v1.feature features/rlsapi_v1_errors.feature features/streaming_query.feature features/rest_api.feature features/mcp.feature -features/mcp_file_auth.feature features/models.feature diff --git a/tests/e2e/utils/llama_stack_shields.py b/tests/e2e/utils/llama_stack_utils.py similarity index 62% rename from tests/e2e/utils/llama_stack_shields.py rename to tests/e2e/utils/llama_stack_utils.py index 4f793c0bf..2a8c66670 100644 --- a/tests/e2e/utils/llama_stack_shields.py +++ b/tests/e2e/utils/llama_stack_utils.py @@ -1,9 +1,12 @@ -"""E2E helpers to unregister and re-register Llama Stack shields via the client API. +"""E2E test utilities for Llama Stack (toolgroups and shields). -Used by the @disable-shields tag: before the scenario we call client.shields.delete() -to unregister the shield; after the scenario we call client.shields.register() -to restore it. Only applies in server mode (Llama Stack as a separate service). -Requires E2E_LLAMA_STACK_URL or E2E_LLAMA_HOSTNAME/E2E_LLAMA_PORT. +This module provides functions to manage MCP toolgroups and shields on a running +Llama Stack instance during end-to-end tests: unregister MCP toolgroups when +switching configurations or testing MCP auth, and unregister/re-register shields +(e.g. for the @disable-shields tag). + +Only applies when running Llama Stack as a separate service (server mode). 
+Requires E2E_LLAMA_STACK_URL or E2E_LLAMA_HOSTNAME and E2E_LLAMA_PORT. """ import asyncio @@ -29,6 +32,54 @@ def _get_llama_stack_client() -> AsyncLlamaStackClient: return AsyncLlamaStackClient(base_url=base_url, api_key=api_key, timeout=timeout) +# ----------------------------------------------------------------------------- +# Toolgroups +# ----------------------------------------------------------------------------- + + +async def _unregister_toolgroup_async(identifier: str) -> None: + """Unregister a toolgroup by identifier; treat a 400 'not found' as already removed.""" + client = _get_llama_stack_client() + try: + await client.toolgroups.unregister(identifier) + except APIConnectionError: + raise + except APIStatusError as e: + # 400 "not found": toolgroup already absent, scenario can proceed + if e.status_code == 400 and "not found" in str(e).lower(): + return None + raise + finally: + await client.close() + + +async def _unregister_mcp_toolgroups_async() -> None: + """Unregister all MCP toolgroups.""" + client = _get_llama_stack_client() + try: + toolgroups = await client.toolgroups.list() + for toolgroup in toolgroups: + if ( + toolgroup.identifier + and toolgroup.provider_id == "model-context-protocol" + ): + await _unregister_toolgroup_async(toolgroup.identifier) + except APIConnectionError: + raise + finally: + await client.close() + + +def unregister_mcp_toolgroups() -> None: + """Unregister all MCP toolgroups.""" + asyncio.run(_unregister_mcp_toolgroups_async()) + + +# ----------------------------------------------------------------------------- +# Shields +# ----------------------------------------------------------------------------- + + async def _unregister_shield_async(identifier: str) -> Optional[tuple[str, str]]: """Unregister a shield by identifier; return (provider_id, provider_shield_id) for restore.""" client = _get_llama_stack_client() diff --git a/tests/e2e/utils/utils.py b/tests/e2e/utils/utils.py index 6b73a0e1f..a47ff1750
100644 --- a/tests/e2e/utils/utils.py +++ b/tests/e2e/utils/utils.py @@ -246,6 +246,35 @@ def remove_config_backup(backup_path: str) -> None: print(f"Warning: Could not remove backup file {backup_path}: {e}") +def clear_llama_stack_storage(container_name: str = "lightspeed-stack") -> None: + """Clear Llama Stack storage in library mode (embedded Llama Stack). + + Removes the ~/.llama directory so that toolgroups and other persisted + state are reset. Used before MCP config scenarios when not running in + server mode (no separate Llama Stack to unregister toolgroups from). + Only runs when using Docker (skipped in Prow). + + Parameters: + container_name (str): Docker container name (default "lightspeed-stack"). + + Returns: + None + """ + if is_prow_environment(): + return + + try: + subprocess.run( + ["docker", "exec", container_name, "sh", "-c", "rm -rf ~/.llama"], + capture_output=True, + text=True, + timeout=10, + check=False, + ) + except subprocess.TimeoutExpired as e: + print(f"Warning: Could not clear Llama Stack storage: {e}") + + def restart_container(container_name: str) -> None: """Restart a Docker container by name and wait until it is healthy. 
@@ -268,7 +297,7 @@ def restart_container(container_name: str) -> None: check=True, ) except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e: - print(f"Failed to restart container {container_name}: {str(e.stderr)}") + print(f"Failed to restart container {container_name}: {e.stderr!s}") raise # Wait for container to be healthy @@ -287,5 +316,4 @@ def replace_placeholders(context: Context, text: str) -> str: """ result = text.replace("{MODEL}", context.default_model) result = result.replace("{PROVIDER}", context.default_provider) - result = result.replace("{VECTOR_STORE_ID}", context.faiss_vector_store_id) - return result + return result.replace("{VECTOR_STORE_ID}", context.faiss_vector_store_id) diff --git a/tests/integration/endpoints/test_query_byok_integration.py b/tests/integration/endpoints/test_query_byok_integration.py new file mode 100644 index 000000000..40191821f --- /dev/null +++ b/tests/integration/endpoints/test_query_byok_integration.py @@ -0,0 +1,1112 @@ +"""Integration tests for /query endpoint BYOK inline and tool RAG functionality.""" + +# pylint: disable=too-many-lines + +from collections.abc import Generator +from typing import Any + +import pytest +from fastapi import Request +from llama_stack_api.openai_responses import OpenAIResponseObject +from llama_stack_client.types import VersionInfo +from pytest_mock import AsyncMockType, MockerFixture +from sqlalchemy.engine import Engine +from sqlalchemy.orm import Session, sessionmaker + +import app.database +import constants +from app.endpoints.query import query_endpoint_handler +from authentication.interface import AuthTuple +from configuration import AppConfig +from models.requests import QueryRequest +from models.responses import QueryResponse + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_byok_vector_io_response(mocker: MockerFixture) -> Any: 
+ """Build a mock vector_io.query response with BYOK RAG chunks. + + Returns a mock with .chunks and .scores attributes simulating a + vector store search result with two chunks. + """ + chunk_1 = mocker.MagicMock() + chunk_1.content = "OpenShift is a Kubernetes distribution by Red Hat." + chunk_1.chunk_id = "chunk-1" + chunk_1.metadata = { + "document_id": "doc-ocp-overview", + "title": "OpenShift Overview", + "reference_url": "https://docs.redhat.com/ocp/overview", + } + + chunk_2 = mocker.MagicMock() + chunk_2.content = "Pods are the smallest deployable units in Kubernetes." + chunk_2.chunk_id = "chunk-2" + chunk_2.metadata = { + "document_id": "doc-k8s-pods", + "title": "Kubernetes Pods", + "reference_url": "https://docs.redhat.com/k8s/pods", + } + + response = mocker.MagicMock() + response.chunks = [chunk_1, chunk_2] + response.scores = [0.95, 0.88] + return response + + +def _make_vector_io_response( + mocker: MockerFixture, + chunks_data: list[tuple[str, str, float]], +) -> Any: + """Build a mock vector_io.query response with arbitrary chunks. + + Parameters: + mocker: pytest-mock fixture. + chunks_data: List of (content, chunk_id, score) tuples. + + Returns: + Mock with .chunks and .scores attributes. + """ + chunks = [] + scores = [] + for content, chunk_id, score in chunks_data: + chunk = mocker.MagicMock() + chunk.content = content + chunk.chunk_id = chunk_id + chunk.metadata = {"document_id": chunk_id} + chunks.append(chunk) + scores.append(score) + + response = mocker.MagicMock() + response.chunks = chunks + response.scores = scores + return response + + +def _build_base_mock_client(mocker: MockerFixture) -> Any: + """Build a base mock Llama Stack client with common stubs. + + Configures models, shields, conversations, version, and a default + responses.create return value. 
+ """ + mock_client = mocker.AsyncMock() + + # Model list + mock_model = mocker.MagicMock() + mock_model.id = "test-provider/test-model" + mock_model.custom_metadata = { + "provider_id": "test-provider", + "model_type": "llm", + } + mock_client.models.list.return_value = [mock_model] + + # Shields (empty) + mock_client.shields.list.return_value = [] + + # Conversations + mock_conversation = mocker.MagicMock() + mock_conversation.id = "conv_" + "a" * 48 + mock_client.conversations.create = mocker.AsyncMock(return_value=mock_conversation) + + # Version + mock_client.inspect.version.return_value = VersionInfo(version="0.4.3") + + # Default response + mock_response = mocker.MagicMock(spec=OpenAIResponseObject) + mock_response.id = "response-byok" + mock_output_item = mocker.MagicMock() + mock_output_item.type = "message" + mock_output_item.role = "assistant" + mock_output_item.content = ( + "Based on the documentation, OpenShift is a Kubernetes distribution." + ) + mock_output_item.refusal = None + mock_response.output = [mock_output_item] + mock_response.stop_reason = "end_turn" + mock_response.tool_calls = [] + mock_usage = mocker.MagicMock() + mock_usage.input_tokens = 50 + mock_usage.output_tokens = 20 + mock_response.usage = mock_usage + mock_client.responses.create.return_value = mock_response + + return mock_client + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture(name="mock_byok_client") +def mock_byok_client_fixture( + mocker: MockerFixture, +) -> Generator[Any, None, None]: + """Mock Llama Stack client with BYOK inline RAG configured. + + Configures vector_io.query to return BYOK RAG chunks and sets + vector_stores.list to empty (no tool-based vector stores). 
+ """ + mock_holder_class = mocker.patch("app.endpoints.query.AsyncLlamaStackClientHolder") + mock_client = _build_base_mock_client(mocker) + + # BYOK vector_io returns results + mock_client.vector_io.query = mocker.AsyncMock( + return_value=_make_byok_vector_io_response(mocker) + ) + + # No tool-based vector stores + mock_vector_stores_response = mocker.MagicMock() + mock_vector_stores_response.data = [] + mock_client.vector_stores.list.return_value = mock_vector_stores_response + + mock_holder_class.return_value.get_client.return_value = mock_client + yield mock_client + + +@pytest.fixture(name="mock_byok_tool_rag_client") +def mock_byok_tool_rag_client_fixture( + mocker: MockerFixture, +) -> Generator[Any, None, None]: + """Mock Llama Stack client with BYOK tool RAG (file_search) configured. + + Configures vector_stores.list with a BYOK store and responses.create + to return a file_search_call output item alongside the assistant message. + """ + mock_holder_class = mocker.patch("app.endpoints.query.AsyncLlamaStackClientHolder") + mock_client = _build_base_mock_client(mocker) + + # vector_io returns empty (no inline RAG) + mock_empty_vector_io = mocker.MagicMock() + mock_empty_vector_io.chunks = [] + mock_empty_vector_io.scores = [] + mock_client.vector_io.query = mocker.AsyncMock(return_value=mock_empty_vector_io) + + # Tool-based vector stores available + mock_vector_store = mocker.MagicMock() + mock_vector_store.id = "vs-byok-knowledge" + mock_list_result = mocker.MagicMock() + mock_list_result.data = [mock_vector_store] + mock_client.vector_stores.list.return_value = mock_list_result + + # Response with file_search tool call + mock_response = mocker.MagicMock(spec=OpenAIResponseObject) + mock_response.id = "response-tool-rag" + + mock_tool_output = mocker.MagicMock() + mock_tool_output.type = "file_search_call" + mock_tool_output.id = "call-fs-1" + mock_tool_output.queries = ["What is OpenShift?"] + mock_tool_output.status = "completed" + + mock_result = 
mocker.MagicMock() + mock_result.file_id = "doc-ocp-1" + mock_result.filename = "openshift-docs.txt" + mock_result.score = 0.92 + mock_result.text = "OpenShift is a Kubernetes distribution by Red Hat." + mock_result.attributes = { + "doc_url": "https://docs.redhat.com/ocp/overview", + "link": "https://docs.redhat.com/ocp/overview", + } + mock_result.model_dump = mocker.Mock( + return_value={ + "file_id": "doc-ocp-1", + "filename": "openshift-docs.txt", + "score": 0.92, + "text": "OpenShift is a Kubernetes distribution by Red Hat.", + "attributes": { + "doc_url": "https://docs.redhat.com/ocp/overview", + }, + } + ) + mock_tool_output.results = [mock_result] + + mock_message = mocker.MagicMock() + mock_message.type = "message" + mock_message.role = "assistant" + mock_message.content = ( + "Based on the documentation, OpenShift is a Kubernetes distribution." + ) + mock_message.refusal = None + + mock_response.output = [mock_tool_output, mock_message] + mock_response.stop_reason = "end_turn" + mock_response.tool_calls = [] + mock_usage = mocker.MagicMock() + mock_usage.input_tokens = 60 + mock_usage.output_tokens = 25 + mock_response.usage = mock_usage + mock_client.responses.create.return_value = mock_response + + mock_holder_class.return_value.get_client.return_value = mock_client + yield mock_client + + +@pytest.fixture(name="patch_db_session", autouse=True) +def patch_db_session_fixture( + test_db_session: Session, + test_db_engine: Engine, +) -> Generator[Session, None, None]: + """Patch global database session to use in-memory test database.""" + original_engine = app.database.engine + original_session_local = app.database.session_local + + app.database.engine = test_db_engine + app.database.session_local = sessionmaker(bind=test_db_engine) + + yield test_db_session + + app.database.engine = original_engine + app.database.session_local = original_session_local + + +@pytest.fixture(name="byok_config") +def byok_config_fixture(test_config: AppConfig, mocker: 
MockerFixture) -> AppConfig: + """Load test config and patch BYOK RAG configuration. + + Adds a BYOK RAG entry and inline RAG strategy so that inline RAG + code paths are exercised with real configuration logic. + """ + byok_entry = mocker.MagicMock() + byok_entry.rag_id = "test-knowledge" + byok_entry.vector_db_id = "vs-byok-knowledge" + byok_entry.score_multiplier = 1.0 + byok_entry.model_dump.return_value = { + "rag_id": "test-knowledge", + "rag_type": "inline::faiss", + "embedding_model": "sentence-transformers/all-mpnet-base-v2", + "embedding_dimension": 768, + "vector_db_id": "vs-byok-knowledge", + "db_path": "/tmp/test-db", + "score_multiplier": 1.0, + } + + # Patch the loaded configuration's byok_rag and rag.inline + test_config.configuration.byok_rag = [byok_entry] + test_config.configuration.rag.inline = ["test-knowledge"] + + return test_config + + +@pytest.fixture(name="byok_tool_config") +def byok_tool_config_fixture( + test_config: AppConfig, mocker: MockerFixture +) -> AppConfig: + """Load test config with BYOK RAG configured for tool-based (file_search) usage. + + Sets rag.inline to empty and rag.tool to include the BYOK store, + so only tool-based RAG is active. 
+ """ + byok_entry = mocker.MagicMock() + byok_entry.rag_id = "test-knowledge" + byok_entry.vector_db_id = "vs-byok-knowledge" + byok_entry.score_multiplier = 1.0 + byok_entry.model_dump.return_value = { + "rag_id": "test-knowledge", + "rag_type": "inline::faiss", + "embedding_model": "sentence-transformers/all-mpnet-base-v2", + "embedding_dimension": 768, + "vector_db_id": "vs-byok-knowledge", + "db_path": "/tmp/test-db", + "score_multiplier": 1.0, + } + + test_config.configuration.byok_rag = [byok_entry] + test_config.configuration.rag.inline = [] + test_config.configuration.rag.tool = ["test-knowledge"] + + return test_config + + +# ============================================================================== +# Inline BYOK RAG Tests +# ============================================================================== + + +@pytest.mark.asyncio +async def test_query_byok_inline_rag_injects_context( + byok_config: AppConfig, + mock_byok_client: AsyncMockType, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that inline BYOK RAG fetches chunks and injects context into the query. + + Verifies: + - vector_io.query is called for BYOK inline RAG + - RAG context is injected into the responses.create input + - Response includes RAG chunks from inline sources + """ + _ = byok_config + + query_request = QueryRequest(query="What is OpenShift?") + + response = await query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert response.response is not None + + # Verify vector_io.query was called for inline RAG + mock_byok_client.vector_io.query.assert_called() + # call_args.kwargs holds the keyword arguments of the most recent call to vector_io.query. + # e.g. "vector_store_id" is the store queried, "query" is the search text. + call_kwargs = mock_byok_client.vector_io.query.call_args.kwargs + assert call_kwargs["query"] == "What is OpenShift?" 
+ + # Verify RAG context was injected into responses.create input + # Use call_args_list[0] — the first call is the main query; + # a second call may follow for topic summary generation. + create_kwargs = mock_byok_client.responses.create.call_args_list[0].kwargs + input_text = create_kwargs["input"] + assert "file_search found" in input_text + assert "OpenShift is a Kubernetes distribution" in input_text + + # Verify RAG chunks are included in the response + assert response.rag_chunks is not None + assert len(response.rag_chunks) > 0 + + +@pytest.mark.asyncio +async def test_query_byok_inline_rag_returns_referenced_documents( + byok_config: AppConfig, + mock_byok_client: AsyncMockType, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that inline BYOK RAG extracts referenced documents from chunks. + + Verifies: + - Referenced documents are extracted from BYOK RAG chunk metadata + - Documents include URLs from chunk metadata + """ + _ = byok_config + _ = mock_byok_client + + query_request = QueryRequest(query="What is OpenShift?") + + response = await query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert response.referenced_documents is not None + assert len(response.referenced_documents) == 2 + + # Verify known document metadata propagated from mock chunks + doc_urls = [ + str(doc.doc_url) for doc in response.referenced_documents if doc.doc_url + ] + assert any( + "docs.redhat.com/ocp/overview" in url for url in doc_urls + ), f"Expected ocp/overview URL in {doc_urls}" + assert any( + "docs.redhat.com/k8s/pods" in url for url in doc_urls + ), f"Expected k8s/pods URL in {doc_urls}" + + doc_titles = [ + doc.doc_title for doc in response.referenced_documents if doc.doc_title + ] + assert "OpenShift Overview" in doc_titles + assert "Kubernetes Pods" in doc_titles + + +@pytest.mark.asyncio +async def test_query_byok_inline_rag_with_request_vector_store_ids( + test_config: 
AppConfig, + mocker: MockerFixture, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that per-request vector_store_ids override config-based inline RAG. + + Config has rag.inline = ["source-a"] (resolves to vs-source-a). + Request passes vector_store_ids = ["vs-source-b"]. + Only vs-source-b should be queried, proving the override works. + (passing vector_store_ids overrides config) + + Verifies: + - vector_io.query is called with the request-specified store, not config + - The config-based store is NOT queried + """ + entry_a = mocker.MagicMock() + entry_a.rag_id = "source-a" + entry_a.vector_db_id = "vs-source-a" + entry_a.score_multiplier = 1.0 + + entry_b = mocker.MagicMock() + entry_b.rag_id = "source-b" + entry_b.vector_db_id = "vs-source-b" + entry_b.score_multiplier = 1.0 + + test_config.configuration.byok_rag = [entry_a, entry_b] + test_config.configuration.rag.inline = ["source-a"] + + mock_holder_class = mocker.patch("app.endpoints.query.AsyncLlamaStackClientHolder") + mock_client = _build_base_mock_client(mocker) + + mock_client.vector_io.query = mocker.AsyncMock( + return_value=_make_byok_vector_io_response(mocker) + ) + + mock_vs_resp = mocker.MagicMock() + mock_vs_resp.data = [] + mock_client.vector_stores.list.return_value = mock_vs_resp + + mock_holder_class.return_value.get_client.return_value = mock_client + + # Override: request specifies vs-source-b, not the config's vs-source-a + query_request = QueryRequest( + query="What is OpenShift?", + vector_store_ids=["vs-source-b"], + ) + + await query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + # Verify only vs-source-b was queried (not the config's vs-source-a) + assert mock_client.vector_io.query.call_count == 1 + # call_args.kwargs holds the keyword arguments of the most recent call to vector_io.query. + # e.g. "vector_store_id" is the store queried, "query" is the search text. 
+ call_kwargs = mock_client.vector_io.query.call_args.kwargs + assert call_kwargs["vector_store_id"] == "vs-source-b" + + +@pytest.mark.asyncio +async def test_query_byok_request_vector_store_ids_filters_configured_stores( + test_config: AppConfig, + mocker: MockerFixture, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that request vector_store_ids selects a subset of stores configured in rag.inline. + + Both source-a and source-b are registered in byok_rag and listed in rag.inline. + The request passes vector_store_ids = ["vs-source-a"] to select only one. + + Verifies: + - vector_io.query is called exactly once (for vs-source-a only) + - vs-source-b is NOT queried despite being in rag.inline + - Returned chunks only reference source-a + """ + entry_a = mocker.MagicMock() + entry_a.rag_id = "source-a" + entry_a.vector_db_id = "vs-source-a" + entry_a.score_multiplier = 1.0 + + entry_b = mocker.MagicMock() + entry_b.rag_id = "source-b" + entry_b.vector_db_id = "vs-source-b" + entry_b.score_multiplier = 1.0 + + # Both sources are in config + test_config.configuration.byok_rag = [entry_a, entry_b] + test_config.configuration.rag.inline = ["source-a", "source-b"] + + mock_holder_class = mocker.patch("app.endpoints.query.AsyncLlamaStackClientHolder") + mock_client = _build_base_mock_client(mocker) + + mock_client.vector_io.query = mocker.AsyncMock( + return_value=_make_byok_vector_io_response(mocker) + ) + + mock_vs_resp = mocker.MagicMock() + mock_vs_resp.data = [] + mock_client.vector_stores.list.return_value = mock_vs_resp + + mock_holder_class.return_value.get_client.return_value = mock_client + + # Request narrows down to only vs-source-a + query_request = QueryRequest( + query="What is OpenShift?", + vector_store_ids=["vs-source-a"], + ) + + response = await query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + # Only vs-source-a should have been queried + assert 
mock_client.vector_io.query.call_count == 1 + # call_args.kwargs holds the keyword arguments of the most recent call to vector_io.query. + # e.g. "vector_store_id" is the store queried, "query" is the search text. + call_kwargs = mock_client.vector_io.query.call_args.kwargs + assert call_kwargs["vector_store_id"] == "vs-source-a" + + # Chunks should only come from source-a + assert response.rag_chunks is not None + assert len(response.rag_chunks) == 2 + assert all(chunk.source == "source-a" for chunk in response.rag_chunks) + + +@pytest.mark.asyncio +async def test_query_byok_inline_rag_empty_vector_store_ids_returns_no_chunks( + byok_config: AppConfig, + mock_byok_client: AsyncMockType, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that passing an empty vector_store_ids list produces no RAG chunks. + + Verifies: + - vector_io.query is never called when vector_store_ids=[] + - Response contains no RAG chunks + - Response still succeeds + """ + _ = byok_config + + query_request = QueryRequest(query="What is OpenShift?", vector_store_ids=[]) + + response = await query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert response.response is not None + mock_byok_client.vector_io.query.assert_not_called() + assert not response.rag_chunks + + +@pytest.mark.asyncio +async def test_query_byok_inline_rag_error_is_handled_gracefully( + byok_config: AppConfig, + mock_byok_client: AsyncMockType, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that BYOK RAG search failures are handled gracefully. 
+ + Verifies: + - When vector_io.query raises an exception, the query still succeeds + - The error is silently handled (BYOK search errors are non-fatal) + """ + _ = byok_config + + mock_byok_client.vector_io.query.side_effect = Exception("Connection refused") + + query_request = QueryRequest(query="What is OpenShift?") + + response = await query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + # Query should succeed despite BYOK RAG failure, but with no chunks + assert isinstance(response, QueryResponse) + assert not response.rag_chunks + + +# ============================================================================== +# Tool-based BYOK RAG Tests +# ============================================================================== + + +@pytest.mark.asyncio +async def test_query_byok_tool_rag_returns_tool_calls( + byok_tool_config: AppConfig, + mock_byok_tool_rag_client: AsyncMockType, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that BYOK tool RAG results include file_search tool calls. + + Verifies: + - Response includes tool_calls from file_search_call output + - Tool call name is file_search + """ + _ = byok_tool_config + _ = mock_byok_tool_rag_client + + query_request = QueryRequest(query="What is OpenShift?") + + response = await query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert response.tool_calls is not None + assert len(response.tool_calls) > 0 + assert response.tool_calls[0].name == "file_search" + + +@pytest.mark.asyncio +async def test_query_byok_tool_rag_referenced_documents( + byok_tool_config: AppConfig, + mock_byok_tool_rag_client: AsyncMockType, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that BYOK tool RAG extracts referenced documents from file_search results. 
+ + Verifies: + - Referenced documents are extracted from file_search_call results + - Documents include proper metadata + """ + _ = byok_tool_config + _ = mock_byok_tool_rag_client + + query_request = QueryRequest(query="What is OpenShift?") + + response = await query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert response.referenced_documents is not None + assert len(response.referenced_documents) >= 1 + + # Verify known values from the mock file_search result propagated + doc_urls = [ + str(doc.doc_url) for doc in response.referenced_documents if doc.doc_url + ] + assert any( + "docs.redhat.com/ocp/overview" in url for url in doc_urls + ), f"Expected ocp/overview URL in {doc_urls}" + + +# ============================================================================== +# Combined Inline + Tool RAG Tests +# ============================================================================== + + +@pytest.mark.asyncio +async def test_query_byok_combined_inline_and_tool_rag( # pylint: disable=too-many-locals,too-many-statements + test_config: AppConfig, + mocker: MockerFixture, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that inline and tool-based BYOK RAG results are combined. 
+ + Verifies: + - Both inline RAG chunks and tool RAG chunks appear in response + - RAG chunks from both sources are merged + """ + # Configure both inline and tool RAG + byok_entry = mocker.MagicMock() + byok_entry.rag_id = "test-knowledge" + byok_entry.vector_db_id = "vs-byok-knowledge" + byok_entry.score_multiplier = 1.0 + test_config.configuration.byok_rag = [byok_entry] + test_config.configuration.rag.inline = ["test-knowledge"] + test_config.configuration.rag.tool = ["test-knowledge"] + + # Mock Llama Stack client + mock_holder_class = mocker.patch("app.endpoints.query.AsyncLlamaStackClientHolder") + mock_client = _build_base_mock_client(mocker) + + # Inline RAG returns chunks via vector_io + mock_client.vector_io.query = mocker.AsyncMock( + return_value=_make_byok_vector_io_response(mocker) + ) + + # Tool RAG vector stores + mock_vector_store = mocker.MagicMock() + mock_vector_store.id = "vs-byok-knowledge" + mock_list_result = mocker.MagicMock() + mock_list_result.data = [mock_vector_store] + mock_client.vector_stores.list.return_value = mock_list_result + + # Response includes file_search_call (tool RAG result) + mock_response = mocker.MagicMock(spec=OpenAIResponseObject) + mock_response.id = "response-combined" + + mock_tool_output = mocker.MagicMock() + mock_tool_output.type = "file_search_call" + mock_tool_output.id = "call-fs-combined" + mock_tool_output.queries = ["What is OpenShift?"] + mock_tool_output.status = "completed" + + mock_result = mocker.MagicMock() + mock_result.file_id = "doc-tool-1" + mock_result.filename = "tool-doc.txt" + mock_result.score = 0.90 + mock_result.text = "Tool-based RAG result about OpenShift." 
+ mock_result.attributes = {"doc_url": "https://example.com/tool-doc"} + mock_result.model_dump = mocker.Mock( + return_value={ + "file_id": "doc-tool-1", + "filename": "tool-doc.txt", + "score": 0.90, + "text": "Tool-based RAG result about OpenShift.", + "attributes": {"doc_url": "https://example.com/tool-doc"}, + } + ) + mock_tool_output.results = [mock_result] + + mock_message = mocker.MagicMock() + mock_message.type = "message" + mock_message.role = "assistant" + mock_message.content = "Combined answer from inline and tool RAG." + mock_message.refusal = None + + mock_response.output = [mock_tool_output, mock_message] + mock_response.stop_reason = "end_turn" + mock_response.tool_calls = [] + mock_usage = mocker.MagicMock() + mock_usage.input_tokens = 80 + mock_usage.output_tokens = 30 + mock_response.usage = mock_usage + mock_client.responses.create.return_value = mock_response + + mock_holder_class.return_value.get_client.return_value = mock_client + + query_request = QueryRequest(query="What is OpenShift?") + + response = await query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + # Verify both inline and tool RAG chunks are present + assert response.rag_chunks is not None + assert len(response.rag_chunks) == 3 + + # Verify tool calls are present (from tool RAG) + assert response.tool_calls is not None + assert len(response.tool_calls) == 1 + + +# ============================================================================== +# Inline RAG rag_id Resolution Tests +# ============================================================================== + + +@pytest.mark.asyncio +async def test_query_byok_inline_rag_only_configured_rag_id_is_queried( + test_config: AppConfig, + mocker: MockerFixture, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that only the rag_id listed in rag.inline triggers retrieval. 
+ + Two BYOK sources are registered (source-a and source-b) but only + source-a is listed in rag.inline. Only the vector_db_id for + source-a should be queried and only its chunks should appear in the response. + + Verifies: + - vector_io.query is called exactly once (for the configured source) + - The call targets the correct vector_db_id + - Returned chunks only reference source-a + - source-b chunks are absent + """ + entry_a = mocker.MagicMock() + entry_a.rag_id = "source-a" + entry_a.vector_db_id = "vs-source-a" + entry_a.score_multiplier = 1.0 + + entry_b = mocker.MagicMock() + entry_b.rag_id = "source-b" + entry_b.vector_db_id = "vs-source-b" + entry_b.score_multiplier = 1.0 + + test_config.configuration.byok_rag = [entry_a, entry_b] + test_config.configuration.rag.inline = ["source-a"] + + mock_holder_class = mocker.patch("app.endpoints.query.AsyncLlamaStackClientHolder") + mock_client = _build_base_mock_client(mocker) + + mock_client.vector_io.query = mocker.AsyncMock( + return_value=_make_byok_vector_io_response(mocker) + ) + + mock_vs_resp = mocker.MagicMock() + mock_vs_resp.data = [] + mock_client.vector_stores.list.return_value = mock_vs_resp + + mock_holder_class.return_value.get_client.return_value = mock_client + + query_request = QueryRequest(query="What is OpenShift?") + + response = await query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert mock_client.vector_io.query.call_count == 1 + # call_args.kwargs holds the keyword arguments of the most recent call to vector_io.query. + # e.g. "vector_store_id" is the store queried, "query" is the search text. 
+ call_kwargs = mock_client.vector_io.query.call_args.kwargs + assert call_kwargs["vector_store_id"] == "vs-source-a" + + assert response.rag_chunks is not None + assert len(response.rag_chunks) == 2 + sources = {chunk.source for chunk in response.rag_chunks} + assert "source-a" in sources + assert "source-b" not in sources + + +# ============================================================================== +# Score Multiplier Priority Tests +# ============================================================================== + + +@pytest.mark.asyncio +async def test_query_byok_score_multiplier_shifts_chunk_priority( # pylint: disable=too-many-locals + test_config: AppConfig, + mocker: MockerFixture, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that score_multiplier can shift chunk priority across sources. + + Doc A (source-a) has high base similarity (0.90) with multiplier 1.0. + Doc B (source-b) has low base similarity (0.40) with multiplier 5.0. + After weighting: Doc A = 0.90, Doc B = 2.00. + Doc B should appear above Doc A in the final chunks. 
+ + Verifies: + - The chunk with the higher weighted score appears first + - score_multiplier correctly influences ranking + """ + entry_a = mocker.MagicMock() + entry_a.rag_id = "source-a" + entry_a.vector_db_id = "vs-source-a" + entry_a.score_multiplier = 1.0 + + entry_b = mocker.MagicMock() + entry_b.rag_id = "source-b" + entry_b.vector_db_id = "vs-source-b" + entry_b.score_multiplier = 5.0 + + test_config.configuration.byok_rag = [entry_a, entry_b] + test_config.configuration.rag.inline = ["source-a", "source-b"] + + mock_holder_class = mocker.patch("app.endpoints.query.AsyncLlamaStackClientHolder") + mock_client = _build_base_mock_client(mocker) + + # Source A: high base similarity + resp_a = _make_vector_io_response( + mocker, + [ + ("Doc A content - high similarity", "doc-a", 0.90), + ], + ) + # Source B: low base similarity + resp_b = _make_vector_io_response( + mocker, + [ + ("Doc B content - low similarity", "doc-b", 0.40), + ], + ) + + # Return different results per vector store + async def _side_effect(**kwargs: Any) -> Any: + if kwargs["vector_store_id"] == "vs-source-a": + return resp_a + return resp_b + + mock_client.vector_io.query = mocker.AsyncMock(side_effect=_side_effect) + + mock_vs_resp = mocker.MagicMock() + mock_vs_resp.data = [] + mock_client.vector_stores.list.return_value = mock_vs_resp + + mock_holder_class.return_value.get_client.return_value = mock_client + + query_request = QueryRequest(query="test query") + + response = await query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert response.rag_chunks is not None + assert len(response.rag_chunks) == 2 + + # Doc B (weighted 2.0) should rank above Doc A (weighted 0.9) + first_chunk = response.rag_chunks[0] + second_chunk = response.rag_chunks[1] + assert first_chunk.source == "source-b" + assert second_chunk.source == "source-a" + assert first_chunk.score > second_chunk.score + + +# 
============================================================================== +# BYOK_RAG_MAX_CHUNKS Capping Tests +# ============================================================================== + + +@pytest.mark.asyncio +async def test_query_byok_max_chunks_caps_retrieved_results( # pylint: disable=too-many-locals + test_config: AppConfig, + mocker: MockerFixture, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that BYOK_RAG_MAX_CHUNKS caps the number of returned chunks. + + A single source returns more chunks than BYOK_RAG_MAX_CHUNKS allows. + The response should contain at most BYOK_RAG_MAX_CHUNKS chunks and + they should be the highest-scored ones. + + Verifies: + - Number of RAG chunks does not exceed BYOK_RAG_MAX_CHUNKS + - Returned chunks are the top-scoring ones + """ + entry = mocker.MagicMock() + entry.rag_id = "big-source" + entry.vector_db_id = "vs-big-source" + entry.score_multiplier = 1.0 + + test_config.configuration.byok_rag = [entry] + test_config.configuration.rag.inline = ["big-source"] + + mock_holder_class = mocker.patch("app.endpoints.query.AsyncLlamaStackClientHolder") + mock_client = _build_base_mock_client(mocker) + + # Generate more chunks than BYOK_RAG_MAX_CHUNKS + num_chunks = constants.BYOK_RAG_MAX_CHUNKS + 1 + chunks_data = [ + (f"Chunk content {i}", f"chunk-{i}", round(0.50 + i * 0.03, 2)) + for i in range(num_chunks) + ] + # Scores increase with index (step 0.03 from 0.50), so the last chunk scores highest + mock_client.vector_io.query = mocker.AsyncMock( + return_value=_make_vector_io_response(mocker, chunks_data) + ) + + mock_vs_resp = mocker.MagicMock() + mock_vs_resp.data = [] + mock_client.vector_stores.list.return_value = mock_vs_resp + + mock_holder_class.return_value.get_client.return_value = mock_client + + query_request = QueryRequest(query="test query") + + response = await query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert
response.rag_chunks is not None + assert len(response.rag_chunks) == constants.BYOK_RAG_MAX_CHUNKS + + # Verify chunks are sorted by score descending (highest first) + scores = [chunk.score for chunk in response.rag_chunks] + assert scores == sorted(scores, reverse=True) + + # The lowest-scored chunks from the original set should be excluded + # The highest score in the original set is at the last index + highest_original_score = chunks_data[-1][2] # score of the last chunk + assert response.rag_chunks[0].score == highest_original_score + + +@pytest.mark.asyncio +async def test_query_byok_max_chunks_caps_across_multiple_sources( # pylint: disable=too-many-locals + test_config: AppConfig, + mocker: MockerFixture, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that BYOK_RAG_MAX_CHUNKS caps chunks across multiple sources. + + Two sources each return several chunks. The combined result should + not exceed BYOK_RAG_MAX_CHUNKS and should contain the globally + highest-scored chunks regardless of source. 
+ + Verifies: + - Total chunks across sources are capped at BYOK_RAG_MAX_CHUNKS + - Top-scoring chunks from both sources are included + """ + entry_a = mocker.MagicMock() + entry_a.rag_id = "source-a" + entry_a.vector_db_id = "vs-source-a" + entry_a.score_multiplier = 1.0 + + entry_b = mocker.MagicMock() + entry_b.rag_id = "source-b" + entry_b.vector_db_id = "vs-source-b" + entry_b.score_multiplier = 1.0 + + test_config.configuration.byok_rag = [entry_a, entry_b] + test_config.configuration.rag.inline = ["source-a", "source-b"] + + mock_holder_class = mocker.patch("app.endpoints.query.AsyncLlamaStackClientHolder") + mock_client = _build_base_mock_client(mocker) + + # Overlapping score bands so top-k must pick from both sources + n = constants.BYOK_RAG_MAX_CHUNKS + resp_a = _make_vector_io_response( + mocker, + [ + (f"Source A chunk {i}", f"a-chunk-{i}", round(0.70 + i * 0.05, 2)) + for i in range(n) + ], + ) + resp_b = _make_vector_io_response( + mocker, + [ + (f"Source B chunk {i}", f"b-chunk-{i}", round(0.72 + i * 0.05, 2)) + for i in range(n) + ], + ) + + async def _side_effect(**kwargs: Any) -> Any: + if kwargs["vector_store_id"] == "vs-source-a": + return resp_a + return resp_b + + mock_client.vector_io.query = mocker.AsyncMock(side_effect=_side_effect) + + mock_vs_resp = mocker.MagicMock() + mock_vs_resp.data = [] + mock_client.vector_stores.list.return_value = mock_vs_resp + + mock_holder_class.return_value.get_client.return_value = mock_client + + query_request = QueryRequest(query="test query") + + response = await query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert response.rag_chunks is not None + assert len(response.rag_chunks) == constants.BYOK_RAG_MAX_CHUNKS + + scores = [chunk.score for chunk in response.rag_chunks] + assert scores == sorted(scores, reverse=True) + + # Both sources must survive the cap + sources = {chunk.source for chunk in response.rag_chunks} + assert 
"source-a" in sources + assert "source-b" in sources + + # Lowest-scoring chunks from each source must be dropped + chunk_contents = {chunk.content for chunk in response.rag_chunks} + assert "Source A chunk 0" not in chunk_contents + assert "Source B chunk 0" not in chunk_contents diff --git a/tests/integration/endpoints/test_streaming_query_byok_integration.py b/tests/integration/endpoints/test_streaming_query_byok_integration.py new file mode 100644 index 000000000..5f58f6036 --- /dev/null +++ b/tests/integration/endpoints/test_streaming_query_byok_integration.py @@ -0,0 +1,1099 @@ +"""Integration tests for /streaming_query endpoint BYOK inline and tool RAG functionality.""" + +# pylint: disable=too-many-lines + +import json +from collections.abc import AsyncIterator, Generator +from typing import Any + +import pytest +from fastapi import Request, status +from fastapi.responses import StreamingResponse +from llama_stack_api.openai_responses import OpenAIResponseObject +from pytest_mock import AsyncMockType, MockerFixture +from sqlalchemy.engine import Engine +from sqlalchemy.orm import Session, sessionmaker + +import app.database +import constants +from app.endpoints.streaming_query import streaming_query_endpoint_handler +from authentication.interface import AuthTuple +from configuration import AppConfig +from models.requests import QueryRequest +from tests.integration.endpoints.test_query_byok_integration import ( + _build_base_mock_client, + _make_byok_vector_io_response, + _make_vector_io_response, +) + + +async def _collect_sse_events(response: StreamingResponse) -> list[dict[str, Any]]: + """Consume a StreamingResponse and parse SSE events into dicts. + + Parameters: + response: The StreamingResponse to consume. + + Returns: + List of parsed JSON event dicts from ``data:`` lines. 
+ """ + events: list[dict[str, Any]] = [] + async for chunk in response.body_iterator: + text = chunk if isinstance(chunk, str) else bytes(chunk).decode() + for line in text.strip().splitlines(): + if line.startswith("data: "): + try: + events.append(json.loads(line[6:])) + except json.JSONDecodeError: + pass + return events + + +def _build_base_streaming_mock_client(mocker: MockerFixture) -> Any: + """Build a base mock Llama Stack client configured for streaming responses. + + Extends the base query mock client with streaming-specific stubs: + conversations.items.create and a streaming responses.create. + """ + mock_client = _build_base_mock_client(mocker) + + # Streaming additions + mock_client.conversations.items.create = mocker.AsyncMock() + + async def _mock_stream() -> AsyncIterator[Any]: + chunk = mocker.MagicMock() + chunk.type = "response.output_text.done" + chunk.text = ( + "Based on the documentation, OpenShift is a Kubernetes distribution." + ) + yield chunk + + # Emit response.completed so referenced_documents propagate to end event + completed_chunk = mocker.MagicMock() + completed_chunk.type = "response.completed" + mock_final = mocker.MagicMock(spec=OpenAIResponseObject) + mock_final.id = "response-inline-stream" + mock_final.error = None + mock_usage = mocker.MagicMock() + mock_usage.input_tokens = 50 + mock_usage.output_tokens = 20 + mock_final.usage = mock_usage + mock_final.output = [] + completed_chunk.response = mock_final + yield completed_chunk + + async def _responses_create(**kwargs: Any) -> Any: + if kwargs.get("stream", True): + return _mock_stream() + mock_resp = mocker.MagicMock() + mock_resp.output = [mocker.MagicMock(content="topic summary")] + return mock_resp + + mock_client.responses.create = mocker.AsyncMock(side_effect=_responses_create) + + return mock_client + + +# --------------------------------------------------------------------------- +# Fixtures +# 
--------------------------------------------------------------------------- + + +@pytest.fixture(name="patch_db_session", autouse=True) +def patch_db_session_fixture( + test_db_session: Session, + test_db_engine: Engine, +) -> Generator[Session, None, None]: + """Patch global database session to use in-memory test database.""" + original_engine = app.database.engine + original_session_local = app.database.session_local + + app.database.engine = test_db_engine + app.database.session_local = sessionmaker(bind=test_db_engine) + + yield test_db_session + + app.database.engine = original_engine + app.database.session_local = original_session_local + + +@pytest.fixture(name="mock_streaming_byok_client") +def mock_streaming_byok_client_fixture( + mocker: MockerFixture, +) -> Generator[Any, None, None]: + """Mock Llama Stack client with BYOK inline RAG configured for streaming. + + Configures vector_io.query to return BYOK RAG chunks and sets + vector_stores.list to empty (no tool-based vector stores). + """ + mock_holder_class = mocker.patch( + "app.endpoints.streaming_query.AsyncLlamaStackClientHolder" + ) + mock_client = _build_base_streaming_mock_client(mocker) + + # BYOK vector_io returns results + mock_client.vector_io.query = mocker.AsyncMock( + return_value=_make_byok_vector_io_response(mocker) + ) + + # No tool-based vector stores + mock_vector_stores_response = mocker.MagicMock() + mock_vector_stores_response.data = [] + mock_client.vector_stores.list.return_value = mock_vector_stores_response + + mock_holder_class.return_value.get_client.return_value = mock_client + yield mock_client + + +@pytest.fixture(name="mock_streaming_byok_tool_client") +def mock_streaming_byok_tool_client_fixture( # pylint: disable=too-many-statements + mocker: MockerFixture, +) -> Generator[Any, None, None]: + """Mock Llama Stack client with BYOK tool RAG (file_search) for streaming. 
+ + Configures vector_stores.list with a BYOK store and responses.create + to stream file_search_call output items alongside the assistant message. + """ + mock_holder_class = mocker.patch( + "app.endpoints.streaming_query.AsyncLlamaStackClientHolder" + ) + mock_client = _build_base_streaming_mock_client(mocker) + + # vector_io returns empty (no inline RAG) + mock_empty_vector_io = mocker.MagicMock() + mock_empty_vector_io.chunks = [] + mock_empty_vector_io.scores = [] + mock_client.vector_io.query = mocker.AsyncMock(return_value=mock_empty_vector_io) + + # Tool-based vector stores available + mock_vector_store = mocker.MagicMock() + mock_vector_store.id = "vs-byok-knowledge" + mock_list_result = mocker.MagicMock() + mock_list_result.data = [mock_vector_store] + mock_client.vector_stores.list.return_value = mock_list_result + + # Build a streaming response with file_search and completion events + async def _mock_tool_stream() -> AsyncIterator[Any]: + # file_search output item done + item_done_chunk = mocker.MagicMock() + item_done_chunk.type = "response.output_item.done" + item_done_chunk.output_index = 0 + + mock_item = mocker.MagicMock() + mock_item.type = "file_search_call" + mock_item.id = "call-fs-stream-1" + mock_item.queries = ["What is OpenShift?"] + mock_item.status = "completed" + + mock_result = mocker.MagicMock() + mock_result.file_id = "doc-ocp-1" + mock_result.filename = "openshift-docs.txt" + mock_result.score = 0.92 + mock_result.text = "OpenShift is a Kubernetes distribution by Red Hat." 
+ mock_result.attributes = { + "doc_url": "https://docs.redhat.com/ocp/overview", + } + mock_result.model_dump = mocker.Mock( + return_value={ + "file_id": "doc-ocp-1", + "filename": "openshift-docs.txt", + "score": 0.92, + "text": "OpenShift is a Kubernetes distribution.", + "attributes": {"doc_url": "https://docs.redhat.com/ocp/overview"}, + } + ) + mock_item.results = [mock_result] + item_done_chunk.item = mock_item + yield item_done_chunk + + # Text done + text_done_chunk = mocker.MagicMock() + text_done_chunk.type = "response.output_text.done" + text_done_chunk.text = ( + "Based on the documentation, OpenShift is a Kubernetes distribution." + ) + yield text_done_chunk + + # Response completed + completed_chunk = mocker.MagicMock() + completed_chunk.type = "response.completed" + mock_final_response = mocker.MagicMock(spec=OpenAIResponseObject) + mock_final_response.id = "response-tool-stream" + mock_final_response.error = None + + mock_usage = mocker.MagicMock() + mock_usage.input_tokens = 60 + mock_usage.output_tokens = 25 + mock_final_response.usage = mock_usage + + # file_search results in the final response output + mock_fs_output = mocker.MagicMock() + mock_fs_output.type = "file_search_call" + mock_fs_output.id = "call-fs-stream-1" + mock_fs_output.results = [mock_result] + mock_final_response.output = [mock_fs_output] + + completed_chunk.response = mock_final_response + yield completed_chunk + + async def _responses_create(**kwargs: Any) -> Any: + if kwargs.get("stream", True): + return _mock_tool_stream() + mock_resp = mocker.MagicMock() + mock_resp.output = [mocker.MagicMock(content="topic summary")] + return mock_resp + + mock_client.responses.create = mocker.AsyncMock(side_effect=_responses_create) + + mock_holder_class.return_value.get_client.return_value = mock_client + yield mock_client + + +@pytest.fixture(name="byok_config") +def byok_config_fixture(test_config: AppConfig, mocker: MockerFixture) -> AppConfig: + """Load test config and patch BYOK 
RAG configuration for inline RAG.""" + byok_entry = mocker.MagicMock() + byok_entry.rag_id = "test-knowledge" + byok_entry.vector_db_id = "vs-byok-knowledge" + byok_entry.score_multiplier = 1.0 + byok_entry.model_dump.return_value = { + "rag_id": "test-knowledge", + "rag_type": "inline::faiss", + "embedding_model": "sentence-transformers/all-mpnet-base-v2", + "embedding_dimension": 768, + "vector_db_id": "vs-byok-knowledge", + "db_path": "/tmp/test-db", + "score_multiplier": 1.0, + } + + test_config.configuration.byok_rag = [byok_entry] + test_config.configuration.rag.inline = ["test-knowledge"] + + return test_config + + +@pytest.fixture(name="byok_tool_config") +def byok_tool_config_fixture( + test_config: AppConfig, mocker: MockerFixture +) -> AppConfig: + """Load test config with BYOK RAG configured for tool-based (file_search) usage.""" + byok_entry = mocker.MagicMock() + byok_entry.rag_id = "test-knowledge" + byok_entry.vector_db_id = "vs-byok-knowledge" + byok_entry.score_multiplier = 1.0 + byok_entry.model_dump.return_value = { + "rag_id": "test-knowledge", + "rag_type": "inline::faiss", + "embedding_model": "sentence-transformers/all-mpnet-base-v2", + "embedding_dimension": 768, + "vector_db_id": "vs-byok-knowledge", + "db_path": "/tmp/test-db", + "score_multiplier": 1.0, + } + + test_config.configuration.byok_rag = [byok_entry] + test_config.configuration.rag.inline = [] + test_config.configuration.rag.tool = ["test-knowledge"] + + return test_config + + +# ============================================================================== +# Inline BYOK RAG Streaming Tests +# ============================================================================== + + +@pytest.mark.asyncio +async def test_streaming_query_byok_inline_rag_injects_context( + byok_config: AppConfig, + mock_streaming_byok_client: AsyncMockType, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that inline BYOK RAG context is injected into streaming query input. 
+ + Verifies: + - RAG context from vector_io.query is injected into responses.create input + - Input contains formatted file_search results + """ + _ = byok_config + + query_request = QueryRequest(query="What is OpenShift?") + + response = await streaming_query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert isinstance(response, StreamingResponse) + + # Verify RAG context was injected into responses.create input + # responses.create is the mock for the OpenAI-compatible LLM API call. + # .kwargs holds its keyword arguments, e.g. "input" is the full prompt text sent to the model. + create_call = mock_streaming_byok_client.responses.create.call_args_list[0] + call_kwargs = create_call.kwargs + input_text = call_kwargs["input"] + assert "file_search found" in input_text + assert "OpenShift is a Kubernetes distribution" in input_text + + +@pytest.mark.asyncio +async def test_streaming_query_byok_inline_rag_with_request_vector_store_ids( + test_config: AppConfig, + mocker: MockerFixture, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that per-request vector_store_ids override config for streaming query. + + Config has rag.inline = ["source-a"] (resolves to vs-source-a). + Request passes vector_store_ids = ["vs-source-b"]. + Only vs-source-b should be queried, proving the override works. 
+ (passing vector_store_ids overrides config) + + Verifies: + - vector_io.query is called with the request-specified store, not config + - The config-based store is NOT queried + """ + entry_a = mocker.MagicMock() + entry_a.rag_id = "source-a" + entry_a.vector_db_id = "vs-source-a" + entry_a.score_multiplier = 1.0 + + entry_b = mocker.MagicMock() + entry_b.rag_id = "source-b" + entry_b.vector_db_id = "vs-source-b" + entry_b.score_multiplier = 1.0 + + test_config.configuration.byok_rag = [entry_a, entry_b] + test_config.configuration.rag.inline = ["source-a"] + + mock_holder_class = mocker.patch( + "app.endpoints.streaming_query.AsyncLlamaStackClientHolder" + ) + mock_client = _build_base_streaming_mock_client(mocker) + + mock_client.vector_io.query = mocker.AsyncMock( + return_value=_make_byok_vector_io_response(mocker) + ) + + mock_vs_resp = mocker.MagicMock() + mock_vs_resp.data = [] + mock_client.vector_stores.list.return_value = mock_vs_resp + + mock_holder_class.return_value.get_client.return_value = mock_client + + # Override: request specifies vs-source-b, not the config's vs-source-a + query_request = QueryRequest( + query="What is OpenShift?", + vector_store_ids=["vs-source-b"], + ) + + response = await streaming_query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert isinstance(response, StreamingResponse) + + # Verify only vs-source-b was queried (not the config's vs-source-a) + assert mock_client.vector_io.query.call_count == 1 + # call_args.kwargs holds the keyword arguments of the most recent call to vector_io.query. + # e.g. "vector_store_id" is the store queried, "query" is the search text. 
+ call_kwargs = mock_client.vector_io.query.call_args.kwargs + assert call_kwargs["vector_store_id"] == "vs-source-b" + + +@pytest.mark.asyncio +async def test_streaming_query_byok_request_vector_store_ids_filters_configured_stores( + test_config: AppConfig, + mocker: MockerFixture, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that request vector_store_ids selects a subset of stores configured in rag.inline. + + Both source-a and source-b are registered in byok_rag and listed in rag.inline. + The request passes vector_store_ids = ["vs-source-a"] to select only one. + + Verifies: + - vector_io.query is called exactly once (for vs-source-a only) + - vs-source-b is NOT queried despite being in rag.inline + - Injected context contains only source-a content + """ + entry_a = mocker.MagicMock() + entry_a.rag_id = "source-a" + entry_a.vector_db_id = "vs-source-a" + entry_a.score_multiplier = 1.0 + + entry_b = mocker.MagicMock() + entry_b.rag_id = "source-b" + entry_b.vector_db_id = "vs-source-b" + entry_b.score_multiplier = 1.0 + + test_config.configuration.byok_rag = [entry_a, entry_b] + test_config.configuration.rag.inline = ["source-a", "source-b"] + + mock_holder_class = mocker.patch( + "app.endpoints.streaming_query.AsyncLlamaStackClientHolder" + ) + mock_client = _build_base_streaming_mock_client(mocker) + + mock_client.vector_io.query = mocker.AsyncMock( + return_value=_make_byok_vector_io_response(mocker) + ) + + mock_vs_resp = mocker.MagicMock() + mock_vs_resp.data = [] + mock_client.vector_stores.list.return_value = mock_vs_resp + + mock_holder_class.return_value.get_client.return_value = mock_client + + query_request = QueryRequest( + query="What is OpenShift?", + vector_store_ids=["vs-source-a"], + ) + + response = await streaming_query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert isinstance(response, StreamingResponse) + + # Only vs-source-a should have 
been queried + assert mock_client.vector_io.query.call_count == 1 + # call_args.kwargs holds the keyword arguments of the most recent call to vector_io.query. + # e.g. "vector_store_id" is the store queried, "query" is the search text. + call_kwargs = mock_client.vector_io.query.call_args.kwargs + assert call_kwargs["vector_store_id"] == "vs-source-a" + + # Verify source-a context was injected into the LLM input + # responses.create is the mock for the OpenAI-compatible LLM API call. + # .kwargs holds its keyword arguments, e.g. "input" is the full prompt text sent to the model. + create_call = mock_client.responses.create.call_args_list[0] + input_text = create_call.kwargs["input"] + assert "file_search found" in input_text + + +@pytest.mark.asyncio +async def test_streaming_query_byok_inline_rag_empty_vector_store_ids_no_context( + byok_config: AppConfig, + mock_streaming_byok_client: AsyncMockType, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that passing an empty vector_store_ids list produces no inline context. + + Verifies: + - vector_io.query is never called when vector_store_ids=[] + - No RAG context is injected into the streaming input + - Streaming response still succeeds + """ + _ = byok_config + + query_request = QueryRequest(query="What is OpenShift?", vector_store_ids=[]) + + response = await streaming_query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert isinstance(response, StreamingResponse) + mock_streaming_byok_client.vector_io.query.assert_not_called() + + # responses.create is the mock for the OpenAI-compatible LLM API call. + # .kwargs holds its keyword arguments, e.g. "input" is the full prompt text sent to the model. 
+ create_call = mock_streaming_byok_client.responses.create.call_args_list[0] + input_text = create_call.kwargs["input"] + assert "file_search found" not in input_text + + +@pytest.mark.asyncio +async def test_streaming_query_byok_inline_rag_error_handled_gracefully( + byok_config: AppConfig, + mock_streaming_byok_client: AsyncMockType, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that BYOK RAG search failures are handled gracefully in streaming. + + Verifies: + - When vector_io.query raises an exception, streaming query still succeeds + - The error is silently handled (BYOK search errors are non-fatal) + - No inline RAG context is injected into the prompt when search fails + """ + _ = byok_config + + mock_streaming_byok_client.vector_io.query.side_effect = Exception( + "Connection refused" + ) + + query_request = QueryRequest(query="What is OpenShift?") + + response = await streaming_query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + # Streaming query should succeed despite BYOK RAG failure + assert response.status_code == status.HTTP_200_OK + assert isinstance(response, StreamingResponse) + + # No inline RAG context should be injected when the search fails. + # "file_search found" is the header added by _format_rag_context when chunks are present. + # responses.create is the mock for the OpenAI-compatible LLM API call. + # .kwargs holds its keyword arguments, e.g. "input" is the full prompt text sent to the model. 
+ create_call = mock_streaming_byok_client.responses.create.call_args_list[0] + input_text = create_call.kwargs["input"] + assert "file_search found" not in input_text + + +@pytest.mark.asyncio +async def test_streaming_query_byok_inline_rag_returns_referenced_documents( + byok_config: AppConfig, + mock_streaming_byok_client: AsyncMockType, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that inline BYOK RAG emits referenced documents in the end event. + + Verifies: + - Injected context references documents from BYOK RAG chunk metadata + - The SSE end event includes referenced_documents with known URLs/titles + """ + _ = byok_config + _ = mock_streaming_byok_client + + query_request = QueryRequest(query="What is OpenShift?") + + response = await streaming_query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert isinstance(response, StreamingResponse) + + # Consume the stream and verify the end event carries referenced documents + events = await _collect_sse_events(response) + end_events = [e for e in events if e.get("event") == "end"] + assert len(end_events) == 1 + + ref_docs = end_events[0]["data"].get("referenced_documents", []) + assert len(ref_docs) == 2, f"Expected 2 referenced docs, got {ref_docs}" + + doc_urls = [str(doc.get("doc_url", "")) for doc in ref_docs if doc.get("doc_url")] + assert any( + "docs.redhat.com/ocp/overview" in url for url in doc_urls + ), f"Expected ocp/overview URL in {doc_urls}" + assert any( + "docs.redhat.com/k8s/pods" in url for url in doc_urls + ), f"Expected k8s/pods URL in {doc_urls}" + + doc_titles = [doc.get("doc_title") for doc in ref_docs if doc.get("doc_title")] + assert "OpenShift Overview" in doc_titles + assert "Kubernetes Pods" in doc_titles + + +# ============================================================================== +# Tool-based BYOK RAG Streaming Tests +# 
============================================================================== + + +@pytest.mark.asyncio +async def test_streaming_query_byok_tool_rag_emits_tool_call_events( + byok_tool_config: AppConfig, + mock_streaming_byok_tool_client: AsyncMockType, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that BYOK tool RAG emits tool call SSE events during streaming. + + Verifies: + - Stream contains tool_call events from file_search_call output + - Tool call event references file_search / knowledge_search + """ + _ = byok_tool_config + _ = mock_streaming_byok_tool_client + + query_request = QueryRequest(query="What is OpenShift?") + + response = await streaming_query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert isinstance(response, StreamingResponse) + + events = await _collect_sse_events(response) + tool_call_events = [e for e in events if e.get("event") == "tool_call"] + assert len(tool_call_events) > 0 + + tool_names = [e["data"].get("name", "") for e in tool_call_events] + assert any( + "file_search" in name or "knowledge_search" in name for name in tool_names + ) + + +@pytest.mark.asyncio +async def test_streaming_query_byok_tool_rag_emits_referenced_documents( + byok_tool_config: AppConfig, + mock_streaming_byok_tool_client: AsyncMockType, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that BYOK tool RAG streaming emits referenced documents in end event. 
+ + Verifies: + - End event includes referenced_documents list + - Documents include URLs from file_search results + """ + _ = byok_tool_config + _ = mock_streaming_byok_tool_client + + query_request = QueryRequest(query="What is OpenShift?") + + response = await streaming_query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert isinstance(response, StreamingResponse) + + events = await _collect_sse_events(response) + end_events = [e for e in events if e.get("event") == "end"] + assert len(end_events) == 1 + + ref_docs = end_events[0]["data"].get("referenced_documents", []) + assert isinstance(ref_docs, list) + assert len(ref_docs) >= 1, "Expected at least one referenced document" + + # Verify known URL from the mock file_search result propagated + doc_urls = [str(doc.get("doc_url", "")) for doc in ref_docs if doc.get("doc_url")] + assert any( + "docs.redhat.com/ocp/overview" in url for url in doc_urls + ), f"Expected ocp/overview URL in {doc_urls}" + + +# ============================================================================== +# Combined Inline + Tool RAG Streaming Tests +# ============================================================================== + + +@pytest.mark.asyncio +async def test_streaming_query_byok_combined_inline_and_tool_rag( + test_config: AppConfig, + mocker: MockerFixture, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that inline and tool-based BYOK RAG both work in streaming. 
+ + Verifies: + - Inline RAG context is injected into the input + - Tool RAG file_search is passed as a tool + - Streaming response succeeds + """ + # Configure both inline and tool RAG + byok_entry = mocker.MagicMock() + byok_entry.rag_id = "test-knowledge" + byok_entry.vector_db_id = "vs-byok-knowledge" + byok_entry.score_multiplier = 1.0 + test_config.configuration.byok_rag = [byok_entry] + test_config.configuration.rag.inline = ["test-knowledge"] + test_config.configuration.rag.tool = ["test-knowledge"] + + # Mock Llama Stack client + mock_holder_class = mocker.patch( + "app.endpoints.streaming_query.AsyncLlamaStackClientHolder" + ) + mock_client = _build_base_streaming_mock_client(mocker) + + # Inline RAG returns chunks via vector_io + mock_client.vector_io.query = mocker.AsyncMock( + return_value=_make_byok_vector_io_response(mocker) + ) + + # Tool RAG vector stores + mock_vector_store = mocker.MagicMock() + mock_vector_store.id = "vs-byok-knowledge" + mock_list_result = mocker.MagicMock() + mock_list_result.data = [mock_vector_store] + mock_client.vector_stores.list.return_value = mock_list_result + + mock_holder_class.return_value.get_client.return_value = mock_client + + query_request = QueryRequest(query="What is OpenShift?") + + response = await streaming_query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert isinstance(response, StreamingResponse) + assert response.status_code == status.HTTP_200_OK + + # Verify inline RAG context was injected + # responses.create is the mock for the OpenAI-compatible LLM API call. + # .kwargs holds its keyword arguments, e.g. "input" is the full prompt text sent to the model. 
+ create_call = mock_client.responses.create.call_args_list[0] + call_kwargs = create_call.kwargs + input_text = call_kwargs["input"] + assert "file_search found" in input_text + + # Verify tool RAG file_search was passed + assert call_kwargs.get("tools") is not None + assert any(tool.get("type") == "file_search" for tool in call_kwargs["tools"]) + + +# ============================================================================== +# Inline RAG rag_id Resolution Streaming Tests +# ============================================================================== + + +@pytest.mark.asyncio +async def test_streaming_query_byok_only_configured_rag_id_is_queried( + test_config: AppConfig, + mocker: MockerFixture, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that only the rag_id listed in rag.inline triggers retrieval in streaming. + + Two BYOK sources are registered (source-a and source-b) but only + source-a is listed in rag.inline. Only vs-source-a should be queried + and only its content should appear in the injected context. 
+ + Verifies: + - vector_io.query is called exactly once (for the configured source) + - The call targets the correct vector_db_id + - vs-source-b is NOT queried + - Injected context contains source-a content + """ + entry_a = mocker.MagicMock() + entry_a.rag_id = "source-a" + entry_a.vector_db_id = "vs-source-a" + entry_a.score_multiplier = 1.0 + + entry_b = mocker.MagicMock() + entry_b.rag_id = "source-b" + entry_b.vector_db_id = "vs-source-b" + entry_b.score_multiplier = 1.0 + + test_config.configuration.byok_rag = [entry_a, entry_b] + test_config.configuration.rag.inline = ["source-a"] + + mock_holder_class = mocker.patch( + "app.endpoints.streaming_query.AsyncLlamaStackClientHolder" + ) + mock_client = _build_base_streaming_mock_client(mocker) + + mock_client.vector_io.query = mocker.AsyncMock( + return_value=_make_byok_vector_io_response(mocker) + ) + + mock_vs_resp = mocker.MagicMock() + mock_vs_resp.data = [] + mock_client.vector_stores.list.return_value = mock_vs_resp + + mock_holder_class.return_value.get_client.return_value = mock_client + + query_request = QueryRequest(query="What is OpenShift?") + + response = await streaming_query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert isinstance(response, StreamingResponse) + + assert mock_client.vector_io.query.call_count == 1 + # call_args.kwargs holds the keyword arguments of the most recent call to vector_io.query. + # e.g. "vector_store_id" is the store queried, "query" is the search text. + call_kwargs = mock_client.vector_io.query.call_args.kwargs + assert call_kwargs["vector_store_id"] == "vs-source-a" + + queried_stores = [ + c.kwargs["vector_store_id"] for c in mock_client.vector_io.query.call_args_list + ] + assert "vs-source-b" not in queried_stores + + # responses.create is the mock for the OpenAI-compatible LLM API call. + # .kwargs holds its keyword arguments, e.g. "input" is the full prompt text sent to the model. 
+ create_call = mock_client.responses.create.call_args_list[0] + input_text = create_call.kwargs["input"] + assert "file_search found" in input_text + + +# ============================================================================== +# Score Multiplier Priority Streaming Tests +# ============================================================================== + + +@pytest.mark.asyncio +async def test_streaming_query_byok_score_multiplier_shifts_priority( # pylint: disable=too-many-locals + test_config: AppConfig, + mocker: MockerFixture, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that score_multiplier shifts chunk priority in streaming query. + + Doc A (source-a) has high base similarity (0.90) with multiplier 1.0. + Doc B (source-b) has low base similarity (0.40) with multiplier 5.0. + After weighting: Doc A = 0.90, Doc B = 2.00. + The injected context should list Doc B content before Doc A. + + Verifies: + - The higher-weighted chunk content appears first in the injected context + """ + entry_a = mocker.MagicMock() + entry_a.rag_id = "source-a" + entry_a.vector_db_id = "vs-source-a" + entry_a.score_multiplier = 1.0 + + entry_b = mocker.MagicMock() + entry_b.rag_id = "source-b" + entry_b.vector_db_id = "vs-source-b" + entry_b.score_multiplier = 5.0 + + test_config.configuration.byok_rag = [entry_a, entry_b] + test_config.configuration.rag.inline = ["source-a", "source-b"] + + mock_holder_class = mocker.patch( + "app.endpoints.streaming_query.AsyncLlamaStackClientHolder" + ) + mock_client = _build_base_streaming_mock_client(mocker) + + resp_a = _make_vector_io_response( + mocker, + [ + ("Doc A high similarity", "doc-a", 0.90), + ], + ) + resp_b = _make_vector_io_response( + mocker, + [ + ("Doc B low similarity boosted", "doc-b", 0.40), + ], + ) + + async def _side_effect(**kwargs: Any) -> Any: + if kwargs["vector_store_id"] == "vs-source-a": + return resp_a + return resp_b + + mock_client.vector_io.query = 
mocker.AsyncMock(side_effect=_side_effect) + + mock_vs_resp = mocker.MagicMock() + mock_vs_resp.data = [] + mock_client.vector_stores.list.return_value = mock_vs_resp + + mock_holder_class.return_value.get_client.return_value = mock_client + + query_request = QueryRequest(query="test query") + + response = await streaming_query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert isinstance(response, StreamingResponse) + + # Verify Doc B (weighted 2.0) appears before Doc A (weighted 0.9) in context + # responses.create is the mock for the OpenAI-compatible LLM API call. + # .kwargs holds its keyword arguments, e.g. "input" is the full prompt text sent to the model. + create_call = mock_client.responses.create.call_args_list[0] + input_text = create_call.kwargs["input"] + pos_b = input_text.find("Doc B low similarity boosted") + pos_a = input_text.find("Doc A high similarity") + assert pos_b != -1 and pos_a != -1 + assert pos_b < pos_a + + +# ============================================================================== +# BYOK_RAG_MAX_CHUNKS Capping Streaming Tests +# ============================================================================== + + +@pytest.mark.asyncio +async def test_streaming_query_byok_max_chunks_caps_context( # pylint: disable=too-many-locals + test_config: AppConfig, + mocker: MockerFixture, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that BYOK_RAG_MAX_CHUNKS caps chunks in streaming query context. + + A source returns more chunks than BYOK_RAG_MAX_CHUNKS. The injected + context should contain at most BYOK_RAG_MAX_CHUNKS chunk entries. 
+ + Verifies: + - Context chunk count does not exceed BYOK_RAG_MAX_CHUNKS + - Only the highest-scored chunks appear in the context + """ + entry = mocker.MagicMock() + entry.rag_id = "big-source" + entry.vector_db_id = "vs-big-source" + entry.score_multiplier = 1.0 + + test_config.configuration.byok_rag = [entry] + test_config.configuration.rag.inline = ["big-source"] + + mock_holder_class = mocker.patch( + "app.endpoints.streaming_query.AsyncLlamaStackClientHolder" + ) + mock_client = _build_base_streaming_mock_client(mocker) + + # Generate more chunks than BYOK_RAG_MAX_CHUNKS + num_chunks = constants.BYOK_RAG_MAX_CHUNKS + 5 + chunks_data = [ + (f"Chunk content {i}", f"chunk-{i}", round(0.50 + i * 0.03, 2)) + for i in range(num_chunks) + ] + mock_client.vector_io.query = mocker.AsyncMock( + return_value=_make_vector_io_response(mocker, chunks_data) + ) + + mock_vs_resp = mocker.MagicMock() + mock_vs_resp.data = [] + mock_client.vector_stores.list.return_value = mock_vs_resp + + mock_holder_class.return_value.get_client.return_value = mock_client + + query_request = QueryRequest(query="test query") + + response = await streaming_query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert isinstance(response, StreamingResponse) + + # Verify the context header reports the capped count + # responses.create is the mock for the OpenAI-compatible LLM API call. + # .kwargs holds its keyword arguments, e.g. "input" is the full prompt text sent to the model. 
+ create_call = mock_client.responses.create.call_args_list[0] + input_text = create_call.kwargs["input"] + expected_header = f"file_search found {constants.BYOK_RAG_MAX_CHUNKS} chunks:" + assert expected_header in input_text + + # The lowest-scoring chunk should NOT be in the context + assert "Chunk content 0" not in input_text + # The highest-scoring chunk should be in the context + assert f"Chunk content {num_chunks - 1}" in input_text + + +@pytest.mark.asyncio +async def test_streaming_query_byok_max_chunks_caps_across_multiple_sources( # pylint: disable=too-many-locals + test_config: AppConfig, + mocker: MockerFixture, + test_request: Request, + test_auth: AuthTuple, +) -> None: + """Test that BYOK_RAG_MAX_CHUNKS caps chunks across multiple sources in streaming. + + Two sources each return several chunks. The combined context should + not exceed BYOK_RAG_MAX_CHUNKS and should contain the globally + highest-scored chunks regardless of source. + + Verifies: + - Total chunks across sources are capped at BYOK_RAG_MAX_CHUNKS + - Only the highest-scored chunks appear in the context + """ + entry_a = mocker.MagicMock() + entry_a.rag_id = "source-a" + entry_a.vector_db_id = "vs-source-a" + entry_a.score_multiplier = 1.0 + + entry_b = mocker.MagicMock() + entry_b.rag_id = "source-b" + entry_b.vector_db_id = "vs-source-b" + entry_b.score_multiplier = 1.0 + + test_config.configuration.byok_rag = [entry_a, entry_b] + test_config.configuration.rag.inline = ["source-a", "source-b"] + + mock_holder_class = mocker.patch( + "app.endpoints.streaming_query.AsyncLlamaStackClientHolder" + ) + mock_client = _build_base_streaming_mock_client(mocker) + + # Overlapping score bands so top-k must pick from both sources + n = constants.BYOK_RAG_MAX_CHUNKS + resp_a = _make_vector_io_response( + mocker, + [ + (f"Source A chunk {i}", f"a-chunk-{i}", round(0.70 + i * 0.05, 2)) + for i in range(n) + ], + ) + resp_b = _make_vector_io_response( + mocker, + [ + (f"Source B chunk {i}", 
f"b-chunk-{i}", round(0.72 + i * 0.05, 2)) + for i in range(n) + ], + ) + + async def _side_effect(**kwargs: Any) -> Any: + if kwargs["vector_store_id"] == "vs-source-a": + return resp_a + return resp_b + + mock_client.vector_io.query = mocker.AsyncMock(side_effect=_side_effect) + + mock_vs_resp = mocker.MagicMock() + mock_vs_resp.data = [] + mock_client.vector_stores.list.return_value = mock_vs_resp + + mock_holder_class.return_value.get_client.return_value = mock_client + + query_request = QueryRequest(query="test query") + + response = await streaming_query_endpoint_handler( + request=test_request, + query_request=query_request, + auth=test_auth, + mcp_headers={}, + ) + + assert isinstance(response, StreamingResponse) + + # responses.create is the mock for the OpenAI-compatible LLM API call. + # .kwargs holds its keyword arguments, e.g. "input" is the full prompt text sent to the model. + create_call = mock_client.responses.create.call_args_list[0] + input_text = create_call.kwargs["input"] + expected_header = f"file_search found {constants.BYOK_RAG_MAX_CHUNKS} chunks:" + assert expected_header in input_text + + # Both sources must appear in the context (overlapping scores guarantee this) + assert "Source A chunk" in input_text + assert "Source B chunk" in input_text + + # Lowest-scoring chunks from each source must be dropped + assert "Source A chunk 0" not in input_text + assert "Source B chunk 0" not in input_text diff --git a/tests/integration/test_openapi_json.py b/tests/integration/test_openapi_json.py index 17ff8ac66..05ccc83f8 100644 --- a/tests/integration/test_openapi_json.py +++ b/tests/integration/test_openapi_json.py @@ -231,6 +231,11 @@ def test_servers_section_present_from_url(spec_from_url: dict[str, Any]) -> None "post", {"200", "401", "403", "404"}, ), + ( + "/v1/responses", + "post", + {"200", "401", "403", "404", "413", "422", "429", "500", "503"}, + ), ("/v1/config", "get", {"200", "401", "403", "500"}), ("/v1/feedback", "post", {"200", 
"401", "403", "404", "500"}), ("/v1/feedback/status", "get", {"200"}), @@ -318,6 +323,11 @@ def test_paths_and_responses_exist_from_file( "post", {"200", "401", "403", "404"}, ), + ( + "/v1/responses", + "post", + {"200", "401", "403", "404", "413", "422", "429", "500", "503"}, + ), ("/v1/config", "get", {"200", "401", "403", "500"}), ("/v1/feedback", "post", {"200", "401", "403", "404", "500"}), ("/v1/feedback/status", "get", {"200"}), diff --git a/tests/unit/app/endpoints/test_conversations.py b/tests/unit/app/endpoints/test_conversations.py index 5ca4faf0b..9c75f0d2f 100644 --- a/tests/unit/app/endpoints/test_conversations.py +++ b/tests/unit/app/endpoints/test_conversations.py @@ -552,12 +552,8 @@ async def test_llama_stack_not_found_error( ) -> None: """Test the endpoint when LlamaStack returns NotFoundError. - Verify the GET /conversations/{conversation_id} handler raises an HTTP - 404 when the Llama Stack client reports the session as not found. - - Asserts that the raised HTTPException contains a response message - indicating the conversation was not found and a cause that includes - "does not exist" and the conversation ID. + When the Llama Stack client reports the session as not found, + get_all_conversation_items maps it to HTTP 500 (InternalServerError). """ mock_authorization_resolvers(mocker) mocker.patch( @@ -589,13 +585,13 @@ async def test_llama_stack_not_found_error( auth=MOCK_AUTH, ) - assert exc_info.value.status_code == status.HTTP_404_NOT_FOUND - + assert exc_info.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR detail = exc_info.value.detail assert isinstance(detail, dict) - assert "Conversation not found" in detail["response"] # type: ignore - assert "does not exist" in detail["cause"] # type: ignore - assert VALID_CONVERSATION_ID in detail["cause"] # type: ignore + assert detail["response"] == "Internal server error" + assert detail["cause"] == ( + "An unexpected error occurred while processing the request." 
+ ) @pytest.mark.asyncio async def test_get_conversation_forbidden( @@ -679,7 +675,10 @@ async def test_get_others_conversations_allowed_for_authorized_user( mock_item2.role = "assistant" mock_item2.content = "Hi there!" mock_items_response.data = [mock_item1, mock_item2] - mock_client.conversations.items.list.return_value = mock_items_response + mock_items_response.has_next_page.return_value = False + mock_client.conversations.items.list = mocker.AsyncMock( + return_value=mock_items_response + ) mock_client_holder = mocker.patch( "app.endpoints.conversations_v1.AsyncLlamaStackClientHolder" @@ -732,7 +731,8 @@ async def test_successful_conversation_retrieval( type="message", role="assistant", content="I'm doing well, thanks!" ), ] - mock_client.conversations.items.list.return_value = mock_items + mock_items.has_next_page.return_value = False + mock_client.conversations.items.list = mocker.AsyncMock(return_value=mock_items) mock_client_holder = mocker.patch( "app.endpoints.conversations_v1.AsyncLlamaStackClientHolder" @@ -806,7 +806,10 @@ async def test_no_items_found_in_get_conversation( mock_client = mocker.AsyncMock() mock_items_response = mocker.Mock() mock_items_response.data = [] - mock_client.conversations.items.list.return_value = mock_items_response + mock_items_response.has_next_page.return_value = False + mock_client.conversations.items.list = mocker.AsyncMock( + return_value=mock_items_response + ) mock_client_holder = mocker.patch( "app.endpoints.conversations_v1.AsyncLlamaStackClientHolder" ) @@ -832,7 +835,10 @@ async def test_api_status_error_in_get_conversation( dummy_request: Request, mock_conversation: MockType, ) -> None: - """Test when APIStatusError is raised during conversation retrieval.""" + """Test when APIStatusError is raised during conversation retrieval. + + get_all_conversation_items maps APIStatusError to HTTP 500. 
+ """ mock_authorization_resolvers(mocker) mocker.patch( "app.endpoints.conversations_v1.configuration", setup_configuration @@ -863,10 +869,10 @@ async def test_api_status_error_in_get_conversation( auth=MOCK_AUTH, ) - assert exc_info.value.status_code == status.HTTP_404_NOT_FOUND + assert exc_info.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR detail = exc_info.value.detail assert isinstance(detail, dict) - assert "Conversation not found" in detail["response"] # type: ignore + assert "response" in detail @pytest.mark.asyncio async def test_sqlalchemy_error_in_get_conversation( @@ -940,7 +946,7 @@ def query_side_effect(model_class: type[Any]) -> Any: mock_session_context.__enter__.return_value = mock_session mock_session_context.__exit__.return_value = None mocker.patch( - "app.endpoints.conversations_v1.get_session", + "utils.endpoints.get_session", return_value=mock_session_context, ) diff --git a/tests/unit/app/endpoints/test_query.py b/tests/unit/app/endpoints/test_query.py index 044fb5bf2..06ee69926 100644 --- a/tests/unit/app/endpoints/test_query.py +++ b/tests/unit/app/endpoints/test_query.py @@ -17,7 +17,11 @@ from models.responses import QueryResponse from utils.token_counter import TokenCounter from utils.types import ( + RAGChunk, + RAGContext, + ReferencedDocument, ResponsesApiParams, + ShieldModerationPassed, ToolCallSummary, ToolResultSummary, TurnSummary, @@ -42,8 +46,7 @@ def create_dummy_request() -> Request: request (fastapi.Request): A Request constructed with a bare HTTP scope (type "http") for use in tests. 
""" - req = Request(scope={"type": "http", "headers": []}) - return req + return Request(scope={"type": "http", "headers": []}) @pytest.fixture(name="setup_configuration") @@ -126,6 +129,10 @@ async def test_successful_query_no_conversation( "app.endpoints.query.get_topic_summary", new=mocker.AsyncMock(return_value=None), ) + mocker.patch( + "app.endpoints.query.run_shield_moderation", + new=mocker.AsyncMock(return_value=ShieldModerationPassed()), + ) mock_responses_params = mocker.Mock(spec=ResponsesApiParams) mock_responses_params.model = "provider1/model1" @@ -170,6 +177,93 @@ async def mock_retrieve_response(*_args: Any, **_kwargs: Any) -> TurnSummary: assert response.conversation_id == "123" assert response.response == "Kubernetes is a container orchestration platform" + @pytest.mark.asyncio + async def test_query_merges_inline_and_tool_rag_chunks_and_documents( + self, + dummy_request: Request, + setup_configuration: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that inline RAG and tool-based RAG chunks/docs are correctly merged.""" + query_request = QueryRequest( + query="What is Kubernetes?" 
+ ) # pyright: ignore[reportCallIssue] + + mocker.patch("app.endpoints.query.configuration", setup_configuration) + mocker.patch("app.endpoints.query.check_configuration_loaded") + mocker.patch("app.endpoints.query.check_tokens_available") + mocker.patch("app.endpoints.query.validate_model_provider_override") + + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_response_obj = mocker.Mock() + mock_response_obj.output = [] + mock_client.responses = mocker.Mock() + mock_client.responses.create = mocker.AsyncMock(return_value=mock_response_obj) + mock_client_holder = mocker.Mock() + mock_client_holder.get_client.return_value = mock_client + mocker.patch( + "app.endpoints.query.AsyncLlamaStackClientHolder", + return_value=mock_client_holder, + ) + mocker.patch( + "app.endpoints.query.run_shield_moderation", + new=mocker.AsyncMock(return_value=ShieldModerationPassed()), + ) + + inline_chunk = RAGChunk(content="inline chunk content", source="byok") + inline_doc = ReferencedDocument(doc_title="Inline Doc") + inline_rag = RAGContext( + context_text="", + rag_chunks=[inline_chunk], + referenced_documents=[inline_doc], + ) + mocker.patch( + "app.endpoints.query.build_rag_context", + new=mocker.AsyncMock(return_value=inline_rag), + ) + + mock_responses_params = mocker.Mock(spec=ResponsesApiParams) + mock_responses_params.model = "provider1/model1" + mock_responses_params.conversation = "conv_123" + mock_responses_params.tools = None + mock_responses_params.model_dump.return_value = { + "input": "test", + "model": "provider1/model1", + } + mocker.patch( + "app.endpoints.query.prepare_responses_params", + new=mocker.AsyncMock(return_value=mock_responses_params), + ) + + tool_chunk = RAGChunk(content="tool chunk content", source="vs-1") + tool_doc = ReferencedDocument(doc_title="Tool Doc") + mock_turn_summary = TurnSummary() + mock_turn_summary.rag_chunks = [tool_chunk] + mock_turn_summary.referenced_documents = [tool_doc] + + mocker.patch( + 
"app.endpoints.query.retrieve_response", + new=mocker.AsyncMock(return_value=mock_turn_summary), + ) + mocker.patch("app.endpoints.query.store_query_results") + mocker.patch("app.endpoints.query.consume_query_tokens") + mocker.patch("app.endpoints.query.get_available_quotas", return_value={}) + + response = await query_endpoint_handler( + request=dummy_request, + query_request=query_request, + auth=MOCK_AUTH, + mcp_headers={}, + ) + + assert isinstance(response, QueryResponse) + assert len(response.rag_chunks) == 2 + assert response.rag_chunks[0].content == "inline chunk content" + assert response.rag_chunks[1].content == "tool chunk content" + assert len(response.referenced_documents) == 2 + assert response.referenced_documents[0].doc_title == "Inline Doc" + assert response.referenced_documents[1].doc_title == "Tool Doc" + @pytest.mark.asyncio async def test_successful_query_with_conversation( self, @@ -215,7 +309,10 @@ async def test_successful_query_with_conversation( "app.endpoints.query.prepare_responses_params", new=mocker.AsyncMock(return_value=mock_responses_params), ) - + mocker.patch( + "app.endpoints.query.run_shield_moderation", + new=mocker.AsyncMock(return_value=ShieldModerationPassed()), + ) mocker.patch( "app.endpoints.query.retrieve_response", new=mocker.AsyncMock(return_value=TurnSummary()), @@ -276,6 +373,10 @@ async def test_query_with_attachments( "app.endpoints.query.get_topic_summary", new=mocker.AsyncMock(return_value=None), ) + mocker.patch( + "app.endpoints.query.run_shield_moderation", + new=mocker.AsyncMock(return_value=ShieldModerationPassed()), + ) mock_responses_params = mocker.Mock(spec=ResponsesApiParams) mock_responses_params.model = "provider1/model1" @@ -336,6 +437,10 @@ async def test_query_with_topic_summary( "app.endpoints.query.AsyncLlamaStackClientHolder", return_value=mock_client_holder, ) + mocker.patch( + "app.endpoints.query.run_shield_moderation", + new=mocker.AsyncMock(return_value=ShieldModerationPassed()), + ) 
mock_responses_params = mocker.Mock(spec=ResponsesApiParams) mock_responses_params.model = "provider1/model1" @@ -406,6 +511,10 @@ async def test_query_azure_token_refresh( "app.endpoints.query.get_topic_summary", new=mocker.AsyncMock(return_value=None), ) + mocker.patch( + "app.endpoints.query.run_shield_moderation", + new=mocker.AsyncMock(return_value=ShieldModerationPassed()), + ) mock_responses_params = mocker.Mock(spec=ResponsesApiParams) mock_responses_params.model = "azure/model1" @@ -477,6 +586,7 @@ async def test_retrieve_response_success(self, mocker: MockerFixture) -> None: mock_responses_params = mocker.Mock(spec=ResponsesApiParams) mock_responses_params.input = "test query" mock_responses_params.model = "provider1/model1" + mock_responses_params.tools = None mock_responses_params.model_dump.return_value = { "input": "test query", "model": "provider1/model1", @@ -493,10 +603,6 @@ async def test_retrieve_response_success(self, mocker: MockerFixture) -> None: mock_response.output = [mock_output_item] mock_response.usage = mock_usage - mocker.patch( - "app.endpoints.query.run_shield_moderation", - return_value=mocker.Mock(decision="passed"), - ) mock_client.responses.create = mocker.AsyncMock(return_value=mock_response) mock_summary = TurnSummary() @@ -507,7 +613,9 @@ async def test_retrieve_response_success(self, mocker: MockerFixture) -> None: return_value=mock_summary, ) - result = await retrieve_response(mock_client, mock_responses_params) + result = await retrieve_response( + mock_client, mock_responses_params, ShieldModerationPassed() + ) assert isinstance(result, TurnSummary) assert result.llm_response == "Response text" @@ -528,19 +636,20 @@ async def test_retrieve_response_shield_blocked( "model": "provider1/model1", } + mock_refusal = mocker.Mock() mock_moderation_result = mocker.Mock() mock_moderation_result.decision = "blocked" mock_moderation_result.message = "Content blocked by moderation" - mocker.patch( - 
"app.endpoints.query.run_shield_moderation", - new=mocker.AsyncMock(return_value=mock_moderation_result), - ) + mock_moderation_result.moderation_id = "mod_123" + mock_moderation_result.refusal_response = mock_refusal mock_append = mocker.patch( - "app.endpoints.query.append_turn_to_conversation", + "app.endpoints.query.append_turn_items_to_conversation", new=mocker.AsyncMock(), ) - result = await retrieve_response(mock_client, mock_responses_params) + result = await retrieve_response( + mock_client, mock_responses_params, mock_moderation_result + ) assert isinstance(result, TurnSummary) assert result.llm_response == "Content blocked by moderation" @@ -559,10 +668,6 @@ async def test_retrieve_response_connection_error( "model": "provider1/model1", } - mocker.patch( - "app.endpoints.query.run_shield_moderation", - return_value=mocker.Mock(decision="passed"), - ) mock_client.responses.create = mocker.AsyncMock( side_effect=APIConnectionError( message="Connection failed", request=mocker.Mock() @@ -570,7 +675,9 @@ async def test_retrieve_response_connection_error( ) with pytest.raises(HTTPException) as exc_info: - await retrieve_response(mock_client, mock_responses_params) + await retrieve_response( + mock_client, mock_responses_params, ShieldModerationPassed() + ) assert exc_info.value.status_code == 503 @@ -588,10 +695,6 @@ async def test_retrieve_response_api_status_error( "model": "provider1/model1", } - mocker.patch( - "app.endpoints.query.run_shield_moderation", - return_value=mocker.Mock(decision="passed"), - ) mock_client.responses.create = mocker.AsyncMock( side_effect=APIStatusError( message="API error", response=mocker.Mock(request=None), body=None @@ -608,7 +711,9 @@ async def test_retrieve_response_api_status_error( ) with pytest.raises(HTTPException): - await retrieve_response(mock_client, mock_responses_params) + await retrieve_response( + mock_client, mock_responses_params, ShieldModerationPassed() + ) @pytest.mark.asyncio async def 
test_retrieve_response_runtime_error_context_length( @@ -624,16 +729,14 @@ async def test_retrieve_response_runtime_error_context_length( "model": "provider1/model1", } - mocker.patch( - "app.endpoints.query.run_shield_moderation", - return_value=mocker.Mock(decision="passed"), - ) mock_client.responses.create = mocker.AsyncMock( side_effect=RuntimeError("context_length exceeded") ) with pytest.raises(HTTPException) as exc_info: - await retrieve_response(mock_client, mock_responses_params) + await retrieve_response( + mock_client, mock_responses_params, ShieldModerationPassed() + ) assert exc_info.value.status_code == 413 @@ -651,16 +754,14 @@ async def test_retrieve_response_runtime_error_other( "model": "provider1/model1", } - mocker.patch( - "app.endpoints.query.run_shield_moderation", - return_value=mocker.Mock(decision="passed"), - ) mock_client.responses.create = mocker.AsyncMock( side_effect=RuntimeError("Some other error") ) with pytest.raises(RuntimeError): - await retrieve_response(mock_client, mock_responses_params) + await retrieve_response( + mock_client, mock_responses_params, ShieldModerationPassed() + ) @pytest.mark.asyncio async def test_retrieve_response_with_tool_calls( @@ -671,6 +772,7 @@ async def test_retrieve_response_with_tool_calls( mock_responses_params = mocker.Mock(spec=ResponsesApiParams) mock_responses_params.input = "test query" mock_responses_params.model = "provider1/model1" + mock_responses_params.tools = None mock_responses_params.model_dump.return_value = { "input": "test query", "model": "provider1/model1", @@ -683,10 +785,6 @@ async def test_retrieve_response_with_tool_calls( mock_response.output = [mocker.Mock(type="message")] mock_response.usage = mock_usage - mocker.patch( - "app.endpoints.query.run_shield_moderation", - return_value=mocker.Mock(decision="passed"), - ) mock_client.responses.create = mocker.AsyncMock(return_value=mock_response) mock_tool_call = ToolCallSummary(id="1", name="test", args={}) @@ -703,7 +801,9 @@ 
async def test_retrieve_response_with_tool_calls( return_value=mock_summary, ) - result = await retrieve_response(mock_client, mock_responses_params) + result = await retrieve_response( + mock_client, mock_responses_params, ShieldModerationPassed() + ) assert result.llm_response == "Response text" assert len(result.tool_calls) == 1 @@ -712,4 +812,3 @@ async def test_retrieve_response_with_tool_calls( assert result.token_usage.output_tokens == 5 assert result.rag_chunks == [] assert result.referenced_documents == [] - assert result.inline_rag_documents == [] diff --git a/tests/unit/app/endpoints/test_responses.py b/tests/unit/app/endpoints/test_responses.py new file mode 100644 index 000000000..725e43a07 --- /dev/null +++ b/tests/unit/app/endpoints/test_responses.py @@ -0,0 +1,1374 @@ +# pylint: disable=redefined-outer-name, too-many-locals, too-many-lines +"""Unit tests for the /responses REST API endpoint (LCORE Responses API).""" + +from datetime import UTC, datetime +from typing import Any, cast + +import pytest +from fastapi import HTTPException, Request +from fastapi.responses import StreamingResponse +from llama_stack_api import OpenAIResponseObject +from llama_stack_api.openai_responses import OpenAIResponseMessage +from llama_stack_client import APIConnectionError, APIStatusError, AsyncLlamaStackClient +from pytest_mock import MockerFixture + +from app.endpoints.responses import ( + handle_non_streaming_response, + handle_streaming_response, + responses_endpoint_handler, +) +from configuration import AppConfig +from models.config import Action +from models.database.conversations import UserConversation +from models.requests import ResponsesRequest +from models.responses import ResponsesResponse +from utils.types import RAGContext, ResponsesConversationContext, TurnSummary + +MOCK_AUTH = ( + "00000001-0001-0001-0001-000000000001", + "mock_username", + False, + "mock_token", +) +VALID_CONV_ID = "conv_e6afd7aaa97b49ce8f4f96a801b07893d9cb784d72e53e3c" 
+VALID_CONV_ID_NORMALIZED = "e6afd7aaa97b49ce8f4f96a801b07893d9cb784d72e53e3c" +MODULE = "app.endpoints.responses" +ENDPOINTS_MODULE = "utils.endpoints" +UTILS_RESPONSES_MODULE = "utils.responses" + + +def _patch_base(mocker: MockerFixture, config: AppConfig) -> None: + """Patch configuration and mandatory checks for responses endpoint.""" + mocker.patch(f"{MODULE}.configuration", config) + mocker.patch(f"{MODULE}.check_configuration_loaded") + mocker.patch(f"{MODULE}.check_tokens_available") + mocker.patch(f"{MODULE}.validate_model_provider_override") + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mocker.Mock() + mocker.patch( + f"{UTILS_RESPONSES_MODULE}.AsyncLlamaStackClientHolder", + return_value=mock_holder, + ) + mocker.patch( + f"{UTILS_RESPONSES_MODULE}.prepare_tools", + new=mocker.AsyncMock(return_value=None), + ) + + +def _patch_client(mocker: MockerFixture) -> Any: + """Patch AsyncLlamaStackClientHolder; return (mock_client, mock_holder).""" + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_vector_stores = mocker.Mock() + mock_vector_stores.list = mocker.AsyncMock(return_value=mocker.Mock(data=[])) + mock_client.vector_stores = mock_vector_stores + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mock_client + mocker.patch(f"{MODULE}.AsyncLlamaStackClientHolder", return_value=mock_holder) + return mock_client, mock_holder + + +def _patch_resolve_response_context( + mocker: MockerFixture, + *, + conversation: str = "conv_new", + user_conversation: UserConversation | None = None, + generate_topic_summary: bool = False, +) -> None: + """Patch resolve_response_context to return the given conversation context.""" + mocker.patch( + f"{MODULE}.resolve_response_context", + new=mocker.AsyncMock( + return_value=ResponsesConversationContext( + conversation=conversation, + user_conversation=user_conversation, + generate_topic_summary=generate_topic_summary, + ) + ), + ) + + +def _patch_rag( + mocker: 
MockerFixture, + *, + rag_context: str = "", +) -> None: + """Patch RAG for responses endpoint by mocking build_rag_context.""" + mocker.patch( + f"{MODULE}.build_rag_context", + new=mocker.AsyncMock( + return_value=RAGContext( + context_text=rag_context, + referenced_documents=[], + ), + ), + ) + + +def _patch_moderation(mocker: MockerFixture, decision: str = "passed") -> Any: + """Patch run_shield_moderation; return mock moderation result.""" + mock_moderation = mocker.Mock() + mock_moderation.decision = decision + mocker.patch( + f"{MODULE}.run_shield_moderation", + new=mocker.AsyncMock(return_value=mock_moderation), + ) + return mock_moderation + + +def _make_responses_response( + *, + output_text: str = "", + conversation: str = "", + model: str = "provider/model1", + **kwargs: Any, +) -> ResponsesResponse: + """Build a minimal valid ResponsesResponse for tests.""" + defaults = { + "id": "resp_1", + "object": "response", + "created_at": 0, + "status": "completed", + "model": model, + "output": [], + "conversation": conversation, + "completed_at": 0, + "output_text": output_text, + "available_quotas": {}, + } + defaults.update(kwargs) + return ResponsesResponse(**defaults) + + +def _patch_handle_non_streaming_common( + mocker: MockerFixture, config: AppConfig +) -> None: + """Patch deps used by handle_non_streaming_response (blocked and success).""" + mocker.patch(f"{MODULE}.configuration", config) + mocker.patch(f"{MODULE}.get_available_quotas", return_value={}) + mocker.patch( + f"{MODULE}.get_topic_summary", + new=mocker.AsyncMock(return_value=None), + ) + mocker.patch(f"{MODULE}.store_query_results") + + +@pytest.fixture(name="dummy_request") +def dummy_request_fixture() -> Request: + """Minimal FastAPI Request with authorized_actions for responses endpoint.""" + req = Request(scope={"type": "http", "headers": []}) + req.state.authorized_actions = {Action.QUERY, Action.READ_OTHERS_CONVERSATIONS} + return req + + +@pytest.fixture(name="minimal_config") +def 
minimal_config_fixture() -> AppConfig: + """Minimal AppConfig for responses endpoint tests.""" + cfg = AppConfig() + cfg.init_from_dict( + { + "name": "test", + "service": {"host": "localhost", "port": 8080}, + "llama_stack": { + "api_key": "test-key", + "url": "http://test.com:1234", + "use_as_library_client": False, + }, + "user_data_collection": {}, + "authentication": {"module": "noop"}, + "authorization": {"access_rules": []}, + } + ) + return cfg + + +def _request_with_model_and_conv( + input_text: str = "Hello", model: str = "provider/model1" +) -> ResponsesRequest: + """Build request with model and conversation set (as handler does).""" + return ResponsesRequest( + input=input_text, + model=model, + conversation=VALID_CONV_ID, + ) + + +def _request_with_previous_response_id( + input_text: str = "Hello", + model: str = "provider/model1", + previous_response_id: str = "resp_prev_123", + store: bool = True, +) -> ResponsesRequest: + """Build request with previous_response_id (conversation set by handler).""" + request = ResponsesRequest( + input=input_text, + model=model, + previous_response_id=previous_response_id, + store=store, + ) + request.conversation = VALID_CONV_ID + return request + + +class TestResponsesEndpointHandler: + """Unit tests for responses_endpoint_handler.""" + + @pytest.mark.asyncio + async def test_successful_responses_string_input_non_streaming( + self, + dummy_request: Request, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test successful responses request with string input returns ResponsesResponse.""" + responses_request = ResponsesRequest(input="What is Kubernetes?") + _patch_base(mocker, minimal_config) + _patch_client(mocker) + _patch_resolve_response_context(mocker, conversation="conv_new_123") + mocker.patch( + f"{MODULE}.select_model_for_responses", + new=mocker.AsyncMock(return_value="provider/model1"), + ) + mocker.patch( + f"{MODULE}.check_model_configured", + 
new=mocker.AsyncMock(return_value=True), + ) + _patch_rag(mocker) + _patch_moderation(mocker, decision="passed") + + mock_response = _make_responses_response( + output_text="Kubernetes is a container orchestration platform.", + conversation="conv_new_123", + ) + mocker.patch( + f"{MODULE}.handle_non_streaming_response", + new=mocker.AsyncMock(return_value=mock_response), + ) + + response = await responses_endpoint_handler( + request=dummy_request, + responses_request=responses_request, + auth=MOCK_AUTH, + mcp_headers={}, + ) + assert isinstance(response, ResponsesResponse) + assert ( + response.output_text == "Kubernetes is a container orchestration platform." + ) + assert response.conversation == "conv_new_123" + + @pytest.mark.asyncio + async def test_responses_with_conversation_validates_and_retrieves( + self, + dummy_request: Request, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that providing conversation ID calls validate_and_retrieve_conversation.""" + responses_request = ResponsesRequest( + input="Follow-up question", + conversation=VALID_CONV_ID, + ) + _patch_base(mocker, minimal_config) + mock_user_conv = mocker.Mock(spec=UserConversation) + mock_user_conv.id = VALID_CONV_ID_NORMALIZED + mock_validate = mocker.patch( + f"{ENDPOINTS_MODULE}.validate_and_retrieve_conversation", + return_value=mock_user_conv, + ) + _, mock_holder = _patch_client(mocker) + mocker.patch( + f"{ENDPOINTS_MODULE}.AsyncLlamaStackClientHolder", + return_value=mock_holder, + ) + mocker.patch( + f"{ENDPOINTS_MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + mocker.patch( + f"{ENDPOINTS_MODULE}.to_llama_stack_conversation_id", + return_value=VALID_CONV_ID, + ) + mocker.patch( + f"{MODULE}.select_model_for_responses", + new=mocker.AsyncMock(return_value="provider/model1"), + ) + mocker.patch( + f"{MODULE}.check_model_configured", + new=mocker.AsyncMock(return_value=True), + ) + _patch_rag(mocker) + 
_patch_moderation(mocker, decision="passed") + mocker.patch( + f"{MODULE}.handle_non_streaming_response", + new=mocker.AsyncMock( + return_value=_make_responses_response( + output_text="Answer", + conversation=VALID_CONV_ID_NORMALIZED, + ) + ), + ) + + await responses_endpoint_handler( + request=dummy_request, + responses_request=responses_request, + auth=MOCK_AUTH, + mcp_headers={}, + ) + + mock_validate.assert_called_once() + + @pytest.mark.asyncio + async def test_responses_model_not_configured_raises_404( + self, + dummy_request: Request, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that unconfigured model leads to 404 HTTPException.""" + responses_request = ResponsesRequest(input="Hello", model="provider/unknown") + _patch_base(mocker, minimal_config) + _patch_client(mocker) + _patch_resolve_response_context(mocker) + mocker.patch( + f"{MODULE}.select_model_for_responses", + new=mocker.AsyncMock(return_value="provider/unknown"), + ) + mocker.patch( + f"{MODULE}.check_model_configured", + new=mocker.AsyncMock(return_value=False), + ) + mocker.patch( + f"{MODULE}.extract_provider_and_model_from_model_id", + return_value=("provider", "unknown"), + ) + + with pytest.raises(HTTPException) as exc_info: + await responses_endpoint_handler( + request=dummy_request, + responses_request=responses_request, + auth=MOCK_AUTH, + mcp_headers={}, + ) + assert exc_info.value.status_code == 404 + + @pytest.mark.asyncio + async def test_responses_streaming_returns_streaming_response( + self, + dummy_request: Request, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that stream=True delegates to handle_streaming_response.""" + responses_request = ResponsesRequest(input="Stream this", stream=True) + _patch_base(mocker, minimal_config) + _patch_client(mocker) + _patch_resolve_response_context(mocker) + mocker.patch( + f"{MODULE}.select_model_for_responses", + new=mocker.AsyncMock(return_value="provider/model1"), + ) + 
mocker.patch( + f"{MODULE}.check_model_configured", + new=mocker.AsyncMock(return_value=True), + ) + _patch_rag(mocker) + _patch_moderation(mocker, decision="passed") + mock_streaming = mocker.Mock(spec=StreamingResponse) + mocker.patch( + f"{MODULE}.handle_streaming_response", + new=mocker.AsyncMock(return_value=mock_streaming), + ) + + response = await responses_endpoint_handler( + request=dummy_request, + responses_request=responses_request, + auth=MOCK_AUTH, + mcp_headers={}, + ) + assert response is mock_streaming + + @pytest.mark.asyncio + async def test_responses_azure_token_refresh( + self, + dummy_request: Request, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that Azure token refresh is called when model starts with azure.""" + responses_request = ResponsesRequest(input="Hi", model="azure/some-model") + _patch_base(mocker, minimal_config) + _patch_client(mocker) + _patch_resolve_response_context(mocker) + mocker.patch( + f"{MODULE}.select_model_for_responses", + new=mocker.AsyncMock(return_value="azure/some-model"), + ) + mocker.patch( + f"{MODULE}.check_model_configured", + new=mocker.AsyncMock(return_value=True), + ) + mock_azure = mocker.Mock() + mock_azure.is_entra_id_configured = True + mock_azure.is_token_expired = True + mock_azure.refresh_token.return_value = True + mocker.patch(f"{MODULE}.AzureEntraIDManager", return_value=mock_azure) + updated_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_update_token = mocker.patch( + f"{MODULE}.update_azure_token", + new=mocker.AsyncMock(return_value=updated_client), + ) + _patch_rag(mocker) + _patch_moderation(mocker, decision="passed") + mocker.patch( + f"{MODULE}.handle_non_streaming_response", + new=mocker.AsyncMock( + return_value=_make_responses_response( + output_text="Ok", + conversation="conv_new", + model="azure/some-model", + ) + ), + ) + + await responses_endpoint_handler( + request=dummy_request, + responses_request=responses_request, + auth=MOCK_AUTH, 
+ mcp_headers={}, + ) + mock_update_token.assert_called_once() + + @pytest.mark.asyncio + async def test_responses_structured_input_appends_rag_message( + self, + dummy_request: Request, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that non-string input uses extract_text and appends RAG message.""" + structured_input: list[Any] = [ + OpenAIResponseMessage(role="user", content="What is K8s?"), + ] + responses_request = ResponsesRequest( + input=cast(Any, structured_input), + ) + _patch_base(mocker, minimal_config) + _patch_client(mocker) + _patch_resolve_response_context(mocker) + mocker.patch( + f"{MODULE}.select_model_for_responses", + new=mocker.AsyncMock(return_value="provider/model1"), + ) + mocker.patch( + f"{MODULE}.check_model_configured", + new=mocker.AsyncMock(return_value=True), + ) + mock_build_rag = mocker.patch( + f"{MODULE}.build_rag_context", + new=mocker.AsyncMock( + return_value=RAGContext( + context_text="\n\nRelevant documentation:\nDoc1", + referenced_documents=[], + ), + ), + ) + _patch_moderation(mocker, decision="passed") + mocker.patch( + f"{MODULE}.handle_non_streaming_response", + new=mocker.AsyncMock( + return_value=_make_responses_response( + output_text="K8s is Kubernetes.", + conversation="conv_new", + ) + ), + ) + + await responses_endpoint_handler( + request=dummy_request, + responses_request=responses_request, + auth=MOCK_AUTH, + mcp_headers={}, + ) + + mock_build_rag.assert_called_once() + call_args = mock_build_rag.call_args[0] + assert ( + call_args[2] == "What is K8s?" 
+ ) # input_text (3rd arg to build_rag_context) + + @pytest.mark.asyncio + async def test_responses_blocked_with_conversation_appends_refusal( + self, + dummy_request: Request, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Blocked moderation with conversation calls append_turn_items_to_conversation.""" + responses_request = ResponsesRequest( + input="Bad", + conversation=VALID_CONV_ID, + stream=False, + model="provider/model1", + ) + _patch_base(mocker, minimal_config) + mock_user_conv = mocker.Mock(spec=UserConversation) + mock_user_conv.id = VALID_CONV_ID_NORMALIZED + mocker.patch( + f"{ENDPOINTS_MODULE}.validate_and_retrieve_conversation", + return_value=mock_user_conv, + ) + mock_client, mock_holder = _patch_client(mocker) + mocker.patch( + f"{ENDPOINTS_MODULE}.AsyncLlamaStackClientHolder", + return_value=mock_holder, + ) + mocker.patch( + f"{ENDPOINTS_MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + mocker.patch( + f"{ENDPOINTS_MODULE}.to_llama_stack_conversation_id", + return_value=VALID_CONV_ID, + ) + mocker.patch( + f"{MODULE}.select_model_for_responses", + new=mocker.AsyncMock(return_value="provider/model1"), + ) + mocker.patch( + f"{MODULE}.check_model_configured", + new=mocker.AsyncMock(return_value=True), + ) + _patch_rag(mocker) + mock_moderation = _patch_moderation(mocker, decision="blocked") + mock_moderation.message = "Blocked" + mock_moderation.moderation_id = "resp_blocked_123" + mock_moderation.refusal_response = OpenAIResponseMessage( + type="message", role="assistant", content="Blocked" + ) + mock_append = mocker.patch( + f"{MODULE}.append_turn_items_to_conversation", + new=mocker.AsyncMock(), + ) + mocker.patch(f"{MODULE}.store_query_results") + + response = await responses_endpoint_handler( + request=dummy_request, + responses_request=responses_request, + auth=MOCK_AUTH, + mcp_headers={}, + ) + + mock_append.assert_awaited_once_with( + client=mock_client, + 
conversation_id=VALID_CONV_ID, + user_input=responses_request.input, + llm_output=[mock_moderation.refusal_response], + ) + assert isinstance(response, ResponsesResponse) + payload = response.model_dump() + assert "model" in payload, "Handler must set model on the response payload" + ResponsesResponse.model_validate(payload) + + +class TestHandleNonStreamingResponse: + """Unit tests for handle_non_streaming_response.""" + + @pytest.mark.asyncio + async def test_handle_non_streaming_blocked_returns_refusal( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that blocked moderation returns response with refusal message.""" + request = _request_with_model_and_conv("Bad input") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_moderation = mocker.Mock() + mock_moderation.decision = "blocked" + mock_moderation.message = "Content blocked" + mock_refusal = mocker.Mock(spec=OpenAIResponseMessage) + mock_refusal.type = "message" + mock_refusal.role = "assistant" + mock_refusal.content = "Content blocked" + mock_moderation.refusal_response = mock_refusal + + _patch_handle_non_streaming_common(mocker, minimal_config) + mock_client.conversations.items.create = mocker.AsyncMock() + mock_api_response = mocker.Mock() + mock_api_response.output = [mock_refusal] + mock_api_response.model_dump.return_value = { + "id": "resp_blocked", + "object": "response", + "created_at": 0, + "status": "completed", + "model": "provider/model1", + "output": [mock_refusal], + "usage": { + "input_tokens": 0, + "output_tokens": 0, + "total_tokens": 0, + "input_tokens_details": {"cached_tokens": 0}, + "output_tokens_details": {"reasoning_tokens": 0}, + }, + } + mocker.patch( + f"{MODULE}.OpenAIResponseObject.model_construct", + return_value=mock_api_response, + ) + + response = await handle_non_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Bad input", + started_at=datetime.now(UTC), + 
moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + assert isinstance(response, ResponsesResponse) + assert response.output_text == "Content blocked" + mock_client.responses.create.assert_not_called() + + @pytest.mark.asyncio + async def test_handle_non_streaming_success_returns_response( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test successful handle_non_streaming_response returns ResponsesResponse.""" + request = _request_with_model_and_conv("Hello") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + mock_api_response = mocker.Mock(spec=OpenAIResponseObject) + mock_api_response.output = [] + mock_api_response.usage = mocker.Mock( + input_tokens=1, output_tokens=2, total_tokens=3 + ) + mock_api_response.model_dump.return_value = { + "id": "resp_1", + "object": "response", + "created_at": 0, + "status": "completed", + "model": "provider/model1", + "output": [], + "usage": { + "input_tokens": 1, + "output_tokens": 2, + "total_tokens": 3, + "input_tokens_details": {"cached_tokens": 0}, + "output_tokens_details": {"reasoning_tokens": 0}, + }, + } + mock_client.responses.create = mocker.AsyncMock(return_value=mock_api_response) + + _patch_handle_non_streaming_common(mocker, minimal_config) + mocker.patch( + f"{MODULE}.extract_token_usage", + return_value=mocker.Mock(input_tokens=1, output_tokens=2), + ) + mocker.patch(f"{MODULE}.consume_query_tokens") + mocker.patch( + f"{MODULE}.build_turn_summary", + return_value=mocker.Mock(referenced_documents=[]), + ) + mocker.patch( + f"{MODULE}.extract_text_from_response_items", + return_value="Model reply", + ) + mocker.patch( + f"{MODULE}.extract_vector_store_ids_from_tools", + return_value=[], + ) + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + + response = await handle_non_streaming_response( + client=mock_client, + 
request=request, + auth=MOCK_AUTH, + input_text="Hello", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + + assert isinstance(response, ResponsesResponse) + assert response.output_text == "Model reply" + mock_client.responses.create.assert_called_once() + + @pytest.mark.asyncio + async def test_handle_non_streaming_with_previous_response_id_appends_turn( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test append_turn_items_to_conversation triggers with store and previous_response_id.""" + request = _request_with_previous_response_id("Hi", previous_response_id="r1") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + mock_api_response = mocker.Mock(spec=OpenAIResponseObject) + mock_api_response.output = [] + mock_api_response.id = "resp_1" + mock_api_response.usage = mocker.Mock( + input_tokens=1, output_tokens=2, total_tokens=3 + ) + mock_api_response.model_dump.return_value = { + "id": "resp_1", + "object": "response", + "created_at": 0, + "status": "completed", + "model": "provider/model1", + "output": [], + "usage": { + "input_tokens": 1, + "output_tokens": 2, + "total_tokens": 3, + "input_tokens_details": {"cached_tokens": 0}, + "output_tokens_details": {"reasoning_tokens": 0}, + }, + } + mock_client.responses.create = mocker.AsyncMock(return_value=mock_api_response) + + _patch_handle_non_streaming_common(mocker, minimal_config) + mocker.patch( + f"{MODULE}.extract_token_usage", + return_value=mocker.Mock(input_tokens=1, output_tokens=2), + ) + mocker.patch(f"{MODULE}.consume_query_tokens") + mocker.patch( + f"{MODULE}.build_turn_summary", + return_value=mocker.Mock(referenced_documents=[]), + ) + mocker.patch( + f"{MODULE}.extract_text_from_response_items", + return_value="Reply", + ) + mocker.patch( + f"{MODULE}.extract_vector_store_ids_from_tools", + return_value=[], + ) + 
mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + mock_append = mocker.patch( + f"{MODULE}.append_turn_items_to_conversation", + new=mocker.AsyncMock(), + ) + + await handle_non_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Hi", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + + mock_append.assert_awaited_once() + call_args = mock_append.call_args[0] + assert call_args[1] == VALID_CONV_ID + assert call_args[3] == [] + + @pytest.mark.asyncio + async def test_handle_non_streaming_context_length_raises_413( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that RuntimeError with context_length raises 413.""" + request = _request_with_model_and_conv("Long input") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_client.responses.create = mocker.AsyncMock( + side_effect=RuntimeError("context_length exceeded") + ) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + _patch_handle_non_streaming_common(mocker, minimal_config) + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + + with pytest.raises(HTTPException) as exc_info: + await handle_non_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Long input", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + + assert exc_info.value.status_code == 413 + + @pytest.mark.asyncio + async def test_handle_non_streaming_connection_error_raises_503( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that APIConnectionError raises 503.""" + request = _request_with_model_and_conv("Hi") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_client.responses.create = mocker.AsyncMock( + 
side_effect=APIConnectionError( + message="Connection failed", + request=mocker.Mock(), + ) + ) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + _patch_handle_non_streaming_common(mocker, minimal_config) + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + + with pytest.raises(HTTPException) as exc_info: + await handle_non_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Hi", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + + assert exc_info.value.status_code == 503 + + @pytest.mark.asyncio + async def test_handle_non_streaming_api_status_error_raises_http( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that APIStatusError is handled and re-raised as HTTPException.""" + request = _request_with_model_and_conv("Hi") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_client.responses.create = mocker.AsyncMock( + side_effect=APIStatusError( + message="API error", + response=mocker.Mock(request=None), + body=None, + ) + ) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + _patch_handle_non_streaming_common(mocker, minimal_config) + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + mocker.patch( + f"{MODULE}.handle_known_apistatus_errors", + return_value=mocker.Mock( + model_dump=lambda: { + "status_code": 500, + "detail": {"response": "Error", "cause": "API error"}, + } + ), + ) + + with pytest.raises(HTTPException) as exc_info: + await handle_non_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Hi", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + + assert exc_info.value.status_code == 500 + + @pytest.mark.asyncio + async def 
test_handle_non_streaming_runtime_error_without_context_reraises( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that RuntimeError without context_length is re-raised.""" + request = _request_with_model_and_conv("Hi") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_client.responses.create = mocker.AsyncMock( + side_effect=RuntimeError("Some other error") + ) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + _patch_handle_non_streaming_common(mocker, minimal_config) + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + + with pytest.raises(RuntimeError, match="Some other error"): + await handle_non_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Hi", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + + +class TestHandleStreamingResponse: + """Unit tests for handle_streaming_response and streaming generators.""" + + @pytest.mark.asyncio + async def test_handle_streaming_blocked_returns_sse_consumes_shield_generator( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test streaming with blocked moderation yields SSE from shield_violation_generator.""" + request = _request_with_model_and_conv("Bad", model="provider/model1") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_moderation = mocker.Mock() + mock_moderation.decision = "blocked" + mock_moderation.message = "Blocked" + mock_moderation.moderation_id = "mod_123" + mock_refusal = OpenAIResponseMessage( + role="assistant", content="Blocked", type="message" + ) + mock_moderation.refusal_response = mock_refusal + + mocker.patch(f"{MODULE}.configuration", minimal_config) + mocker.patch(f"{MODULE}.get_available_quotas", return_value={}) + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + 
mocker.patch( + f"{MODULE}.get_topic_summary", + new=mocker.AsyncMock(return_value=None), + ) + mocker.patch(f"{MODULE}.store_query_results") + + mock_client.conversations.items.create = mocker.AsyncMock() + response = await handle_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Bad", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + + assert isinstance(response, StreamingResponse) + assert response.media_type == "text/event-stream" + collected: list[str] = [] + async for part in response.body_iterator: + chunk_str = ( + part.decode("utf-8") + if isinstance(part, bytes) + else (part if isinstance(part, str) else bytes(part).decode("utf-8")) + ) + collected.append(chunk_str) + body = "".join(collected) + assert "event: response.created" in body + assert "event: response.output_item.added" in body + assert "event: response.output_item.done" in body + assert "event: response.completed" in body + assert "[DONE]" in body + mock_client.responses.create.assert_not_called() + + @pytest.mark.asyncio + async def test_handle_streaming_success_returns_sse_consumes_response_generator( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test streaming with passed moderation yields SSE from response_generator.""" + request = _request_with_model_and_conv("Hi", model="provider/model1") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + mock_chunk = mocker.Mock() + mock_chunk.type = "response.completed" + mock_chunk.response = mocker.Mock() + mock_chunk.response.id = "r1" + mock_chunk.response.output = [] + mock_chunk.response.usage = mocker.Mock( + input_tokens=1, output_tokens=2, total_tokens=3 + ) + mock_chunk.model_dump.return_value = { + "type": "response.completed", + "response": {"id": "r1", "usage": {"input_tokens": 1}}, + } + + async def mock_stream() -> Any: 
+ yield mock_chunk + + mock_client.responses.create = mocker.AsyncMock(return_value=mock_stream()) + + mocker.patch(f"{MODULE}.configuration", minimal_config) + mocker.patch(f"{MODULE}.get_available_quotas", return_value={}) + mocker.patch(f"{MODULE}.extract_token_usage", return_value=mocker.Mock()) + mocker.patch(f"{MODULE}.consume_query_tokens") + mocker.patch(f"{MODULE}.extract_vector_store_ids_from_tools", return_value=[]) + mocker.patch( + f"{MODULE}.build_turn_summary", + return_value=TurnSummary(referenced_documents=[]), + ) + mocker.patch( + f"{MODULE}.get_topic_summary", + new=mocker.AsyncMock(return_value=None), + ) + mocker.patch(f"{MODULE}.store_query_results") + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mock_client + mocker.patch(f"{MODULE}.AsyncLlamaStackClientHolder", return_value=mock_holder) + response = await handle_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Hi", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + assert isinstance(response, StreamingResponse) + collected: list[str] = [] + async for part in response.body_iterator: + chunk_str = ( + part.decode("utf-8") + if isinstance(part, bytes) + else (part if isinstance(part, str) else bytes(part).decode("utf-8")) + ) + collected.append(chunk_str) + body = "".join(collected) + assert "response.completed" in body or "event:" in body + assert "[DONE]" in body + mock_client.responses.create.assert_called_once() + + @pytest.mark.asyncio + async def test_handle_streaming_in_progress_chunk_sets_quotas_and_output_text( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test in_progress chunk includes available_quotas and output_text.""" + request = _request_with_model_and_conv("Hi", model="provider/model1") + mock_client = 
mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + in_progress_chunk = mocker.Mock() + in_progress_chunk.type = "response.in_progress" + in_progress_chunk.model_dump.return_value = { + "type": "response.in_progress", + "response": {"id": "r0"}, + } + + completed_chunk = mocker.Mock() + completed_chunk.type = "response.completed" + completed_chunk.response = mocker.Mock() + completed_chunk.response.id = "r1" + completed_chunk.response.output = [] + completed_chunk.response.usage = mocker.Mock( + input_tokens=1, output_tokens=2, total_tokens=3 + ) + completed_chunk.model_dump.return_value = { + "type": "response.completed", + "response": {"id": "r1", "usage": {"input_tokens": 1}}, + } + + async def mock_stream() -> Any: + yield in_progress_chunk + yield completed_chunk + + mock_client.responses.create = mocker.AsyncMock(return_value=mock_stream()) + + mocker.patch(f"{MODULE}.configuration", minimal_config) + mocker.patch(f"{MODULE}.get_available_quotas", return_value={}) + mocker.patch(f"{MODULE}.extract_token_usage", return_value=mocker.Mock()) + mocker.patch(f"{MODULE}.consume_query_tokens") + mocker.patch(f"{MODULE}.extract_vector_store_ids_from_tools", return_value=[]) + mocker.patch( + f"{MODULE}.build_turn_summary", + return_value=TurnSummary(referenced_documents=[]), + ) + mocker.patch( + f"{MODULE}.get_topic_summary", + new=mocker.AsyncMock(return_value=None), + ) + mocker.patch(f"{MODULE}.store_query_results") + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mock_client + mocker.patch(f"{MODULE}.AsyncLlamaStackClientHolder", return_value=mock_holder) + + response = await handle_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Hi", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + 
inline_rag_context=RAGContext(), + ) + collected: list[str] = [] + async for part in response.body_iterator: + chunk_str = ( + part.decode("utf-8") + if isinstance(part, bytes) + else (part if isinstance(part, str) else bytes(part).decode("utf-8")) + ) + collected.append(chunk_str) + body = "".join(collected) + assert "response.in_progress" in body + assert '"available_quotas":{}' in body or '"available_quotas": {}' in body + assert "[DONE]" in body + + @pytest.mark.asyncio + async def test_handle_streaming_builds_tool_call_summary_from_output( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that response output items are passed to build_tool_call_summary.""" + request = _request_with_model_and_conv("Hi", model="provider/model1") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + mock_output_item = mocker.Mock() + completed_chunk = mocker.Mock() + completed_chunk.type = "response.completed" + completed_chunk.response = mocker.Mock() + completed_chunk.response.id = "r1" + completed_chunk.response.output = [mock_output_item] + completed_chunk.response.usage = mocker.Mock( + input_tokens=1, output_tokens=2, total_tokens=3 + ) + completed_chunk.model_dump.return_value = { + "type": "response.completed", + "response": {"id": "r1", "usage": {"input_tokens": 1}}, + } + + async def mock_stream() -> Any: + yield completed_chunk + + mock_client.responses.create = mocker.AsyncMock(return_value=mock_stream()) + + mocker.patch(f"{MODULE}.configuration", minimal_config) + mocker.patch(f"{MODULE}.get_available_quotas", return_value={}) + mocker.patch(f"{MODULE}.extract_token_usage", return_value=mocker.Mock()) + mocker.patch(f"{MODULE}.consume_query_tokens") + mocker.patch(f"{MODULE}.extract_vector_store_ids_from_tools", return_value=[]) + mocker.patch( + f"{MODULE}.build_turn_summary", + return_value=TurnSummary(referenced_documents=[]), + ) + 
mock_build_tool_call = mocker.patch( + f"{MODULE}.build_tool_call_summary", + return_value=(mocker.Mock(), mocker.Mock()), + ) + mocker.patch( + f"{MODULE}.get_topic_summary", + new=mocker.AsyncMock(return_value=None), + ) + mocker.patch(f"{MODULE}.store_query_results") + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + mocker.patch(f"{MODULE}.parse_referenced_documents", return_value=[]) + mocker.patch( + f"{MODULE}.deduplicate_referenced_documents", side_effect=lambda x: x + ) + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mock_client + mocker.patch(f"{MODULE}.AsyncLlamaStackClientHolder", return_value=mock_holder) + + response = await handle_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Hi", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + collected: list[str] = [] + async for part in response.body_iterator: + chunk_str = ( + part.decode("utf-8") + if isinstance(part, bytes) + else (part if isinstance(part, str) else bytes(part).decode("utf-8")) + ) + collected.append(chunk_str) + mock_build_tool_call.assert_called_once() + + @pytest.mark.asyncio + async def test_handle_streaming_with_previous_response_id_appends_turn( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test that store=True and previous_response_id in streaming triggers append_turn_items.""" + request = _request_with_previous_response_id( + "Hi", previous_response_id="r_prev" + ) + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + completed_chunk = mocker.Mock() + completed_chunk.type = "response.completed" + completed_chunk.response = mocker.Mock() + completed_chunk.response.id = "r1" + completed_chunk.response.output = [] + completed_chunk.response.usage = mocker.Mock( + input_tokens=1, 
output_tokens=2, total_tokens=3 + ) + completed_chunk.model_dump.return_value = { + "type": "response.completed", + "response": {"id": "r1", "usage": {"input_tokens": 1}}, + } + + async def mock_stream() -> Any: + yield completed_chunk + + mock_client.responses.create = mocker.AsyncMock(return_value=mock_stream()) + + mocker.patch(f"{MODULE}.configuration", minimal_config) + mocker.patch(f"{MODULE}.get_available_quotas", return_value={}) + mocker.patch(f"{MODULE}.extract_token_usage", return_value=mocker.Mock()) + mocker.patch(f"{MODULE}.consume_query_tokens") + mocker.patch(f"{MODULE}.extract_vector_store_ids_from_tools", return_value=[]) + mocker.patch( + f"{MODULE}.build_turn_summary", + return_value=TurnSummary(referenced_documents=[]), + ) + mocker.patch( + f"{MODULE}.get_topic_summary", + new=mocker.AsyncMock(return_value=None), + ) + mocker.patch(f"{MODULE}.store_query_results") + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + mock_append = mocker.patch( + f"{MODULE}.append_turn_items_to_conversation", + new=mocker.AsyncMock(), + ) + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mock_client + mocker.patch(f"{MODULE}.AsyncLlamaStackClientHolder", return_value=mock_holder) + + response = await handle_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Hi", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + collected: list[str] = [] + async for part in response.body_iterator: + chunk_str = ( + part.decode("utf-8") + if isinstance(part, bytes) + else (part if isinstance(part, str) else bytes(part).decode("utf-8")) + ) + collected.append(chunk_str) + mock_append.assert_called_once() + call_args = mock_append.call_args[0] + assert call_args[1] == VALID_CONV_ID + assert call_args[3] == [] + + @pytest.mark.asyncio + async def test_handle_streaming_context_length_raises_413( + self, + 
minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test streaming raises 413 when create raises RuntimeError context_length.""" + request = _request_with_model_and_conv("Long", model="provider/model1") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_client.responses.create = mocker.AsyncMock( + side_effect=RuntimeError("context_length exceeded") + ) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + mocker.patch(f"{MODULE}.configuration", minimal_config) + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + with pytest.raises(HTTPException) as exc_info: + await handle_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Long", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + assert exc_info.value.status_code == 413 + + @pytest.mark.asyncio + async def test_handle_streaming_connection_error_raises_503( + self, + minimal_config: AppConfig, + mocker: MockerFixture, + ) -> None: + """Test streaming raises 503 when create raises APIConnectionError.""" + request = _request_with_model_and_conv("Hi", model="provider/model1") + mock_client = mocker.AsyncMock(spec=AsyncLlamaStackClient) + mock_client.responses.create = mocker.AsyncMock( + side_effect=APIConnectionError( + message="Connection failed", + request=mocker.Mock(), + ) + ) + mock_moderation = mocker.Mock() + mock_moderation.decision = "passed" + + mocker.patch(f"{MODULE}.configuration", minimal_config) + mocker.patch( + f"{MODULE}.normalize_conversation_id", + return_value=VALID_CONV_ID_NORMALIZED, + ) + with pytest.raises(HTTPException) as exc_info: + await handle_streaming_response( + client=mock_client, + request=request, + auth=MOCK_AUTH, + input_text="Hi", + started_at=datetime.now(UTC), + moderation_result=mock_moderation, + inline_rag_context=RAGContext(), + ) + + assert exc_info.value.status_code 
== 503 diff --git a/tests/unit/app/endpoints/test_streaming_query.py b/tests/unit/app/endpoints/test_streaming_query.py index 8107a387c..3e0670e94 100644 --- a/tests/unit/app/endpoints/test_streaming_query.py +++ b/tests/unit/app/endpoints/test_streaming_query.py @@ -52,7 +52,14 @@ from models.responses import InternalServerErrorResponse from utils.token_counter import TokenCounter from utils.stream_interrupts import StreamInterruptRegistry -from utils.types import RAGContext, ReferencedDocument, ResponsesApiParams, TurnSummary +from utils.types import ( + RAGChunk, + RAGContext, + ReferencedDocument, + ResponsesApiParams, + ShieldModerationPassed, + TurnSummary, +) MOCK_AUTH_STREAMING = ( "00000001-0001-0001-0001-000000000001", @@ -354,6 +361,10 @@ async def test_successful_streaming_query( "app.endpoints.streaming_query.prepare_responses_params", new=mocker.AsyncMock(return_value=mock_responses_params), ) + mocker.patch( + "app.endpoints.streaming_query.run_shield_moderation", + new=mocker.AsyncMock(return_value=ShieldModerationPassed()), + ) mocker.patch("app.endpoints.streaming_query.AzureEntraIDManager") mocker.patch( @@ -437,6 +448,10 @@ async def test_streaming_query_text_media_type_header( "app.endpoints.streaming_query.prepare_responses_params", new=mocker.AsyncMock(return_value=mock_responses_params), ) + mocker.patch( + "app.endpoints.streaming_query.run_shield_moderation", + new=mocker.AsyncMock(return_value=ShieldModerationPassed()), + ) mocker.patch("app.endpoints.streaming_query.AzureEntraIDManager") mocker.patch( @@ -531,6 +546,10 @@ async def test_streaming_query_with_conversation( "app.endpoints.streaming_query.prepare_responses_params", new=mocker.AsyncMock(return_value=mock_responses_params), ) + mocker.patch( + "app.endpoints.streaming_query.run_shield_moderation", + new=mocker.AsyncMock(return_value=ShieldModerationPassed()), + ) mocker.patch("app.endpoints.streaming_query.AzureEntraIDManager") mocker.patch( @@ -623,6 +642,10 @@ async def 
test_streaming_query_with_attachments( "app.endpoints.streaming_query.prepare_responses_params", new=mocker.AsyncMock(return_value=mock_responses_params), ) + mocker.patch( + "app.endpoints.streaming_query.run_shield_moderation", + new=mocker.AsyncMock(return_value=ShieldModerationPassed()), + ) mocker.patch("app.endpoints.streaming_query.AzureEntraIDManager") mocker.patch( @@ -725,6 +748,10 @@ async def test_streaming_query_azure_token_refresh( "app.endpoints.streaming_query.extract_provider_and_model_from_model_id", return_value=("azure", "model1"), ) + mocker.patch( + "app.endpoints.streaming_query.run_shield_moderation", + new=mocker.AsyncMock(return_value=ShieldModerationPassed()), + ) mocker.patch("app.endpoints.streaming_query.metrics.llm_calls_total") async def mock_generator() -> AsyncIterator[str]: @@ -784,17 +811,15 @@ async def test_retrieve_response_generator_success( mock_context.client = mock_client mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_context.query_request = QueryRequest( query="test" ) # pyright: ignore[reportCallIssue] + mock_context.moderation_result = ShieldModerationPassed() async def mock_response_gen() -> AsyncIterator[str]: yield "test" - mocker.patch( - "app.endpoints.streaming_query.run_shield_moderation", - new=mocker.AsyncMock(return_value=mocker.Mock(blocked=False)), - ) mock_client.responses = mocker.Mock() mock_client.responses.create = mocker.AsyncMock( return_value=mock_response_gen() @@ -812,7 +837,7 @@ async def mock_response_generator( ) generator, turn_summary = await retrieve_response_generator( - mock_responses_params, mock_context, [] + mock_responses_params, mock_context ) assert isinstance(turn_summary, TurnSummary) @@ -834,6 +859,7 @@ async def test_retrieve_response_generator_shield_blocked( mock_context.client = mock_client mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = 
RAGContext() mock_context.query_request = QueryRequest( query="test", media_type=MEDIA_TYPE_TEXT ) # pyright: ignore[reportCallIssue] @@ -841,17 +867,16 @@ async def test_retrieve_response_generator_shield_blocked( mock_moderation_result = mocker.Mock() mock_moderation_result.decision = "blocked" mock_moderation_result.message = "Content blocked" + mock_moderation_result.moderation_id = "mod_123" + mock_moderation_result.refusal_response = mocker.Mock() + mock_context.moderation_result = mock_moderation_result mocker.patch( - "app.endpoints.streaming_query.run_shield_moderation", - new=mocker.AsyncMock(return_value=mock_moderation_result), - ) - mocker.patch( - "app.endpoints.streaming_query.append_turn_to_conversation", + "app.endpoints.streaming_query.append_turn_items_to_conversation", new=mocker.AsyncMock(), ) _generator, turn_summary = await retrieve_response_generator( - mock_responses_params, mock_context, [] + mock_responses_params, mock_context ) assert isinstance(turn_summary, TurnSummary) @@ -878,14 +903,12 @@ async def test_retrieve_response_generator_connection_error( mock_context.client = mock_client mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_context.query_request = QueryRequest( query="test" ) # pyright: ignore[reportCallIssue] + mock_context.moderation_result = ShieldModerationPassed() - mocker.patch( - "app.endpoints.streaming_query.run_shield_moderation", - new=mocker.AsyncMock(return_value=mocker.Mock(blocked=False)), - ) mock_request_obj = mocker.Mock() mock_client.responses = mocker.Mock() mock_client.responses.create = mocker.AsyncMock( @@ -908,7 +931,7 @@ async def test_retrieve_response_generator_connection_error( ) with pytest.raises(HTTPException) as exc_info: - await retrieve_response_generator(mock_responses_params, mock_context, []) + await retrieve_response_generator(mock_responses_params, mock_context) assert exc_info.value.status_code == 503 @@ -933,14 
+956,12 @@ async def test_retrieve_response_generator_api_status_error( mock_context.client = mock_client mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_context.query_request = QueryRequest( query="test" ) # pyright: ignore[reportCallIssue] + mock_context.moderation_result = ShieldModerationPassed() - mocker.patch( - "app.endpoints.streaming_query.run_shield_moderation", - new=mocker.AsyncMock(return_value=mocker.Mock(blocked=False)), - ) mock_request_obj = mocker.Mock() mock_client.responses = mocker.Mock() mock_client.responses.create = mocker.AsyncMock( @@ -960,7 +981,7 @@ async def test_retrieve_response_generator_api_status_error( ) with pytest.raises(HTTPException) as exc_info: - await retrieve_response_generator(mock_responses_params, mock_context, []) + await retrieve_response_generator(mock_responses_params, mock_context) assert exc_info.value.status_code == 500 @@ -985,14 +1006,12 @@ async def test_retrieve_response_generator_runtime_error_context_length( mock_context.client = mock_client mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_context.query_request = QueryRequest( query="test" ) # pyright: ignore[reportCallIssue] + mock_context.moderation_result = ShieldModerationPassed() - mocker.patch( - "app.endpoints.streaming_query.run_shield_moderation", - new=mocker.AsyncMock(return_value=mocker.Mock(blocked=False)), - ) mock_client.responses = mocker.Mock() mock_client.responses.create = mocker.AsyncMock( side_effect=RuntimeError("context_length exceeded") @@ -1009,7 +1028,7 @@ async def test_retrieve_response_generator_runtime_error_context_length( ) with pytest.raises(HTTPException) as exc_info: - await retrieve_response_generator(mock_responses_params, mock_context, []) + await retrieve_response_generator(mock_responses_params, mock_context) assert exc_info.value.status_code == 413 @@ -1034,21 +1053,19 @@ 
async def test_retrieve_response_generator_runtime_error_other( mock_context.client = mock_client mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_context.query_request = QueryRequest( query="test" ) # pyright: ignore[reportCallIssue] + mock_context.moderation_result = ShieldModerationPassed() - mocker.patch( - "app.endpoints.streaming_query.run_shield_moderation", - new=mocker.AsyncMock(return_value=mocker.Mock(blocked=False)), - ) mock_client.responses = mocker.Mock() mock_client.responses.create = mocker.AsyncMock( side_effect=RuntimeError("Some other error") ) with pytest.raises(RuntimeError): - await retrieve_response_generator(mock_responses_params, mock_context, []) + await retrieve_response_generator(mock_responses_params, mock_context) class TestGenerateResponse: @@ -1077,6 +1094,7 @@ async def mock_generator() -> AsyncIterator[str]: mock_context.user_id = "user_123" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_context.query_request = QueryRequest( query="test" ) # pyright: ignore[reportCallIssue] @@ -1134,6 +1152,7 @@ async def mock_generator() -> AsyncIterator[str]: mock_context.user_id = "user_123" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_context.query_request = QueryRequest( query="test", generate_topic_summary=True ) # pyright: ignore[reportCallIssue] @@ -1186,6 +1205,7 @@ async def mock_generator() -> AsyncIterator[str]: mock_context.conversation_id = "conv_123" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_context.user_id = "user_123" mock_context.query_request = QueryRequest( query="test" @@ -1228,6 +1248,7 @@ async def mock_generator() -> AsyncIterator[str]: mock_context.conversation_id = "conv_123" mock_context.vector_store_ids = [] 
mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_context.user_id = "user_123" mock_context.query_request = QueryRequest( query="test" @@ -1273,6 +1294,7 @@ async def mock_generator() -> AsyncIterator[str]: mock_context.conversation_id = "conv_123" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_context.user_id = "user_123" mock_context.query_request = QueryRequest( query="test", media_type=MEDIA_TYPE_JSON @@ -1320,6 +1342,7 @@ async def mock_generator() -> AsyncIterator[str]: mock_context.conversation_id = "conv_123" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_context.user_id = "user_123" mock_context.query_request = QueryRequest( query="test", media_type=MEDIA_TYPE_JSON @@ -1608,6 +1631,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: mock_context.model_id = "provider1/model1" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_turn_summary = TurnSummary() @@ -1637,6 +1661,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: mock_context.model_id = "provider1/model1" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_turn_summary = TurnSummary() @@ -1667,6 +1692,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: mock_context.model_id = "provider1/model1" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_turn_summary = TurnSummary() @@ -1707,6 +1733,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: mock_context.model_id = "provider1/model1" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = 
RAGContext() mock_turn_summary = TurnSummary() @@ -1748,6 +1775,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: mock_context.model_id = "provider1/model1" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_turn_summary = TurnSummary() @@ -1796,6 +1824,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: mock_context.model_id = "provider1/model1" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_turn_summary = TurnSummary() @@ -1846,6 +1875,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: mock_context.model_id = "provider1/model1" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_turn_summary = TurnSummary() mock_turn_summary.llm_response = "Response" @@ -1893,6 +1923,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: mock_context.model_id = "provider1/model1" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_turn_summary = TurnSummary() @@ -1938,6 +1969,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: mock_context.model_id = "provider1/model1" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_turn_summary = TurnSummary() @@ -1982,6 +2014,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: mock_context.model_id = "provider1/model1" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_turn_summary = TurnSummary() @@ -2024,6 +2057,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: mock_context.model_id = "provider1/model1" 
mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_turn_summary = TurnSummary() @@ -2067,6 +2101,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: mock_context.model_id = "provider1/model1" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_turn_summary = TurnSummary() @@ -2108,6 +2143,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: mock_context.model_id = "provider1/model1" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_turn_summary = TurnSummary() @@ -2128,6 +2164,61 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: assert len(result) > 0 assert any("error" in item for item in result) + @pytest.mark.asyncio + async def test_response_generator_merges_inline_and_tool_rag_chunks_and_documents( + self, mocker: MockerFixture + ) -> None: + """Test that inline RAG and tool-based RAG chunks/docs are correctly merged.""" + inline_chunk = RAGChunk(content="inline chunk content", source="byok") + inline_doc = ReferencedDocument(doc_title="Inline Doc") + inline_rag = RAGContext( + context_text="", + rag_chunks=[inline_chunk], + referenced_documents=[inline_doc], + ) + + tool_chunk = RAGChunk(content="tool chunk content", source="vs-1") + tool_ref_doc = ReferencedDocument(doc_title="Tool Doc") + + mock_response_obj = mocker.Mock(spec=OpenAIResponseObject) + mock_response_obj.usage = mocker.Mock() + mock_response_obj.output = [] + + async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: + completed_chunk = mocker.Mock(spec=CompletedChunk) + completed_chunk.type = "response.completed" + completed_chunk.response = mock_response_obj + yield completed_chunk + + mock_context = mocker.Mock(spec=ResponseGeneratorContext) + mock_context.query_request = 
QueryRequest( + query="test", media_type=MEDIA_TYPE_JSON + ) # pyright: ignore[reportCallIssue] + mock_context.model_id = "provider1/model1" + mock_context.vector_store_ids = [] + mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = inline_rag + + mock_turn_summary = TurnSummary() + mock_turn_summary.rag_chunks = [tool_chunk] + mock_turn_summary.referenced_documents = [tool_ref_doc] + mocker.patch( + "app.endpoints.streaming_query.parse_referenced_documents", + return_value=[tool_ref_doc], + ) + + async for _ in response_generator( + mock_turn_response(), mock_context, mock_turn_summary + ): + pass + + assert len(mock_turn_summary.rag_chunks) == 2 + assert mock_turn_summary.rag_chunks[0].content == "inline chunk content" + assert mock_turn_summary.rag_chunks[1].content == "tool chunk content" + assert len(mock_turn_summary.referenced_documents) == 2 + assert mock_turn_summary.referenced_documents[0].doc_title == "Inline Doc" + assert mock_turn_summary.referenced_documents[1].doc_title == "Tool Doc" + class TestStreamHttpErrorEvent: """Tests for stream_http_error_event function.""" @@ -2230,6 +2321,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: mock_context.model_id = "provider1/model1" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_turn_summary = TurnSummary() @@ -2282,6 +2374,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: mock_context.model_id = "provider1/model1" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_turn_summary = TurnSummary() @@ -2354,6 +2447,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: mock_context.model_id = "provider1/model1" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_turn_summary = TurnSummary() @@ 
-2369,8 +2463,7 @@ def build_mcp_tool_call_side_effect( # Remove item from dict to simulate real behavior # arguments parameter is required by function signature but unused here _ = arguments - if output_index in mcp_call_items: - del mcp_call_items[output_index] + mcp_call_items.pop(output_index, None) return mock_tool_call mocker.patch( @@ -2442,6 +2535,7 @@ async def mock_turn_response() -> AsyncIterator[OpenAIResponseObjectStream]: mock_context.model_id = "provider1/model1" mock_context.vector_store_ids = [] mock_context.rag_id_mapping = {} + mock_context.inline_rag_context = RAGContext() mock_turn_summary = TurnSummary() diff --git a/tests/unit/app/test_routers.py b/tests/unit/app/test_routers.py index 754e3fdb2..668943a50 100644 --- a/tests/unit/app/test_routers.py +++ b/tests/unit/app/test_routers.py @@ -28,6 +28,7 @@ rlsapi_v1, a2a, query, + responses, ) @@ -53,7 +54,7 @@ def include_router( # pylint: disable=too-many-arguments prefix: str = "", tags: Optional[list] = None, dependencies: Optional[Sequence] = None, - responses: Optional[dict] = None, + responses: Optional[dict] = None, # pylint: disable=redefined-outer-name deprecated: Optional[bool] = None, include_in_schema: Optional[bool] = None, default_response_class: Optional[Any] = None, @@ -108,7 +109,7 @@ def test_include_routers() -> None: include_routers(app) # are all routers added? - assert len(app.routers) == 20 + assert len(app.routers) == 21 assert root.router in app.get_routers() assert info.router in app.get_routers() assert models.router in app.get_routers() @@ -129,6 +130,7 @@ def test_include_routers() -> None: assert rlsapi_v1.router in app.get_routers() assert a2a.router in app.get_routers() assert stream_interrupt.router in app.get_routers() + assert responses.router in app.get_routers() def test_check_prefixes() -> None: @@ -136,7 +138,7 @@ def test_check_prefixes() -> None: Verify that include_routers registers the expected routers with their configured URL prefixes. 
- Asserts that 16 routers are registered on a MockFastAPI instance and that + Asserts that 21 routers are registered on a MockFastAPI instance and that each router's prefix matches the expected value (e.g., root, health, authorized, metrics use an empty prefix; most API routers use "/v1"; conversations_v2 uses "/v2"). @@ -145,7 +147,7 @@ def test_check_prefixes() -> None: include_routers(app) # are all routers added? - assert len(app.routers) == 20 + assert len(app.routers) == 21 assert app.get_router_prefix(root.router) == "" assert app.get_router_prefix(info.router) == "/v1" assert app.get_router_prefix(models.router) == "/v1" @@ -167,3 +169,4 @@ def test_check_prefixes() -> None: assert app.get_router_prefix(rlsapi_v1.router) == "/v1" assert app.get_router_prefix(a2a.router) == "" assert app.get_router_prefix(stream_interrupt.router) == "/v1" + assert app.get_router_prefix(responses.router) == "/v1" diff --git a/tests/unit/test_configuration.py b/tests/unit/test_configuration.py index acd1ca5af..49565eab6 100644 --- a/tests/unit/test_configuration.py +++ b/tests/unit/test_configuration.py @@ -2,16 +2,18 @@ # pylint: disable=too-many-lines +from collections.abc import Generator from pathlib import Path from typing import Any -from collections.abc import Generator -from pydantic import ValidationError import pytest +from pydantic import ValidationError + +import constants +from cache.in_memory_cache import InMemoryCache +from cache.sqlite_cache import SQLiteCache from configuration import AppConfig, LogicError from models.config import CustomProfile, ModelContextProtocolServer -from cache.sqlite_cache import SQLiteCache -from cache.in_memory_cache import InMemoryCache # pylint: disable=broad-exception-caught,protected-access @@ -446,49 +448,56 @@ def test_configuration_not_loaded() -> None: """Test that accessing configuration before loading raises an error.""" cfg = AppConfig() with pytest.raises(LogicError, match="logic error: configuration is not loaded"): - 
cfg.configuration # pylint: disable=pointless-statement + c = cfg.configuration + assert c is not None def test_service_configuration_not_loaded() -> None: """Test that accessing service_configuration before loading raises an error.""" cfg = AppConfig() with pytest.raises(LogicError, match="logic error: configuration is not loaded"): - cfg.service_configuration # pylint: disable=pointless-statement + c = cfg.service_configuration + assert c is not None def test_llama_stack_configuration_not_loaded() -> None: """Test that accessing llama_stack_configuration before loading raises an error.""" cfg = AppConfig() with pytest.raises(LogicError, match="logic error: configuration is not loaded"): - cfg.llama_stack_configuration # pylint: disable=pointless-statement + c = cfg.llama_stack_configuration + assert c is not None def test_user_data_collection_configuration_not_loaded() -> None: """Test that accessing user_data_collection_configuration before loading raises an error.""" cfg = AppConfig() with pytest.raises(LogicError, match="logic error: configuration is not loaded"): - cfg.user_data_collection_configuration # pylint: disable=pointless-statement + c = cfg.user_data_collection_configuration + assert c is not None def test_mcp_servers_not_loaded() -> None: """Test that accessing mcp_servers before loading raises an error.""" cfg = AppConfig() with pytest.raises(LogicError, match="logic error: configuration is not loaded"): - cfg.mcp_servers # pylint: disable=pointless-statement + c = cfg.mcp_servers + assert c is not None def test_authentication_configuration_not_loaded() -> None: """Test that accessing authentication_configuration before loading raises an error.""" cfg = AppConfig() with pytest.raises(LogicError, match="logic error: configuration is not loaded"): - cfg.authentication_configuration # pylint: disable=pointless-statement + c = cfg.authentication_configuration + assert c is not None def test_customization_not_loaded() -> None: """Test that accessing 
customization before loading raises an error.""" cfg = AppConfig() with pytest.raises(LogicError, match="logic error: configuration is not loaded"): - cfg.customization # pylint: disable=pointless-statement + c = cfg.customization + assert c is not None def test_load_configuration_with_customization_system_prompt_path(tmpdir: Path) -> None: @@ -947,11 +956,61 @@ def test_load_configuration_with_incomplete_azure_entra_id_raises(tmpdir: Path) cfg.load_configuration(str(cfg_filename)) -def test_rag_id_mapping_empty_when_no_byok(minimal_config: AppConfig) -> None: - """Test that rag_id_mapping returns empty dict when no BYOK RAG configured.""" +def test_rag_id_mapping_excludes_solr_when_okp_not_configured( + minimal_config: AppConfig, +) -> None: + """Test that rag_id_mapping does not include OKP/Solr when OKP is not in rag config.""" assert minimal_config.rag_id_mapping == {} +def test_rag_id_mapping_includes_solr_when_okp_in_inline() -> None: + """Test that rag_id_mapping includes OKP/Solr mapping when OKP is in rag.inline.""" + cfg = AppConfig() + cfg.init_from_dict( + { + "name": "test", + "service": {"host": "localhost", "port": 8080}, + "llama_stack": { + "api_key": "k", + "url": "http://test.com:1234", + "use_as_library_client": False, + }, + "user_data_collection": {}, + "authentication": {"module": "noop"}, + "rag": {"inline": [constants.OKP_RAG_ID]}, + } + ) + assert constants.SOLR_DEFAULT_VECTOR_STORE_ID in cfg.rag_id_mapping + assert ( + cfg.rag_id_mapping[constants.SOLR_DEFAULT_VECTOR_STORE_ID] + == constants.OKP_RAG_ID + ) + + +def test_rag_id_mapping_includes_solr_when_okp_in_tool() -> None: + """Test that rag_id_mapping includes OKP/Solr mapping when OKP is in rag.tool.""" + cfg = AppConfig() + cfg.init_from_dict( + { + "name": "test", + "service": {"host": "localhost", "port": 8080}, + "llama_stack": { + "api_key": "k", + "url": "http://test.com:1234", + "use_as_library_client": False, + }, + "user_data_collection": {}, + "authentication": {"module": 
"noop"}, + "rag": {"tool": [constants.OKP_RAG_ID]}, + } + ) + assert constants.SOLR_DEFAULT_VECTOR_STORE_ID in cfg.rag_id_mapping + assert ( + cfg.rag_id_mapping[constants.SOLR_DEFAULT_VECTOR_STORE_ID] + == constants.OKP_RAG_ID + ) + + def test_rag_id_mapping_with_byok(tmp_path: Path) -> None: """Test that rag_id_mapping builds correct mapping from BYOK config.""" db_file = tmp_path / "test.db" @@ -980,6 +1039,41 @@ def test_rag_id_mapping_with_byok(tmp_path: Path) -> None: assert cfg.rag_id_mapping == {"vs-001": "my-kb"} +def test_rag_id_mapping_with_byok_and_okp(tmp_path: Path) -> None: + """Test that rag_id_mapping includes both BYOK and OKP entries when OKP is configured.""" + db_file = tmp_path / "test.db" + db_file.touch() + cfg = AppConfig() + cfg.init_from_dict( + { + "name": "test", + "service": {"host": "localhost", "port": 8080}, + "llama_stack": { + "api_key": "k", + "url": "http://test.com:1234", + "use_as_library_client": False, + }, + "user_data_collection": {}, + "authentication": {"module": "noop"}, + "rag": {"inline": [constants.OKP_RAG_ID]}, + "byok_rag": [ + { + "rag_id": "my-kb", + "vector_db_id": "vs-001", + "db_path": str(db_file), + }, + ], + } + ) + assert "vs-001" in cfg.rag_id_mapping + assert cfg.rag_id_mapping["vs-001"] == "my-kb" + assert constants.SOLR_DEFAULT_VECTOR_STORE_ID in cfg.rag_id_mapping + assert ( + cfg.rag_id_mapping[constants.SOLR_DEFAULT_VECTOR_STORE_ID] + == constants.OKP_RAG_ID + ) + + def test_resolve_index_name_with_mapping(minimal_config: AppConfig) -> None: """Test resolve_index_name uses mapping when available.""" mapping = {"vs-x": "user-friendly-name"} diff --git a/tests/unit/utils/test_conversations.py b/tests/unit/utils/test_conversations.py index e4120f145..389793055 100644 --- a/tests/unit/utils/test_conversations.py +++ b/tests/unit/utils/test_conversations.py @@ -3,6 +3,9 @@ from datetime import datetime, UTC from typing import Any +from fastapi import HTTPException +from llama_stack_api import 
OpenAIResponseMessage +from llama_stack_client import APIConnectionError, APIStatusError import pytest from pytest_mock import MockerFixture @@ -11,7 +14,9 @@ from utils.conversations import ( _build_tool_call_summary_from_item, _extract_text_from_content, + append_turn_items_to_conversation, build_conversation_turns_from_items, + get_all_conversation_items, ) from utils.types import ToolCallSummary @@ -720,3 +725,133 @@ def test_legacy_conversation_without_metadata(self, mocker: MockerFixture) -> No # Timestamps should match conversation start time assert turn.started_at == "2024-01-01T10:00:00Z" assert turn.completed_at == "2024-01-01T10:00:00Z" + + +class TestAppendTurnItemsToConversation: # pylint: disable=too-few-public-methods + """Tests for append_turn_items_to_conversation function.""" + + @pytest.mark.asyncio + async def test_appends_user_input_and_llm_output( + self, mocker: MockerFixture + ) -> None: + """Test that append_turn_items_to_conversation creates conversation items correctly.""" + mock_client = mocker.Mock() + mock_client.conversations.items.create = mocker.AsyncMock(return_value=None) + assistant_msg = OpenAIResponseMessage( + type="message", + role="assistant", + content="I cannot help with that", + ) + + await append_turn_items_to_conversation( + mock_client, + conversation_id="conv-123", + user_input="Hello", + llm_output=[assistant_msg], + ) + + mock_client.conversations.items.create.assert_called_once() + call_args = mock_client.conversations.items.create.call_args + assert call_args[0][0] == "conv-123" + items = call_args[1]["items"] + assert len(items) == 2 + assert items[0]["type"] == "message" and items[0]["role"] == "user" + assert items[0]["content"] == "Hello" + assert items[1]["type"] == "message" and items[1]["role"] == "assistant" + assert items[1]["content"] == "I cannot help with that" + + +class TestGetAllConversationItems: + """Tests for get_all_conversation_items function.""" + + @pytest.mark.asyncio + async def 
test_returns_single_page_items(self, mocker: MockerFixture) -> None: + """Test that a single page of items is returned.""" + mock_client = mocker.Mock() + item_a = mocker.Mock(type="message", role="user", content="Hello") + item_b = mocker.Mock(type="message", role="assistant", content="Hi") + mock_page = mocker.Mock() + mock_page.data = [item_a, item_b] + mock_page.has_next_page.return_value = False + + mock_client.conversations.items.list = mocker.AsyncMock(return_value=mock_page) + + result = await get_all_conversation_items( + mock_client, "conv_0d21ba731f21f798dc9680125d5d6f49" + ) + + assert result == [item_a, item_b] + mock_client.conversations.items.list.assert_called_once_with( + conversation_id="conv_0d21ba731f21f798dc9680125d5d6f49", + order="asc", + ) + + @pytest.mark.asyncio + async def test_returns_all_items_across_pages(self, mocker: MockerFixture) -> None: + """Test that items from multiple pages are concatenated.""" + mock_client = mocker.Mock() + item_1 = mocker.Mock(type="message", role="user", content="First") + item_2 = mocker.Mock(type="message", role="assistant", content="Second") + item_3 = mocker.Mock(type="message", role="user", content="Third") + + first_page = mocker.Mock() + first_page.data = [item_1] + first_page.has_next_page.return_value = True + second_page = mocker.Mock() + second_page.data = [item_2, item_3] + second_page.has_next_page.return_value = False + + first_page.get_next_page = mocker.AsyncMock(return_value=second_page) + + mock_client.conversations.items.list = mocker.AsyncMock(return_value=first_page) + + result = await get_all_conversation_items(mock_client, "conv_abc") + + assert result == [item_1, item_2, item_3] + + @pytest.mark.asyncio + async def test_handles_empty_data(self, mocker: MockerFixture) -> None: + """Test that None or empty page data is handled.""" + mock_client = mocker.Mock() + mock_page = mocker.Mock() + mock_page.data = None + mock_page.has_next_page.return_value = False + + 
mock_client.conversations.items.list = mocker.AsyncMock(return_value=mock_page) + + result = await get_all_conversation_items(mock_client, "conv_empty") + + assert result == [] + + @pytest.mark.asyncio + async def test_handles_connection_error(self, mocker: MockerFixture) -> None: + """Test that APIConnectionError is converted to HTTPException 503.""" + mock_client = mocker.Mock() + mock_client.conversations.items.list = mocker.AsyncMock( + side_effect=APIConnectionError( + message="connection refused", request=mocker.Mock() + ) + ) + + with pytest.raises(HTTPException) as exc_info: + await get_all_conversation_items(mock_client, "conv_xyz") + + assert exc_info.value.status_code == 503 + assert "Llama Stack" in str(exc_info.value.detail) + + @pytest.mark.asyncio + async def test_handles_api_status_error(self, mocker: MockerFixture) -> None: + """Test that APIStatusError is converted to HTTPException 500.""" + mock_client = mocker.Mock() + mock_client.conversations.items.list = mocker.AsyncMock( + side_effect=APIStatusError( + message="internal error", + response=mocker.Mock(request=None), + body=None, + ) + ) + + with pytest.raises(HTTPException) as exc_info: + await get_all_conversation_items(mock_client, "conv_xyz") + + assert exc_info.value.status_code == 500 diff --git a/tests/unit/utils/test_endpoints.py b/tests/unit/utils/test_endpoints.py index cc092a1ed..cf8b46568 100644 --- a/tests/unit/utils/test_endpoints.py +++ b/tests/unit/utils/test_endpoints.py @@ -11,9 +11,9 @@ from pytest_mock import MockerFixture from sqlalchemy.exc import SQLAlchemyError -from models.database.conversations import UserConversation +from models.database.conversations import UserConversation, UserTurn from utils import endpoints -from utils.types import ReferencedDocument +from utils.types import ReferencedDocument, ResponsesConversationContext @pytest.fixture(name="input_file") @@ -451,3 +451,266 @@ def test_default_others_allowed_false(self, mocker: MockerFixture) -> None: 
mock_query.filter_by.assert_called_once_with( id=conversation_id, user_id=user_id ) + + +class TestResolveResponseContext: + """Tests for resolve_response_context function.""" + + @pytest.mark.asyncio + async def test_conversation_id_returns_context_with_existing_conversation( + self, mocker: MockerFixture + ) -> None: + """When conversation_id is set, validate and return context with it.""" + mock_holder = mocker.Mock() + mock_client = mocker.Mock() + mock_holder.get_client.return_value = mock_client + mocker.patch( + "utils.endpoints.AsyncLlamaStackClientHolder", + return_value=mock_holder, + ) + + mock_conv = mocker.Mock(spec=UserConversation) + mock_conv.id = "conv-normalized-123" + mocker.patch( + "utils.endpoints.normalize_conversation_id", + return_value="conv-normalized-123", + ) + mocker.patch( + "utils.endpoints.to_llama_stack_conversation_id", + return_value="conv_conv-normalized-123", + ) + mocker.patch( + "utils.endpoints.validate_and_retrieve_conversation", + return_value=mock_conv, + ) + + result = await endpoints.resolve_response_context( + user_id="user-1", + others_allowed=False, + conversation_id="conv-raw", + previous_response_id=None, + generate_topic_summary=None, + ) + + assert isinstance(result, ResponsesConversationContext) + assert result.conversation == "conv_conv-normalized-123" + assert result.user_conversation is mock_conv + assert result.generate_topic_summary is False + + @pytest.mark.asyncio + async def test_previous_response_id_turn_not_found_raises_404( + self, mocker: MockerFixture + ) -> None: + """When previous_response_id is set but turn does not exist, raise 404.""" + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mocker.Mock() + mocker.patch( + "utils.endpoints.AsyncLlamaStackClientHolder", + return_value=mock_holder, + ) + mocker.patch("utils.endpoints.check_turn_existence", return_value=False) + + with pytest.raises(HTTPException) as exc_info: + await endpoints.resolve_response_context( + 
user_id="user-1", + others_allowed=False, + conversation_id=None, + previous_response_id="resp-missing", + generate_topic_summary=None, + ) + + assert exc_info.value.status_code == 404 + assert isinstance(exc_info.value.detail, dict) + assert "resp-missing" in str(exc_info.value.detail["cause"]) + + @pytest.mark.asyncio + async def test_previous_response_id_same_as_last_returns_existing_conversation( + self, mocker: MockerFixture + ) -> None: + """When previous_response_id equals last_response_id, use existing conv.""" + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mocker.Mock() + mocker.patch( + "utils.endpoints.AsyncLlamaStackClientHolder", + return_value=mock_holder, + ) + mocker.patch("utils.endpoints.check_turn_existence", return_value=True) + + mock_turn = mocker.Mock(spec=UserTurn) + mock_turn.conversation_id = "conv-existing" + mocker.patch( + "utils.endpoints.retrieve_turn_by_response_id", + return_value=mock_turn, + ) + + mock_conv = mocker.Mock(spec=UserConversation) + mock_conv.id = "conv-existing" + mock_conv.last_response_id = "resp-123" # same as previous_response_id + mocker.patch( + "utils.endpoints.validate_and_retrieve_conversation", + return_value=mock_conv, + ) + mocker.patch( + "utils.endpoints.to_llama_stack_conversation_id", + return_value="conv_conv-existing", + ) + mock_create = mocker.patch( + "utils.endpoints.create_new_conversation", + new=mocker.AsyncMock(), + ) + + result = await endpoints.resolve_response_context( + user_id="user-1", + others_allowed=False, + conversation_id=None, + previous_response_id="resp-123", + generate_topic_summary=None, + ) + + assert result.conversation == "conv_conv-existing" + assert result.user_conversation is mock_conv + assert result.generate_topic_summary is False + mock_create.assert_not_called() + + @pytest.mark.asyncio + async def test_previous_response_id_fork_creates_new_conversation( + self, mocker: MockerFixture + ) -> None: + """When last_response_id differs from 
previous_response_id, fork to new conv.""" + mock_client = mocker.Mock() + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mock_client + mocker.patch( + "utils.endpoints.AsyncLlamaStackClientHolder", + return_value=mock_holder, + ) + mocker.patch("utils.endpoints.check_turn_existence", return_value=True) + + mock_turn = mocker.Mock(spec=UserTurn) + mock_turn.conversation_id = "conv-existing" + mocker.patch( + "utils.endpoints.retrieve_turn_by_response_id", + return_value=mock_turn, + ) + + mock_conv = mocker.Mock(spec=UserConversation) + mock_conv.id = "conv-existing" + mock_conv.last_response_id = "resp-latest" # fork: different from prev + mocker.patch( + "utils.endpoints.validate_and_retrieve_conversation", + return_value=mock_conv, + ) + mocker.patch( + "utils.endpoints.create_new_conversation", + new=mocker.AsyncMock(return_value="conv_new_fork"), + ) + + result = await endpoints.resolve_response_context( + user_id="user-1", + others_allowed=False, + conversation_id=None, + previous_response_id="resp-old", + generate_topic_summary=None, + ) + + assert result.conversation == "conv_new_fork" + assert result.user_conversation is mock_conv + assert result.generate_topic_summary is True + + @pytest.mark.asyncio + async def test_previous_response_id_fork_respects_generate_topic_summary( + self, mocker: MockerFixture + ) -> None: + """Fork path uses request generate_topic_summary when provided.""" + mock_client = mocker.Mock() + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mock_client + mocker.patch( + "utils.endpoints.AsyncLlamaStackClientHolder", + return_value=mock_holder, + ) + mocker.patch("utils.endpoints.check_turn_existence", return_value=True) + + mock_turn = mocker.Mock(spec=UserTurn) + mock_turn.conversation_id = "conv-existing" + mocker.patch( + "utils.endpoints.retrieve_turn_by_response_id", + return_value=mock_turn, + ) + + mock_conv = mocker.Mock(spec=UserConversation) + mock_conv.id = "conv-existing" + 
mock_conv.last_response_id = "resp-latest" + mocker.patch( + "utils.endpoints.validate_and_retrieve_conversation", + return_value=mock_conv, + ) + mocker.patch( + "utils.endpoints.create_new_conversation", + new=mocker.AsyncMock(return_value="conv_new"), + ) + + result = await endpoints.resolve_response_context( + user_id="user-1", + others_allowed=False, + conversation_id=None, + previous_response_id="resp-old", + generate_topic_summary=False, + ) + + assert result.generate_topic_summary is False + + @pytest.mark.asyncio + async def test_no_context_creates_new_conversation( + self, mocker: MockerFixture + ) -> None: + """When neither conversation_id nor previous_response_id set, create new.""" + mock_client = mocker.Mock() + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mock_client + mocker.patch( + "utils.endpoints.AsyncLlamaStackClientHolder", + return_value=mock_holder, + ) + mocker.patch( + "utils.endpoints.create_new_conversation", + new=mocker.AsyncMock(return_value="conv_brand_new"), + ) + + result = await endpoints.resolve_response_context( + user_id="user-1", + others_allowed=False, + conversation_id=None, + previous_response_id=None, + generate_topic_summary=None, + ) + + assert result.conversation == "conv_brand_new" + assert result.user_conversation is None + assert result.generate_topic_summary is True + + @pytest.mark.asyncio + async def test_no_context_respects_generate_topic_summary( + self, mocker: MockerFixture + ) -> None: + """New conversation path uses generate_topic_summary when provided.""" + mock_holder = mocker.Mock() + mock_holder.get_client.return_value = mocker.Mock() + mocker.patch( + "utils.endpoints.AsyncLlamaStackClientHolder", + return_value=mock_holder, + ) + mocker.patch( + "utils.endpoints.create_new_conversation", + new=mocker.AsyncMock(return_value="conv_new"), + ) + + result = await endpoints.resolve_response_context( + user_id="user-1", + others_allowed=False, + conversation_id=None, + 
previous_response_id=None, + generate_topic_summary=False, + ) + + assert result.generate_topic_summary is False diff --git a/tests/unit/utils/test_query.py b/tests/unit/utils/test_query.py index 867a5c6c6..5c13a3a2d 100644 --- a/tests/unit/utils/test_query.py +++ b/tests/unit/utils/test_query.py @@ -407,6 +407,7 @@ def query_side_effect(*args: Any) -> Any: model_id="model1", provider_id="provider1", topic_summary="Topic", + response_id="resp_1", ) mock_session.add.assert_called() @@ -454,6 +455,7 @@ def query_side_effect(*args: Any) -> Any: model_id="new_model", provider_id="new_provider", topic_summary=None, + response_id="resp_1", ) assert existing_conv.last_used_model == "new_model" @@ -497,6 +499,7 @@ def query_side_effect(*args: Any) -> Any: model_id="model1", provider_id="provider1", topic_summary="Topic", + response_id="resp_1", ) # Verify that the turn number is incremented correctly diff --git a/tests/unit/utils/test_responses.py b/tests/unit/utils/test_responses.py index 54ae158aa..79c8209e0 100644 --- a/tests/unit/utils/test_responses.py +++ b/tests/unit/utils/test_responses.py @@ -98,8 +98,7 @@ def make_output_item( Returns: MockOutputItem: Mock object with type, role, and content attributes """ - mock_item = MockOutputItem(item_type=item_type, role=role, content=content) - return mock_item + return MockOutputItem(item_type=item_type, role=role, content=content) def make_content_part( @@ -127,9 +126,8 @@ def make_content_part( ("function_call", "assistant", "some text", ""), ("file_search_call", "assistant", "some text", ""), (None, "assistant", "some text", ""), - # User role messages are filtered out - return empty string - ("message", "user", "some text", ""), - # Valid assistant message with string content + # Message type extracts content regardless of role (input or output) + ("message", "user", "some text", "some text"), ("message", "assistant", "Hello, world!", "Hello, world!"), ("message", "assistant", "", ""), ], @@ -137,7 +135,7 @@ def 
make_content_part( "function_call_type_returns_empty", "file_search_call_type_returns_empty", "none_type_returns_empty", - "user_role_returns_empty", + "user_message_extracts_content", "valid_string_content", "empty_string_content", ], @@ -147,11 +145,7 @@ def test_extract_text_basic_cases( ) -> None: """Test basic extraction cases for different types, roles, and simple content. - Args: - item_type: Type of the output item - role: Role of the message - content: Content of the message - expected: Expected extracted text + Extraction works for both input and output items; role is not filtered. """ output_item = make_output_item(item_type=item_type, role=role, content=content) result = extract_text_from_response_item(output_item) # type: ignore[arg-type] @@ -307,8 +301,8 @@ def test_extract_text_from_response_items_filters_non_messages(self) -> None: result = extract_text_from_response_items([item1, item2]) # type: ignore[arg-type] assert result == "Valid message" - def test_extract_text_from_response_items_filters_user_messages(self) -> None: - """Test extract_text_from_response_items filters out user role messages.""" + def test_extract_text_from_response_items_includes_all_roles(self) -> None: + """Test extract_text_from_response_items extracts from all message roles.""" item1 = make_output_item( item_type="message", role="assistant", content="Assistant message" ) @@ -316,8 +310,8 @@ def test_extract_text_from_response_items_filters_user_messages(self) -> None: item_type="message", role="user", content="User message" ) result = extract_text_from_response_items([item1, item2]) # type: ignore[arg-type] - # User messages are filtered out - only assistant message is included - assert result == "Assistant message" + # All message items are included (generalizes for input and output) + assert result == "Assistant message User message" def test_extract_text_from_response_items_with_list_content(self) -> None: """Test extract_text_from_response_items with list-based 
content.""" @@ -2044,6 +2038,69 @@ def test_multiple_stores_attribute_not_in_mapping( ) assert source == "vs-unknown" + def test_multiple_stores_source_attribute_fallback( + self, mocker: MockerFixture + ) -> None: + """Test resolution falls back to source attribute when no vector_store_id.""" + mock_result = mocker.Mock() + mock_result.filename = "file-abc123" + mock_result.attributes = {"source": "ocp-documentation"} + + source = _resolve_source_for_result( + mock_result, + ["vs-001", "vs-002"], + {"vs-001": "ocp-4.18-docs"}, + ) + assert source == "ocp-documentation" + + def test_multiple_stores_source_attribute_ignores_mapping( + self, mocker: MockerFixture + ) -> None: + """Test source attribute is returned directly without rag_id_mapping lookup.""" + mock_result = mocker.Mock() + mock_result.filename = "file-abc123" + mock_result.attributes = {"source": "custom-index"} + + source = _resolve_source_for_result( + mock_result, + ["vs-001", "vs-002"], + {"custom-index": "should-not-be-used"}, + ) + assert source == "custom-index" + + def test_multiple_stores_source_preferred_over_vector_store_id( + self, mocker: MockerFixture + ) -> None: + """Test source attribute takes precedence over vector_store_id.""" + mock_result = mocker.Mock() + mock_result.filename = "file-abc123" + mock_result.attributes = { + "vector_store_id": "vs-002", + "source": "ocp-documentation", + } + + source = _resolve_source_for_result( + mock_result, + ["vs-001", "vs-002"], + {"vs-002": "rhel-9-docs"}, + ) + assert source == "ocp-documentation" + + def test_multiple_stores_no_vector_store_id_no_source( + self, mocker: MockerFixture + ) -> None: + """Test resolution returns None when neither vector_store_id nor source present.""" + mock_result = mocker.Mock() + mock_result.filename = "file-abc123" + mock_result.attributes = {"title": "some doc"} + + source = _resolve_source_for_result( + mock_result, + ["vs-001", "vs-002"], + {"vs-001": "ocp-docs"}, + ) + assert source is None + class 
TestBuildChunkAttributes: """Tests for _build_chunk_attributes function.""" diff --git a/tests/unit/utils/test_shields.py b/tests/unit/utils/test_shields.py index 55ee56886..5d68a73a2 100644 --- a/tests/unit/utils/test_shields.py +++ b/tests/unit/utils/test_shields.py @@ -1,5 +1,6 @@ """Unit tests for utils/shields.py functions.""" +from llama_stack_client import APIConnectionError, APIStatusError import pytest from fastapi import HTTPException, status from pytest_mock import MockerFixture @@ -9,6 +10,7 @@ append_turn_to_conversation, detect_shield_violations, get_available_shields, + get_shields_for_request, run_shield_moderation, validate_shield_ids_override, ) @@ -305,60 +307,25 @@ async def test_raises_http_exception_when_shield_has_no_provider_resource_id( assert exc_info.value.status_code == status.HTTP_404_NOT_FOUND @pytest.mark.asyncio - async def test_returns_blocked_on_bad_request_error( + async def test_shield_ids_empty_list_runs_no_shields_returns_passed( self, mocker: MockerFixture ) -> None: - """Test that run_shield_moderation returns blocked when ValueError is raised.""" - mock_metric = mocker.patch( - "utils.shields.metrics.llm_calls_validation_errors_total" - ) - mock_client = mocker.Mock() - - # Setup shield - shield = mocker.Mock() - shield.identifier = "test-shield" - shield.provider_resource_id = "moderation-model" - mock_client.shields.list = mocker.AsyncMock(return_value=[shield]) - - # Setup model - model = mocker.Mock() - model.id = "moderation-model" - mock_client.models.list = mocker.AsyncMock(return_value=[model]) - - # Setup moderation to raise ValueError (known Llama Stack bug) - mock_client.moderations.create = mocker.AsyncMock( - side_effect=ValueError("Bad request") - ) - - result = await run_shield_moderation(mock_client, "test input") - - assert result.decision == "blocked" - assert result.message == DEFAULT_VIOLATION_MESSAGE - mock_metric.inc.assert_called_once() - - @pytest.mark.asyncio - async def 
test_shield_ids_empty_list_raises_422( - self, mocker: MockerFixture - ) -> None: - """Test that shield_ids=[] raises HTTPException 422 (prevents bypass).""" + """Test that shield_ids=[] runs no shields and returns passed.""" mock_client = mocker.Mock() shield = mocker.Mock() shield.identifier = "shield-1" mock_client.shields.list = mocker.AsyncMock(return_value=[shield]) + mock_client.models.list = mocker.AsyncMock(return_value=[]) - with pytest.raises(HTTPException) as exc_info: - await run_shield_moderation(mock_client, "test input", shield_ids=[]) + result = await run_shield_moderation(mock_client, "test input", shield_ids=[]) - assert exc_info.value.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY - assert "shield_ids provided but no shields selected" in str( - exc_info.value.detail - ) + assert result.decision == "passed" @pytest.mark.asyncio - async def test_shield_ids_raises_exception_when_no_shields_found( + async def test_shield_ids_raises_404_when_no_shields_found( self, mocker: MockerFixture ) -> None: - """Test shield_ids raises HTTPException when no requested shields exist.""" + """Test shield_ids raises HTTPException 404 when requested shield not configured.""" mock_client = mocker.Mock() shield = mocker.Mock() shield.identifier = "shield-1" @@ -369,8 +336,8 @@ async def test_shield_ids_raises_exception_when_no_shields_found( mock_client, "test input", shield_ids=["typo-shield"] ) - assert exc_info.value.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY - assert "Invalid shield configuration" in exc_info.value.detail["response"] # type: ignore + assert exc_info.value.status_code == status.HTTP_404_NOT_FOUND + assert "Shield" in exc_info.value.detail["response"] # type: ignore assert "typo-shield" in exc_info.value.detail["cause"] # type: ignore @pytest.mark.asyncio @@ -518,3 +485,132 @@ def test_raises_422_when_empty_list_shield_ids_and_override_disabled( validate_shield_ids_override(query_request, mock_config) assert exc_info.value.status_code 
== status.HTTP_422_UNPROCESSABLE_ENTITY + + +class TestGetShieldsForRequest: + """Tests for get_shields_for_request function.""" + + @pytest.mark.asyncio + async def test_returns_all_shields_when_shield_ids_none( + self, mocker: MockerFixture + ) -> None: + """Return all configured shields when shield_ids is None.""" + mock_client = mocker.Mock() + shield1 = mocker.Mock() + shield1.identifier = "shield-1" + shield2 = mocker.Mock() + shield2.identifier = "shield-2" + mock_client.shields.list = mocker.AsyncMock(return_value=[shield1, shield2]) + + result = await get_shields_for_request(mock_client, shield_ids=None) + + assert len(result) == 2 + assert result[0].identifier == "shield-1" + assert result[1].identifier == "shield-2" + mock_client.shields.list.assert_called_once() + + @pytest.mark.asyncio + async def test_returns_empty_list_when_no_shields_configured( + self, mocker: MockerFixture + ) -> None: + """Test that get_shields_for_request returns empty list when no shields configured.""" + mock_client = mocker.Mock() + mock_client.shields.list = mocker.AsyncMock(return_value=[]) + + result = await get_shields_for_request(mock_client, shield_ids=None) + + assert result == [] + + @pytest.mark.asyncio + async def test_filters_to_requested_shields_when_all_exist( + self, mocker: MockerFixture + ) -> None: + """Test that get_shields_for_request returns only requested shields when all exist.""" + mock_client = mocker.Mock() + shield1 = mocker.Mock() + shield1.identifier = "shield-1" + shield2 = mocker.Mock() + shield2.identifier = "shield-2" + shield3 = mocker.Mock() + shield3.identifier = "shield-3" + mock_client.shields.list = mocker.AsyncMock( + return_value=[shield1, shield2, shield3] + ) + + result = await get_shields_for_request( + mock_client, shield_ids=["shield-1", "shield-3"] + ) + + assert len(result) == 2 + assert result[0].identifier == "shield-1" + assert result[1].identifier == "shield-3" + + @pytest.mark.asyncio + async def 
test_raises_404_when_requested_shield_not_configured( + self, mocker: MockerFixture + ) -> None: + """Raise 404 when a requested shield is not configured.""" + mock_client = mocker.Mock() + shield = mocker.Mock() + shield.identifier = "shield-1" + mock_client.shields.list = mocker.AsyncMock(return_value=[shield]) + + with pytest.raises(HTTPException) as exc_info: + await get_shields_for_request( + mock_client, shield_ids=["shield-1", "missing-shield"] + ) + + assert exc_info.value.status_code == status.HTTP_404_NOT_FOUND + assert "Shield" in exc_info.value.detail["response"] # type: ignore + assert "missing-shield" in exc_info.value.detail["cause"] # type: ignore + + @pytest.mark.asyncio + async def test_raises_404_when_multiple_requested_shields_not_configured( + self, mocker: MockerFixture + ) -> None: + """Raise 404 with all missing ids when multiple shields not configured.""" + mock_client = mocker.Mock() + mock_client.shields.list = mocker.AsyncMock(return_value=[]) + + with pytest.raises(HTTPException) as exc_info: + await get_shields_for_request( + mock_client, shield_ids=["missing-1", "missing-2"] + ) + + assert exc_info.value.status_code == status.HTTP_404_NOT_FOUND + assert "Shields" in exc_info.value.detail["response"] # type: ignore + cause = exc_info.value.detail["cause"] # type: ignore + assert "missing-1" in cause + assert "missing-2" in cause + + @pytest.mark.asyncio + async def test_raises_503_on_connection_error(self, mocker: MockerFixture) -> None: + """Raise 503 on APIConnectionError.""" + mock_client = mocker.Mock() + mock_client.shields.list = mocker.AsyncMock( + side_effect=APIConnectionError( + message="Connection failed", request=mocker.Mock() + ) + ) + + with pytest.raises(HTTPException) as exc_info: + await get_shields_for_request(mock_client, shield_ids=None) + + assert exc_info.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE + + @pytest.mark.asyncio + async def test_raises_500_on_api_status_error(self, mocker: MockerFixture) -> 
None: + """Raise 500 on APIStatusError.""" + mock_client = mocker.Mock() + mock_client.shields.list = mocker.AsyncMock( + side_effect=APIStatusError( + message="Server error", + response=mocker.Mock(request=None), + body=None, + ) + ) + + with pytest.raises(HTTPException) as exc_info: + await get_shields_for_request(mock_client, shield_ids=None) + + assert exc_info.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR diff --git a/tests/unit/utils/test_vector_search.py b/tests/unit/utils/test_vector_search.py index 4930cb846..930f59d36 100644 --- a/tests/unit/utils/test_vector_search.py +++ b/tests/unit/utils/test_vector_search.py @@ -462,7 +462,7 @@ async def test_both_sources_disabled(self, mocker) -> None: # type: ignore[no-u mocker.patch("utils.vector_search.configuration", config_mock) client_mock = mocker.AsyncMock() - context = await build_rag_context(client_mock, "test query", None) + context = await build_rag_context(client_mock, "passed", "test query", None) assert context.context_text == "" assert context.rag_chunks == [] @@ -497,7 +497,7 @@ async def test_byok_enabled_only(self, mocker) -> None: # type: ignore[no-untyp client_mock = mocker.AsyncMock() client_mock.vector_io.query.return_value = search_response - context = await build_rag_context(client_mock, "test query", None) + context = await build_rag_context(client_mock, "passed", "test query", None) assert len(context.rag_chunks) > 0 assert "BYOK content" in context.context_text diff --git a/uv.lock b/uv.lock index e0d1cbcc0..d594f2a32 100644 --- a/uv.lock +++ b/uv.lock @@ -14,7 +14,7 @@ resolution-markers = [ [[package]] name = "a2a-sdk" -version = "0.3.24" +version = "0.3.25" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "google-api-core" }, @@ -23,9 +23,9 @@ dependencies = [ { name = "protobuf" }, { name = "pydantic" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/ad/76/cefa956fb2d3911cb91552a1da8ce2dbb339f1759cb475e2982f0ae2332b/a2a_sdk-0.3.24.tar.gz", hash = "sha256:3581e6e8a854cd725808f5732f90b7978e661b6d4e227a4755a8f063a3c1599d", size = 255550, upload-time = "2026-02-20T10:05:43.423Z" } +sdist = { url = "https://files.pythonhosted.org/packages/55/83/3c99b276d09656cce039464509f05bf385e5600d6dc046a131bbcf686930/a2a_sdk-0.3.25.tar.gz", hash = "sha256:afda85bab8d6af0c5d15e82f326c94190f6be8a901ce562d045a338b7127242f", size = 270638, upload-time = "2026-03-10T13:08:46.417Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/10/6e/cae5f0caea527b39c0abd7204d9416768764573c76649ca03cc345a372be/a2a_sdk-0.3.24-py3-none-any.whl", hash = "sha256:7b248767096bb55311f57deebf6b767349388d94c1b376c60cb8f6b715e053f6", size = 145752, upload-time = "2026-02-20T10:05:41.729Z" }, + { url = "https://files.pythonhosted.org/packages/bd/f9/6a62520b7ecb945188a6e1192275f4732ff9341cd4629bc975a6c146aeab/a2a_sdk-0.3.25-py3-none-any.whl", hash = "sha256:2fce38faea82eb0b6f9f9c2bcf761b0d78612c80ef0e599b50d566db1b2654b5", size = 149609, upload-time = "2026-03-10T13:08:44.7Z" }, ] [[package]] @@ -231,20 +231,20 @@ wheels = [ [[package]] name = "azure-core" -version = "1.38.2" +version = "1.38.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "requests" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/00/fe/5c7710bc611a4070d06ba801de9a935cc87c3d4b689c644958047bdf2cba/azure_core-1.38.2.tar.gz", hash = "sha256:67562857cb979217e48dc60980243b61ea115b77326fa93d83b729e7ff0482e7", size = 363734, upload-time = "2026-02-18T19:33:05.6Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c8/29/9641b73248745774a52c7ce7f965ed1febbdea787ec21caad3ae6891d18a/azure_core-1.38.3.tar.gz", hash = "sha256:a7931fd445cb4af8802c6f39c6a326bbd1e34b115846550a8245fa656ead6f8e", size = 367267, upload-time = "2026-03-12T20:28:21.122Z" } wheels = [ - { url 
= "https://files.pythonhosted.org/packages/42/23/6371a551800d3812d6019cd813acd985f9fac0fedc1290129211a73da4ae/azure_core-1.38.2-py3-none-any.whl", hash = "sha256:074806c75cf239ea284a33a66827695ef7aeddac0b4e19dda266a93e4665ead9", size = 217957, upload-time = "2026-02-18T19:33:07.696Z" }, + { url = "https://files.pythonhosted.org/packages/9a/3d/ac86083efa45a439d0bbfb7947615227813d368b9e1e93d23fd30de6fec0/azure_core-1.38.3-py3-none-any.whl", hash = "sha256:bf59d29765bf4748ab9edf25f98a30b7ea9797f43e367c06d846a30b29c1f845", size = 218231, upload-time = "2026-03-12T20:28:22.462Z" }, ] [[package]] name = "azure-identity" -version = "1.25.2" +version = "1.25.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "azure-core" }, @@ -253,9 +253,9 @@ dependencies = [ { name = "msal-extensions" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c2/3a/439a32a5e23e45f6a91f0405949dc66cfe6834aba15a430aebfc063a81e7/azure_identity-1.25.2.tar.gz", hash = "sha256:030dbaa720266c796221c6cdbd1999b408c079032c919fef725fcc348a540fe9", size = 284709, upload-time = "2026-02-11T01:55:42.323Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c5/0e/3a63efb48aa4a5ae2cfca61ee152fbcb668092134d3eb8bfda472dd5c617/azure_identity-1.25.3.tar.gz", hash = "sha256:ab23c0d63015f50b630ef6c6cf395e7262f439ce06e5d07a64e874c724f8d9e6", size = 286304, upload-time = "2026-03-13T01:12:20.892Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9b/77/f658c76f9e9a52c784bd836aaca6fd5b9aae176f1f53273e758a2bcda695/azure_identity-1.25.2-py3-none-any.whl", hash = "sha256:1b40060553d01a72ba0d708b9a46d0f61f56312e215d8896d836653ffdc6753d", size = 191423, upload-time = "2026-02-11T01:55:44.245Z" }, + { url = "https://files.pythonhosted.org/packages/49/9a/417b3a533e01953a7c618884df2cb05a71e7b68bdbce4fbdb62349d2a2e8/azure_identity-1.25.3-py3-none-any.whl", hash = "sha256:f4d0b956a8146f30333e071374171f3cfa7bdb8073adb8c3814b65567aa7447c", 
size = 192138, upload-time = "2026-03-13T01:12:22.951Z" }, ] [[package]] @@ -292,7 +292,7 @@ wheels = [ [[package]] name = "black" -version = "26.3.0" +version = "26.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, @@ -302,19 +302,19 @@ dependencies = [ { name = "platformdirs" }, { name = "pytokens" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/11/5f/25b7b149b8b7d3b958efa4faa56446560408c0f2651108a517526de0320a/black-26.3.0.tar.gz", hash = "sha256:4d438dfdba1c807c6c7c63c4f15794dda0820d2222e7c4105042ac9ddfc5dd0b", size = 664127, upload-time = "2026-03-06T17:42:33.7Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e1/c5/61175d618685d42b005847464b8fb4743a67b1b8fdb75e50e5a96c31a27a/black-26.3.1.tar.gz", hash = "sha256:2c50f5063a9641c7eed7795014ba37b0f5fa227f3d408b968936e24bc0566b07", size = 666155, upload-time = "2026-03-12T03:36:03.593Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1d/76/b21711045b7f4c4f1774048d0b34dd10a265c42255658b251ce3303ae3c7/black-26.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c2b1e5eec220b419e3591a0aaa6351bd3a9c01fe6291fbaf76d84308eb7a2ede", size = 1895944, upload-time = "2026-03-06T17:46:24.841Z" }, - { url = "https://files.pythonhosted.org/packages/f2/c3/8c56e73283326bc92a36101c660228fff09a2403a57a03cacf3f7f84cf62/black-26.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1bab64de70bccc992432bee56cdffbe004ceeaa07352127c386faa87e81f9261", size = 1718669, upload-time = "2026-03-06T17:46:26.639Z" }, - { url = "https://files.pythonhosted.org/packages/7b/8b/712a3ae8f17c1f3cd6f9ac2fffb167a27192f5c7aba68724e8c4ab8474ad/black-26.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5b6c5f734290803b7b26493ffd734b02b72e6c90d82d45ac4d5b862b9bdf7720", size = 1794844, upload-time = "2026-03-06T17:46:28.334Z" }, - { url = 
"https://files.pythonhosted.org/packages/ba/5b/ee955040e446df86473287dd24dc69c80dd05e02cc358bca90e22059f7b1/black-26.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:7c767396af15b54e1a6aae99ddf241ae97e589f666b1d22c4b6618282a04e4ca", size = 1420461, upload-time = "2026-03-06T17:46:29.965Z" }, - { url = "https://files.pythonhosted.org/packages/12/77/40b8bd44f032bb34c9ebf47ffc5bb47a2520d29e0a4b8a780ab515223b5a/black-26.3.0-cp312-cp312-win_arm64.whl", hash = "sha256:765fd6ddd00f35c55250fdc6b790c272d54ac3f44da719cc42df428269b45980", size = 1229667, upload-time = "2026-03-06T17:46:31.654Z" }, - { url = "https://files.pythonhosted.org/packages/28/c3/21a834ce3de02c64221243f2adac63fa3c3f441efdb3adbf4136b33dfeb0/black-26.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:59754fd8f43ef457be190594c07a52c999e22cb1534dc5344bff1d46fdf1027d", size = 1895195, upload-time = "2026-03-06T17:46:33.12Z" }, - { url = "https://files.pythonhosted.org/packages/1c/f9/212d9697dd78362dadb778d4616b74c8c2cf7f2e4a55aac2adeb0576f2e9/black-26.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1fd94cfee67b8d336761a0b08629a25938e4a491c440951ce517a7209c99b5ff", size = 1718472, upload-time = "2026-03-06T17:46:34.576Z" }, - { url = "https://files.pythonhosted.org/packages/a2/dd/da980b2f512441375b73cb511f38a2c3db4be83ccaa1302b8d39c9fa2dff/black-26.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6f7b3e653a90ca1ef4e821c20f8edaee80b649c38d2532ed2e9073a9534b14a7", size = 1793741, upload-time = "2026-03-06T17:46:36.261Z" }, - { url = "https://files.pythonhosted.org/packages/93/11/cd69ae8826fe3bc6eaf525c8c557266d522b258154a2968eb46d6d25fac7/black-26.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:f8fb9d7c2496adc83614856e1f6e55a9ce4b7ae7fc7f45b46af9189ddb493464", size = 1422522, upload-time = "2026-03-06T17:46:37.607Z" }, - { url = 
"https://files.pythonhosted.org/packages/75/f5/647cf50255203eb286be197925e86eedc101d5409147505db3e463229228/black-26.3.0-cp313-cp313-win_arm64.whl", hash = "sha256:e8618c1d06838f56afbcb3ffa1aa16436cec62b86b38c7b32ca86f53948ffb91", size = 1231807, upload-time = "2026-03-06T17:46:39.072Z" }, - { url = "https://files.pythonhosted.org/packages/39/d7/7360654ba4f8b41afcaeb5aca973cfea5591da75aff79b0a8ae0bb8883f6/black-26.3.0-py3-none-any.whl", hash = "sha256:e825d6b121910dff6f04d7691f826d2449327e8e71c26254c030c4f3d2311985", size = 206848, upload-time = "2026-03-06T17:42:31.133Z" }, + { url = "https://files.pythonhosted.org/packages/dc/f8/da5eae4fc75e78e6dceb60624e1b9662ab00d6b452996046dfa9b8a6025b/black-26.3.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b5e6f89631eb88a7302d416594a32faeee9fb8fb848290da9d0a5f2903519fc1", size = 1895920, upload-time = "2026-03-12T03:40:13.921Z" }, + { url = "https://files.pythonhosted.org/packages/2c/9f/04e6f26534da2e1629b2b48255c264cabf5eedc5141d04516d9d68a24111/black-26.3.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:41cd2012d35b47d589cb8a16faf8a32ef7a336f56356babd9fcf70939ad1897f", size = 1718499, upload-time = "2026-03-12T03:40:15.239Z" }, + { url = "https://files.pythonhosted.org/packages/04/91/a5935b2a63e31b331060c4a9fdb5a6c725840858c599032a6f3aac94055f/black-26.3.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f76ff19ec5297dd8e66eb64deda23631e642c9393ab592826fd4bdc97a4bce7", size = 1794994, upload-time = "2026-03-12T03:40:17.124Z" }, + { url = "https://files.pythonhosted.org/packages/e7/0a/86e462cdd311a3c2a8ece708d22aba17d0b2a0d5348ca34b40cdcbea512e/black-26.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:ddb113db38838eb9f043623ba274cfaf7d51d5b0c22ecb30afe58b1bb8322983", size = 1420867, upload-time = "2026-03-12T03:40:18.83Z" }, + { url = 
"https://files.pythonhosted.org/packages/5b/e5/22515a19cb7eaee3440325a6b0d95d2c0e88dd180cb011b12ae488e031d1/black-26.3.1-cp312-cp312-win_arm64.whl", hash = "sha256:dfdd51fc3e64ea4f35873d1b3fb25326773d55d2329ff8449139ebaad7357efb", size = 1230124, upload-time = "2026-03-12T03:40:20.425Z" }, + { url = "https://files.pythonhosted.org/packages/f5/77/5728052a3c0450c53d9bb3945c4c46b91baa62b2cafab6801411b6271e45/black-26.3.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:855822d90f884905362f602880ed8b5df1b7e3ee7d0db2502d4388a954cc8c54", size = 1895034, upload-time = "2026-03-12T03:40:21.813Z" }, + { url = "https://files.pythonhosted.org/packages/52/73/7cae55fdfdfbe9d19e9a8d25d145018965fe2079fa908101c3733b0c55a0/black-26.3.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8a33d657f3276328ce00e4d37fe70361e1ec7614da5d7b6e78de5426cb56332f", size = 1718503, upload-time = "2026-03-12T03:40:23.666Z" }, + { url = "https://files.pythonhosted.org/packages/e1/87/af89ad449e8254fdbc74654e6467e3c9381b61472cc532ee350d28cfdafb/black-26.3.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f1cd08e99d2f9317292a311dfe578fd2a24b15dbce97792f9c4d752275c1fa56", size = 1793557, upload-time = "2026-03-12T03:40:25.497Z" }, + { url = "https://files.pythonhosted.org/packages/43/10/d6c06a791d8124b843bf325ab4ac7d2f5b98731dff84d6064eafd687ded1/black-26.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:c7e72339f841b5a237ff14f7d3880ddd0fc7f98a1199e8c4327f9a4f478c1839", size = 1422766, upload-time = "2026-03-12T03:40:27.14Z" }, + { url = "https://files.pythonhosted.org/packages/59/4f/40a582c015f2d841ac24fed6390bd68f0fc896069ff3a886317959c9daf8/black-26.3.1-cp313-cp313-win_arm64.whl", hash = "sha256:afc622538b430aa4c8c853f7f63bc582b3b8030fd8c80b70fb5fa5b834e575c2", size = 1232140, upload-time = "2026-03-12T03:40:28.882Z" }, + { url = 
"https://files.pythonhosted.org/packages/8e/0d/52d98722666d6fc6c3dd4c76df339501d6efd40e0ff95e6186a7b7f0befd/black-26.3.1-py3-none-any.whl", hash = "sha256:2bd5aa94fc267d38bb21a70d7410a89f1a1d318841855f698746f8e7f51acd1b", size = 207542, upload-time = "2026-03-12T03:36:01.668Z" }, ] [[package]] @@ -348,11 +348,11 @@ wheels = [ [[package]] name = "cachetools" -version = "7.0.3" +version = "7.0.5" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/48/5c/3b882b82e9af737906539a2eafb62f96a229f1fa80255bede0c7b554cbc4/cachetools-7.0.3.tar.gz", hash = "sha256:8c246313b95849964e54a909c03b327a87ab0428b068fac10da7b105ca275ef6", size = 37187, upload-time = "2026-03-05T21:00:57.918Z" } +sdist = { url = "https://files.pythonhosted.org/packages/af/dd/57fe3fdb6e65b25a5987fd2cdc7e22db0aef508b91634d2e57d22928d41b/cachetools-7.0.5.tar.gz", hash = "sha256:0cd042c24377200c1dcd225f8b7b12b0ca53cc2c961b43757e774ebe190fd990", size = 37367, upload-time = "2026-03-09T20:51:29.451Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/05/4a/573185481c50a8841331f54ddae44e4a3469c46aa0b397731c53a004369a/cachetools-7.0.3-py3-none-any.whl", hash = "sha256:c128ffca156eef344c25fcd08a96a5952803786fa33097f5f2d49edf76f79d53", size = 13907, upload-time = "2026-03-05T21:00:56.486Z" }, + { url = "https://files.pythonhosted.org/packages/06/f3/39cf3367b8107baa44f861dc802cbf16263c945b62d8265d36034fc07bea/cachetools-7.0.5-py3-none-any.whl", hash = "sha256:46bc8ebefbe485407621d0a4264b23c080cedd913921bad7ac3ed2f26c183114", size = 13918, upload-time = "2026-03-09T20:51:27.33Z" }, ] [[package]] @@ -401,21 +401,21 @@ wheels = [ [[package]] name = "chardet" -version = "7.0.1" +version = "7.1.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6c/80/4684035f1a2a3096506bc377276a815ccf0be3c3316eab35d589e82d9f3c/chardet-7.0.1.tar.gz", hash = 
"sha256:6fce895c12c5495bb598e59ae3cd89306969b4464ec7b6dd609b9c86e3397fe3", size = 490240, upload-time = "2026-03-04T21:25:26.97Z" } +sdist = { url = "https://files.pythonhosted.org/packages/0d/84/e72ea5c06e687db591283474b8442ab95665fc6bae7b06043b2a6f0eaf6c/chardet-7.1.0.tar.gz", hash = "sha256:8f47bc4accac17bd9accbb4acc1d563acc024a783806c0a43c3a583f5285690b", size = 505743, upload-time = "2026-03-11T21:39:37.603Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f6/88/4c6fe7dcd5d36a2cfd7030084fbd79264083f329faaf96038c23888a8e05/chardet-7.0.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f661edbfa77b8683a503043ddc9b9fe9036cf28af13064200e11fa1844ded79c", size = 541828, upload-time = "2026-03-04T21:24:58.726Z" }, - { url = "https://files.pythonhosted.org/packages/f9/fb/3b92a2433eadef83ae131fa720a17857cfbf7687c5f188bfb2f9eee2d3dd/chardet-7.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:169951fa88d449e72e0c6194cec1c5e405fd36a6cfbe74c7dab5494cc35f1700", size = 533571, upload-time = "2026-03-04T21:25:00.703Z" }, - { url = "https://files.pythonhosted.org/packages/d9/75/37bee6900183ea08a3a0ae04b9f018f9e64c6b10716e1f7b423db0c4356c/chardet-7.0.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dd6db7505556ae8f9e2a3bf6d689c2b86aa6b459cf39552645d2c4d3fdbf489c", size = 554182, upload-time = "2026-03-04T21:25:02.168Z" }, - { url = "https://files.pythonhosted.org/packages/e8/ed/2fe5ea435ae480bd3a76be1415920ce52b3ff6e188d8eab6a635d6a2a1d1/chardet-7.0.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6f907962b18df78d5ca87a7484e4034354408d2c97cec6f53634b0ea0424c594", size = 557933, upload-time = "2026-03-04T21:25:03.694Z" }, - { url = "https://files.pythonhosted.org/packages/07/ba/7ca89301e492ac4184ba7f4736565d954ba3125acf6bf02c66a38a802bda/chardet-7.0.1-cp312-cp312-win_amd64.whl", hash = 
"sha256:302798e1e62008ca34a216dd04ecc5e240993b2090628e2a35d4c0754313ea9a", size = 524256, upload-time = "2026-03-04T21:25:05.581Z" }, - { url = "https://files.pythonhosted.org/packages/56/26/1a22b9a19b4ca167ca462eaf91d0fc31285874d80b0381c55fdc5bc5f066/chardet-7.0.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:67fe3f453416ed9343057dcf06583b36aae6d8bdb013370b3ff46bc37b7e30ac", size = 541652, upload-time = "2026-03-04T21:25:07.041Z" }, - { url = "https://files.pythonhosted.org/packages/24/fe/2f2425f3b0801e897653723ee827bc87e5a0feacf826ab268a9216680615/chardet-7.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:63bc210ce73f8a1b87430b949f84d086cb326d67eb259305862e7c8861b73374", size = 533333, upload-time = "2026-03-04T21:25:08.886Z" }, - { url = "https://files.pythonhosted.org/packages/b2/8c/6b5f4b49c471b396bdbddad55b569e05d686ea65d91795dae6c774b285f0/chardet-7.0.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:11f51985946b49739968b6dc2fa70e7d8f490bb15574377c5ee114f33d19ef7e", size = 553815, upload-time = "2026-03-04T21:25:10.861Z" }, - { url = "https://files.pythonhosted.org/packages/b9/45/860a82d618e5c3930faef0a0fe205b752323e5d10ce0c18fe5016fd4f8d2/chardet-7.0.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8714f0013c208452a98e23595d99cef53c5364565454425f431446eb586e2591", size = 557506, upload-time = "2026-03-04T21:25:14.081Z" }, - { url = "https://files.pythonhosted.org/packages/ed/44/7acb8f84fc7b5ad3c977ac31865b308881da1c0a6ca58be35554d2473dd7/chardet-7.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:c12abc65830068ad05bd257fb953aaaf63a551446688e03e145522086be5738c", size = 524145, upload-time = "2026-03-04T21:25:15.696Z" }, - { url = "https://files.pythonhosted.org/packages/a3/1f/c1a089db6333b1283409cad3714b8935e7e56722c9c60f9299726a1e57c2/chardet-7.0.1-py3-none-any.whl", hash = 
"sha256:e51e1ff2c51b2d622d97c9737bd5ee9d9b9038f05b7dd8f9ea10b9e2d9674c24", size = 408292, upload-time = "2026-03-04T21:25:25.214Z" }, + { url = "https://files.pythonhosted.org/packages/f0/b8/415efba024c5d6a3d81609de51598a11a99b9f2ffb916c42b72190da1973/chardet-7.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:43c1e3cba6c41d8958ee4acdab94c151dbe256d7ef8df4ae032dc62a892f294f", size = 542358, upload-time = "2026-03-11T21:39:11.023Z" }, + { url = "https://files.pythonhosted.org/packages/7f/d7/9517de8b58b487d5d05e957efacc8c9af180cb2cc97103b1a1c67120d8c0/chardet-7.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1a3c22672c9502af99e0433b47421d0d72c8803efce2cd4a91a3ae1ab5972243", size = 534566, upload-time = "2026-03-11T21:39:12.462Z" }, + { url = "https://files.pythonhosted.org/packages/c3/33/1286f2a05935a80eaadcc13fc70fb0eaa00805acc756363f0f4aca2ed936/chardet-7.1.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fdfc42dfc44ccd569b84fe6a1fdea1df66dc0c48461bc3899dea5efea8d507f6", size = 556240, upload-time = "2026-03-11T21:39:14.388Z" }, + { url = "https://files.pythonhosted.org/packages/c7/cc/556aeffb4768b258cc461bc1063d3592e411e1744223da8c7fbbf524438e/chardet-7.1.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e096d9c211050fff40e22748e1d09d0cec8348fc13ee6e2e0a1da079345b8a86", size = 559737, upload-time = "2026-03-11T21:39:16.382Z" }, + { url = "https://files.pythonhosted.org/packages/af/4a/147151940ad5ac8bf9f8728a1e46bc63502cd95e93c3a9796f01914188f9/chardet-7.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:a6492bebaba8882afb3e14c786fb69ed767326b6f514b8e093dcdf6e2a094d33", size = 526574, upload-time = "2026-03-11T21:39:18.311Z" }, + { url = "https://files.pythonhosted.org/packages/b9/79/2c61f33c87d3698f15ca01b0882fbd2fcb95911a783cc615d31adfae025a/chardet-7.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = 
"sha256:cc8c7520a9736da766f5794bbabb1c6cdfe446676429a5cf691af878631a80bf", size = 542249, upload-time = "2026-03-11T21:39:20.133Z" }, + { url = "https://files.pythonhosted.org/packages/eb/0c/2d0c4897e43f1bb1b68dad840551cda224696eda9951524db50721d3bc18/chardet-7.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6f806f325825325e0682226269a2a4859993344cccca14f2463855d4f5a93272", size = 534544, upload-time = "2026-03-11T21:39:21.844Z" }, + { url = "https://files.pythonhosted.org/packages/17/cb/a568eea24adc1a023da266854e9fc9e0eaffa72580d43c45b47f1b62dd2e/chardet-7.1.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bacc8f862998c59e9ee7fe4960538300d1cc3fe2c293b9cc99bbbc7bf3bedf51", size = 555894, upload-time = "2026-03-11T21:39:23.649Z" }, + { url = "https://files.pythonhosted.org/packages/f3/e7/958975ca18c7b5be9b94354c302a7f3d757c02e7c14e88e0c85af1e16c70/chardet-7.1.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c35d17822fc94467b7951adebd897cb01c0e37ac694be18d2cbd2b676d61df4f", size = 559286, upload-time = "2026-03-11T21:39:25.289Z" }, + { url = "https://files.pythonhosted.org/packages/84/0b/1eddfd650e98bb80ec9f74c0bb98fa60cc36f63d9209214cd069b2a27340/chardet-7.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:b951107b254cdc766e52f4b8339dcfa97c7b45ca9f5509075308db2497e7f3af", size = 526406, upload-time = "2026-03-11T21:39:27.103Z" }, + { url = "https://files.pythonhosted.org/packages/87/13/6aa6c9118ce153a806bb0472e27e8f8c24e6925db8a5b9fe99e03e45af15/chardet-7.1.0-py3-none-any.whl", hash = "sha256:7f677725333bf53f84b7f57458f44669a8a5eb2ac4092ac699cdfa9b1af08a5f", size = 411334, upload-time = "2026-03-11T21:39:36.198Z" }, ] [[package]] @@ -468,6 +468,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/52/93/342cc62a70ab727e093ed98e02a725d85b746345f05d2b5e5034649f4ec8/chevron-0.14.0-py3-none-any.whl", hash = 
"sha256:fbf996a709f8da2e745ef763f482ce2d311aa817d287593a5b990d6d6e4f0443", size = 11595, upload-time = "2021-01-02T22:47:57.847Z" }, ] +[[package]] +name = "circuitbreaker" +version = "2.1.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/df/ac/de7a92c4ed39cba31fe5ad9203b76a25ca67c530797f6bb420fff5f65ccb/circuitbreaker-2.1.3.tar.gz", hash = "sha256:1a4baee510f7bea3c91b194dcce7c07805fe96c4423ed5594b75af438531d084", size = 10787, upload-time = "2025-03-31T08:12:08.963Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ae/34/15f08edd4628f65217de1fc3c1a27c82e46fe357d60c217fc9881e12ebcc/circuitbreaker-2.1.3-py3-none-any.whl", hash = "sha256:87ba6a3ed03fdc7032bc175561c2b04d52ade9d5faf94ca2b035fbdc5e6b1dd1", size = 7737, upload-time = "2025-03-31T08:12:07.802Z" }, +] + [[package]] name = "click" version = "8.3.1" @@ -602,7 +611,7 @@ wheels = [ [[package]] name = "datasets" -version = "4.6.1" +version = "4.7.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "dill" }, @@ -620,9 +629,9 @@ dependencies = [ { name = "tqdm" }, { name = "xxhash" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d7/94/eb81c6fe32e9b6ef92223141b5a553aeff2e9456968424a8533cbe88f476/datasets-4.6.1.tar.gz", hash = "sha256:140ce500bc41939ff6ce995702d66b1f4b2ee7f117bb9b07512fab6804d4070a", size = 593865, upload-time = "2026-02-27T23:26:49.482Z" } +sdist = { url = "https://files.pythonhosted.org/packages/1c/9c/ba18de0b70858533e422ed6cfe0e46789473cef7fc7fc3653e23fa494730/datasets-4.7.0.tar.gz", hash = "sha256:4984cdfc65d04464da7f95205a55cb50515fd94ae3176caacb50a1b7273792e2", size = 602008, upload-time = "2026-03-09T19:01:49.298Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/37/f0/99fe6eb530c7ee9ee1faee48059eb8a6437f80c893a496b98a78864e0fc6/datasets-4.6.1-py3-none-any.whl", hash = "sha256:f53228e6dadc9f837037b1bf3051d7d8c054abbb3eb29f1f022926e08090e0da", size = 520667, 
upload-time = "2026-02-27T23:26:46.855Z" }, + { url = "https://files.pythonhosted.org/packages/1e/03/c6d9c3119cf712f638fe763e887ecaac6acbb62bf1e2acc3cbde0df340fd/datasets-4.7.0-py3-none-any.whl", hash = "sha256:d5fe3025ec6acc3b5649f10d5576dff5e054134927604e6913c1467a04adc3c2", size = 527530, upload-time = "2026-03-09T19:01:47.443Z" }, ] [[package]] @@ -779,11 +788,11 @@ wheels = [ [[package]] name = "filelock" -version = "3.25.0" +version = "3.25.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/77/18/a1fd2231c679dcb9726204645721b12498aeac28e1ad0601038f94b42556/filelock-3.25.0.tar.gz", hash = "sha256:8f00faf3abf9dc730a1ffe9c354ae5c04e079ab7d3a683b7c32da5dd05f26af3", size = 40158, upload-time = "2026-03-01T15:08:45.916Z" } +sdist = { url = "https://files.pythonhosted.org/packages/94/b8/00651a0f559862f3bb7d6f7477b192afe3f583cc5e26403b44e59a55ab34/filelock-3.25.2.tar.gz", hash = "sha256:b64ece2b38f4ca29dd3e810287aa8c48182bbecd1ae6e9ae126c9b35f1382694", size = 40480, upload-time = "2026-03-11T20:45:38.487Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f9/0b/de6f54d4a8bedfe8645c41497f3c18d749f0bd3218170c667bf4b81d0cdd/filelock-3.25.0-py3-none-any.whl", hash = "sha256:5ccf8069f7948f494968fc0713c10e5c182a9c9d9eef3a636307a20c2490f047", size = 26427, upload-time = "2026-03-01T15:08:44.593Z" }, + { url = "https://files.pythonhosted.org/packages/a4/a5/842ae8f0c08b61d6484b52f99a03510a3a72d23141942d216ebe81fefbce/filelock-3.25.2-py3-none-any.whl", hash = "sha256:ca8afb0da15f229774c9ad1b455ed96e85a81373065fb10446672f64444ddf70", size = 26759, upload-time = "2026-03-11T20:45:37.437Z" }, ] [[package]] @@ -893,16 +902,15 @@ grpc = [ [[package]] name = "google-auth" -version = "2.49.0" +version = "2.49.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cryptography" }, { name = "pyasn1-modules" }, - { name = "rsa" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/7d/59/7371175bfd949abfb1170aa076352131d7281bd9449c0f978604fc4431c3/google_auth-2.49.0.tar.gz", hash = "sha256:9cc2d9259d3700d7a257681f81052db6737495a1a46b610597f4b8bafe5286ae", size = 333444, upload-time = "2026-03-06T21:53:06.07Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ea/80/6a696a07d3d3b0a92488933532f03dbefa4a24ab80fb231395b9a2a1be77/google_auth-2.49.1.tar.gz", hash = "sha256:16d40da1c3c5a0533f57d268fe72e0ebb0ae1cc3b567024122651c045d879b64", size = 333825, upload-time = "2026-03-12T19:30:58.135Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/37/45/de64b823b639103de4b63dd193480dce99526bd36be6530c2dba85bf7817/google_auth-2.49.0-py3-none-any.whl", hash = "sha256:f893ef7307f19cf53700b7e2f61b5a6affe3aa0edf9943b13788920ab92d8d87", size = 240676, upload-time = "2026-03-06T21:52:38.304Z" }, + { url = "https://files.pythonhosted.org/packages/e9/eb/c6c2478d8a8d633460be40e2a8a6f8f429171997a35a96f81d3b680dec83/google_auth-2.49.1-py3-none-any.whl", hash = "sha256:195ebe3dca18eddd1b3db5edc5189b76c13e96f29e73043b923ebcf3f1a860f7", size = 240737, upload-time = "2026-03-12T19:30:53.159Z" }, ] [package.optional-dependencies] @@ -912,7 +920,7 @@ requests = [ [[package]] name = "google-cloud-aiplatform" -version = "1.140.0" +version = "1.141.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "docstring-parser" }, @@ -928,9 +936,9 @@ dependencies = [ { name = "pydantic" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/1b/14/1c223faf986afffdd61c994a10c30a04985ed5ba072201058af2c6e1e572/google_cloud_aiplatform-1.140.0.tar.gz", hash = "sha256:ea7eb1870b4cf600f8c2472102e21c3a1bcaf723d6e49f00ed51bc6b88d54fff", size = 10146640, upload-time = "2026-03-04T00:56:38.95Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ac/dc/1209c7aab43bd7233cf631165a3b1b4284d22fc7fe7387c66228d07868ab/google_cloud_aiplatform-1.141.0.tar.gz", hash = 
"sha256:e3b1cdb28865dd862aac9c685dfc5ac076488705aba0a5354016efadcddd59c6", size = 10152688, upload-time = "2026-03-10T22:20:08.692Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c3/5c/bb64aee2da24895d57611eed00fac54739bfa34f98ab344020a6605875bf/google_cloud_aiplatform-1.140.0-py2.py3-none-any.whl", hash = "sha256:e94493a2682b9d17efa7146a53bb3665bf1595c3394fd3d0f45d18f71623fddc", size = 8355660, upload-time = "2026-03-04T00:56:34.441Z" }, + { url = "https://files.pythonhosted.org/packages/6a/fc/428af69a69ff2e477e7f5e12d227b31fe5790f1a8234aacd54297f49c836/google_cloud_aiplatform-1.141.0-py2.py3-none-any.whl", hash = "sha256:6bd25b4d514c40b8181ca703e1b313ad6d0454ab8006fc9907fb3e9f672f31d1", size = 8358409, upload-time = "2026-03-10T22:20:04.871Z" }, ] [[package]] @@ -1018,7 +1026,7 @@ wheels = [ [[package]] name = "google-genai" -version = "1.66.0" +version = "1.67.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -1032,9 +1040,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "websockets" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/9b/ba/0b343b0770d4710ad2979fd9301d7caa56c940174d5361ed4a7cc4979241/google_genai-1.66.0.tar.gz", hash = "sha256:ffc01647b65046bca6387320057aa51db0ad64bcc72c8e3e914062acfa5f7c49", size = 504386, upload-time = "2026-03-04T22:15:28.156Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/07/59a498f81f2c7b0649eacda2ea470b7fd8bd7149f20caba22962081bdd51/google_genai-1.67.0.tar.gz", hash = "sha256:897195a6a9742deb6de240b99227189ada8b2d901d61bdfba836c3092021eab6", size = 506972, upload-time = "2026-03-12T20:39:16.241Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d1/dd/403949d922d4e261b08b64aaa132af4e456c3b15c8e2a2d9e6ef693f66e2/google_genai-1.66.0-py3-none-any.whl", hash = "sha256:7f127a39cf695277104ce4091bb26e417c59bb46e952ff3699c3a982d9c474ee", size = 732174, upload-time = "2026-03-04T22:15:26.63Z" }, + { url = 
"https://files.pythonhosted.org/packages/6e/c2/562aa1f086e53529ffbeb5b43d5d8bc42c1b968102b5e2163fad005ce298/google_genai-1.67.0-py3-none-any.whl", hash = "sha256:58b0484ff2d4335fa53c724b489e9f807fcca8115d9cdbd8fdf341121fbd6d2d", size = 733542, upload-time = "2026-03-12T20:39:14.615Z" }, ] [[package]] @@ -1162,26 +1170,26 @@ wheels = [ [[package]] name = "hf-xet" -version = "1.3.2" +version = "1.4.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/8b/cb/9bb543bd987ffa1ee48202cc96a756951b734b79a542335c566148ade36c/hf_xet-1.3.2.tar.gz", hash = "sha256:e130ee08984783d12717444e538587fa2119385e5bd8fc2bb9f930419b73a7af", size = 643646, upload-time = "2026-02-27T17:26:08.051Z" } +sdist = { url = "https://files.pythonhosted.org/packages/09/08/23c84a26716382c89151b5b447b4beb19e3345f3a93d3b73009a71a57ad3/hf_xet-1.4.2.tar.gz", hash = "sha256:b7457b6b482d9e0743bd116363239b1fa904a5e65deede350fbc0c4ea67c71ea", size = 672357, upload-time = "2026-03-13T06:58:51.077Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/49/75/462285971954269432aad2e7938c5c7ff9ec7d60129cec542ab37121e3d6/hf_xet-1.3.2-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:335a8f36c55fd35a92d0062f4e9201b4015057e62747b7e7001ffb203c0ee1d2", size = 3761019, upload-time = "2026-02-27T17:25:49.441Z" }, - { url = "https://files.pythonhosted.org/packages/35/56/987b0537ddaf88e17192ea09afa8eca853e55f39a4721578be436f8409df/hf_xet-1.3.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:c1ae4d3a716afc774e66922f3cac8206bfa707db13f6a7e62dfff74bfc95c9a8", size = 3521565, upload-time = "2026-02-27T17:25:47.469Z" }, - { url = "https://files.pythonhosted.org/packages/a8/5c/7e4a33a3d689f77761156cc34558047569e54af92e4d15a8f493229f6767/hf_xet-1.3.2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d6dbdf231efac0b9b39adcf12a07f0c030498f9212a18e8c50224d0e84ab803d", size = 4176494, upload-time = "2026-02-27T17:25:40.247Z" }, - { 
url = "https://files.pythonhosted.org/packages/6b/b3/71e856bf9d9a69b3931837e8bf22e095775f268c8edcd4a9e8c355f92484/hf_xet-1.3.2-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:c1980abfb68ecf6c1c7983379ed7b1e2b49a1aaf1a5aca9acc7d48e5e2e0a961", size = 3955601, upload-time = "2026-02-27T17:25:38.376Z" }, - { url = "https://files.pythonhosted.org/packages/63/d7/aecf97b3f0a981600a67ff4db15e2d433389d698a284bb0ea5d8fcdd6f7f/hf_xet-1.3.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:1c88fbd90ad0d27c46b77a445f0a436ebaa94e14965c581123b68b1c52f5fd30", size = 4154770, upload-time = "2026-02-27T17:25:56.756Z" }, - { url = "https://files.pythonhosted.org/packages/e2/e1/3af961f71a40e09bf5ee909842127b6b00f5ab4ee3817599dc0771b79893/hf_xet-1.3.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:35b855024ca37f2dd113ac1c08993e997fbe167b9d61f9ef66d3d4f84015e508", size = 4394161, upload-time = "2026-02-27T17:25:58.111Z" }, - { url = "https://files.pythonhosted.org/packages/a1/c3/859509bade9178e21b8b1db867b8e10e9f817ab9ac1de77cb9f461ced765/hf_xet-1.3.2-cp313-cp313t-win_amd64.whl", hash = "sha256:31612ba0629046e425ba50375685a2586e11fb9144270ebabd75878c3eaf6378", size = 3637377, upload-time = "2026-02-27T17:26:10.611Z" }, - { url = "https://files.pythonhosted.org/packages/05/7f/724cfbef4da92d577b71f68bf832961c8919f36c60d28d289a9fc9d024d4/hf_xet-1.3.2-cp313-cp313t-win_arm64.whl", hash = "sha256:433c77c9f4e132b562f37d66c9b22c05b5479f243a1f06a120c1c06ce8b1502a", size = 3497875, upload-time = "2026-02-27T17:26:09.034Z" }, - { url = "https://files.pythonhosted.org/packages/d8/28/dbb024e2e3907f6f3052847ca7d1a2f7a3972fafcd53ff79018977fcb3e4/hf_xet-1.3.2-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:f93b7595f1d8fefddfede775c18b5c9256757824f7f6832930b49858483cd56f", size = 3763961, upload-time = "2026-02-27T17:25:52.537Z" }, - { url = 
"https://files.pythonhosted.org/packages/e4/71/b99aed3823c9d1795e4865cf437d651097356a3f38c7d5877e4ac544b8e4/hf_xet-1.3.2-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:a85d3d43743174393afe27835bde0cd146e652b5fcfdbcd624602daef2ef3259", size = 3526171, upload-time = "2026-02-27T17:25:50.968Z" }, - { url = "https://files.pythonhosted.org/packages/9d/ca/907890ce6ef5598b5920514f255ed0a65f558f820515b18db75a51b2f878/hf_xet-1.3.2-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7c2a054a97c44e136b1f7f5a78f12b3efffdf2eed3abc6746fc5ea4b39511633", size = 4180750, upload-time = "2026-02-27T17:25:43.125Z" }, - { url = "https://files.pythonhosted.org/packages/8c/ad/bc7f41f87173d51d0bce497b171c4ee0cbde1eed2d7b4216db5d0ada9f50/hf_xet-1.3.2-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:06b724a361f670ae557836e57801b82c75b534812e351a87a2c739f77d1e0635", size = 3961035, upload-time = "2026-02-27T17:25:41.837Z" }, - { url = "https://files.pythonhosted.org/packages/73/38/600f4dda40c4a33133404d9fe644f1d35ff2d9babb4d0435c646c63dd107/hf_xet-1.3.2-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:305f5489d7241a47e0458ef49334be02411d1d0f480846363c1c8084ed9916f7", size = 4161378, upload-time = "2026-02-27T17:26:00.365Z" }, - { url = "https://files.pythonhosted.org/packages/00/b3/7bc1ff91d1ac18420b7ad1e169b618b27c00001b96310a89f8a9294fe509/hf_xet-1.3.2-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:06cdbde243c85f39a63b28e9034321399c507bcd5e7befdd17ed2ccc06dfe14e", size = 4398020, upload-time = "2026-02-27T17:26:03.977Z" }, - { url = "https://files.pythonhosted.org/packages/2b/0b/99bfd948a3ed3620ab709276df3ad3710dcea61976918cce8706502927af/hf_xet-1.3.2-cp37-abi3-win_amd64.whl", hash = "sha256:9298b47cce6037b7045ae41482e703c471ce36b52e73e49f71226d2e8e5685a1", size = 3641624, upload-time = "2026-02-27T17:26:13.542Z" }, - { url = 
"https://files.pythonhosted.org/packages/cc/02/9a6e4ca1f3f73a164c0cd48e41b3cc56585dcc37e809250de443d673266f/hf_xet-1.3.2-cp37-abi3-win_arm64.whl", hash = "sha256:83d8ec273136171431833a6957e8f3af496bee227a0fe47c7b8b39c106d1749a", size = 3503976, upload-time = "2026-02-27T17:26:12.123Z" }, + { url = "https://files.pythonhosted.org/packages/18/06/e8cf74c3c48e5485c7acc5a990d0d8516cdfb5fdf80f799174f1287cc1b5/hf_xet-1.4.2-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:ac8202ae1e664b2c15cdfc7298cbb25e80301ae596d602ef7870099a126fcad4", size = 3796125, upload-time = "2026-03-13T06:58:33.177Z" }, + { url = "https://files.pythonhosted.org/packages/66/d4/b73ebab01cbf60777323b7de9ef05550790451eb5172a220d6b9845385ec/hf_xet-1.4.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:6d2f8ee39fa9fba9af929f8c0d0482f8ee6e209179ad14a909b6ad78ffcb7c81", size = 3555985, upload-time = "2026-03-13T06:58:31.797Z" }, + { url = "https://files.pythonhosted.org/packages/ff/e7/ded6d1bd041c3f2bca9e913a0091adfe32371988e047dd3a68a2463c15a2/hf_xet-1.4.2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4642a6cf249c09da8c1f87fe50b24b2a3450b235bf8adb55700b52f0ea6e2eb6", size = 4212085, upload-time = "2026-03-13T06:58:24.323Z" }, + { url = "https://files.pythonhosted.org/packages/97/c1/a0a44d1f98934f7bdf17f7a915b934f9fca44bb826628c553589900f6df8/hf_xet-1.4.2-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:769431385e746c92dc05492dde6f687d304584b89c33d79def8367ace06cb555", size = 3988266, upload-time = "2026-03-13T06:58:22.887Z" }, + { url = "https://files.pythonhosted.org/packages/7a/82/be713b439060e7d1f1d93543c8053d4ef2fe7e6922c5b31642eaa26f3c4b/hf_xet-1.4.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c9dd1c1bc4cc56168f81939b0e05b4c36dd2d28c13dc1364b17af89aa0082496", size = 4188513, upload-time = "2026-03-13T06:58:40.858Z" }, + { url = 
"https://files.pythonhosted.org/packages/21/a6/cbd4188b22abd80ebd0edbb2b3e87f2633e958983519980815fb8314eae5/hf_xet-1.4.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:fca58a2ae4e6f6755cc971ac6fcdf777ea9284d7e540e350bb000813b9a3008d", size = 4428287, upload-time = "2026-03-13T06:58:42.601Z" }, + { url = "https://files.pythonhosted.org/packages/b2/4e/84e45b25e2e3e903ed3db68d7eafa96dae9a1d1f6d0e7fc85120347a852f/hf_xet-1.4.2-cp313-cp313t-win_amd64.whl", hash = "sha256:163aab46854ccae0ab6a786f8edecbbfbaa38fcaa0184db6feceebf7000c93c0", size = 3665574, upload-time = "2026-03-13T06:58:53.881Z" }, + { url = "https://files.pythonhosted.org/packages/ee/71/c5ac2b9a7ae39c14e91973035286e73911c31980fe44e7b1d03730c00adc/hf_xet-1.4.2-cp313-cp313t-win_arm64.whl", hash = "sha256:09b138422ecbe50fd0c84d4da5ff537d27d487d3607183cd10e3e53f05188e82", size = 3528760, upload-time = "2026-03-13T06:58:52.187Z" }, + { url = "https://files.pythonhosted.org/packages/b4/86/b40b83a2ff03ef05c4478d2672b1fc2b9683ff870e2b25f4f3af240f2e7b/hf_xet-1.4.2-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:71f02d6e4cdd07f344f6844845d78518cc7186bd2bc52d37c3b73dc26a3b0bc5", size = 3800339, upload-time = "2026-03-13T06:58:36.245Z" }, + { url = "https://files.pythonhosted.org/packages/64/2e/af4475c32b4378b0e92a587adb1aa3ec53e3450fd3e5fe0372a874531c00/hf_xet-1.4.2-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:e9b38d876e94d4bdcf650778d6ebbaa791dd28de08db9736c43faff06ede1b5a", size = 3559664, upload-time = "2026-03-13T06:58:34.787Z" }, + { url = "https://files.pythonhosted.org/packages/3c/4c/781267da3188db679e601de18112021a5cb16506fe86b246e22c5401a9c4/hf_xet-1.4.2-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:77e8c180b7ef12d8a96739a4e1e558847002afe9ea63b6f6358b2271a8bdda1c", size = 4217422, upload-time = "2026-03-13T06:58:27.472Z" }, + { url = 
"https://files.pythonhosted.org/packages/68/47/d6cf4a39ecf6c7705f887a46f6ef5c8455b44ad9eb0d391aa7e8a2ff7fea/hf_xet-1.4.2-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:c3b3c6a882016b94b6c210957502ff7877802d0dbda8ad142c8595db8b944271", size = 3992847, upload-time = "2026-03-13T06:58:25.989Z" }, + { url = "https://files.pythonhosted.org/packages/2d/ef/e80815061abff54697239803948abc665c6b1d237102c174f4f7a9a5ffc5/hf_xet-1.4.2-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9d9a634cc929cfbaf2e1a50c0e532ae8c78fa98618426769480c58501e8c8ac2", size = 4193843, upload-time = "2026-03-13T06:58:44.59Z" }, + { url = "https://files.pythonhosted.org/packages/54/75/07f6aa680575d9646c4167db6407c41340cbe2357f5654c4e72a1b01ca14/hf_xet-1.4.2-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6b0932eb8b10317ea78b7da6bab172b17be03bbcd7809383d8d5abd6a2233e04", size = 4432751, upload-time = "2026-03-13T06:58:46.533Z" }, + { url = "https://files.pythonhosted.org/packages/cd/71/193eabd7e7d4b903c4aa983a215509c6114915a5a237525ec562baddb868/hf_xet-1.4.2-cp37-abi3-win_amd64.whl", hash = "sha256:ad185719fb2e8ac26f88c8100562dbf9dbdcc3d9d2add00faa94b5f106aea53f", size = 3671149, upload-time = "2026-03-13T06:58:57.07Z" }, + { url = "https://files.pythonhosted.org/packages/b4/7e/ccf239da366b37ba7f0b36095450efae4a64980bdc7ec2f51354205fdf39/hf_xet-1.4.2-cp37-abi3-win_arm64.whl", hash = "sha256:32c012286b581f783653e718c1862aea5b9eb140631685bb0c5e7012c8719a87", size = 3533426, upload-time = "2026-03-13T06:58:55.46Z" }, ] [[package]] @@ -1223,7 +1231,7 @@ wheels = [ [[package]] name = "huggingface-hub" -version = "1.6.0" +version = "1.7.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, @@ -1236,9 +1244,9 @@ dependencies = [ { name = "typer" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d5/7a/304cec37112382c4fe29a43bcb0d5891f922785d18745883d2aa4eb74e4b/huggingface_hub-1.6.0.tar.gz", hash = 
"sha256:d931ddad8ba8dfc1e816bf254810eb6f38e5c32f60d4184b5885662a3b167325", size = 717071, upload-time = "2026-03-06T14:19:18.524Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b4/a8/94ccc0aec97b996a3a68f3e1fa06a4bd7185dd02bf22bfba794a0ade8440/huggingface_hub-1.7.1.tar.gz", hash = "sha256:be38fe66e9b03c027ad755cb9e4b87ff0303c98acf515b5d579690beb0bf3048", size = 722097, upload-time = "2026-03-13T09:36:07.758Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/92/e3/e3a44f54c8e2f28983fcf07f13d4260b37bd6a0d3a081041bc60b91d230e/huggingface_hub-1.6.0-py3-none-any.whl", hash = "sha256:ef40e2d5cb85e48b2c067020fa5142168342d5108a1b267478ed384ecbf18961", size = 612874, upload-time = "2026-03-06T14:19:16.844Z" }, + { url = "https://files.pythonhosted.org/packages/6f/75/ca21955d6117a394a482c7862ce96216239d0e3a53133ae8510727a8bcfa/huggingface_hub-1.7.1-py3-none-any.whl", hash = "sha256:38c6cce7419bbde8caac26a45ed22b0cea24152a8961565d70ec21f88752bfaa", size = 616308, upload-time = "2026-03-13T09:36:06.062Z" }, ] [[package]] @@ -1628,9 +1636,9 @@ requires-dist = [ { name = "jsonpath-ng", specifier = ">=1.6.1" }, { name = "kubernetes", specifier = ">=30.1.0" }, { name = "litellm", specifier = ">=1.75.5.post1" }, - { name = "llama-stack", specifier = "==0.4.3" }, - { name = "llama-stack-api", specifier = "==0.4.4" }, - { name = "llama-stack-client", specifier = "==0.4.3" }, + { name = "llama-stack", specifier = "==0.5.2" }, + { name = "llama-stack-api", specifier = "==0.5.2" }, + { name = "llama-stack-client", specifier = "==0.5.2" }, { name = "openai", specifier = ">=1.99.9" }, { name = "prometheus-client", specifier = ">=0.22.1" }, { name = "psycopg2-binary", specifier = ">=2.9.10" }, @@ -1707,7 +1715,7 @@ llslibdev = [ [[package]] name = "litellm" -version = "1.82.0" +version = "1.82.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, @@ -1723,14 +1731,14 @@ dependencies = [ { name = "tiktoken" }, { name = 
"tokenizers" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/6c/00/49bb5c28e0dea0f5086229a2a08d5fdc6c8dc0d8e2acb2a2d1f7dd9f4b70/litellm-1.82.0.tar.gz", hash = "sha256:d388f52447daccbcaafa19a3e68d17b75f1374b5bf2cde680d65e1cd86e50d22", size = 16800355, upload-time = "2026-03-01T02:35:30.363Z" } +sdist = { url = "https://files.pythonhosted.org/packages/60/12/010a86643f12ac0b004032d5927c260094299a84ed38b5ed20a8f8c7e3c4/litellm-1.82.2.tar.gz", hash = "sha256:f5f4c4049f344a88bf80b2e421bb927807687c99624515d7ff4152d533ec9dcb", size = 17353218, upload-time = "2026-03-13T21:24:24.5Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/28/89/eb28bfcf97d6b045c400e72eb047c381594467048c237dbb6c227764084c/litellm-1.82.0-py3-none-any.whl", hash = "sha256:5496b5d4532cccdc7a095c21cbac4042f7662021c57bc1d17be4e39838929e80", size = 14911978, upload-time = "2026-03-01T02:35:26.844Z" }, + { url = "https://files.pythonhosted.org/packages/96/e4/87e3ca82a8bf6e6bfffb42a539a1350dd6ced1b7169397bd439ba56fde10/litellm-1.82.2-py3-none-any.whl", hash = "sha256:641ed024774fa3d5b4dd9347f0efb1e31fa422fba2a6500aabedee085d1194cb", size = 15524224, upload-time = "2026-03-13T21:24:21.288Z" }, ] [[package]] name = "llama-stack" -version = "0.4.3" +version = "0.5.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, @@ -1743,9 +1751,14 @@ dependencies = [ { name = "jinja2" }, { name = "jsonschema" }, { name = "llama-stack-api" }, + { name = "mcp" }, + { name = "numpy" }, + { name = "oci" }, { name = "openai" }, + { name = "opentelemetry-distro" }, { name = "opentelemetry-exporter-otlp-proto-http" }, { name = "opentelemetry-sdk" }, + { name = "oracledb" }, { name = "pillow" }, { name = "prompt-toolkit" }, { name = "psycopg2-binary" }, @@ -1763,14 +1776,14 @@ dependencies = [ { name = "urllib3" }, { name = "uvicorn" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/57/f8/b46c825c7d4050524ca4da9ff7f2622b101044f65cf50f708cf5b6ac935d/llama_stack-0.4.3.tar.gz", hash = "sha256:70d379ae9dbb5b1d0693f14054d9817aba183ffcd805133f0a4442baee132c6d", size = 3357773, upload-time = "2026-01-26T21:46:01.588Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ce/a8/3724d0c06a06578a639345f5086b93ba234a0ac247ec4ed7854d0d5e5ca6/llama_stack-0.5.2.tar.gz", hash = "sha256:9334c781e4ded6520aa60c3301a9087e9fb8fdaea8e5f30f8e21d85b17231d8d", size = 16035748, upload-time = "2026-03-06T13:25:59.356Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f2/eb/a4c6c6e6391e13b7d71a116df847b13c334355e9ec18441635140b8fbe1f/llama_stack-0.4.3-py3-none-any.whl", hash = "sha256:423207eae2b640894992a9075ff9dd6300ff904ab06a49fe38cfe0bb809d4669", size = 3695786, upload-time = "2026-01-26T21:45:59.607Z" }, + { url = "https://files.pythonhosted.org/packages/62/4c/fea3f2ffeead47a934704f1527685106766c5ea69dd99c0a83e872b22aa7/llama_stack-0.5.2-py3-none-any.whl", hash = "sha256:581fda638088ee029aab20afe3c42ba8f7f6ef21c80bd9ebcae20bb13c3409d3", size = 3979442, upload-time = "2026-03-06T13:25:56.581Z" }, ] [[package]] name = "llama-stack-api" -version = "0.4.4" +version = "0.5.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "fastapi" }, @@ -1780,14 +1793,14 @@ dependencies = [ { name = "opentelemetry-sdk" }, { name = "pydantic" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a9/84/075d5e4b2419777f7dc92a1153c683d82180739754c39ccb3ae01a9dc535/llama_stack_api-0.4.4.tar.gz", hash = "sha256:3973ca3bacf86916e04e521f77e7909533eec7364d32c3eabc35dc2976dbfe7d", size = 106579, upload-time = "2026-01-30T16:28:38.051Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b2/3d/ecc5cba3613a37887439f08bf202b455ad1d5411818c91833acfaaeee569/llama_stack_api-0.5.2.tar.gz", hash = "sha256:a272e4b803fe24a8ba7d22e6d904bf88abd118ba0b6610a20ff5dedb09f38ad7", size = 126436, upload-time 
= "2026-03-06T13:25:14.169Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/2d/a0/e32f5b39a029c6fd120216d122a5333e8e4889103da56ff9efd6601eb987/llama_stack_api-0.4.4-py3-none-any.whl", hash = "sha256:7bbc63330ed186502dcd48f65cae014dbeb788ba5690be738c98693cfcd2f599", size = 107030, upload-time = "2026-01-30T16:28:36.725Z" }, + { url = "https://files.pythonhosted.org/packages/06/a7/caa050e0beb93147593766e8ea58a0aeab0de59d747ed74ec928c75ab113/llama_stack_api-0.5.2-py3-none-any.whl", hash = "sha256:6531556dd8bb6555d778360ecfcd850aad7a49a8172b68146995d538e71641f0", size = 151603, upload-time = "2026-03-06T13:25:12.876Z" }, ] [[package]] name = "llama-stack-client" -version = "0.4.3" +version = "0.5.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -1806,9 +1819,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/db/3d/2aaeeef910e821ef7d3e65f3d773ba183cc84b7852f877396f64619a250c/llama_stack_client-0.4.3.tar.gz", hash = "sha256:cb807be258206e8fedeb5e5ceba7be7108d3badb31d74199406808c3d1679c35", size = 352952, upload-time = "2026-01-26T21:45:09.725Z" } +sdist = { url = "https://files.pythonhosted.org/packages/99/8a/8742475db7cedc2d452a3a7677da7f24aa84bdd262bc97543029c62df772/llama_stack_client-0.5.2.tar.gz", hash = "sha256:17c1bbad90f7699da4eb3cae256e8823caa4d2be945512a45c8c6f89ab899f28", size = 368612, upload-time = "2026-03-06T13:24:22.252Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1e/39/193aad0d49d834539fdc04c5f774fda22283267aff2400b68ffeb307474c/llama_stack_client-0.4.3-py3-none-any.whl", hash = "sha256:97b8cc5032bad4f0cdd1b0ae992cf44f5554679d315b7c40f46deb358c041f50", size = 375940, upload-time = "2026-01-26T21:45:08.067Z" }, + { url = "https://files.pythonhosted.org/packages/4d/f9/f6224b8819748358a573e3a2b8e299c0b6ba5f9cedf2942188c361c8e555/llama_stack_client-0.5.2-py3-none-any.whl", hash = 
"sha256:473f4d67ac0b243b0fc29555a0203a742615d31bea606b4332d9e2f193f73d6a", size = 391951, upload-time = "2026-03-06T13:24:20.559Z" }, ] [[package]] @@ -2218,9 +2231,26 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df838baa98fa71844d84283302f7604ed565dafe5a6b5041a/oauthlib-3.3.1-py3-none-any.whl", hash = "sha256:88119c938d2b8fb88561af5f6ee0eec8cc8d552b7bb1f712743136eb7523b7a1", size = 160065, upload-time = "2025-06-19T22:48:06.508Z" }, ] +[[package]] +name = "oci" +version = "2.168.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "circuitbreaker" }, + { name = "cryptography" }, + { name = "pyopenssl" }, + { name = "python-dateutil" }, + { name = "pytz" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/eb/68/edf8ffbb42e97ad44d64fce85be00818d979b472dd4377dc948155f811e9/oci-2.168.1.tar.gz", hash = "sha256:b941674171b41e999b8e3adb38d4797d7b42d2bb5ff40d17c26e8ce2a7d4b605", size = 16751235, upload-time = "2026-03-10T10:50:16.244Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/3e/29e05b4f8bed3b4a89b52fc57e76ac86669fc43a59e128eb526e395eda7b/oci-2.168.1-py3-none-any.whl", hash = "sha256:d106cfffc9153b5c9de628877c967ed87bbbfbbc9d411c97feee0eba8f2e4eab", size = 34033119, upload-time = "2026-03-10T10:50:08.501Z" }, +] + [[package]] name = "openai" -version = "2.26.0" +version = "2.28.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -2232,9 +2262,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d7/91/2a06c4e9597c338cac1e5e5a8dd6f29e1836fc229c4c523529dca387fda8/openai-2.26.0.tar.gz", hash = "sha256:b41f37c140ae0034a6e92b0c509376d907f3a66109935fba2c1b471a7c05a8fb", size = 666702, upload-time = "2026-03-05T23:17:35.874Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/56/87/eb0abb4ef88ddb95b3c13149384c4c288f584f3be17d6a4f63f8c3e3c226/openai-2.28.0.tar.gz", hash = "sha256:bb7fdff384d2a787fa82e8822d1dd3c02e8cf901d60f1df523b7da03cbb6d48d", size = 670334, upload-time = "2026-03-13T19:56:27.306Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c6/2e/3f73e8ca53718952222cacd0cf7eecc9db439d020f0c1fe7ae717e4e199a/openai-2.26.0-py3-none-any.whl", hash = "sha256:6151bf8f83802f036117f06cc8a57b3a4da60da9926826cc96747888b57f394f", size = 1136409, upload-time = "2026-03-05T23:17:34.072Z" }, + { url = "https://files.pythonhosted.org/packages/c0/5a/df122348638885526e53140e9c6b0d844af7312682b3bde9587eebc28b47/openai-2.28.0-py3-none-any.whl", hash = "sha256:79aa5c45dba7fef84085701c235cf13ba88485e1ef4f8dfcedc44fc2a698fc1d", size = 1141218, upload-time = "2026-03-13T19:56:25.46Z" }, ] [[package]] @@ -2264,6 +2294,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5f/bf/93795954016c522008da367da292adceed71cca6ee1717e1d64c83089099/opentelemetry_api-1.40.0-py3-none-any.whl", hash = "sha256:82dd69331ae74b06f6a874704be0cfaa49a1650e1537d4a813b86ecef7d0ecf9", size = 68676, upload-time = "2026-03-04T14:17:01.24Z" }, ] +[[package]] +name = "opentelemetry-distro" +version = "0.61b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-sdk" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f5/00/1f8acc51326956a596fefaf67751380001af36029132a7a07d4debce3c06/opentelemetry_distro-0.61b0.tar.gz", hash = "sha256:975b845f50181ad53753becf4fd4b123b54fa04df5a9d78812264436d6518981", size = 2590, upload-time = "2026-03-04T14:20:12.453Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/56/2c/efcc995cd7484e6e55b1d26bd7fa6c55ca96bd415ff94310b52c19f330b0/opentelemetry_distro-0.61b0-py3-none-any.whl", hash = 
"sha256:f21d1ac0627549795d75e332006dd068877f00e461b1b2e8fe4568d6eb7b9590", size = 3349, upload-time = "2026-03-04T14:18:57.788Z" }, +] + [[package]] name = "opentelemetry-exporter-otlp" version = "1.40.0" @@ -2379,6 +2423,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b2/37/cc6a55e448deaa9b27377d087da8615a3416d8ad523d5960b78dbeadd02a/opentelemetry_semantic_conventions-0.61b0-py3-none-any.whl", hash = "sha256:fa530a96be229795f8cef353739b618148b0fe2b4b3f005e60e262926c4d38e2", size = 231621, upload-time = "2026-03-04T14:17:19.33Z" }, ] +[[package]] +name = "oracledb" +version = "3.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f7/02/70a872d1a4a739b4f7371ab8d3d5ed8c6e57e142e2503531aafcb220893c/oracledb-3.4.2.tar.gz", hash = "sha256:46e0f2278ff1fe83fbc33a3b93c72d429323ec7eed47bc9484e217776cd437e5", size = 855467, upload-time = "2026-01-28T17:25:39.91Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/81/2e6154f34b71cd93b4946c73ea13b69d54b8d45a5f6bbffe271793240d21/oracledb-3.4.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:a7396664e592881225ba66385ee83ce339d864f39003d6e4ca31a894a7e7c552", size = 4220806, upload-time = "2026-01-28T17:26:04.322Z" }, + { url = "https://files.pythonhosted.org/packages/ab/a9/a1d59aaac77d8f727156ec6a3b03399917c90b7da4f02d057f92e5601f56/oracledb-3.4.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0f04a2d62073407672f114d02529921de0677c6883ed7c64d8d1a3c04caa3238", size = 2233795, upload-time = "2026-01-28T17:26:05.877Z" }, + { url = "https://files.pythonhosted.org/packages/94/ec/8c4a38020cd251572bd406ddcbde98ca052ec94b5684f9aa9ef1ddfcc68c/oracledb-3.4.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:d8d75e4f879b908be66cce05ba6c05791a5dbb4a15e39abc01aa25c8a2492bd9", size = 2424756, upload-time = "2026-01-28T17:26:07.35Z" }, + { url = "https://files.pythonhosted.org/packages/fa/7d/c251c2a8567151ccfcfbe3467ea9a60fb5480dc4719342e2e6b7a9679e5d/oracledb-3.4.2-cp312-cp312-win32.whl", hash = "sha256:31b7ee83c23d0439778303de8a675717f805f7e8edb5556d48c4d8343bcf14f5", size = 1453486, upload-time = "2026-01-28T17:26:08.869Z" }, + { url = "https://files.pythonhosted.org/packages/4c/78/c939f3c16fb39400c4734d5a3340db5659ba4e9dce23032d7b33ccfd3fe5/oracledb-3.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:ac25a0448fc830fb7029ad50cd136cdbfcd06975d53967e269772cc5cb8c203a", size = 1794445, upload-time = "2026-01-28T17:26:10.66Z" }, + { url = "https://files.pythonhosted.org/packages/22/68/f7126f5d911c295b57720c6b1a0609a5a2667b4546946433552a4de46333/oracledb-3.4.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:643c25d301a289a371e37fcedb59e5fa5e54fb321708e5c12821c4b55bdd8a4d", size = 4205176, upload-time = "2026-01-28T17:26:12.463Z" }, + { url = "https://files.pythonhosted.org/packages/5d/93/2fced60f92dc82e66980a8a3ba5c1ea48110bf1dd81d030edb69d88f992e/oracledb-3.4.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:55397e7eb43bb7017c03a981c736c25724182f5210951181dfe3fab0e5d457fb", size = 2231298, upload-time = "2026-01-28T17:26:14.497Z" }, + { url = "https://files.pythonhosted.org/packages/75/a7/4dd286f3a6348d786fef9e6ab2e6c9b74ca9195d9a756f2a67e45743cdf0/oracledb-3.4.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b26a10f9c790bd141ffc8af68520803ed4a44a9258bf7d1eea9bfdd36bd6df7f", size = 2439430, upload-time = "2026-01-28T17:26:16.044Z" }, + { url = "https://files.pythonhosted.org/packages/19/28/94bc753e5e969c60ee5d9c914e2b4ef79999eaca8e91bcab2fbf0586b80b/oracledb-3.4.2-cp313-cp313-win32.whl", hash = 
"sha256:b974caec2c330c22bbe765705a5ac7d98ec3022811dec2042d561a3c65cb991b", size = 1458209, upload-time = "2026-01-28T17:26:17.652Z" }, + { url = "https://files.pythonhosted.org/packages/cb/2b/593a9b2d4c12c9de3289e67d84fe023336d99f36ba51442a5a0f5ce6acf7/oracledb-3.4.2-cp313-cp313-win_amd64.whl", hash = "sha256:3df8eee1410d25360599968b1625b000f10c5ae0e47274031a7842a9dc418890", size = 1793558, upload-time = "2026-01-28T17:26:19.914Z" }, +] + [[package]] name = "packaging" version = "26.0" @@ -2965,11 +3031,11 @@ wheels = [ [[package]] name = "pyjwt" -version = "2.11.0" +version = "2.12.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/5c/5a/b46fa56bf322901eee5b0454a34343cdbdae202cd421775a8ee4e42fd519/pyjwt-2.11.0.tar.gz", hash = "sha256:35f95c1f0fbe5d5ba6e43f00271c275f7a1a4db1dab27bf708073b75318ea623", size = 98019, upload-time = "2026-01-30T19:59:55.694Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c2/27/a3b6e5bf6ff856d2509292e95c8f57f0df7017cf5394921fc4e4ef40308a/pyjwt-2.12.1.tar.gz", hash = "sha256:c74a7a2adf861c04d002db713dd85f84beb242228e671280bf709d765b03672b", size = 102564, upload-time = "2026-03-13T19:27:37.25Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6f/01/c26ce75ba460d5cd503da9e13b21a33804d38c2165dec7b716d06b13010c/pyjwt-2.11.0-py3-none-any.whl", hash = "sha256:94a6bde30eb5c8e04fee991062b534071fd1439ef58d2adc9ccb823e7bcd0469", size = 28224, upload-time = "2026-01-30T19:59:54.539Z" }, + { url = "https://files.pythonhosted.org/packages/e5/7a/8dd906bd22e79e47397a61742927f6747fe93242ef86645ee9092e610244/pyjwt-2.12.1-py3-none-any.whl", hash = "sha256:28ca37c070cad8ba8cd9790cd940535d40274d22f80ab87f3ac6a713e6e8454c", size = 29726, upload-time = "2026-03-13T19:27:35.677Z" }, ] [package.optional-dependencies] @@ -2995,6 +3061,19 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/d5/6f/9ac2548e290764781f9e7e2aaf0685b086379dabfb29ca38536985471eaf/pylint-4.0.5-py3-none-any.whl", hash = "sha256:00f51c9b14a3b3ae08cff6b2cdd43f28165c78b165b628692e428fb1f8dc2cf2", size = 536694, upload-time = "2026-02-20T09:07:31.028Z" }, ] +[[package]] +name = "pyopenssl" +version = "25.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/80/be/97b83a464498a79103036bc74d1038df4a7ef0e402cfaf4d5e113fb14759/pyopenssl-25.3.0.tar.gz", hash = "sha256:c981cb0a3fd84e8602d7afc209522773b94c1c2446a3c710a75b06fe1beae329", size = 184073, upload-time = "2025-09-17T00:32:21.037Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/81/ef2b1dfd1862567d573a4fdbc9f969067621764fbb74338496840a1d2977/pyopenssl-25.3.0-py3-none-any.whl", hash = "sha256:1fda6fc034d5e3d179d39e59c1895c9faeaf40a79de5fc4cbbfbe0d36f4a77b6", size = 57268, upload-time = "2025-09-17T00:32:19.474Z" }, +] + [[package]] name = "pyproject-hooks" version = "1.2.0" @@ -3100,18 +3179,14 @@ wheels = [ [[package]] name = "pythainlp" -version = "5.2.0" +version = "5.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy" }, - { name = "pandas" }, - { name = "pyyaml" }, - { name = "requests" }, { name = "tzdata", marker = "sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/60/af/839446c681079c288d3734a007e7bc083e91e2c9bea17165647f0e12c63c/pythainlp-5.2.0.tar.gz", hash = "sha256:04c6e4bdd806204be742f139b1f2e666411c4509c270dfff1a8b5afa69d36d2b", size = 18719875, upload-time = "2025-12-20T12:55:14.163Z" } +sdist = { url = "https://files.pythonhosted.org/packages/40/18/dfaad6a9fa546c3cdf37d7930acb117d206575e639559a1aa0ee84ad457c/pythainlp-5.3.1.tar.gz", hash = 
"sha256:516c34d22689c2b469dd74bb18221eb9336e42f5137aa32940008293f1895de4", size = 19294788, upload-time = "2026-03-14T07:10:02.208Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/30/57/b29402fbabd8df3120b8319d731cab88881dda0ac9147b308ecb01d49ed3/pythainlp-5.2.0-py3-none-any.whl", hash = "sha256:fd64d6b3d33973782390822e74b8e2c9b867760eeed19d0d218945165b431e35", size = 19263794, upload-time = "2025-12-20T12:55:11.414Z" }, + { url = "https://files.pythonhosted.org/packages/5c/4f/3cec6cc70da44b6d684441732f709750edf1259357f9a15fbc0848c20f0f/pythainlp-5.3.1-py3-none-any.whl", hash = "sha256:f33fb134fcfbd281fb64494c924fddb5e7cc27e053f7a73f18b6b5acbb7a4e2d", size = 19843902, upload-time = "2026-03-14T07:09:58.552Z" }, ] [[package]] @@ -3163,6 +3238,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c6/78/397db326746f0a342855b81216ae1f0a32965deccfd7c830a2dbc66d2483/pytokens-0.4.1-py3-none-any.whl", hash = "sha256:26cef14744a8385f35d0e095dc8b3a7583f6c953c2e3d269c7f82484bf5ad2de", size = 13729, upload-time = "2026-01-30T01:03:45.029Z" }, ] +[[package]] +name = "pytz" +version = "2026.1.post1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/56/db/b8721d71d945e6a8ac63c0fc900b2067181dbb50805958d4d4661cf7d277/pytz-2026.1.post1.tar.gz", hash = "sha256:3378dde6a0c3d26719182142c56e60c7f9af7e968076f31aae569d72a0358ee1", size = 321088, upload-time = "2026-03-03T07:47:50.683Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/10/99/781fe0c827be2742bcc775efefccb3b048a3a9c6ce9aec0cbf4a101677e5/pytz-2026.1.post1-py2.py3-none-any.whl", hash = "sha256:f2fd16142fda348286a75e1a524be810bb05d444e5a081f37f7affc635035f7a", size = 510489, upload-time = "2026-03-03T07:47:49.167Z" }, +] + [[package]] name = "pywin32" version = "311" @@ -3411,41 +3495,29 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/0a/4d/222ef0b46443cf4cf46764d9c630f3fe4abaa7245be9417e56e9f52b8f65/rpds_py-0.30.0-cp313-cp313t-win_amd64.whl", hash = "sha256:f14fc5df50a716f7ece6a80b6c78bb35ea2ca47c499e422aa4463455dd96d56d", size = 225768, upload-time = "2025-11-30T20:23:25.908Z" }, ] -[[package]] -name = "rsa" -version = "4.9.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pyasn1" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/da/8a/22b7beea3ee0d44b1916c0c1cb0ee3af23b700b6da9f04991899d0c555d4/rsa-4.9.1.tar.gz", hash = "sha256:e7bdbfdb5497da4c07dfd35530e1a902659db6ff241e39d9953cad06ebd0ae75", size = 29034, upload-time = "2025-04-16T09:51:18.218Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762", size = 34696, upload-time = "2025-04-16T09:51:17.142Z" }, -] - [[package]] name = "ruff" -version = "0.15.5" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/77/9b/840e0039e65fcf12758adf684d2289024d6140cde9268cc59887dc55189c/ruff-0.15.5.tar.gz", hash = "sha256:7c3601d3b6d76dce18c5c824fc8d06f4eef33d6df0c21ec7799510cde0f159a2", size = 4574214, upload-time = "2026-03-05T20:06:34.946Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/47/20/5369c3ce21588c708bcbe517a8fbe1a8dfdb5dfd5137e14790b1da71612c/ruff-0.15.5-py3-none-linux_armv6l.whl", hash = "sha256:4ae44c42281f42e3b06b988e442d344a5b9b72450ff3c892e30d11b29a96a57c", size = 10478185, upload-time = "2026-03-05T20:06:29.093Z" }, - { url = "https://files.pythonhosted.org/packages/44/ed/e81dd668547da281e5dce710cf0bc60193f8d3d43833e8241d006720e42b/ruff-0.15.5-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6edd3792d408ebcf61adabc01822da687579a1a023f297618ac27a5b51ef0080", size = 10859201, upload-time = 
"2026-03-05T20:06:32.632Z" }, - { url = "https://files.pythonhosted.org/packages/c4/8f/533075f00aaf19b07c5cd6aa6e5d89424b06b3b3f4583bfa9c640a079059/ruff-0.15.5-py3-none-macosx_11_0_arm64.whl", hash = "sha256:89f463f7c8205a9f8dea9d658d59eff49db05f88f89cc3047fb1a02d9f344010", size = 10184752, upload-time = "2026-03-05T20:06:40.312Z" }, - { url = "https://files.pythonhosted.org/packages/66/0e/ba49e2c3fa0395b3152bad634c7432f7edfc509c133b8f4529053ff024fb/ruff-0.15.5-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba786a8295c6574c1116704cf0b9e6563de3432ac888d8f83685654fe528fd65", size = 10534857, upload-time = "2026-03-05T20:06:19.581Z" }, - { url = "https://files.pythonhosted.org/packages/59/71/39234440f27a226475a0659561adb0d784b4d247dfe7f43ffc12dd02e288/ruff-0.15.5-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fd4b801e57955fe9f02b31d20375ab3a5c4415f2e5105b79fb94cf2642c91440", size = 10309120, upload-time = "2026-03-05T20:06:00.435Z" }, - { url = "https://files.pythonhosted.org/packages/f5/87/4140aa86a93df032156982b726f4952aaec4a883bb98cb6ef73c347da253/ruff-0.15.5-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:391f7c73388f3d8c11b794dbbc2959a5b5afe66642c142a6effa90b45f6f5204", size = 11047428, upload-time = "2026-03-05T20:05:51.867Z" }, - { url = "https://files.pythonhosted.org/packages/5a/f7/4953e7e3287676f78fbe85e3a0ca414c5ca81237b7575bdadc00229ac240/ruff-0.15.5-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8dc18f30302e379fe1e998548b0f5e9f4dff907f52f73ad6da419ea9c19d66c8", size = 11914251, upload-time = "2026-03-05T20:06:22.887Z" }, - { url = "https://files.pythonhosted.org/packages/77/46/0f7c865c10cf896ccf5a939c3e84e1cfaeed608ff5249584799a74d33835/ruff-0.15.5-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1cc6e7f90087e2d27f98dc34ed1b3ab7c8f0d273cc5431415454e22c0bd2a681", size = 11333801, upload-time = "2026-03-05T20:05:57.168Z" }, - { url = 
"https://files.pythonhosted.org/packages/d3/01/a10fe54b653061585e655f5286c2662ebddb68831ed3eaebfb0eb08c0a16/ruff-0.15.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1cb7169f53c1ddb06e71a9aebd7e98fc0fea936b39afb36d8e86d36ecc2636a", size = 11206821, upload-time = "2026-03-05T20:06:03.441Z" }, - { url = "https://files.pythonhosted.org/packages/7a/0d/2132ceaf20c5e8699aa83da2706ecb5c5dcdf78b453f77edca7fb70f8a93/ruff-0.15.5-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:9b037924500a31ee17389b5c8c4d88874cc6ea8e42f12e9c61a3d754ff72f1ca", size = 11133326, upload-time = "2026-03-05T20:06:25.655Z" }, - { url = "https://files.pythonhosted.org/packages/72/cb/2e5259a7eb2a0f87c08c0fe5bf5825a1e4b90883a52685524596bfc93072/ruff-0.15.5-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:65bb414e5b4eadd95a8c1e4804f6772bbe8995889f203a01f77ddf2d790929dd", size = 10510820, upload-time = "2026-03-05T20:06:37.79Z" }, - { url = "https://files.pythonhosted.org/packages/ff/20/b67ce78f9e6c59ffbdb5b4503d0090e749b5f2d31b599b554698a80d861c/ruff-0.15.5-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:d20aa469ae3b57033519c559e9bc9cd9e782842e39be05b50e852c7c981fa01d", size = 10302395, upload-time = "2026-03-05T20:05:54.504Z" }, - { url = "https://files.pythonhosted.org/packages/5f/e5/719f1acccd31b720d477751558ed74e9c88134adcc377e5e886af89d3072/ruff-0.15.5-py3-none-musllinux_1_2_i686.whl", hash = "sha256:15388dd28c9161cdb8eda68993533acc870aa4e646a0a277aa166de9ad5a8752", size = 10754069, upload-time = "2026-03-05T20:06:06.422Z" }, - { url = "https://files.pythonhosted.org/packages/c3/9c/d1db14469e32d98f3ca27079dbd30b7b44dbb5317d06ab36718dee3baf03/ruff-0.15.5-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:b30da330cbd03bed0c21420b6b953158f60c74c54c5f4c1dabbdf3a57bf355d2", size = 11304315, upload-time = "2026-03-05T20:06:10.867Z" }, - { url = 
"https://files.pythonhosted.org/packages/28/3a/950367aee7c69027f4f422059227b290ed780366b6aecee5de5039d50fa8/ruff-0.15.5-py3-none-win32.whl", hash = "sha256:732e5ee1f98ba5b3679029989a06ca39a950cced52143a0ea82a2102cb592b74", size = 10551676, upload-time = "2026-03-05T20:06:13.705Z" }, - { url = "https://files.pythonhosted.org/packages/b8/00/bf077a505b4e649bdd3c47ff8ec967735ce2544c8e4a43aba42ee9bf935d/ruff-0.15.5-py3-none-win_amd64.whl", hash = "sha256:821d41c5fa9e19117616c35eaa3f4b75046ec76c65e7ae20a333e9a8696bc7fe", size = 11678972, upload-time = "2026-03-05T20:06:45.379Z" }, - { url = "https://files.pythonhosted.org/packages/fe/4e/cd76eca6db6115604b7626668e891c9dd03330384082e33662fb0f113614/ruff-0.15.5-py3-none-win_arm64.whl", hash = "sha256:b498d1c60d2fe5c10c45ec3f698901065772730b411f164ae270bb6bfcc4740b", size = 10965572, upload-time = "2026-03-05T20:06:16.984Z" }, +version = "0.15.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/51/df/f8629c19c5318601d3121e230f74cbee7a3732339c52b21daa2b82ef9c7d/ruff-0.15.6.tar.gz", hash = "sha256:8394c7bb153a4e3811a4ecdacd4a8e6a4fa8097028119160dffecdcdf9b56ae4", size = 4597916, upload-time = "2026-03-12T23:05:47.51Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/2f/4e03a7e5ce99b517e98d3b4951f411de2b0fa8348d39cf446671adcce9a2/ruff-0.15.6-py3-none-linux_armv6l.whl", hash = "sha256:7c98c3b16407b2cf3d0f2b80c80187384bc92c6774d85fefa913ecd941256fff", size = 10508953, upload-time = "2026-03-12T23:05:17.246Z" }, + { url = "https://files.pythonhosted.org/packages/70/60/55bcdc3e9f80bcf39edf0cd272da6fa511a3d94d5a0dd9e0adf76ceebdb4/ruff-0.15.6-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:ee7dcfaad8b282a284df4aa6ddc2741b3f4a18b0555d626805555a820ea181c3", size = 10942257, upload-time = "2026-03-12T23:05:23.076Z" }, + { url = 
"https://files.pythonhosted.org/packages/e7/f9/005c29bd1726c0f492bfa215e95154cf480574140cb5f867c797c18c790b/ruff-0.15.6-py3-none-macosx_11_0_arm64.whl", hash = "sha256:3bd9967851a25f038fc8b9ae88a7fbd1b609f30349231dffaa37b6804923c4bb", size = 10322683, upload-time = "2026-03-12T23:05:33.738Z" }, + { url = "https://files.pythonhosted.org/packages/5f/74/2f861f5fd7cbb2146bddb5501450300ce41562da36d21868c69b7a828169/ruff-0.15.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:13f4594b04e42cd24a41da653886b04d2ff87adbf57497ed4f728b0e8a4866f8", size = 10660986, upload-time = "2026-03-12T23:05:53.245Z" }, + { url = "https://files.pythonhosted.org/packages/c1/a1/309f2364a424eccb763cdafc49df843c282609f47fe53aa83f38272389e0/ruff-0.15.6-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e2ed8aea2f3fe57886d3f00ea5b8aae5bf68d5e195f487f037a955ff9fbaac9e", size = 10332177, upload-time = "2026-03-12T23:05:56.145Z" }, + { url = "https://files.pythonhosted.org/packages/30/41/7ebf1d32658b4bab20f8ac80972fb19cd4e2c6b78552be263a680edc55ac/ruff-0.15.6-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:70789d3e7830b848b548aae96766431c0dc01a6c78c13381f423bf7076c66d15", size = 11170783, upload-time = "2026-03-12T23:06:01.742Z" }, + { url = "https://files.pythonhosted.org/packages/76/be/6d488f6adca047df82cd62c304638bcb00821c36bd4881cfca221561fdfc/ruff-0.15.6-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:542aaf1de3154cea088ced5a819ce872611256ffe2498e750bbae5247a8114e9", size = 12044201, upload-time = "2026-03-12T23:05:28.697Z" }, + { url = "https://files.pythonhosted.org/packages/71/68/e6f125df4af7e6d0b498f8d373274794bc5156b324e8ab4bf5c1b4fc0ec7/ruff-0.15.6-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c22e6f02c16cfac3888aa636e9eba857254d15bbacc9906c9689fdecb1953ab", size = 11421561, upload-time = "2026-03-12T23:05:31.236Z" }, + { url = 
"https://files.pythonhosted.org/packages/f1/9f/f85ef5fd01a52e0b472b26dc1b4bd228b8f6f0435975442ffa4741278703/ruff-0.15.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98893c4c0aadc8e448cfa315bd0cc343a5323d740fe5f28ef8a3f9e21b381f7e", size = 11310928, upload-time = "2026-03-12T23:05:45.288Z" }, + { url = "https://files.pythonhosted.org/packages/8c/26/b75f8c421f5654304b89471ed384ae8c7f42b4dff58fa6ce1626d7f2b59a/ruff-0.15.6-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:70d263770d234912374493e8cc1e7385c5d49376e41dfa51c5c3453169dc581c", size = 11235186, upload-time = "2026-03-12T23:05:50.677Z" }, + { url = "https://files.pythonhosted.org/packages/fc/d4/d5a6d065962ff7a68a86c9b4f5500f7d101a0792078de636526c0edd40da/ruff-0.15.6-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:55a1ad63c5a6e54b1f21b7514dfadc0c7fb40093fa22e95143cf3f64ebdcd512", size = 10635231, upload-time = "2026-03-12T23:05:37.044Z" }, + { url = "https://files.pythonhosted.org/packages/d6/56/7c3acf3d50910375349016cf33de24be021532042afbed87942858992491/ruff-0.15.6-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:8dc473ba093c5ec238bb1e7429ee676dca24643c471e11fbaa8a857925b061c0", size = 10340357, upload-time = "2026-03-12T23:06:04.748Z" }, + { url = "https://files.pythonhosted.org/packages/06/54/6faa39e9c1033ff6a3b6e76b5df536931cd30caf64988e112bbf91ef5ce5/ruff-0.15.6-py3-none-musllinux_1_2_i686.whl", hash = "sha256:85b042377c2a5561131767974617006f99f7e13c63c111b998f29fc1e58a4cfb", size = 10860583, upload-time = "2026-03-12T23:05:58.978Z" }, + { url = "https://files.pythonhosted.org/packages/cb/1e/509a201b843b4dfb0b32acdedf68d951d3377988cae43949ba4c4133a96a/ruff-0.15.6-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:cef49e30bc5a86a6a92098a7fbf6e467a234d90b63305d6f3ec01225a9d092e0", size = 11410976, upload-time = "2026-03-12T23:05:39.955Z" }, + { url = 
"https://files.pythonhosted.org/packages/6c/25/3fc9114abf979a41673ce877c08016f8e660ad6cf508c3957f537d2e9fa9/ruff-0.15.6-py3-none-win32.whl", hash = "sha256:bbf67d39832404812a2d23020dda68fee7f18ce15654e96fb1d3ad21a5fe436c", size = 10616872, upload-time = "2026-03-12T23:05:42.451Z" }, + { url = "https://files.pythonhosted.org/packages/89/7a/09ece68445ceac348df06e08bf75db72d0e8427765b96c9c0ffabc1be1d9/ruff-0.15.6-py3-none-win_amd64.whl", hash = "sha256:aee25bc84c2f1007ecb5037dff75cef00414fdf17c23f07dc13e577883dca406", size = 11787271, upload-time = "2026-03-12T23:05:20.168Z" }, + { url = "https://files.pythonhosted.org/packages/7f/d0/578c47dd68152ddddddf31cd7fc67dc30b7cdf639a86275fda821b0d9d98/ruff-0.15.6-py3-none-win_arm64.whl", hash = "sha256:c34de3dd0b0ba203be50ae70f5910b17188556630e2178fd7d79fc030eb0d837", size = 11060497, upload-time = "2026-03-12T23:05:25.968Z" }, ] [[package]] @@ -3567,7 +3639,7 @@ wheels = [ [[package]] name = "sentence-transformers" -version = "5.2.3" +version = "5.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "huggingface-hub" }, @@ -3580,18 +3652,18 @@ dependencies = [ { name = "transformers" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5b/30/21664028fc0776eb1ca024879480bbbab36f02923a8ff9e4cae5a150fa35/sentence_transformers-5.2.3.tar.gz", hash = "sha256:3cd3044e1f3fe859b6a1b66336aac502eaae5d3dd7d5c8fc237f37fbf58137c7", size = 381623, upload-time = "2026-02-17T14:05:20.238Z" } +sdist = { url = "https://files.pythonhosted.org/packages/fe/26/448453925b6ce0c29d8b54327caa71ee4835511aef02070467402273079c/sentence_transformers-5.3.0.tar.gz", hash = "sha256:414a0a881f53a4df0e6cbace75f823bfcb6b94d674c42a384b498959b7c065e2", size = 403330, upload-time = "2026-03-12T14:53:40.778Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/46/9f/dba4b3e18ebbe1eaa29d9f1764fbc7da0cd91937b83f2b7928d15c5d2d36/sentence_transformers-5.2.3-py3-none-any.whl", hash = 
"sha256:6437c62d4112b615ddebda362dfc16a4308d604c5b68125ed586e3e95d5b2e30", size = 494225, upload-time = "2026-02-17T14:05:18.596Z" }, + { url = "https://files.pythonhosted.org/packages/e2/9c/2fa7224058cad8df68d84bafee21716f30892cecc7ad1ad73bde61d23754/sentence_transformers-5.3.0-py3-none-any.whl", hash = "sha256:dca6b98db790274a68185d27a65801b58b4caf653a4e556b5f62827509347c7d", size = 512390, upload-time = "2026-03-12T14:53:39.035Z" }, ] [[package]] name = "setuptools" -version = "82.0.0" +version = "82.0.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/82/f3/748f4d6f65d1756b9ae577f329c951cda23fb900e4de9f70900ced962085/setuptools-82.0.0.tar.gz", hash = "sha256:22e0a2d69474c6ae4feb01951cb69d515ed23728cf96d05513d36e42b62b37cb", size = 1144893, upload-time = "2026-02-08T15:08:40.206Z" } +sdist = { url = "https://files.pythonhosted.org/packages/4f/db/cfac1baf10650ab4d1c111714410d2fbb77ac5a616db26775db562c8fab2/setuptools-82.0.1.tar.gz", hash = "sha256:7d872682c5d01cfde07da7bccc7b65469d3dca203318515ada1de5eda35efbf9", size = 1152316, upload-time = "2026-03-09T12:47:17.221Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e1/c6/76dc613121b793286a3f91621d7b75a2b493e0390ddca50f11993eadf192/setuptools-82.0.0-py3-none-any.whl", hash = "sha256:70b18734b607bd1da571d097d236cfcfacaf01de45717d59e6e04b96877532e0", size = 1003468, upload-time = "2026-02-08T15:08:38.723Z" }, + { url = "https://files.pythonhosted.org/packages/9d/76/f789f7a86709c6b087c5a2f52f911838cad707cc613162401badc665acfe/setuptools-82.0.1-py3-none-any.whl", hash = "sha256:a59e362652f08dcd477c78bb6e7bd9d80a7995bc73ce773050228a348ce2e5bb", size = 1006223, upload-time = "2026-03-09T12:47:15.026Z" }, ] [[package]] @@ -3870,21 +3942,19 @@ wheels = [ [[package]] name = "tornado" -version = "6.5.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/37/1d/0a336abf618272d53f62ebe274f712e213f5a03c0b2339575430b8362ef2/tornado-6.5.4.tar.gz", hash = "sha256:a22fa9047405d03260b483980635f0b041989d8bcc9a313f8fe18b411d84b1d7", size = 513632, upload-time = "2025-12-15T19:21:03.836Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ab/a9/e94a9d5224107d7ce3cc1fab8d5dc97f5ea351ccc6322ee4fb661da94e35/tornado-6.5.4-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d6241c1a16b1c9e4cc28148b1cda97dd1c6cb4fb7068ac1bedc610768dff0ba9", size = 443909, upload-time = "2025-12-15T19:20:48.382Z" }, - { url = "https://files.pythonhosted.org/packages/db/7e/f7b8d8c4453f305a51f80dbb49014257bb7d28ccb4bbb8dd328ea995ecad/tornado-6.5.4-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:2d50f63dda1d2cac3ae1fa23d254e16b5e38153758470e9956cbc3d813d40843", size = 442163, upload-time = "2025-12-15T19:20:49.791Z" }, - { url = "https://files.pythonhosted.org/packages/ba/b5/206f82d51e1bfa940ba366a8d2f83904b15942c45a78dd978b599870ab44/tornado-6.5.4-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d1cf66105dc6acb5af613c054955b8137e34a03698aa53272dbda4afe252be17", size = 445746, upload-time = "2025-12-15T19:20:51.491Z" }, - { url = "https://files.pythonhosted.org/packages/8e/9d/1a3338e0bd30ada6ad4356c13a0a6c35fbc859063fa7eddb309183364ac1/tornado-6.5.4-cp39-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:50ff0a58b0dc97939d29da29cd624da010e7f804746621c78d14b80238669335", size = 445083, upload-time = "2025-12-15T19:20:52.778Z" }, - { url = "https://files.pythonhosted.org/packages/50/d4/e51d52047e7eb9a582da59f32125d17c0482d065afd5d3bc435ff2120dc5/tornado-6.5.4-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e5fb5e04efa54cf0baabdd10061eb4148e0be137166146fff835745f59ab9f7f", size = 445315, upload-time = "2025-12-15T19:20:53.996Z" }, - { url = 
"https://files.pythonhosted.org/packages/27/07/2273972f69ca63dbc139694a3fc4684edec3ea3f9efabf77ed32483b875c/tornado-6.5.4-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9c86b1643b33a4cd415f8d0fe53045f913bf07b4a3ef646b735a6a86047dda84", size = 446003, upload-time = "2025-12-15T19:20:56.101Z" }, - { url = "https://files.pythonhosted.org/packages/d1/83/41c52e47502bf7260044413b6770d1a48dda2f0246f95ee1384a3cd9c44a/tornado-6.5.4-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:6eb82872335a53dd063a4f10917b3efd28270b56a33db69009606a0312660a6f", size = 445412, upload-time = "2025-12-15T19:20:57.398Z" }, - { url = "https://files.pythonhosted.org/packages/10/c7/bc96917f06cbee182d44735d4ecde9c432e25b84f4c2086143013e7b9e52/tornado-6.5.4-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6076d5dda368c9328ff41ab5d9dd3608e695e8225d1cd0fd1e006f05da3635a8", size = 445392, upload-time = "2025-12-15T19:20:58.692Z" }, - { url = "https://files.pythonhosted.org/packages/0c/1a/d7592328d037d36f2d2462f4bc1fbb383eec9278bc786c1b111cbbd44cfa/tornado-6.5.4-cp39-abi3-win32.whl", hash = "sha256:1768110f2411d5cd281bac0a090f707223ce77fd110424361092859e089b38d1", size = 446481, upload-time = "2025-12-15T19:21:00.008Z" }, - { url = "https://files.pythonhosted.org/packages/d6/6d/c69be695a0a64fd37a97db12355a035a6d90f79067a3cf936ec2b1dc38cd/tornado-6.5.4-cp39-abi3-win_amd64.whl", hash = "sha256:fa07d31e0cd85c60713f2b995da613588aa03e1303d75705dca6af8babc18ddc", size = 446886, upload-time = "2025-12-15T19:21:01.287Z" }, - { url = "https://files.pythonhosted.org/packages/50/49/8dc3fd90902f70084bd2cd059d576ddb4f8bb44c2c7c0e33a11422acb17e/tornado-6.5.4-cp39-abi3-win_arm64.whl", hash = "sha256:053e6e16701eb6cbe641f308f4c1a9541f91b6261991160391bfc342e8a551a1", size = 445910, upload-time = "2025-12-15T19:21:02.571Z" }, +version = "6.5.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/f8/f1/3173dfa4a18db4a9b03e5d55325559dab51ee653763bb8745a75af491286/tornado-6.5.5.tar.gz", hash = "sha256:192b8f3ea91bd7f1f50c06955416ed76c6b72f96779b962f07f911b91e8d30e9", size = 516006, upload-time = "2026-03-10T21:31:02.067Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/59/8c/77f5097695f4dd8255ecbd08b2a1ed8ba8b953d337804dd7080f199e12bf/tornado-6.5.5-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:487dc9cc380e29f58c7ab88f9e27cdeef04b2140862e5076a66fb6bb68bb1bfa", size = 445983, upload-time = "2026-03-10T21:30:44.28Z" }, + { url = "https://files.pythonhosted.org/packages/ab/5e/7625b76cd10f98f1516c36ce0346de62061156352353ef2da44e5c21523c/tornado-6.5.5-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:65a7f1d46d4bb41df1ac99f5fcb685fb25c7e61613742d5108b010975a9a6521", size = 444246, upload-time = "2026-03-10T21:30:46.571Z" }, + { url = "https://files.pythonhosted.org/packages/b2/04/7b5705d5b3c0fab088f434f9c83edac1573830ca49ccf29fb83bf7178eec/tornado-6.5.5-cp39-abi3-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:e74c92e8e65086b338fd56333fb9a68b9f6f2fe7ad532645a290a464bcf46be5", size = 447229, upload-time = "2026-03-10T21:30:48.273Z" }, + { url = "https://files.pythonhosted.org/packages/34/01/74e034a30ef59afb4097ef8659515e96a39d910b712a89af76f5e4e1f93c/tornado-6.5.5-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:435319e9e340276428bbdb4e7fa732c2d399386d1de5686cb331ec8eee754f07", size = 448192, upload-time = "2026-03-10T21:30:51.22Z" }, + { url = "https://files.pythonhosted.org/packages/be/00/fe9e02c5a96429fce1a1d15a517f5d8444f9c412e0bb9eadfbe3b0fc55bf/tornado-6.5.5-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:3f54aa540bdbfee7b9eb268ead60e7d199de5021facd276819c193c0fb28ea4e", size = 448039, upload-time = "2026-03-10T21:30:53.52Z" }, + { url = 
"https://files.pythonhosted.org/packages/82/9e/656ee4cec0398b1d18d0f1eb6372c41c6b889722641d84948351ae19556d/tornado-6.5.5-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:36abed1754faeb80fbd6e64db2758091e1320f6bba74a4cf8c09cd18ccce8aca", size = 447445, upload-time = "2026-03-10T21:30:55.541Z" }, + { url = "https://files.pythonhosted.org/packages/5a/76/4921c00511f88af86a33de770d64141170f1cfd9c00311aea689949e274e/tornado-6.5.5-cp39-abi3-win32.whl", hash = "sha256:dd3eafaaeec1c7f2f8fdcd5f964e8907ad788fe8a5a32c4426fbbdda621223b7", size = 448582, upload-time = "2026-03-10T21:30:57.142Z" }, + { url = "https://files.pythonhosted.org/packages/2c/23/f6c6112a04d28eed765e374435fb1a9198f73e1ec4b4024184f21faeb1ad/tornado-6.5.5-cp39-abi3-win_amd64.whl", hash = "sha256:6443a794ba961a9f619b1ae926a2e900ac20c34483eea67be4ed8f1e58d3ef7b", size = 448990, upload-time = "2026-03-10T21:30:58.857Z" }, + { url = "https://files.pythonhosted.org/packages/b7/c8/876602cbc96469911f0939f703453c1157b0c826ecb05bdd32e023397d4e/tornado-6.5.5-cp39-abi3-win_arm64.whl", hash = "sha256:2c9a876e094109333f888539ddb2de4361743e5d21eece20688e3e351e4990a6", size = 448016, upload-time = "2026-03-10T21:31:00.43Z" }, ] [[package]]