mrlvsb · Kobzol · Feb 23, 2026 · Feb 9, 2026 · Feb 9, 2026 · Feb 9, 2026
diff --git a/.dockerignore b/.dockerignore
@@ -1,5 +1,34 @@
+# Kelvin
 tasks/
 submits/
 submit_results/
+kelvin_data/
+
+# Python
 .venv/
+__pycache__/
+*.py[cod]
+*.pyd
+*.pyo
+*.so
+.pytest_cache/
+.mypy_cache/
+.ruff_cache/
+.coverage
+htmlcov/
+
+# Node
 node_modules/
+**/dist/
+**/.vite/
+
+# VCS / tooling
+.git/
+
+# Logs
+**/*.log
+
+# Editor
+.vscode/
+.idea/
+.DS_Store
diff --git a/.env.example b/.env.example
@@ -1,4 +1,5 @@
 ### Kelvin
+# ------------------------------------------------------------------------------
 
 # !!! IMPORTANT: For Production deployments using Deployment Service, all file paths must be specified as absolute due to use of DooD (Docker out of Docker)
 
@@ -12,6 +13,13 @@ KELVIN__TASKS_PATH=./tasks
 KELVIN__SUBMITS_PATH=./submits
 # Path where submit results will be stored
 KELVIN__SUBMIT_RESULTS_PATH=./submit_results
+# (Optional) Internal base URL used by the evaluator to contact the app.
+# Required for local Docker development, where the request Host is 'localhost'
+# (unreachable from other containers). Set to 'https://nginx' so the evaluator
+# reaches the app through the internal nginx container.
+# In production, leave unset — the DNS alias on the nginx service routes the
+# real public hostname (e.g. kelvin.cs.vsb.cz) to nginx inside Docker.
+# EVALUATION_LINK_BASEURL=https://nginx
 
 ### Postgres
 DATABASE__HOST=127.0.0.1
@@ -40,9 +48,21 @@ OPENAI__API_KEY=your_openai_api_key_here
 OPENAI__API_URL=http://localhost:8080/v1
 OPENAI__MODEL=openai/gpt-oss-120b
 
+### Evaluator Workers
+# ------------------------------------------------------------------------------
+# Number of evaluator worker replicas (CPU and CUDA workers, respectively)
+EVALUATOR_CPU_REPLICAS=32
+EVALUATOR_CUDA_REPLICAS=32
+
+# Redis Connection for Evaluators
+# - If running LOCALLY (same machine as app): Leave these commented out or set to 'redis' and '6379'.
+# - If running DISTRIBUTED (on a different machine): Set these to the IP/Host and Port of the main server's Redis.
+# EVALUATOR_REDIS__HOST=redis
+# EVALUATOR_REDIS__PORT=6379
+
+
 ### Deployment Service
-# ID of the docker group on the host machine (get it via `getent group docker | cut -d: -f3`)
-DOCKER_GROUP_ID=999
+# ------------------------------------------------------------------------------
 SECURITY__WEBHOOK_SECRET=yoursecretvalue
 SECURITY__ALLOWED_HOSTS=["localhost", "127.0.0.1", "nginx", "kelvin.cs.vsb.cz"]
 

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -175,14 +175,25 @@ jobs:
         uses: docker/build-push-action@v6
         with:
           context: "{{defaultContext}}:deployment_service"
-          cache-from: type=registry,ref=ghcr.io/mrlvsb/deployment-ci-cache
-          # Only write the cache in the master branch or workflow_dispatch builds
+          cache-from: type=gha
+          # Only write the cache in the merge_group or workflow_dispatch builds
           # https://github.com/docker/build-push-action/issues/845#issuecomment-1512619265
-          cache-to: ${{ (github.event_name == 'merge_group' || github.event_name == 'workflow_dispatch') && 'type=registry,ref=ghcr.io/mrlvsb/deployment-ci-cache,compression=zstd' || '' }}
+          cache-to: ${{ (github.event_name == 'merge_group' || github.event_name == 'workflow_dispatch') && 'type=gha,mode=max' || '' }}
           tags: ghcr.io/mrlvsb/deployment:latest,ghcr.io/mrlvsb/deployment:${{ github.sha }}
           outputs: type=docker,dest=${{ runner.temp }}/deployment.tar
 
-      - name: Share Kelvin built image
+      - name: Build Kelvin-Evaluator Docker image
+        uses: docker/build-push-action@v6
+        with:
+          target: evaluator
+          cache-from: type=gha
+          # Only write the cache in the merge_group or workflow_dispatch builds
+          # https://github.com/docker/build-push-action/issues/845#issuecomment-1512619265
+          cache-to: ${{ (github.event_name == 'merge_group' || github.event_name == 'workflow_dispatch') && 'type=gha,mode=max' || '' }}
+          tags: ghcr.io/mrlvsb/kelvin-evaluator:latest,ghcr.io/mrlvsb/kelvin-evaluator:${{ github.sha }}
+          outputs: type=docker,dest=${{ runner.temp }}/kelvin-evaluator.tar
+
+      - name: Share built image
         uses: actions/upload-artifact@v6
         with:
           name: kelvin
@@ -196,6 +207,13 @@ jobs:
           path: ${{ runner.temp }}/deployment.tar
           retention-days: 1
 
+      - name: Share Kelvin-Evaluator image
+        uses: actions/upload-artifact@v6
+        with:
+          name: kelvin-evaluator
+          path: ${{ runner.temp }}/kelvin-evaluator.tar
+          retention-days: 1
+
   build-docs:
     runs-on: ubuntu-latest
     steps:
@@ -266,6 +284,12 @@ jobs:
           name: deployment
           path: ${{ runner.temp }}
 
+      - name: Download Kelvin-Evaluator image
+        uses: actions/download-artifact@v6
+        with:
+          name: kelvin-evaluator
+          path: ${{ runner.temp }}
+
       - name: Load image
         id: load_image
         run: |
@@ -276,6 +300,12 @@ jobs:
           echo "$LOADED"
           SHA_TAG=$(echo "$LOADED" | grep -v ':latest' | awk '{print $3}')
           echo "app_image_tag=$SHA_TAG" >> $GITHUB_OUTPUT
+
+          LOADED_EVAL=$(docker load --input ${{ runner.temp }}/kelvin-evaluator.tar)
+          echo "$LOADED_EVAL"
+          SHA_TAG_EVAL=$(echo "$LOADED_EVAL" | grep -v ':latest' | awk '{print $3}')
+          echo "evaluator_image_tag=$SHA_TAG_EVAL" >> $GITHUB_OUTPUT
+
           if [ "${{ steps.changed-files-deployment.outputs.any_changed }}" = "true" ]; then
             docker load --input ${{ runner.temp }}/deployment.tar
           fi
@@ -291,6 +321,9 @@ jobs:
       - name: Push Docker image with SHA tag
         run: docker push ${{ steps.load_image.outputs.app_image_tag }}
 
+      - name: Push Kelvin-Evaluator Docker image with SHA tag
+        run: docker push ${{ steps.load_image.outputs.evaluator_image_tag }}
+
       - name: Trigger on-prem deployment
         run: |
           python3 deployment_service/deploy.py \
@@ -306,6 +339,9 @@ jobs:
       - name: Push Kelvin Docker image with latest tag
         run: docker push ghcr.io/mrlvsb/kelvin:latest
 
+      - name: Push Kelvin Evaluator Docker image with latest tag
+        run: docker push ghcr.io/mrlvsb/kelvin-evaluator:latest
+
       - name: Push Deployment_service Docker image with all tags
         if: steps.changed-files-deployment.outputs.any_changed == 'true'
         run: docker push --all-tags ghcr.io/mrlvsb/deployment
@@ -323,6 +359,12 @@ jobs:
           package-type: 'container'
           min-versions-to-keep: 15
 
+      - uses: actions/delete-package-versions@v5
+        with:
+          package-name: 'kelvin-evaluator'
+          package-type: 'container'
+          min-versions-to-keep: 15
+
   deploy-docs:
     runs-on: ubuntu-latest
     needs: [ build-docs ]

diff --git a/Dockerfile b/Dockerfile
@@ -1,10 +1,13 @@
-FROM ghcr.io/astral-sh/uv:python3.12-bookworm AS build-backend
+FROM python:3.12-slim-bookworm AS build-backend
+
+COPY --from=ghcr.io/astral-sh/uv:0.10.0 /uv /usr/local/bin/uv
 
 RUN export DEBIAN_FRONTEND=noninteractive && \
     apt-get update && \
     apt-get install -y \
     -o APT::Install-Recommends=false \
     -o APT::Install-Suggests=false \
+    build-essential \
     libsasl2-dev \
     libgraphviz-dev
 
@@ -26,14 +29,15 @@ RUN npm ci
 
 RUN npm run build
 
-FROM python:3.12-bookworm AS runtime
+FROM python:3.12-slim-bookworm AS runtime
 
 RUN export DEBIAN_FRONTEND=noninteractive && \
     apt-get update && \
     apt-get install -y \
     -o APT::Install-Recommends=false \
     -o APT::Install-Suggests=false \
-    graphviz && \
+    graphviz \
+    libmagic1 && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
 
@@ -43,6 +47,8 @@ WORKDIR /app
 # We want to use ID 1000, to have the same ID as the default outside user
 # And we also want group 101, to provide share access to the Unix uWSGI
 # socket with the nginx image.
+RUN getent group 101 >/dev/null || groupadd -g 101 webserver
+
 RUN useradd --uid 1000 --gid 101 --shell /bin/false --system webserver
 
 RUN chown -R webserver .
@@ -72,3 +78,44 @@ COPY --chown=webserver deploy/entrypoint.sh ./
 STOPSIGNAL SIGINT
 
 ENTRYPOINT ["/app/entrypoint.sh"]
+
+FROM runtime AS evaluator
+
+# Temporarily switch to the root user to install the Docker packages and other system dependencies
+USER root
+
+RUN export DEBIAN_FRONTEND=noninteractive && \
+    apt-get update && \
+    apt-get install -y \
+    -o APT::Install-Recommends=false \
+    -o APT::Install-Suggests=false \
+    ca-certificates \
+    curl \
+    procps && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+
+RUN mkdir -p /etc/apt/keyrings && \
+    curl -fsSL https://download.docker.com/linux/debian/gpg -o /etc/apt/keyrings/docker.asc
+RUN chmod a+r /etc/apt/keyrings/docker.asc
+
+RUN echo \
+    "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/debian \
+    $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
+    tee /etc/apt/sources.list.d/docker.list > /dev/null
+
+RUN export DEBIAN_FRONTEND=noninteractive && \
+    apt-get update && \
+    apt-get install -y \
+    -o APT::Install-Recommends=false \
+    -o APT::Install-Suggests=false \
+    docker-ce docker-ce-cli containerd.io docker-compose-plugin && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+
+USER webserver
+
+ENTRYPOINT []
+CMD ["python", "manage.py", "rqworker", "default", "evaluator", "--with-scheduler"]
+HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
+    CMD pgrep -f "rqworker" || exit 1
diff --git a/common/ai_review/job.py b/common/ai_review/job.py
@@ -5,6 +5,7 @@
 
 import django_rq
 import requests
+from django.conf import settings
 from serde import from_dict
 from serde.json import to_json
 
@@ -32,6 +33,17 @@ def detect_language(filename: str) -> Optional[str]:
 
 def upload_result(submit_url: str, result: AIReviewResult) -> None:
     session = requests.Session()
+    # Disable TLS certificate verification in DEBUG mode (local Docker development environment).
+    #
+    # EXPLANATION:
+    # In the local Docker development environment (DEBUG=True), the services communicate
+    # via internal Docker network names (e.g. 'https://nginx').
+    # The Nginx service uses self-signed certificates for HTTPS.
+    # Since these certificates are not issued by a trusted Certificate Authority (CA),
+    # requests would fail with an SSL error. Disabling verification allows
+    # this job to upload the AI review result in this dev environment.
+    if settings.DEBUG:
+        session.verify = False
 
     json_body = to_json(result, indent=2)
     logging.debug("Result JSON body: \n%s", json_body)

diff --git a/common/ai_review/processor.py b/common/ai_review/processor.py
@@ -14,7 +14,7 @@
 )
 from common.ai_review.job import review_job
 from common.models import SuggestedComment, Submit
-from common.utils import build_absolute_uri
+from common.utils import build_evaluation_download_uri
 
 AI_REVIEW_COMMENT_TYPE: str = "ai-review"
 AI_REVIEW_COMMENT_AUTHOR: str = "LLM"
@@ -30,7 +30,7 @@ def enqueue_llm_review_job(
     if not llm_config.enabled:
         return None
 
-    review_upload_url = build_absolute_uri(
+    review_upload_url = build_evaluation_download_uri(
         request,
         reverse(
             "v2:upload_submit_llm_review_result",
@@ -40,7 +40,7 @@ def enqueue_llm_review_job(
         ),
     )
 
-    review_prompt_url = build_absolute_uri(
+    review_prompt_url = build_evaluation_download_uri(
         request,
         reverse(
             "v2:retrieve_llm_review_prompt",

diff --git a/common/evaluate.py b/common/evaluate.py
@@ -8,12 +8,13 @@
 import django_rq
 import requests
 import yaml
+from django.conf import settings
 from django.core import signing
 from django.urls import reverse
 from django.utils import timezone
 
 from common.ai_review.processor import enqueue_llm_review_job
-from common.utils import is_teacher, build_absolute_uri
+from common.utils import is_teacher, build_evaluation_download_uri
 from evaluator.evaluator import Evaluation
 from evaluator.testsets import TestSet
 from kelvin.settings import BASE_DIR
@@ -39,7 +40,7 @@ def load_task_config(task_path: str) -> Optional[dict]:
 
 
 def evaluate_submit(request, submit, meta=None):
-    submit_url = build_absolute_uri(
+    submit_url = build_evaluation_download_uri(
         request,
         reverse(
             "task_detail",
@@ -51,7 +52,7 @@ def evaluate_submit(request, submit, meta=None):
         ),
     )
 
-    task_url = build_absolute_uri(
+    task_url = build_evaluation_download_uri(
         request,
         reverse(
             "teacher_task_tar",
@@ -102,6 +103,18 @@ def evaluate_job(submit_url, task_url, token, meta):
     logging.basicConfig(level=logging.DEBUG)
     s = requests.Session()
 
+    # Disable TLS certificate verification in DEBUG mode (local Docker development environment).
+    #
+    # EXPLANATION:
+    # In the local Docker development environment (DEBUG=True), the services communicate
+    # via internal Docker network names (e.g. 'https://nginx').
+    # The Nginx service uses self-signed certificates for HTTPS.
+    # Since these certificates are not issued by a trusted Certificate Authority (CA),
+    # requests would fail with an SSL error. Disabling verification allows
+    # the evaluator to download submissions and upload results in this dev environment.
+    if settings.DEBUG:
+        s.verify = False
+
     logging.info(f"Evaluating {submit_url}")
 
     with tempfile.TemporaryDirectory() as workdir: