From 44dc90a8d787a4c361e5c11cfc276fd46cc0da2f Mon Sep 17 00:00:00 2001 From: JersyJ Date: Sun, 15 Feb 2026 03:19:03 +0100 Subject: [PATCH 1/4] Evaluator Gcc Rewrite --- evaluator/pipelines.py | 65 +++++++ evaluator/type_handlers.py | 387 +++++++++++++++++++++++++++++++++++++ pyproject.toml | 1 + uv.lock | 35 ++++ 4 files changed, 488 insertions(+) create mode 100644 evaluator/type_handlers.py diff --git a/evaluator/pipelines.py b/evaluator/pipelines.py index e9cc24603..299d4de3a 100644 --- a/evaluator/pipelines.py +++ b/evaluator/pipelines.py @@ -11,6 +11,8 @@ from . import testsets from .utils import parse_human_size, copyfile +from . import type_handlers +from dataclasses import fields, replace logger = logging.getLogger("evaluator") @@ -40,6 +42,69 @@ } +class TypePipe: + handler_cls = None + id = None + default_limits = type_handlers.ExecutionLimits(fsize="16M", memory="128M", network="none") + + def __init__(self, image=None, limits=None, before=None, **kwargs): + self.image = image + self.kwargs = kwargs + self.limits = limits if limits else {} + self.before = [] if not before else before + + def _resolve_limits(self): + # 1. Start with class defaults (e.g. GccPipe defaults) + limits_obj = self.default_limits + + # 2. Update with user config limits by iterating over dataclass fields + updates = {} + for f in fields(type_handlers.ExecutionLimits): + if f.name in self.limits: + val = self.limits[f.name] + # Coerce to the field's declared type + updates[f.name] = f.type(val) if callable(f.type) else val + + return replace(limits_obj, **updates) + + def run(self, evaluation): + result_dir = os.path.join(evaluation.result_path, self.id) + os.mkdir(result_dir) + + image_name = self.image + if self.image: + image_name = prepare_container(docker_image(self.image), self.before) + + resolved_limits = self._resolve_limits() + + handler = self.handler_cls(self.kwargs, evaluation, resolved_limits) + result = handler.compile(image_name) + + if result.comments or result.tests: + with open(os.path.join(result_dir, "piperesult.json"), "w") as f: + json.dump({"comments": result.simple_comments, "tests": result.tests}, f, indent=4) + + with open(os.path.join(result_dir, "result.html"), "w") as f: + f.write(result.html) + + return { + "failed": not result.success, + "html": result.html, + "comments": result.simple_comments, + "tests": result.tests, + } + + +class GccPipe(TypePipe): + handler_cls = type_handlers.Gcc + default_limits = type_handlers.ExecutionLimits(fsize="64M", memory="128M", network="none") + + def __init__(self, **kwargs): + if "image" not in kwargs: + kwargs["image"] = "kelvin/gcc" + super().__init__(**kwargs) + + def create_docker_cmd(evaluation, image, additional_args=None, cmd=None, limits=None, env=None): if not limits: limits = {} diff --git a/evaluator/type_handlers.py b/evaluator/type_handlers.py new file mode 100644 index 000000000..6c9c241a6 --- /dev/null +++ b/evaluator/type_handlers.py @@ -0,0 +1,387 @@ +import os +import shlex +import subprocess +import logging +import json +from functools import cached_property +from pathlib import Path + +import nh3 + +from collections import defaultdict +from typing import Any, Optional +from dataclasses import dataclass, field + +from evaluator.utils import parse_human_size + +logger = logging.getLogger(__name__) + + +@dataclass +class Comment: + line: int + text: str + source: str + file: str + url: Optional[str] = None + + +@dataclass +class BuildResult: + success: bool + html: str + comments: list[Comment] = field(default_factory=list) + tests: list[dict] = field(default_factory=list) + + @property + def simple_comments(self) -> dict[str, list[dict]]: + """ + Returns comments in the format expected by piperesult.json: + { + "filename": [ + {"line": 1, "text": "...", "source": "..."}, + ... + ] + } + """ + out = defaultdict(list) + for c in self.comments: + out[c.file].append( + { + "line": c.line, + "text": c.text, + "source": c.source, + "url": c.url, + } + ) + return dict(out) + + @staticmethod + def fail(html_error: str) -> "BuildResult": + return BuildResult(False, html_error) + + +@dataclass +class DockerCommandResult: + returncode: int + stdout: str + stderr: str + + +@dataclass +class ExecutionLimits: + time: int = 300 # seconds + memory: str = "128M" + fsize: str = "16M" + network: str = "none" + + +@dataclass +class BuildCommand: + cmd: list[str] + env: dict[str, str] = field(default_factory=dict) + output_dir: Optional[str] = None + + +class BuildError(Exception): + def __init__(self, message: str, logs: str = ""): + self.message = nh3.clean_text(message) + self.logs = nh3.clean_text(logs) + + +class TypeHandler: + def __init__(self, pipeline_config: dict[str, Any], evaluation: Any, limits: ExecutionLimits): + """ + Initialize the TypeHandler. + + Args: + pipeline_config: The dictionary from the YAML configuration for this pipeline step. + evaluation: The evaluation context object containing paths/metadata. + limits: The resolved ExecutionLimits object. + """ + self.config = pipeline_config + self.evaluation = evaluation + self.limits = limits + + def compile(self, container_image: str) -> BuildResult: + """ + Runs the compilation/linting step. + Returns BuildResult(success, html_output). + """ + try: + return self._compile(container_image) + except BuildError as e: + html = e.logs + f"
{e.message}
" + return BuildResult.fail(html) + + def _compile(self, container_image: str) -> BuildResult: + raise NotImplementedError + + def _run_docker_command( + self, image: str, cmd: list[str], env: dict[str, str] | None = None + ) -> DockerCommandResult: + """ + Helper to run a command in a docker container. + + Mounts: + - /work: Student submission (read-write) + - /template: Teacher provided template files (read-only), if present. + """ + if env is None: + env = {} + + # 1. Base Docker Run Arguments + # --rm: Remove container after exit + # --user: Run as the current user (uid) to avoid permission issues with mapped files + # -w /work: Set working directory + docker_cmd = [ + "docker", + "run", + "--rm", + "--user", + str(os.getuid()), + "-w", + "/work", + ] + + # 2. Network Configuration + # Forcefully disable using --network=host for security + network_mode = self.limits.network + if network_mode == "host": + network_mode = "bridge" + docker_cmd.extend(["--network", network_mode]) + + # 3. Resource Limits + # ulimit: Restricts file size created by the process + # memory/memory-swap: Restricts RAM usage + # Note: ulimit fsize usually requires integer (bytes or blocks), so we parse it. + # Docker -m accepts strings like "128M". + fsize_bytes = parse_human_size(self.limits.fsize) + docker_cmd.extend( + [ + "--ulimit", + f"fsize={fsize_bytes}:{fsize_bytes}", + "-m", + self.limits.memory, + "--memory-swap", + self.limits.memory, + ] + ) + + # 4. Mounts + # 4a. Submission Mount + docker_cmd.extend(["-v", f"{self.evaluation.submit_path}:/work"]) + + # 4b. Template Mount + # This allows separation of student code vs immutable teacher code (headers, data). + template_path = os.path.join(self.evaluation.task_path, "template") + if os.path.isdir(template_path): + docker_cmd.extend(["-v", f"{template_path}:/template:ro"]) + + for k, v in env.items(): + docker_cmd.extend(["-e", f"{k}={v}"]) + + docker_cmd.append(image) + docker_cmd.extend(cmd) + + logger.info(f"Executing: {' '.join(docker_cmd)}") + + try: + # We use subprocess.run to capture output + result = subprocess.run( + docker_cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + timeout=self.limits.time, + ) + return DockerCommandResult( + returncode=result.returncode, + stdout=result.stdout.decode("utf-8", errors="replace"), + stderr=result.stderr.decode("utf-8", errors="replace"), + ) + except subprocess.TimeoutExpired: + return DockerCommandResult(-1, "", "Compilation timed out") + except Exception as e: + return DockerCommandResult(-1, "", f"Error running docker: {e}") + + def _format_html_generic( + self, + cmd: list[str], + stdout: str, + stderr: str, + returncode: int, + message: Optional[str] = None, + ) -> str: + cmd_str = nh3.clean_text(" ".join(cmd)) + html_out = f"$ {cmd_str}" + + safe_out = nh3.clean_text(stdout.strip()) + safe_err = nh3.clean_text(stderr.strip()) + safe_msg = nh3.clean_text(message) if message else None + + content = "" + + if safe_err: + if returncode == 0: + content += f"
Stderr{safe_err}
" + if safe_out: + content += "\n" + else: + # Show stderr openly on failure + content += f"{safe_err}" + if safe_out: + content += "\n" + + if safe_out: + content += safe_out + + if content: + html_out += f"{content}" + + if returncode == 0: + if safe_msg: + html_out += f"
{safe_msg} (Exit code: {returncode})
" + else: + html_out += ( + f"
Build successful (Exit code: {returncode})
" + ) + else: + if safe_msg: + html_out += f"
{safe_msg} (Exit code: {returncode})
" + else: + html_out += f"
Build failed (Exit code: {returncode})
" + return html_out + + +class Gcc(TypeHandler): + @cached_property + def _common_env(self) -> dict[str, str]: + flags = self.config.get("flags", "") + ldflags = self.config.get("ldflags", "") + return { + "CC": "gcc", + "CXX": "g++", + "CFLAGS": flags, + "CXXFLAGS": flags, + "LDFLAGS": ldflags, + "CLICOLOR_FORCE": "1", + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + } + + def _compile(self, container_image: str) -> BuildResult: + build_cmds = self._build_command() + + html_output = "" + + for i, bcmd in enumerate(build_cmds): + res = self._run_docker_command(container_image, bcmd.cmd, bcmd.env) + + # Determine message based on command type (heuristics) + msg = None + if bcmd.cmd[0] == "cmake": + if len(bcmd.cmd) > 1 and bcmd.cmd[1] == "--build": + msg = "Build succeeded" if res.returncode == 0 else "Build failed" + else: + msg = ( + "CMake configuration succeeded" + if res.returncode == 0 + else "Could not run CMake" + ) + elif bcmd.cmd[0] == "make": + msg = "Make succeeded" if res.returncode == 0 else "Could not run Make" + elif bcmd.cmd[0].endswith("gcc") or bcmd.cmd[0].endswith("g++"): + msg = "Compilation succeeded" if res.returncode == 0 else "Failed to run GCC" + + html_output += self._format_html_generic( + bcmd.cmd, res.stdout, res.stderr, res.returncode, message=msg + ) + + if res.returncode != 0: + # Return immediately if build command fails + # We return BuildResult directly here as it is not a "check" failure but a "run" failure + return BuildResult.fail(html_output) + + # Find any new executable and rename it to the expected output + output_bin = self.config.get("output", "main") + output_path = os.path.join(self.evaluation.submit_path, output_bin) + + if not os.path.exists(output_path): + executables = [] + for f in os.listdir(self.evaluation.submit_path): + fpath = os.path.join(self.evaluation.submit_path, f) + if os.access(fpath, os.X_OK) and not os.path.isdir(fpath): + executables.append(f) + + if len(executables) == 0: + raise BuildError("No executable has been built.", logs=html_output) + elif len(executables) > 1: + raise BuildError( + f"Multiple executables have been built: {','.join(executables)}", + logs=html_output, + ) + else: + # Rename found executable to output + src = os.path.join(self.evaluation.submit_path, executables[0]) + os.rename(src, output_path) + + # Fake the mv command log using generic formatter + html_output += self._format_html_generic( + ["mv", executables[0], output_bin], + "", + "", + 0, + message="Artifact moved and renamed", + ) + + return BuildResult(True, html_output) + + def _build_command(self) -> list[BuildCommand]: + files = [f.lower() for f in os.listdir(self.evaluation.submit_path)] + env = self._common_env + + if "cmakelists.txt" in files: + cmakeflags = self.config.get("cmakeflags", "[]") + try: + c_flags_parsed = json.loads(cmakeflags) + except Exception: + c_flags_parsed = [] + + # 1. Configure + cmd_conf = BuildCommand(["cmake", *c_flags_parsed, "."], env) + # 2. Build + cmd_build = BuildCommand(["cmake", "--build", "."], env) + return [cmd_conf, cmd_build] + + if "makefile" in files: + makeflags = self.config.get("makeflags", "[]") + try: + m_flags_parsed = json.loads(makeflags) + except Exception: + m_flags_parsed = [] + return [BuildCommand(["make", *m_flags_parsed], env)] + + # GCC fallback + output_bin = self.config.get("output", "main") + flags = self.config.get("flags", "") + ldflags = self.config.get("ldflags", "") + + sources = [] + for root, dirs, filenames in os.walk(self.evaluation.submit_path): + for f in filenames: + suffix = Path(f).suffix + if suffix in (".c", ".cpp", ".cc", ".cxx"): + # We use relative path for docker execution + rel_dir = os.path.relpath(root, self.evaluation.submit_path) + if rel_dir == ".": + sources.append(f) + else: + sources.append(os.path.join(rel_dir, f)) + + if not sources: + raise BuildError("Missing source files! please upload .c or .cpp files!") + + use_cpp = any(Path(f).suffix in (".cpp", ".cc", ".cxx") for f in sources) + compiler = "g++" if use_cpp else "gcc" + + cmd = [compiler] + sources + ["-o", output_bin] + shlex.split(flags) + shlex.split(ldflags) + return [BuildCommand(cmd, env)] diff --git a/pyproject.toml b/pyproject.toml index 1d40f0cfc..92a4edfd3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,6 +47,7 @@ dependencies = [ "django-ninja==1.4.5", "docker~=7.1.0", "py7zr==1.0.0", + "nh3>=0.3.3", ] [dependency-groups] diff --git a/uv.lock b/uv.lock index f31c0078d..25598c515 100644 --- a/uv.lock +++ b/uv.lock @@ -598,6 +598,7 @@ dependencies = [ { name = "mdx-truly-sane-lists" }, { name = "mosspy" }, { name = "networkx" }, + { name = "nh3" }, { name = "numpy" }, { name = "oauthlib" }, { name = "openai" }, @@ -651,6 +652,7 @@ requires-dist = [ { name = "mdx-truly-sane-lists", specifier = "==1.3" }, { name = "mosspy", specifier = "==1.0.9" }, { name = "networkx", specifier = "==2.5" }, + { name = "nh3", specifier = ">=0.3.3" }, { name = "numpy", specifier = "==1.26.4" }, { name = "oauthlib", specifier = "==3.2.2" }, { name = "openai", specifier = ">=2.6.1" }, @@ -789,6 +791,39 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9b/cd/dc52755d30ba41c60243235460961fc28022e5b6731f16c268667625baea/networkx-2.5-py3-none-any.whl", hash = "sha256:8c5812e9f798d37c50570d15c4a69d5710a18d77bafc903ee9c5fba7454c616c", size = 1615413, upload-time = "2020-08-22T20:38:37.263Z" }, ] +[[package]] +name = "nh3" +version = "0.3.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cc/37/ab55eb2b05e334ff9a1ad52c556ace1f9c20a3f63613a165d384d5387657/nh3-0.3.3.tar.gz", hash = "sha256:185ed41b88c910b9ca8edc89ca3b4be688a12cb9de129d84befa2f74a0039fee", size = 18968, upload-time = "2026-02-14T09:35:15.664Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/13/3e/aef8cf8e0419b530c95e96ae93a5078e9b36c1e6613eeb1df03a80d5194e/nh3-0.3.3-cp38-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:e8ee96156f7dfc6e30ecda650e480c5ae0a7d38f0c6fafc3c1c655e2500421d9", size = 1448640, upload-time = "2026-02-14T09:34:49.316Z" }, + { url = "https://files.pythonhosted.org/packages/ca/43/d2011a4f6c0272cb122eeff40062ee06bb2b6e57eabc3a5e057df0d582df/nh3-0.3.3-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45fe0d6a607264910daec30360c8a3b5b1500fd832d21b2da608256287bcb92d", size = 839405, upload-time = "2026-02-14T09:34:50.779Z" }, + { url = "https://files.pythonhosted.org/packages/f8/f3/965048510c1caf2a34ed04411a46a04a06eb05563cd06f1aa57b71eb2bc8/nh3-0.3.3-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5bc1d4b30ba1ba896669d944b6003630592665974bd11a3dc2f661bde92798a7", size = 825849, upload-time = "2026-02-14T09:34:52.622Z" }, + { url = "https://files.pythonhosted.org/packages/78/99/b4bbc6ad16329d8db2c2c320423f00b549ca3b129c2b2f9136be2606dbb0/nh3-0.3.3-cp38-abi3-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:f433a2dd66545aad4a720ad1b2150edcdca75bfff6f4e6f378ade1ec138d5e77", size = 1068303, upload-time = "2026-02-14T09:34:54.179Z" }, + { url = "https://files.pythonhosted.org/packages/3f/34/3420d97065aab1b35f3e93ce9c96c8ebd423ce86fe84dee3126790421a2a/nh3-0.3.3-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:52e973cb742e95b9ae1b35822ce23992428750f4b46b619fe86eba4205255b30", size = 1029316, upload-time = "2026-02-14T09:34:56.186Z" }, + { url = "https://files.pythonhosted.org/packages/f1/9a/99eda757b14e596fdb2ca5f599a849d9554181aa899274d0d183faef4493/nh3-0.3.3-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4c730617bdc15d7092dcc0469dc2826b914c8f874996d105b4bc3842a41c1cd9", size = 919944, upload-time = "2026-02-14T09:34:57.886Z" }, + { url = "https://files.pythonhosted.org/packages/6f/84/c0dc75c7fb596135f999e59a410d9f45bdabb989f1cb911f0016d22b747b/nh3-0.3.3-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e98fa3dbfd54e25487e36ba500bc29bca3a4cab4ffba18cfb1a35a2d02624297", size = 811461, upload-time = "2026-02-14T09:34:59.65Z" }, + { url = "https://files.pythonhosted.org/packages/7e/ec/b1bf57cab6230eec910e4863528dc51dcf21b57aaf7c88ee9190d62c9185/nh3-0.3.3-cp38-abi3-manylinux_2_31_riscv64.whl", hash = "sha256:3a62b8ae7c235481715055222e54c682422d0495a5c73326807d4e44c5d14691", size = 840360, upload-time = "2026-02-14T09:35:01.444Z" }, + { url = "https://files.pythonhosted.org/packages/37/5e/326ae34e904dde09af1de51219a611ae914111f0970f2f111f4f0188f57e/nh3-0.3.3-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fc305a2264868ec8fa16548296f803d8fd9c1fa66cd28b88b605b1bd06667c0b", size = 859872, upload-time = "2026-02-14T09:35:03.348Z" }, + { url = "https://files.pythonhosted.org/packages/09/38/7eba529ce17ab4d3790205da37deabb4cb6edcba15f27b8562e467f2fc97/nh3-0.3.3-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:90126a834c18af03bfd6ff9a027bfa6bbf0e238527bc780a24de6bd7cc1041e2", size = 1023550, upload-time = "2026-02-14T09:35:04.829Z" }, + { url = "https://files.pythonhosted.org/packages/05/a2/556fdecd37c3681b1edee2cf795a6799c6ed0a5551b2822636960d7e7651/nh3-0.3.3-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:24769a428e9e971e4ccfb24628f83aaa7dc3c8b41b130c8ddc1835fa1c924489", size = 1105212, upload-time = "2026-02-14T09:35:06.821Z" }, + { url = "https://files.pythonhosted.org/packages/dd/e3/5db0b0ad663234967d83702277094687baf7c498831a2d3ad3451c11770f/nh3-0.3.3-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:b7a18ee057761e455d58b9d31445c3e4b2594cff4ddb84d2e331c011ef46f462", size = 1069970, upload-time = "2026-02-14T09:35:08.504Z" }, + { url = "https://files.pythonhosted.org/packages/79/b2/2ea21b79c6e869581ce5f51549b6e185c4762233591455bf2a326fb07f3b/nh3-0.3.3-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:5a4b2c1f3e6f3cbe7048e17f4fefad3f8d3e14cc0fd08fb8599e0d5653f6b181", size = 1047588, upload-time = "2026-02-14T09:35:09.911Z" }, + { url = "https://files.pythonhosted.org/packages/e2/92/2e434619e658c806d9c096eed2cdff9a883084299b7b19a3f0824eb8e63d/nh3-0.3.3-cp38-abi3-win32.whl", hash = "sha256:e974850b131fdffa75e7ad8e0d9c7a855b96227b093417fdf1bd61656e530f37", size = 616179, upload-time = "2026-02-14T09:35:11.366Z" }, + { url = "https://files.pythonhosted.org/packages/73/88/1ce287ef8649dc51365b5094bd3713b76454838140a32ab4f8349973883c/nh3-0.3.3-cp38-abi3-win_amd64.whl", hash = "sha256:2efd17c0355d04d39e6d79122b42662277ac10a17ea48831d90b46e5ef7e4fc0", size = 631159, upload-time = "2026-02-14T09:35:12.77Z" }, + { url = "https://files.pythonhosted.org/packages/31/f1/b4835dbde4fb06f29db89db027576d6014081cd278d9b6751facc3e69e43/nh3-0.3.3-cp38-abi3-win_arm64.whl", hash = "sha256:b838e619f483531483d26d889438e53a880510e832d2aafe73f93b7b1ac2bce2", size = 616645, upload-time = "2026-02-14T09:35:14.062Z" }, +] + +[[package]] +name = "nodeenv" +version = "1.9.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/43/16/fc88b08840de0e0a72a2f9d8c6bae36be573e475a6326ae854bcc549fc45/nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f", size = 47437, upload-time = "2024-06-04T18:44:11.171Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d2/1d/1b658dbd2b9fa9c4c9f32accbfc0205d532c8c6194dc0f2a4c0428e7128a/nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9", size = 22314, upload-time = "2024-06-04T18:44:08.352Z" }, +] + [[package]] name = "numpy" version = "1.26.4" From aad83f8e774632c94088f58ed0eeea1b612e8c93 Mon Sep 17 00:00:00 2001 From: JersyJ Date: Sun, 15 Feb 2026 04:05:22 +0100 Subject: [PATCH 2/4] Filename Security features --- common/models.py | 9 +++++++-- common/upload.py | 2 ++ common/utils.py | 18 ++++++++++++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/common/models.py b/common/models.py index 1f13af4eb..1789cbfc7 100644 --- a/common/models.py +++ b/common/models.py @@ -16,7 +16,7 @@ from .ai_review.dto import LlmReviewPromptDTO from .event_log import UserEventModel # noqa from .emails.models import Email # noqa -from .utils import is_teacher +from .utils import has_unsafe_filename, is_teacher def current_semester() -> Optional["Semester"]: @@ -316,7 +316,12 @@ def all_sources(self) -> List[SourcePath]: for root, dirs, files in os.walk(self.dir()): for f in files: path = os.path.join(root, f) - sources.append(SourcePath(path[offset:], path)) + virt = path[offset:] + # Skip files with XSS-dangerous characters in their names. + # These can be created by student code during Docker execution. + if has_unsafe_filename(virt): + continue + sources.append(SourcePath(virt, path)) return sources diff --git a/common/upload.py b/common/upload.py index 3ec2ce4e4..7a56c2a8a 100644 --- a/common/upload.py +++ b/common/upload.py @@ -12,6 +12,7 @@ from django.core.files.uploadedfile import UploadedFile from common.models import Submit +from common.utils import has_unsafe_filename mimedetector = magic.Magic(mime=True) @@ -207,6 +208,7 @@ def reset_file() -> UploadedFile: try: files = uploader.get_files() files = [(os.path.normpath(path), f) for (path, f) in files] + files = [(path, f) for (path, f) in files if not has_unsafe_filename(path)] files = filter_files_by_filename(files) for path, file in files: diff --git a/common/utils.py b/common/utils.py index 8c05f0e47..9fb958786 100644 --- a/common/utils.py +++ b/common/utils.py @@ -7,12 +7,30 @@ from typing import NewType import django.contrib.auth.models +import nh3 import requests from django.http import HttpRequest from ipware import get_client_ip from .inbus import inbus +# Characters that are dangerous in HTML context and have no legitimate use in source filenames. +_UNSAFE_FILENAME_CHARS = re.compile(r'[<>"\'&;`|]') + + +def has_unsafe_filename(path: str) -> bool: + """ + Returns True if any component of the path contains characters that could + be used for XSS or shell injection attacks. + Legitimate source files should never contain these characters. + + Uses two layers of detection: + 1. Regex for shell/HTML dangerous characters + 2. nh3.is_html() to catch any HTML syntax patterns the regex might miss + """ + return bool(_UNSAFE_FILENAME_CHARS.search(path)) or nh3.is_html(path) + + IPAddressString = NewType("IPAddressString", str) From 2f1e609573ea361d1c04709f2010b7f441308589 Mon Sep 17 00:00:00 2001 From: JersyJ Date: Tue, 17 Feb 2026 22:22:59 +0100 Subject: [PATCH 3/4] Remove old entry.py --- evaluator/images/gcc/entry.py | 139 ---------------------------------- evaluator/images/gcc/wrapper | 2 - evaluator/pipelines.py | 20 ++++- evaluator/type_handlers.py | 5 +- 4 files changed, 20 insertions(+), 146 deletions(-) delete mode 100755 evaluator/images/gcc/entry.py delete mode 100755 evaluator/images/gcc/wrapper diff --git a/evaluator/images/gcc/entry.py b/evaluator/images/gcc/entry.py deleted file mode 100755 index b0a4844ef..000000000 --- a/evaluator/images/gcc/entry.py +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/python3 -import html -import io -import json -import subprocess -import os -import shlex -import traceback - -import bleach - -SANITIZED_FILES = ["result.html", "piperesult.json"] - -output = os.getenv("PIPE_OUTPUT", "main") -flags = os.getenv("PIPE_FLAGS", "") -ldflags = os.getenv("PIPE_LDFLAGS", "") -cmakeflags = os.getenv("PIPE_CMAKEFLAGS", "[]") -makeflags = os.getenv("PIPE_MAKEFLAGS", "[]") - - -# TODO: replace with shlex.join on python3.8 -def shlex_join(split_command): - return " ".join(shlex.quote(arg) for arg in split_command) - - -def cmd_run(cmd, out, show_cmd=None, env=None) -> int: - if not show_cmd: - show_cmd = cmd - - if env: - env = {**os.environ, **env} - - out.write(f"$ {bleach.clean(shlex_join(show_cmd))}") - - with open("/tmp/out", "w+", errors="ignore") as gcc_out: - p = subprocess.Popen(cmd, stdout=gcc_out, stderr=gcc_out, env=env) - p.wait() - - gcc_out.seek(0) - out.write(f"{html.escape(gcc_out.read())}") - return p.returncode - - -class CompilationException(BaseException): - pass - - -def compile(makeflags: str, cmakeflags: str, html_output: io.StringIO): - env = { - "CC": "gcc", - "CXX": "g++", - "CFLAGS": flags, - "CXXFLAGS": flags, - "LDFLAGS": ldflags, - "CLICOLOR_FORCE": "1", - "PATH": f'/wrapper:{os.getenv("PATH")}', - "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", - } - - if "cmakelists.txt" in [f.lower() for f in os.listdir(".")]: - cmakeflags = json.loads(cmakeflags) - cmake_exitcode = cmd_run(["cmake", *cmakeflags, "."], html_output, env=env) - if cmake_exitcode != 0: - raise CompilationException(f"Could not run CMake, exit code {cmake_exitcode}") - - # The file list needs to be queried again - if "makefile" in [f.lower() for f in os.listdir(".")]: - makeflags = json.loads(makeflags) - make_exitcode = cmd_run(["make", *makeflags], html_output, env=env) - if make_exitcode != 0: - raise CompilationException(f"Could not run Make, exit code {make_exitcode}") - else: - sources = [] - for root, dirs, files in os.walk("."): - for f in files: - if f.split(".")[-1] in ["c", "cpp"]: - sources.append(os.path.join(root, f)) - - if not sources: - raise CompilationException( - "Missing source files! please upload .c or .cpp files!" - ) - - use_cpp = any(f.endswith(".cpp") for f in sources) - compile_cmd = [ - "g++" if use_cpp else "gcc", - *sources, - "-o", - output, - *shlex.split(flags), - *shlex.split(ldflags), - ] - gcc_exitcode = cmd_run(compile_cmd, html_output, show_cmd=compile_cmd, env=env) - - if gcc_exitcode == 0: - html_output.write("
Compilation succeeded
") - else: - raise CompilationException(f"Failed to run GCC, exit code {gcc_exitcode}") - - if output and not os.path.exists(output): - executables = [f for f in os.listdir() if os.access(f, os.X_OK) and not os.path.isdir(f)] - if len(executables) == 0: - raise CompilationException("No executable has been built.") - elif len(executables) > 1: - raise CompilationException( - f"Multiple executables have been built: {','.join(executables)}" - ) - - html_output.write( - f"$ mv {bleach.clean(executables[0])} {output}" - ) - os.rename(executables[0], output) - - -result_file = "result.html" - -html_output = io.StringIO() -returncode = 1 - -try: - compile(makeflags, cmakeflags, html_output) - returncode = 0 -except BaseException as e: - if isinstance(e, CompilationException): - html_output.write(f"
{bleach.clean(str(e))}
") - else: - traceback.print_exception(e) -finally: - for file in SANITIZED_FILES: - try: - # Make sure that no sanitized file was written - os.unlink(file) - except: # noqa - pass - -with open("result.html", "w") as out: - out.write(html_output.getvalue()) - -exit(returncode) diff --git a/evaluator/images/gcc/wrapper b/evaluator/images/gcc/wrapper deleted file mode 100755 index 19a878a75..000000000 --- a/evaluator/images/gcc/wrapper +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/sh -/usr/bin/$(basename $0) -fdiagnostics-color=always "$@" ${PIPE_FLAGS} ${LD_FLAGS} diff --git a/evaluator/pipelines.py b/evaluator/pipelines.py index 299d4de3a..1be7809ed 100644 --- a/evaluator/pipelines.py +++ b/evaluator/pipelines.py @@ -62,8 +62,22 @@ def _resolve_limits(self): for f in fields(type_handlers.ExecutionLimits): if f.name in self.limits: val = self.limits[f.name] - # Coerce to the field's declared type - updates[f.name] = f.type(val) if callable(f.type) else val + if val is None: + continue + # Coerce val to the field's type if needed (e.g. "30" → int). + # isinstance(field_type, type) skips complex hints like Optional[str]. + field_type = f.type + try: + if isinstance(field_type, type) and not isinstance(val, field_type): + val = field_type(val) + updates[f.name] = val + except (TypeError, ValueError): + logger.warning( + "Could not coerce limit %r value %r to %s, using default", + f.name, + val, + field_type.__name__, + ) return replace(limits_obj, **updates) @@ -151,7 +165,7 @@ def fmt_value(v): "-v", evaluation.submit_path + ":/work", "--ulimit", - f'fsize={limits["fsize"]}:{limits["fsize"]}', + f"fsize={limits['fsize']}:{limits['fsize']}", "-m", str(limits["memory"]), "--memory-swap", diff --git a/evaluator/type_handlers.py b/evaluator/type_handlers.py index 6c9c241a6..d60891504 100644 --- a/evaluator/type_handlers.py +++ b/evaluator/type_handlers.py @@ -80,7 +80,6 @@ class ExecutionLimits: class BuildCommand: cmd: list[str] env: dict[str, str] = field(default_factory=dict) - output_dir: Optional[str] = None class BuildError(Exception): @@ -303,6 +302,7 @@ def _compile(self, container_image: str) -> BuildResult: # Find any new executable and rename it to the expected output output_bin = self.config.get("output", "main") + output_bin = os.path.basename(output_bin) output_path = os.path.join(self.evaluation.submit_path, output_bin) if not os.path.exists(output_path): @@ -362,6 +362,7 @@ def _build_command(self) -> list[BuildCommand]: # GCC fallback output_bin = self.config.get("output", "main") + output_bin = os.path.basename(output_bin) flags = self.config.get("flags", "") ldflags = self.config.get("ldflags", "") @@ -378,7 +379,7 @@ def _build_command(self) -> list[BuildCommand]: sources.append(os.path.join(rel_dir, f)) if not sources: - raise BuildError("Missing source files! please upload .c or .cpp files!") + raise BuildError("Missing source files! Please upload .c or .cpp files!") use_cpp = any(Path(f).suffix in (".cpp", ".cc", ".cxx") for f in sources) compiler = "g++" if use_cpp else "gcc" From e7db66f61f49e7088536e945597a21a444f9d512 Mon Sep 17 00:00:00 2001 From: JersyJ Date: Tue, 17 Feb 2026 22:28:00 +0100 Subject: [PATCH 4/4] Remove unnecessary wrapper and entry.py from Dockerfile --- evaluator/images/gcc/Dockerfile | 5 ----- 1 file changed, 5 deletions(-) diff --git a/evaluator/images/gcc/Dockerfile b/evaluator/images/gcc/Dockerfile index 0c044e5c7..7b58b7a1a 100644 --- a/evaluator/images/gcc/Dockerfile +++ b/evaluator/images/gcc/Dockerfile @@ -1,6 +1 @@ FROM kelvin/base -ADD wrapper /wrapper/gcc -ADD wrapper /wrapper/cc -ADD wrapper /wrapper/g++ -ADD entry.py / -CMD /entry.py