diff --git a/languages/python/custom/src/DetectUnsanitizedRglobPathTraversal.ql b/languages/python/custom/src/DetectUnsanitizedRglobPathTraversal.ql new file mode 100644 index 0000000..346ad4e --- /dev/null +++ b/languages/python/custom/src/DetectUnsanitizedRglobPathTraversal.ql @@ -0,0 +1,26 @@ +/** + * @name Uncontrolled data used in path expression (with pathlib sanitizer support) + * @description Accessing paths influenced by users can allow an attacker to access + * unexpected resources. This query extends the standard py/path-injection + * analysis to recognize `pathlib.Path.resolve()` as path normalization + * and `pathlib.Path.is_relative_to()` as a safe access check. + * @kind path-problem + * @problem.severity warning + * @security-severity 7.5 + * @precision high + * @id python/detect-unsanitized-rglob-path-traversal + * @tags security + * external/cwe/cwe-022 + */ + +import python +// Import custom sanitizer extensions; these extend Path::PathNormalization::Range +// and Path::SafeAccessCheck::Range so the standard PathInjectionConfig picks them up. +import PathInjectionSanitizers +import semmle.python.security.dataflow.PathInjectionQuery +import PathInjectionFlow::PathGraph + +from PathInjectionFlow::PathNode source, PathInjectionFlow::PathNode sink +where PathInjectionFlow::flowPath(source, sink) +select sink.getNode(), source, sink, "This path depends on a $@.", source.getNode(), + "user-provided value" diff --git a/languages/python/custom/src/PathInjectionSanitizers.qll b/languages/python/custom/src/PathInjectionSanitizers.qll new file mode 100644 index 0000000..71edf88 --- /dev/null +++ b/languages/python/custom/src/PathInjectionSanitizers.qll @@ -0,0 +1,73 @@ +/** + * Provides custom sanitizer extensions for the standard `py/path-injection` query. + * + * This module extends the CodeQL standard library's path injection analysis + * to recognize additional sanitization patterns: + * + * 1. 
`pathlib.Path.resolve()` as a path normalization step + * 2. `pathlib.Path.is_relative_to()` as a safe access check + * + * These extensions enable the standard query to understand that a pattern like: + * ```python + * resolved = job_dir.resolve() + * if not resolved.is_relative_to(JOBS_ROOT): + * raise ValueError("path escapes root") + * matches = list(resolved.rglob("summary.csv")) + * ``` + * properly sanitizes the path before use. + * + * NOTE: If contributing to the CodeQL standard library (re-bundling), these + * extensions should be added directly to `semmle/python/frameworks/Stdlib.qll`: + * - `PathlibResolveCall` alongside `OsPathRealpathCall` (around line 1065) + * - `IsRelativeToCall` alongside `StartswithCall` (around line 5090) + */ + +private import python +private import semmle.python.dataflow.new.DataFlow +private import semmle.python.Concepts + +/** + * A call to `pathlib.Path.resolve()`, modeled as a path normalization step. + * + * `resolve()` makes the path absolute and resolves all symlinks, producing + * a canonical path. This is semantically equivalent to `os.path.realpath()`, + * which is already modeled as `Path::PathNormalization::Range` in the + * standard library. Matching is by attribute name only: any call whose callee is reached from an attribute read named `resolve` is treated as a normalization, and the receiver's type is not checked. + * + * See https://docs.python.org/3/library/pathlib.html#pathlib.Path.resolve + */ +private class PathlibResolveCall extends Path::PathNormalization::Range, DataFlow::CallCfgNode { + /** The attribute read named `resolve` that flows to this call's callee; its object is reported by `getPathArg()` as the path being normalized. */ + DataFlow::AttrRead resolveMethodAccess; + + PathlibResolveCall() { + resolveMethodAccess.getAttributeName() = "resolve" and + resolveMethodAccess.(DataFlow::LocalSourceNode).flowsTo(this.getFunction()) + } + + override DataFlow::Node getPathArg() { result = resolveMethodAccess.getObject() } +} + +/** + * A call to `pathlib.PurePath.is_relative_to()`, modeled as a safe access check. 
+ * + * `is_relative_to(other)` returns `True` if the path is relative to `other`, + * which is commonly used as a path confinement check to verify that a resolved + * path remains within an expected directory. This is semantically similar to + * `str.startswith()`, which is already modeled as `Path::SafeAccessCheck::Range` + * in the standard library. Matching is by method name only (the receiver's type is not checked), and only the receiver, not the `other` argument, is reported as the checked node. In the accompanying test, a path that is checked with `is_relative_to()` without first being resolved is still expected to raise an alert. + * + * See https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.is_relative_to + */ +private class IsRelativeToCall extends Path::SafeAccessCheck::Range { + IsRelativeToCall() { + this.(CallNode).getFunction().(AttrNode).getName() = "is_relative_to" + } + + override predicate checks(ControlFlowNode node, boolean branch) { + // When is_relative_to() returns True (branch = true), the path is confirmed + // to be confined within the expected directory, making it safe to access. + node = this.(CallNode).getFunction().(AttrNode).getObject() and + branch = true + } +} diff --git a/languages/python/custom/test/DetectUnsanitizedRglobPathTraversal/DetectUnsanitizedRglobPathTraversal.expected b/languages/python/custom/test/DetectUnsanitizedRglobPathTraversal/DetectUnsanitizedRglobPathTraversal.expected new file mode 100644 index 0000000..1b83bc8 --- /dev/null +++ b/languages/python/custom/test/DetectUnsanitizedRglobPathTraversal/DetectUnsanitizedRglobPathTraversal.expected @@ -0,0 +1,94 @@ +edges +| test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for ImportMember | test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for request | provenance | | +| test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for request | test_rglob_sanitization.py:14:14:14:20 | ControlFlowNode for request | provenance | | +| test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for request | test_rglob_sanitization.py:23:14:23:20 | ControlFlowNode for request | provenance | | +| test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for request | test_rglob_sanitization.py:36:14:36:20 | ControlFlowNode for request | provenance 
| | +| test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for request | test_rglob_sanitization.py:46:14:46:20 | ControlFlowNode for request | provenance | | +| test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for request | test_rglob_sanitization.py:57:14:57:20 | ControlFlowNode for request | provenance | | +| test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for request | test_rglob_sanitization.py:70:14:70:20 | ControlFlowNode for request | provenance | | +| test_rglob_sanitization.py:14:5:14:10 | ControlFlowNode for job_id | test_rglob_sanitization.py:15:5:15:11 | ControlFlowNode for job_dir | provenance | AdditionalTaintStep | +| test_rglob_sanitization.py:14:14:14:20 | ControlFlowNode for request | test_rglob_sanitization.py:14:14:14:25 | ControlFlowNode for Attribute | provenance | AdditionalTaintStep | +| test_rglob_sanitization.py:14:14:14:25 | ControlFlowNode for Attribute | test_rglob_sanitization.py:14:14:14:43 | ControlFlowNode for Attribute() | provenance | dict.get | +| test_rglob_sanitization.py:14:14:14:43 | ControlFlowNode for Attribute() | test_rglob_sanitization.py:14:5:14:10 | ControlFlowNode for job_id | provenance | | +| test_rglob_sanitization.py:15:5:15:11 | ControlFlowNode for job_dir | test_rglob_sanitization.py:16:20:16:26 | ControlFlowNode for job_dir | provenance | | +| test_rglob_sanitization.py:23:5:23:10 | ControlFlowNode for job_id | test_rglob_sanitization.py:24:5:24:11 | ControlFlowNode for job_dir | provenance | AdditionalTaintStep | +| test_rglob_sanitization.py:23:14:23:20 | ControlFlowNode for request | test_rglob_sanitization.py:23:14:23:25 | ControlFlowNode for Attribute | provenance | AdditionalTaintStep | +| test_rglob_sanitization.py:23:14:23:25 | ControlFlowNode for Attribute | test_rglob_sanitization.py:23:14:23:43 | ControlFlowNode for Attribute() | provenance | dict.get | +| test_rglob_sanitization.py:23:14:23:43 | ControlFlowNode for Attribute() | test_rglob_sanitization.py:23:5:23:10 | 
ControlFlowNode for job_id | provenance | | +| test_rglob_sanitization.py:24:5:24:11 | ControlFlowNode for job_dir | test_rglob_sanitization.py:25:16:25:22 | ControlFlowNode for job_dir | provenance | | +| test_rglob_sanitization.py:36:5:36:10 | ControlFlowNode for job_id | test_rglob_sanitization.py:37:5:37:11 | ControlFlowNode for job_dir | provenance | AdditionalTaintStep | +| test_rglob_sanitization.py:36:14:36:20 | ControlFlowNode for request | test_rglob_sanitization.py:36:14:36:25 | ControlFlowNode for Attribute | provenance | AdditionalTaintStep | +| test_rglob_sanitization.py:36:14:36:25 | ControlFlowNode for Attribute | test_rglob_sanitization.py:36:14:36:43 | ControlFlowNode for Attribute() | provenance | dict.get | +| test_rglob_sanitization.py:36:14:36:43 | ControlFlowNode for Attribute() | test_rglob_sanitization.py:36:5:36:10 | ControlFlowNode for job_id | provenance | | +| test_rglob_sanitization.py:37:5:37:11 | ControlFlowNode for job_dir | test_rglob_sanitization.py:38:16:38:22 | ControlFlowNode for job_dir | provenance | | +| test_rglob_sanitization.py:37:5:37:11 | ControlFlowNode for job_dir | test_rglob_sanitization.py:38:16:38:22 | ControlFlowNode for job_dir | provenance | | +| test_rglob_sanitization.py:38:5:38:12 | ControlFlowNode for resolved | test_rglob_sanitization.py:39:20:39:27 | ControlFlowNode for resolved | provenance | | +| test_rglob_sanitization.py:38:16:38:22 | ControlFlowNode for job_dir | test_rglob_sanitization.py:38:16:38:32 | ControlFlowNode for Attribute() | provenance | Config | +| test_rglob_sanitization.py:38:16:38:32 | ControlFlowNode for Attribute() | test_rglob_sanitization.py:38:5:38:12 | ControlFlowNode for resolved | provenance | | +| test_rglob_sanitization.py:46:5:46:10 | ControlFlowNode for job_id | test_rglob_sanitization.py:47:5:47:11 | ControlFlowNode for job_dir | provenance | AdditionalTaintStep | +| test_rglob_sanitization.py:46:14:46:20 | ControlFlowNode for request | 
test_rglob_sanitization.py:46:14:46:25 | ControlFlowNode for Attribute | provenance | AdditionalTaintStep | +| test_rglob_sanitization.py:46:14:46:25 | ControlFlowNode for Attribute | test_rglob_sanitization.py:46:14:46:43 | ControlFlowNode for Attribute() | provenance | dict.get | +| test_rglob_sanitization.py:46:14:46:43 | ControlFlowNode for Attribute() | test_rglob_sanitization.py:46:5:46:10 | ControlFlowNode for job_id | provenance | | +| test_rglob_sanitization.py:47:5:47:11 | ControlFlowNode for job_dir | test_rglob_sanitization.py:50:20:50:26 | ControlFlowNode for job_dir | provenance | | +| test_rglob_sanitization.py:57:5:57:10 | ControlFlowNode for job_id | test_rglob_sanitization.py:58:5:58:11 | ControlFlowNode for job_dir | provenance | AdditionalTaintStep | +| test_rglob_sanitization.py:57:14:57:20 | ControlFlowNode for request | test_rglob_sanitization.py:57:14:57:25 | ControlFlowNode for Attribute | provenance | AdditionalTaintStep | +| test_rglob_sanitization.py:57:14:57:25 | ControlFlowNode for Attribute | test_rglob_sanitization.py:57:14:57:43 | ControlFlowNode for Attribute() | provenance | dict.get | +| test_rglob_sanitization.py:57:14:57:43 | ControlFlowNode for Attribute() | test_rglob_sanitization.py:57:5:57:10 | ControlFlowNode for job_id | provenance | | +| test_rglob_sanitization.py:58:5:58:11 | ControlFlowNode for job_dir | test_rglob_sanitization.py:59:16:59:22 | ControlFlowNode for job_dir | provenance | | +| test_rglob_sanitization.py:70:5:70:10 | ControlFlowNode for job_id | test_rglob_sanitization.py:71:5:71:11 | ControlFlowNode for job_dir | provenance | AdditionalTaintStep | +| test_rglob_sanitization.py:70:14:70:20 | ControlFlowNode for request | test_rglob_sanitization.py:70:14:70:25 | ControlFlowNode for Attribute | provenance | AdditionalTaintStep | +| test_rglob_sanitization.py:70:14:70:25 | ControlFlowNode for Attribute | test_rglob_sanitization.py:70:14:70:43 | ControlFlowNode for Attribute() | provenance | dict.get | +| 
test_rglob_sanitization.py:70:14:70:43 | ControlFlowNode for Attribute() | test_rglob_sanitization.py:70:5:70:10 | ControlFlowNode for job_id | provenance | | +| test_rglob_sanitization.py:71:5:71:11 | ControlFlowNode for job_dir | test_rglob_sanitization.py:72:9:72:15 | ControlFlowNode for job_dir | provenance | | +nodes +| test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember | +| test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| test_rglob_sanitization.py:14:5:14:10 | ControlFlowNode for job_id | semmle.label | ControlFlowNode for job_id | +| test_rglob_sanitization.py:14:14:14:20 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| test_rglob_sanitization.py:14:14:14:25 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| test_rglob_sanitization.py:14:14:14:43 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | +| test_rglob_sanitization.py:15:5:15:11 | ControlFlowNode for job_dir | semmle.label | ControlFlowNode for job_dir | +| test_rglob_sanitization.py:16:20:16:26 | ControlFlowNode for job_dir | semmle.label | ControlFlowNode for job_dir | +| test_rglob_sanitization.py:23:5:23:10 | ControlFlowNode for job_id | semmle.label | ControlFlowNode for job_id | +| test_rglob_sanitization.py:23:14:23:20 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| test_rglob_sanitization.py:23:14:23:25 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| test_rglob_sanitization.py:23:14:23:43 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | +| test_rglob_sanitization.py:24:5:24:11 | ControlFlowNode for job_dir | semmle.label | ControlFlowNode for job_dir | +| test_rglob_sanitization.py:25:16:25:22 | ControlFlowNode for job_dir | semmle.label | ControlFlowNode 
for job_dir | +| test_rglob_sanitization.py:36:5:36:10 | ControlFlowNode for job_id | semmle.label | ControlFlowNode for job_id | +| test_rglob_sanitization.py:36:14:36:20 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| test_rglob_sanitization.py:36:14:36:25 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| test_rglob_sanitization.py:36:14:36:43 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | +| test_rglob_sanitization.py:37:5:37:11 | ControlFlowNode for job_dir | semmle.label | ControlFlowNode for job_dir | +| test_rglob_sanitization.py:38:5:38:12 | ControlFlowNode for resolved | semmle.label | ControlFlowNode for resolved | +| test_rglob_sanitization.py:38:16:38:22 | ControlFlowNode for job_dir | semmle.label | ControlFlowNode for job_dir | +| test_rglob_sanitization.py:38:16:38:22 | ControlFlowNode for job_dir | semmle.label | ControlFlowNode for job_dir | +| test_rglob_sanitization.py:38:16:38:32 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | +| test_rglob_sanitization.py:39:20:39:27 | ControlFlowNode for resolved | semmle.label | ControlFlowNode for resolved | +| test_rglob_sanitization.py:46:5:46:10 | ControlFlowNode for job_id | semmle.label | ControlFlowNode for job_id | +| test_rglob_sanitization.py:46:14:46:20 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| test_rglob_sanitization.py:46:14:46:25 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| test_rglob_sanitization.py:46:14:46:43 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | +| test_rglob_sanitization.py:47:5:47:11 | ControlFlowNode for job_dir | semmle.label | ControlFlowNode for job_dir | +| test_rglob_sanitization.py:50:20:50:26 | ControlFlowNode for job_dir | semmle.label | ControlFlowNode for job_dir | +| test_rglob_sanitization.py:57:5:57:10 | 
ControlFlowNode for job_id | semmle.label | ControlFlowNode for job_id | +| test_rglob_sanitization.py:57:14:57:20 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| test_rglob_sanitization.py:57:14:57:25 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| test_rglob_sanitization.py:57:14:57:43 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | +| test_rglob_sanitization.py:58:5:58:11 | ControlFlowNode for job_dir | semmle.label | ControlFlowNode for job_dir | +| test_rglob_sanitization.py:59:16:59:22 | ControlFlowNode for job_dir | semmle.label | ControlFlowNode for job_dir | +| test_rglob_sanitization.py:70:5:70:10 | ControlFlowNode for job_id | semmle.label | ControlFlowNode for job_id | +| test_rglob_sanitization.py:70:14:70:20 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| test_rglob_sanitization.py:70:14:70:25 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| test_rglob_sanitization.py:70:14:70:43 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | +| test_rglob_sanitization.py:71:5:71:11 | ControlFlowNode for job_dir | semmle.label | ControlFlowNode for job_dir | +| test_rglob_sanitization.py:72:9:72:15 | ControlFlowNode for job_dir | semmle.label | ControlFlowNode for job_dir | +subpaths +#select +| test_rglob_sanitization.py:16:20:16:26 | ControlFlowNode for job_dir | test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for ImportMember | test_rglob_sanitization.py:16:20:16:26 | ControlFlowNode for job_dir | This path depends on a $@. 
| test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for ImportMember | user-provided value | +| test_rglob_sanitization.py:25:16:25:22 | ControlFlowNode for job_dir | test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for ImportMember | test_rglob_sanitization.py:25:16:25:22 | ControlFlowNode for job_dir | This path depends on a $@. | test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for ImportMember | user-provided value | +| test_rglob_sanitization.py:38:16:38:22 | ControlFlowNode for job_dir | test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for ImportMember | test_rglob_sanitization.py:38:16:38:22 | ControlFlowNode for job_dir | This path depends on a $@. | test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for ImportMember | user-provided value | +| test_rglob_sanitization.py:39:20:39:27 | ControlFlowNode for resolved | test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for ImportMember | test_rglob_sanitization.py:39:20:39:27 | ControlFlowNode for resolved | This path depends on a $@. | test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for ImportMember | user-provided value | +| test_rglob_sanitization.py:50:20:50:26 | ControlFlowNode for job_dir | test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for ImportMember | test_rglob_sanitization.py:50:20:50:26 | ControlFlowNode for job_dir | This path depends on a $@. | test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for ImportMember | user-provided value | +| test_rglob_sanitization.py:59:16:59:22 | ControlFlowNode for job_dir | test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for ImportMember | test_rglob_sanitization.py:59:16:59:22 | ControlFlowNode for job_dir | This path depends on a $@. 
| test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for ImportMember | user-provided value | +| test_rglob_sanitization.py:72:9:72:15 | ControlFlowNode for job_dir | test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for ImportMember | test_rglob_sanitization.py:72:9:72:15 | ControlFlowNode for job_dir | This path depends on a $@. | test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for ImportMember | user-provided value | diff --git a/languages/python/custom/test/DetectUnsanitizedRglobPathTraversal/DetectUnsanitizedRglobPathTraversal.qlref b/languages/python/custom/test/DetectUnsanitizedRglobPathTraversal/DetectUnsanitizedRglobPathTraversal.qlref new file mode 100644 index 0000000..70bac22 --- /dev/null +++ b/languages/python/custom/test/DetectUnsanitizedRglobPathTraversal/DetectUnsanitizedRglobPathTraversal.qlref @@ -0,0 +1 @@ +DetectUnsanitizedRglobPathTraversal.ql diff --git a/languages/python/custom/test/DetectUnsanitizedRglobPathTraversal/test_rglob_sanitization.py b/languages/python/custom/test/DetectUnsanitizedRglobPathTraversal/test_rglob_sanitization.py new file mode 100644 index 0000000..028c8c3 --- /dev/null +++ b/languages/python/custom/test/DetectUnsanitizedRglobPathTraversal/test_rglob_sanitization.py @@ -0,0 +1,85 @@ +import os +import pathlib + +from flask import Flask, request + +app = Flask(__name__) + +JOBS_ROOT = pathlib.Path("/data/jobs") + + +@app.route("/unsanitized") +def unsanitized_rglob(): + """Detected: no sanitization at all.""" + job_id = request.args.get("job_id", "") + job_dir = JOBS_ROOT / job_id + matches = list(job_dir.rglob("summary.csv")) # Alert: unsanitized path + return str(matches) + + +@app.route("/sanitized_resolve_and_is_relative_to") +def sanitized_rglob(): + """resolve() + is_relative_to() properly sanitizes the rglob call.""" + job_id = request.args.get("job_id", "") + job_dir = JOBS_ROOT / job_id + resolved = job_dir.resolve() # Alert on resolve() itself (it is a FileSystemAccess in stdlib) + if not 
resolved.is_relative_to(JOBS_ROOT): + raise ValueError("path escapes root") + # NOT detected: resolve() normalized the path, is_relative_to() confirmed confinement + matches = list(resolved.rglob("summary.csv")) + return str(matches) + + +@app.route("/resolve_only") +def resolve_only_rglob(): + """Detected: resolve() without is_relative_to() is insufficient.""" + job_id = request.args.get("job_id", "") + job_dir = JOBS_ROOT / job_id + resolved = job_dir.resolve() # Alert on resolve() itself (FileSystemAccess) + matches = list(resolved.rglob("summary.csv")) # Alert: normalized but not checked + return str(matches) + + +@app.route("/is_relative_to_without_resolve") +def is_relative_to_without_resolve(): + """Detected: is_relative_to() without resolve() first is bypassable.""" + job_id = request.args.get("job_id", "") + job_dir = JOBS_ROOT / job_id + if not job_dir.is_relative_to(JOBS_ROOT): + raise ValueError("path escapes root") + matches = list(job_dir.rglob("summary.csv")) # Alert: checked but not normalized + return str(matches) + + +@app.route("/sanitized_open") +def sanitized_open(): + """resolve() + is_relative_to() properly sanitizes the open() call.""" + job_id = request.args.get("job_id", "") + job_dir = JOBS_ROOT / job_id + resolved = job_dir.resolve() # Alert on resolve() itself (FileSystemAccess) + if not resolved.is_relative_to(JOBS_ROOT): + raise ValueError("path escapes root") + # NOT detected: properly sanitized + f = resolved.open() + return f.read() + + +@app.route("/unsanitized_open") +def unsanitized_open(): + """Detected: unsanitized open().""" + job_id = request.args.get("job_id", "") + job_dir = JOBS_ROOT / job_id + f = job_dir.open() # Alert: unsanitized path + return f.read() + + +@app.route("/realpath_startswith") +def realpath_startswith(): + """Not detected: os.path.realpath + startswith (existing sanitizer).""" + job_id = request.args.get("job_id", "") + path = os.path.join("/data/jobs", job_id) + npath = os.path.realpath(path) + if 
npath.startswith("/data/jobs"): + f = open(npath) # Not detected: properly sanitized by existing sanitizer + return f.read() + return "error"