Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions languages/python/custom/src/DetectUnsanitizedRglobPathTraversal.ql
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/**
* @name Uncontrolled data used in path expression (with pathlib sanitizer support)
* @description Accessing paths influenced by users can allow an attacker to access
* unexpected resources. This query extends the standard py/path-injection
* analysis to recognize `pathlib.Path.resolve()` as path normalization
* and `pathlib.Path.is_relative_to()` as a safe access check.
* @kind path-problem
* @problem.severity warning
* @security-severity 7.5
* @precision high
* @id python/detect-unsanitized-rglob-path-traversal
* @tags security
* external/cwe/cwe-022
*/

import python
// Import custom sanitizer extensions; these extend Path::PathNormalization::Range
// and Path::SafeAccessCheck::Range so the standard PathInjectionConfig picks them up.
import PathInjectionSanitizers
import semmle.python.security.dataflow.PathInjectionQuery
import PathInjectionFlow::PathGraph

// Report every flow path that the imported path-injection flow module finds
// from a source to a sink; the alert is placed on the sink node.
from PathInjectionFlow::PathNode origin, PathInjectionFlow::PathNode target
where PathInjectionFlow::flowPath(origin, target)
select target.getNode(), origin, target, "This path depends on a $@.", origin.getNode(),
  "user-provided value"
73 changes: 73 additions & 0 deletions languages/python/custom/src/PathInjectionSanitizers.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/**
* Provides custom sanitizer extensions for the standard `py/path-injection` query.
*
* This module extends the CodeQL standard library's path injection analysis
* to recognize additional sanitization patterns:
*
* 1. `pathlib.Path.resolve()` as a path normalization step
* 2. `pathlib.Path.is_relative_to()` as a safe access check
*
* These extensions enable the standard query to understand that a pattern like:
* ```python
* resolved = job_dir.resolve()
* if not resolved.is_relative_to(JOBS_ROOT):
* raise ValueError("path escapes root")
* matches = list(resolved.rglob("summary.csv"))
* ```
* properly sanitizes the path before use.
*
* NOTE: If contributing to the CodeQL standard library (re-bundling), these
* extensions should be added directly to `semmle/python/frameworks/Stdlib.qll`:
* - `PathlibResolveCall` alongside `OsPathRealpathCall` (around line 1065)
* - `IsRelativeToCall` alongside `StartswithCall` (around line 5090)
*/

private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts

/**
 * A method call named `resolve`, modeled as a path normalization step.
 *
 * The intended target is `pathlib.Path.resolve()`, which makes the path
 * absolute and resolves symlinks, comparable to `os.path.realpath()`.
 *
 * Matching is by method name only: the receiver is not required to be a
 * `pathlib` object, so any `x.resolve(...)` call is treated as normalizing `x`.
 * The method lookup may happen separately from the call (for example
 * `f = p.resolve; f()`), as long as the lookup reaches the call by local flow.
 *
 * See https://docs.python.org/3/library/pathlib.html#pathlib.Path.resolve
 */
private class PathlibResolveCall extends Path::PathNormalization::Range, DataFlow::MethodCallNode {
  PathlibResolveCall() { this.getMethodName() = "resolve" }

  /** Gets the receiver of the `resolve` call, that is, the path being normalized. */
  override DataFlow::Node getPathArg() { result = this.getObject() }
}

/**
 * A call of the form `x.is_relative_to(...)`, modeled as a safe access check on `x`.
 *
 * The intended target is `pathlib.PurePath.is_relative_to(other)`, which returns
 * `True` when the path is relative to `other` and is commonly used to confirm
 * that a path stays inside an expected directory.
 *
 * Matching is by attribute name only: the receiver is not required to be a
 * `pathlib` object, and the arguments of the call are not inspected.
 *
 * See https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.is_relative_to
 */
private class IsRelativeToCall extends Path::SafeAccessCheck::Range {
  IsRelativeToCall() {
    exists(AttrNode callee |
      callee = this.(CallNode).getFunction() and
      callee.getName() = "is_relative_to"
    )
  }

  /**
   * Holds if this check validates `node` when it evaluates to `branch`.
   *
   * Only the receiver of `is_relative_to` is checked, and only on the `true`
   * outcome: a `True` result is what confirms the path is under the given root.
   */
  override predicate checks(ControlFlowNode node, boolean branch) {
    branch = true and
    exists(AttrNode callee |
      callee = this.(CallNode).getFunction() and
      node = callee.getObject()
    )
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
edges
| test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for ImportMember | test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for request | provenance | |
| test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for request | test_rglob_sanitization.py:14:14:14:20 | ControlFlowNode for request | provenance | |
| test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for request | test_rglob_sanitization.py:23:14:23:20 | ControlFlowNode for request | provenance | |
| test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for request | test_rglob_sanitization.py:36:14:36:20 | ControlFlowNode for request | provenance | |
| test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for request | test_rglob_sanitization.py:46:14:46:20 | ControlFlowNode for request | provenance | |
| test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for request | test_rglob_sanitization.py:57:14:57:20 | ControlFlowNode for request | provenance | |
| test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for request | test_rglob_sanitization.py:70:14:70:20 | ControlFlowNode for request | provenance | |
| test_rglob_sanitization.py:14:5:14:10 | ControlFlowNode for job_id | test_rglob_sanitization.py:15:5:15:11 | ControlFlowNode for job_dir | provenance | AdditionalTaintStep |
| test_rglob_sanitization.py:14:14:14:20 | ControlFlowNode for request | test_rglob_sanitization.py:14:14:14:25 | ControlFlowNode for Attribute | provenance | AdditionalTaintStep |
| test_rglob_sanitization.py:14:14:14:25 | ControlFlowNode for Attribute | test_rglob_sanitization.py:14:14:14:43 | ControlFlowNode for Attribute() | provenance | dict.get |
| test_rglob_sanitization.py:14:14:14:43 | ControlFlowNode for Attribute() | test_rglob_sanitization.py:14:5:14:10 | ControlFlowNode for job_id | provenance | |
| test_rglob_sanitization.py:15:5:15:11 | ControlFlowNode for job_dir | test_rglob_sanitization.py:16:20:16:26 | ControlFlowNode for job_dir | provenance | |
| test_rglob_sanitization.py:23:5:23:10 | ControlFlowNode for job_id | test_rglob_sanitization.py:24:5:24:11 | ControlFlowNode for job_dir | provenance | AdditionalTaintStep |
| test_rglob_sanitization.py:23:14:23:20 | ControlFlowNode for request | test_rglob_sanitization.py:23:14:23:25 | ControlFlowNode for Attribute | provenance | AdditionalTaintStep |
| test_rglob_sanitization.py:23:14:23:25 | ControlFlowNode for Attribute | test_rglob_sanitization.py:23:14:23:43 | ControlFlowNode for Attribute() | provenance | dict.get |
| test_rglob_sanitization.py:23:14:23:43 | ControlFlowNode for Attribute() | test_rglob_sanitization.py:23:5:23:10 | ControlFlowNode for job_id | provenance | |
| test_rglob_sanitization.py:24:5:24:11 | ControlFlowNode for job_dir | test_rglob_sanitization.py:25:16:25:22 | ControlFlowNode for job_dir | provenance | |
| test_rglob_sanitization.py:36:5:36:10 | ControlFlowNode for job_id | test_rglob_sanitization.py:37:5:37:11 | ControlFlowNode for job_dir | provenance | AdditionalTaintStep |
| test_rglob_sanitization.py:36:14:36:20 | ControlFlowNode for request | test_rglob_sanitization.py:36:14:36:25 | ControlFlowNode for Attribute | provenance | AdditionalTaintStep |
| test_rglob_sanitization.py:36:14:36:25 | ControlFlowNode for Attribute | test_rglob_sanitization.py:36:14:36:43 | ControlFlowNode for Attribute() | provenance | dict.get |
| test_rglob_sanitization.py:36:14:36:43 | ControlFlowNode for Attribute() | test_rglob_sanitization.py:36:5:36:10 | ControlFlowNode for job_id | provenance | |
| test_rglob_sanitization.py:37:5:37:11 | ControlFlowNode for job_dir | test_rglob_sanitization.py:38:16:38:22 | ControlFlowNode for job_dir | provenance | |
| test_rglob_sanitization.py:37:5:37:11 | ControlFlowNode for job_dir | test_rglob_sanitization.py:38:16:38:22 | ControlFlowNode for job_dir | provenance | |
| test_rglob_sanitization.py:38:5:38:12 | ControlFlowNode for resolved | test_rglob_sanitization.py:39:20:39:27 | ControlFlowNode for resolved | provenance | |
| test_rglob_sanitization.py:38:16:38:22 | ControlFlowNode for job_dir | test_rglob_sanitization.py:38:16:38:32 | ControlFlowNode for Attribute() | provenance | Config |
| test_rglob_sanitization.py:38:16:38:32 | ControlFlowNode for Attribute() | test_rglob_sanitization.py:38:5:38:12 | ControlFlowNode for resolved | provenance | |
| test_rglob_sanitization.py:46:5:46:10 | ControlFlowNode for job_id | test_rglob_sanitization.py:47:5:47:11 | ControlFlowNode for job_dir | provenance | AdditionalTaintStep |
| test_rglob_sanitization.py:46:14:46:20 | ControlFlowNode for request | test_rglob_sanitization.py:46:14:46:25 | ControlFlowNode for Attribute | provenance | AdditionalTaintStep |
| test_rglob_sanitization.py:46:14:46:25 | ControlFlowNode for Attribute | test_rglob_sanitization.py:46:14:46:43 | ControlFlowNode for Attribute() | provenance | dict.get |
| test_rglob_sanitization.py:46:14:46:43 | ControlFlowNode for Attribute() | test_rglob_sanitization.py:46:5:46:10 | ControlFlowNode for job_id | provenance | |
| test_rglob_sanitization.py:47:5:47:11 | ControlFlowNode for job_dir | test_rglob_sanitization.py:50:20:50:26 | ControlFlowNode for job_dir | provenance | |
| test_rglob_sanitization.py:57:5:57:10 | ControlFlowNode for job_id | test_rglob_sanitization.py:58:5:58:11 | ControlFlowNode for job_dir | provenance | AdditionalTaintStep |
| test_rglob_sanitization.py:57:14:57:20 | ControlFlowNode for request | test_rglob_sanitization.py:57:14:57:25 | ControlFlowNode for Attribute | provenance | AdditionalTaintStep |
| test_rglob_sanitization.py:57:14:57:25 | ControlFlowNode for Attribute | test_rglob_sanitization.py:57:14:57:43 | ControlFlowNode for Attribute() | provenance | dict.get |
| test_rglob_sanitization.py:57:14:57:43 | ControlFlowNode for Attribute() | test_rglob_sanitization.py:57:5:57:10 | ControlFlowNode for job_id | provenance | |
| test_rglob_sanitization.py:58:5:58:11 | ControlFlowNode for job_dir | test_rglob_sanitization.py:59:16:59:22 | ControlFlowNode for job_dir | provenance | |
| test_rglob_sanitization.py:70:5:70:10 | ControlFlowNode for job_id | test_rglob_sanitization.py:71:5:71:11 | ControlFlowNode for job_dir | provenance | AdditionalTaintStep |
| test_rglob_sanitization.py:70:14:70:20 | ControlFlowNode for request | test_rglob_sanitization.py:70:14:70:25 | ControlFlowNode for Attribute | provenance | AdditionalTaintStep |
| test_rglob_sanitization.py:70:14:70:25 | ControlFlowNode for Attribute | test_rglob_sanitization.py:70:14:70:43 | ControlFlowNode for Attribute() | provenance | dict.get |
| test_rglob_sanitization.py:70:14:70:43 | ControlFlowNode for Attribute() | test_rglob_sanitization.py:70:5:70:10 | ControlFlowNode for job_id | provenance | |
| test_rglob_sanitization.py:71:5:71:11 | ControlFlowNode for job_dir | test_rglob_sanitization.py:72:9:72:15 | ControlFlowNode for job_dir | provenance | |
nodes
| test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
| test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| test_rglob_sanitization.py:14:5:14:10 | ControlFlowNode for job_id | semmle.label | ControlFlowNode for job_id |
| test_rglob_sanitization.py:14:14:14:20 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| test_rglob_sanitization.py:14:14:14:25 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| test_rglob_sanitization.py:14:14:14:43 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test_rglob_sanitization.py:15:5:15:11 | ControlFlowNode for job_dir | semmle.label | ControlFlowNode for job_dir |
| test_rglob_sanitization.py:16:20:16:26 | ControlFlowNode for job_dir | semmle.label | ControlFlowNode for job_dir |
| test_rglob_sanitization.py:23:5:23:10 | ControlFlowNode for job_id | semmle.label | ControlFlowNode for job_id |
| test_rglob_sanitization.py:23:14:23:20 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| test_rglob_sanitization.py:23:14:23:25 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| test_rglob_sanitization.py:23:14:23:43 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test_rglob_sanitization.py:24:5:24:11 | ControlFlowNode for job_dir | semmle.label | ControlFlowNode for job_dir |
| test_rglob_sanitization.py:25:16:25:22 | ControlFlowNode for job_dir | semmle.label | ControlFlowNode for job_dir |
| test_rglob_sanitization.py:36:5:36:10 | ControlFlowNode for job_id | semmle.label | ControlFlowNode for job_id |
| test_rglob_sanitization.py:36:14:36:20 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| test_rglob_sanitization.py:36:14:36:25 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| test_rglob_sanitization.py:36:14:36:43 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test_rglob_sanitization.py:37:5:37:11 | ControlFlowNode for job_dir | semmle.label | ControlFlowNode for job_dir |
| test_rglob_sanitization.py:38:5:38:12 | ControlFlowNode for resolved | semmle.label | ControlFlowNode for resolved |
| test_rglob_sanitization.py:38:16:38:22 | ControlFlowNode for job_dir | semmle.label | ControlFlowNode for job_dir |
| test_rglob_sanitization.py:38:16:38:22 | ControlFlowNode for job_dir | semmle.label | ControlFlowNode for job_dir |
| test_rglob_sanitization.py:38:16:38:32 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test_rglob_sanitization.py:39:20:39:27 | ControlFlowNode for resolved | semmle.label | ControlFlowNode for resolved |
| test_rglob_sanitization.py:46:5:46:10 | ControlFlowNode for job_id | semmle.label | ControlFlowNode for job_id |
| test_rglob_sanitization.py:46:14:46:20 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| test_rglob_sanitization.py:46:14:46:25 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| test_rglob_sanitization.py:46:14:46:43 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test_rglob_sanitization.py:47:5:47:11 | ControlFlowNode for job_dir | semmle.label | ControlFlowNode for job_dir |
| test_rglob_sanitization.py:50:20:50:26 | ControlFlowNode for job_dir | semmle.label | ControlFlowNode for job_dir |
| test_rglob_sanitization.py:57:5:57:10 | ControlFlowNode for job_id | semmle.label | ControlFlowNode for job_id |
| test_rglob_sanitization.py:57:14:57:20 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| test_rglob_sanitization.py:57:14:57:25 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| test_rglob_sanitization.py:57:14:57:43 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test_rglob_sanitization.py:58:5:58:11 | ControlFlowNode for job_dir | semmle.label | ControlFlowNode for job_dir |
| test_rglob_sanitization.py:59:16:59:22 | ControlFlowNode for job_dir | semmle.label | ControlFlowNode for job_dir |
| test_rglob_sanitization.py:70:5:70:10 | ControlFlowNode for job_id | semmle.label | ControlFlowNode for job_id |
| test_rglob_sanitization.py:70:14:70:20 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| test_rglob_sanitization.py:70:14:70:25 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| test_rglob_sanitization.py:70:14:70:43 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test_rglob_sanitization.py:71:5:71:11 | ControlFlowNode for job_dir | semmle.label | ControlFlowNode for job_dir |
| test_rglob_sanitization.py:72:9:72:15 | ControlFlowNode for job_dir | semmle.label | ControlFlowNode for job_dir |
subpaths
#select
| test_rglob_sanitization.py:16:20:16:26 | ControlFlowNode for job_dir | test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for ImportMember | test_rglob_sanitization.py:16:20:16:26 | ControlFlowNode for job_dir | This path depends on a $@. | test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for ImportMember | user-provided value |
| test_rglob_sanitization.py:25:16:25:22 | ControlFlowNode for job_dir | test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for ImportMember | test_rglob_sanitization.py:25:16:25:22 | ControlFlowNode for job_dir | This path depends on a $@. | test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for ImportMember | user-provided value |
| test_rglob_sanitization.py:38:16:38:22 | ControlFlowNode for job_dir | test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for ImportMember | test_rglob_sanitization.py:38:16:38:22 | ControlFlowNode for job_dir | This path depends on a $@. | test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for ImportMember | user-provided value |
| test_rglob_sanitization.py:39:20:39:27 | ControlFlowNode for resolved | test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for ImportMember | test_rglob_sanitization.py:39:20:39:27 | ControlFlowNode for resolved | This path depends on a $@. | test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for ImportMember | user-provided value |
| test_rglob_sanitization.py:50:20:50:26 | ControlFlowNode for job_dir | test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for ImportMember | test_rglob_sanitization.py:50:20:50:26 | ControlFlowNode for job_dir | This path depends on a $@. | test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for ImportMember | user-provided value |
| test_rglob_sanitization.py:59:16:59:22 | ControlFlowNode for job_dir | test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for ImportMember | test_rglob_sanitization.py:59:16:59:22 | ControlFlowNode for job_dir | This path depends on a $@. | test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for ImportMember | user-provided value |
| test_rglob_sanitization.py:72:9:72:15 | ControlFlowNode for job_dir | test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for ImportMember | test_rglob_sanitization.py:72:9:72:15 | ControlFlowNode for job_dir | This path depends on a $@. | test_rglob_sanitization.py:4:26:4:32 | ControlFlowNode for ImportMember | user-provided value |
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
DetectUnsanitizedRglobPathTraversal.ql
Loading