Skip to content

Commit

Permalink
Merge pull request #1950 from DaveLak/fix-fuzz-submodules-filename-ex…
Browse files Browse the repository at this point in the history
…ception

Fix Several Bugs in the `fuzz_submodule` Causing a lot of False Alarms in the OSS-Fuzz Bug Tracker
  • Loading branch information
Byron authored Aug 9, 2024
2 parents 855befa + 2ed3334 commit a621ff0
Show file tree
Hide file tree
Showing 5 changed files with 118 additions and 33 deletions.
47 changes: 16 additions & 31 deletions fuzzing/fuzz-targets/fuzz_submodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,16 @@
import os
import tempfile
from configparser import ParsingError
from utils import is_expected_exception_message, get_max_filename_length

if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"): # pragma: no cover
path_to_bundled_git_binary = os.path.abspath(os.path.join(os.path.dirname(__file__), "git"))
os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = path_to_bundled_git_binary

from utils import (
setup_git_environment,
handle_exception,
get_max_filename_length,
)

# Setup the git environment
setup_git_environment()
from git import Repo, GitCommandError, InvalidGitRepositoryError

if not sys.warnoptions: # pragma: no cover
# The warnings filter below can be overridden by passing the -W option
# to the Python interpreter command line or setting the `PYTHONWARNINGS` environment variable.
import warnings
import logging

# Fuzzing data causes some modules to generate a large number of warnings
# which are not usually interesting and make the test output hard to read, so we ignore them.
warnings.simplefilter("ignore")
logging.getLogger().setLevel(logging.ERROR)


def TestOneInput(data):
fdp = atheris.FuzzedDataProvider(data)
Expand All @@ -35,12 +26,13 @@ def TestOneInput(data):
sub_repo = Repo.init(submodule_temp_dir, bare=fdp.ConsumeBool())
sub_repo.index.commit(fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(1, 512)))

submodule_name = f"submodule_{fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(1, 512))}"
submodule_name = fdp.ConsumeUnicodeNoSurrogates(
fdp.ConsumeIntInRange(1, max(1, get_max_filename_length(repo.working_tree_dir)))
)
submodule_path = os.path.join(repo.working_tree_dir, submodule_name)
submodule_url = sub_repo.git_dir

submodule = repo.create_submodule(submodule_name, submodule_path, url=submodule_url)
repo.index.commit(f"Added submodule {submodule_name}")
submodule = repo.create_submodule(submodule_name, submodule_path, url=sub_repo.git_dir)
repo.index.commit("Added submodule")

with submodule.config_writer() as writer:
key_length = fdp.ConsumeIntInRange(1, max(1, fdp.remaining_bytes()))
Expand Down Expand Up @@ -88,18 +80,11 @@ def TestOneInput(data):
BrokenPipeError,
):
return -1
except ValueError as e:
expected_messages = [
"SHA is empty",
"Reference at",
"embedded null byte",
"This submodule instance does not exist anymore",
"cmd stdin was empty",
]
if is_expected_exception_message(e, expected_messages):
except Exception as e:
if isinstance(e, ValueError) and "embedded null byte" in str(e):
return -1
else:
raise e
return handle_exception(e)


def main():
Expand Down
87 changes: 86 additions & 1 deletion fuzzing/fuzz-targets/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import atheris # pragma: no cover
import os # pragma: no cover
from typing import List # pragma: no cover
import re # pragma: no cover
import traceback # pragma: no cover
import sys # pragma: no cover
from typing import Set, Tuple, List # pragma: no cover


@atheris.instrument_func
Expand Down Expand Up @@ -35,3 +38,85 @@ def get_max_filename_length(path: str) -> int: # pragma: no cover
int: The maximum filename length.
"""
return os.pathconf(path, "PC_NAME_MAX")


@atheris.instrument_func
def read_lines_from_file(file_path: str) -> list:
"""Read lines from a file and return them as a list."""
try:
with open(file_path, "r") as f:
return [line.strip() for line in f if line.strip()]
except FileNotFoundError:
print(f"File not found: {file_path}")
return []
except IOError as e:
print(f"Error reading file {file_path}: {e}")
return []


@atheris.instrument_func
def load_exception_list(file_path: str = "explicit-exceptions-list.txt") -> Set[Tuple[str, str]]:
"""Load and parse the exception list from a default or specified file."""
try:
bundle_dir = os.path.dirname(os.path.abspath(__file__))
full_path = os.path.join(bundle_dir, file_path)
lines = read_lines_from_file(full_path)
exception_list: Set[Tuple[str, str]] = set()
for line in lines:
match = re.match(r"(.+):(\d+):", line)
if match:
file_path: str = match.group(1).strip()
line_number: str = str(match.group(2).strip())
exception_list.add((file_path, line_number))
return exception_list
except Exception as e:
print(f"Error loading exception list: {e}")
return set()


@atheris.instrument_func
def match_exception_with_traceback(exception_list: Set[Tuple[str, str]], exc_traceback) -> bool:
"""Match exception traceback with the entries in the exception list."""
for filename, lineno, _, _ in traceback.extract_tb(exc_traceback):
for file_pattern, line_pattern in exception_list:
# Ensure filename and line_number are strings for regex matching
if re.fullmatch(file_pattern, filename) and re.fullmatch(line_pattern, str(lineno)):
return True
return False


@atheris.instrument_func
def check_exception_against_list(exc_traceback, exception_file: str = "explicit-exceptions-list.txt") -> bool:
"""Check if the exception traceback matches any entry in the exception list."""
exception_list = load_exception_list(exception_file)
return match_exception_with_traceback(exception_list, exc_traceback)


@atheris.instrument_func
def handle_exception(e: Exception) -> int:
"""Encapsulate exception handling logic for reusability."""
exc_traceback = e.__traceback__
if check_exception_against_list(exc_traceback):
return -1
else:
raise e


@atheris.instrument_func
def setup_git_environment() -> None:
"""Set up the environment variables for Git."""
bundle_dir = os.path.dirname(os.path.abspath(__file__))
if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"): # pragma: no cover
bundled_git_binary_path = os.path.join(bundle_dir, "git")
os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = bundled_git_binary_path

if not sys.warnoptions: # pragma: no cover
# The warnings filter below can be overridden by passing the -W option
# to the Python interpreter command line or setting the `PYTHONWARNINGS` environment variable.
import warnings
import logging

# Fuzzing data causes some modules to generate a large number of warnings
# which are not usually interesting and make the test output hard to read, so we ignore them.
warnings.simplefilter("ignore")
logging.getLogger().setLevel(logging.ERROR)
2 changes: 1 addition & 1 deletion fuzzing/oss-fuzz-scripts/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,5 @@ find "$SRC" -maxdepth 1 \

# Build fuzzers in $OUT.
find "$SRC/gitpython/fuzzing" -name 'fuzz_*.py' -print0 | while IFS= read -r -d '' fuzz_harness; do
compile_python_fuzzer "$fuzz_harness" --add-binary="$(command -v git):."
compile_python_fuzzer "$fuzz_harness" --add-binary="$(command -v git):." --add-data="$SRC/explicit-exceptions-list.txt:."
done
11 changes: 11 additions & 0 deletions fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,17 @@ create_seed_corpora_zips "$WORK/qa-assets/gitpython/corpora"

prepare_dictionaries_for_fuzz_targets "$WORK/qa-assets/gitpython/dictionaries" "$SRC/gitpython/fuzzing"

pushd "$SRC/gitpython/"
# Search for 'raise' and 'assert' statements in Python files within GitPython's source code and submodules, saving the
# matched file path, line number, and line content to a file named 'explicit-exceptions-list.txt'.
# This file can then be used by fuzz harnesses to check exception tracebacks and filter out explicitly raised or otherwise
# anticipated exceptions to reduce false positive test failures.

git grep -n --recurse-submodules -e '\braise\b' -e '\bassert\b' -- '*.py' -- ':!setup.py' -- ':!test/**' -- ':!fuzzing/**' > "$SRC/explicit-exceptions-list.txt"

popd


# The OSS-Fuzz base image has outdated dependencies by default so we upgrade them below.
python3 -m pip install --upgrade pip
# Upgrade to the latest versions known to work at the time the below changes were introduced:
Expand Down
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ lint.unfixable = [
"test/**" = [
"B018", # useless-expression
]
"fuzzing/fuzz-targets/**" = [
"E402", # environment setup must happen before the `git` module is imported, thus cannot happen at top of file
]


[tool.codespell]
ignore-words-list="gud,doesnt"
Expand Down

0 comments on commit a621ff0

Please sign in to comment.