Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Several Bugs in the fuzz_submodule Causing a lot of False Alarms in the OSS-Fuzz Bug Tracker #1950

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 16 additions & 31 deletions fuzzing/fuzz-targets/fuzz_submodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,16 @@
import os
import tempfile
from configparser import ParsingError
from utils import is_expected_exception_message, get_max_filename_length

if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"): # pragma: no cover
path_to_bundled_git_binary = os.path.abspath(os.path.join(os.path.dirname(__file__), "git"))
os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = path_to_bundled_git_binary

from utils import (
setup_git_environment,
handle_exception,
get_max_filename_length,
)

# Setup the git environment
setup_git_environment()
from git import Repo, GitCommandError, InvalidGitRepositoryError

if not sys.warnoptions: # pragma: no cover
# The warnings filter below can be overridden by passing the -W option
# to the Python interpreter command line or setting the `PYTHONWARNINGS` environment variable.
import warnings
import logging

# Fuzzing data causes some modules to generate a large number of warnings
# which are not usually interesting and make the test output hard to read, so we ignore them.
warnings.simplefilter("ignore")
logging.getLogger().setLevel(logging.ERROR)


def TestOneInput(data):
fdp = atheris.FuzzedDataProvider(data)
Expand All @@ -35,12 +26,13 @@ def TestOneInput(data):
sub_repo = Repo.init(submodule_temp_dir, bare=fdp.ConsumeBool())
sub_repo.index.commit(fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(1, 512)))

submodule_name = f"submodule_{fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(1, 512))}"
submodule_name = fdp.ConsumeUnicodeNoSurrogates(
fdp.ConsumeIntInRange(1, max(1, get_max_filename_length(repo.working_tree_dir)))
)
submodule_path = os.path.join(repo.working_tree_dir, submodule_name)
submodule_url = sub_repo.git_dir

submodule = repo.create_submodule(submodule_name, submodule_path, url=submodule_url)
repo.index.commit(f"Added submodule {submodule_name}")
submodule = repo.create_submodule(submodule_name, submodule_path, url=sub_repo.git_dir)
repo.index.commit("Added submodule")

with submodule.config_writer() as writer:
key_length = fdp.ConsumeIntInRange(1, max(1, fdp.remaining_bytes()))
Expand Down Expand Up @@ -88,18 +80,11 @@ def TestOneInput(data):
BrokenPipeError,
):
return -1
except ValueError as e:
expected_messages = [
"SHA is empty",
"Reference at",
"embedded null byte",
"This submodule instance does not exist anymore",
"cmd stdin was empty",
]
if is_expected_exception_message(e, expected_messages):
except Exception as e:
if isinstance(e, ValueError) and "embedded null byte" in str(e):
return -1
else:
raise e
return handle_exception(e)


def main():
Expand Down
87 changes: 86 additions & 1 deletion fuzzing/fuzz-targets/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import atheris # pragma: no cover
import os # pragma: no cover
from typing import List # pragma: no cover
import re # pragma: no cover
import traceback # pragma: no cover
import sys # pragma: no cover
from typing import Set, Tuple, List # pragma: no cover


@atheris.instrument_func
Expand Down Expand Up @@ -35,3 +38,85 @@ def get_max_filename_length(path: str) -> int: # pragma: no cover
int: The maximum filename length.
"""
return os.pathconf(path, "PC_NAME_MAX")


@atheris.instrument_func
def read_lines_from_file(file_path: str) -> list:
"""Read lines from a file and return them as a list."""
try:
with open(file_path, "r") as f:
return [line.strip() for line in f if line.strip()]
except FileNotFoundError:
print(f"File not found: {file_path}")
return []
except IOError as e:
print(f"Error reading file {file_path}: {e}")
return []


@atheris.instrument_func
def load_exception_list(file_path: str = "explicit-exceptions-list.txt") -> Set[Tuple[str, str]]:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW, this file isn't my most beautiful work (e.g., I'm not proud of hardcoding "explicit-exceptions-list.txt" in two places) but it worked quite well in local testing so I figure getting deployed to CLusterFuzz and validating it there was worth it for now 🤷‍♀️

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Admittedly, I just checked that all changes are within the fuzzing code, but didn't review the changes themselves as these are entirely owned by you, a trusted contributor. Thus you are unlikely to hear complaints from me 😅.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mostly just left that comment in hopes that lightly shaming myself in public will push me to clean it up sooner than not acknowledging it at all 😅

"""Load and parse the exception list from a default or specified file."""
try:
bundle_dir = os.path.dirname(os.path.abspath(__file__))
full_path = os.path.join(bundle_dir, file_path)
lines = read_lines_from_file(full_path)
exception_list: Set[Tuple[str, str]] = set()
for line in lines:
match = re.match(r"(.+):(\d+):", line)
if match:
file_path: str = match.group(1).strip()
line_number: str = str(match.group(2).strip())
exception_list.add((file_path, line_number))
return exception_list
except Exception as e:
print(f"Error loading exception list: {e}")
return set()


@atheris.instrument_func
def match_exception_with_traceback(exception_list: Set[Tuple[str, str]], exc_traceback) -> bool:
"""Match exception traceback with the entries in the exception list."""
for filename, lineno, _, _ in traceback.extract_tb(exc_traceback):
for file_pattern, line_pattern in exception_list:
# Ensure filename and line_number are strings for regex matching
if re.fullmatch(file_pattern, filename) and re.fullmatch(line_pattern, str(lineno)):
return True
return False


@atheris.instrument_func
def check_exception_against_list(exc_traceback, exception_file: str = "explicit-exceptions-list.txt") -> bool:
"""Check if the exception traceback matches any entry in the exception list."""
exception_list = load_exception_list(exception_file)
return match_exception_with_traceback(exception_list, exc_traceback)


@atheris.instrument_func
def handle_exception(e: Exception) -> int:
"""Encapsulate exception handling logic for reusability."""
exc_traceback = e.__traceback__
if check_exception_against_list(exc_traceback):
return -1
else:
raise e


@atheris.instrument_func
def setup_git_environment() -> None:
"""Set up the environment variables for Git."""
bundle_dir = os.path.dirname(os.path.abspath(__file__))
if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"): # pragma: no cover
bundled_git_binary_path = os.path.join(bundle_dir, "git")
os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = bundled_git_binary_path

if not sys.warnoptions: # pragma: no cover
# The warnings filter below can be overridden by passing the -W option
# to the Python interpreter command line or setting the `PYTHONWARNINGS` environment variable.
import warnings
import logging

# Fuzzing data causes some modules to generate a large number of warnings
# which are not usually interesting and make the test output hard to read, so we ignore them.
warnings.simplefilter("ignore")
logging.getLogger().setLevel(logging.ERROR)
2 changes: 1 addition & 1 deletion fuzzing/oss-fuzz-scripts/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,5 @@ find "$SRC" -maxdepth 1 \

# Build fuzzers in $OUT.
find "$SRC/gitpython/fuzzing" -name 'fuzz_*.py' -print0 | while IFS= read -r -d '' fuzz_harness; do
compile_python_fuzzer "$fuzz_harness" --add-binary="$(command -v git):."
compile_python_fuzzer "$fuzz_harness" --add-binary="$(command -v git):." --add-data="$SRC/explicit-exceptions-list.txt:."
done
11 changes: 11 additions & 0 deletions fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,17 @@ create_seed_corpora_zips "$WORK/qa-assets/gitpython/corpora"

prepare_dictionaries_for_fuzz_targets "$WORK/qa-assets/gitpython/dictionaries" "$SRC/gitpython/fuzzing"

pushd "$SRC/gitpython/"
# Search for 'raise' and 'assert' statements in Python files within GitPython's source code and submodules, saving the
# matched file path, line number, and line content to a file named 'explicit-exceptions-list.txt'.
# This file can then be used by fuzz harnesses to check exception tracebacks and filter out explicitly raised or otherwise
# anticipated exceptions to reduce false positive test failures.

git grep -n --recurse-submodules -e '\braise\b' -e '\bassert\b' -- '*.py' -- ':!setup.py' -- ':!test/**' -- ':!fuzzing/**' > "$SRC/explicit-exceptions-list.txt"

popd


# The OSS-Fuzz base image has outdated dependencies by default so we upgrade them below.
python3 -m pip install --upgrade pip
# Upgrade to the latest versions known to work at the time the below changes were introduced:
Expand Down
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ lint.unfixable = [
"test/**" = [
"B018", # useless-expression
]
"fuzzing/fuzz-targets/**" = [
"E402", # environment setup must happen before the `git` module is imported, thus cannot happen at top of file
]


[tool.codespell]
ignore-words-list="gud,doesnt"
Expand Down
Loading