Skip to content

Commit

Permalink
OSSFuzz Initial Integration (#1198)
Browse files Browse the repository at this point in the history
  • Loading branch information
ennamarie19 authored Nov 15, 2023
1 parent 69f95a4 commit 11696c1
Show file tree
Hide file tree
Showing 14 changed files with 219 additions and 0 deletions.
8 changes: 8 additions & 0 deletions fuzzing/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# OSS-Fuzz build script: install dateparser, compile every fuzz harness found
# under fuzzing/, and package the seed corpus alongside the fuzzers.
cd "$SRC"/dateparser
pip3 install .

# Build fuzzers in $OUT
# compile_python_fuzzer is not defined in this script; presumably it is
# provided by the OSS-Fuzz build environment.
for fuzzer in $(find fuzzing -name '*_fuzzer.py');do
  compile_python_fuzzer "$fuzzer"
done

# Quote the directory variables so a path containing spaces or glob characters
# is not word-split or expanded; only the trailing * is left to glob.
# NOTE(review): the seed files in this change live under fuzzing/corpus, while
# this zips $SRC/corpus -- confirm the build image places them there.
zip -q "$OUT/dateparser_fuzzer_seed_corpus.zip" "$SRC"/corpus/*
1 change: 1 addition & 0 deletions fuzzing/corpus/current
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
now EST
1 change: 1 addition & 0 deletions fuzzing/corpus/date_time
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
January 12, 2012 10:00 PM
1 change: 1 addition & 0 deletions fuzzing/corpus/french
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Le 11 Décembre 2014 à 09:00
1 change: 1 addition & 0 deletions fuzzing/corpus/minutes_offset
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2 minutes ago
1 change: 1 addition & 0 deletions fuzzing/corpus/russian
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
13 января 2015 г. в 13:34
1 change: 1 addition & 0 deletions fuzzing/corpus/thai
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1 เดือนตุลาคม 2005, 1:00 AM
1 change: 1 addition & 0 deletions fuzzing/corpus/time
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
10:00 am
1 change: 1 addition & 0 deletions fuzzing/corpus/time_offset
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2 hours ago -0500
1 change: 1 addition & 0 deletions fuzzing/corpus/turkish
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
yaklaşık 23 saat önce
114 changes: 114 additions & 0 deletions fuzzing/dateparser_fuzzer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
import sys
from typing import List

import atheris
from fuzz_helpers import EnhancedFuzzedDataProvider

with atheris.instrument_imports():
import dateparser

import re

import pytz

import dateparser.data
import dateparser.parser

# Language codes taken from dateparser's `language_order`; used below as the
# pool for the `languages` argument and the DEFAULT_LANGUAGES setting.
language_codes = dateparser.data.languages_info.language_order
# Pool of format directives from which fuzzed `date_formats` strings are
# assembled. Each whitespace-separated suffix below becomes "%<suffix>",
# in the order listed.
directives = [
    "%" + suffix
    for suffix in (
        "a A w d b B m y Y H I p M S f z Z "
        "j U W c x X % G u V :Z"
    ).split()
]
# Fixed sample of locale codes offered to the `locales` argument.
locale_codes = ["fr-PF", "qu-EC", "af-NA"]
# Keys of dateparser's date-order chart; pool for the DATE_ORDER setting.
date_order = [*dateparser.parser.date_order_chart.keys()]
# Every timezone name exposed by pytz; pool for TIMEZONE / TO_TIMEZONE.
timezone = [*pytz.all_timezones]
# Pool for PREFER_MONTH_OF_YEAR and PREFER_DAY_OF_MONTH.
preferred_date = "last first current".split()
# Pool for PREFER_DATES_FROM.
preferred_dates_from = "past future current_period".split()
# Parser names from which the PARSERS setting draws a sub-list.
parsers = (
    "timestamp negative-timestamp relative-time "
    "custom-formats absolute-time no-spaces-time"
).split()


def _get_format_strings(fdp: EnhancedFuzzedDataProvider) -> List[str]:
    """Build between zero and five fuzzed format strings.

    The count is drawn first. Each string is then a sub-list of
    ``directives`` (from ``fdp.ConsumeSublist``) joined with a separator
    obtained from ``fdp.ConsumeString(1)``; the separator is drawn before
    the sub-list.
    """
    count = fdp.ConsumeIntInRange(0, 5)
    return [
        fdp.ConsumeString(1).join(fdp.ConsumeSublist(directives))
        for _ in range(count)
    ]


def TestOneInput(data):
    """Fuzz entry point: call ``dateparser.parse`` with fuzzer-derived input.

    Every setting and argument is drawn from ``data`` through a single
    ``EnhancedFuzzedDataProvider``. The draws consume one shared stream, so
    their order is part of the harness behaviour and must stay as written.

    Returns -1 when the parse raises ``re.error``; otherwise returns None.
    """
    fdp = EnhancedFuzzedDataProvider(data)

    # Settings are filled in one key at a time, in consumption order.
    settings = {}
    settings["DATE_ORDER"] = fdp.PickValueInList(date_order)
    settings["PREFER_LOCALE_DATE_ORDER"] = fdp.ConsumeBool()
    settings["TIMEZONE"] = fdp.PickValueInList(timezone)
    settings["TO_TIMEZONE"] = fdp.PickValueInList(timezone)
    settings["RETURN_AS_TIMEZONE_AWARE"] = fdp.ConsumeBool()
    settings["PREFER_MONTH_OF_YEAR"] = fdp.PickValueInList(preferred_date)
    settings["PREFER_DAY_OF_MONTH"] = fdp.PickValueInList(preferred_date)
    settings["PREFER_DATES_FROM"] = fdp.PickValueInList(preferred_dates_from)
    settings["RELATIVE_BASE"] = fdp.ConsumeDate()
    settings["STRICT_PARSING"] = fdp.ConsumeBool()
    settings["REQUIRE_PARTS"] = []

    # Token count is drawn first, then one random string per token.
    skip_tokens = []
    for _ in range(fdp.ConsumeIntInRange(0, 3)):
        skip_tokens.append(fdp.ConsumeRandomString())
    settings["SKIP_TOKENS"] = skip_tokens

    settings["NORMALIZE"] = fdp.ConsumeBool()
    settings["RETURN_TIME_AS_PERIOD"] = fdp.ConsumeBool()
    settings["PARSERS"] = fdp.ConsumeSublist(parsers)
    settings["DEFAULT_LANGUAGES"] = fdp.ConsumeSublist(language_codes)
    settings["LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD"] = fdp.ConsumeProbability()

    try:
        # Arguments are drawn in the same left-to-right order as the call.
        text = fdp.ConsumeRandomString()
        formats = _get_format_strings(fdp)
        languages = fdp.ConsumeSublist(language_codes)
        locales = fdp.ConsumeSublist(locale_codes)
        region = fdp.ConsumeString(2)
        dateparser.parse(
            text,
            date_formats=formats,
            languages=languages,
            locales=locales,
            region=region,
            settings=settings,
        )
    except re.error:
        # NOTE(review): -1 presumably tells the fuzzing engine to reject this
        # input rather than report it -- confirm against the atheris docs.
        return -1
    return None


def main():
    """Register ``TestOneInput`` with atheris and start the fuzzing loop."""
    argv = sys.argv
    atheris.Setup(argv, TestOneInput)
    atheris.Fuzz()


# Run the fuzzer only when executed as a script.
if __name__ == "__main__":
    main()
86 changes: 86 additions & 0 deletions fuzzing/fuzz_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import contextlib
import datetime
import io
import tempfile
from typing import List, TypeVar

import atheris

# Generic element type: lets ConsumeSublist declare that it returns a list of
# the same element type as the list it was given.
T = TypeVar("T")


class EnhancedFuzzedDataProvider(atheris.FuzzedDataProvider):
    """``atheris.FuzzedDataProvider`` with extra convenience consumers.

    Adds helpers for variable-length bytes/strings, shuffled sub-lists,
    datetimes, and in-memory or on-disk file inputs, all derived from the
    same underlying fuzz data.
    """

    def ConsumeRandomBytes(self) -> bytes:
        """Consume a fuzzer-chosen number of bytes (0 up to all remaining)."""
        return self.ConsumeBytes(self.ConsumeIntInRange(0, self.remaining_bytes()))

    def ConsumeRandomString(self) -> str:
        """Consume a surrogate-free string of fuzzer-chosen length."""
        return self.ConsumeUnicodeNoSurrogates(
            self.ConsumeIntInRange(0, self.remaining_bytes())
        )

    def ConsumeRemainingString(self) -> str:
        """Consume everything that is left as a surrogate-free string."""
        return self.ConsumeUnicodeNoSurrogates(self.remaining_bytes())

    def ConsumeRemainingBytes(self) -> bytes:
        """Consume everything that is left as raw bytes."""
        return self.ConsumeBytes(self.remaining_bytes())

    def ConsumeSublist(self, source: List[T]) -> List[T]:
        """
        Returns a shuffled sub-list of the given list of len [1, len(source)]

        One bool is consumed per element to decide membership. If nothing is
        selected, a single element picked from ``source`` is returned instead.
        """
        chosen = [elem for elem in source if self.ConsumeBool()]

        # Fisher-Yates shuffle. The loop must run down to i == 1 (inclusive);
        # stopping at 2 would leave two-element lists unshuffled and never
        # give index 1 its own swap step.
        for i in range(len(chosen) - 1, 0, -1):
            j = self.ConsumeIntInRange(0, i)
            chosen[i], chosen[j] = chosen[j], chosen[i]

        return chosen or [self.PickValueInList(source)]

    def ConsumeDate(self) -> datetime.datetime:
        """Return a datetime built from a fuzzed float timestamp.

        Falls back to 1970-01-01 when the float cannot be converted
        (``OverflowError``, ``OSError`` or ``ValueError``).
        """
        try:
            return datetime.datetime.fromtimestamp(self.ConsumeFloat())
        except (OverflowError, OSError, ValueError):
            return datetime.datetime(year=1970, month=1, day=1)

    def _ConsumeFileData(self, all_data: bool, as_bytes: bool):
        """Consume the payload for a file helper as bytes or str.

        ``all_data`` selects between the rest of the input and a
        fuzzer-chosen prefix; ``as_bytes`` selects bytes over str.
        """
        if all_data:
            return (
                self.ConsumeRemainingBytes()
                if as_bytes
                else self.ConsumeRemainingString()
            )
        return self.ConsumeRandomBytes() if as_bytes else self.ConsumeRandomString()

    @contextlib.contextmanager
    def ConsumeMemoryFile(
        self, all_data: bool = False, as_bytes: bool = True
    ) -> io.BytesIO:
        """Context manager yielding an in-memory file filled with fuzz data.

        Yields an ``io.BytesIO`` when ``as_bytes`` is true, otherwise an
        ``io.StringIO``. The file is closed on exit, including when the
        ``with`` body raises.
        """
        file_data = self._ConsumeFileData(all_data, as_bytes)

        file = io.BytesIO(file_data) if as_bytes else io.StringIO(file_data)
        try:
            yield file
        finally:
            # Close even if the caller's block raised.
            file.close()

    @contextlib.contextmanager
    def ConsumeTemporaryFile(
        self, suffix: str, all_data: bool = False, as_bytes: bool = True
    ) -> str:
        """Context manager yielding the path of a temp file with fuzz data.

        The file is created with the given ``suffix``, written in binary or
        text mode according to ``as_bytes``, and closed on exit, including
        when the ``with`` body raises.
        """
        file_data = self._ConsumeFileData(all_data, as_bytes)

        mode = "w+b" if as_bytes else "w+"
        # The `with` guarantees the temporary file is closed on any exit path.
        with tempfile.NamedTemporaryFile(mode=mode, suffix=suffix) as tfile:
            tfile.write(file_data)
            tfile.seek(0)
            tfile.flush()
            yield tfile.name
1 change: 1 addition & 0 deletions fuzzing/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
atheris
1 change: 1 addition & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ envlist = flake8, py3
deps =
-rdateparser_scripts/requirements.txt
-rtests/requirements.txt
-rfuzzing/requirements.txt

[testenv]
deps =
Expand Down

0 comments on commit 11696c1

Please sign in to comment.