diff --git a/fuzzing/build.sh b/fuzzing/build.sh
new file mode 100755
index 000000000..17d1abaab
--- /dev/null
+++ b/fuzzing/build.sh
@@ -0,0 +1,8 @@
+cd "$SRC"/dateparser
+pip3 install .
+
+# Build fuzzers in $OUT
+for fuzzer in $(find fuzzing -name '*_fuzzer.py');do
+  compile_python_fuzzer "$fuzzer"
+done
+zip -q "$OUT"/dateparser_fuzzer_seed_corpus.zip "$SRC"/corpus/*
diff --git a/fuzzing/corpus/current b/fuzzing/corpus/current
new file mode 100644
index 000000000..ec7fb3ae7
--- /dev/null
+++ b/fuzzing/corpus/current
@@ -0,0 +1 @@
+now EST
diff --git a/fuzzing/corpus/date_time b/fuzzing/corpus/date_time
new file mode 100644
index 000000000..f76c0b13a
--- /dev/null
+++ b/fuzzing/corpus/date_time
@@ -0,0 +1 @@
+January 12, 2012 10:00 PM
diff --git a/fuzzing/corpus/french b/fuzzing/corpus/french
new file mode 100644
index 000000000..9925f98c6
--- /dev/null
+++ b/fuzzing/corpus/french
@@ -0,0 +1 @@
+Le 11 Décembre 2014 à 09:00
diff --git a/fuzzing/corpus/minutes_offset b/fuzzing/corpus/minutes_offset
new file mode 100644
index 000000000..4f8a00e5b
--- /dev/null
+++ b/fuzzing/corpus/minutes_offset
@@ -0,0 +1 @@
+2 minutes ago
diff --git a/fuzzing/corpus/russian b/fuzzing/corpus/russian
new file mode 100644
index 000000000..059676b19
--- /dev/null
+++ b/fuzzing/corpus/russian
@@ -0,0 +1 @@
+13 января 2015 г. в 13:34
diff --git a/fuzzing/corpus/thai b/fuzzing/corpus/thai
new file mode 100644
index 000000000..13dad6b68
--- /dev/null
+++ b/fuzzing/corpus/thai
@@ -0,0 +1 @@
+1 เดือนตุลาคม 2005, 1:00 AM
diff --git a/fuzzing/corpus/time b/fuzzing/corpus/time
new file mode 100644
index 000000000..8aaa71d99
--- /dev/null
+++ b/fuzzing/corpus/time
@@ -0,0 +1 @@
+10:00 am
diff --git a/fuzzing/corpus/time_offset b/fuzzing/corpus/time_offset
new file mode 100644
index 000000000..62abaed2c
--- /dev/null
+++ b/fuzzing/corpus/time_offset
@@ -0,0 +1 @@
+2 hours ago -0500
diff --git a/fuzzing/corpus/turkish b/fuzzing/corpus/turkish
new file mode 100644
index 000000000..eb79047e4
--- /dev/null
+++ b/fuzzing/corpus/turkish
@@ -0,0 +1 @@
+yaklaşık 23 saat önce
diff --git a/fuzzing/dateparser_fuzzer.py b/fuzzing/dateparser_fuzzer.py
new file mode 100644
index 000000000..b8ca8bf12
--- /dev/null
+++ b/fuzzing/dateparser_fuzzer.py
@@ -0,0 +1,126 @@
+"""Atheris fuzz target exercising ``dateparser.parse`` with fuzzed settings."""
+
+import sys
+from typing import List
+
+import atheris
+from fuzz_helpers import EnhancedFuzzedDataProvider
+
+with atheris.instrument_imports():
+    import dateparser
+
+import re
+
+import pytz
+
+import dateparser.data
+import dateparser.parser
+
+language_codes = dateparser.data.languages_info.language_order
+directives = [
+    "%a",
+    "%A",
+    "%w",
+    "%d",
+    "%b",
+    "%B",
+    "%m",
+    "%y",
+    "%Y",
+    "%H",
+    "%I",
+    "%p",
+    "%M",
+    "%S",
+    "%f",
+    "%z",
+    "%Z",
+    "%j",
+    "%U",
+    "%W",
+    "%c",
+    "%x",
+    "%X",
+    "%%",
+    "%G",
+    "%u",
+    "%V",
+    "%:Z",
+]
+locale_codes = ["fr-PF", "qu-EC", "af-NA"]
+date_order = list(dateparser.parser.date_order_chart.keys())
+timezone = list(pytz.all_timezones)
+preferred_date = ["last", "first", "current"]
+preferred_dates_from = ["past", "future", "current_period"]
+parsers = [
+    "timestamp",
+    "negative-timestamp",
+    "relative-time",
+    "custom-formats",
+    "absolute-time",
+    "no-spaces-time",
+]
+
+
+def _get_format_strings(fdp: EnhancedFuzzedDataProvider) -> List[str]:
+    """Build up to five format strings from fuzzer-picked directives."""
+    return [
+        fdp.ConsumeString(1).join(fdp.ConsumeSublist(directives))
+        for _ in range(fdp.ConsumeIntInRange(0, 5))
+    ]
+
+
+def TestOneInput(data):
+    """
+    Run one fuzz iteration of ``dateparser.parse``.
+
+    :param data: raw input bytes supplied by the fuzzing engine
+    :return: ``-1`` when the call raises ``re.error``, otherwise ``None``
+    """
+    fdp = EnhancedFuzzedDataProvider(data)
+
+    settings = {
+        "DATE_ORDER": fdp.PickValueInList(date_order),
+        "PREFER_LOCALE_DATE_ORDER": fdp.ConsumeBool(),
+        "TIMEZONE": fdp.PickValueInList(timezone),
+        "TO_TIMEZONE": fdp.PickValueInList(timezone),
+        "RETURN_AS_TIMEZONE_AWARE": fdp.ConsumeBool(),
+        "PREFER_MONTH_OF_YEAR": fdp.PickValueInList(preferred_date),
+        "PREFER_DAY_OF_MONTH": fdp.PickValueInList(preferred_date),
+        "PREFER_DATES_FROM": fdp.PickValueInList(preferred_dates_from),
+        "RELATIVE_BASE": fdp.ConsumeDate(),
+        "STRICT_PARSING": fdp.ConsumeBool(),
+        "REQUIRE_PARTS": [],
+        "SKIP_TOKENS": [
+            fdp.ConsumeRandomString() for _ in range(fdp.ConsumeIntInRange(0, 3))
+        ],
+        "NORMALIZE": fdp.ConsumeBool(),
+        "RETURN_TIME_AS_PERIOD": fdp.ConsumeBool(),
+        "PARSERS": fdp.ConsumeSublist(parsers),
+        "DEFAULT_LANGUAGES": fdp.ConsumeSublist(language_codes),
+        "LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD": fdp.ConsumeProbability(),
+    }
+
+    try:
+        dateparser.parse(
+            fdp.ConsumeRandomString(),
+            date_formats=_get_format_strings(fdp),
+            languages=fdp.ConsumeSublist(language_codes),
+            locales=fdp.ConsumeSublist(locale_codes),
+            region=fdp.ConsumeString(2),
+            settings=settings,
+        )
+    except re.error:
+        # re.error is not reported as a finding; per the Atheris docs a -1
+        # return asks the engine to keep this input out of the corpus.
+        return -1
+
+
+def main():
+    """Hand the fuzz target to Atheris and start fuzzing."""
+    atheris.Setup(sys.argv, TestOneInput)
+    atheris.Fuzz()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/fuzzing/fuzz_helpers.py b/fuzzing/fuzz_helpers.py
new file mode 100644
index 000000000..1ed057131
--- /dev/null
+++ b/fuzzing/fuzz_helpers.py
@@ -0,0 +1,110 @@
+import contextlib
+import datetime
+import io
+import tempfile
+from typing import Iterator, List, TypeVar, Union
+
+import atheris
+
+T = TypeVar("T")
+
+
+class EnhancedFuzzedDataProvider(atheris.FuzzedDataProvider):
+    """FuzzedDataProvider with extra helpers for strings, lists, dates and files."""
+
+    def ConsumeRandomBytes(self) -> bytes:
+        """Consume a fuzzer-chosen number of the remaining bytes."""
+        return self.ConsumeBytes(self.ConsumeIntInRange(0, self.remaining_bytes()))
+
+    def ConsumeRandomString(self) -> str:
+        """Consume a surrogate-free string of fuzzer-chosen length."""
+        return self.ConsumeUnicodeNoSurrogates(
+            self.ConsumeIntInRange(0, self.remaining_bytes())
+        )
+
+    def ConsumeRemainingString(self) -> str:
+        """Consume all remaining input as a surrogate-free string."""
+        return self.ConsumeUnicodeNoSurrogates(self.remaining_bytes())
+
+    def ConsumeRemainingBytes(self) -> bytes:
+        """Consume all remaining input as bytes."""
+        return self.ConsumeBytes(self.remaining_bytes())
+
+    def ConsumeSublist(self, source: List[T]) -> List[T]:
+        """
+        Returns a shuffled sub-list of the given list of len [1, len(source)].
+
+        An empty ``source`` yields an empty list, since there is nothing to pick.
+        """
+        if not source:
+            return []
+
+        chosen = [elem for elem in source if self.ConsumeBool()]
+
+        # Fisher-Yates shuffle: i must run down to 1 inclusive, otherwise the
+        # first two elements never get a chance to trade places.
+        for i in range(len(chosen) - 1, 0, -1):
+            j = self.ConsumeIntInRange(0, i)
+            chosen[i], chosen[j] = chosen[j], chosen[i]
+
+        # Fall back to a single picked element when every coin flip was False.
+        return chosen or [self.PickValueInList(source)]
+
+    def ConsumeDate(self) -> datetime.datetime:
+        """Build a datetime from a fuzzed timestamp, or the 1970-01-01 fallback."""
+        try:
+            return datetime.datetime.fromtimestamp(self.ConsumeFloat())
+        except (OverflowError, OSError, ValueError):
+            return datetime.datetime(year=1970, month=1, day=1)
+
+    def _consume_file_data(self, all_data: bool, as_bytes: bool) -> Union[bytes, str]:
+        """Consume the payload for a fuzzed file as bytes or as text."""
+        if all_data:
+            return (
+                self.ConsumeRemainingBytes()
+                if as_bytes
+                else self.ConsumeRemainingString()
+            )
+        return self.ConsumeRandomBytes() if as_bytes else self.ConsumeRandomString()
+
+    @contextlib.contextmanager
+    def ConsumeMemoryFile(
+        self, all_data: bool = False, as_bytes: bool = True
+    ) -> Iterator[Union[io.BytesIO, io.StringIO]]:
+        """
+        Yield an in-memory file filled with fuzzed data.
+
+        :param all_data: consume every remaining byte instead of a random amount
+        :param as_bytes: yield an ``io.BytesIO`` if true, else an ``io.StringIO``
+        """
+        file_data = self._consume_file_data(all_data, as_bytes)
+        memory_file = io.BytesIO(file_data) if as_bytes else io.StringIO(file_data)
+        # ``with`` closes the buffer even if the caller's block raises.
+        with memory_file:
+            yield memory_file
+
+    @contextlib.contextmanager
+    def ConsumeTemporaryFile(
+        self, suffix: str, all_data: bool = False, as_bytes: bool = True
+    ) -> Iterator[str]:
+        """
+        Yield the path of a temporary file filled with fuzzed data.
+
+        The file is closed (and thereby removed) on exit, even on error.
+
+        :param suffix: suffix for the temporary file name
+        :param all_data: consume every remaining byte instead of a random amount
+        :param as_bytes: write bytes if true, else UTF-8 encoded text
+        """
+        file_data = self._consume_file_data(all_data, as_bytes)
+        # Text mode pins UTF-8 so fuzzed code points never hit a locale codec
+        # error; binary mode must not be given an encoding.
+        with tempfile.NamedTemporaryFile(
+            mode="w+b" if as_bytes else "w+",
+            suffix=suffix,
+            encoding=None if as_bytes else "utf-8",
+        ) as tfile:
+            tfile.write(file_data)
+            tfile.flush()
+            tfile.seek(0)
+            yield tfile.name
diff --git a/fuzzing/requirements.txt b/fuzzing/requirements.txt
new file mode 100644
index 000000000..58397a6ef
--- /dev/null
+++ b/fuzzing/requirements.txt
@@ -0,0 +1 @@
+atheris
diff --git a/tox.ini b/tox.ini
index 208d05c31..b6abc959b 100644
--- a/tox.ini
+++ b/tox.ini
@@ -5,6 +5,7 @@ envlist = flake8, py3
 deps =
     -rdateparser_scripts/requirements.txt
     -rtests/requirements.txt
+    -rfuzzing/requirements.txt
 
 [testenv]
 deps =