Skip to content

Commit

Permalink
OSSFuzz Initial Integration
Browse files Browse the repository at this point in the history
  • Loading branch information
ennamarie19 authored and capuanob committed Nov 11, 2023
1 parent 69f95a4 commit a7b267b
Show file tree
Hide file tree
Showing 14 changed files with 165 additions and 0 deletions.
8 changes: 8 additions & 0 deletions fuzzing/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Build script for the fuzzing harness: installs the package, compiles every
# *_fuzzer.py under fuzzing/, and packages the seed corpus into $OUT.

# Enter the project checkout; stop here rather than installing from the wrong
# directory if the checkout is missing.
cd "$SRC"/dateparser || exit 1
pip3 install .

# Build fuzzers in $OUT
for fuzzer in $(find fuzzing -name '*_fuzzer.py'); do
    compile_python_fuzzer "$fuzzer"
done

# Package the seed corpus next to the fuzzer binaries. Variables are quoted so
# paths containing whitespace do not split into several arguments.
# NOTE(review): the corpus files in this change live under fuzzing/corpus;
# confirm that the build image copies them to $SRC/corpus.
zip -q "$OUT"/dateparser_fuzzer_seed_corpus.zip "$SRC"/corpus/*
1 change: 1 addition & 0 deletions fuzzing/corpus/current
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
now EST
1 change: 1 addition & 0 deletions fuzzing/corpus/date_time
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
January 12, 2012 10:00 PM
1 change: 1 addition & 0 deletions fuzzing/corpus/french
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Le 11 Décembre 2014 à 09:00
1 change: 1 addition & 0 deletions fuzzing/corpus/minutes_offset
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2 minutes ago
1 change: 1 addition & 0 deletions fuzzing/corpus/russian
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
13 января 2015 г. в 13:34
1 change: 1 addition & 0 deletions fuzzing/corpus/thai
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1 เดือนตุลาคม 2005, 1:00 AM
1 change: 1 addition & 0 deletions fuzzing/corpus/time
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
10:00 am
1 change: 1 addition & 0 deletions fuzzing/corpus/time_offset
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2 hours ago -0500
1 change: 1 addition & 0 deletions fuzzing/corpus/turkish
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
yaklaşık 23 saat önce
78 changes: 78 additions & 0 deletions fuzzing/dateparser_fuzzer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
from typing import List

import atheris
import sys

from fuzz_helpers import EnhancedFuzzedDataProvider

with atheris.instrument_imports():
import dateparser

import dateparser.data
import dateparser.parser

import pytz
import re

# Pools of candidate values the harness draws from. Element order is kept
# stable because fuzzer-driven selection depends on list positions.

# Language codes, in the order dateparser itself declares them.
language_codes = dateparser.data.languages_info.language_order

# Format directives that get combined into candidate date format strings.
directives = [
    "%a", "%A", "%w", "%d", "%b", "%B", "%m",
    "%y", "%Y", "%H", "%I", "%p", "%M", "%S",
    "%f", "%z", "%Z", "%j", "%U", "%W", "%c",
    "%x", "%X", "%%", "%G", "%u", "%V", "%:Z",
]

# A small fixed sample of locale codes.
locale_codes = [
    "fr-PF",
    "qu-EC",
    "af-NA",
]

# Keys of dateparser's date-order chart.
date_order = [*dateparser.parser.date_order_chart.keys()]

# Every timezone name known to pytz.
timezone = [*pytz.all_timezones]

# Candidate values for the PREFER_MONTH_OF_YEAR / PREFER_DAY_OF_MONTH settings.
preferred_date = [
    "last",
    "first",
    "current",
]

# Candidate values for the PREFER_DATES_FROM setting.
preferred_dates_from = [
    "past",
    "future",
    "current_period",
]

# Candidate values for the PARSERS setting.
parsers = [
    "timestamp",
    "negative-timestamp",
    "relative-time",
    "custom-formats",
    "absolute-time",
    "no-spaces-time",
]


def _get_format_strings(fdp: EnhancedFuzzedDataProvider) -> List[str]:
    """Build between zero and five fuzzer-derived date format strings.

    Each format string is a fuzzer-chosen sublist of ``directives`` joined by
    a separator obtained from ``fdp.ConsumeString(1)``.

    :param fdp: data provider supplying all fuzzer-controlled choices.
    :return: the generated format strings (possibly an empty list).
    """
    # The count is consumed first, then for every entry the separator is
    # consumed before the directive sublist.
    how_many = fdp.ConsumeIntInRange(0, 5)
    return [
        fdp.ConsumeString(1).join(fdp.ConsumeSublist(directives))
        for _ in range(how_many)
    ]


def TestOneInput(data):
    """Fuzz entry point: call ``dateparser.parse`` with fuzzer-derived inputs.

    A settings dictionary and all call arguments are derived from ``data``
    through an ``EnhancedFuzzedDataProvider``.

    :param data: raw bytes handed over by the fuzzing engine.
    :return: ``-1`` when ``re.error`` is raised, otherwise ``None``.
        NOTE(review): presumably ``-1`` tells the engine to discard the
        input -- confirm against the Atheris version in use.
    """
    fdp = EnhancedFuzzedDataProvider(data)

    # Entries are filled in a fixed sequence: each assignment consumes fuzzer
    # bytes, so the sequence determines how the input is interpreted.
    settings = {}
    settings["DATE_ORDER"] = fdp.PickValueInList(date_order)
    settings["PREFER_LOCALE_DATE_ORDER"] = fdp.ConsumeBool()
    settings["TIMEZONE"] = fdp.PickValueInList(timezone)
    settings["TO_TIMEZONE"] = fdp.PickValueInList(timezone)
    settings["RETURN_AS_TIMEZONE_AWARE"] = fdp.ConsumeBool()
    settings["PREFER_MONTH_OF_YEAR"] = fdp.PickValueInList(preferred_date)
    settings["PREFER_DAY_OF_MONTH"] = fdp.PickValueInList(preferred_date)
    settings["PREFER_DATES_FROM"] = fdp.PickValueInList(preferred_dates_from)
    settings["RELATIVE_BASE"] = fdp.ConsumeDate()
    settings["STRICT_PARSING"] = fdp.ConsumeBool()
    settings["REQUIRE_PARTS"] = []

    # Up to three tokens; the count is consumed before the token strings.
    token_count = fdp.ConsumeIntInRange(0, 3)
    skip_tokens = []
    for _ in range(token_count):
        skip_tokens.append(fdp.ConsumeRandomString())
    settings["SKIP_TOKENS"] = skip_tokens

    settings["NORMALIZE"] = fdp.ConsumeBool()
    settings["RETURN_TIME_AS_PERIOD"] = fdp.ConsumeBool()
    settings["PARSERS"] = fdp.ConsumeSublist(parsers)
    settings["DEFAULT_LANGUAGES"] = fdp.ConsumeSublist(language_codes)
    settings["LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD"] = fdp.ConsumeProbability()

    try:
        # Arguments are produced in the same left-to-right sequence in which
        # they are passed to the call.
        date_string = fdp.ConsumeRandomString()
        date_formats = _get_format_strings(fdp)
        languages = fdp.ConsumeSublist(language_codes)
        locales = fdp.ConsumeSublist(locale_codes)
        region = fdp.ConsumeString(2)

        dateparser.parse(
            date_string,
            date_formats=date_formats,
            languages=languages,
            locales=locales,
            region=region,
            settings=settings,
        )
    except re.error:
        return -1


def main():
    """Register ``TestOneInput`` with Atheris and start the fuzzing loop."""
    atheris.Setup(sys.argv, TestOneInput)
    atheris.Fuzz()


# Start fuzzing only when this file is executed as a script.
if __name__ == "__main__":
    main()
68 changes: 68 additions & 0 deletions fuzzing/fuzz_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import contextlib
import datetime
import io
import tempfile
from typing import Iterator, List, TypeVar, Union

import atheris

T = TypeVar('T')


class EnhancedFuzzedDataProvider(atheris.FuzzedDataProvider):
    """``atheris.FuzzedDataProvider`` extended with convenience consumers.

    Adds helpers for variable-length byte/string chunks, shuffled sublists,
    datetimes, and in-memory or on-disk files filled with fuzzer data.
    """

    def ConsumeRandomBytes(self) -> bytes:
        """Consume a fuzzer-chosen number (0..remaining) of bytes."""
        return self.ConsumeBytes(self.ConsumeIntInRange(0, self.remaining_bytes()))

    def ConsumeRandomString(self) -> str:
        """Consume a surrogate-free string with a fuzzer-chosen length request.

        The requested length lies between 0 and the remaining byte count.
        """
        return self.ConsumeUnicodeNoSurrogates(self.ConsumeIntInRange(0, self.remaining_bytes()))

    def ConsumeRemainingString(self) -> str:
        """Consume a surrogate-free string, requesting all remaining bytes."""
        return self.ConsumeUnicodeNoSurrogates(self.remaining_bytes())

    def ConsumeRemainingBytes(self) -> bytes:
        """Consume every remaining byte."""
        return self.ConsumeBytes(self.remaining_bytes())

    def ConsumeSublist(self, source: List[T]) -> List[T]:
        """Return a shuffled sub-list of ``source`` of len [1, len(source)].

        Every element is kept when ``ConsumeBool()`` returns true. If nothing
        was kept, a single element chosen with ``PickValueInList`` is returned
        instead.

        :param source: list to select from; it is not modified.
        :return: a new list holding the selected elements in shuffled order.
        """
        chosen = [elem for elem in source if self.ConsumeBool()]

        # Fisher-Yates shuffle. The loop has to run down to i == 1 (stop
        # bound 0); stopping at 2 would leave two-element lists unshuffled.
        for i in range(len(chosen) - 1, 0, -1):
            j = self.ConsumeIntInRange(0, i)
            chosen[i], chosen[j] = chosen[j], chosen[i]

        return chosen or [self.PickValueInList(source)]

    def ConsumeDate(self) -> datetime.datetime:
        """Consume a float and interpret it as a timestamp.

        :return: ``datetime.datetime.fromtimestamp`` of the consumed float, or
            1970-01-01 when the conversion raises ``OverflowError``,
            ``OSError`` or ``ValueError``.
        """
        try:
            return datetime.datetime.fromtimestamp(self.ConsumeFloat())
        except (OverflowError, OSError, ValueError):
            return datetime.datetime(year=1970, month=1, day=1)

    @contextlib.contextmanager
    def ConsumeMemoryFile(
        self, all_data: bool = False, as_bytes: bool = True
    ) -> Iterator[Union[io.BytesIO, io.StringIO]]:
        """Context manager yielding an in-memory file filled with fuzzer data.

        :param all_data: consume everything that remains instead of a
            fuzzer-chosen amount.
        :param as_bytes: yield an ``io.BytesIO`` when true, otherwise an
            ``io.StringIO``.
        """
        if all_data:
            file_data = self.ConsumeRemainingBytes() if as_bytes else self.ConsumeRemainingString()
        else:
            file_data = self.ConsumeRandomBytes() if as_bytes else self.ConsumeRandomString()

        file = io.BytesIO(file_data) if as_bytes else io.StringIO(file_data)
        try:
            yield file
        finally:
            # Close the buffer even when the with-body raises.
            file.close()

    @contextlib.contextmanager
    def ConsumeTemporaryFile(
        self, suffix: str, all_data: bool = False, as_bytes: bool = True
    ) -> Iterator[str]:
        """Context manager yielding the path of a temp file with fuzzer data.

        :param suffix: file name suffix passed to ``NamedTemporaryFile``.
        :param all_data: consume everything that remains instead of a
            fuzzer-chosen amount.
        :param as_bytes: write bytes (mode ``w+b``) when true, otherwise text
            (mode ``w+``).
        """
        if all_data:
            file_data = self.ConsumeRemainingBytes() if as_bytes else self.ConsumeRemainingString()
        else:
            file_data = self.ConsumeRandomBytes() if as_bytes else self.ConsumeRandomString()

        mode = 'w+b' if as_bytes else 'w+'
        # The with-statement closes the temporary file even when the caller's
        # with-body raises.
        with tempfile.NamedTemporaryFile(mode=mode, suffix=suffix) as tfile:
            tfile.write(file_data)
            tfile.seek(0)
            tfile.flush()
            yield tfile.name
1 change: 1 addition & 0 deletions fuzzing/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
atheris
1 change: 1 addition & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ envlist = flake8, py3
deps =
-rdateparser_scripts/requirements.txt
-rtests/requirements.txt
-rfuzzing/requirements.txt

[testenv]
deps =
Expand Down

0 comments on commit a7b267b

Please sign in to comment.