diff --git a/.gitignore b/.gitignore index 669c9f718..d8af92e12 100644 --- a/.gitignore +++ b/.gitignore @@ -51,4 +51,4 @@ docs/_build .vscode/ # Other -raw_data +raw_data \ No newline at end of file diff --git a/dateparser/data/date_translation_data/en.py b/dateparser/data/date_translation_data/en.py index 62e2649c2..490943ba4 100644 --- a/dateparser/data/date_translation_data/en.py +++ b/dateparser/data/date_translation_data/en.py @@ -188,6 +188,21 @@ ], "in 1 decade": [ "next decade" + ], + "1 century ago": [ + "last century", + "this century" + ], + "in 1 century": [ + "next century" + ], + "1 fortnight ago": [ + "last fortnight", + "this fortnight" + ], + "in 1 fortnight": [ + "next fortnight", + "coming fortnight" ] }, "relative-type-regex": { @@ -264,6 +279,19 @@ ], "\\1 decade ago": [ "(\\d+) decades? ago" + ], + "in \\1 century": [ + "in (\\d+) centurys?", + "in (\\d+) centuries?" + ], + "\\1 century ago": [ + "(\\d+) centurys? ago" + ], + "in \\1 fortnight": [ + "in (\\d+) fortnights?" + ], + "\\1 fortnight ago": [ + "(\\d+) fortnights? 
ago" ] }, "locale_specific": { @@ -771,6 +799,15 @@ "decade", "decades" ], + "century": [ + "century", + "centurys", + "centuries" + ], + "fortnight": [ + "fortnight", + "fortnights" + ], "ago": [ "ago" ], diff --git a/dateparser/data/date_translation_data/hi.py b/dateparser/data/date_translation_data/hi.py index 613bf69db..302f7ebb6 100644 --- a/dateparser/data/date_translation_data/hi.py +++ b/dateparser/data/date_translation_data/hi.py @@ -162,6 +162,12 @@ ], "2 day ago": [ "परसों" + ], + "1 decade ago": [ + "पिछला दशक" + ], + "in 1 decade": [ + "अगला दशक" ] }, "relative-type-regex": { @@ -212,6 +218,20 @@ ], "in \\1 year": [ "(\\d+) वर्ष में" + ], + "in \\1 decade": [ + "(\\d+) दशक में", + "(\\d+) दशकों में", + "(\\d+) दशक मे", + "(\\d+) दशकों मे", + "(\\d+) दशक बाद", + "(\\d+) दशकों बाद", + "(\\d+) दशक पश्चात", + "(\\d+) दशकों पश्चात" + ], + "\\1 decade ago": [ + "(\\d+) दशक पहले", + "(\\d+) दशकों पहले" ] }, "locale_specific": {}, @@ -235,12 +255,59 @@ "," ], "sentence_splitter_group": 3, + "decade": [ + "दशक", + "दशकों" + ], "ago": [ "पहले", "पूर्व" ], "in": [ "में", - "बाद" + "बाद", + "पश्चात", + "मे" + ], + "simplifications": [ + { + "एक": "1" + }, + { + "दो": "2" + }, + { + "तीन": "3" + }, + { + "चार": "4" + }, + { + "पांच": "5" + }, + { + "छह": "6" + }, + { + "सात": "7" + }, + { + "आठ": "8" + }, + { + "नौ": "9" + }, + { + "दस": "10" + }, + { + "दश": "10" + }, + { + "ग्यारह": "11" + }, + { + "बारह": "12" + } ] } diff --git a/dateparser/freshness_date_parser.py b/dateparser/freshness_date_parser.py index 4a64f4ce5..3ba921d0a 100644 --- a/dateparser/freshness_date_parser.py +++ b/dateparser/freshness_date_parser.py @@ -10,7 +10,7 @@ from .timezone_parser import pop_tz_offset_from_string -_UNITS = r'decade|year|month|week|day|hour|minute|second' +_UNITS = r'fortnight|century|decade|year|month|week|day|hour|minute|second' PATTERN = re.compile(r'(\d+)\s*(%s)\b' % _UNITS, re.I | re.S | re.U) @@ -148,6 +148,12 @@ def get_kwargs(self, date_string): if 'decades' in 
kwargs: kwargs['years'] = 10 * kwargs['decades'] + kwargs.get('years', 0) del kwargs['decades'] + if 'centurys' in kwargs: + kwargs['years'] = 100 * kwargs['centurys'] + kwargs.get('years', 0) + del kwargs['centurys'] + if 'fortnights' in kwargs: + kwargs['days'] = 14 * kwargs['fortnights'] + kwargs.get('days', 0) + del kwargs['fortnights'] return kwargs def get_date_data(self, date_string, settings=None): diff --git a/dateparser/languages/dictionary.py b/dateparser/languages/dictionary.py index 33c692df6..d01c4842d 100644 --- a/dateparser/languages/dictionary.py +++ b/dateparser/languages/dictionary.py @@ -10,7 +10,7 @@ KNOWN_WORD_TOKENS = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday', 'january', 'february', 'march', 'april', 'may', 'june', 'july', 'august', 'september', - 'october', 'november', 'december', 'decade', 'year', + 'october', 'november', 'december', 'decade', 'century', 'fortnight', 'year', 'month', 'week', 'day', 'hour', 'minute', 'second', 'ago', 'in', 'am', 'pm'] diff --git a/dateparser_data/supplementary_language_data/date_translation_data/en.yaml b/dateparser_data/supplementary_language_data/date_translation_data/en.yaml index 600cf325c..95e11b0c8 100644 --- a/dateparser_data/supplementary_language_data/date_translation_data/en.yaml +++ b/dateparser_data/supplementary_language_data/date_translation_data/en.yaml @@ -12,6 +12,13 @@ september: decade: - decade - decades +century: + - century + - centurys + - centuries +fortnight: + - fortnight + - fortnights year: - years month: @@ -47,12 +54,32 @@ relative-type: - this decade in 1 decade: - next decade + 1 century ago: + - last century + - this century + in 1 century: + - next century + 1 fortnight ago: + - last fortnight + - this fortnight + in 1 fortnight: + - next fortnight + - coming fortnight relative-type-regex: in \1 decade: - in (\d+) decades? \1 decade ago: - (\d+) decades? ago + in \1 century: + - in (\d+) centurys? + - in (\d+) centuries? 
+ \1 century ago: + - (\d+) centurys? ago + in \1 fortnight: + - in (\d+) fortnights? + \1 fortnight ago: + - (\d+) fortnights? ago simplifications: - an: '1' diff --git a/dateparser_data/supplementary_language_data/date_translation_data/hi.yaml b/dateparser_data/supplementary_language_data/date_translation_data/hi.yaml index 284c0baf4..cbbe765da 100644 --- a/dateparser_data/supplementary_language_data/date_translation_data/hi.yaml +++ b/dateparser_data/supplementary_language_data/date_translation_data/hi.yaml @@ -10,6 +10,9 @@ november: - नवम्बर december: - दिसम्बर +decade: + - दशक + - दशकों year: - साल @@ -29,7 +32,44 @@ ago: in: - में - बाद + - पश्चात + - मे relative-type: 2 day ago: - परसों + 1 decade ago: + - पिछला दशक + in 1 decade: + - अगला दशक + +relative-type-regex: + in \1 decade: + - (\d+) दशक में + - (\d+) दशकों में + - (\d+) दशक मे + - (\d+) दशकों मे + - (\d+) दशक बाद + - (\d+) दशकों बाद + - (\d+) दशक पश्चात + - (\d+) दशकों पश्चात + + \1 decade ago: + - (\d+) दशक पहले + - (\d+) दशकों पहले + + +simplifications: + - एक: '1' + - दो: '2' + - तीन: '3' + - चार: '4' + - पांच: '5' + - छह: '6' + - सात: '7' + - आठ: '8' + - नौ: '9' + - दस: '10' + - दश: '10' + - ग्यारह: '11' + - बारह: '12' \ No newline at end of file diff --git a/tests/test_data.py b/tests/test_data.py index 550e923b3..bf7cbed8d 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -18,7 +18,7 @@ 'name', 'date_order', 'skip', 'pertain', 'simplifications', 'no_word_spacing', 'ago', 'in', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday', 'january', 'february', 'march', 'april', 'may', 'june', 'july', 'august', 'september', - 'october', 'november', 'december', 'decade', 'year', 'month', 'week', 'day', 'hour', 'minute', + 'october', 'november', 'december', 'decade', 'century', 'fortnight', 'year', 'month', 'week', 'day', 'hour', 'minute', 'second', 'am', 'pm', 'relative-type', 'relative-type-regex', 'sentence_splitter_group'] NECESSARY_KEYS = ['name', 'monday', 
'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', diff --git a/tests/test_freshness_date_parser.py b/tests/test_freshness_date_parser.py index 41854adb2..3edce390b 100644 --- a/tests/test_freshness_date_parser.py +++ b/tests/test_freshness_date_parser.py @@ -57,6 +57,20 @@ def test_relative_past_dates_with_time_as_period(self, date_string, ago, period) @parameterized.expand([ # English dates + param('1 fortnight', ago={'days': 14}, period='day'), + param('last fortnight', ago={'days': 14}, period='day'), + param('14 fortnight', ago={'days': 196}, period='day'), + param('a fortnight ago', ago={'days': 14}, period='day'), + param("1 century", ago={'years': 100}, period='year'), + param("1 century 2 years", ago={'years': 102}, period='year'), + param("1 century 12 months", ago={'years': 100, 'months': 12}, period='month'), + param("1 century and 11 months", ago={'years': 100, 'months': 11}, period='month'), + param("last century", ago={'years': 100}, period='year'), + param("a century ago", ago={'years': 100}, period='year'), + param("6 centurys", ago={'years': 600}, period='year'), + param("10 centuries", ago={'years': 1000}, period='year'), + param("2 century ago", ago={'years': 200}, period='year'), + param("two centuries ago", ago={'years': 200}, period='year'), param("1 decade", ago={'years': 10}, period='year'), param("1 decade 2 years", ago={'years': 12}, period='year'), param("1 decade 12 months", ago={'years': 10, 'months': 12}, period='month'), @@ -353,6 +367,8 @@ def test_relative_past_dates_with_time_as_period(self, date_string, ago, period) param('1 वर्ष, 8 महीने, 2 सप्ताह', ago={'years': 1, 'months': 8, 'weeks': 2}, period='week'), param('1 वर्ष 7 महीने', ago={'years': 1, 'months': 7}, period='month'), param('आज', ago={'days': 0}, period='day'), + param('1 दशक', ago={'years': 10}, period='year'), + param('1 दशक पहले', ago={'years': 10}, period='year'), # af param("2 uur gelede", ago={'hours': 2}, period='day'), @@ -576,6 +592,18 @@ def 
test_relative_past_dates(self, date_string, ago, period): @parameterized.expand([ # English dates + param('1 fortnight', ago={'days': 14}, period='day'), + param('last fortnight', ago={'days': 14}, period='day'), + param('14 fortnight', ago={'days': 196}, period='day'), + param('a fortnight ago', ago={'days': 14}, period='day'), + param("1 century", ago={'years': 100}, period='year'), + param("1 century 2 years", ago={'years': 102}, period='year'), + param("1 century 12 months", ago={'years': 100, 'months': 12}, period='month'), + param("1 century and 11 months", ago={'years': 100, 'months': 11}, period='month'), + param("last century", ago={'years': 100}, period='year'), + param("a century ago", ago={'years': 100}, period='year'), + param("6 centurys", ago={'years': 600}, period='year'), + param("10 centuries", ago={'years': 1000}, period='year'), param("1 decade", ago={'years': 10}, period='year'), param("1 decade 2 years", ago={'years': 12}, period='year'), param("1 decade 12 months", ago={'years': 10, 'months': 12}, period='month'), @@ -841,6 +869,10 @@ def test_relative_past_dates(self, date_string, ago, period): param('1 वर्ष, 8 महीने, 2 सप्ताह', ago={'years': 1, 'months': 8, 'weeks': 2}, period='week'), param('1 वर्ष 7 महीने', ago={'years': 1, 'months': 7}, period='month'), param('आज', ago={'days': 0}, period='day'), + param('1 दशक पहले', ago={'years': 10}, period='year'), + param('1 दशक पूर्व', ago={'years': 10}, period='year'), + param('दो दशक पहले', ago={'years': 20}, period='year'), + param('10 दशकों पहले', ago={'years': 100}, period='year'), # af param("2 uur gelede", ago={'hours': 2}, period='day'), @@ -1066,6 +1098,18 @@ def test_normalized_relative_dates(self, date_string, ago, period): @parameterized.expand([ # English dates + param('in a fortnight', in_future={'days': 14}, period='day'), + param('next fortnight', in_future={'days': 14}, period='day'), + param('coming fortnight', in_future={'days': 14}, period='day'), + param('in coming fortnight', 
in_future={'days': 14}, period='day'), + param('in 1 century 2 months', in_future={'years': 100, 'months': 2}, period='month'), + param('in 10 century', in_future={'years': 1000}, period='year'), + param('in 1 century 12 years', in_future={'years': 112}, period='year'), + param('next century', in_future={'years': 100}, period='year'), + param('in a century', in_future={'years': 100}, period='year'), + param('in 3 centurys', in_future={'years': 300}, period='year'), + param('in 3 centuries', in_future={'years': 300}, period='year'), + param('in 10 centuries', in_future={'years': 1000}, period='year'), param('in 1 decade 2 months', in_future={'years': 10, 'months': 2}, period='month'), param('in 100 decades', in_future={'years': 1000}, period='year'), param('in 1 decade 12 years', in_future={'years': 22}, period='year'), @@ -1160,6 +1204,11 @@ def test_normalized_relative_dates(self, date_string, ago, period): param('17 सेकंड बाद', in_future={'seconds': 17}, period='day'), param('1 वर्ष, 5 महीने, 1 सप्ताह में', in_future={'years': 1, 'months': 5, 'weeks': 1}, period='week'), + param('1 दशक में', in_future={'years': 10}, period='year'), + param('पांच दशक बाद', in_future={'years': 50}, period='year'), + param('दश दशक पश्चात', in_future={'years': 100}, period='year'), + param('9 दशकों मे', in_future={'years': 90}, period='year'), + # af param("oor 10 jaar", in_future={'years': 10}, period='year'),