From ddff3ebe1ff7516b7845115be6277c2db8a14060 Mon Sep 17 00:00:00 2001 From: Kanishk Pachauri Date: Mon, 27 Sep 2021 17:03:09 +0530 Subject: [PATCH 01/29] added support for decade in hindi --- dataparser_changes/pyvenv.cfg | 3 +++ dateparser/data/date_translation_data/hi.py | 15 +++++++++++++++ .../date_translation_data/hi.yaml | 14 ++++++++++++++ tests/test_freshness_date_parser.py | 5 +++++ 4 files changed, 37 insertions(+) create mode 100644 dataparser_changes/pyvenv.cfg diff --git a/dataparser_changes/pyvenv.cfg b/dataparser_changes/pyvenv.cfg new file mode 100644 index 000000000..6b39f5ee3 --- /dev/null +++ b/dataparser_changes/pyvenv.cfg @@ -0,0 +1,3 @@ +home = /opt/homebrew/opt/python@3.9/bin +include-system-site-packages = false +version = 3.9.7 diff --git a/dateparser/data/date_translation_data/hi.py b/dateparser/data/date_translation_data/hi.py index 613bf69db..6ce92798d 100644 --- a/dateparser/data/date_translation_data/hi.py +++ b/dateparser/data/date_translation_data/hi.py @@ -162,6 +162,12 @@ ], "2 day ago": [ "परसों" + ], + "1 decade ago": [ + "पिछला दशक" + ], + "in 1 decade": [ + "अगला दशक" ] }, "relative-type-regex": { @@ -212,6 +218,12 @@ ], "in \\1 year": [ "(\\d+) वर्ष में" + ], + "in \\1 decade": [ + "(\\d+) दशक में" + ], + "\\1 decade ago": [ + "(\\d+) दशक पहले" ] }, "locale_specific": {}, @@ -235,6 +247,9 @@ "," ], "sentence_splitter_group": 3, + "decade": [ + "दशक" + ], "ago": [ "पहले", "पूर्व" diff --git a/dateparser_data/supplementary_language_data/date_translation_data/hi.yaml b/dateparser_data/supplementary_language_data/date_translation_data/hi.yaml index 284c0baf4..6374ff80e 100644 --- a/dateparser_data/supplementary_language_data/date_translation_data/hi.yaml +++ b/dateparser_data/supplementary_language_data/date_translation_data/hi.yaml @@ -10,6 +10,8 @@ november: - नवम्बर december: - दिसम्बर +decade: + - दशक year: - साल @@ -33,3 +35,15 @@ in: relative-type: 2 day ago: - परसों + 1 decade ago: + - पिछला दशक + in 1 decade: + - अगला दशक + +relative-type-regex: + in \1 decade: + - (\d+) दशक में + \1 decade ago: + - (\d+) दशक पहले + + diff --git a/tests/test_freshness_date_parser.py b/tests/test_freshness_date_parser.py index 41854adb2..9089254c2 100644 --- a/tests/test_freshness_date_parser.py +++ b/tests/test_freshness_date_parser.py @@ -353,6 +353,8 @@ def test_relative_past_dates_with_time_as_period(self, date_string, ago, period) param('1 वर्ष, 8 महीने, 2 सप्ताह', ago={'years': 1, 'months': 8, 'weeks': 2}, period='week'), param('1 वर्ष 7 महीने', ago={'years': 1, 'months': 7}, period='month'), param('आज', ago={'days': 0}, period='day'), + param('1 दशक', ago={'years': 10}, period='year'), + param('1 दशक पहले', ago={'years': 10}, period='year'), # af param("2 uur gelede", ago={'hours': 2}, period='day'), @@ -841,6 +843,7 @@ def test_relative_past_dates(self, date_string, ago, period): param('1 वर्ष, 8 महीने, 2 सप्ताह', ago={'years': 1, 'months': 8, 'weeks': 2}, period='week'), param('1 वर्ष 7 महीने', ago={'years': 1, 'months': 7}, period='month'), param('आज', ago={'days': 0}, period='day'), + param('1 दशक पहले', ago={'years': 10}, period='year'), # af param("2 uur gelede", ago={'hours': 2}, period='day'), @@ -1160,6 +1163,8 @@ def test_normalized_relative_dates(self, date_string, ago, period): param('17 सेकंड बाद', in_future={'seconds': 17}, period='day'), param('1 वर्ष, 5 महीने, 1 सप्ताह में', in_future={'years': 1, 'months': 5, 'weeks': 1}, period='week'), + param('1 दशक में', in_future={'years': 10}, period='year'), + # af param("oor 10 jaar", in_future={'years': 10}, period='year'), From 96d2e52b2c15128b4dc6670b46a5551ca3d8ac18 Mon Sep 17 00:00:00 2001 From: Kanishk Pachauri Date: Mon, 27 Sep 2021 17:07:07 +0530 Subject: [PATCH 02/29] added support for decade in hindi --- dataparser_changes/pyvenv.cfg | 3 --- tests/test_freshness_date_parser.py | 1 - 2 files changed, 4 deletions(-) delete mode 100644 dataparser_changes/pyvenv.cfg diff --git a/dataparser_changes/pyvenv.cfg b/dataparser_changes/pyvenv.cfg deleted file mode 100644 index 6b39f5ee3..000000000 --- a/dataparser_changes/pyvenv.cfg +++ /dev/null @@ -1,3 +0,0 @@ -home = /opt/homebrew/opt/python@3.9/bin -include-system-site-packages = false -version = 3.9.7 diff --git a/tests/test_freshness_date_parser.py b/tests/test_freshness_date_parser.py index 9089254c2..77feea2b9 100644 --- a/tests/test_freshness_date_parser.py +++ b/tests/test_freshness_date_parser.py @@ -1165,7 +1165,6 @@ def test_normalized_relative_dates(self, date_string, ago, period): in_future={'years': 1, 'months': 5, 'weeks': 1}, period='week'), param('1 दशक में', in_future={'years': 10}, period='year'), - # af param("oor 10 jaar", in_future={'years': 10}, period='year'), param("oor 5 min 3 sek", in_future={'minutes': 5, 'seconds': 3}, period='day'), From e826a0615c7043faa106dbdbe9b8e00b07ca464f Mon Sep 17 00:00:00 2001 From: Kanishk Pachauri Date: Mon, 27 Sep 2021 23:59:05 +0530 Subject: [PATCH 03/29] added support for fortnignt and century --- century/pyvenv.cfg | 3 ++ dateparser/data/date_translation_data/en.py | 35 +++++++++++++++++++ dateparser/freshness_date_parser.py | 8 ++++- dateparser/languages/dictionary.py | 2 +- .../date_translation_data/en.yaml | 25 +++++++++++++ tests/test_data.py | 2 +- tests/test_freshness_date_parser.py | 31 ++++++++++++++++ 7 files changed, 103 insertions(+), 3 deletions(-) create mode 100644 century/pyvenv.cfg diff --git a/century/pyvenv.cfg b/century/pyvenv.cfg new file mode 100644 index 000000000..6b39f5ee3 --- /dev/null +++ b/century/pyvenv.cfg @@ -0,0 +1,3 @@ +home = /opt/homebrew/opt/python@3.9/bin +include-system-site-packages = false +version = 3.9.7 diff --git a/dateparser/data/date_translation_data/en.py b/dateparser/data/date_translation_data/en.py index 62e2649c2..b0fb33505 100644 --- a/dateparser/data/date_translation_data/en.py +++ b/dateparser/data/date_translation_data/en.py @@ -188,6 +188,20 @@ ], "in 1 decade": [ "next decade" + ], + "1 century ago": [ + "last century", + "this century" + ], + "in 1 century": [ + "next century" + ], + "1 fortnight ago": [ + "last fortnight", + "this fortnight" + ], + "in 1 fortnight": [ + "next fortnight" ] }, "relative-type-regex": { @@ -264,6 +278,18 @@ ], "\\1 decade ago": [ "(\\d+) decades? ago" + ], + "in \\1 century": [ + "in (\\d+) century?" + ], + "\\1 century ago": [ + "(\\d+) century? ago" + ], + "in \\1 fortnight": [ + "in (\\d+) fortnight?" + ], + "\\1 fortnight ago": [ + "(\\d+) fortnight? ago" ] }, "locale_specific": { @@ -771,6 +797,15 @@ "decade", "decades" ], + "century": [ + "century", + "centurys", + "centuries" + ], + "fortnight": [ + "fortnight", + "fortnights" + ], "ago": [ "ago" ], diff --git a/dateparser/freshness_date_parser.py b/dateparser/freshness_date_parser.py index 4a64f4ce5..3ba921d0a 100644 --- a/dateparser/freshness_date_parser.py +++ b/dateparser/freshness_date_parser.py @@ -10,7 +10,7 @@ from .timezone_parser import pop_tz_offset_from_string -_UNITS = r'decade|year|month|week|day|hour|minute|second' +_UNITS = r'fortnight|century|decade|year|month|week|day|hour|minute|second' PATTERN = re.compile(r'(\d+)\s*(%s)\b' % _UNITS, re.I | re.S | re.U) @@ -148,6 +148,12 @@ def get_kwargs(self, date_string): if 'decades' in kwargs: kwargs['years'] = 10 * kwargs['decades'] + kwargs.get('years', 0) del kwargs['decades'] + if 'centurys' in kwargs: + kwargs['years'] = 100 * kwargs['centurys'] + kwargs.get('years', 0) + del kwargs['centurys'] + if 'fortnights' in kwargs: + kwargs['days'] = 14 * kwargs['fortnights'] + kwargs.get('days', 0) + del kwargs['fortnights'] return kwargs def get_date_data(self, date_string, settings=None): diff --git a/dateparser/languages/dictionary.py b/dateparser/languages/dictionary.py index 33c692df6..d01c4842d 100644 --- a/dateparser/languages/dictionary.py +++ b/dateparser/languages/dictionary.py @@ -10,7 +10,7 @@ KNOWN_WORD_TOKENS = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday', 'january', 'february', 'march', 'april', 'may', 'june', 'july', 'august', 'september', - 'october', 'november', 'december', 'decade', 'year', + 'october', 'november', 'december', 'decade', 'century', 'fortnight', 'year', 'month', 'week', 'day', 'hour', 'minute', 'second', 'ago', 'in', 'am', 'pm'] diff --git a/dateparser_data/supplementary_language_data/date_translation_data/en.yaml b/dateparser_data/supplementary_language_data/date_translation_data/en.yaml index 600cf325c..7e30e3124 100644 --- a/dateparser_data/supplementary_language_data/date_translation_data/en.yaml +++ b/dateparser_data/supplementary_language_data/date_translation_data/en.yaml @@ -12,6 +12,13 @@ september: decade: - decade - decades +century: + - century + - centurys + - centuries +fortnight: + - fortnight + - fortnights year: - years month: @@ -47,12 +54,30 @@ relative-type: - this decade in 1 decade: - next decade + 1 century ago: + - last century + - this century + in 1 century: + - next century + 1 fortnight ago: + - last fortnight + - this fortnight + in 1 fortnight: + - next fortnight relative-type-regex: in \1 decade: - in (\d+) decades? \1 decade ago: - (\d+) decades? ago + in \1 century: + - in (\d+) century? + \1 century ago: + - (\d+) century? ago + in \1 fortnight: + - in (\d+) fortnight? + \1 fortnight ago: + - (\d+) fortnight? ago simplifications: - an: '1' diff --git a/tests/test_data.py b/tests/test_data.py index 550e923b3..bf7cbed8d 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -18,7 +18,7 @@ 'name', 'date_order', 'skip', 'pertain', 'simplifications', 'no_word_spacing', 'ago', 'in', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday', 'january', 'february', 'march', 'april', 'may', 'june', 'july', 'august', 'september', - 'october', 'november', 'december', 'decade', 'year', 'month', 'week', 'day', 'hour', 'minute', + 'october', 'november', 'december', 'decade', 'century', 'fortnight', 'year', 'month', 'week', 'day', 'hour', 'minute', 'second', 'am', 'pm', 'relative-type', 'relative-type-regex', 'sentence_splitter_group'] NECESSARY_KEYS = ['name', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', diff --git a/tests/test_freshness_date_parser.py b/tests/test_freshness_date_parser.py index 77feea2b9..7a9211acb 100644 --- a/tests/test_freshness_date_parser.py +++ b/tests/test_freshness_date_parser.py @@ -57,6 +57,18 @@ def test_relative_past_dates_with_time_as_period(self, date_string, ago, period) @parameterized.expand([ # English dates + param('1 fortnight', ago={'days': 14}, period='day'), + param('last fortnight', ago={'days': 14}, period='day'), + param('14 fortnight', ago={'days': 196}, period='day'), + param('a fortnight ago', ago={'days': 14}, period='day'), + param('last fortnight', ago={'days': 14}, period='day'), + param("1 century", ago={'years': 100}, period='year'), + param("1 century 2 years", ago={'years': 102}, period='year'), + param("1 century 12 months", ago={'years': 100, 'months': 12}, period='month'), + param("1 century and 11 months", ago={'years': 100, 'months': 11}, period='month'), + param("last century", ago={'years': 100}, period='year'), + param("a century ago", ago={'years': 100}, period='year'), + param("10 century", ago={'years': 1000}, period='year'), param("1 decade", ago={'years': 10}, period='year'), param("1 decade 2 years", ago={'years': 12}, period='year'), param("1 decade 12 months", ago={'years': 10, 'months': 12}, period='month'), @@ -578,6 +590,18 @@ def test_relative_past_dates(self, date_string, ago, period): @parameterized.expand([ # English dates + param('1 fortnight', ago={'days': 14}, period='day'), + param('last fortnight', ago={'days': 14}, period='day'), + param('14 fortnight', ago={'days': 196}, period='day'), + param('a fortnight ago', ago={'days': 14}, period='day'), + param('last fortnight', ago={'days': 14}, period='day'), + param("1 century", ago={'years': 100}, period='year'), + param("1 century 2 years", ago={'years': 102}, period='year'), + param("1 century 12 months", ago={'years': 100, 'months': 12}, period='month'), + param("1 century and 11 months", ago={'years': 100, 'months': 11}, period='month'), + param("last century", ago={'years': 100}, period='year'), + param("a century ago", ago={'years': 100}, period='year'), + param("10 century", ago={'years': 1000}, period='year'), param("1 decade", ago={'years': 10}, period='year'), param("1 decade 2 years", ago={'years': 12}, period='year'), param("1 decade 12 months", ago={'years': 10, 'months': 12}, period='month'), @@ -1069,6 +1093,13 @@ def test_normalized_relative_dates(self, date_string, ago, period): @parameterized.expand([ # English dates + param('in a fortnight', in_future={'days': 14}, period='day'), + param('next fortnight', in_future={'days': 14}, period='day'), + param('in 1 century 2 months', in_future={'years': 100, 'months': 2}, period='month'), + param('in 10 century', in_future={'years': 1000}, period='year'), + param('in 1 century 12 years', in_future={'years': 112}, period='year'), + param('next century', in_future={'years': 100}, period='year'), + param('in a century', in_future={'years': 100}, period='year'), param('in 1 decade 2 months', in_future={'years': 10, 'months': 2}, period='month'), param('in 100 decades', in_future={'years': 1000}, period='year'), param('in 1 decade 12 years', in_future={'years': 22}, period='year'), From 7185282e8cc1c8718763649bb5928d4f045c24ec Mon Sep 17 00:00:00 2001 From: Kanishk Pachauri Date: Tue, 28 Sep 2021 00:01:18 +0530 Subject: [PATCH 04/29] Delete pyvenv.cfg --- century/pyvenv.cfg | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 century/pyvenv.cfg diff --git a/century/pyvenv.cfg b/century/pyvenv.cfg deleted file mode 100644 index 6b39f5ee3..000000000 --- a/century/pyvenv.cfg +++ /dev/null @@ -1,3 +0,0 @@ -home = /opt/homebrew/opt/python@3.9/bin -include-system-site-packages = false -version = 3.9.7 From 127149b732e276b8f78d8d19caeb2157f60560f6 Mon Sep 17 00:00:00 2001 From: Kanishk Pachauri Date: Tue, 28 Sep 2021 00:01:54 +0530 Subject: [PATCH 05/29] Delete hi.py --- dateparser/data/date_translation_data/hi.py | 261 -------------------- 1 file changed, 261 deletions(-) delete mode 100644 dateparser/data/date_translation_data/hi.py diff --git a/dateparser/data/date_translation_data/hi.py b/dateparser/data/date_translation_data/hi.py deleted file mode 100644 index 6ce92798d..000000000 --- a/dateparser/data/date_translation_data/hi.py +++ /dev/null @@ -1,261 +0,0 @@ -info = { - "name": "hi", - "date_order": "DMY", - "january": [ - "जनवरी", - "जन॰" - ], - "february": [ - "फ़रवरी", - "फ़र॰" - ], - "march": [ - "मार्च" - ], - "april": [ - "अप्रैल" - ], - "may": [ - "मई" - ], - "june": [ - "जून" - ], - "july": [ - "जुलाई", - "जुल॰" - ], - "august": [ - "अगस्त", - "अग॰" - ], - "september": [ - "सितंबर", - "सित॰", - "सितम्बर" - ], - "october": [ - "अक्तूबर", - "अक्तू॰", - "अक्टूबर" - ], - "november": [ - "नवंबर", - "नव॰", - "नवम्बर" - ], - "december": [ - "दिसंबर", - "दिस॰", - "दिसम्बर" - ], - "monday": [ - "सोम", - "सोमवार" - ], - "tuesday": [ - "मंगल", - "मंगलवार" - ], - "wednesday": [ - "बुध", - "बुधवार" - ], - "thursday": [ - "गुरु", - "गुरुवार" - ], - "friday": [ - "शुक्र", - "शुक्रवार" - ], - "saturday": [ - "शनि", - "शनिवार" - ], - "sunday": [ - "रवि", - "रविवार" - ], - "am": [ - "पूर्वाह्न" - ], - "pm": [ - "अपराह्न" - ], - "year": [ - "वर्ष", - "साल", - "वर्षों" - ], - "month": [ - "माह", - "महीना", - "मास", - "महीने" - ], - "week": [ - "सप्ताह" - ], - "day": [ - "दिन", - "दिवस" - ], - "hour": [ - "घं", - "घंटा", - "घंटे" - ], - "minute": [ - "मि", - "मिनट" - ], - "second": [ - "से", - "सेकंड" - ], - "relative-type": { - "0 day ago": [ - "आज" - ], - "0 hour ago": [ - "यह घंटा" - ], - "0 minute ago": [ - "यह मिनट" - ], - "0 month ago": [ - "इस माह" - ], - "0 second ago": [ - "अब" - ], - "0 week ago": [ - "इस सप्ताह" - ], - "0 year ago": [ - "इस वर्ष" - ], - "1 day ago": [ - "कल" - ], - "1 month ago": [ - "पिछला माह" - ], - "1 week ago": [ - "पिछला सप्ताह" - ], - "1 year ago": [ - "पिछला वर्ष" - ], - "in 1 day": [ - "कल" - ], - "in 1 month": [ - "अगला माह" - ], - "in 1 week": [ - "अगला सप्ताह" - ], - "in 1 year": [ - "अगला वर्ष" - ], - "2 day ago": [ - "परसों" - ], - "1 decade ago": [ - "पिछला दशक" - ], - "in 1 decade": [ - "अगला दशक" - ] - }, - "relative-type-regex": { - "\\1 day ago": [ - "(\\d+) दिन पहले" - ], - "\\1 hour ago": [ - "(\\d+) घं पहले", - "(\\d+) घंटे पहले" - ], - "\\1 minute ago": [ - "(\\d+) मि पहले", - "(\\d+) मिनट पहले" - ], - "\\1 month ago": [ - "(\\d+) माह पहले" - ], - "\\1 second ago": [ - "(\\d+) से पहले", - "(\\d+) सेकंड पहले" - ], - "\\1 week ago": [ - "(\\d+) सप्ताह पहले" - ], - "\\1 year ago": [ - "(\\d+) वर्ष पहले" - ], - "in \\1 day": [ - "(\\d+) दिन में" - ], - "in \\1 hour": [ - "(\\d+) घं में", - "(\\d+) घंटे में" - ], - "in \\1 minute": [ - "(\\d+) मि में", - "(\\d+) मिनट में" - ], - "in \\1 month": [ - "(\\d+) माह में" - ], - "in \\1 second": [ - "(\\d+) से में", - "(\\d+) सेकंड में" - ], - "in \\1 week": [ - "(\\d+) सप्ताह में" - ], - "in \\1 year": [ - "(\\d+) वर्ष में" - ], - "in \\1 decade": [ - "(\\d+) दशक में" - ], - "\\1 decade ago": [ - "(\\d+) दशक पहले" - ] - }, - "locale_specific": {}, - "skip": [ - "के", - "को", - "बजे", - "सन्", - "से", - " ", - "'", - ",", - "-", - ".", - "/", - ";", - "@", - "[", - "]", - "|", - "," - ], - "sentence_splitter_group": 3, - "decade": [ - "दशक" - ], - "ago": [ - "पहले", - "पूर्व" - ], - "in": [ - "में", - "बाद" - ] -} From 2f1f8fc5007cc89ffadcf949bc7cbf812b8f971e Mon Sep 17 00:00:00 2001 From: Kanishk Pachauri Date: Tue, 28 Sep 2021 00:02:24 +0530 Subject: [PATCH 06/29] Delete hi.yaml --- .../date_translation_data/hi.yaml | 49 ------------------- 1 file changed, 49 deletions(-) delete mode 100644 dateparser_data/supplementary_language_data/date_translation_data/hi.yaml diff --git a/dateparser_data/supplementary_language_data/date_translation_data/hi.yaml b/dateparser_data/supplementary_language_data/date_translation_data/hi.yaml deleted file mode 100644 index 6374ff80e..000000000 --- a/dateparser_data/supplementary_language_data/date_translation_data/hi.yaml +++ /dev/null @@ -1,49 +0,0 @@ -skip: ["के", "को", "बजे", "सन्", "से"] - -sentence_splitter_group : 3 - -september: - - सितम्बर -october: - - अक्टूबर -november: - - नवम्बर -december: - - दिसम्बर -decade: - - दशक - -year: - - साल - - वर्षों -month: - - महीना - - मास - - महीने -day: - - दिवस -hour: - - घंटे - -ago: - - पहले - - पूर्व -in: - - में - - बाद - -relative-type: - 2 day ago: - - परसों - 1 decade ago: - - पिछला दशक - in 1 decade: - - अगला दशक - -relative-type-regex: - in \1 decade: - - (\d+) दशक में - \1 decade ago: - - (\d+) दशक पहले - - From 7edac70717c6b9e1769df2d847ed6b87c5f79485 Mon Sep 17 00:00:00 2001 From: Kanishk Pachauri Date: Tue, 28 Sep 2021 12:50:45 +0530 Subject: [PATCH 07/29] fixed accidentally deleted files --- dateparser/data/date_translation_data/hi.py | 261 ++++++++++++++++++ .../date_translation_data/hi.yaml | 49 ++++ 2 files changed, 310 insertions(+) create mode 100644 dateparser/data/date_translation_data/hi.py create mode 100644 dateparser_data/supplementary_language_data/date_translation_data/hi.yaml diff --git a/dateparser/data/date_translation_data/hi.py b/dateparser/data/date_translation_data/hi.py new file mode 100644 index 000000000..6ce92798d --- /dev/null +++ b/dateparser/data/date_translation_data/hi.py @@ -0,0 +1,261 @@ +info = { + "name": "hi", + "date_order": "DMY", + "january": [ + "जनवरी", + "जन॰" + ], + "february": [ + "फ़रवरी", + "फ़र॰" + ], + "march": [ + "मार्च" + ], + "april": [ + "अप्रैल" + ], + "may": [ + "मई" + ], + "june": [ + "जून" + ], + "july": [ + "जुलाई", + "जुल॰" + ], + "august": [ + "अगस्त", + "अग॰" + ], + "september": [ + "सितंबर", + "सित॰", + "सितम्बर" + ], + "october": [ + "अक्तूबर", + "अक्तू॰", + "अक्टूबर" + ], + "november": [ + "नवंबर", + "नव॰", + "नवम्बर" + ], + "december": [ + "दिसंबर", + "दिस॰", + "दिसम्बर" + ], + "monday": [ + "सोम", + "सोमवार" + ], + "tuesday": [ + "मंगल", + "मंगलवार" + ], + "wednesday": [ + "बुध", + "बुधवार" + ], + "thursday": [ + "गुरु", + "गुरुवार" + ], + "friday": [ + "शुक्र", + "शुक्रवार" + ], + "saturday": [ + "शनि", + "शनिवार" + ], + "sunday": [ + "रवि", + "रविवार" + ], + "am": [ + "पूर्वाह्न" + ], + "pm": [ + "अपराह्न" + ], + "year": [ + "वर्ष", + "साल", + "वर्षों" + ], + "month": [ + "माह", + "महीना", + "मास", + "महीने" + ], + "week": [ + "सप्ताह" + ], + "day": [ + "दिन", + "दिवस" + ], + "hour": [ + "घं", + "घंटा", + "घंटे" + ], + "minute": [ + "मि", + "मिनट" + ], + "second": [ + "से", + "सेकंड" + ], + "relative-type": { + "0 day ago": [ + "आज" + ], + "0 hour ago": [ + "यह घंटा" + ], + "0 minute ago": [ + "यह मिनट" + ], + "0 month ago": [ + "इस माह" + ], + "0 second ago": [ + "अब" + ], + "0 week ago": [ + "इस सप्ताह" + ], + "0 year ago": [ + "इस वर्ष" + ], + "1 day ago": [ + "कल" + ], + "1 month ago": [ + "पिछला माह" + ], + "1 week ago": [ + "पिछला सप्ताह" + ], + "1 year ago": [ + "पिछला वर्ष" + ], + "in 1 day": [ + "कल" + ], + "in 1 month": [ + "अगला माह" + ], + "in 1 week": [ + "अगला सप्ताह" + ], + "in 1 year": [ + "अगला वर्ष" + ], + "2 day ago": [ + "परसों" + ], + "1 decade ago": [ + "पिछला दशक" + ], + "in 1 decade": [ + "अगला दशक" + ] + }, + "relative-type-regex": { + "\\1 day ago": [ + "(\\d+) दिन पहले" + ], + "\\1 hour ago": [ + "(\\d+) घं पहले", + "(\\d+) घंटे पहले" + ], + "\\1 minute ago": [ + "(\\d+) मि पहले", + "(\\d+) मिनट पहले" + ], + "\\1 month ago": [ + "(\\d+) माह पहले" + ], + "\\1 second ago": [ + "(\\d+) से पहले", + "(\\d+) सेकंड पहले" + ], + "\\1 week ago": [ + "(\\d+) सप्ताह पहले" + ], + "\\1 year ago": [ + "(\\d+) वर्ष पहले" + ], + "in \\1 day": [ + "(\\d+) दिन में" + ], + "in \\1 hour": [ + "(\\d+) घं में", + "(\\d+) घंटे में" + ], + "in \\1 minute": [ + "(\\d+) मि में", + "(\\d+) मिनट में" + ], + "in \\1 month": [ + "(\\d+) माह में" + ], + "in \\1 second": [ + "(\\d+) से में", + "(\\d+) सेकंड में" + ], + "in \\1 week": [ + "(\\d+) सप्ताह में" + ], + "in \\1 year": [ + "(\\d+) वर्ष में" + ], + "in \\1 decade": [ + "(\\d+) दशक में" + ], + "\\1 decade ago": [ + "(\\d+) दशक पहले" + ] + }, + "locale_specific": {}, + "skip": [ + "के", + "को", + "बजे", + "सन्", + "से", + " ", + "'", + ",", + "-", + ".", + "/", + ";", + "@", + "[", + "]", + "|", + "," + ], + "sentence_splitter_group": 3, + "decade": [ + "दशक" + ], + "ago": [ + "पहले", + "पूर्व" + ], + "in": [ + "में", + "बाद" + ] +} diff --git a/dateparser_data/supplementary_language_data/date_translation_data/hi.yaml b/dateparser_data/supplementary_language_data/date_translation_data/hi.yaml new file mode 100644 index 000000000..6374ff80e --- /dev/null +++ b/dateparser_data/supplementary_language_data/date_translation_data/hi.yaml @@ -0,0 +1,49 @@ +skip: ["के", "को", "बजे", "सन्", "से"] + +sentence_splitter_group : 3 + +september: + - सितम्बर +october: + - अक्टूबर +november: + - नवम्बर +december: + - दिसम्बर +decade: + - दशक + +year: + - साल + - वर्षों +month: + - महीना + - मास + - महीने +day: + - दिवस +hour: + - घंटे + +ago: + - पहले + - पूर्व +in: + - में + - बाद + +relative-type: + 2 day ago: + - परसों + 1 decade ago: + - पिछला दशक + in 1 decade: + - अगला दशक + +relative-type-regex: + in \1 decade: + - (\d+) दशक में + \1 decade ago: + - (\d+) दशक पहले + + From 9a585be693093d295772db96b7d0a8a8b978f1a8 Mon Sep 17 00:00:00 2001 From: Kanishk Pachauri Date: Tue, 28 Sep 2021 19:38:10 +0530 Subject: [PATCH 08/29] added test for centuries --- tests/test_freshness_date_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_freshness_date_parser.py b/tests/test_freshness_date_parser.py index 7a9211acb..627f66a50 100644 --- a/tests/test_freshness_date_parser.py +++ b/tests/test_freshness_date_parser.py @@ -68,7 +68,7 @@ def test_relative_past_dates_with_time_as_period(self, date_string, ago, period) param("1 century and 11 months", ago={'years': 100, 'months': 11}, period='month'), param("last century", ago={'years': 100}, period='year'), param("a century ago", ago={'years': 100}, period='year'), - param("10 century", ago={'years': 1000}, period='year'), + param("10 centuries", ago={'years': 1000}, period='year'), param("1 decade", ago={'years': 10}, period='year'), param("1 decade 2 years", ago={'years': 12}, period='year'), param("1 decade 12 months", ago={'years': 10, 'months': 12}, period='month'), @@ -601,7 +601,7 @@ def test_relative_past_dates(self, date_string, ago, period): param("1 century and 11 months", ago={'years': 100, 'months': 11}, period='month'), param("last century", ago={'years': 100}, period='year'), param("a century ago", ago={'years': 100}, period='year'), - param("10 century", ago={'years': 1000}, period='year'), + param("10 centuries", ago={'years': 1000}, period='year'), param("1 decade", ago={'years': 10}, period='year'), param("1 decade 2 years", ago={'years': 12}, period='year'), param("1 decade 12 months", ago={'years': 10, 'months': 12}, period='month'), From ec50655cce6463717a9a1614b4d59f2ed80a1dac Mon Sep 17 00:00:00 2001 From: Kanishk Pachauri Date: Tue, 28 Sep 2021 19:44:52 +0530 Subject: [PATCH 09/29] added test for centuries --- tests/test_freshness_date_parser.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_freshness_date_parser.py b/tests/test_freshness_date_parser.py index 627f66a50..dd81bd557 100644 --- a/tests/test_freshness_date_parser.py +++ b/tests/test_freshness_date_parser.py @@ -68,6 +68,7 @@ def test_relative_past_dates_with_time_as_period(self, date_string, ago, period) param("1 century and 11 months", ago={'years': 100, 'months': 11}, period='month'), param("last century", ago={'years': 100}, period='year'), param("a century ago", ago={'years': 100}, period='year'), + param("6 centurys", ago={'years': 600}, period='year'), param("10 centuries", ago={'years': 1000}, period='year'), param("1 decade", ago={'years': 10}, period='year'), param("1 decade 2 years", ago={'years': 12}, period='year'), @@ -601,6 +602,7 @@ def test_relative_past_dates(self, date_string, ago, period): param("1 century and 11 months", ago={'years': 100, 'months': 11}, period='month'), param("last century", ago={'years': 100}, period='year'), param("a century ago", ago={'years': 100}, period='year'), + param("6 centurys", ago={'years': 600}, period='year'), param("10 centuries", ago={'years': 1000}, period='year'), param("1 decade", ago={'years': 10}, period='year'), param("1 decade 2 years", ago={'years': 12}, period='year'), From 5b55b2a1bb5762faa5bbf9bbc3e6d1974cd91b53 Mon Sep 17 00:00:00 2001 From: Kanishk Pachauri Date: Mon, 4 Oct 2021 21:00:02 +0530 Subject: [PATCH 10/29] fixed and added centurys support --- dateparser/data/date_translation_data/en.py | 4 ++-- .../supplementary_language_data/date_translation_data/en.yaml | 4 ++-- dateparser_scripts/tests/pyvenv.cfg | 3 +++ tests/test_freshness_date_parser.py | 1 + 4 files changed, 8 insertions(+), 4 deletions(-) create mode 100644 dateparser_scripts/tests/pyvenv.cfg diff --git a/dateparser/data/date_translation_data/en.py b/dateparser/data/date_translation_data/en.py index b0fb33505..2d7f23067 100644 --- a/dateparser/data/date_translation_data/en.py +++ b/dateparser/data/date_translation_data/en.py @@ -280,10 +280,10 @@ "(\\d+) decades? ago" ], "in \\1 century": [ - "in (\\d+) century?" + "in (\\d+) centurys?" ], "\\1 century ago": [ - "(\\d+) century? ago" + "(\\d+) centurys? ago" ], "in \\1 fortnight": [ "in (\\d+) fortnight?" diff --git a/dateparser_data/supplementary_language_data/date_translation_data/en.yaml b/dateparser_data/supplementary_language_data/date_translation_data/en.yaml index 7e30e3124..addf87c1c 100644 --- a/dateparser_data/supplementary_language_data/date_translation_data/en.yaml +++ b/dateparser_data/supplementary_language_data/date_translation_data/en.yaml @@ -71,9 +71,9 @@ relative-type-regex: \1 decade ago: - (\d+) decades? ago in \1 century: - - in (\d+) century? + - in (\d+) centurys? \1 century ago: - - (\d+) century? ago + - (\d+) centurys? ago in \1 fortnight: - in (\d+) fortnight? \1 fortnight ago: diff --git a/dateparser_scripts/tests/pyvenv.cfg b/dateparser_scripts/tests/pyvenv.cfg new file mode 100644 index 000000000..6b39f5ee3 --- /dev/null +++ b/dateparser_scripts/tests/pyvenv.cfg @@ -0,0 +1,3 @@ +home = /opt/homebrew/opt/python@3.9/bin +include-system-site-packages = false +version = 3.9.7 diff --git a/tests/test_freshness_date_parser.py b/tests/test_freshness_date_parser.py index dd81bd557..3809cd35a 100644 --- a/tests/test_freshness_date_parser.py +++ b/tests/test_freshness_date_parser.py @@ -1102,6 +1102,7 @@ def test_normalized_relative_dates(self, date_string, ago, period): param('in 1 century 12 years', in_future={'years': 112}, period='year'), param('next century', in_future={'years': 100}, period='year'), param('in a century', in_future={'years': 100}, period='year'), + param('in 3 centurys', in_future={'years': 300}, period='year'), param('in 1 decade 2 months', in_future={'years': 10, 'months': 2}, period='month'), param('in 100 decades', in_future={'years': 1000}, period='year'), param('in 1 decade 12 years', in_future={'years': 22}, period='year'), From b461617cec231de79bf1096caa87be187554fd33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Tue, 5 Oct 2021 07:53:44 +0200 Subject: [PATCH 11/29] Delete pyvenv.cfg --- dateparser_scripts/tests/pyvenv.cfg | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 dateparser_scripts/tests/pyvenv.cfg diff --git a/dateparser_scripts/tests/pyvenv.cfg b/dateparser_scripts/tests/pyvenv.cfg deleted file mode 100644 index 6b39f5ee3..000000000 --- a/dateparser_scripts/tests/pyvenv.cfg +++ /dev/null @@ -1,3 +0,0 @@ -home = /opt/homebrew/opt/python@3.9/bin -include-system-site-packages = false -version = 3.9.7 From ceae086a6a953826de32688fe2a852981dcf2a93 Mon Sep 17 00:00:00 2001 From: Kanishk Pachauri Date: Mon, 6 Dec 2021 00:36:25 +0530 Subject: [PATCH 12/29] fixed all the bugs now its working fine --- tests/test_freshness_date_parser.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_freshness_date_parser.py b/tests/test_freshness_date_parser.py index 3809cd35a..0edaaac18 100644 --- a/tests/test_freshness_date_parser.py +++ b/tests/test_freshness_date_parser.py @@ -70,6 +70,8 @@ def test_relative_past_dates_with_time_as_period(self, date_string, ago, period) param("a century ago", ago={'years': 100}, period='year'), param("6 centurys", ago={'years': 600}, period='year'), param("10 centuries", ago={'years': 1000}, period='year'), + param("2 century ago", ago={'years': 200}, period='year'), + param("two centuries ago", ago={'years': 200}, period='year'), param("1 decade", ago={'years': 10}, period='year'), param("1 decade 2 years", ago={'years': 12}, period='year'), param("1 decade 12 months", ago={'years': 10, 'months': 12}, period='month'), From 52582cd843a20bf08410ac24eac727327578c532 Mon Sep 17 00:00:00 2001 From: Kanishk Pachauri Date: Mon, 6 Dec 2021 00:40:03 +0530 Subject: [PATCH 13/29] fixed all bugs , now this is working fine --- date/pyvenv.cfg | 3 +++ dateparser/data/date_translation_data/en.py | 3 ++- .../supplementary_language_data/date_translation_data/en.yaml | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 date/pyvenv.cfg diff --git a/date/pyvenv.cfg b/date/pyvenv.cfg new file mode 100644 index 000000000..623ce65c4 --- /dev/null +++ b/date/pyvenv.cfg @@ -0,0 +1,3 @@ +home = /Users/anton/opt/anaconda3/bin +include-system-site-packages = false +version = 3.8.8 diff --git a/dateparser/data/date_translation_data/en.py b/dateparser/data/date_translation_data/en.py index 2d7f23067..7a1853d03 100644 --- a/dateparser/data/date_translation_data/en.py +++ b/dateparser/data/date_translation_data/en.py @@ -280,7 +280,8 @@ "(\\d+) decades? ago" ], "in \\1 century": [ - "in (\\d+) centurys?" + "in (\\d+) centurys?", + "in (\\d+) centur(?:ys?|ies)" ], "\\1 century ago": [ "(\\d+) centurys? ago" diff --git a/dateparser_data/supplementary_language_data/date_translation_data/en.yaml b/dateparser_data/supplementary_language_data/date_translation_data/en.yaml index addf87c1c..8724a1927 100644 --- a/dateparser_data/supplementary_language_data/date_translation_data/en.yaml +++ b/dateparser_data/supplementary_language_data/date_translation_data/en.yaml @@ -72,6 +72,7 @@ relative-type-regex: - (\d+) decades? ago in \1 century: - in (\d+) centurys? + - in (\d+) centur(?:ys?|ies) \1 century ago: - (\d+) centurys? ago in \1 fortnight: From 83c593d64d7e40e6f55bf8170c4ffc615b560e4d Mon Sep 17 00:00:00 2001 From: Kanishk Pachauri Date: Mon, 20 Jun 2022 03:19:22 +0530 Subject: [PATCH 14/29] Removed repeated test --- tests/test_freshness_date_parser.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_freshness_date_parser.py b/tests/test_freshness_date_parser.py index 0edaaac18..5e8278e97 100644 --- a/tests/test_freshness_date_parser.py +++ b/tests/test_freshness_date_parser.py @@ -61,7 +61,6 @@ def test_relative_past_dates_with_time_as_period(self, date_string, ago, period) param('last fortnight', ago={'days': 14}, period='day'), param('14 fortnight', ago={'days': 196}, period='day'), param('a fortnight ago', ago={'days': 14}, period='day'), - param('last fortnight', ago={'days': 14}, period='day'), param("1 century", ago={'years': 100}, period='year'), param("1 century 2 years", ago={'years': 102}, period='year'), param("1 century 12 months", ago={'years': 100, 'months': 12}, period='month'), From e9ab0d0d4cfec91cae85ee90be656a5afe97d70b Mon Sep 17 00:00:00 2001 From: Kanishk Pachauri Date: Mon, 20 Jun 2022 03:21:00 +0530 Subject: [PATCH 15/29] Removed repeated test for last fortnight --- tests/test_freshness_date_parser.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_freshness_date_parser.py b/tests/test_freshness_date_parser.py index 5e8278e97..57e21cd20 100644 --- a/tests/test_freshness_date_parser.py +++ b/tests/test_freshness_date_parser.py @@ -596,7 +596,6 @@ def test_relative_past_dates(self, date_string, ago, period): param('last fortnight', ago={'days': 14}, period='day'), param('14 fortnight', ago={'days': 196}, period='day'), param('a fortnight ago', ago={'days': 14}, period='day'), - param('last fortnight', ago={'days': 14}, period='day'), param("1 century", ago={'years': 100}, period='year'), param("1 century 2 years", ago={'years': 102}, period='year'), param("1 century 12 months", ago={'years': 100, 'months': 12}, period='month'), From b97612e35dd3ae04d6e8f14ee788a151f0c9e501 Mon Sep 17 00:00:00 2001 From: Kanishk Pachauri Date: Mon, 20 Jun 2022 03:24:15 +0530 Subject: [PATCH 16/29] added my env to gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index afa82848f..08a6ace5f 100644 --- a/.gitignore +++ b/.gitignore @@ -50,3 +50,6 @@ docs/_build # Other raw_data + +#venv +dateparserenv \ No newline at end of file From fcab861117b1d87fac3c0f73eaf58ba28cb36107 Mon Sep 17 00:00:00 2001 From: Kanishk Pachauri Date: Mon, 20 Jun 2022 03:26:17 +0530 Subject: [PATCH 17/29] Added plural for decade in hindi --- .../supplementary_language_data/date_translation_data/hi.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/dateparser_data/supplementary_language_data/date_translation_data/hi.yaml b/dateparser_data/supplementary_language_data/date_translation_data/hi.yaml index 6374ff80e..c77c42a09 100644 --- a/dateparser_data/supplementary_language_data/date_translation_data/hi.yaml +++ b/dateparser_data/supplementary_language_data/date_translation_data/hi.yaml @@ -12,6 +12,7 @@ december: - दिसम्बर decade: - दशक + - दशकों year: - साल From 93863fb3bbf358363dc9d533cb2b3e3ae71dec8f Mon Sep 17 00:00:00 2001 From: Kanishk Pachauri Date: Mon, 20 Jun 2022 03:34:35 +0530 Subject: [PATCH 18/29] Added plural for decade in hindi --- dateparser/data/date_translation_data/hi.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dateparser/data/date_translation_data/hi.py b/dateparser/data/date_translation_data/hi.py index 6ce92798d..6bba1b451 100644 --- a/dateparser/data/date_translation_data/hi.py +++ b/dateparser/data/date_translation_data/hi.py @@ -248,7 +248,8 @@ ], "sentence_splitter_group": 3, "decade": [ - "दशक" + "दशक", + "दशकों" ], "ago": [ "पहले", From 11c6a5f22506c8de8d0f94e4f0e3d2ff4db77022 Mon Sep 17 00:00:00 2001 From: Kanishk Pachauri Date: Mon, 20 Jun 2022 03:40:52 +0530 Subject: [PATCH 19/29] Removed some garbage files --- date/pyvenv.cfg | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 date/pyvenv.cfg diff --git a/date/pyvenv.cfg b/date/pyvenv.cfg deleted file mode 100644 index 623ce65c4..000000000 --- a/date/pyvenv.cfg +++ /dev/null @@ -1,3 +0,0 @@ -home = /Users/anton/opt/anaconda3/bin -include-system-site-packages = false -version = 3.8.8 From 7649958da36eba21cf3c5c4a4084e8591b5c2c13 Mon Sep 17 00:00:00 2001 From: Kanishk Pachauri Date: Mon, 20 Jun 2022 04:18:04 +0530 Subject: [PATCH 20/29] Added support for counting from 1 to 12 in Hindi --- dateparser/data/date_translation_data/hi.py | 38 +++++++++++++++++++ .../date_translation_data/hi.yaml | 13 +++++++ 2 files changed, 51 insertions(+) diff --git a/dateparser/data/date_translation_data/hi.py b/dateparser/data/date_translation_data/hi.py index 6bba1b451..7bf8f2be3 100644 --- a/dateparser/data/date_translation_data/hi.py +++ b/dateparser/data/date_translation_data/hi.py @@ -258,5 +258,43 @@ "in": [ "में", "बाद" + ], + "simplifications": [ + { + "एक": "1" + }, + { + "दो": "2" + }, + { + "तीन": "3" + }, + { + "चार": "4" + }, + { + "पांच": "5" + }, + { + "छह": "6" + }, + { + "सात": "7" + }, + { + "आठ": "8" + }, + { + "नौ": "9" + }, + { + "दस": "10" + }, + { + "ग्यारह": "11" + }, + { + "बारह": "12" + } ] } diff --git a/dateparser_data/supplementary_language_data/date_translation_data/hi.yaml b/dateparser_data/supplementary_language_data/date_translation_data/hi.yaml index c77c42a09..43cfaafb9 100644 --- a/dateparser_data/supplementary_language_data/date_translation_data/hi.yaml +++ b/dateparser_data/supplementary_language_data/date_translation_data/hi.yaml @@ -48,3 +48,16 @@ relative-type-regex: - (\d+) दशक पहले +simplifications: + - एक: '1' + - दो: '2' + - तीन: '3' + - चार: '4' + - पांच: '5' + - छह: '6' + - सात: '7' + - आठ: '8' + - नौ: '9' + - दस: '10' + - ग्यारह: '11' + - बारह: '12' \ No newline at end of file From 472cf9bd7ca52252835f1018a945c76b6f28e797 Mon Sep 17 00:00:00 2001 From: Kanishk Pachauri Date: Mon, 20 Jun 2022 04:18:51 +0530 Subject: [PATCH 21/29] Added test for 2 centuries ago in hindi --- tests/test_freshness_date_parser.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_freshness_date_parser.py b/tests/test_freshness_date_parser.py index 57e21cd20..f824cbcde 100644 --- a/tests/test_freshness_date_parser.py +++ b/tests/test_freshness_date_parser.py @@ -870,6 +870,9 @@ def test_relative_past_dates(self, date_string, ago, period): param('1 वर्ष 7 महीने', ago={'years': 1, 'months': 7}, period='month'), param('आज', ago={'days': 0}, period='day'), param('1 दशक पहले', ago={'years': 10}, period='year'), + param('1 दशक पूर्व', ago={'years': 10}, period='year'), + param('दो दशक पहले', ago={'years': 20}, period='year'), + param('10 दशकों पहले', ago={'years': 100}, period='year'), # af param("2 uur gelede", ago={'hours': 2}, period='day'), From e13c9257cff9243f35c2e287e617da6e9ad17d26 Mon Sep 17 00:00:00 2001 From: Kanishk Pachauri Date: Mon, 20 Jun 2022 04:43:52 +0530 Subject: [PATCH 22/29] Add more words for (in) in hindi language --- dateparser/data/date_translation_data/hi.py | 19 ++++++++++++++++--- .../date_translation_data/hi.yaml | 12 ++++++++++++ 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/dateparser/data/date_translation_data/hi.py b/dateparser/data/date_translation_data/hi.py index 7bf8f2be3..302f7ebb6 100644 --- a/dateparser/data/date_translation_data/hi.py +++ b/dateparser/data/date_translation_data/hi.py @@ -220,10 +220,18 @@ "(\\d+) वर्ष में" ], "in \\1 decade": [ - "(\\d+) दशक में" + "(\\d+) दशक में", + "(\\d+) दशकों में", + "(\\d+) दशक मे", + "(\\d+) दशकों मे", + "(\\d+) दशक बाद", + "(\\d+) दशकों बाद", + "(\\d+) दशक पश्चात", + "(\\d+) दशकों पश्चात" ], "\\1 decade ago": [ - "(\\d+) दशक पहले" + "(\\d+) दशक पहले", + "(\\d+) दशकों पहले" ] }, "locale_specific": {}, @@ -257,7 +265,9 @@ ], "in": [ "में", - "बाद" + "बाद", + "पश्चात", + "मे" ], "simplifications": [ { @@ -290,6 +300,9 @@ { "दस": "10" }, + { + "दश": "10" + }, { "ग्यारह": "11" }, diff --git a/dateparser_data/supplementary_language_data/date_translation_data/hi.yaml b/dateparser_data/supplementary_language_data/date_translation_data/hi.yaml index 43cfaafb9..cbbe765da 100644 --- a/dateparser_data/supplementary_language_data/date_translation_data/hi.yaml +++ b/dateparser_data/supplementary_language_data/date_translation_data/hi.yaml @@ -32,6 +32,8 @@ ago: in: - में - बाद + - पश्चात + - मे relative-type: 2 day ago: @@ -44,8 +46,17 @@ relative-type: relative-type-regex: in \1 decade: - (\d+) दशक में + - (\d+) दशकों में + - (\d+) दशक मे + - (\d+) दशकों मे + - (\d+) दशक बाद + - (\d+) दशकों बाद + - (\d+) दशक पश्चात + - (\d+) दशकों पश्चात + \1 decade ago: - (\d+) दशक पहले + - (\d+) दशकों पहले simplifications: @@ -59,5 +70,6 @@ simplifications: - आठ: '8' - नौ: '9' - दस: '10' + - दश: '10' - ग्यारह: '11' - बारह: '12' \ No newline at end of file From 6f7630ca31d7bbd39cc3be47c7cadf522bb9fd13 Mon Sep 17 00:00:00 2001 From: Kanishk Pachauri Date: Mon, 20 Jun 2022 04:44:18 +0530 Subject: [PATCH 23/29] Added tests for the in_future in decade in hindi --- tests/test_freshness_date_parser.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/test_freshness_date_parser.py b/tests/test_freshness_date_parser.py index f824cbcde..ce96d4bc6 100644 --- a/tests/test_freshness_date_parser.py +++ b/tests/test_freshness_date_parser.py @@ -1201,6 +1201,10 @@ def test_normalized_relative_dates(self, date_string, ago, period): param('1 वर्ष, 5 महीने, 1 सप्ताह में', in_future={'years': 1, 'months': 5, 'weeks': 1}, period='week'), param('1 दशक में', in_future={'years': 10}, period='year'), + param('पांच दशक बाद', in_future={'years': 50}, period='year'), + # param('दश दशक पश्चात', in_future={'years': 100}, period='year'), + param('9 दशकों मे', in_future={'years': 90}, period='year'), + # af param("oor 10 jaar", in_future={'years': 10}, period='year'), From d89210c9cea03584f0da46c85afcf449ef95080c Mon Sep 17 00:00:00 2001 From: Kanishk Pachauri Date: Mon, 20 Jun 2022 15:24:45 +0530 Subject: [PATCH 24/29] Added test for decade in future in hindi --- tests/test_freshness_date_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_freshness_date_parser.py b/tests/test_freshness_date_parser.py index ce96d4bc6..23ce18c14 100644 --- a/tests/test_freshness_date_parser.py +++ b/tests/test_freshness_date_parser.py @@ -1202,7 +1202,7 @@ def test_normalized_relative_dates(self, date_string, ago, period): in_future={'years': 1, 'months': 5, 'weeks': 1}, period='week'), param('1 दशक में', in_future={'years': 10}, period='year'), param('पांच दशक बाद', in_future={'years': 50}, period='year'), - # param('दश दशक पश्चात', in_future={'years': 100}, period='year'), + param('दश दशक पश्चात', in_future={'years': 100}, period='year'), param('9 दशकों मे', in_future={'years': 90}, period='year'), From 46dcbc99f17e5388758bc495df535e1a77378c2a Mon Sep 17 00:00:00 2001 From: Kanishk Pachauri Date: Mon, 20 Jun 2022 15:31:24 +0530 Subject: [PATCH 25/29] Added test for coming fortnight --- tests/test_freshness_date_parser.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_freshness_date_parser.py b/tests/test_freshness_date_parser.py index 23ce18c14..a7d0ce2c1 100644 --- a/tests/test_freshness_date_parser.py +++ b/tests/test_freshness_date_parser.py @@ -1100,6 +1100,8 @@ def test_normalized_relative_dates(self, date_string, ago, period): # English dates param('in a fortnight', in_future={'days': 14}, period='day'), param('next fortnight', in_future={'days': 14}, period='day'), + param('coming fortnight', in_future={'days': 14}, period='day'), + param('in coming fortnight', in_future={'days': 14}, period='day'), param('in 1 century 2 months', in_future={'years': 100, 'months': 2}, period='month'), param('in 10 century', in_future={'years': 1000}, period='year'), param('in 1 century 12 years', in_future={'years': 112}, period='year'), From 10ce3a4f6325747fe316396c7684f0cc8e3f91d1 Mon Sep 17 00:00:00 2001 From: Kanishk Pachauri Date: Mon, 20 Jun 2022 15:32:00 +0530 Subject: [PATCH 26/29] Added Support for in coming fortnight --- dateparser/data/date_translation_data/en.py | 3 ++- .../supplementary_language_data/date_translation_data/en.yaml | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/dateparser/data/date_translation_data/en.py b/dateparser/data/date_translation_data/en.py index 7a1853d03..0994fb680 100644 --- a/dateparser/data/date_translation_data/en.py +++ b/dateparser/data/date_translation_data/en.py @@ -201,7 +201,8 @@ "this fortnight" ], "in 1 fortnight": [ - "next fortnight" + "next fortnight", + "coming fortnight" ] }, "relative-type-regex": { diff --git a/dateparser_data/supplementary_language_data/date_translation_data/en.yaml b/dateparser_data/supplementary_language_data/date_translation_data/en.yaml index 8724a1927..04beaa0b1 100644 --- a/dateparser_data/supplementary_language_data/date_translation_data/en.yaml +++ b/dateparser_data/supplementary_language_data/date_translation_data/en.yaml @@ -64,6 +64,7 @@ relative-type: - this fortnight in 1 fortnight: - next fortnight + - coming fortnight relative-type-regex: in \1 decade: From a36c48b9df19015adb6cd4814e078507953135e8 Mon Sep 17 00:00:00 2001 From: Kanishk Pachauri Date: Tue, 21 Jun 2022 00:35:00 +0530 Subject: [PATCH 27/29] Fixed gitignore --- .gitignore | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 08e6d63c2..d8af92e12 100644 --- a/.gitignore +++ b/.gitignore @@ -51,7 +51,4 @@ docs/_build .vscode/ # Other -raw_data - -#venv -dateparserenv \ No newline at end of file +raw_data \ No newline at end of file From 2738b283fd26e68bfda37001e1ca264c17a52c7a Mon Sep 17 00:00:00 2001 From: Kanishk Pachauri Date: Tue, 21 Jun 2022 00:43:39 +0530 Subject: [PATCH 28/29] Fixed bug for centuries --- dateparser/data/date_translation_data/en.py | 2 +- .../supplementary_language_data/date_translation_data/en.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dateparser/data/date_translation_data/en.py b/dateparser/data/date_translation_data/en.py index 0994fb680..490943ba4 100644 --- a/dateparser/data/date_translation_data/en.py +++ b/dateparser/data/date_translation_data/en.py @@ -282,7 +282,7 @@ ], "in \\1 century": [ "in (\\d+) centurys?", - "in (\\d+) centur(?:ys?|ies)" + "in (\\d+) centuries?" ], "\\1 century ago": [ "(\\d+) centurys? ago" diff --git a/dateparser_data/supplementary_language_data/date_translation_data/en.yaml b/dateparser_data/supplementary_language_data/date_translation_data/en.yaml index 04beaa0b1..95e11b0c8 100644 --- a/dateparser_data/supplementary_language_data/date_translation_data/en.yaml +++ b/dateparser_data/supplementary_language_data/date_translation_data/en.yaml @@ -73,7 +73,7 @@ relative-type-regex: - (\d+) decades? ago in \1 century: - in (\d+) centurys? - - in (\d+) centur(?:ys?|ies) + - in (\d+) centuries? \1 century ago: - (\d+) centurys? ago in \1 fortnight: From 0943ee103bbeef63a69f1d14421bcab7d691e334 Mon Sep 17 00:00:00 2001 From: Kanishk Pachauri Date: Tue, 21 Jun 2022 00:44:59 +0530 Subject: [PATCH 29/29] Added tests for centuries --- tests/test_freshness_date_parser.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_freshness_date_parser.py b/tests/test_freshness_date_parser.py index a7d0ce2c1..3edce390b 100644 --- a/tests/test_freshness_date_parser.py +++ b/tests/test_freshness_date_parser.py @@ -1108,6 +1108,8 @@ def test_normalized_relative_dates(self, date_string, ago, period): param('next century', in_future={'years': 100}, period='year'), param('in a century', in_future={'years': 100}, period='year'), param('in 3 centurys', in_future={'years': 300}, period='year'), + param('in 3 centuries', in_future={'years': 300}, period='year'), + param('in 10 centuries', in_future={'years': 1000}, period='year'), param('in 1 decade 2 months', in_future={'years': 10, 'months': 2}, period='month'), param('in 100 decades', in_future={'years': 1000}, period='year'), param('in 1 decade 12 years', in_future={'years': 22}, period='year'),