From 350cafa88f504a73bb46c708c796d719e96a35f4 Mon Sep 17 00:00:00 2001 From: Serhii A Date: Tue, 31 Jan 2023 11:48:02 +0200 Subject: [PATCH] Hotfix id sunday (#1134) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add new rule for 'week' and remove existing 'week' translation from Indonesia (id) * Update id * Simplify extension of “week ago” support for Indonesian * Indonesian: interpret minggu as Sunday if not following a number --------- Co-authored-by: Adrián Chaves --- dateparser/data/date_translation_data/id.py | 9 ++++++++- dateparser/languages/locale.py | 1 + .../date_translation_data/id.yaml | 7 ++++++- tests/test_data.py | 5 +---- tests/test_languages.py | 4 ++++ 5 files changed, 20 insertions(+), 6 deletions(-) diff --git a/dateparser/data/date_translation_data/id.py b/dateparser/data/date_translation_data/id.py index a9d43a053..fe3e59e6b 100644 --- a/dateparser/data/date_translation_data/id.py +++ b/dateparser/data/date_translation_data/id.py @@ -202,7 +202,8 @@ ], "\\1 week ago": [ "(\\d+[.,]?\\d*) mgg lalu", - "(\\d+[.,]?\\d*) minggu yang lalu" + "(\\d+[.,]?\\d*) minggu yang lalu", + "(\\d+[.,]?\\d*) minggu lalu" ], "\\1 year ago": [ "(\\d+[.,]?\\d*) tahun yang lalu", @@ -235,6 +236,12 @@ "in \\1 year": [ "dalam (\\d+[.,]?\\d*) tahun", "dlm (\\d+[.,]?\\d*) thn" + ], + "\\1 week": [ + "(\\d+[.,]?\\d*) minggu" + ], + "sunday": [ + "minggu" ] }, "locale_specific": {}, diff --git a/dateparser/languages/locale.py b/dateparser/languages/locale.py index e3b43b6be..17b3906c9 100644 --- a/dateparser/languages/locale.py +++ b/dateparser/languages/locale.py @@ -134,6 +134,7 @@ def translate(self, date_string, keep_formatting=False, settings=None): for pattern, replacement in relative_translations.items(): if pattern.match(word): date_string_tokens[i] = pattern.sub(replacement, word) + break else: if word in dictionary: fallback = word if keep_formatting and not word.isalpha() else '' diff --git a/dateparser_data/supplementary_language_data/date_translation_data/id.yaml b/dateparser_data/supplementary_language_data/date_translation_data/id.yaml index 5500dcdbd..d02ada75c 100644 --- a/dateparser_data/supplementary_language_data/date_translation_data/id.yaml +++ b/dateparser_data/supplementary_language_data/date_translation_data/id.yaml @@ -30,8 +30,13 @@ relative-type: - baru saja relative-type-regex: + \1 week ago: + - (\d+[.,]?\d*) minggu lalu \1 month ago: - (\d+[.,]?\d*) bulan lalu - \1 year ago: - (\d+[.,]?\d*) tahun lalu + \1 week: + - (\d+[.,]?\d*) minggu + sunday: + - minggu diff --git a/tests/test_data.py b/tests/test_data.py index ad1f2cd1f..0f28a6275 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -62,10 +62,7 @@ def is_invalid_relative_regex_mapping(relative_regex_mapping): key, value = relative_regex_mapping if not (key and value and isinstance(key, str) and isinstance(value, list)): return True - if '\\1' not in key: - return True - return not (all([isinstance(x, str) for x in value]) - and all(['(\\d+[.,]?\\d*)' in x for x in value])) + return not all([isinstance(x, str) for x in value]) class TestLocaleInfo(BaseTestCase): diff --git a/tests/test_languages.py b/tests/test_languages.py index 96569fa9e..c27e5b83f 100644 --- a/tests/test_languages.py +++ b/tests/test_languages.py @@ -131,9 +131,13 @@ def setUp(self): # Tagalog param('tl', "Biyernes Hulyo 3, 2015", "friday july 3 2015"), param('tl', "Pebrero 5, 2015 7:00 pm", "february 5 2015 7:00 pm"), + # Indonesian param('id', "06 Sep 2015", "06 september 2015"), param('id', "07 Feb 2015 20:15", "07 february 2015 20:15"), + param('id', "Minggu, 18 Mar 2018 07:30", "sunday 18 march 2018 07:30"), + param('id', "3 minggu yang lalu", "3 week ago"), + param('id', "5 minggu", "5 week"), # Miscellaneous param('en', "2014-12-12T12:33:39-08:00", "2014-12-12 12:33:39-08:00"),