From 7e30093ec6a19d213ef7e063e89f63227a31d313 Mon Sep 17 00:00:00 2001
From: Morteza
Date: Sun, 16 Jul 2023 18:44:40 +0200
Subject: [PATCH 1/2] arabic month name parsing added to hijri date parser

---
 dateparser/calendars/hijri_parser.py | 31 +++++++++++++++++++++++++++++++
 dateparser/data/__init__.py          |  3 +--
 tests/test_hijri.py                  |  8 ++++++++
 3 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/dateparser/calendars/hijri_parser.py b/dateparser/calendars/hijri_parser.py
index 6b0cf6144..98a48e367 100644
--- a/dateparser/calendars/hijri_parser.py
+++ b/dateparser/calendars/hijri_parser.py
@@ -1,3 +1,4 @@
+from collections import OrderedDict
 from hijri_converter import convert
 
 from dateparser.calendars import non_gregorian_parser
@@ -47,6 +48,36 @@ class hijri_parser(non_gregorian_parser):
         "pm": ["مساءً"],
     }
 
+    _months = OrderedDict(
+        [
+            ("01", ["مُحرم", "محرم"]),
+            ("02", ["صفر"]),
+            ("03", ["ربيع الأول", "ربيع الاول"]),
+            ("04", ["ربيع الثاني", "ربيع الاخر"]),
+            ("05", ["جمادي الأول", "جمادي الاول"]),
+            ("06", ["جمادي الثاني", "جمادي الاخر"]),
+            ("07", ["رجب"]),
+            ("08", ["شعبان"]),
+            ("09", ["رمضان"]),
+            ("10", ["شوال"]),
+            ("11", ["ذو القعدة"]),
+            ("12", ["ذو الحجة"]),
+        ]
+    )
+
+    @classmethod
+    def _replace_months(cls, source):
+        """Replace Arabic Hijri month names in ``source`` with month numbers.
+
+        Every spelling listed in ``cls._months`` is substituted with its
+        zero-padded month number ("01" to "12").
+        """
+        result = source
+        for number, names in cls._months.items():
+            for arabic in names:
+                result = result.replace(arabic, number)
+        return result
+
     @classmethod
     def _replace_time_conventions(cls, source):
         result = source
diff --git a/dateparser/data/__init__.py b/dateparser/data/__init__.py
index d09e0eb7a..0bd6b7d25 100644
--- a/dateparser/data/__init__.py
+++ b/dateparser/data/__init__.py
@@ -1,3 +1,2 @@
 from dateparser.data import date_translation_data
-
-from .languages_info import language_locale_dict, language_order
+from .languages_info import language_order, language_locale_dict
diff --git a/tests/test_hijri.py b/tests/test_hijri.py
index c311a83d8..933627879 100644
--- a/tests/test_hijri.py
+++ b/tests/test_hijri.py
@@ -51,6 +51,14 @@ def then_parsed_datetime_is(self, dt):
                 dt_string="04-03-1433 هـ, 10:08 مساءً",
                 dt_obj=datetime(2012, 1, 27, 22, 8),
             ),
+            param(
+                dt_string="06 ربيع الأول 1433 هـ, 16:32 مساءً",
+                dt_obj=datetime(2012, 1, 29, 16, 32),
+            ),
+            param(
+                dt_string="13 شعبان 1441 هـ, 03:12 صباحاً",
+                dt_obj=datetime(2020, 4, 6, 3, 12),
+            ),
         ]
     )
     def test_datetime_parsing(

From 546ae3fac9f85d17ee9405e4be1be41ec775c639 Mon Sep 17 00:00:00 2001
From: Morteza
Date: Sun, 16 Jul 2023 18:48:24 +0200
Subject: [PATCH 2/2] add digit conversion to the code

---
 dateparser/calendars/hijri_parser.py | 33 +++++++++++++++++++++++++++++++++
 tests/test_hijri.py                  |  2 +-
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/dateparser/calendars/hijri_parser.py b/dateparser/calendars/hijri_parser.py
index 98a48e367..785ccc55a 100644
--- a/dateparser/calendars/hijri_parser.py
+++ b/dateparser/calendars/hijri_parser.py
@@ -48,6 +48,31 @@ class hijri_parser(non_gregorian_parser):
         "pm": ["مساءً"],
     }
 
+    _digits = {
+        # Arabic-Indic digits (U+0660-U+0669).
+        "٠": 0,
+        "١": 1,
+        "٢": 2,
+        "٣": 3,
+        "٤": 4,
+        "٥": 5,
+        "٦": 6,
+        "٧": 7,
+        "٨": 8,
+        "٩": 9,
+        # Extended Arabic-Indic (Persian) digits (U+06F0-U+06F9).
+        "۰": 0,
+        "۱": 1,
+        "۲": 2,
+        "۳": 3,
+        "۴": 4,
+        "۵": 5,
+        "۶": 6,
+        "۷": 7,
+        "۸": 8,
+        "۹": 9,
+    }
+
     _months = OrderedDict(
         [
             ("01", ["مُحرم", "محرم"]),
@@ -65,6 +90,14 @@ class hijri_parser(non_gregorian_parser):
         ]
     )
 
+    @classmethod
+    def _replace_digits(cls, source):
+        """Convert the digits listed in ``cls._digits`` to ASCII digits."""
+        result = source
+        for digit, number in cls._digits.items():
+            result = result.replace(digit, str(number))
+        return result
+
     @classmethod
     def _replace_months(cls, source):
         """Replace Arabic Hijri month names in ``source`` with month numbers.
diff --git a/tests/test_hijri.py b/tests/test_hijri.py
index 933627879..44ea6c152 100644
--- a/tests/test_hijri.py
+++ b/tests/test_hijri.py
@@ -56,7 +56,7 @@ def then_parsed_datetime_is(self, dt):
                 dt_obj=datetime(2012, 1, 29, 16, 32),
             ),
             param(
-                dt_string="13 شعبان 1441 هـ, 03:12 صباحاً",
+                dt_string="13 شعبان 1441 هـ, 03:۱۲ صباحاً",
                 dt_obj=datetime(2020, 4, 6, 3, 12),
             ),
         ]