diff --git a/dateparser/conf.py b/dateparser/conf.py index c14374a3f..c11860f03 100644 --- a/dateparser/conf.py +++ b/dateparser/conf.py @@ -31,19 +31,21 @@ class Settings: _pyfile_data = None _mod_settings = dict() - def __init__(self, settings=None): - if settings: - self._updateall(settings.items()) - else: + def __init__(self, **kwargs): + if not kwargs.get('settings'): self._updateall(self._get_settings_from_pyfile().items()) + elif len(self.__dict__) == 1: + self._updateall(kwargs['settings'].items()) + @classmethod - def get_key(cls, settings=None): - if not settings: - return 'default' + def get_key(cls, **kwargs): + if kwargs: + keys = sorted('{}-{}'.format(key, val) for key, val in kwargs.pop('settings').items()) + keys.extend(sorted('{}-{}'.format(key, val) for key, val in kwargs.items() if val)) + return hashlib.md5(''.join(sorted(keys)).encode('utf-8')).hexdigest() - keys = sorted(['%s-%s' % (key, str(settings[key])) for key in settings]) - return hashlib.md5(''.join(keys).encode('utf-8')).hexdigest() + return 'default' @classmethod def _get_settings_from_pyfile(cls): @@ -57,18 +59,20 @@ def _updateall(self, iterable): setattr(self, key, value) def replace(self, mod_settings=None, **kwds): - for k, v in kwds.items(): + _settings = kwds.get('settings', {}).copy() + for k, v in _settings.items(): if v is None: raise TypeError('Invalid {{"{}": {}}}'.format(k, v)) - for x in self._get_settings_from_pyfile().keys(): - kwds.setdefault(x, getattr(self, x)) + for key in self._get_settings_from_pyfile().keys(): + _settings.setdefault(key, getattr(self, key)) - kwds['_default'] = False + _settings['_default'] = False if mod_settings: - kwds['_mod_settings'] = mod_settings + _settings['_mod_settings'] = mod_settings - return self.__class__(settings=kwds) + kwds['settings'] = _settings + return self.__class__(**kwds) settings = Settings() @@ -77,11 +81,15 @@ def replace(self, mod_settings=None, **kwds): def apply_settings(f): @wraps(f) def wrapper(*args, **kwargs): - mod_settings = kwargs.get('settings') - kwargs['settings'] = mod_settings or settings + mod_settings = kwargs.get('settings', {}) + if mod_settings is None: + kwargs['settings'], mod_settings = {}, {} - if isinstance(kwargs['settings'], dict): - kwargs['settings'] = settings.replace(mod_settings=mod_settings, **kwargs['settings']) + if kwargs: + if isinstance(mod_settings, dict): + kwargs['settings'] = settings.replace(mod_settings=mod_settings.copy(), **kwargs) + else: + kwargs['settings'] = settings if not isinstance(kwargs['settings'], Settings): raise TypeError("settings can only be either dict or instance of Settings class") diff --git a/dateparser/freshness_date_parser.py b/dateparser/freshness_date_parser.py index 969eb8a59..cd8609c99 100644 --- a/dateparser/freshness_date_parser.py +++ b/dateparser/freshness_date_parser.py @@ -16,8 +16,6 @@ class FreshnessDateDataParser: """ Parses date string like "1 year, 2 months ago" and "3 hours, 50 minutes ago" """ - def __init__(self): - self.now = None def _are_all_words_units(self, date_string): skip = [_UNITS, @@ -59,42 +57,42 @@ def apply_time(dateobj, timeobj): ) if settings.RELATIVE_BASE: - self.now = settings.RELATIVE_BASE + now = settings.RELATIVE_BASE if 'local' not in _settings_tz: - self.now = localize_timezone(self.now, settings.TIMEZONE) + now = localize_timezone(now, settings.TIMEZONE) if ptz: - if self.now.tzinfo: - self.now = self.now.astimezone(ptz) + if now.tzinfo: + now = now.astimezone(ptz) else: if hasattr(ptz, 'localize'): - self.now = ptz.localize(self.now) + now = ptz.localize(now) else: - self.now = self.now.replace(tzinfo=ptz) + now = now.replace(tzinfo=ptz) - if not self.now.tzinfo: + if not now.tzinfo: if hasattr(self.get_local_tz(), 'localize'): - self.now = self.get_local_tz().localize(self.now) + now = self.get_local_tz().localize(now) else: - self.now = self.now.replace(tzinfo=self.get_local_tz()) + now = now.replace(tzinfo=self.get_local_tz()) elif ptz: _now = datetime.now(ptz) if 'local' in _settings_tz: - self.now = _now + now = _now else: - self.now = apply_timezone(_now, settings.TIMEZONE) + now = apply_timezone(_now, settings.TIMEZONE) else: if 'local' not in _settings_tz: utc_dt = datetime.utcnow() - self.now = apply_timezone(utc_dt, settings.TIMEZONE) + now = apply_timezone(utc_dt, settings.TIMEZONE) else: - self.now = datetime.now(self.get_local_tz()) + now = datetime.now(self.get_local_tz()) - date, period = self._parse_date(date_string, settings.PREFER_DATES_FROM) + date, period = self._parse_date(date_string, now, settings.PREFER_DATES_FROM) if date: old_date = date @@ -112,10 +110,9 @@ def apply_time(dateobj, timeobj): ): date = date.replace(tzinfo=None) - self.now = None return date, period - def _parse_date(self, date_string, prefer_dates_from): + def _parse_date(self, date_string, now, prefer_dates_from): if not self._are_all_words_units(date_string): return None, None @@ -135,9 +132,9 @@ def _parse_date(self, date_string, prefer_dates_from): or re.search(r'\bfuture\b', prefer_dates_from) and not re.search(r'\bago\b', date_string) ): - date = self.now + td + date = now + td else: - date = self.now - td + date = now - td return date, period def get_kwargs(self, date_string): diff --git a/dateparser/search/text_detection.py b/dateparser/search/text_detection.py index c9b45aa2a..4317cf5e3 100644 --- a/dateparser/search/text_detection.py +++ b/dateparser/search/text_detection.py @@ -11,7 +11,7 @@ def __init__(self, languages): self.language_chars = [] def get_unique_characters(self, settings): - settings = settings.replace(NORMALIZE=False) + settings = settings.replace(settings={'NORMALIZE': False}) for language in self.languages: chars = language.get_wordchars_for_detection(settings=settings) diff --git a/tests/test_concurrency.py b/tests/test_concurrency.py new file mode 100644 index 000000000..2cdb29b5c --- /dev/null +++ b/tests/test_concurrency.py @@ -0,0 +1,58 @@ +import concurrent.futures +import random +from datetime import datetime + +import dateparser +from tests import BaseTestCase + +RELATIVE = {'RELATIVE_BASE': datetime(2014, 9, 15, 10, 30)} + +TEST_DATA = [ + {'ds': 'Tue May 07, 2018 10:55 PM', 'expected': datetime(2018, 5, 7, 22, 55), 'loc': 'en'}, + {'ds': '2018-10-07T22:55:01', 'expected': datetime(2018, 10, 7, 22, 55, 1), 'loc': 'en'}, + {'ds': '2018-Oct-11', 'expected': datetime(2018, 10, 11, 0, 0), 'loc': 'en'}, + {'ds': '12.04.2018', 'expected': datetime(2018, 12, 4, 0, 0), 'loc': 'en'}, + {'ds': '12-10-2018 20:13', 'expected': datetime(2018, 12, 10, 20, 13), 'loc': 'en'}, + {'ds': '03.04.2019', 'expected': datetime(2019, 4, 3, 0, 0), 'loc': 'en-150'}, + {'ds': 'on Tue October 7, 2019 04:55 PM', 'expected': datetime(2019, 10, 7, 16, 55), 'loc': 'en-150'}, + {'ds': '2019Oct8', 'expected': datetime(2019, 10, 8, 0, 0), 'loc': 'en-150'}, + {'ds': '07.03.2020 - 11:13', 'expected': datetime(2020, 3, 7, 11, 13), 'loc': 'ru'}, + {'ds': '9 Авг. 2020 17:11:01', 'expected': datetime(2020, 8, 9, 17, 11, 1), 'loc': 'ru'}, + {'ds': '07.01.2020', 'expected': datetime(2020, 1, 7, 0, 0), 'loc': 'ru'}, + {'ds': 'yesterday 11:00', 'expected': datetime(2014, 9, 14, 11), 'loc': 'en', 'extra': RELATIVE}, + {'ds': '13 days ago', 'expected': datetime(2014, 9, 2, 10, 30), 'loc': 'en', 'extra': RELATIVE}, +] * 180 + +random.shuffle(TEST_DATA) + + +class TestConcurrency(BaseTestCase): + + def test_concurrency(self): + with concurrent.futures.ThreadPoolExecutor() as executor: + + results = list(executor.map(self.concurrency_test, TEST_DATA)) + results_with_error = [(r['ds'], r['error']) for r in results if r['error']] + msg = '{}Threads failed with errors:\n{}' + self.assertEqual([], results_with_error, + msg.format(len(results_with_error), set(results_with_error))) + + wrong_results = [str(r) for r in results if (r['expected'] != r['date'])] + msg = '{} Threads returned wrong date time:\n{}' + self.assertEqual([], wrong_results, + msg.format(len(wrong_results), '\n'.join(wrong_results))) + + @staticmethod + def concurrency_test(data_for_test): + try: + date_string = data_for_test['ds'] + date = dateparser.parse(date_string, locales=[data_for_test['loc']], + settings=data_for_test.get('extra')) + if date: + data_for_test['date'] = date + data_for_test['error'] = None + except Exception as error: + data_for_test['error'] = str(error) + data_for_test['date'] = None + finally: + return data_for_test diff --git a/tests/test_freshness_date_parser.py b/tests/test_freshness_date_parser.py index 10cef1834..5f8961e2d 100644 --- a/tests/test_freshness_date_parser.py +++ b/tests/test_freshness_date_parser.py @@ -1547,7 +1547,7 @@ def test_freshness_date_with_timezone_conversion(self, date_string, timezone, to self.then_time_is(time) def test_freshness_date_with_to_timezone_setting(self): - _settings = settings.replace(**{ + _settings = settings.replace(settings={ 'TIMEZONE': 'local', 'TO_TIMEZONE': 'UTC', 'RELATIVE_BASE': datetime(2014, 9, 1, 10, 30) @@ -1667,7 +1667,6 @@ def wrapped(*args, **kwargs): collecting_get_date_data(freshness_date_parser.get_date_data))) self.freshness_parser = Mock(wraps=freshness_date_parser) - self.add_patch(patch.object(self.freshness_parser, 'now', self.now)) dt_mock = Mock(wraps=dateparser.freshness_date_parser.datetime) dt_mock.utcnow = Mock(return_value=self.now) diff --git a/tests/test_settings.py b/tests/test_settings.py index 44c355c9c..b08b23583 100644 --- a/tests/test_settings.py +++ b/tests/test_settings.py @@ -76,7 +76,7 @@ def given_configurations(self, confs): if 'TIMEZONE' not in confs: confs.update({'TIMEZONE': 'local'}) - self.confs = settings.replace(**confs) + self.confs = settings.replace(settings=confs) def when_date_is_parsed(self): self.result = parse(self.given_ds, settings=(self.confs or {})) diff --git a/tests/test_utils.py b/tests/test_utils.py index a344606df..697290edb 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -84,7 +84,9 @@ def test_apply_timezone_function(self, date, timezone, expected): param(datetime(2015, 12, 12, 10, 12), timezone='-0500', expected=datetime(2015, 12, 12, 5, 12)), ]) def test_apply_timezone_from_settings_function(self, date, timezone, expected): - result = apply_timezone_from_settings(date, settings.replace(**{'TO_TIMEZONE': timezone, 'TIMEZONE': 'UTC'})) + result = apply_timezone_from_settings(date, + settings.replace(settings={'TO_TIMEZONE': timezone, 'TIMEZONE': 'UTC'}) + ) self.assertEqual(expected, result) @parameterized.expand([ @@ -101,7 +103,7 @@ def test_apply_timezone_from_settings_function_none_settings(self, date, expecte param(datetime(2015, 12, 12, 10, 12),), ]) def test_apply_timezone_from_settings_function_should_return_tz(self, date): - result = apply_timezone_from_settings(date, settings.replace(**{'RETURN_AS_TIMEZONE_AWARE': True})) + result = apply_timezone_from_settings(date, settings.replace(settings={'RETURN_AS_TIMEZONE_AWARE': True})) self.assertTrue(bool(result.tzinfo)) def test_registry_when_get_keys_not_implemented(self):