diff --git a/ckanext/harvest/helpers.py b/ckanext/harvest/helpers.py
index ba234c658..c9bfd51aa 100644
--- a/ckanext/harvest/helpers.py
+++ b/ckanext/harvest/helpers.py
@@ -6,7 +6,7 @@
 import ckan.lib.helpers as h
 import ckan.plugins as p
 
-from ckanext.harvest.model import UPDATE_FREQUENCIES, UPDATE_TIMES
+from ckanext.harvest.model import UPDATE_FREQUENCIES, UPDATE_TIMES, DAYS_OF_WEEK
 from ckanext.harvest.plugin import DATASET_TYPE_NAME
 from ckanext.harvest.interfaces import IHarvester
 
@@ -116,6 +116,12 @@ def harvest_times():
             for f in UPDATE_TIMES]
 
 
+def harvest_day_of_week():
+    '''Return the days of the week as text/value options for a select.'''
+    return [{'text': p.toolkit._(day), 'value': day}
+            for day in DAYS_OF_WEEK]
+
+
 def harvest_default_time():
     default_time = datetime.datetime.now().strftime('%I:00 %p')
     return default_time
diff --git a/ckanext/harvest/logic/action/update.py b/ckanext/harvest/logic/action/update.py
index 4ff626142..464754ae2 100644
--- a/ckanext/harvest/logic/action/update.py
+++ b/ckanext/harvest/logic/action/update.py
@@ -25,7 +25,7 @@
 from ckanext.harvest.queue import (
     get_gather_publisher, resubmit_jobs, resubmit_objects)
 
-from ckanext.harvest.model import HarvestSource, HarvestJob, HarvestObject
+from ckanext.harvest.model import HarvestSource, HarvestJob, HarvestObject, DAYS_OF_WEEK, UPDATE_FREQUENCIES
 from ckanext.harvest.logic import HarvestJobExists
 from ckanext.harvest.logic.dictization import harvest_job_dictize
 
@@ -448,34 +448,66 @@ def harvest_objects_import(context, data_dict):
     return last_objects_count
 
 
-def _calculate_next_run(frequency, time):
+def _calculate_next_run(frequency, time, day_of_week=None, start_date=None):
+    '''Return the datetime at which a harvest source should next run.
+
+    :param frequency: one of ``UPDATE_FREQUENCIES``
+    :param time: hour of the run as ``'%I:%M %p'`` text, e.g. ``'06:00 AM'``;
+        only the hour is used, and it is ignored for ``'ALWAYS'``
+    :param day_of_week: optional name from ``DAYS_OF_WEEK``; the result is
+        moved to the nearest such weekday, except for ``'DAILY'`` and
+        ``'ALWAYS'``
+    :param start_date: datetime to count from (default: current UTC time)
+    :raises Exception: if ``frequency`` is not in ``UPDATE_FREQUENCIES``
+    :raises ValueError: if ``day_of_week`` is needed but not in ``DAYS_OF_WEEK``
+    '''
+    def correct_date_due_to_day_of_the_week(date, day_of_week):
+        # Days forward (0-6) from ``date`` to the requested weekday.
+        days_ahead = (DAYS_OF_WEEK.index(day_of_week) - date.weekday()) % 7
+        # Snap to the nearest occurrence: at most 3 days forward, otherwise
+        # step back to the previous one.
+        if days_ahead > 3:
+            days_ahead -= 7
+        return date + datetime.timedelta(days=days_ahead)
+
+    if frequency not in UPDATE_FREQUENCIES:
+        raise Exception('Frequency {freq} not recognised'.format(freq=frequency))
+
+    if start_date is None:
+        start_date = datetime.datetime.utcnow()
 
-    now = datetime.datetime.utcnow()
     if time and frequency != 'ALWAYS':
         t = datetime.datetime.strptime(time, '%I:%M %p')
         set_hour = int(t.strftime("%H"))
-        now = now.replace(hour=set_hour, minute=0, second=0, microsecond=0)
+        start_date = start_date.replace(hour=set_hour, minute=0, second=0, microsecond=0)
 
+    final_date = start_date
     if frequency == 'ALWAYS':
-        return now
+        pass
     if frequency == 'WEEKLY':
-        return now + datetime.timedelta(weeks=1)
+        final_date = start_date + datetime.timedelta(weeks=1)
     if frequency == 'BIWEEKLY':
-        return now + datetime.timedelta(weeks=2)
+        final_date = start_date + datetime.timedelta(weeks=2)
     if frequency == 'DAILY':
-        return now + datetime.timedelta(days=1)
+        final_date = start_date + datetime.timedelta(days=1)
     if frequency == 'MONTHLY':
-        if now.month in (4, 6, 9, 11):
+        if start_date.month in (4, 6, 9, 11):
             days = 30
-        elif now.month == 2:
-            if now.year % 4 == 0:
+        elif start_date.month == 2:
+            year = start_date.year
+            # Century years are leap years only when divisible by 400.
+            if year % 4 == 0 and (year % 100 != 0 or year % 400 == 0):
                 days = 29
             else:
                 days = 28
         else:
             days = 31
-        return now + datetime.timedelta(days=days)
-    raise Exception('Frequency {freq} not recognised'.format(freq=frequency))
+        final_date = start_date + datetime.timedelta(days=days)
+    # No day_of_week (the parameter default) means no preferred weekday:
+    # keep the computed date rather than failing on the weekday lookup.
+    if day_of_week and frequency not in ["DAILY", "ALWAYS"]:
+        final_date = correct_date_due_to_day_of_the_week(final_date, day_of_week)
+    return final_date
 
 
 def _make_scheduled_jobs(context, data_dict):
@@ -491,7 +523,7 @@ def _make_scheduled_jobs(context, data_dict):
         except HarvestJobExists:
             log.info('Trying to rerun job for %s skipping', source.id)
 
-        source.next_run = _calculate_next_run(source.frequency, source.time)
+        source.next_run = _calculate_next_run(source.frequency, source.time, source.day_of_week)
         source.save()
 
 
diff --git a/ckanext/harvest/logic/schema.py b/ckanext/harvest/logic/schema.py
index 23e2bf34f..95e402919 100644
--- a/ckanext/harvest/logic/schema.py
+++ b/ckanext/harvest/logic/schema.py
@@ -42,6 +42,7 @@ def harvest_source_schema():
         'private': [ignore_missing, boolean_validator],
         'organization': [ignore_missing],
         'frequency': [ignore_missing, unicode, harvest_source_frequency_exists, convert_to_extras],
+        'day_of_week': [ignore_missing, unicode, convert_to_extras],
         'time': [ignore_missing, convert_to_extras],
         'state': [ignore_missing],
         'config': [ignore_missing, harvest_source_config_validator, convert_to_extras],
@@ -84,6 +85,7 @@ def harvest_source_show_package_schema():
     schema.update({
         'source_type': [convert_from_extras, ignore_missing],
         'frequency': [convert_from_extras, ignore_missing],
+        'day_of_week': [convert_from_extras, ignore_missing],
         'time': [convert_from_extras, ignore_missing],
         'config': [convert_from_extras, harvest_source_convert_from_config, ignore_missing],
         'metadata_created': [],
diff --git a/ckanext/harvest/model/__init__.py b/ckanext/harvest/model/__init__.py
index ce93427d1..5dcfb85f4 100644
--- a/ckanext/harvest/model/__init__.py
+++ b/ckanext/harvest/model/__init__.py
@@ -20,6 +20,7 @@
 
 UPDATE_FREQUENCIES = ['MANUAL', 'MONTHLY', 'WEEKLY', 'BIWEEKLY', 'DAILY', 'ALWAYS']
 UPDATE_TIMES = [datetime.time(i).strftime('%I:%M %p') for i in range(24)]
+DAYS_OF_WEEK = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
 
 log = logging.getLogger(__name__)
 
@@ -248,6 +249,7 @@ def define_harvester_tables():
         Column('user_id', types.UnicodeText, default=u''),
         Column('publisher_id', types.UnicodeText, default=u''),
         Column('frequency', types.UnicodeText, default=u'MANUAL'),
+        Column('day_of_week', types.UnicodeText, default=u'Monday'),
         Column('time', types.UnicodeText, default=u''),
         Column('next_run', types.DateTime),
     )
diff --git a/ckanext/harvest/plugin.py b/ckanext/harvest/plugin.py
index b3ce7e959..dedeb0680 100644
--- a/ckanext/harvest/plugin.py
+++ b/ckanext/harvest/plugin.py
@@ -310,6 +310,7 @@ def get_helpers(self):
             'harvesters_info': harvest_helpers.harvesters_info,
             'harvester_types': harvest_helpers.harvester_types,
             'harvest_frequencies': harvest_helpers.harvest_frequencies,
+            'harvest_day_of_week': harvest_helpers.harvest_day_of_week,
             'harvest_times': harvest_helpers.harvest_times,
             'harvest_default_time': harvest_helpers.harvest_default_time,
             'link_for_harvest_object': harvest_helpers.link_for_harvest_object,
@@ -389,7 +390,7 @@ def _create_harvest_source_object(context, data_dict):
     source.type = data_dict['source_type']
 
     opt = ['active', 'title', 'description', 'user_id',
-           'publisher_id', 'config', 'frequency', 'time']
+           'publisher_id', 'config', 'frequency', 'day_of_week', 'time']
     for o in opt:
         if o in data_dict and data_dict[o] is not None:
             source.__setattr__(o, data_dict[o])
diff --git a/ckanext/harvest/templates/source/new_source_form.html b/ckanext/harvest/templates/source/new_source_form.html
index cbcd61768..1ff907ba5 100644
--- a/ckanext/harvest/templates/source/new_source_form.html
+++ b/ckanext/harvest/templates/source/new_source_form.html
@@ -42,6 +42,8 @@
   {{ form.select('frequency', id='field-frequency', label=_('Update frequency'), options=h.harvest_frequencies(), selected=data.frequency, error=errors.frequency) }}
   {{ form.select('time', id='field-time', label=_('Update time'), options=h.harvest_times(), selected=data.time or h.harvest_default_time(), error=errors.time) }}
 
+  {{ form.select('day_of_week', id='field-time-day-of-week', label=_('Day of week for update'), options=h.harvest_day_of_week(), selected=data.day_of_week, error=errors.day_of_week) }}
+
   {% block extra_config %}
     {{ form.textarea('config', id='field-config', label=_('Configuration'), value=data.config, error=errors.config) }}
 
diff --git a/ckanext/harvest/tests/harvesters/test_harvester_next_run.py b/ckanext/harvest/tests/harvesters/test_harvester_next_run.py
new file mode 100644
index 000000000..c5aad76e4
--- /dev/null
+++ b/ckanext/harvest/tests/harvesters/test_harvester_next_run.py
@@ -0,0 +1,113 @@
+import unittest
+from datetime import datetime
+
+from ckanext.harvest.logic.action.update import _calculate_next_run
+
+
+class TestNextRunCalculation(unittest.TestCase):
+    '''Next-run dates for each frequency and preferred weekday.
+
+    2020-08-10 is a Monday, 2020-08-12 a Wednesday and 2020-08-13 a
+    Thursday. ``time=None`` keeps the time of day of ``start_date``.
+    '''
+
+    def test_monday_sunday_weekly(self):
+        start_date = datetime.strptime('2020-08-10T06:30:00', '%Y-%m-%dT%H:%M:%S')
+        expected_result = datetime.strptime('2020-08-16T06:30:00', '%Y-%m-%dT%H:%M:%S')
+        actual_date = _calculate_next_run(
+            start_date=start_date,
+            frequency="WEEKLY",
+            day_of_week="Sunday",
+            time=None
+        )
+        self.assertEqual(actual_date, expected_result)
+
+    def test_monday_saturday_weekly(self):
+        start_date = datetime.strptime('2020-08-10T06:30:00', '%Y-%m-%dT%H:%M:%S')
+        expected_result = datetime.strptime('2020-08-15T06:30:00', '%Y-%m-%dT%H:%M:%S')
+        actual_date = _calculate_next_run(
+            start_date=start_date,
+            frequency="WEEKLY",
+            day_of_week="Saturday",
+            time=None
+        )
+        self.assertEqual(actual_date, expected_result)
+
+    def test_monday_tuesday_weekly(self):
+        start_date = datetime.strptime('2020-08-10T06:30:00', '%Y-%m-%dT%H:%M:%S')
+        expected_result = datetime.strptime('2020-08-18T06:30:00', '%Y-%m-%dT%H:%M:%S')
+        actual_date = _calculate_next_run(
+            start_date=start_date,
+            frequency="WEEKLY",
+            day_of_week="Tuesday",
+            time=None
+        )
+        self.assertEqual(actual_date, expected_result)
+
+    def test_wednesday_monday_monthly(self):
+        start_date = datetime.strptime('2020-08-12T06:30:00', '%Y-%m-%dT%H:%M:%S')
+        expected_result = datetime.strptime('2020-09-14T06:30:00', '%Y-%m-%dT%H:%M:%S')
+        actual_date = _calculate_next_run(
+            start_date=start_date,
+            frequency="MONTHLY",
+            day_of_week="Monday",
+            time=None
+        )
+        self.assertEqual(actual_date, expected_result)
+
+    def test_thursday_sunday_biweekly(self):
+        start_date = datetime.strptime('2020-08-13T06:30:00', '%Y-%m-%dT%H:%M:%S')
+        expected_result = datetime.strptime('2020-08-30T06:30:00', '%Y-%m-%dT%H:%M:%S')
+        actual_date = _calculate_next_run(
+            start_date=start_date,
+            frequency="BIWEEKLY",
+            day_of_week="Sunday",
+            time=None
+        )
+        self.assertEqual(actual_date, expected_result)
+
+    def test_thursday_sunday_daily(self):
+        start_date = datetime.strptime('2020-08-13T06:30:00', '%Y-%m-%dT%H:%M:%S')
+        expected_result = datetime.strptime('2020-08-14T06:30:00', '%Y-%m-%dT%H:%M:%S')
+        actual_date = _calculate_next_run(
+            start_date=start_date,
+            frequency="DAILY",
+            day_of_week="Sunday",
+            time=None
+        )
+        self.assertEqual(actual_date, expected_result)
+
+    def test_thursday_sunday_always(self):
+        start_date = datetime.strptime('2020-08-13T06:30:00', '%Y-%m-%dT%H:%M:%S')
+        expected_result = datetime.strptime('2020-08-13T06:30:00', '%Y-%m-%dT%H:%M:%S')
+        actual_date = _calculate_next_run(
+            start_date=start_date,
+            frequency="ALWAYS",
+            day_of_week="Sunday",
+            time=None
+        )
+        self.assertEqual(actual_date, expected_result)
+
+    def test_weekly_without_day_of_week(self):
+        # No preferred weekday: the date is exactly one week later.
+        start_date = datetime.strptime('2020-08-10T06:30:00', '%Y-%m-%dT%H:%M:%S')
+        expected_result = datetime.strptime('2020-08-17T06:30:00', '%Y-%m-%dT%H:%M:%S')
+        actual_date = _calculate_next_run(
+            start_date=start_date,
+            frequency="WEEKLY",
+            day_of_week=None,
+            time=None
+        )
+        self.assertEqual(actual_date, expected_result)
+
+    def test_monthly_february_in_non_leap_century_year(self):
+        # 2100 is divisible by 4 but is not a leap year: February has 28 days.
+        start_date = datetime.strptime('2100-02-10T06:30:00', '%Y-%m-%dT%H:%M:%S')
+        expected_result = datetime.strptime('2100-03-10T06:30:00', '%Y-%m-%dT%H:%M:%S')
+        actual_date = _calculate_next_run(
+            start_date=start_date,
+            frequency="MONTHLY",
+            day_of_week=None,
+            time=None
+        )
+        self.assertEqual(actual_date, expected_result)