diff --git a/django_cron/cron.py b/django_cron/cron.py index 013524e..0f92b71 100644 --- a/django_cron/cron.py +++ b/django_cron/cron.py @@ -7,48 +7,101 @@ class FailedRunsNotificationCronJob(CronJobBase): """ - Send email if cron failed to run X times in a row - """ - RUN_EVERY_MINS = 30 + A regular job to send email reports for failed Cron jobs. - schedule = Schedule(run_every_mins=RUN_EVERY_MINS) + The job log is used to check for all unreported failures for each job + class specified within the CRON_CLASSES dictionary. When the number of + failures for each job type exceeds the limit (which can be specified + either per-job or project wide) an email is sent to all relevant parties + detailing the error. + """ code = 'django_cron.FailedRunsNotificationCronJob' + def __init__(self, *args, **kwargs): + super(FailedRunsNotificationCronJob, self).__init__(*args, **kwargs) + self.config = self.get_config() + self.schedule = Schedule(run_every_mins=self.config['RUN_EVERY_MINS']) + def do(self): + """ + Check all Cron jobs defined in CRON_CLASSES for failed runs. + """ + cron_classes = [ + get_class(class_name) for class_name in settings.CRON_CLASSES + ] + + for cron_class in cron_classes: + # The FailedRuns Cron job should ignore itself + if isinstance(self, cron_class): + continue + + self.check_for_failures(cron_class) + + def get_config(self): + """ + Combine the default configuration with any project-specific ones. + """ + config = dict( + RUN_EVERY_MINS=0, + EMAIL_PREFIX='[Cron Failure] - ', + MIN_NUM_FAILURES=10, + FROM_EMAIL=settings.DEFAULT_FROM_EMAIL, + EMAIL_RECIPIENTS=[email for _, email in settings.ADMINS] + ) + config.update(getattr(settings, 'CRON_FAILURE_REPORT', {})) + return config + + def check_for_failures(self, cron_cls): + """ + Check the given Cron task for failed jobs, and report if required. + """ + min_failures = getattr( + cron_cls, 'MIN_NUM_FAILURES', self.config['MIN_NUM_FAILURES'] + ) + + failed_jobs = CronJobLog.objects.filter( + code=cron_cls.code, is_success=False, failure_reported=False + ) - CRONS_TO_CHECK = map(lambda x: get_class(x), settings.CRON_CLASSES) - EMAILS = [admin[1] for admin in settings.ADMINS] + if failed_jobs.count() < min_failures: + return - try: - FAILED_RUNS_CRONJOB_EMAIL_PREFIX = settings.FAILED_RUNS_CRONJOB_EMAIL_PREFIX - except: - FAILED_RUNS_CRONJOB_EMAIL_PREFIX = '' + self.report_failure(cron_cls, failed_jobs) + failed_jobs.update(failure_reported=True) - for cron in CRONS_TO_CHECK: + def report_failure(self, cron_cls, failed_jobs): + """ + Report the failed jobs by sending an email (using django-common). + """ + send_mail(**self.get_send_mail_kwargs(cron_cls, failed_jobs)) - try: - min_failures = cron.MIN_NUM_FAILURES - except AttributeError: - min_failures = 10 + def get_send_mail_kwargs(self, cron_cls, failed_jobs): + """ + Return the arguments to pass to send_mail for the given failed jobs. + """ + failed_reports = [] - failures = 0 + for job in failed_jobs: + failed_reports.append( + u"Job ran at {start_time}:\n{message}" + .format(start_time=job.start_time, message=job.message) + ) - jobs = CronJobLog.objects.filter(code=cron.code).order_by('-end_time')[:min_failures] + divider = "\n\n{0}\n\n".format("=" * 80) + message = divider.join(failed_reports) - message = '' + subject = "{prefix}{code} failed".format( + prefix=self.config['EMAIL_PREFIX'], + code=cron_cls.code + ) - for job in jobs: - if not job.is_success: - failures += 1 - message += 'Job ran at %s : \n\n %s \n\n' % (job.start_time, job.message) + if len(failed_reports) > 1: + subject = "{subject} {times} times".format( + subject=subject, times=len(failed_reports) + ) - if failures == min_failures: - send_mail( - '%s%s failed %s times in a row!' % ( - FAILED_RUNS_CRONJOB_EMAIL_PREFIX, - cron.code, - min_failures - ), - message, - settings.DEFAULT_FROM_EMAIL, EMAILS - ) + return dict( + subject=subject, message=message, + from_email=self.config['FROM_EMAIL'], + recipient_emails=self.config['EMAIL_RECIPIENTS'] + ) diff --git a/django_cron/migrations/0003_cronjoblog_failure_reported.py b/django_cron/migrations/0003_cronjoblog_failure_reported.py new file mode 100644 index 0000000..4ff556d --- /dev/null +++ b/django_cron/migrations/0003_cronjoblog_failure_reported.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.10.3 on 2016-11-29 14:35 +from __future__ import unicode_literals + +from django.db import migrations, models + + +def set_old_logs_as_reported(apps, *args, **kwargs): + CronJobLog = apps.get_model('django_cron', 'CronJobLog') + CronJobLog.objects.update(failure_reported=True) + + +def noop(*args, **kwargs): + pass + + +class Migration(migrations.Migration): + + dependencies = [ + ('django_cron', '0002_remove_max_length_from_CronJobLog_message'), + ] + + operations = [ + migrations.AddField( + model_name='cronjoblog', + name='failure_reported', + field=models.BooleanField(default=False), + ), + migrations.RunPython(set_old_logs_as_reported, reverse_code=noop) + ] diff --git a/django_cron/models.py b/django_cron/models.py index bd2db70..49bb57d 100644 --- a/django_cron/models.py +++ b/django_cron/models.py @@ -9,6 +9,7 @@ class CronJobLog(models.Model): start_time = models.DateTimeField(db_index=True) end_time = models.DateTimeField(db_index=True) is_success = models.BooleanField(default=False) + failure_reported = models.BooleanField(default=False) message = models.TextField(default='', blank=True) # TODO: db_index=True """ diff --git a/django_cron/tests.py b/django_cron/tests.py index 58b9322..3d7129f 100644 --- a/django_cron/tests.py +++ b/django_cron/tests.py @@ -2,8 +2,11 @@ from time import sleep from datetime import timedelta -from django import db +from mock import patch +from freezegun import freeze_time +from django import db +from django.conf import settings from django.test import TransactionTestCase from django.core.management import call_command from django.test.utils import override_settings @@ -11,10 +14,20 @@ from django.core.urlresolvers import reverse from django.contrib.auth.models import User -from freezegun import freeze_time - +from django_cron import get_class +from django_cron.cron import FailedRunsNotificationCronJob from django_cron.helpers import humanize_duration from django_cron.models import CronJobLog +from test_crons import TestErrorCronJob + + +SUCCESS_CRON = 'test_crons.TestSucessCronJob' +ERROR_CRON = 'test_crons.TestErrorCronJob' +FIVE_MINS_CRON = 'test_crons.Test5minsCronJob' +RUN_AT_TIMES_CRON = 'test_crons.TestRunAtTimesCronJob' +WAIT_3SEC_CRON = 'test_crons.Wait3secCronJob' +DOES_NOT_EXIST_CRON = 'ThisCronObviouslyDoesntExist' +FAILED_RUNS_CRON = 'django_cron.cron.FailedRunsNotificationCronJob' class OutBuffer(object): @@ -34,71 +47,64 @@ def str_content(self): return self._str_cache -class TestCase(TransactionTestCase): - - success_cron = 'test_crons.TestSucessCronJob' - error_cron = 'test_crons.TestErrorCronJob' - five_mins_cron = 'test_crons.Test5minsCronJob' - run_at_times_cron = 'test_crons.TestRunAtTimesCronJob' - wait_3sec_cron = 'test_crons.Wait3secCronJob' - does_not_exist_cron = 'ThisCronObviouslyDoesntExist' - test_failed_runs_notification_cron = 'django_cron.cron.FailedRunsNotificationCronJob' - +class DjangoCronTestCase(TransactionTestCase): def setUp(self): CronJobLog.objects.all().delete() + +class BaseTests(DjangoCronTestCase): def test_success_cron(self): logs_count = CronJobLog.objects.all().count() - call_command('runcrons', self.success_cron, force=True) + call_command('runcrons', SUCCESS_CRON, force=True) self.assertEqual(CronJobLog.objects.all().count(), logs_count + 1) def test_failed_cron(self): logs_count = CronJobLog.objects.all().count() - call_command('runcrons', self.error_cron, force=True) + call_command('runcrons', ERROR_CRON, force=True) self.assertEqual(CronJobLog.objects.all().count(), logs_count + 1) def test_not_exists_cron(self): logs_count = CronJobLog.objects.all().count() out_buffer = OutBuffer() - call_command('runcrons', self.does_not_exist_cron, force=True, stdout=out_buffer) + call_command('runcrons', DOES_NOT_EXIST_CRON, force=True, stdout=out_buffer) self.assertIn('Make sure these are valid cron class names', out_buffer.str_content()) - self.assertIn(self.does_not_exist_cron, out_buffer.str_content()) + self.assertIn(DOES_NOT_EXIST_CRON, out_buffer.str_content()) self.assertEqual(CronJobLog.objects.all().count(), logs_count) @override_settings(DJANGO_CRON_LOCK_BACKEND='django_cron.backends.lock.file.FileLock') def test_file_locking_backend(self): logs_count = CronJobLog.objects.all().count() - call_command('runcrons', self.success_cron, force=True) + call_command('runcrons', SUCCESS_CRON, force=True) self.assertEqual(CronJobLog.objects.all().count(), logs_count + 1) def test_runs_every_mins(self): logs_count = CronJobLog.objects.all().count() with freeze_time("2014-01-01 00:00:00"): - call_command('runcrons', self.five_mins_cron) + call_command('runcrons', FIVE_MINS_CRON) self.assertEqual(CronJobLog.objects.all().count(), logs_count + 1) with freeze_time("2014-01-01 00:04:59"): - call_command('runcrons', self.five_mins_cron) + call_command('runcrons', FIVE_MINS_CRON) self.assertEqual(CronJobLog.objects.all().count(), logs_count + 1) with freeze_time("2014-01-01 00:05:01"): - call_command('runcrons', self.five_mins_cron) + call_command('runcrons', FIVE_MINS_CRON) self.assertEqual(CronJobLog.objects.all().count(), logs_count + 2) def test_runs_at_time(self): logs_count = CronJobLog.objects.all().count() with freeze_time("2014-01-01 00:00:01"): - call_command('runcrons', self.run_at_times_cron) + call_command('runcrons', RUN_AT_TIMES_CRON) self.assertEqual(CronJobLog.objects.all().count(), logs_count + 1) with freeze_time("2014-01-01 00:04:50"): - call_command('runcrons', self.run_at_times_cron) + call_command('runcrons', RUN_AT_TIMES_CRON) self.assertEqual(CronJobLog.objects.all().count(), logs_count + 1) with freeze_time("2014-01-01 00:05:01"): - call_command('runcrons', self.run_at_times_cron) + call_command('runcrons', RUN_AT_TIMES_CRON) self.assertEqual(CronJobLog.objects.all().count(), logs_count + 2) def test_admin(self): @@ -112,14 +118,14 @@ def test_admin(self): self.client.login(username=user.username, password=password) # edit CronJobLog object - call_command('runcrons', self.success_cron, force=True) + call_command('runcrons', SUCCESS_CRON, force=True) log = CronJobLog.objects.all()[0] url = reverse('admin:django_cron_cronjoblog_change', args=(log.id,)) response = self.client.get(url) self.assertIn('Cron job logs', str(response.content)) def run_cronjob_in_thread(self, logs_count): - call_command('runcrons', self.wait_3sec_cron) + call_command('runcrons', WAIT_3SEC_CRON) self.assertEqual(CronJobLog.objects.all().count(), logs_count + 1) db.close_old_connections() @@ -133,7 +139,7 @@ def test_cache_locking_backend(self): t.start() # this shouldn't get running sleep(0.1) # to avoid race condition - call_command('runcrons', self.wait_3sec_cron) + call_command('runcrons', WAIT_3SEC_CRON) t.join(10) self.assertEqual(CronJobLog.objects.all().count(), logs_count + 1) @@ -153,20 +159,10 @@ def test_cache_locking_backend(self): # t.start() # # this shouldn't get running # sleep(1) # to avoid race condition - # call_command('runcrons', self.wait_3sec_cron) + # call_command('runcrons', WAIT_3SEC_CRON) # t.join(10) # self.assertEqual(CronJobLog.objects.all().count(), logs_count + 1) - def test_failed_runs_notification(self): - CronJobLog.objects.all().delete() - logs_count = CronJobLog.objects.all().count() - - for i in range(10): - call_command('runcrons', self.error_cron, force=True) - call_command('runcrons', self.test_failed_runs_notification_cron) - - self.assertEqual(CronJobLog.objects.all().count(), logs_count + 11) - def test_humanize_duration(self): test_subjects = ( (timedelta(days=1, hours=1, minutes=1, seconds=1), '1 day, 1 hour, 1 minute, 1 second'), @@ -180,3 +176,181 @@ def test_humanize_duration(self): humanize_duration(duration), humanized ) + + +class FailureReportTests(DjangoCronTestCase): + """ + Unit tests for the FailedRunsNotificationCronJob. + """ + def _error_cron(self): + call_command('runcrons', ERROR_CRON, force=True) + + def _report_cron(self): + call_command( + 'runcrons', FAILED_RUNS_CRON, + force=True + ) + + def _error_and_report(self): + self._error_cron() + self._report_cron() + + def _resolve_reported_failures(self, cron_cls, failed_jobs): + """ + Resolve the failed jobs passed to the notifier's report_failure(). + + This allows us to assert the jobs passed given that failed jobs is a + queryset which shouldn't match any instances after the notifier runs + as it should make all log entries as having been reported. + """ + self.reported_cls = cron_cls + self.reported_jobs = set(failed_jobs) + + @override_settings( + CRON_CLASSES=( + SUCCESS_CRON, + FIVE_MINS_CRON, + ERROR_CRON, + FAILED_RUNS_CRON + ) + ) + @patch.object(FailedRunsNotificationCronJob, 'check_for_failures') + def test_checks_according_to_cron_classes(self, mock_check): + """ + The FailedRunsNotificationCronJob checks for failures in CRON_CLASSES. + """ + self._report_cron() + self.assertEquals(3, mock_check.call_count) + + # It should also ignore itself + expected_checked_classes = { + get_class(cron_class_name) + for cron_class_name in settings.CRON_CLASSES + if cron_class_name != FAILED_RUNS_CRON + } + actual_checked_classes = { + cron_class + for call_args, call_kwargs in mock_check.call_args_list + for cron_class in call_args + } + self.assertEquals( + actual_checked_classes, expected_checked_classes + ) + + @patch.object(FailedRunsNotificationCronJob, 'do') + def test_runs_always_by_default(self, mock_do): + mock_do.return_value = 'OK' + + with freeze_time("2017-01-01") as frozen_datetime: + call_command('runcrons', FAILED_RUNS_CRON) + self.assertEqual(1, mock_do.call_count) + + frozen_datetime.tick(delta=timedelta(minutes=2)) + call_command('runcrons', FAILED_RUNS_CRON) + self.assertEqual(2, mock_do.call_count) + + frozen_datetime.tick(delta=timedelta(minutes=2)) + call_command('runcrons', FAILED_RUNS_CRON) + self.assertEqual(3, mock_do.call_count) + + @override_settings(CRON_FAILURE_REPORT=dict(RUN_EVERY_MINS=10)) + @patch.object(FailedRunsNotificationCronJob, 'do') + def test_settings_can_override_run_every(self, mock_do): + mock_do.return_value = 'OK' + + with freeze_time("2017-01-01") as frozen_datetime: + call_command('runcrons', FAILED_RUNS_CRON) + self.assertEqual(1, mock_do.call_count) + + # Less than 10 minutes, should not run again + frozen_datetime.tick(delta=timedelta(minutes=9)) + call_command('runcrons', FAILED_RUNS_CRON) + self.assertEqual(1, mock_do.call_count) + + # More than 10 minutes, should run again + frozen_datetime.tick(delta=timedelta(minutes=2)) + call_command('runcrons', FAILED_RUNS_CRON) + self.assertEqual(2, mock_do.call_count) + + @patch.object(FailedRunsNotificationCronJob, 'report_failure') + def test_failed_notifications(self, mock_report): + """ + By default, the user should be notified after 10 job failures. + """ + mock_report.side_effect = self._resolve_reported_failures + + for _ in range(9): + self._error_and_report() + self.assertEquals(0, mock_report.call_count) + + # The tenth error triggers the report + self._error_and_report() + self.assertEqual(1, mock_report.call_count) + + # The correct job class and entries should be included + self.assertEquals(TestErrorCronJob, self.reported_cls) + self.assertEquals( + set(CronJobLog.objects.filter(code=TestErrorCronJob.code)), + self.reported_jobs + ) + + @patch.object(FailedRunsNotificationCronJob, 'report_failure') + @override_settings(CRON_FAILURE_REPORT=dict(MIN_NUM_FAILURES=1)) + def test_settings_can_override_number_of_failures(self, mock_report): + mock_report.side_effect = self._resolve_reported_failures + self._error_and_report() + self.assertEqual(1, mock_report.call_count) + + @patch.object(FailedRunsNotificationCronJob, 'report_failure') + @override_settings(CRON_FAILURE_REPORT=dict(MIN_NUM_FAILURES=1)) + def test_logs_all_unreported(self, mock_report): + mock_report.side_effect = self._resolve_reported_failures + self._error_cron() + self._error_and_report() + self.assertEqual(1, mock_report.call_count) + self.assertEqual(2, len(self.reported_jobs)) + + @patch.object(FailedRunsNotificationCronJob, 'report_failure') + @override_settings(CRON_FAILURE_REPORT=dict(MIN_NUM_FAILURES=1)) + def test_only_logs_failures(self, mock_report): + mock_report.side_effect = self._resolve_reported_failures + call_command('runcrons', SUCCESS_CRON, force=True) + self._error_and_report() + self.assertEqual( + self.reported_jobs, + {CronJobLog.objects.get(code=TestErrorCronJob.code)} + ) + + @patch.object(FailedRunsNotificationCronJob, 'report_failure') + @override_settings(CRON_FAILURE_REPORT=dict(MIN_NUM_FAILURES=1)) + def test_only_reported_once(self, mock_report): + mock_report.side_effect = self._resolve_reported_failures + self._error_and_report() + self.assertEqual(1, mock_report.call_count) + + # Calling the notifier for a second time doesn't report a second time + self._report_cron() + self.assertEqual(1, mock_report.call_count) + + @patch('django_cron.cron.send_mail') + @override_settings( + CRON_FAILURE_REPORT=dict( + MIN_NUM_FAILURES=1, + FROM_EMAIL='from@email.com', + EMAIL_RECIPIENTS=['foo@bar.com', 'x@y.com'], + EMAIL_PREFIX='ERROR!!!' + ) + ) + def test_uses_send_mail(self, mock_send_mail): + """ + Test that django_common is used to send the email notifications. + """ + self._error_and_report() + self.assertEquals(1, mock_send_mail.call_count) + kwargs = mock_send_mail.call_args[1] + + self.assertIn('ERROR!!!', kwargs['subject']) + self.assertEquals('from@email.com', kwargs['from_email']) + self.assertEquals( + ['foo@bar.com', 'x@y.com'], kwargs['recipient_emails'] + ) diff --git a/docs/sample_cron_configurations.rst b/docs/sample_cron_configurations.rst index fffccd0..8a3b763 100644 --- a/docs/sample_cron_configurations.rst +++ b/docs/sample_cron_configurations.rst @@ -59,7 +59,7 @@ This will run job every 2h plus one run at 6:30. Allowing parallels runs ----------------------- -By deafult parallels runs are not allowed (for security reasons). However if you +By default parallel runs are not allowed (for security reasons). However if you want enable them just add: .. code-block:: python @@ -81,28 +81,68 @@ If you wish to override which cache is used, put this in your settings file: FailedRunsNotificationCronJob ----------------------------- -This example cron check last cron jobs results. If they were unsuccessfull 10 times in row, it sends email to user. +This sample cron job checks for any unreported failed jobs for each job class +provided in your ``CRON_CLASSES`` list, and reports them as necessary. The job +is set to run on each ``runcrons`` task, and the default process is to email +all the users specified in the ``ADMINS`` settings list when a job fails more +than 10 times in a row. Install required dependencies: ``Django>=1.7.0``, ``django-common>=0.5.1``. -Add ``django_cron.cron.FailedRunsNotificationCronJob`` to your ``CRON_CLASSES`` in settings file. +Add ``django_cron.cron.FailedRunsNotificationCronJob`` to *the end* of your +``CRON_CLASSES`` list within your settings file. -To set up minimal number of failed runs set up ``MIN_NUM_FAILURES`` in your cron class (default = 10). For example: :: +.. code-block:: python + + CRON_CLASSES = [ + ... + 'django_cron.cron.FailedRunsNotificationCronJob' + ] + +Each cron job can specify the minimum number of failures required before an email is sent by setting a ``MIN_NUM_FAILURES`` attribute: :: class MyCronJob(CronJobBase): - RUN_EVERY_MINS = 10 MIN_NUM_FAILURES = 3 + ... - schedule = Schedule(run_every_mins=RUN_EVERY_MINS) - code = 'app.MyCronJob' +**Configuration** + +By adding a ``CRON_FAILURE_REPORT`` dictionary to your Django settings you can configure the reporter in a variety of ways: + +.. code-block:: python + + # settings.py + CRON_FAILURE_REPORT = dict() + +The possible options are: + +* ``RUN_EVERY_MINS (int)`` + + After how many minutes should the reporter run (default: 0). + +* ``EMAIL_PREFIX (str)`` + + The prefix for all failure emails (default: "[Cron Failure] - "). + +* ``MIN_NUM_FAILURES (int)`` + + The default number of times an individual cron job can fail before an email is sent. + Job classes can always override this value by setting a class attribute + with the same name (default: 10). + +* ``FROM_EMAIL (str)`` - def do(self): - ... some action here ... + The email address the fail reports are sent from (default: settings.DEFAULT_FROM_EMAIL) -Emails are imported from ``ADMINS`` in settings file +* ``EMAIL_RECIPIENTS (list)`` -To set up email prefix, you must add ``FAILED_RUNS_CRONJOB_EMAIL_PREFIX`` in your settings file (default is empty). For example: :: + A list of email addresses controlling who the reports are sent to (default: email addresses defined in settings.ADMIN) - FAILED_RUNS_CRONJOB_EMAIL_PREFIX = "[Server check]: " +For more advanced usage, you can subclass ``FailedRunsNotificationCronJob``, and define a custom ``report_failure()`` method to be notified of failures in whichever way you like (e.g. via slack, text etc.). For example: :: -``FailedRunsNotificationCronJob`` checks every cron from ``CRON_CLASSES`` + class FailedNotifier(FailedRunsNotificationCronJob): + def report_failure(self, cron_cls, failed_jobs): + """ + Report in Slack that the given Cron job failed. + """ + slack.post("ERROR - Cron job '{0}' failed.".format(cron_cls.code)) diff --git a/test_requirements.pip b/test_requirements.pip index 79c9a2b..739955e 100644 --- a/test_requirements.pip +++ b/test_requirements.pip @@ -1,4 +1,5 @@ -freezegun==0.2.8 +freezegun==0.3.8 +mock==2.0.0 mysqlclient==1.3.6 psycopg2==2.6 -flake8==2.4.0 \ No newline at end of file +flake8==2.4.0