From 0f5606fa97bedf03395b49b8745998386aa5f175 Mon Sep 17 00:00:00 2001 From: Felix Rindt Date: Wed, 13 Sep 2023 16:14:51 +0200 Subject: [PATCH] unify healthchecks --- ephios/core/services/health/healthchecks.py | 178 ++++++++++++++++++ ephios/core/signals.py | 6 + .../core/settings/settings_instance.html | 83 ++++---- ephios/core/urls.py | 4 +- ephios/core/views/healthcheck.py | 34 +--- ephios/core/views/settings.py | 11 +- 6 files changed, 234 insertions(+), 82 deletions(-) create mode 100644 ephios/core/services/health/healthchecks.py diff --git a/ephios/core/services/health/healthchecks.py b/ephios/core/services/health/healthchecks.py new file mode 100644 index 000000000..ea14c22e7 --- /dev/null +++ b/ephios/core/services/health/healthchecks.py @@ -0,0 +1,178 @@ +import os +from pathlib import Path + +from django.conf import settings +from django.contrib.auth.models import Permission +from django.contrib.humanize.templatetags.humanize import naturaltime +from django.dispatch import receiver +from django.utils.safestring import mark_safe +from django.utils.translation import gettext_lazy as _ + +from ephios.core.dynamic_preferences_registry import LastRunPeriodicCall +from ephios.core.signals import register_healthchecks + +# health checks are meant to monitor the health of the application while it is running +# in contrast there are django checks which are meant to check the configuration of the application + + +def run_healthchecks(): + for _, healthchecks in register_healthchecks.send(None): + for HealthCheck in healthchecks: + check = HealthCheck() + status, message = check.check() + yield check, status, message + + +class HealthCheckStatus: + OK = "ok" + WARNING = "warning" + ERROR = "error" + + +class AbstractHealthCheck: + @property + def slug(self): + """ + Return a unique slug for this health check. + """ + raise NotImplementedError + + @property + def name(self): + """ + Return a short name of this health check. 
+ """ + raise NotImplementedError + + @property + def description(self): + """ + Return a short description of this health check. + """ + raise NotImplementedError + + @property + def documentation_link(self): + """ + Return a link to the documentation of this health check. + """ + return None + + def check(self): + """ + Return a tuple of (status, message) where status is one of HealthCheckStatus + """ + raise NotImplementedError + + +class DBHealthCheck(AbstractHealthCheck): + slug = "db" + name = _("Database") + description = _("The database is the central storage for all data.") + documentation_link = "https://docs.djangoproject.com/en/stable/ref/databases/" + + def check(self): + from django.db import connection + + try: + connection.cursor() + Permission.objects.exists() + except Exception as e: + return HealthCheckStatus.ERROR, str(e) + + if settings.DATABASES["default"]["ENGINE"] == "django.db.backends.sqlite3": + return HealthCheckStatus.WARNING, _( + "Using SQLite, this is not recommended in production." + ) + + return HealthCheckStatus.OK, _("Database connection established.") + + +class CacheHealthCheck(AbstractHealthCheck): + slug = "cache" + name = _("Cache") + description = _("The cache is used to store temporary data.") + documentation_link = "https://docs.djangoproject.com/en/stable/topics/cache/" + + def check(self): + from django.core import cache + + try: + cache.cache.set("_healthcheck", "1") + if not cache.cache.get("_healthcheck") == "1": + raise Exception("Cache not available") + except Exception as e: + return HealthCheckStatus.ERROR, str(e) + + if ( + settings.CACHES.get("default", {}).get("BACKEND") + == "django.core.cache.backends.locmem.LocMemCache" + ): + return HealthCheckStatus.WARNING, _( + "Using LocMemCache, this is not recommended in production." 
+ ) + + return HealthCheckStatus.OK, _("Cache connection established.") + + +class CronJobHealthCheck(AbstractHealthCheck): + slug = "cronjob" + name = _("Cronjob") + description = _( + "A cron job must regularly call ephios to do recurring tasks like sending notifications." + ) + documentation_link = ( + "https://docs.ephios.de/en/stable/admin/deployment/manual/index.html#setup-cron" + ) + + def check(self): + last_call = LastRunPeriodicCall.get_last_call() + if LastRunPeriodicCall.is_stuck(): + if last_call: + return ( + HealthCheckStatus.WARNING, + mark_safe( + _("Cronjob stuck, last run {last_call}.").format( + last_call=naturaltime(last_call), + ) + ), + ) + else: + return ( + HealthCheckStatus.ERROR, + mark_safe(_("Cronjob stuck, no last run.")), + ) + else: + return ( + HealthCheckStatus.OK, + mark_safe(_("Last run {last_call}.").format(last_call=naturaltime(last_call))), + ) + + +class WritableMediaRootHealthCheck(AbstractHealthCheck): + slug = "writable_media_root" + name = _("Writable Media Root") + description = _("The media root must be writable by the application server.") + documentation_link = ( + "https://docs.ephios.de/en/stable/admin/deployment/manual/index.html#data-directory" + ) + + def check(self): + media_root = Path(settings.MEDIA_ROOT) + if not os.access(media_root, os.W_OK): + return ( + HealthCheckStatus.ERROR, + mark_safe(_("Media root not writable by application server.")), + ) + return ( + HealthCheckStatus.OK, + mark_safe(_("Media root writable by application server.")), + ) + + +@receiver(register_healthchecks, dispatch_uid="ephios.core.healthchecks.register_core_healthchecks") +def register_core_healthchecks(sender, **kwargs): + yield DBHealthCheck + yield CacheHealthCheck + yield CronJobHealthCheck + yield WritableMediaRootHealthCheck diff --git a/ephios/core/signals.py b/ephios/core/signals.py index 366627e5a..55563482e 100644 --- a/ephios/core/signals.py +++ b/ephios/core/signals.py @@ -95,6 +95,12 @@ Receivers should return a 
list of subclasses of ``ephios.core.notifications.backends.AbstractNotificationBackend`` """ +register_healthchecks = PluginSignal() +""" +This signal is sent out to get all health checks that can be run to monitor the health of the application. +Receivers should return a list of subclasses of ``ephios.core.services.health.healthchecks.AbstractHealthCheck`` +""" + periodic_signal = PluginSignal() """ This signal is called periodically, at least every 15 minutes. diff --git a/ephios/core/templates/core/settings/settings_instance.html b/ephios/core/templates/core/settings/settings_instance.html index 391f8cfe3..83daaf80f 100644 --- a/ephios/core/templates/core/settings/settings_instance.html +++ b/ephios/core/templates/core/settings/settings_instance.html @@ -10,50 +10,47 @@ - {% if show_system_health %} -
-

- {% translate "System health" %} -

-
-
-
- {% translate "Cron job" %} - {% if last_run_periodic_call_stuck %} - - {% else %} - - {% endif %} -
-

- {% blocktranslate trimmed %} - A cron job must regularly call ephios to do recurring tasks - like sending reminder emails. - {% endblocktranslate %} - - - - {% translate "Learn more" %} - - -
- {% if last_run_periodic_call == None %} - {% translate "Last run:" %} - - {% translate "never" %} - - {% elif last_run_periodic_call_stuck %} - {% translate "Last run:" %} - - {{ last_run_periodic_call|naturaltime }} - - {% else %} - {% translate "Last run:" %} {{ last_run_periodic_call|naturaltime }} - {% endif %} -

+ {% if healthchecks %} + +

+ {% translate "System health" %} +

+
+ {% for check, status, message in healthchecks %} +
+
+
+
+ {{ check.name }} + {% if status == "error" %} + + {% translate "Error" %} + {% elif status == "warning" %} + + {% translate "Warning" %} + {% elif status == "ok" %} + + {% translate "OK" %} + {% endif %} +
+

+ {{ check.description }} + {% if check.documentation_link %} + + + + {% translate "Learn more" %} + + + {% endif %} +

+

+ {{ message }} +

+
+
-
+ {% endfor %}
{% endif %} diff --git a/ephios/core/urls.py b/ephios/core/urls.py index 8654a3b66..a1bbf8830 100644 --- a/ephios/core/urls.py +++ b/ephios/core/urls.py @@ -39,7 +39,7 @@ EventTypeListView, EventTypeUpdateView, ) -from ephios.core.views.healthcheck import HealthcheckView +from ephios.core.views.healthcheck import HealthCheckView from ephios.core.views.log import LogView from ephios.core.views.pwa import OfflineView, PWAManifestView, ServiceWorkerView from ephios.core.views.settings import ( @@ -72,7 +72,7 @@ path("manifest.json", PWAManifestView.as_view(), name="pwa_manifest"), path("serviceworker.js", ServiceWorkerView.as_view(), name="pwa_serviceworker"), path("offline/", OfflineView.as_view(), name="pwa_offline"), - path("healthcheck/", HealthcheckView.as_view(), name="healthcheck"), + path("healthcheck/", HealthCheckView.as_view(), name="healthcheck"), path("events/", EventListView.as_view(), name="event_list"), path( "events//edit/", diff --git a/ephios/core/views/healthcheck.py b/ephios/core/views/healthcheck.py index 4fdf15323..589a45eed 100644 --- a/ephios/core/views/healthcheck.py +++ b/ephios/core/views/healthcheck.py @@ -1,41 +1,19 @@ -from django.contrib.auth.models import Permission -from django.core import cache -from django.db import Error as DjangoDBError from django.http import HttpResponse -from django.utils.formats import date_format from django.views import View -from ephios.core.dynamic_preferences_registry import LastRunPeriodicCall +from ephios.core.services.health.healthchecks import run_healthchecks -class HealthcheckView(View): +class HealthCheckView(View): def get(self, request, *args, **kwargs): messages = [] errors = [] - # check db access - try: - Permission.objects.exists() - messages.append("DB OK") - except DjangoDBError: - errors.append("DB not available") - # check cache access - cache.cache.set("_healthcheck", "1") - if not cache.cache.get("_healthcheck") == "1": - errors.append("Cache not available") - else: - 
messages.append("Cache OK") - - # check cronjob - if LastRunPeriodicCall.is_stuck(): - if last_call := LastRunPeriodicCall.get_last_call(): - errors.append( - f"Cronjob stuck, last run {date_format(last_call,format='SHORT_DATETIME_FORMAT')}" - ) + for check, status, message in run_healthchecks(): + if status == "ok": + messages.append(str(message)) else: - errors.append("Cronjob stuck, no last run") - else: - messages.append("Cronjob OK") + errors.append(str(message)) if errors: return HttpResponse( diff --git a/ephios/core/views/settings.py b/ephios/core/views/settings.py index 9e6e19b17..92bc336be 100644 --- a/ephios/core/views/settings.py +++ b/ephios/core/views/settings.py @@ -6,8 +6,8 @@ from django.views.generic import FormView, TemplateView from dynamic_preferences.forms import global_preference_form_builder -from ephios.core.dynamic_preferences_registry import LastRunPeriodicCall from ephios.core.forms.users import UserNotificationPreferenceForm +from ephios.core.services.health.healthchecks import run_healthchecks from ephios.core.signals import management_settings_sections from ephios.extra.mixins import StaffRequiredMixin @@ -58,16 +58,9 @@ def get_success_url(self): def get_context_data(self, **kwargs): if self.request.user.is_superuser: - kwargs.update(self._get_healthcheck_context()) + kwargs["healthchecks"] = list(run_healthchecks()) return super().get_context_data(**kwargs) - def _get_healthcheck_context(self): - return { - "show_system_health": True, - "last_run_periodic_call": LastRunPeriodicCall.get_last_call(), - "last_run_periodic_call_stuck": LastRunPeriodicCall.is_stuck(), - } - class PersonalDataSettingsView(LoginRequiredMixin, TemplateView): template_name = "core/settings/settings_personal_data.html"