-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #186 from neutrons/metrics
Add new endpoint /metrics with relevant system monitoring metrics
- Loading branch information
Showing
11 changed files
with
365 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
Metrics | ||
======= | ||
|
||
To assist in monitoring, a collection of metrics is gathered and returned as JSON. | ||
|
||
* https://monitor.sns.gov/metrics/ (for all the data at once) | ||
* https://monitor.sns.gov/metrics/workflow_diagnostics/ (for workflow diagnostics) | ||
* https://monitor.sns.gov/metrics/postprocessing_diagnostics/ (for postprocessing diagnostics) | ||
* https://monitor.sns.gov/metrics/instrument_status/ (for instrument status) | ||
* https://monitor.sns.gov/metrics/run_statuses/ (returns the count of the current reduction statuses for all the runs started in the last hour) | ||
* https://monitor.sns.gov/metrics/run_statuses/{n}/ (returns the count of the current reduction statuses for all the runs started in the last *n* minutes, *e.g.* https://monitor.sns.gov/metrics/run_statuses/525600/ will return the counts for the last year) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
from django.urls import path | ||
from . import views | ||
|
||
# URL namespace for this app.
app_name = "metrics"

# NOTE(review): every route below begins with "/". Django's system check
# urls.W002 warns about that; the routes only resolve as documented if the
# parent include() prefix has no trailing slash -- confirm against the
# project-level URLconf before changing either side.
#
# "run_statuses" is registered twice on purpose: once without arguments and
# once with an integer ``minutes`` captured from the URL; both dispatch to
# the same view.
urlpatterns = [
    path(route="/", view=views.metrics, name="metrics"),
    path(
        route="/workflow_diagnostics/",
        view=views.workflow_diagnostics,
        name="workflow_diagnostics",
    ),
    path(
        route="/postprocessing_diagnostics/",
        view=views.postprocessing_diagnostics,
        name="postprocessing_diagnostics",
    ),
    path(
        route="/instrument_status/",
        view=views.instrument_status,
        name="instrument_status",
    ),
    path(route="/run_statuses/", view=views.run_statuses, name="run_statuses"),
    path(
        route="/run_statuses/<int:minutes>/",
        view=views.run_statuses,
        name="run_statuses",
    ),
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
from reporting.report.models import Instrument, DataRun, WorkflowSummary, Information | ||
from reporting.dasmon.models import Parameter, StatusCache, ActiveInstrument | ||
from reporting.report.view_util import is_acquisition_complete | ||
from reporting.dasmon.view_util import is_running | ||
from django.conf import settings | ||
from django.utils import timezone | ||
from django.db.models import Q | ||
|
||
|
||
def postprocessing_diagnostics():
    """Collect and return cataloging & reduction (post-processing) diagnostics.

    Every ``Parameter`` whose name starts with ``settings.SYSTEM_STATUS_PREFIX``
    followed by one of the prefixes in ``settings.POSTPROCESS_NODE_PREFIX`` is
    treated as one agent; the companion ``<name>_pid`` parameters are excluded
    from that scan and only used to look up the agent's PID.

    Returns:
        list[dict]: one dict per agent, containing

        * ``"name"``: the parameter name with the status prefix removed;
        * ``"timestamp"``: timestamp of the latest ``StatusCache`` entry for
          that parameter on the "common" instrument, or ``None`` when no
          entry has been cached yet;
        * ``"PID"`` (only if available): value of the latest ``StatusCache``
          entry of the ``<name>_pid`` parameter;
        * ``"last_message"`` / ``"last_message_timestamp"`` (only if
          available): string form and ``created_on`` of the run status
          attached to the newest ``Information`` row whose description is
          the agent name.

    Raises:
        Instrument.DoesNotExist: if there is no instrument named "common".
    """
    common_services = Instrument.objects.get(name="common")
    agents = []

    for node_prefix in settings.POSTPROCESS_NODE_PREFIX:
        params = Parameter.objects.filter(
            ~Q(name__endswith="_pid"), name__startswith=settings.SYSTEM_STATUS_PREFIX + node_prefix
        )
        for param in params:
            node = param.name.removeprefix(settings.SYSTEM_STATUS_PREFIX)
            info = {"name": node}

            # A parameter can exist before any status has been cached for it.
            # Report the agent with a null timestamp instead of letting one
            # missing row abort the whole diagnostics collection.
            try:
                heartbeat = StatusCache.objects.filter(instrument_id=common_services, key_id=param).latest(
                    "timestamp"
                )
            except StatusCache.DoesNotExist:
                info["timestamp"] = None
            else:
                info["timestamp"] = heartbeat.timestamp

            # The PID is optional: the "_pid" parameter or its cached value
            # may be missing.
            try:
                pid = Parameter.objects.get(name=param.name + "_pid")
                info["PID"] = (
                    StatusCache.objects.filter(instrument_id=common_services, key_id=pid).latest("timestamp").value
                )
            except (Parameter.DoesNotExist, StatusCache.DoesNotExist):
                pass

            # The last message is optional as well.
            try:
                last_status = Information.objects.filter(description=node).latest("id")
            except Information.DoesNotExist:
                pass
            else:
                info["last_message"] = str(last_status.run_status_id)
                info["last_message_timestamp"] = last_status.run_status_id.created_on

            agents.append(info)

    return agents
|
||
|
||
def instrument_status():
    """Map each live instrument to its running state.

    Instruments are visited in name order. Only those for which
    ``ActiveInstrument.objects.is_alive`` is truthy are included; the key is
    the string form of the instrument and the value is whatever
    ``is_running`` returns for it.
    """
    return {
        str(instrument): is_running(instrument)
        for instrument in Instrument.objects.all().order_by("name")
        if ActiveInstrument.objects.is_alive(instrument)
    }
|
||
|
||
def run_statuses(minutes=60):
    """Count the reduction statuses of the runs created in the last *minutes*.

    Args:
        minutes: size of the look-back window in minutes (default 60). A
            window too large to be represented as a datetime is treated as
            "all runs" instead of raising.

    Returns:
        dict: counters keyed by ``"total"``, ``"acquiring"``, ``"incomplete"``,
        ``"complete"``, ``"error"`` and ``"unknown"``. A run is

        * ``unknown`` when it has no ``WorkflowSummary``;
        * ``acquiring`` when ``is_acquisition_complete`` is false;
        * ``complete`` when its summary is flagged complete;
        * ``incomplete`` when it is not complete and has no last error;
        * ``error`` otherwise.
    """
    # Use the stdlib class directly: ``django.utils.timezone.timedelta`` only
    # works because that module happens to import it.
    from datetime import timedelta

    runs = DataRun.objects.all()
    try:
        since = timezone.now() - timedelta(minutes=minutes)
    except OverflowError:
        # ``minutes`` comes from the URL and is unbounded; a window reaching
        # before the earliest representable date simply means "everything".
        pass
    else:
        runs = runs.filter(created_on__gte=since)
    runs = runs.order_by("created_on")

    # len() evaluates the queryset once; the loop below reuses the cached rows.
    statuses = {"total": len(runs), "acquiring": 0, "incomplete": 0, "complete": 0, "error": 0, "unknown": 0}

    for run_id in runs:
        try:
            summary = WorkflowSummary.objects.get(run_id=run_id)
        except WorkflowSummary.DoesNotExist:
            statuses["unknown"] += 1
            continue

        if not is_acquisition_complete(run_id):
            statuses["acquiring"] += 1
        elif summary.complete:
            statuses["complete"] += 1
        elif run_id.last_error() is None:
            statuses["incomplete"] += 1
        else:
            statuses["error"] += 1

    return statuses
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
from django.http import JsonResponse | ||
from django.conf import settings | ||
from django.views.decorators.cache import cache_page | ||
import reporting.users.view_util as users_view_util | ||
import reporting.dasmon.view_util as dasmon_view_util | ||
from . import view_util | ||
|
||
|
||
@users_view_util.login_or_local_required_401
@cache_page(settings.FAST_PAGE_CACHE_TIMEOUT)
def metrics(request):
    """Return all metric sections combined in a single JSON object.

    The response has one key per section: ``workflow_diagnostics``,
    ``postprocessing_diagnostics``, ``instrument_status`` and
    ``run_statuses`` (with its default time window). The rendered page is
    cached for ``settings.FAST_PAGE_CACHE_TIMEOUT``.
    """
    payload = {
        "workflow_diagnostics": dasmon_view_util.workflow_diagnostics(),
        "postprocessing_diagnostics": view_util.postprocessing_diagnostics(),
        "instrument_status": view_util.instrument_status(),
        "run_statuses": view_util.run_statuses(),
    }
    return JsonResponse(payload)
|
||
|
||
@users_view_util.login_or_local_required_401
@cache_page(settings.FAST_PAGE_CACHE_TIMEOUT)
def workflow_diagnostics(request):
    """Return the workflow diagnostics from the dasmon helpers as JSON."""
    diagnostics = dasmon_view_util.workflow_diagnostics()
    return JsonResponse(diagnostics)
|
||
|
||
@users_view_util.login_or_local_required_401
@cache_page(settings.FAST_PAGE_CACHE_TIMEOUT)
def postprocessing_diagnostics(request):
    """Return the post-processing agent diagnostics as a JSON array."""
    agents = view_util.postprocessing_diagnostics()
    # The helper returns a list, which JsonResponse only accepts with safe=False.
    return JsonResponse(agents, safe=False)
|
||
|
||
@users_view_util.login_or_local_required_401
@cache_page(settings.FAST_PAGE_CACHE_TIMEOUT)
def instrument_status(request):
    """Return the instrument-to-running-state mapping as JSON."""
    status_by_instrument = view_util.instrument_status()
    return JsonResponse(status_by_instrument)
|
||
|
||
@users_view_util.login_or_local_required_401
@cache_page(settings.FAST_PAGE_CACHE_TIMEOUT)
def run_statuses(request, minutes=60):
    """Return the run status counts for the last *minutes* minutes as JSON.

    ``minutes`` defaults to 60 when the URL does not supply it.
    """
    counts = view_util.run_statuses(minutes)
    return JsonResponse(counts)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.