Skip to content

Commit

Permalink
Fix #61: Normandy checks for uptake and reported recipes (#65)
Browse files Browse the repository at this point in the history
  • Loading branch information
leplatrem authored Sep 19, 2019
1 parent 9de4908 commit fc46185
Show file tree
Hide file tree
Showing 8 changed files with 357 additions and 38 deletions.
46 changes: 46 additions & 0 deletions checks/normandy/reported_recipes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""
Recipes available on the server should match the recipes clients are reporting
Uptake Telemetry about.
The list of recipes for which no event was received is returned. The min/max
timestamps give the datetime range of the dataset obtained from
https://sql.telemetry.mozilla.org/queries/64921/
"""
import aiohttp

from poucave.typings import CheckResult
from poucave.utils import fetch_redash


EXPOSED_PARAMETERS = ["server"]

REDASH_QUERY_ID = 64921

NORMANDY_URL = "{server}/api/v1/recipe/signed/"


async def run(api_key: str, server: str) -> CheckResult:
# Fetch latest results from Redash JSON API.
rows = await fetch_redash(REDASH_QUERY_ID, api_key)

min_timestamp = min(r["min_timestamp"] for r in rows)
max_timestamp = max(r["max_timestamp"] for r in rows)

reported_recipes = set(int(r["source"].split("/")[-1]) for r in rows)

async with aiohttp.ClientSession() as session:
# Recipes from source of truth.
normandy_url = NORMANDY_URL.format(server=server)
async with session.get(normandy_url) as response:
normandy_recipes = await response.json()

normandy_recipes = set(r["recipe"]["id"] for r in normandy_recipes)

missing = list(normandy_recipes - reported_recipes)

data = {
"min_timestamp": min_timestamp,
"max_timestamp": max_timestamp,
"missing": missing,
}
return len(missing) == 0, data
118 changes: 118 additions & 0 deletions checks/normandy/uptake_error_rate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
"""
The percentage of reported errors in Uptake Telemetry should be under the specified
maximum.
For each recipe whose error rate is above the maximum, the total number of events
for each status is returned. The min/max timestamps give the datetime range of the
dataset obtained from https://sql.telemetry.mozilla.org/queries/64921/
"""
from collections import defaultdict
from typing import Dict, List

from poucave.typings import CheckResult
from poucave.utils import fetch_redash


EXPOSED_PARAMETERS = ["max_error_percentage", "min_total_events"]

REDASH_QUERY_ID = 64921

# Normandy uses the Uptake telemetry statuses in a specific way.
# See https://searchfox.org/mozilla-central/rev/4218cb868d8deed13e902718ba2595d85e12b86b/toolkit/components/normandy/lib/Uptake.jsm#23-43
NORMANDY_STATUSES = {
"custom_1_error": "recipe_action_disabled",
"backoff": "recipe_didnt_match_filter",
"apply_error": "recipe_execution_error",
"content_error": "recipe_filter_broken",
"download_error": "recipe_invalid_action",
"signature_error": "runner_invalid_signature",
}
UPTAKE_STATUSES = {v: k for k, v in NORMANDY_STATUSES.items()}


def sort_dict_desc(d, key):
return dict(sorted(d.items(), key=key, reverse=True))


async def run(
api_key: str,
max_error_percentage: float,
min_total_events: int = 100,
ignore_status: List[str] = [],
) -> CheckResult:
# Ignored statuses are specified using the Normandy ones.
ignored_status = [UPTAKE_STATUSES.get(s, s) for s in ignore_status]
# A client reporting that recipe didn't match filter is not an error.
ignore_status += ["recipe_didnt_match_filter"]

# Fetch latest results from Redash JSON API.
rows = await fetch_redash(REDASH_QUERY_ID, api_key)

min_timestamp = min(r["min_timestamp"] for r in rows)
max_timestamp = max(r["max_timestamp"] for r in rows)

by_collection: Dict[str, Dict[str, int]] = defaultdict(dict)
for row in rows:
rid = int(row["source"].split("/")[-1])
by_collection[rid][row["status"]] = row["total"]

error_rates = {}
for rid, all_statuses in by_collection.items():
total_statuses = sum(total for status, total in all_statuses.items())

# Ignore uptake Telemetry of a certain recipe if the total of collected
# events is too small.
if total_statuses < min_total_events:
continue

statuses = {
NORMANDY_STATUSES.get(status, status): total
for status, total in all_statuses.items()
if status not in ignored_status
}
ignored = {
NORMANDY_STATUSES.get(status, status): total
for status, total in all_statuses.items()
if status in ignored_status
}
total_errors = sum(
total for status, total in statuses.items() if status.endswith("_error")
)
error_rate = round(total_errors * 100 / total_statuses, 2)

if error_rate < max_error_percentage:
continue

error_rates[rid] = {
"error_rate": error_rate,
"statuses": sort_dict_desc(statuses, key=lambda item: item[1]),
"ignored": sort_dict_desc(ignored, key=lambda item: item[1]),
}

sort_by_rate = sort_dict_desc(error_rates, key=lambda item: item[1]["error_rate"])

data = {
"recipes": sort_by_rate,
"min_timestamp": min_timestamp,
"max_timestamp": max_timestamp,
}
"""
{
"recipes": {
532: {
"error_rate": 60.4,
"statuses": {
"recipe_execution_error": 56,
"success": 35,
"action_post_execution_error": 5
},
"ignored": {
"recipe_didnt_match_filter": 5
}
}
},
"min_timestamp": "2019-09-19T03:47:42.773",
"max_timestamp": "2019-09-19T09:43:26.083"
}
"""
return len(sort_by_rate) == 0, data
19 changes: 3 additions & 16 deletions checks/remotesettings/uptake_error_rate.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,39 +10,26 @@
from typing import Dict, List

from poucave.typings import CheckResult
from poucave.utils import fetch_json
from poucave.utils import fetch_redash


EXPOSED_PARAMETERS = ["max_error_percentage", "min_total_events"]

REDASH_URI = (
f"https://sql.telemetry.mozilla.org/api/queries/64808/results.json?api_key="
)
REDASH_QUERY_ID = 64808


def sort_dict_desc(d, key):
return dict(sorted(d.items(), key=key, reverse=True))


async def fetch_redash(api_key):
redash_uri = REDASH_URI + api_key

body = await fetch_json(redash_uri)

query_result = body["query_result"]
data = query_result["data"]
rows = data["rows"]
return rows


async def run(
api_key: str,
max_error_percentage: float,
min_total_events: int = 10000,
ignore_status: List[str] = [],
) -> CheckResult:
# Fetch latest results from Redash JSON API.
rows = await fetch_redash(api_key)
rows = await fetch_redash(REDASH_QUERY_ID, api_key)

min_timestamp = min(r["min_timestamp"] for r in rows)
max_timestamp = max(r["max_timestamp"] for r in rows)
Expand Down
12 changes: 12 additions & 0 deletions poucave/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,18 @@ def get(self, key: str) -> Optional[Any]:
return None


REDASH_URI = "https://sql.telemetry.mozilla.org/api/queries/{}/results.json?api_key={}"


async def fetch_redash(query_id, api_key):
redash_uri = REDASH_URI.format(query_id, api_key)
body = await fetch_json(redash_uri)
query_result = body["query_result"]
data = query_result["data"]
rows = data["rows"]
return rows


retry_decorator = backoff.on_exception(
backoff.expo,
(aiohttp.ClientError, asyncio.TimeoutError),
Expand Down
89 changes: 89 additions & 0 deletions tests/checks/normandy/test_normandy_uptake_error_rate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
from checks.normandy.uptake_error_rate import run
from tests.utils import patch_async


MODULE = "checks.normandy.uptake_error_rate"

FAKE_ROWS = [
{
"status": "success",
"source": "normandy/recipe/123",
"total": 20000,
"min_timestamp": "2019-09-16T02:36:12.348",
"max_timestamp": "2019-09-16T06:24:58.741",
},
{
"status": "apply_error",
"source": "normandy/recipe/123",
"total": 15000,
"min_timestamp": "2019-09-16T03:36:12.348",
"max_timestamp": "2019-09-16T05:24:58.741",
},
{
"status": "backoff",
"source": "normandy/recipe/123",
"total": 5000,
"min_timestamp": "2019-09-16T01:36:12.348",
"max_timestamp": "2019-09-16T07:24:58.741",
},
]


async def test_positive():
with patch_async(f"{MODULE}.fetch_redash", return_value=FAKE_ROWS):
status, data = await run(api_key="", max_error_percentage=100.0)

assert status is True
assert data == {
"recipes": {},
"min_timestamp": "2019-09-16T01:36:12.348",
"max_timestamp": "2019-09-16T07:24:58.741",
}


async def test_negative():
with patch_async(f"{MODULE}.fetch_redash", return_value=FAKE_ROWS):
status, data = await run(api_key="", max_error_percentage=0.1)

assert status is False
assert data == {
"recipes": {
123: {
"error_rate": 37.5,
"statuses": {"success": 20000, "recipe_execution_error": 15000},
"ignored": {"recipe_didnt_match_filter": 5000},
}
},
"min_timestamp": "2019-09-16T01:36:12.348",
"max_timestamp": "2019-09-16T07:24:58.741",
}


async def test_ignore_status():
with patch_async(f"{MODULE}.fetch_redash", return_value=FAKE_ROWS):
status, data = await run(
api_key="",
max_error_percentage=0.1,
ignore_status=["recipe_execution_error"],
)

assert status is True
assert data == {
"recipes": {},
"min_timestamp": "2019-09-16T01:36:12.348",
"max_timestamp": "2019-09-16T07:24:58.741",
}


async def test_min_total_events():
with patch_async(f"{MODULE}.fetch_redash", return_value=FAKE_ROWS):
status, data = await run(
api_key="", max_error_percentage=0.1, min_total_events=40001
)

assert status is True
assert data == {
"recipes": {},
"min_timestamp": "2019-09-16T01:36:12.348",
"max_timestamp": "2019-09-16T07:24:58.741",
}
67 changes: 67 additions & 0 deletions tests/checks/normandy/test_reported_recipes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
from checks.normandy.reported_recipes import run
from tests.utils import patch_async


MODULE = "checks.normandy.reported_recipes"

FAKE_ROWS = [
{
"status": "success",
"source": "normandy/recipe/123",
"total": 20000,
"min_timestamp": "2019-09-16T02:36:12.348",
"max_timestamp": "2019-09-16T06:24:58.741",
},
{
"status": "backoff",
"source": "normandy/recipe/456",
"total": 15000,
"min_timestamp": "2019-09-16T03:36:12.348",
"max_timestamp": "2019-09-16T05:24:58.741",
},
{
"status": "apply_error",
"source": "normandy/recipe/123",
"total": 5000,
"min_timestamp": "2019-09-16T01:36:12.348",
"max_timestamp": "2019-09-16T07:24:58.741",
},
]


async def test_positive(mock_aioresponses):
mock_aioresponses.get(
"http://normandy/api/v1/recipe/signed/",
payload=[{"recipe": {"id": 123}}, {"recipe": {"id": 456}}],
)

with patch_async(f"{MODULE}.fetch_redash", return_value=FAKE_ROWS):
status, data = await run(server="http://normandy", api_key="")

assert status is True
assert data == {
"missing": [],
"min_timestamp": "2019-09-16T01:36:12.348",
"max_timestamp": "2019-09-16T07:24:58.741",
}


async def test_negative(mock_aioresponses):
mock_aioresponses.get(
"http://normandy/api/v1/recipe/signed/",
payload=[
{"recipe": {"id": 123}},
{"recipe": {"id": 456}},
{"recipe": {"id": 789}},
],
)

with patch_async(f"{MODULE}.fetch_redash", return_value=FAKE_ROWS):
status, data = await run(server="http://normandy", api_key="")

assert status is False
assert data == {
"missing": [789],
"min_timestamp": "2019-09-16T01:36:12.348",
"max_timestamp": "2019-09-16T07:24:58.741",
}
Loading

0 comments on commit fc46185

Please sign in to comment.