Skip to content

Commit 921f306

Browse files
authored
Fix #1472: check attachments bundle (#1473)
* WIP * Add check for attachments bundles * Remove useless change * Remove leftover comment
1 parent dd3d493 commit 921f306

File tree

3 files changed

+274
-0
lines changed

3 files changed

+274
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
"""
2+
Verify freshness and validity of attachment bundles.
3+
4+
For each collection where the attachments bundle is enabled, return the modification timestamp and number of attachments bundled.
5+
"""
6+
7+
import io
8+
import logging
9+
import urllib.parse
10+
import zipfile
11+
from typing import Any
12+
13+
from telescope.typings import CheckResult
14+
from telescope.utils import (
15+
ClientSession,
16+
retry_decorator,
17+
run_parallel,
18+
utcfromhttpdate,
19+
utcfromtimestamp,
20+
)
21+
22+
from .utils import KintoClient, fetch_signed_resources
23+
24+
25+
# NOTE(review): presumably the names of the `run()` parameters that the check
# framework is allowed to expose publicly — confirm against the framework.
EXPOSED_PARAMETERS = ["server"]
26+
27+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
28+
29+
30+
@retry_decorator
async def fetch_binary(url: str, **kwargs) -> tuple[int, str, bytes]:
    """Download `url` and return its status, `Last-Modified` header and body.

    :param url: URL to fetch with a GET request.
    :param kwargs: extra keyword arguments forwarded to `session.get()`.
    :returns: a `(status, last_modified, body)` tuple, where `last_modified`
        is the raw header value (the Unix epoch as an HTTP date when the
        header is absent) and `body` is the raw response content.
    """
    # Log the decoded URL for readability, but request the original one.
    human_url = urllib.parse.unquote(url)
    # Lazy %-style arguments, consistent with the other log calls of this module.
    logger.debug("Fetch binary from '%s'", human_url)
    async with ClientSession() as session:
        async with session.get(url, **kwargs) as response:
            return (
                response.status,
                response.headers.get("Last-Modified", "Mon, 01 Jan 1970 00:00:00 GMT"),
                await response.read(),
            )
41+
42+
43+
async def run(
    server: str, auth: str, margin_publication_hours: int = 12
) -> CheckResult:
    """Check the attachments bundle of every collection that enables one.

    For each signed resource whose source collection metadata has
    `attachment.bundle` set, the bundle is downloaded from
    `{base_url}bundles/{bucket}--{collection}.zip` and inspected.

    :param server: URL of the server to inspect.
    :param auth: credentials passed to the Kinto client.
    :param margin_publication_hours: a bundle is reported as "outdated" when
        the collection timestamp is more than this number of hours ahead of
        the bundle's `Last-Modified` date.
    :returns: a `(success, result)` tuple. `result` maps `"{bucket}/{collection}"`
        to a dict whose `status` is one of "missing", "bad zip", "outdated"
        or "ok"; the last two also carry `size`, `attachments`,
        `publication_timestamp` and `collection_timestamp`. `success` is False
        as soon as one bundle is missing or is not a valid zip archive.
    :raises AssertionError: if no collection has the bundle enabled.
    """
    client = KintoClient(server_url=server, auth=auth)
    resources = await fetch_signed_resources(server, auth)

    logger.debug("Fetch metadata of %s collections", len(resources))
    futures = [
        client.get_collection(
            bucket=resource["source"]["bucket"],
            id=resource["source"]["collection"],
        )
        for resource in resources
    ]
    sources_metadata = await run_parallel(*futures)

    # Keep only the collections whose source metadata enables the bundle.
    # `or {}` also tolerates an explicit null `attachment` field.
    metadata_for_bundled = [
        (resource, metadata)
        for resource, metadata in zip(resources, sources_metadata)
        if (metadata["data"].get("attachment") or {}).get("bundle", False)
    ]
    logger.info("%s collections with attachments bundle", len(metadata_for_bundled))
    assert metadata_for_bundled, metadata_for_bundled

    info = await client.server_info()
    base_url = info["capabilities"]["attachments"]["base_url"]

    # One (bucket id, collection id, records timestamp) triple per bundle,
    # computed once and reused for both the download and the report.
    targets = [
        (
            resource["destination"]["bucket"],
            metadata["data"]["id"],
            resource["last_modified"],
        )
        for resource, metadata in metadata_for_bundled
    ]
    futures_bundles = [
        fetch_binary(f"{base_url}bundles/{bid}--{cid}.zip") for bid, cid, _ in targets
    ]
    bundles = await run_parallel(*futures_bundles)

    result: dict[str, dict[str, Any]] = {}
    success = True
    for (bid, cid, timestamp), bundle in zip(targets, bundles):
        http_status, modified, binary = bundle
        key = f"{bid}/{cid}"
        if http_status >= 400:
            result[key] = {"status": "missing"}
            success = False
            continue

        try:
            # Context manager so that the archive handle is always released.
            with zipfile.ZipFile(io.BytesIO(binary)) as archive:
                nfiles = len(archive.namelist())
        except zipfile.BadZipFile:
            result[key] = {"status": "bad zip"}
            success = False
            continue

        bundle_ts = utcfromhttpdate(modified)
        records_ts = utcfromtimestamp(timestamp)
        # How many hours the collection is ahead of the published bundle.
        lag_hours = (records_ts - bundle_ts).total_seconds() / 3600
        # NOTE(review): an "outdated" bundle is reported but does not turn
        # `success` to False — confirm this is intended.
        status = "outdated" if lag_hours > margin_publication_hours else "ok"
        result[key] = {
            "status": status,
            "size": len(binary),
            "attachments": nfiles,
            "publication_timestamp": bundle_ts.isoformat(),
            "collection_timestamp": records_ts.isoformat(),
        }

    return success, result

telescope/utils.py

+5
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import asyncio
2+
import email.utils
23
import json
34
import logging
45
import textwrap
@@ -164,6 +165,10 @@ def utcfromisoformat(iso8601):
164165
return datetime.fromisoformat(iso8601_tz).replace(tzinfo=timezone.utc)
165166

166167

168+
def utcfromhttpdate(httpdate):
    """Parse an HTTP date string into a timezone-aware UTC datetime.

    :param httpdate: date in the format used by HTTP headers such as
        `Last-Modified` (e.g. ``"Sat, 08 May 1982 00:01:01 GMT"``).
    :returns: the same instant as an aware `datetime` in UTC.
    """
    parsed = email.utils.parsedate_to_datetime(httpdate)
    if parsed.tzinfo is None:
        # A "-0000" zone yields a naive datetime that already holds UTC time.
        return parsed.replace(tzinfo=timezone.utc)
    # Convert (rather than relabel) so that a non-UTC offset keeps its instant.
    return parsed.astimezone(timezone.utc)
170+
171+
167172
def render_checks(func):
168173
async def wrapper(request):
169174
# First, check that client requests supported output format.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
import io
2+
import zipfile
3+
4+
from checks.remotesettings.attachments_bundles import run
5+
6+
7+
# Path template of a collection endpoint; formatted with (bucket, collection).
COLLECTION_URL = "/buckets/{}/collections/{}"
8+
# Path template of a records endpoint; formatted with (bucket, collection).
RECORDS_URL = "/buckets/{}/collections/{}/records"
9+
# Path template of a changeset endpoint; formatted with (bucket, collection).
# NOTE(review): not referenced by the test visible here — confirm it is needed.
CHANGESET_URL = "/buckets/{}/collections/{}/changeset"
10+
11+
12+
def build_zip(num_files=3):
    """Return the bytes of an in-memory deflated zip holding `num_files` entries.

    Entries are named ``fake_file_<index>.txt`` and each contains 1024 "x".
    """
    content = "x" * 1024
    names = [f"fake_file_{index}.txt" for index in range(num_files)]

    buffer = io.BytesIO()
    with zipfile.ZipFile(
        buffer, mode="w", compression=zipfile.ZIP_DEFLATED
    ) as archive:
        for name in names:
            archive.writestr(name, content)
    # The archive must be closed before reading, so its directory is written.
    return buffer.getvalue()
19+
20+
21+
async def test_negative(mock_responses, mock_aioresponses):
    """The check fails when a bundle is missing or is not a valid zip.

    Six collections are published: one without bundle (ignored), one whose
    bundle is missing, one whose bundle is corrupted, and three valid ones
    that are respectively up-to-date, slightly late and outdated.
    """
    server_url = "http://fake.local/v1"
    may8_ts = 389664061000
    may8_http = "Mon, 08 May 1982 00:01:01 GMT"
    may8_iso = "1982-05-08T00:01:01+00:00"

    # Server root: attachments location and the signed resources.
    signed_resource = {
        "source": {"bucket": "main-workspace", "collection": None},
        "preview": {"bucket": "main-preview", "collection": None},
        "destination": {"bucket": "main", "collection": None},
    }
    mock_responses.get(
        server_url + "/",
        payload={
            "capabilities": {
                "attachments": {"base_url": "http://cdn/"},
                "signer": {"resources": [signed_resource]},
            }
        },
    )

    # (record id, collection id, timestamp offset from `may8_ts`)
    published = [
        ("abc", "missing", 0),
        ("efg", "ok", 0),
        ("hij", "badzip", 0),
        ("klm", "outdated", 24 * 3600 * 1000 + 60 * 1000),
        ("nop", "late", 600 * 1000),
        ("qrs", "no-bundle", 0),
    ]
    mock_responses.get(
        server_url + RECORDS_URL.format("monitor", "changes"),
        payload={
            "data": [
                {
                    "id": record_id,
                    "bucket": "main",
                    "collection": collection_id,
                    "last_modified": may8_ts + offset,
                }
                for record_id, collection_id, offset in published
            ]
        },
    )

    # Source collections metadata: every collection but one enables the bundle.
    for _, collection_id, _ in published:
        mock_responses.get(
            server_url + COLLECTION_URL.format("main-workspace", collection_id),
            payload={
                "data": {
                    "id": collection_id,
                    "bucket": "main-workspace",
                    "attachment": {"bundle": collection_id != "no-bundle"},
                }
            },
        )

    # Bundles served by the CDN.
    mock_aioresponses.get("http://cdn/bundles/main--missing.zip", status=404)
    served = (
        ("http://cdn/bundles/main--ok.zip", build_zip()),
        ("http://cdn/bundles/main--outdated.zip", build_zip(num_files=6)),
        ("http://cdn/bundles/main--late.zip", build_zip(num_files=6)),
        ("http://cdn/bundles/main--badzip.zip", b"boom"),
    )
    for bundle_url, content in served:
        mock_aioresponses.get(
            bundle_url,
            body=content,
            headers={"Last-Modified": may8_http},
        )

    success, details = await run(server_url, auth="")

    expected = {
        "main/badzip": {"status": "bad zip"},
        "main/missing": {"status": "missing"},
        "main/ok": {
            "status": "ok",
            "attachments": 3,
            "collection_timestamp": "1982-05-08T00:01:01+00:00",
            "publication_timestamp": may8_iso,
            "size": 373,
        },
        "main/late": {
            "status": "ok",
            "attachments": 6,
            "collection_timestamp": "1982-05-08T00:11:01+00:00",
            "publication_timestamp": may8_iso,
            "size": 724,
        },
        "main/outdated": {
            "attachments": 6,
            "collection_timestamp": "1982-05-09T00:02:01+00:00",
            "publication_timestamp": may8_iso,
            "size": 724,
            "status": "outdated",
        },
    }
    assert success is False
    assert details == expected

0 commit comments

Comments
 (0)