Commit f20613b

Merge branch 'main' into weblate
2 parents: 9606b95 + d8817e6

Only a subset of the 51 changed files is shown below.

51 files changed: +1361 −442 lines

backend/btrixcloud/colls.py

Lines changed: 8 additions & 6 deletions
@@ -297,11 +297,12 @@ async def get_collection_raw(
    async def get_collection_raw_by_slug(
        self,
        coll_slug: str,
+        oid: UUID,
        previous_slugs: bool = False,
        public_or_unlisted_only: bool = False,
    ) -> Dict[str, Any]:
        """Get collection by slug (current or previous) as dict from database"""
-        query: dict[str, object] = {}
+        query: dict[str, object] = {"oid": oid}
        if previous_slugs:
            query["previousSlugs"] = coll_slug
        else:

@@ -323,12 +324,12 @@ async def get_collection(
        return Collection.from_dict(result)

    async def get_collection_by_slug(
-        self, coll_slug: str, public_or_unlisted_only: bool = False
+        self, coll_slug: str, oid: UUID, public_or_unlisted_only: bool = False
    ) -> Collection:
        """Get collection by slug"""
        try:
            result = await self.get_collection_raw_by_slug(
-                coll_slug, public_or_unlisted_only=public_or_unlisted_only
+                coll_slug, oid, public_or_unlisted_only=public_or_unlisted_only
            )
            return Collection.from_dict(result)
        # pylint: disable=broad-exception-caught

@@ -337,6 +338,7 @@ async def get_collection_by_slug(

            result = await self.get_collection_raw_by_slug(
                coll_slug,
+                oid,
                previous_slugs=True,
                public_or_unlisted_only=public_or_unlisted_only,
            )

@@ -432,7 +434,7 @@ async def get_public_thumbnail(
    ) -> StreamingResponse:
        """return thumbnail of public collection, if any"""
        result = await self.get_collection_raw_by_slug(
-            slug, public_or_unlisted_only=True
+            slug, org.id, public_or_unlisted_only=True
        )

        thumbnail = result.get("thumbnail")

@@ -1227,7 +1229,7 @@ async def get_public_collection(
            # pylint: disable=raise-missing-from
            raise HTTPException(status_code=404, detail="collection_not_found")

-        coll = await colls.get_collection_by_slug(coll_slug)
+        coll = await colls.get_collection_by_slug(coll_slug, org.id)

        return await colls.get_public_collection_out(
            coll.id, org, dict(request.headers), allow_unlisted=True

@@ -1251,7 +1253,7 @@ async def download_public_collection(

        # Make sure collection exists and is public/unlisted
        coll = await colls.get_collection_by_slug(
-            coll_slug, public_or_unlisted_only=True
+            coll_slug, org.id, public_or_unlisted_only=True
        )

        if coll.allowPublicDownload is False:
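The net effect of this file's changes is that slug lookups are now scoped to a single organization via the new `oid` argument. A minimal caller sketch under that reading; the slug string is an illustrative placeholder, while `colls`, `org`, and the method signature come from the diff above (to be run inside an async context):

# illustrative sketch only; `colls` is the collections ops object used by the
# public route handlers above, and `org` is an already-resolved Organization
coll = await colls.get_collection_by_slug(
    "my-collection",          # coll_slug (placeholder value)
    org.id,                   # oid: lookup is now restricted to this org
    public_or_unlisted_only=True,
)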

backend/btrixcloud/crawl_logs.py

Lines changed: 181 additions & 0 deletions
@@ -0,0 +1,181 @@
+"""crawl logs"""
+
+from typing import TYPE_CHECKING, Any, Optional, Dict, Tuple, List
+
+import json
+from uuid import UUID, uuid4
+
+from fastapi import HTTPException
+import pymongo
+
+from .models import CrawlLogLine, Organization
+from .pagination import DEFAULT_PAGE_SIZE
+
+if TYPE_CHECKING:
+    from .orgs import OrgOps
+else:
+    OrgOps = object
+
+
+# ============================================================================
+class CrawlLogOps:
+    """crawl log management"""
+
+    org_ops: OrgOps
+
+    # pylint: disable=too-many-locals, too-many-arguments, invalid-name
+
+    def __init__(self, mdb, org_ops):
+        self.logs = mdb["crawl_logs"]
+        self.org_ops = org_ops
+
+    async def init_index(self):
+        """init index for crawl logs"""
+        await self.logs.create_index(
+            [
+                ("crawlId", pymongo.HASHED),
+                ("oid", pymongo.ASCENDING),
+                ("qaRunId", pymongo.ASCENDING),
+                ("timestamp", pymongo.ASCENDING),
+            ]
+        )
+        await self.logs.create_index(
+            [
+                ("crawlId", pymongo.HASHED),
+                ("oid", pymongo.ASCENDING),
+                ("qaRunId", pymongo.ASCENDING),
+                ("logLevel", pymongo.ASCENDING),
+            ]
+        )
+        await self.logs.create_index(
+            [
+                ("crawlId", pymongo.HASHED),
+                ("oid", pymongo.ASCENDING),
+                ("qaRunId", pymongo.ASCENDING),
+                ("context", pymongo.ASCENDING),
+            ]
+        )
+        await self.logs.create_index(
+            [
+                ("crawlId", pymongo.HASHED),
+                ("oid", pymongo.ASCENDING),
+                ("qaRunId", pymongo.ASCENDING),
+                ("message", pymongo.ASCENDING),
+            ]
+        )
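Each of the four compound indexes pairs the filter keys used by get_crawl_logs below (crawlId, oid, qaRunId) with one of the sortable fields, presumably so filtered and sorted pages can be served from an index. A rough sketch of the pipeline prefix they would back; the concrete values and the `org_id` name are placeholders:

# hypothetical shape of the pipeline built in get_crawl_logs below
pipeline = [
    {"$match": {"oid": org_id, "crawlId": "crawl-123", "qaRunId": None}},
    {"$sort": {"timestamp": -1}},
]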
+    async def add_log_line(
+        self,
+        crawl_id: str,
+        oid: UUID,
+        log_line: str,
+        qa_run_id: Optional[str] = None,
+    ) -> bool:
+        """add crawl log line to database"""
+        try:
+            log_dict = json.loads(log_line)
+
+            # Ensure details are a dictionary
+            # If they are a list, convert to a dict
+            details = None
+            log_dict_details = log_dict.get("details")
+            if log_dict_details:
+                if isinstance(log_dict_details, dict):
+                    details = log_dict_details
+                else:
+                    details = {"items": log_dict_details}
+
+            log_to_add = CrawlLogLine(
+                id=uuid4(),
+                crawlId=crawl_id,
+                oid=oid,
+                qaRunId=qa_run_id,
+                timestamp=log_dict["timestamp"],
+                logLevel=log_dict["logLevel"],
+                context=log_dict["context"],
+                message=log_dict["message"],
+                details=details,
+            )
+            res = await self.logs.insert_one(log_to_add.to_dict())
+            return res is not None
+        # pylint: disable=broad-exception-caught
+        except Exception as err:
+            print(
+                f"Error adding log line for crawl {crawl_id} to database: {err}",
+                flush=True,
+            )
+            return False
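add_log_line expects each log_line to be a single JSON object with at least timestamp, logLevel, context, and message keys; a details value that is not a dict is wrapped as {"items": ...}. A hedged example of a line that would parse; the field values and the `crawl_log_ops`/`org` names are illustrative placeholders (to be run inside an async context):

# illustrative input only; key names match what the parser above reads
line = (
    '{"timestamp": "2025-01-01T00:00:00.000Z", "logLevel": "error", '
    '"context": "general", "message": "page load failed", '
    '"details": ["https://example.com/"]}'
)
ok = await crawl_log_ops.add_log_line("crawl-123", org.id, line)
# the list under "details" would be stored as {"items": ["https://example.com/"]}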
+    async def get_crawl_logs(
+        self,
+        org: Organization,
+        crawl_id: str,
+        page_size: int = DEFAULT_PAGE_SIZE,
+        page: int = 1,
+        sort_by: str = "timestamp",
+        sort_direction: int = -1,
+        contexts: Optional[List[str]] = None,
+        log_levels: Optional[List[str]] = None,
+        qa_run_id: Optional[str] = None,
+    ) -> Tuple[list[CrawlLogLine], int]:
+        """list all logs for particular crawl"""
+        # pylint: disable=too-many-locals, duplicate-code
+
+        # Zero-index page for query
+        page = page - 1
+        skip = page_size * page
+
+        match_query: Dict[str, Any] = {
+            "oid": org.id,
+            "crawlId": crawl_id,
+            "qaRunId": qa_run_id,
+        }
+
+        if contexts:
+            match_query["context"] = {"$in": contexts}
+
+        if log_levels:
+            match_query["logLevel"] = {"$in": log_levels}
+
+        aggregate: List[Dict[str, Any]] = [{"$match": match_query}]
+
+        if sort_by:
+            if sort_by not in (
+                "timestamp",
+                "logLevel",
+                "context",
+                "message",
+            ):
+                raise HTTPException(status_code=400, detail="invalid_sort_by")
+            if sort_direction not in (1, -1):
+                raise HTTPException(status_code=400, detail="invalid_sort_direction")
+
+            aggregate.extend([{"$sort": {sort_by: sort_direction}}])
+
+        aggregate.extend(
+            [
+                {
+                    "$facet": {
+                        "items": [
+                            {"$skip": skip},
+                            {"$limit": page_size},
+                        ],
+                        "total": [{"$count": "count"}],
+                    }
+                },
+            ]
+        )
+
+        cursor = self.logs.aggregate(aggregate)
+        results = await cursor.to_list(length=1)
+        result = results[0]
+        items = result["items"]
+
+        try:
+            total = int(result["total"][0]["count"])
+        except (IndexError, ValueError):
+            total = 0
+
+        log_lines = [CrawlLogLine.from_dict(res) for res in items]
+
+        return log_lines, total
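For orientation, a minimal paginated-read sketch against the new method; the instance name `crawl_log_ops`, the crawl id, and the level value are placeholders, while the parameters come from the signature above (to be run inside an async context):

# hypothetical usage; crawl_log_ops is a CrawlLogOps instance built from the app's mdb
log_lines, total = await crawl_log_ops.get_crawl_logs(
    org,
    crawl_id="crawl-123",
    page=1,
    page_size=50,
    log_levels=["error"],     # becomes {"logLevel": {"$in": ["error"]}}
    sort_by="timestamp",
    sort_direction=-1,        # newest first
)
# total is the overall match count taken from the $facet "total" branch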
