"""crawl logs"""

from typing import TYPE_CHECKING, Any, Optional, Dict, Tuple, List

import json
from uuid import UUID, uuid4

from fastapi import HTTPException
import pymongo

from .models import CrawlLogLine, Organization
from .pagination import DEFAULT_PAGE_SIZE

if TYPE_CHECKING:
    from .orgs import OrgOps
else:
    OrgOps = object


# ============================================================================
class CrawlLogOps:
    """crawl log management"""

    org_ops: OrgOps

    # pylint: disable=too-many-locals, too-many-arguments, invalid-name

    def __init__(self, mdb, org_ops):
        self.logs = mdb["crawl_logs"]
        self.org_ops = org_ops

    async def init_index(self):
        """init index for crawl logs"""
        # one compound index per sortable/filterable field, each prefixed by
        # (crawlId, oid, qaRunId) so lookups are always scoped to one crawl
        for field in ("timestamp", "logLevel", "context", "message"):
            await self.logs.create_index(
                [
                    ("crawlId", pymongo.HASHED),
                    ("oid", pymongo.ASCENDING),
                    ("qaRunId", pymongo.ASCENDING),
                    (field, pymongo.ASCENDING),
                ]
            )

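    # For reference, a query of the (hypothetical) shape
    #   {"crawlId": ..., "oid": ..., "qaRunId": ..., "logLevel": {"$in": [...]}}
    # is served by the (crawlId, oid, qaRunId, logLevel) index above, and
    # likewise for the timestamp, context, and message variants.
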
    async def add_log_line(
        self,
        crawl_id: str,
        oid: UUID,
        log_line: str,
        qa_run_id: Optional[str] = None,
    ) -> bool:
        """add crawl log line to database"""
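        # A log line is expected to be a single JSON object; a hypothetical
        # example with the keys read below:
        # {"timestamp": "2024-01-01T00:00:00.000Z", "logLevel": "error",
        #  "context": "general", "message": "page load failed",
        #  "details": {"page": "https://example.com/"}}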
        try:
            log_dict = json.loads(log_line)

            # Ensure details are a dictionary:
            # if not (e.g. a list), wrap them under an "items" key
            details = None
            log_dict_details = log_dict.get("details")
            if log_dict_details:
                if isinstance(log_dict_details, dict):
                    details = log_dict_details
                else:
                    details = {"items": log_dict_details}

            log_to_add = CrawlLogLine(
                id=uuid4(),
                crawlId=crawl_id,
                oid=oid,
                qaRunId=qa_run_id,
                timestamp=log_dict["timestamp"],
                logLevel=log_dict["logLevel"],
                context=log_dict["context"],
                message=log_dict["message"],
                details=details,
            )
            res = await self.logs.insert_one(log_to_add.to_dict())
            return res is not None
        # pylint: disable=broad-exception-caught
        except Exception as err:
            print(
                f"Error adding log line for crawl {crawl_id} to database: {err}",
                flush=True,
            )
            return False

    async def get_crawl_logs(
        self,
        org: Organization,
        crawl_id: str,
        page_size: int = DEFAULT_PAGE_SIZE,
        page: int = 1,
        sort_by: str = "timestamp",
        sort_direction: int = -1,
        contexts: Optional[List[str]] = None,
        log_levels: Optional[List[str]] = None,
        qa_run_id: Optional[str] = None,
    ) -> Tuple[List[CrawlLogLine], int]:
        """list all logs for a particular crawl"""
        # pylint: disable=too-many-locals, duplicate-code

        # Zero-index page for query
        page = page - 1
        skip = page_size * page

        # qaRunId is unset for regular crawl logs, so leaving qa_run_id as
        # None matches only non-QA log lines
        match_query: Dict[str, Any] = {
            "oid": org.id,
            "crawlId": crawl_id,
            "qaRunId": qa_run_id,
        }

        if contexts:
            match_query["context"] = {"$in": contexts}

        if log_levels:
            match_query["logLevel"] = {"$in": log_levels}

        aggregate: List[Dict[str, Any]] = [{"$match": match_query}]

        if sort_by:
            if sort_by not in (
                "timestamp",
                "logLevel",
                "context",
                "message",
            ):
                raise HTTPException(status_code=400, detail="invalid_sort_by")
            if sort_direction not in (1, -1):
                raise HTTPException(status_code=400, detail="invalid_sort_direction")

            aggregate.append({"$sort": {sort_by: sort_direction}})

        aggregate.extend(
            [
                {
                    "$facet": {
                        "items": [
                            {"$skip": skip},
                            {"$limit": page_size},
                        ],
                        "total": [{"$count": "count"}],
                    }
                },
            ]
        )

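        # $facet yields a single result document of the shape
        # {"items": [<page of log docs>], "total": [{"count": <n>}]},
        # where "total" is an empty list when nothing matched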
        cursor = self.logs.aggregate(aggregate)
        results = await cursor.to_list(length=1)
        result = results[0]
        items = result["items"]

        try:
            total = int(result["total"][0]["count"])
        except (IndexError, ValueError):
            total = 0

        log_lines = [CrawlLogLine.from_dict(res) for res in items]

        return log_lines, total
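
# A minimal usage sketch (illustrative only; `mdb`, `org_ops`, and `org` are
# assumed to be the async Mongo database handle, org ops instance, and
# Organization used elsewhere in the app):
#
#     log_ops = CrawlLogOps(mdb, org_ops)
#     await log_ops.init_index()
#     ok = await log_ops.add_log_line(crawl_id, org.id, line)
#     lines, total = await log_ops.get_crawl_logs(
#         org, crawl_id, log_levels=["error", "fatal"]
#     )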