Skip to content

Commit

Permalink
Also exclude "Error loading" website errors
Browse files: browse the repository at this point in the history
Following up on #83
  • Loading branch information
ghostwords committed Mar 7, 2024
1 parent 6cb432d commit ca9c0b7
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 6 deletions.
7 changes: 3 additions & 4 deletions crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,8 @@ def get_recently_failed_domains():
return domains
revisions = revisions.split('\n')

error_pattern = re.compile("Exception on ([^:]+):")
timeout_pattern = re.compile("Timed out loading (.+)$")
error_pattern = re.compile("(?:Error loading|Exception on) ([^: ]+):")
timeout_pattern = re.compile("Timed out loading ([^ ]+)$")
timeout_counts = {}
logs = []

Expand All @@ -178,8 +178,7 @@ def get_recently_failed_domains():
domains.add(matches.group(1))
elif matches := timeout_pattern.search(line):
domain = matches.group(1)
if domain != "extension page":
timeout_counts[domain] = timeout_counts.get(domain, 0) + 1
timeout_counts[domain] = timeout_counts.get(domain, 0) + 1
logs.append(log_txt)

num_scans = len(revisions)
Expand Down
9 changes: 7 additions & 2 deletions tests/sitelist_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,11 @@ def mock_run(cmd, cwd=None): # pylint:disable=unused-argument

if cmd == "git show klmno:log.txt":
return "\n".join(["Visiting 1: example.website",
"Timed out loading example.website"])
"Timed out loading example.website",
"Visiting 2: example.com",
"Error loading extension page (JavascriptException):",
"Visiting 3: example.us",
"Error loading example.us:"])

return ""

Expand All @@ -96,4 +100,5 @@ def mock_run(cmd, cwd=None): # pylint:disable=unused-argument
"example.net",
"example.org",
"example.co.uk",
"example.website"])
"example.website",
"example.us"])

0 comments on commit ca9c0b7

Please sign in to comment.