Skip to content

Commit

Permalink
Automate error scraping
Browse files Browse the repository at this point in the history
  • Loading branch information
SpEcHiDe authored and null-nick committed Sep 19, 2024
1 parent 0340cc7 commit d84bd86
Show file tree
Hide file tree
Showing 6 changed files with 2,107 additions and 499 deletions.
38 changes: 38 additions & 0 deletions .github/workflows/scrape-errors.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Daily automation: re-scrape the Telegram API error list and open a PR
# with any changes to the TSV files under compiler/errors/source/.
name: Scrape Errors
on:
  workflow_dispatch: {} # Allow manually kicking off builds
  schedule:
    - cron: '0 12 * * *' # Every day at 12:00 (noon). Ref https://crontab.guru/examples.html
jobs:
  build:
    name: scrape-errors
    runs-on: ubuntu-latest
    steps:
      # Shallow clone is enough: we only rewrite files, never read history.
      - uses: actions/checkout@v3
        with:
          fetch-depth: 1
      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          python-version: '3.9'

      # `scrape` downloads the current error list; `sort` normalizes row
      # order so the resulting diff is stable between runs.
      - name: scrape
        run: |
          cd compiler/errors/
          pip install --upgrade pip setuptools wheel
          pip install requests==2.28.1
          python sort.py scrape
          python sort.py sort
      # Opens (or updates) a PR on a fixed branch; no-op when nothing changed.
      - name: Open Pull Request
        uses: peter-evans/create-pull-request@v4
        with:
          commit-message: >
            Update unknown_errors
          title: >
            Update Telegram API errors
          body: >
            This is an automated PR. Please check the diff, and the action logs, to check for any funky behaviour.
          branch: automated/api-error-scrape
          labels: automated
          delete-branch: true
58 changes: 47 additions & 11 deletions compiler/errors/sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,54 @@

import csv
from pathlib import Path
import re
import requests # requests==2.28.1
import sys

for p in Path("source").glob("*.tsv"):
with open(p) as f:
reader = csv.reader(f, delimiter="\t")
dct = {k: v for k, v in reader if k != "id"}
keys = sorted(dct)
if len(sys.argv) != 1:
sys.exit(1)

with open(p, "w") as f:
f.write("id\tmessage\n")
if sys.argv[1] == "sort":
for p in Path("source").glob("*.tsv"):
with open(p) as f:
reader = csv.reader(f, delimiter="\t")
dct = {k: v for k, v in reader if k != "id"}
keys = sorted(dct)

for i, item in enumerate(keys, start=1):
f.write(f"{item}\t{dct[item]}")
with open(p, "w") as f:
f.write("id\tmessage\n")

if i != len(keys):
f.write("\n")
for i, item in enumerate(keys, start=1):
f.write(f"{item}\t{dct[item]}")

if i != len(keys):
f.write("\n")

elif sys.argv[1] == "scrape":
b = "https://core.telegram.org"
c = "/api/errors"
a = requests.get(b + c)
d = a.text
e = r"\<a\ href\=\"(.*)\"\>here.*\<\/a\>"
f = re.search(e, d)
if f:
a = requests.get(
b + f.group(1)
)
d = a.json()
e = d.get("errors", [])
w = ""
for h in e:
j = b.get("errors").get(h)
for k in j:
if k.endswith("_*"):
continue
g = d.get("descriptions")
l = g.get(k)
m = k.replace("_%d", "_X")
l = l.replace("%d", "{value}")
w += f"{m}\t{l}\n"
for p in Path("source/").glob(f"{d}*.tsv"):
with open(p, "w") as f:
f.write("id\tmessage\n")
f.write(w)
Loading

0 comments on commit d84bd86

Please sign in to comment.