Skip to content

Commit

Permalink
Automate error scraping
Browse files Browse the repository at this point in the history
  • Loading branch information
SpEcHiDe authored and null-nick committed Sep 19, 2024
1 parent 0340cc7 commit d84bd86
Show file tree
Hide file tree
Showing 6 changed files with 2,107 additions and 499 deletions.
38 changes: 38 additions & 0 deletions .github/workflows/scrape-errors.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Daily automation: re-scrape the Telegram API error list and open a PR
# with any changes to the TSV files under compiler/errors/source/.
name: Scrape Errors
on:
  workflow_dispatch: {} # Allow manually kicking off builds
  schedule:
    - cron: '0 12 * * *' # Every day at 12:00 (noon). Ref https://crontab.guru/examples.html
jobs:
  build:
    name: scrape-errors
    runs-on: ubuntu-latest
    steps:
      # Shallow clone is enough: we only rewrite files, never read history.
      - uses: actions/checkout@v3
        with:
          fetch-depth: 1
      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          python-version: '3.9'

      # `scrape` downloads the current error list; `sort` normalizes row
      # order so the resulting diff is stable between runs.
      - name: scrape
        run: |
          cd compiler/errors/
          pip install --upgrade pip setuptools wheel
          pip install requests==2.28.1
          python sort.py scrape
          python sort.py sort
      # Opens (or updates) a PR on a fixed branch; no-op when nothing changed.
      - name: Open Pull Request
        uses: peter-evans/create-pull-request@v4
        with:
          commit-message: >
            Update unknown_errors
          title: >
            Update Telegram API errors
          body: >
            This is an automated PR. Please check the diff, and the action logs, to check for any funky behaviour.
          branch: automated/api-error-scrape
          labels: automated
          delete-branch: true
58 changes: 47 additions & 11 deletions compiler/errors/sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,54 @@

import csv
from pathlib import Path
import re
import requests # requests==2.28.1
import sys

for p in Path("source").glob("*.tsv"):
with open(p) as f:
reader = csv.reader(f, delimiter="\t")
dct = {k: v for k, v in reader if k != "id"}
keys = sorted(dct)
if len(sys.argv) != 1:
sys.exit(1)

with open(p, "w") as f:
f.write("id\tmessage\n")
if sys.argv[1] == "sort":
for p in Path("source").glob("*.tsv"):
with open(p) as f:
reader = csv.reader(f, delimiter="\t")
dct = {k: v for k, v in reader if k != "id"}
keys = sorted(dct)

for i, item in enumerate(keys, start=1):
f.write(f"{item}\t{dct[item]}")
with open(p, "w") as f:
f.write("id\tmessage\n")

if i != len(keys):
f.write("\n")
for i, item in enumerate(keys, start=1):
f.write(f"{item}\t{dct[item]}")

if i != len(keys):
f.write("\n")

elif sys.argv[1] == "scrape":
b = "https://core.telegram.org"
c = "/api/errors"
a = requests.get(b + c)
d = a.text
e = r"\<a\ href\=\"(.*)\"\>here.*\<\/a\>"
f = re.search(e, d)
if f:
a = requests.get(
b + f.group(1)
)
d = a.json()
e = d.get("errors", [])
w = ""
for h in e:
j = b.get("errors").get(h)
for k in j:
if k.endswith("_*"):
continue
g = d.get("descriptions")
l = g.get(k)
m = k.replace("_%d", "_X")
l = l.replace("%d", "{value}")
w += f"{m}\t{l}\n"
for p in Path("source/").glob(f"{d}*.tsv"):
with open(p, "w") as f:
f.write("id\tmessage\n")
f.write(w)
Loading

0 comments on commit d84bd86

Please sign in to comment.