Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/venv

# Docker-compose override files
docker-compose.override*.yml

10 changes: 10 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.3.3
hooks:
# Run the linter.
- id: ruff
args: [ --fix ]
# Run the formatter.
- id: ruff-format
23 changes: 18 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
# UberEatsScraper
Scrape a hell lot of Uber Eats shops and their URL
# Foodiecall
Search through a lot of Uber Eats shops and their URL

## Installation (Manual)

1. Install [Python 3](https://www.python.org/downloads/)
2. Download the [latest release](https://github.com/wxnnvs/UberEatsScraper/releases/latest) to a dedicated folder
3. Run `pip install -r requirements.txt` to install all modules
3. Install dependencies
```
python3 -m venv venv
. venv/bin/activate
pip install -r requirements.txt
```
4. Run `python3 main.py`

Results can be found under the `countries` folder
Expand All @@ -14,6 +19,14 @@ Results can be found under the `countries` folder

1. Install [Docker](https://www.docker.com/get-started/)
2. Run `docker run --rm -it -v .:/app/countries wxnnvs/ubereats-scraper` in a dedicated folder
3. The program will deploy automaticly
3. The program will deploy automatically

Results can be found in the folder you ran the container in
Results can be found in the folder you ran the container in

## Contribute

1. Install the dev requirements
```
pip install -r requirements-dev.txt
pre-commit install
```
13 changes: 13 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
version: "3.7"

services:

mongodb:
image: "mongo:7"
hostname: "mongodb"
restart: "no"
volumes:
- mongodb_data:/data/db

volumes:
mongodb_data:
133 changes: 80 additions & 53 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,137 +9,164 @@
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
}

# ISO 3166-1 alpha-2 codes of the Uber Eats markets this scraper supports.
# (Collapsed from the duplicated diff rendering into a single assignment.)
countries = [
    "au", "be", "ca", "cl", "cr", "do", "ec", "sv", "fr", "de", "gt", "ie",
    "jp", "ke", "mx", "nl", "nz", "pa", "pl", "pt", "za", "es", "lk", "se",
    "ch", "tw", "gb",
]


# Command-line interface: optionally restrict scraping to specific countries.
# argparse stores the value under args.country (dest is derived from the
# first long option, "--country").
parser = argparse.ArgumentParser(description="Scrape Uber Eats data")
parser.add_argument(
    "--country",
    "-c",
    type=str,
    nargs="+",
    help="Scrape data from a specific country. \nIf not specified, all countries will be scraped.",
    metavar="<COUNTRYCODE>",
)

args = parser.parse_args()


def clear():
    """Clear the terminal screen ("cls" on Windows, "clear" elsewhere)."""
    os.system("cls" if os.name == "nt" else "clear")


def end(signum=None, frame=None):
    """Save the current country's results to countries/<c>.json and exit(0).

    Registered as the SIGINT (Ctrl+C) handler, which passes (signum, frame);
    both parameters default to None so the script's direct `end()` calls no
    longer raise TypeError. The old parameter name `signal` also shadowed the
    imported `signal` module. Reads the module-level globals `c` (current
    country code) and `data` (accumulated results).
    """
    print("Exiting...")
    with open(f"countries/{c}.json", "w", encoding="utf-8") as file:
        json.dump(data, file, indent=4)
    exit(0)


# Register the signal handler for Ctrl+C so partial progress is saved on exit.
signal.signal(signal.SIGINT, end)


def _scrape_country(code):
    """Scrape every city and shop listing for one country code.

    Results accumulate in the module-level ``data`` dict (kept global, along
    with ``c``, so the SIGINT handler ``end`` can persist partial progress)
    and are written to ``countries/<code>.json`` when the country finishes —
    previously only ``end()`` wrote the file, so every country except the
    last was silently discarded.
    """
    global c, data  # read by end() when the user presses Ctrl+C
    c = code
    country = requests.get(
        f"https://restcountries.com/v3.1/alpha/{c}?fields=name",
        headers=headers,
        timeout=10,
    ).json()["name"]["common"]

    # Create the output folder on first use.
    os.makedirs("countries", exist_ok=True)

    data = {"country": country.upper(), "cities": []}

    print(f"Scraping {country}...")

    url = f"https://www.ubereats.com/{c}/location"
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print("An error occurred:", e)
        exit(1)

    soup = BeautifulSoup(response.content, "html.parser")

    # Every anchor of the form /<cc>/city/... is a city landing page.
    for link in soup.find_all("a"):
        href = link.get("href")  # None when the anchor has no href attribute
        city_name = link.get_text().strip()
        if not (href and href.startswith(f"/{c}/city")):
            continue
        city_data = {"city": city_name, "shops": []}

        city_response = requests.get(
            f"https://www.ubereats.com{href}", headers=headers, timeout=10
        )
        city_soup = BeautifulSoup(city_response.content, "html.parser")
        for shop in city_soup.find_all("a", {"data-test": "store-link"}):
            page_link = "https://www.ubereats.com" + shop.get("href")
            for heading in shop.find_all("h3"):
                city_data["shops"].append(
                    {"name": heading.get_text().strip(), "link": page_link}
                )

        data["cities"].append(city_data)

    # Persist this country's results immediately.
    with open(f"countries/{c}.json", "w", encoding="utf-8") as file:
        json.dump(data, file, indent=4)


# argparse puts "--country"/"-c" under args.country (dest comes from the
# first long option); the original read args.c, which raises AttributeError.
if args.country is None:
    clear()
    print("Scraping all countries...")
    for code in countries:
        _scrape_country(code)
    # end(None, None) matches both the handler signature and a direct call.
    end(None, None)
else:
    # Validate all requested codes before doing any network work.
    for code in args.country:
        if code not in countries:
            print(f"Invalid country code: {code}")
            exit(1)
    for code in args.country:
        clear()
        _scrape_country(code)
    end(None, None)
2 changes: 2 additions & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ruff
pre-commit