Add dev tooling, reformat main.py with ruff, and fix CLI entry-point bugs

This patch adds a .gitignore, a ruff pre-commit configuration, a MongoDB
docker-compose service and requirements-dev.txt, updates the README, and
reformats main.py.

Review fixes folded into the main.py hunks:
  * The option is declared as ("--country", "-c"), so argparse stores it as
    `args.country`; the `args.c` reads are replaced accordingly.
  * `end` is registered as a SIGINT handler but is also called directly as
    `end()`; its two parameters now default to None so both uses work.
  * The all-countries branch keeps `data["cities"].append(city_data)` and
    its `end()` call, matching the `--country` branch. The reformat had
    dropped both, so scraped cities were discarded and never saved.

NOTE(review): line breaks and indentation were reconstructed from a
whitespace-collapsed copy of this patch. Hunk line counts were re-checked
against the headers, but whitespace inside lines (trailing spaces on removed
lines, and the indentation of the `end()` calls) should be confirmed against
the repository before applying.

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..900cbb9
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+/venv
+
+# Docker-compose override files
+docker-compose.override*.yml
+
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..387bc0f
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,10 @@
+repos:
+- repo: https://github.com/astral-sh/ruff-pre-commit
+  # Ruff version.
+  rev: v0.3.3
+  hooks:
+    # Run the linter.
+    - id: ruff
+      args: [ --fix ]
+    # Run the formatter.
+    - id: ruff-format
diff --git a/README.md b/README.md
index 9affb3e..e2217db 100644
--- a/README.md
+++ b/README.md
@@ -1,11 +1,16 @@
-# UberEatsScraper
-Scrape a hell lot of Uber Eats shops and their URL
+# Foodiecall
+Search through a lot of Uber Eats shops and their URLs
 
 ## Installation (Manual)
 
 1. Install [Python 3](https://www.python.org/downloads/)
 2. Download the [latest release](github.com/wxnnvs/UberEatsScraper/releases/latest) to a dedicated folder
-3. Run `pip install -r requirements.txt` to install all modules
+3. Install dependencies
+```
+python3 -m venv venv
+. venv/bin/activate
+pip install -r requirements.txt
+```
 4. Run `python3 main.py`
 
 Results can be found under the `countries` folder
@@ -14,6 +19,14 @@ Results can be found under the `countries` folder
 
 1. Install [Docker](https://www.docker.com/get-started/)
 2. Run `docker run --rm -it -v .:/app/countries wxnnvs/ubereats-scraper` in a dedicated folder
-3. The program will deploy automaticly
+3. The program will deploy automatically
 
-Results can be found in the folder you ran the container in
\ No newline at end of file
+Results can be found in the folder you ran the container in
+
+## Contribute
+
+1. Install the dev requirements
+```
+pip install -r requirements-dev.txt
+pre-commit install
+```
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..0dfc2f3
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,13 @@
+version: "3.7"
+
+services:
+
+  mongodb:
+    image: "mongo:7"
+    hostname: "mongodb"
+    restart: "no"
+    volumes:
+      - mongodb_data:/data/db
+
+volumes:
+  mongodb_data:
diff --git a/main.py b/main.py
index 6c71caf..ec44a0d 100644
--- a/main.py
+++ b/main.py
@@ -9,14 +9,54 @@
     "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
 }
 
-countries = ["au", "be", "ca", "cl", "cr", "do", "ec", "sv", "fr", "de", "gt", "ie", "jp", "ke", "mx", "nl", "nz", "pa", "pl", "pt", "za", "es", "lk", "se", "ch", "tw", "gb"]
+countries = [
+    "au",
+    "be",
+    "ca",
+    "cl",
+    "cr",
+    "do",
+    "ec",
+    "sv",
+    "fr",
+    "de",
+    "gt",
+    "ie",
+    "jp",
+    "ke",
+    "mx",
+    "nl",
+    "nz",
+    "pa",
+    "pl",
+    "pt",
+    "za",
+    "es",
+    "lk",
+    "se",
+    "ch",
+    "tw",
+    "gb",
+]
+
 
 parser = argparse.ArgumentParser(description="Scrape Uber Eats data")
-parser.add_argument("--country", "-c", type=str, nargs='+', help="Scrape data from a specific country. \nIf not specified, all countries will be scraped.", metavar="")
+
+parser.add_argument(
+    "--country",
+    "-c",
+    type=str,
+    nargs="+",
+    help="Scrape data from a specific country. \nIf not specified, all countries will be scraped.",
+    metavar="",
+)
+
 args = parser.parse_args()
+
 
 def clear():
-    os.system('cls' if os.name == 'nt' else 'clear')
+    os.system("cls" if os.name == "nt" else "clear")
+
 
-def end(signal, frame):
+def end(signal=None, frame=None):
     print("Exiting...")
@@ -24,23 +64,25 @@ def end(signal, frame):
         json.dump(data, file, indent=4)
     exit(0)
 
+
 # Register the signal handler for Ctrl+C
 signal.signal(signal.SIGINT, end)
 
 # the actual stuff
-if args.c == None:
+if args.country is None:
     clear()
     print("Scraping all countries...")
     for c in countries:
-        country = requests.get(f"https://restcountries.com/v3.1/alpha/{c}?fields=name", headers=headers, timeout=10).json()["name"]["common"]
+        country = requests.get(
+            f"https://restcountries.com/v3.1/alpha/{c}?fields=name",
+            headers=headers,
+            timeout=10,
+        ).json()["name"]["common"]
         # Check if the 'countries' folder exists, create it if it doesn't
-        if not os.path.exists('countries'):
-            os.makedirs('countries')
-
-        data = {
-            "country": country.upper(),
-            "cities": []
-        }
+        if not os.path.exists("countries"):
+            os.makedirs("countries")
+
+        data = {"country": country.upper(), "cities": []}
 
         print(f"Scraping {country}...")
 
@@ -48,42 +90,36 @@ def end(signal, frame):
 
         try:
             response = requests.get(url, headers=headers, timeout=10)
-            response.raise_for_status()  
+            response.raise_for_status()
         except requests.exceptions.RequestException as e:
             print("An error occurred:", e)
             exit(1)
 
         soup = BeautifulSoup(response.content, "html.parser")
 
-        links = soup.find_all('a')
+        links = soup.find_all("a")
         for link in links:
-            href = link.get('href')  # Get href attribute if it exists
+            href = link.get("href")  # Get href attribute if it exists
             name = link.get_text().strip()
             if href and href.startswith(f"/{c}/city"):
                 city_url = f"https://www.ubereats.com{href}"
-                city_data = {
-                    "city": name,
-                    "shops": []
-                }
+                city_data = {"city": name, "shops": []}
 
                 city_response = requests.get(city_url, headers=headers, timeout=10)
                 city_soup = BeautifulSoup(city_response.content, "html.parser")
-                shops = city_soup.find_all('a', {"data-test": "store-link"})
+                shops = city_soup.find_all("a", {"data-test": "store-link"})
                 for shop in shops:
-                    path = shop.get('href')
+                    path = shop.get("href")
                     page_link = "https://www.ubereats.com" + path
-                    names = shop.find_all('h3')
+                    names = shop.find_all("h3")
                     for name in names:
                         restaurant_name = name.get_text().strip()
-                        shop_data = {
-                            "name": restaurant_name,
-                            "link": page_link
-                        }
+                        shop_data = {"name": restaurant_name, "link": page_link}
                         city_data["shops"].append(shop_data)
 
                 data["cities"].append(city_data)
 
     end()
 else:
-    for c in args.c:
+    for c in args.country:
         if c not in countries:
@@ -91,15 +127,16 @@ def end(signal, frame):
             exit(1)
-    for c in args.c:
+    for c in args.country:
         clear()
-        country = requests.get(f"https://restcountries.com/v3.1/alpha/{c}?fields=name", headers=headers, timeout=10).json()["name"]["common"]
+        country = requests.get(
+            f"https://restcountries.com/v3.1/alpha/{c}?fields=name",
+            headers=headers,
+            timeout=10,
+        ).json()["name"]["common"]
         # Check if the 'countries' folder exists, create it if it doesn't
-        if not os.path.exists('countries'):
-            os.makedirs('countries')
-
-        data = {
-            "country": country.upper(),
-            "cities": []
-        }
+        if not os.path.exists("countries"):
+            os.makedirs("countries")
+
+        data = {"country": country.upper(), "cities": []}
 
         print(f"Scraping {country}...")
 
@@ -107,39 +144,33 @@ def end(signal, frame):
 
         try:
             response = requests.get(url, headers=headers, timeout=10)
-            response.raise_for_status()  
+            response.raise_for_status()
         except requests.exceptions.RequestException as e:
             print("An error occurred:", e)
             exit(1)
 
         soup = BeautifulSoup(response.content, "html.parser")
 
-        links = soup.find_all('a')
+        links = soup.find_all("a")
         for link in links:
-            href = link.get('href')  # Get href attribute if it exists
+            href = link.get("href")  # Get href attribute if it exists
             name = link.get_text().strip()
             if href and href.startswith(f"/{c}/city"):
                 city_url = f"https://www.ubereats.com{href}"
-                city_data = {
-                    "city": name,
-                    "shops": []
-                }
+                city_data = {"city": name, "shops": []}
 
                 city_response = requests.get(city_url, headers=headers, timeout=10)
                 city_soup = BeautifulSoup(city_response.content, "html.parser")
-                shops = city_soup.find_all('a', {"data-test": "store-link"})
+                shops = city_soup.find_all("a", {"data-test": "store-link"})
                 for shop in shops:
-                    path = shop.get('href')
+                    path = shop.get("href")
                     page_link = "https://www.ubereats.com" + path
-                    names = shop.find_all('h3')
+                    names = shop.find_all("h3")
                     for name in names:
                         restaurant_name = name.get_text().strip()
-                        shop_data = {
-                            "name": restaurant_name,
-                            "link": page_link
-                        }
+                        shop_data = {"name": restaurant_name, "link": page_link}
                         city_data["shops"].append(shop_data)
 
                 data["cities"].append(city_data)
 
-    end()
\ No newline at end of file
+    end()
diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 0000000..c6ef9af
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1,2 @@
+ruff
+pre-commit