From 6309009ec4cb9e3036eb6b55b6d4626ee3896008 Mon Sep 17 00:00:00 2001 From: Anshika Yadav <14anshika7yadav@gmail.com> Date: Sat, 3 Aug 2024 17:41:04 +0530 Subject: [PATCH 1/2] Added Scrapping data from Booking.com #1117 --- dev-documentation.md | 17 +++++++ src/scrape_up/booking.com/__init__.py | 3 ++ src/scrape_up/booking.com/bookingcom.py | 61 +++++++++++++++++++++++++ 3 files changed, 81 insertions(+) create mode 100644 src/scrape_up/booking.com/__init__.py create mode 100644 src/scrape_up/booking.com/bookingcom.py diff --git a/dev-documentation.md b/dev-documentation.md index 0a30421f..b926be61 100644 --- a/dev-documentation.md +++ b/dev-documentation.md @@ -1932,6 +1932,23 @@ weather=Indiatodayweather("Mumbai") and humidity of the place. | --- +### Booking.com + +```py +from scrape_up import Booking.com +``` + +Create an instance of `BookingScraper` class + +```python +scraper = BookingScraper("London") +``` + +| Method | Details | +| ------------------------ | --------------------------------------------------------- | +| `get_hotels()` | Returns a list of hotels with their details. 
class BookingScraper:
    """
    A class to scrape hotel listings from Booking.com search results.

    Create an instance of `BookingScraper` class

    ```python
    scraper = BookingScraper("London")
    ```

    | Method         | Details                                      |
    | -------------- | -------------------------------------------- |
    | `get_hotels()` | Returns a list of hotels with their details. |
    """

    def __init__(self, location, timeout=10):
        """
        Build the search URL and request headers for ``location``.

        Args:
            location: Free-text place name to search for (e.g. ``"London"``).
            timeout: Seconds to wait for the HTTP response before giving up.
        """
        from urllib.parse import quote_plus

        self.location = location
        self.timeout = timeout
        # quote_plus turns spaces into '+' (as before) and additionally
        # percent-encodes characters such as '&', '#' or non-ASCII letters
        # that would otherwise corrupt the query string.
        self.url = (
            "https://www.booking.com/searchresults.html"
            f"?ss={quote_plus(self.location)}"
        )
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
        }

    @staticmethod
    def _text_or_default(card, test_id, default):
        """Return the stripped text of the ``div`` tagged ``test_id``, or ``default``."""
        node = card.find("div", {"data-testid": test_id})
        return node.get_text(strip=True) if node is not None else default

    def get_hotels(self):
        """
        Fetch the search results page and extract the listed hotels.

        Returns:
            A list of dicts with the keys ``"name"`` and ``"reviews"``.
            An empty list is returned (and a message printed) when the
            request fails, the status code is not 200, or no property
            cards are found in the page.
        """
        # Imported here so the class itself can be imported and constructed
        # without the HTTP / HTML-parsing packages being loaded.
        import requests
        from bs4 import BeautifulSoup

        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(
                self.url, headers=self.headers, timeout=self.timeout
            )
        except requests.RequestException as e:
            print(f"Failed to retrieve the webpage: {e}")
            return []

        if response.status_code != 200:
            print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
            return []

        soup = BeautifulSoup(response.content, "html.parser")
        hotel_elements = soup.find_all("div", {"data-testid": "property-card"})
        if not hotel_elements:
            print("No hotel elements found. The structure of the page may have changed.")
            return []

        hotels = []
        for hotel in hotel_elements:
            try:
                hotels.append({
                    "name": self._text_or_default(hotel, "title", "No name"),
                    "reviews": self._text_or_default(
                        hotel, "review-score", "No reviews"
                    ),
                })
            except (AttributeError, TypeError) as e:
                # Best effort: one malformed card must not discard the rest.
                print(f"Error parsing hotel: {e}")
                continue

        return hotels