From 3ad6477634d09446a75126fbdc28edaa32bcbe37 Mon Sep 17 00:00:00 2001 From: Eric Date: Mon, 14 Sep 2020 11:39:30 +1200 Subject: [PATCH] Feature/16 date filter (#19) * Change fortnite -> minecraft * Add 'after' positional param * Bump 0.3.2 -> 0.3.3 --- README.md | 24 ++++++++++++------------ app_store_scraper/__version__.py | 2 +- app_store_scraper/base.py | 16 ++++++++++++---- app_store_scraper/tests/test_all.py | 21 +++++++++++++++++++-- 4 files changed, 44 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 5cd87ce..8b3e65e 100644 --- a/README.md +++ b/README.md @@ -28,11 +28,11 @@ Scrape reviews for an app: from app_store_scraper import AppStore from pprint import pprint -fortnite = AppStore(country="nz", app_name="fortnite") -fortnite.review(how_many=20) +minecraft = AppStore(country="nz", app_name="minecraft") +minecraft.review(how_many=20) -pprint(fortnite.reviews) -pprint(fortnite.reviews_count) +pprint(minecraft.reviews) +pprint(minecraft.reviews_count) ``` Scrape reviews for a podcast: @@ -67,15 +67,15 @@ There are two required and one positional parameters: Once instantiated, the object can be examined: ```pycon ->>> fortnite -AppStore(country='nz', app_name='fortnite', app_id=1261357853) +>>> minecraft +AppStore(country='nz', app_name='minecraft', app_id=479516143) ``` ```pycon >>> print(app) Country | nz - Name | fortnite - ID | 1261357853 - URL | https://apps.apple.com/nz/app/fortnite/id1261357853 + Name | minecraft + ID | 479516143 + URL | https://apps.apple.com/nz/app/minecraft/id479516143 Review count | 0 ``` @@ -97,8 +97,8 @@ Other optional parameters are: The maximum number of reviews fetched per request is 20. To minimise the number of calls, the limit of 20 is hardcoded. This means the `review()` method will always grab more than the `how_many` argument supplied with an increment of 20. ```pycon ->>> fortnite.review(how_many=33) ->>> fortnite.reviews_count +>>> minecraft.review(how_many=33) +>>> minecraft.reviews_count 40 ``` @@ -111,7 +111,7 @@ If `how_many` is not provided, `review()` will terminate after *all* reviews are The fetched review data are loaded in memory and live inside `reviews` attribute as a list of dict. ```pycon ->>> fortnite.reviews +>>> minecraft.reviews [{'userName': 'someone', 'rating': 5, 'date': datetime.datetime(... ``` diff --git a/app_store_scraper/__version__.py b/app_store_scraper/__version__.py index 716d068..ea39191 100644 --- a/app_store_scraper/__version__.py +++ b/app_store_scraper/__version__.py @@ -1,5 +1,5 @@ __title__ = "app-store-scraper" -__version__ = "0.3.2" +__version__ = "0.3.3" __description__ = "Single API ☝ App Store Review Scraper 🧹" __author__ = "Eric Lim" __url__ = "https://github.com/cowboy-bebug/app-store-scraper" diff --git a/app_store_scraper/base.py b/app_store_scraper/base.py index ece2ee7..fcf919c 100644 --- a/app_store_scraper/base.py +++ b/app_store_scraper/base.py @@ -79,7 +79,10 @@ def __init__( def __repr__(self): return "{}(country='{}', app_name='{}', app_id={})".format( - self.__class__.__name__, self.country, self.app_name, self.app_id, + self.__class__.__name__, + self.country, + self.app_name, + self.app_id, ) def __str__(self): @@ -129,11 +132,13 @@ def _token(self): token = re.search(r"token%22%3A%22(.+?)%22", tag).group(1) return f"bearer {token}" - def _parse_data(self): + def _parse_data(self, after): response = self._response.json() for data in response["data"]: review = data["attributes"] review["date"] = datetime.strptime(review["date"], "%Y-%m-%dT%H:%M:%SZ") + if after and review["date"] < after: + continue self.reviews.append(review) self.reviews_count += 1 self._fetched_count += 1 @@ -170,8 +175,11 @@ def search_id(self): app_id = re.search(pattern, self._response.text).group(1) return app_id - def review(self, how_many=sys.maxsize): + def review(self, how_many=sys.maxsize, after=None): self._log_timer = 0 + if after and not isinstance(after, datetime): + raise SystemExit("`after` must be a datetime object.") + try: while True: self._heartbeat() @@ -180,7 +188,7 @@ def review(self, how_many=sys.maxsize): headers=self._request_headers, params=self._request_params, ) - self._parse_data() + self._parse_data(after) self._parse_next() if self._request_offset is None or self._fetched_count >= how_many: break diff --git a/app_store_scraper/tests/test_all.py b/app_store_scraper/tests/test_all.py index 46451f1..64c7d1c 100644 --- a/app_store_scraper/tests/test_all.py +++ b/app_store_scraper/tests/test_all.py @@ -1,4 +1,5 @@ from app_store_scraper import AppStore, Podcast +from datetime import datetime, timedelta class TestEmptyApp: @@ -40,11 +41,11 @@ def test_str(self, capsys): class TestAppStore: - app = AppStore(country="nz", app_name="fortnite") + app = AppStore(country="nz", app_name="minecraft") def test_search_id(self): self.app.search_id() - assert self.app.app_id == 1261357853 + assert self.app.app_id == 479516143 def test_review(self): self.app.review(how_many=3) @@ -60,6 +61,14 @@ def test_reviews_for_duplicates(self): for i in range(len(self.app.reviews) - 1): assert self.app.reviews[i] != self.app.reviews[i + 1] + def test_reviews_for_after(self): + t1 = datetime.now() + t0 = t1 - timedelta(weeks=26) + self.app.reviews = [] + self.app.review(how_many=3, after=t0) + for review in self.app.reviews: + assert review["date"] >= t0 and review["date"] < t1 + class TestPodcast: podcast = Podcast(country="nz", app_name="stuff you should know") @@ -81,3 +90,11 @@ def test_review_continuation(self): def test_reviews_for_duplicates(self): for i in range(len(self.podcast.reviews) - 1): assert self.podcast.reviews[i] != self.podcast.reviews[i + 1] + + def test_reviews_for_after(self): + t1 = datetime.now() + t0 = t1 - timedelta(weeks=26) + self.podcast.reviews = [] + self.podcast.review(how_many=3, after=t0) + for review in self.podcast.reviews: + assert review["date"] >= t0 and review["date"] < t1