diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..6db4b47
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,40 @@
+name: Build
+
+on:
+  push:
+    branches: [master]
+    paths-ignore:
+      - '**.md'  # documentation-only changes do not trigger a build
+  pull_request:
+    branches: [master]
+    paths-ignore:
+      - '**.md'
+
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}  # one job per OS listed in the matrix below
+    strategy:
+      matrix:
+        os: [ubuntu-latest, macos-latest, windows-latest]
+        python-version: ['3.6', '3.7', '3.8']  # quoted so versions stay strings
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install black flake8 pytest
+          pip install -r requirements.txt
+      - name: Format with black
+        run: |
+          black . --check
+      - name: Lint with flake8
+        run: |
+          flake8 . --ignore=E203 --count --show-source --statistics --max-line-length=90
+      - name: Test with pytest
+        run: |
+          pytest
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
new file mode 100644
index 0000000..fa96f2d
--- /dev/null
+++ b/.github/workflows/publish.yml
@@ -0,0 +1,26 @@
+name: Publish
+
+on:
+  release:
+    types: [created]  # runs when a GitHub release is created
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: '3.x'
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install setuptools wheel twine
+      - name: Build and publish 🐍 📦 to PyPi
+        env:  # twine reads its credentials from these variables
+          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
+          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+        run: |
+          python setup.py sdist bdist_wheel
+          twine upload dist/*
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7c59dc0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+venv
+__pycache__
+.pytest_cache
+*.egg-info
diff --git a/LICENCE b/LICENCE
new file mode 100644
index 0000000..c2f064d
--- /dev/null
+++ b/LICENCE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2020 Eric Lim
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..bf0afe9
--- /dev/null
+++ b/README.md
@@ -0,0 +1,100 @@
+![Build](https://github.com/cowboy-bebug/app-store-scraper/workflows/Build/badge.svg)
+[![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg?style=flat-square)](https://github.com/cowboy-bebug/app-store-scraper/pulls)
+
+
+```
+ ___ _____ _ _____
+ / _ \ / ___| | / ___|
+ / /_\ \_ __ _ __ \ `--.| |_ ___ _ __ ___ \ `--. ___ _ __ __ _ _ __ ___ _ __
+ | _ | '_ \| '_ \ `--. \ __/ _ \| '__/ _ \ `--. \/ __| '__/ _` | '_ \ / _ \ '__|
+ | | | | |_) | |_) | /\__/ / || (_) | | | __/ /\__/ / (__| | | (_| | |_) | __/ |
+ \_| |_/ .__/| .__/ \____/ \__\___/|_| \___| \____/ \___|_| \__,_| .__/ \___|_|
+ | | | | | |
+ |_| |_| |_|
+```
+
+# Quickstart
+
+```console
+pip3 install app-store-scraper
+```
+
+```python
+from app_store_scraper import AppStore
+from pprint import pprint
+
+fortnite = AppStore(country="nz", app_name="fortnite", app_id=1261357853)
+fortnite.review(how_many=20)
+
+pprint(fortnite.reviews)
+pprint(fortnite.reviews_count)
+```
+
+# Extra Details
+
+Let's continue from the code example used in [Quickstart](#quickstart).
+
+
+## Instantiation
+
+There are three required arguments: `country`, `app_name` and `app_id`.
+
+```pycon
+>>> fortnite
+AppStore(country=nz, app_name=fortnite, app_id=1261357853)
+```
+
+These are used to build the URL of the App Store landing page, which is available through the `landing_url` attribute:
+
+```pycon
+>>> fortnite.landing_url
+'https://apps.apple.com/nz/app/fortnite/id1261357853'
+```
+
+There are optional arguments used to override log settings:
+
+- `log_format`
+ - passed directly to `logging.basicConfig(format=log_format)`
+ - default is `"%(asctime)s [%(levelname)s] %(name)s - %(message)s"`
+- `log_level`
+ - passed directly to `logging.basicConfig(level=log_level)`
+ - default is `"INFO"`
+- `log_interval`
+ - log is produced every 10 seconds (by default) as a "heartbeat" (useful for a long scraping session)
+ - default is `10`
+
+
+## Fetching Review
+
+The maximum number of reviews fetched per request is 20. To minimise the number of calls, the limit of 20 is hardcoded. This means the `review()` method fetches reviews in batches of 20, so the number fetched is `how_many` rounded up to the next multiple of 20.
+
+```pycon
+>>> fortnite.review(how_many=33)
+>>> fortnite.reviews_count
+40
+```
+
+If `how_many` is not provided, `review()` will terminate after *all* reviews are fetched.
+
+**NOTE** the review count seen on the landing page differs from the actual number of reviews fetched. This is simply because only *some* users who rated the app also leave reviews.
+
+
+## Review Data
+
+The fetched review data are loaded in memory and live in the `reviews` attribute as a list of dicts.
+```pycon
+>>> fortnite.reviews
+[{'userName': 'someone', 'rating': 5, 'date': datetime.datetime(...
+```
+
+Each review dictionary has the following schema:
+```python
+{
+ "date": datetime.datetime,
+ "isEdited": bool,
+ "rating": int,
+ "review": str,
+ "title": str,
+ "userName": str
+ }
+```
diff --git a/app_store_scraper/__init__.py b/app_store_scraper/__init__.py
new file mode 100644
index 0000000..8ad7ff5
--- /dev/null
+++ b/app_store_scraper/__init__.py
@@ -0,0 +1,11 @@
+from .app_store import AppStore
+from .__version__ import ( # noqa: F401
+ __title__,
+ __version__,
+ __description__,
+ __author__,
+ __url__,
+ __license__,
+)
+
+__all__ = ["AppStore"]
diff --git a/app_store_scraper/__version__.py b/app_store_scraper/__version__.py
new file mode 100644
index 0000000..22e6d51
--- /dev/null
+++ b/app_store_scraper/__version__.py
@@ -0,0 +1,7 @@
+__title__ = "app-store-scraper"
+__version__ = "0.1.1"
+__description__ = "Single API ☝ App Store Review Scraper 🧹"  # keep file as UTF-8
+__author__ = "Eric Lim"
+__url__ = "https://github.com/cowboy-bebug/app-store-scraper"
+__license__ = "MIT"
+__keywords__ = ["app store", "ios", "review", "scraping", "scraper"]
diff --git a/app_store_scraper/app_store.py b/app_store_scraper/app_store.py
new file mode 100644
index 0000000..041fd85
--- /dev/null
+++ b/app_store_scraper/app_store.py
@@ -0,0 +1,117 @@
+import logging
+import re
+import requests
+import sys
+import time
+from datetime import datetime
+from requests.adapters import HTTPAdapter
+from requests.packages.urllib3.util.retry import Retry
+from .base import Base
+
+logger = logging.getLogger("AppStore")
+
+
+class AppStore(Base):
+    def __init__(
+        self,
+        country,
+        app_name,
+        app_id,
+        log_format="%(asctime)s [%(levelname)s] %(name)s - %(message)s",
+        log_level="INFO",
+        log_interval=10,
+    ):
+        # Base sets up URLs and headers; the Authorization value comes from __token().
+        super().__init__(country, app_name, app_id)
+        self.request_headers["Authorization"] = self.__token()
+        logging.basicConfig(format=log_format, level=log_level.upper())
+        self.log_interval = log_interval  # seconds between heartbeat log lines
+
+    def __repr__(self):
+        # Debug form: the class name followed by the three identifying fields.
+        cls_name = type(self).__name__
+        return (
+            f"{cls_name}(country={self.country}, "
+            f"app_name={self.app_name}, app_id={self.app_id})"
+        )
+
+    def __str__(self):
+        rows = (  # (label, value) pairs, rendered one per line
+            ("Country", self.country),
+            ("Name", self.app_name),
+            ("ID", self.app_id),
+            ("URL", self.landing_url),
+            ("Review count", self.reviews_count),
+        )
+        return "\n".join(f"{label:>12} | {value}" for label, value in rows)
+
+    def __get(
+        self,
+        url,
+        headers=None,
+        params=None,
+        total=3,  # total number of retries allowed
+        backoff_factor=3,
+        status_forcelist=(404,),  # immutable default; these statuses are retried
+    ) -> None:  # the response is stored on self.response, nothing is returned
+        retries = Retry(
+            total=total,
+            backoff_factor=backoff_factor,
+            status_forcelist=status_forcelist,
+        )
+        with requests.Session() as s:
+            s.mount(self.base_request_url, HTTPAdapter(max_retries=retries))
+            logger.debug(f"Making a GET request: {url}")
+            self.response = s.get(url, headers=headers, params=params)
+
+ def __token(self):
+ self.__get(self.landing_url)
+ tags = self.response.text.splitlines()
+ for tag in tags:
+ if re.match(r" interval:
+ logger.info(f"[{interval}s HEARTBEAT] Fetched {self.reviews_count} reviews")
+ self.log_timer = 0
+
+    def review(self, how_many=sys.maxsize):
+        logger.info(f"Fetching reviews for {self.landing_url}")
+        while True:
+            self.__heartbeat()
+            self.__get(
+                self.request_url,
+                headers=self.request_headers,
+                params=self.request_params,
+            )
+            self.__parse_data()
+            self.__parse_next()
+            if self.request_offset is None or self.fetched_count >= how_many:
+                break  # no further offset, or at least `how_many` reviews fetched
+        logger.info(f"Fetched {self.fetched_count} reviews")
+        self.fetched_count = 0
diff --git a/app_store_scraper/base.py b/app_store_scraper/base.py
new file mode 100644
index 0000000..4fc9d30
--- /dev/null
+++ b/app_store_scraper/base.py
@@ -0,0 +1,64 @@
+import random
+import re
+
+
+class Base:
+    """Holds the URLs, request headers/params and counters for one App Store app."""
+
+    __scheme = "https"
+    __landing_host = "apps.apple.com"
+    __request_host = "amp-api.apps.apple.com"
+    __landing_path = "{country}/app/{app_name}/id{app_id}"
+    __request_path = "v1/catalog/{country}/apps/{app_id}/reviews"
+
+    def __init__(self, country, app_name, app_id):
+        # Inputs are stringified; the name is lower-cased and non-word runs become "-".
+        self.country = str(country).lower()
+        self.app_name = re.sub(r"[\W_]+", "-", str(app_name).lower())
+        self.app_id = str(app_id)
+
+        self.base_landing_url = f"{self.__scheme}://{self.__landing_host}"
+        self.base_request_url = f"{self.__scheme}://{self.__request_host}"
+        self.landing_url = self.__landing_url()
+        self.request_url = self.__request_url()
+
+        # NOTE: grab from https://bit.ly/2zu0cmU
+        self.user_agents = [
+            "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)",
+            "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
+            "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
+        ]
+
+        self.request_offset = 0
+        self.request_headers = {
+            "Accept": "application/json",
+            "Connection": "keep-alive",
+            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
+            "Origin": self.base_landing_url,
+            "Referer": self.landing_url,
+            "User-Agent": random.choice(self.user_agents),
+        }
+        self.request_params = {
+            "l": "en-GB",
+            "offset": self.request_offset,
+            "limit": 20,
+            "platform": "web",
+            "additionalPlatforms": "appletv,ipad,iphone,mac",
+        }
+
+        # Scraping state: collected reviews, running counters and the log timer.
+        self.reviews = []
+        self.reviews_count = 0
+        self.fetched_count = 0
+        self.log_timer = 0.0
+
+    def __landing_url(self):
+        # e.g. https://apps.apple.com/<country>/app/<app_name>/id<app_id>
+        path = self.__landing_path.format(
+            country=self.country, app_name=self.app_name, app_id=self.app_id
+        )
+        return f"{self.__scheme}://{self.__landing_host}/{path}"
+
+    def __request_url(self):
+        path = self.__request_path.format(country=self.country, app_id=self.app_id)
+        return f"{self.__scheme}://{self.__request_host}/{path}"
diff --git a/app_store_scraper/tests/__init__.py b/app_store_scraper/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app_store_scraper/tests/test_all.py b/app_store_scraper/tests/test_all.py
new file mode 100644
index 0000000..cc2c3d7
--- /dev/null
+++ b/app_store_scraper/tests/test_all.py
@@ -0,0 +1,43 @@
+from app_store_scraper import AppStore
+
+test_country = "Nz"
+test_app_name = "Cool App"
+test_app_id = 7357
+
+app = AppStore(country=test_country, app_name=test_app_name, app_id=test_app_id)
+
+
+def test_app_init_fields():
+    # Inputs are stored lower-cased, with the space in the name replaced by "-".
+    assert app.country == test_country.lower()
+    assert app.app_name == "-".join(test_app_name.lower().split(" "))
+    assert app.app_id == str(test_app_id).lower()
+
+
+def test_app_urls():
+    landing_base = "https://apps.apple.com"
+    request_base = "https://amp-api.apps.apple.com"
+    landing_path = f"{app.country}/app/{app.app_name}/id{app.app_id}"
+    request_path = f"v1/catalog/{app.country}/apps/{app.app_id}/reviews"
+    assert app.base_landing_url == landing_base
+    assert app.base_request_url == request_base
+    assert app.landing_url == "/".join((landing_base, landing_path))
+    assert app.request_url == "/".join((request_base, request_path))
+
+
+def test_app_defaults():
+    assert app.log_interval == 10
+
+
+def test_app_token():
+    # NOTE(review): presumably no token is found for the made-up app above.
+    assert app.request_headers["Authorization"] is None
+
+
+def test_app():
+    fortnite = AppStore(country="nz", app_name="fortnite", app_id=1261357853)
+    fortnite.review(how_many=3)  # requests are made with a limit of 20 reviews
+
+    assert len(fortnite.reviews) == 20
+    assert len(fortnite.reviews) == fortnite.reviews_count
+    assert fortnite.request_offset == 20
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..b450057
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+requests==2.23.0
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..6bfac1d
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,38 @@
+import setuptools
+
+about = {}  # receives the metadata names defined in __version__.py
+# Decode as UTF-8 explicitly: __version__.py holds emoji; locale defaults vary.
+with open("app_store_scraper/__version__.py", "r", encoding="utf-8") as f:
+    exec(f.read(), about)
+
+with open("README.md", "r", encoding="utf-8") as f:
+    long_description = f.read()
+
+with open("requirements.txt", "r", encoding="utf-8") as f:
+    install_requires = [line.strip() for line in f if line.strip()]
+
+setuptools.setup(
+    name=about["__title__"],
+    version=about["__version__"],
+    description=about["__description__"],
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    author=about["__author__"],
+    url=about["__url__"],
+    license=about["__license__"],
+    keywords=about["__keywords__"],
+    packages=setuptools.find_packages(".", exclude=["*.tests"]),
+    install_requires=install_requires,
+    classifiers=[
+        "License :: OSI Approved :: MIT License",
+        "Intended Audience :: Developers",
+        "Natural Language :: English",
+        "Operating System :: OS Independent",
+        "Programming Language :: Python :: 3",
+        "Programming Language :: Python :: 3.6",
+        "Programming Language :: Python :: 3.7",
+        "Programming Language :: Python :: 3.8",
+    ],
+    python_requires=">=3.6",
+    project_urls={"Source": about["__url__"]},
+)