Skip to content

Commit 7c64cfd

Browse files
committed
🎉 Added Date
1 parent ded193e commit 7c64cfd

File tree

4 files changed

+16
-8
lines changed

4 files changed

+16
-8
lines changed

README.md

+6-6
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
from scrape_amazon import get_reviews
2828

2929
reviews = get_reviews('in','B078BNQ318') #returns dataframe
30-
#Reviewer, Rating, Title, Description
30+
#Reviewer, Rating, Title, Description, Date
3131
```
3232

3333
### CLI
@@ -52,11 +52,11 @@ output_path output_path for saving (B078BNQ318.csv)
5252
## Output
5353

5454
```shell
55-
Reviewer Rating Title Description
56-
0 Parth Maniar 4 Great but ... I change ...
57-
1 Manpreet Singh 3 Delivers ... Great ph ...
58-
2 Aparna Uniyal 1 Battery/H ... I have ...
59-
3 Rahul 5 Great but ... On the f ...
55+
Reviewer Rating Title Description Date
56+
0 Parth Maniar 4 Great but ... I change ... 05/24/2021
57+
1 Manpreet Singh 3 Delivers ... Great ph ... 05/24/2021
58+
2 Aparna Uniyal 1 Battery/H ... I have ... 05/24/2021
59+
3 Rahul 5 Great but ... On the f ... 05/24/2021
6060
```
6161
## Want to contribute?
6262
To get more information on contributing, go to the

scrape_amazon/util/scrape.py

+7
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import math
22
import re
3+
import datefinder
34
import pandas as pd
45
from bs4 import BeautifulSoup
56
from p_tqdm import p_map
@@ -34,9 +35,11 @@ def extractPage(url: str) -> str:
3435
pageNotLoaded = False
3536
reviewers = []
3637
ratings = []
38+
ratingsDate = []
3739
reviewDescriptions = []
3840
reviewTitles = []
3941
reviewrsSpan = productPage.findAll("span", {"class": "a-profile-name"})
42+
reviewDate = productPage.findAll("span", {"class": "review-date"})
4043
ratingsSpan = productPage.findAll("i", {"class": "review-rating"})
4144
reviewTitlesSpan = productPage.findAll("a", {"class": "review-title-content"})
4245
reviewDescriptionSpan = productPage.findAll(
@@ -48,6 +51,8 @@ def extractPage(url: str) -> str:
4851
for i in range(2, len(reviewrsSpan)):
4952
reviewers.append(reviewrsSpan[i].get_text())
5053
ratings.append(int(ratingsSpan[i].get_text()[0]))
54+
matches = datefinder.find_dates(reviewDate[i].get_text())
55+
ratingsDate.append(list(matches)[0].strftime("%m/%d/%Y"))
5156

5257
for i in range(0, len(reviewTitlesSpan)):
5358
reviewTitles.append(reviewTitlesSpan[i].get_text())
@@ -63,6 +68,7 @@ def extractPage(url: str) -> str:
6368
"ratings": ratings,
6469
"reviewTitles": reviewTitles,
6570
"reviewDescriptions": reviewDescriptions,
71+
"date": ratingsDate,
6672
}
6773

6874

@@ -107,6 +113,7 @@ def scrape_reviews(url):
107113
productReviewsData["Rating"] = res["ratings"]
108114
productReviewsData["Title"] = res["reviewTitles"]
109115
productReviewsData["Description"] = res["reviewDescriptions"]
116+
productReviewsData["Date"] = res["date"]
110117
# productReviewsData["link"] = url
111118
# productReviewsData["Product Title"] = pageTitle
112119

setup.cfg

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[metadata]
22
name = scrape_amazon
3-
version = 0.1.7
3+
version = 0.1.8
44
description = Scrape Amazon Reviews smoothly.
55
license = MIT
66
author = Parth Maniar

setup.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setuptools.setup(
77
name="scrape_amazon",
8-
version="0.1.7",
8+
version="0.1.8",
99
description="Scrape Amazon Reviews",
1010
url="http://github.com/officialpm/scrape-amazon",
1111
author="Parth Maniar",
@@ -34,6 +34,7 @@
3434
"p_tqdm",
3535
"my_fake_useragent",
3636
"requests",
37+
"datefinder"
3738
],
3839
entry_points={
3940
"console_scripts": ["scrape-amazon=scrape_amazon.cli:get_reviews_cli"],

0 commit comments

Comments
 (0)