-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
92 lines (85 loc) · 3.02 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
from os import path,makedirs
from datetime import datetime
from common import *
from amazonScraper import AmazonScraper
from flipkartScraper import FlipkartScraper
from bigbasketScraper import BigbasketScraper
from familydollarScraper import FamilyDollarScraper
from kotsovolosScraper import KotsovolosScraper
from zeptoScraper import ZeptoScraper
def getScraper(headless) -> Scraper:
    """Ask the user which site to scrape and return the matching scraper.

    A numbered menu of the available scrapers is printed and a 1-based
    choice is read from stdin. The prompt repeats until a number inside the
    menu range is entered.

    Args:
        headless: When truthy, ZeptoScraper is left out of the menu.

    Returns:
        The scraper instance the user selected.

    Raises:
        EOFError: If stdin is closed while waiting for a choice.
        KeyboardInterrupt: If the user interrupts the prompt (Ctrl+C).
    """
    scrapers = [
        AmazonScraper(),
        FlipkartScraper(),
        BigbasketScraper(),
        AmazonScraper(category='fresh'),
        FlipkartScraper(category='GROCERY'),
        FamilyDollarScraper(),
        KotsovolosScraper(),
    ]
    if not headless:
        # Only offered when the browser is not running head-less.
        scrapers.append(ZeptoScraper())

    print(f"[{print_as}] Select site to scrape:")
    for i, scraper in enumerate(scrapers):
        print(f"\t{i+1} for {scraper.name}")

    while True:
        # Only a malformed number is treated as "invalid input"; EOF and
        # Ctrl+C must propagate, otherwise the loop can never be left.
        try:
            inp = int(input())
        except ValueError:
            inp = None
        if inp is not None and 1 <= inp <= len(scrapers):
            break
        print(f"[{print_as}] Invalid input, please select from the above options!")

    scraper = scrapers[inp - 1]
    print(f"[{print_as}] Selected {scraper.name}")
    return scraper
# Interactive entry point: choose the browser mode once, then repeatedly
# pick a site, run a search and dump the results to a CSV file until the
# user declines to run another query.
print(f"*** Welcome to {print_as}! ***")
runAgain = True
hlMsg = f"[{print_as}] Use head-less browser? [Y/N(default)]"
hlMsg += "\n(This removes quick commerce sites and sponsored items on Amazon!) "
hl = (input(hlMsg).strip().upper() == 'Y')
print(f"[{print_as}] Connecting to Chrome via driver")
driver = getDriver(headless=hl)
try:
    while runAgain:
        scraper = getScraper(headless=hl)

        # Keep asking until a non-empty term is given; spaces become '+'.
        while True:
            term = input(f"[{print_as}] Enter search term: ").strip().replace(' ', '+')
            if term == '':
                print(f"[{print_as}] Empty search term is not allowed!")
            else:
                break

        npages = 5
        if scraper.isPageWise:
            rawPages = input(f"[{print_as}] Enter # of pages to scrape (max=20, default=5): ")
            try:
                npages = int(rawPages)
                # Explicit check instead of `assert`, which is removed under
                # `python -O`; also rejects zero and negative page counts.
                if not 1 <= npages <= scraper.getPageLimit():
                    raise ValueError(f"page count out of range: {npages}")
            except ValueError:
                npages = 5
                print(f"[{print_as}] Defaulting to # of pages = 5")

        outStkMsg = f"[{print_as}] Include out of stock items? (This removes sponsored items on Amazon!) [Y/N(default)] "
        outStk = (input(outStkMsg).strip().upper() == 'Y')

        resultItems: ResultItems = scraper.scrape(
            driver=driver,
            term=term,
            maxPages=npages,
            outOfStock=outStk,
            silent=False
        )

        if resultItems.isEmpty():
            print(f"[{print_as}] No results found!")
        else:
            # ':' is replaced so the timestamp is usable in a file name.
            dt = str(datetime.now()).replace(':', '-')
            dirpath = f"scrapedump/{scraper.folder}"
            # exist_ok avoids the check-then-create race of path.exists().
            makedirs(dirpath, exist_ok=True)
            # Characters that are path separators or reserved in file names
            # would otherwise break the output path (e.g. a term "AC/DC").
            safeTerm = term.translate(str.maketrans({ch: '_' for ch in '\\/:*?"<>|'}))
            fpath = f"{dirpath}/{safeTerm}@{dt}.csv"
            print(f"[{print_as}] Writing to {fpath}")
            resultItems.writeToCSV(filepath=fpath)

        runAgain = (input(f"[{print_as}] Run another query? [Y/N(default)] ")).strip().upper() == 'Y'
finally:
    # Always quit the driver, even when a scrape raises or the user
    # interrupts a prompt, so the driver session is not left behind.
    print(f"[{print_as}] Closing driver connection")
    driver.quit()
input("Press ENTER to exit")