-
Notifications
You must be signed in to change notification settings - Fork 0
/
zeptoScraper.py
100 lines (82 loc) · 3.34 KB
/
zeptoScraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
from common import *
class ZeptoScraper(Scraper):
    """Scraper for Zepto (zeptonow.com) search results.

    The scraper is registered as non page-wise: ``scrape`` opens a single
    search page and loads further results by scrolling the last product card
    into view.
    """

    def __init__(self, category: Optional[str] = None) -> None:
        """Register the scraper with name "Zepto" and folder "zepto".

        Args:
            category: Accepted for interface compatibility; not used here.
        """
        super().__init__(name="Zepto", folder="zepto", isPageWise=False)

    def getURL(self, term: str, page: str, outOfStock: bool) -> str:
        """Return the Zepto search URL for ``term``.

        Args:
            term: Raw search text. It is percent-encoded so that characters
                such as ``&``, ``#`` or spaces cannot corrupt the query string.
            page: Not used; the URL does not depend on it.
            outOfStock: Not used; the URL does not depend on it.

        Returns:
            The absolute search URL.
        """
        # Local import keeps this block self-contained regardless of what
        # `from common import *` happens to re-export.
        from urllib.parse import quote

        encodedTerm = quote(str(term), safe="")
        return f"https://www.zeptonow.com/search?query={encodedTerm}"

    def _textAtXPath(self, resultElt: WebElement, xpath: str) -> Optional[str]:
        """Return the text of the first node matching ``xpath``, or None.

        Lookup is best-effort: any exception is swallowed (and printed when
        ``self.isDebug()`` is true) so that one missing field does not abort
        parsing of the whole product card.
        """
        try:
            return resultElt.find_element(By.XPATH, xpath).text
        except Exception as e:
            if self.isDebug():
                print(e)
            return None

    def parseOne(self, resultElt: WebElement, pgno: int = 1, flags: Optional[Flags] = None) -> ResultItems.SingleItem:
        """Parse one product card element into a result item.

        Args:
            resultElt: The product card element to read fields from.
            pgno: Page number forwarded to the base-class ``parseOne``.
            flags: Optional debug flags; when debugging is enabled and flags
                are given they are joined with ``|`` into a "DEBUG" field.
                ``None`` (the default) is treated like an empty set.

        Returns:
            The item created by the base class, with Title, Price, Units,
            Available and the fixed "N/A" fields initialized.
        """
        item = super().parseOne(resultElt, pgno)

        item.initialize('Title', self._textAtXPath(resultElt, ".//h5"))

        # This scraper does not read ratings from the card.
        item.initialize('Rating (/5 stars)', "N/A")
        item.initialize('# of Ratings', "N/A")

        priceText = self._textAtXPath(resultElt, ".//h4[@data-testid='product-card-price']")
        # Drop the first character of the price text (presumably the currency
        # symbol - verify against the live page).
        price = priceText[1:] if priceText is not None else None
        item.initialize('Price', price)

        units = self._textAtXPath(resultElt, ".//span[@data-testid='product-card-quantity']/h4")
        item.initialize('Units', units)

        # A 'ring-bell-icon' image on the card marks the product as
        # unavailable; failing to find it is treated as available.
        try:
            resultElt.find_element(By.XPATH, ".//img[@alt='ring-bell-icon']")
            available = "NO"
        except Exception as e:
            if self.isDebug():
                print(e)
            available = "YES"
        item.initialize('Available', available)

        item.initialize('Sponsored', "N/A")

        if self.isDebug() and flags:
            item.initialize("DEBUG", "|".join(flags))
        return item

    def scrape(self, driver: webdriver.Chrome, term: str, maxPages: int, outOfStock: bool, silent: bool = False) -> ResultItems:
        """Open the search page for ``term`` and return its parsed results.

        Args:
            driver: Browser driver used to load and query the page.
            term: Search text, passed to ``getURL``.
            maxPages: Not read here; the scroll limit comes from
                ``self.getPageLimit()``.
            outOfStock: Forwarded to ``getURL``.
            silent: When false, parsing progress is shown with ``tqdm``.

        Returns:
            A ``ResultItems`` whose items are produced lazily by mapping
            ``parseOne`` over the product cards found on the page.
        """
        # NOTE(review): `print_as` is not defined in this class; presumably it
        # is provided by `from common import *` - verify.
        urli = self.getURL(term=term, page=1, outOfStock=outOfStock)
        driver.get(urli)

        # While the location popup can still be found, ask the user to fill it
        # in. The loop ends once looking up the popup fails.
        # NOTE(review): confirm that leaving the loop only on failure matches
        # the intended flow.
        while True:
            try:
                loadElement(
                    parent=driver,
                    by='CSS_SELECTOR',
                    query=".location-popup-container",
                    maxWait=1
                )
                input(f"[{print_as}] Enter the location in the browser, then press ENTER in this terminal")
            except Exception as e:
                if self.isDebug():
                    print("Error trying to get location popup\n", e)
                break

        prodCardPath = "//a[@data-testid='product-card']"
        loadElement(
            parent=driver,
            by='XPATH',
            query=prodCardPath,
            maxWait=1
        )

        # Scroll the last card into view until no new cards appear or the
        # scroll limit is reached.
        nScrolls = 0
        resultIterator = []
        nElts = -1
        while len(resultIterator) > nElts and nScrolls < self.getPageLimit():
            nElts = len(resultIterator)
            resultIterator = driver.find_elements(By.XPATH, prodCardPath)
            if not resultIterator:
                # No product cards at all: nothing to scroll to, and indexing
                # [-1] on the empty list would raise IndexError.
                break
            driver.execute_script("arguments[0].scrollIntoView();", resultIterator[-1])
            sleep(1)  # give the page time to load more cards
            nScrolls += 1

        if not silent:
            resultIterator = tqdm(resultIterator, desc=f"[{print_as}] Parsing results")
        return ResultItems(items=map(
            lambda resultElt: self.parseOne(resultElt),
            resultIterator
        ))