-
Notifications
You must be signed in to change notification settings - Fork 4
/
amazonpricescraper.py
104 lines (83 loc) · 3 KB
/
amazonpricescraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Jan 28 17:23:57 2018
@author: vishnuhari
"""
from bs4 import BeautifulSoup
from stem import Signal
from stem.control import Controller
import urllib3
import time
# Crawler that fetches pages through a local proxy.
#
#
# Proxy settings are taken from http://www.andrewwatters.com/privoxy/
class ConnectionFactory:
    """Fetch URLs through a local HTTP proxy and rotate the outgoing IP.

    A new identity is requested with stem's ``Signal.NEWNYM`` over the
    control port, and the currently visible IP is read back by fetching
    ``ipcheckurl`` through the proxy.
    """

    user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'
    header = {'user-agent': user_agent}
    numberofIps = 10
    secondsofdelay = 3
    # Connection settings; override on a subclass or instance instead of
    # editing the method bodies.
    controlport = 9051
    # NOTE(review): credential is committed in source -- consider loading it
    # from the environment or a config file.
    controlpassword = "dhd8adkeiuLDKGSU937"
    proxyurl = "http://127.0.0.1:8118"
    requesttimeout = 20
    ipcheckurl = 'http://icanhazip.com/'
    #Holders
    defaultIP = "0.0.0.0"
    newIP = "0.0.0.0"
    oldIP = "0.0.0.0"

    def createConnection(self):
        """Send NEWNYM through the controller if it is currently allowed.

        The ``with`` block closes the controller on exit, so no explicit
        ``close()`` call is needed.
        """
        with Controller.from_port(port=self.controlport) as controller:
            controller.authenticate(password=self.controlpassword)
            if controller.is_newnym_available():
                controller.signal(Signal.NEWNYM)
                print("NEWNYM called")

    def createnewIP(self):
        """Request a new identity and wait until the visible IP changes.

        On the first call (``newIP`` still equals ``defaultIP``) the fetched
        IP is simply stored. On later calls the previous IP is kept in
        ``oldIP`` and the IP-check URL is polled every ``secondsofdelay``
        seconds until a different value comes back.
        """
        firstrun = self.newIP == self.defaultIP
        if not firstrun:
            self.oldIP = self.newIP
        self.createConnection()
        self.newIP = self.openurl(self.ipcheckurl)
        if firstrun:
            print("NewIP in base if set as {}".format(self.newIP))
        else:
            print("NewIP in else set as {}".format(self.newIP))
        print(self.newIP)
        # NOTE(review): this wait has no upper bound; if the IP never
        # changes the loop does not terminate.
        waittime = 0
        while self.oldIP == self.newIP:
            time.sleep(self.secondsofdelay)
            waittime += self.secondsofdelay
            print("Waiting for {} sec for new IP".format(waittime))
            self.newIP = self.openurl(self.ipcheckurl)

    def openurl(self, url):
        """GET ``url`` through the proxy and return the raw response body.

        A fresh ``ProxyManager`` is created for every call and cleared
        afterwards so its pooled connections do not linger.
        """
        proxy = urllib3.ProxyManager(self.proxyurl, timeout=self.requesttimeout)
        try:
            req = proxy.request(method='GET', url=url, headers=self.header)
            body = req.data
            req.release_conn()
            return body
        finally:
            proxy.clear()
class ParseFactory():
    """Turn a search-results HTML page into a list of ``item`` objects."""

    def parseHtml(self, html):
        """Collect (alt text, link) pairs from the ``s-results-list-atf`` list.

        For each direct child element of the ``<ul id="s-results-list-atf">``
        the first ``<a>`` is located; every direct child tag of that anchor
        carrying an ``alt`` attribute yields ``item(alt, href)``.

        Returns an empty list when the result list is absent. Entries
        without an anchor, ``href`` or ``alt`` are skipped instead of
        raising.
        """
        items = []
        bs = BeautifulSoup(html, 'html.parser')
        try:
            results = bs.find('ul', attrs={'id': 's-results-list-atf'})
            if results is None:
                return items
            # find_all(True, recursive=False) yields only direct child tags,
            # skipping the whitespace/text nodes plain iteration would hit.
            for rootelement in results.find_all(True, recursive=False):
                anchor = rootelement.find('a')
                if anchor is None or not anchor.has_attr('href'):
                    continue
                for ref in anchor.find_all(True, recursive=False):
                    if ref.has_attr('alt'):
                        items.append(item(ref['alt'], anchor['href']))
        finally:
            # Release the parse tree even if extraction fails.
            bs.clear(decompose=True)
        return items
class item():
    """Simple record holding a name and a URL."""

    # Class-level fallbacks; each instance overrides both in __init__.
    name = ""
    url = ""

    def __init__(self, name, url):
        self.name, self.url = name, url
if __name__ == '__main__':
    # For each code in `eans`: rotate the IP, fetch the search page through
    # the proxy, and print every parsed entry as "name : url".
    connect = ConnectionFactory()
    parser = ParseFactory()
    eans = ['8901030373930', '8904004400779']
    for ean in eans:
        connect.createnewIP()
        url = 'https://www.amazon.in/s/field-keywords={0}'.format(ean)
        resp = connect.openurl(url)
        items = parser.parseHtml(resp)
        # The loop variable must not be named `item`: at module level that
        # would rebind the `item` class and break the next parseHtml() call.
        for found in items:
            print(found.name + " : " + found.url)