-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathscraper-proxy.py
96 lines (77 loc) · 3.05 KB
/
scraper-proxy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import urllib
import random
class ScraperProxy(object):
    """Hold a current HTTP proxy and run simple connectivity checks through it.

    A proxy is a ``host:port`` string; a value that already contains a scheme
    (e.g. ``http://host:port``) is used unchanged. Candidate proxies can be
    read from a text file with one proxy per line.
    """

    # Seconds to wait on a request before giving up on it. Without a timeout
    # a dead proxy can block the caller indefinitely.
    DEFAULT_TIMEOUT = 10

    def __init__(self):
        self.proxy = None      # currently selected proxy, or None if unset
        self._proxy_list = []  # proxies accumulated by load_proxies()

    def set_proxy(self, proxy):
        """Make *proxy* the current proxy."""
        self.proxy = proxy

    def load_proxies(self, filename):
        """Append the proxies listed in *filename* to the internal list.

        Each line is stripped of surrounding whitespace and blank lines are
        skipped. Entries are appended, so loading the same file twice stores
        its proxies twice.

        Raises:
            IOError: if the file cannot be opened or read.
        """
        with open(filename) as f:
            self._proxy_list.extend(
                entry for entry in (line.strip() for line in f) if entry
            )

    def set_random_proxy(self, filename):
        """Pick a random proxy from the loaded list and make it current.

        *filename* is only read when the internal list is still empty.

        Raises:
            IOError: if the file has to be read and cannot be.
            IndexError: if no proxy is available even after reading the file.
        """
        if not self._proxy_list:
            self.load_proxies(filename)
        if not self._proxy_list:
            # Same exception type random.choice() raises on an empty
            # sequence, but with a message that names the offending file.
            raise IndexError('No proxies found in %s' % filename)
        self.set_proxy(random.choice(self._proxy_list))

    @staticmethod
    def _proxy_map(proxy):
        """Return the scheme -> proxy URL mapping that routes via *proxy*."""
        if not proxy:
            raise ValueError('No proxy given and no current proxy is set')
        # Only add the scheme when the caller did not supply one already.
        proxy_url = proxy if '://' in proxy else 'http://' + proxy
        return {'http': proxy_url, 'https': proxy_url}

    @staticmethod
    def _open(url, proxies=None, timeout=DEFAULT_TIMEOUT):
        """Open *url* and return the response object; the caller closes it.

        With *proxies* set the request goes through that mapping; otherwise
        the opener's default handlers are used.
        """
        # Imported here so the block does not depend on the file-level
        # imports; urllib.urlopen(..., proxies=...) exists only on Python 2.
        try:
            from urllib.request import ProxyHandler, build_opener
        except ImportError:  # Python 2
            from urllib2 import ProxyHandler, build_opener
        handlers = [ProxyHandler(proxies)] if proxies else []
        return build_opener(*handlers).open(url, timeout=timeout)

    def check_proxy(self, proxy=None, test_url='https://www.google.com',
                    timeout=DEFAULT_TIMEOUT):
        """Check if a proxy is dead (False) or alive (True).

        Args:
            proxy: proxy to test; defaults to the current proxy.
            test_url: URL fetched through the proxy.
            timeout: seconds to wait before treating the proxy as dead.

        Returns:
            True if *test_url* could be opened through the proxy, False on
            any IOError (connection failure, timeout or HTTP error status).

        Raises:
            ValueError: if no proxy is given and none is currently set.
        """
        proxy = proxy or self.proxy
        proxies = self._proxy_map(proxy)
        try:
            response = self._open(test_url, proxies, timeout)
        except IOError:
            print('Connection error! Proxy %s possibly dead...' % proxy)
            return False
        # The body is not needed; close right away to release the socket.
        response.close()
        print('Proxy %s is alive...' % proxy)
        return True

    def get_external_ip(self, with_proxy=False, proxy=None,
                        timeout=DEFAULT_TIMEOUT):
        """Get the external IP as reported by myip.dnsdynamic.org.

        Args:
            with_proxy: route the request through a proxy when True.
            proxy: proxy to use; defaults to the current proxy. Only used
                for the request when *with_proxy* is True.
            timeout: seconds to wait for the service to answer.

        Returns:
            The response body decoded as text with surrounding whitespace
            removed, or None if the request failed with an IOError.

        Raises:
            ValueError: if *with_proxy* is True and no proxy is available.
        """
        dnsdynamic_url = 'http://myip.dnsdynamic.org/'
        proxy = proxy or self.proxy
        proxies = self._proxy_map(proxy) if with_proxy else None
        try:
            response = self._open(dnsdynamic_url, proxies, timeout)
            try:
                body = response.read()
            finally:
                response.close()
        except IOError:
            print('Connection error for %s with proxy %s' % (dnsdynamic_url, proxy))
            return None
        ip = body.decode('utf-8', 'replace').strip()
        print('IP returned is %s' % ip)
        return ip
if __name__ == '__main__':
    # Manual smoke test: exercises every public method once. It needs
    # network access and a 'proxies.txt' file in the working directory.
    test_proxy = '58.215.142.208:80'
    scraper = ScraperProxy()

    # Select a fixed proxy and probe it.
    print('Set proxy to %s' % test_proxy)
    scraper.set_proxy(test_proxy)
    print('Test proxy %s' % scraper.proxy)
    scraper.check_proxy()

    # Switch to a randomly chosen proxy from the file.
    scraper.set_random_proxy('proxies.txt')
    print('Set random proxy from file %s' % scraper.proxy)

    # Back to the fixed proxy, then compare the direct and proxied IPs.
    scraper.set_proxy(test_proxy)
    direct_ip = scraper.get_external_ip()
    print('External IP with no proxy: %s' % direct_ip)
    proxied_ip = scraper.get_external_ip(with_proxy=True)
    print('External IP with proxy (%s): %s' % (scraper.proxy, proxied_ip))