crawl.py
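# crawl.py: multi-threaded proxy scraper. Each thread fetches one source URL,
# a cascade of regex patterns extracts ip:port pairs from the response, and
# every hit is appended to proxies.txt.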
import re
import requests
import threading
print('''
╔═╗┬─┐┌─┐─┐ ┬┬ ┬ ┌─┐┌─┐┬─┐┌─┐┌─┐┌─┐┌─┐┬─┐
╠═╝├┬┘│ │┌┴┬┘└┬┘ └─┐│ ├┬┘├─┤├─┘├─┘├┤ ├┬┘
╩ ┴└─└─┘┴ └─ ┴ └─┘└─┘┴└─┴ ┴┴ ┴ └─┘┴└─
[ V2 ]
- discord:Arrys#3381
''')
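# Proxy source URLs, one per line; add more proxy-list pages here and the
# pattern cascade below will try to parse whatever format they return.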
urls = '''
https://api.proxyscrape.com/v2/?request=getproxies&protocol=http&timeout=10000&country=all&ssl=all&anonymity=all
'''
# Start a fresh proxies.txt with a header line; the worker threads all append
# to this same handle.
file = open('proxies.txt', 'w')
file.write('Proxies:\n')
good_proxies = []
lock = threading.Lock()

def save(proxy):
    # Several threads call this concurrently, so guard the shared file and list.
    with lock:
        file.write(proxy + '\n')
        good_proxies.append(proxy)
def pattern_one(text):
    # Easiest case: the page already lists proxies as plain "ip:port" lines.
    ip_port = re.findall(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{2,5})', text)
    if not ip_port:
        pattern_two(text)
    else:
        for proxy in ip_port:
            save(proxy)
def pattern_two(text):
    # HTML tables where the IP and the port sit in separate <td> cells.
    ip = re.findall(r'>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})<', text)
    port = re.findall(r'td>(\d{2,5})<', text)
    if not ip or not port:
        pattern_three(text)
    else:
        for ip_addr, port_num in zip(ip, port):
            save(ip_addr + ':' + port_num)
def pattern_three(text):
    # Markup where each value appears on its own line after the opening tag.
    ip = re.findall(r'>\n\s+(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})', text)
    port = re.findall(r'>\n\s+(\d{2,5})\n', text)
    if not ip or not port:
        pattern_four(text)
    else:
        for ip_addr, port_num in zip(ip, port):
            save(ip_addr + ':' + port_num)
def pattern_four(text):
    # Like pattern_two, but without requiring a "td" before the port cell.
    ip = re.findall(r'>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})<', text)
    port = re.findall(r'>(\d{2,5})<', text)
    if not ip or not port:
        pattern_five(text)
    else:
        for ip_addr, port_num in zip(ip, port):
            save(ip_addr + ':' + port_num)
def pattern_five(text):
    # Last resort: grab any IP-looking and port-looking tokens and pair them up.
    # Crude by design; the bare \d{2,5} also matches digit runs inside the IPs.
    ip = re.findall(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})', text)
    port = re.findall(r'(\d{2,5})', text)
    for ip_addr, port_num in zip(ip, port):
        save(ip_addr + ':' + port_num)
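# Example of the fallback chain (hypothetical inputs): a plain-text response
# like "1.2.3.4:8080\n5.6.7.8:3128" is consumed entirely by pattern_one,
# while table markup like "<td>1.2.3.4</td><td>8080</td>" matches nothing
# there and falls through until pattern_two pairs the IP and port cells.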
def start(url):
    try:
        # A timeout keeps a dead proxy list from hanging its thread forever.
        text = requests.get(url, headers={'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36'}, timeout=15).text
        pattern_one(text)
        print(f' [+] Scraping from: {url}')
    except requests.exceptions.SSLError:
        print(f'{url} [x] SSL Error')
    except requests.exceptions.RequestException as e:
        print(f'{url} [x] Error: {e}')
# One thread per source URL; join them all before reporting the total.
threads = []
for url in urls.splitlines():
    if url:
        x = threading.Thread(target=start, args=(url,))
        x.start()
        threads.append(x)
for th in threads:
    th.join()
file.close()
input(f'\n\n[/] Total scraped proxies: ({len(good_proxies)}) press Enter to quit! ')
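# Usage sketch (assuming Python 3 with the requests package installed):
#   pip install requests
#   python crawl.py
# Scraped proxies land in proxies.txt, one ip:port pair per line.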