-
Notifications
You must be signed in to change notification settings - Fork 0
/
scrape.py
66 lines (53 loc) · 1.61 KB
/
scrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import re
import sys

import colorama
import requests
from bs4 import BeautifulSoup
# Count of downloads that came back with HTTP 200. Incremented by
# download_file() and reported when the script finishes.
success_files = 0
def get_link():
    """Prompt the user for the URL of the page to scrape.

    Returns:
        The non-empty string typed by the user.

    If the user enters nothing, an error message is printed and the
    process exits with status 1.
    """
    # raw_input: this script targets Python 2.
    link = raw_input('Give link to scrape page please: ')
    if not link:
        # Report and bail out directly instead of raising an exception only
        # to catch it again a few lines later.
        print('Must enter link, jack!')
        sys.exit(1)
    return link
def get_page(link):
    """Fetch ``link`` over HTTP and return the raw response body.

    Args:
        link: URL of the page to download.

    Returns:
        The response body as returned by ``requests`` (``.content``).

    On any ``requests`` failure, including a 4xx/5xx status, the error is
    printed and the process exits with status 1.
    """
    try:
        # A timeout keeps the script from hanging forever on a dead server.
        response = requests.get(link, timeout=30)
        # Treat HTTP error statuses as failures rather than scraping the
        # server's error page; HTTPError is a RequestException subclass.
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(e)
        sys.exit(1)
    return response.content
def download_file(file):
    """Download one scraped link and save it to disk.

    Args:
        file: A two-item sequence ``[url, local_path]`` as built by
            scrape_page().

    On an HTTP 200 response the body is written to ``local_path``, the
    module-level ``success_files`` counter is incremented and its new value
    is printed. Any other status writes nothing.
    """
    global success_files
    url, path = file[0], file[1]
    # Fetch before opening the target so that a failed download does not
    # leave an empty file behind.
    response = requests.get(url)
    if response.status_code != 200:
        return
    with open(path, 'wb') as f:
        # The original opened the file but never wrote the payload.
        f.write(response.content)
    success_files += 1
    print(success_files)
def get_keyword():
    """Prompt the user for the keyword that download links must contain.

    Returns:
        The non-empty keyword string typed by the user.

    If the user enters nothing, an error message is printed and the
    process exits with status 1, mirroring get_link().
    """
    # raw_input: this script targets Python 2.
    keyword = raw_input('Enter url link keyword for '
                        'download link(besides https/http): ')
    # Validate the value just read (the original tested ``link`` by mistake).
    if not keyword:
        print('Must enter keyword, jack!')
        # Exit instead of implicitly returning None, which would otherwise
        # be formatted into the link-matching regex by scrape_page().
        sys.exit(1)
    return keyword
def scrape_page(html, keyword, strip_prefix='MAME 0.149 ROMs/'):
    """Extract download links whose URL contains ``keyword``.

    Args:
        html: Markup of the page to scan.
        keyword: Literal text that must appear in a link's ``href`` after
            the ``http://`` or ``https://`` scheme.
        strip_prefix: Literal text removed from each link's visible text to
            form the local file name. Defaults to the prefix the script was
            originally written for.

    Returns:
        A list of ``[href, file_name]`` pairs, one per matching ``<a>`` tag,
        in document order.
    """
    # Name the parser explicitly so results do not depend on which optional
    # parsers happen to be installed.
    soup = BeautifulSoup(html, 'html.parser')
    # Anchor the whole pattern at the start of the href and escape the
    # keyword so characters such as '.' or '?' are matched literally.
    href_pattern = re.compile(
        r'^https?://.*{0}'.format(re.escape(keyword)))
    anchors = soup.find_all('a', href=href_pattern)
    return [
        # Plain string replacement: the prefix is literal text, not a regex.
        [anchor['href'], anchor.get_text().replace(strip_prefix, '')]
        for anchor in anchors
    ]
if __name__ == "__main__":
    # Script entry point: ask for the page URL and link keyword, scrape the
    # page for matching links, download each one, then report the total.
    link = get_link()
    keyword = get_keyword()
    html = get_page(link)
    download_list = scrape_page(html, keyword)
    # Plain loop rather than a throwaway list comprehension:
    # download_file() is called only for its side effects.
    for entry in download_list:
        download_file(entry)
    print("success with {} files".format(success_files))