-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmovie_rank_webscraping.py
67 lines (48 loc) · 1.68 KB
/
movie_rank_webscraping.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import requests
import pandas as pd
from bs4 import BeautifulSoup
import requests
import xlwt
# Scrape the Yahoo! Movies Taiwan rating chart for a run of years, keep the
# movies rated above a star threshold, and write them (best first) to an
# .xls workbook.

# Chart page; the year is passed separately as the ``search_year`` parameter.
CHART_URL = 'https://movies.yahoo.com.tw/chart.html?cate=rating'
FIRST_YEAR = 2009
# NOTE(review): the original comment said "2009-2019", but ten years starting
# at 2009 covers 2009..2018. Behaviour is kept as-is; confirm the intent.
YEAR_COUNT = 10
# Only movies rated strictly above this value are exported.
MIN_STARS = 3.9
# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30
OUTPUT_PATH = '/home/vic/Downloads/MOVIE_rank.xls'


def fetch_chart_rows(year):
    """Download the rating chart for *year* and return its data rows.

    Returns the ``div.tr`` elements of the page without the first one, which
    the script treats as the header row. The list is empty when the page has
    no rows at all.

    Raises ``requests.RequestException`` on network failure, timeout, or an
    HTTP error status.
    """
    resp = requests.get(
        CHART_URL,
        params={"search_year": str(year)},
        timeout=REQUEST_TIMEOUT,
    )
    # Fail loudly on 4xx/5xx rather than parsing an error page.
    resp.raise_for_status()
    resp.encoding = 'utf-8'
    soup = BeautifulSoup(resp.text, 'lxml')
    rows = soup.find_all('div', class_='tr')
    # Slicing (unlike pop(0)) is safe when no rows were found.
    return rows[1:]


def extract_title_and_stars(row):
    """Return ``(title, stars)`` as plain strings for one chart row.

    Returns ``None`` when the rank cell, the title, or the star count cannot
    be found, so that a malformed row is skipped instead of crashing the run.
    """
    # NOTE(review): find_next() searches forward in the document and is not
    # limited to this row's descendants; kept exactly as in the original.
    rank = row.find_next('div', attrs={'class': 'td'})
    if rank is None:
        return None

    # The row ranked "1" is looked up through an <h2>; every other row
    # through a div.rank_txt.
    if rank.string == '1':
        title_tag = rank.find_next('h2')
    else:
        title_tag = rank.find_next('div', attrs={'class': 'rank_txt'})
    stars_tag = row.find('h6', attrs={'class': 'count'})
    if title_tag is None or stars_tag is None:
        return None

    title = title_tag.string
    stars = stars_tag.string
    if title is None or stars is None:
        return None
    # str() detaches the text from the parse tree; a NavigableString would
    # otherwise keep the whole document alive for as long as it is stored.
    return str(title), str(stars)


def rank_movies(entries, min_stars=MIN_STARS):
    """Filter and order scraped ``(title, stars)`` pairs.

    *entries* is an iterable of ``(title, stars_text)`` pairs. Pairs whose
    star text is not a number are ignored. Only movies rated strictly above
    *min_stars* are kept. When a title occurs more than once, the last
    qualifying rating wins. The result is a list of ``(title, stars_text)``
    tuples sorted by numeric rating, highest first; ties keep the order in
    which the titles were first seen.
    """
    rated = {}
    for title, stars in entries:
        try:
            value = float(stars)
        except (TypeError, ValueError):
            continue
        if value > min_stars:
            rated[title] = stars
    # Compare as numbers: comparing the raw text would order "9.5" above "10".
    return sorted(rated.items(), key=lambda item: float(item[1]), reverse=True)


def write_ranking(ranking, path=OUTPUT_PATH):
    """Write *ranking* (``(title, stars)`` pairs) to an .xls file at *path*."""
    workbook = xlwt.Workbook(encoding='utf-8')
    booksheet = workbook.add_sheet('Sheet 1', cell_overwrite_ok=True)
    booksheet.write(0, 0, 'MOVIE')
    # Header text kept exactly as in the original output; 'STARTS' may be a
    # typo for 'STARS' -- confirm before changing, consumers may rely on it.
    booksheet.write(0, 1, 'STARTS')
    for sheet_row, (title, stars) in enumerate(ranking, start=1):
        booksheet.write(sheet_row, 0, title)
        booksheet.write(sheet_row, 1, stars)
    workbook.save(path)


def main():
    """Scrape every configured year and export the combined ranking."""
    entries = []
    for year in range(FIRST_YEAR, FIRST_YEAR + YEAR_COUNT):
        for row in fetch_chart_rows(year):
            entry = extract_title_and_stars(row)
            if entry is not None:
                entries.append(entry)
    write_ranking(rank_movies(entries))


if __name__ == '__main__':
    main()