-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy path018-爬取小米应用商城top100的app_name.py
30 lines (28 loc) · 1.06 KB
/
018-爬取小米应用商城top100的app_name.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
"""
@file : run_spider_xiaomi_top100_app.py
@author : xiaolu
@email : [email protected]
@time : 2022-02-16
"""
import time
import requests
from lxml import etree
from bs4 import BeautifulSoup
if __name__ == '__main__':
    # Script entry point: download the first ranking pages of the Xiaomi app
    # store top list, pull "<app name>\t<category>" out of every list entry,
    # print how many entries were collected and write them, one per line,
    # to app_crawl_data.txt.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36'
    }
    # Absolute path to the <ul> holding the ranked apps; each app is one <li>
    # whose <h5><a> text is the name and whose <p><a> text is the category.
    list_xpath = '/html/body/div[6]/div/div[1]/div[1]/ul'
    first_page, last_page = 1, 3
    slots_per_page = 36  # list positions 1..36 are read from every page
    request_timeout = 10  # seconds; without it a stalled connection hangs forever
    delay_between_pages = 3  # seconds to pause after each download

    final_data = []
    for page in range(first_page, last_page + 1):
        url = 'https://app.mi.com/catTopList/0?page={}'.format(page)
        # `headers` must be passed by keyword: the second positional
        # parameter of requests.get is `params`, so a positional dict would
        # be sent as query-string arguments and the User-Agent would never
        # reach the server as a header.
        response = requests.get(url, headers=headers, timeout=request_timeout)
        # Fail loudly on 4xx/5xx instead of trying to parse an error page.
        response.raise_for_status()
        time.sleep(delay_between_pages)

        selector = etree.HTML(response.text)
        if selector is None:
            # Nothing parseable came back for this page; move on.
            continue

        for position in range(1, slots_per_page + 1):
            item_xpath = '{}/li[{}]'.format(list_xpath, position)
            names = selector.xpath(item_xpath + '/h5/a/text()')
            categories = selector.xpath(item_xpath + '/p/a/text()')
            if not names or not categories:
                # The page has no complete entry at this position (short
                # page or changed layout); skip it rather than crash with
                # an IndexError on the empty XPath result.
                continue
            final_data.append(names[0].strip() + '\t' + categories[0].strip())

    print(len(final_data))
    with open('app_crawl_data.txt', 'w', encoding='utf8') as f:
        f.write('\n'.join(final_data))