forked from PengWeihb/toutiaoSpider
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgetMid.py
64 lines (56 loc) · 1.46 KB
/
getMid.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# -*- coding:utf-8 -*-
import re
import time
from selenium import webdriver
from bs4 import BeautifulSoup
from MySQLdb import *
'''
https://www.toutiao.com/c/user/51045089537/#mid=51045089537
Fetch the key parameter `mid` of a Toutiao account owner's home page,
mainly by driving PhantomJS() through selenium to simulate a browser visit.
'''
def _extract_mid(page_url):
    """Return the second run of digits in *page_url* as a string.

    A resolved profile URL is expected to look like
    ``https://www.toutiao.com/c/user/51045089537/#mid=51045089537``: the
    first digit run is the user id, the second is the mid. When the URL
    does not contain exactly two digit runs the placeholder string ``'[]'``
    is returned (kept from the original behaviour so stored values do not
    change).
    """
    numbers = re.findall(r'\d+', page_url)
    if len(numbers) == 2:
        return str(numbers[1])
    return '[]'


def get_mid(usr_id, d):
    """Look up the ``mid`` of a Toutiao user page and store it via ``Update_mid``.

    Args:
        usr_id: User id used to build ``https://www.toutiao.com/c/user/<usr_id>/``.
        d: Row identifier passed (as a string) to ``Update_mid`` together
            with the extracted mid.

    The page is opened in PhantomJS, refreshed after one second, and the
    browser's current URL is parsed for the mid. The URL and the extracted
    value are printed for progress tracking.
    """
    url = 'https://www.toutiao.com/c/user/' + str(usr_id) + '/'
    driver = webdriver.PhantomJS()
    try:
        driver.get(url)
        # NOTE(review): the pause + refresh presumably give the page time to
        # rewrite its URL with the "#mid=" fragment -- confirm.
        time.sleep(1)
        driver.refresh()
        new_url = driver.current_url
    finally:
        # Always shut the browser down, even when navigation fails, so a
        # failed page does not leave a PhantomJS process behind.
        driver.quit()
    print(new_url)
    data = _extract_mid(new_url)
    print(data)
    Update_mid(data, str(d))
def Update_mid(mid, n):
    """Write *mid* into the ``Media6`` row whose ``iid`` equals *n*.

    Args:
        mid: Value stored in the ``mid`` column.
        n: Value matched against the ``iid`` column.

    A new database connection is opened for the call and always closed
    before returning. On success the change is committed and ``ok!!!`` is
    printed; on failure the transaction is rolled back and the error is
    printed instead of being silently discarded. Database errors are not
    re-raised, so a single failed update does not stop the caller.
    """
    db = connect(host="secret", port=3306, db="Spider", user="root", passwd="secret", charset="utf8")
    try:
        cursor = db.cursor()
        try:
            # Parameterized query: the driver handles quoting of both values.
            cursor.execute("""update Media6 set mid =%s WHERE iid=%s""", [mid, n])
            db.commit()
            # Report success only after the commit actually went through.
            print('ok!!!')
        except Exception as exc:
            db.rollback()
            print('Update_mid failed for iid=%s: %s' % (n, exc))
        finally:
            cursor.close()
    finally:
        db.close()
if __name__ == '__main__':
    # Script entry point: read every (iid, userId) pair from Media6, then
    # resolve and store the mid for each user through get_mid().
    db = connect(host="secret", port=3306, db="Spider", user="root", passwd="secret", charset="utf8")
    rows = ()  # stays empty if the query fails, so the loop below is a no-op
    try:
        cursor = db.cursor()
        # Fetch the real iid with each userId instead of assuming that iid
        # equals the row's 1-based position in an unordered result set.
        cursor.execute('SELECT iid, userId FROM Media6')
        rows = cursor.fetchall()
    except Exception as exc:
        print('could not load user ids from Media6: %s' % exc)
    finally:
        # The read is finished; do not keep the connection open while the
        # (slow) browser crawl runs. Update_mid opens its own connection.
        db.close()
    for iid, usr_id in rows:
        get_mid(usr_id, iid)