forked from PengWeihb/toutiaoSpider
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathuserId.py
55 lines (47 loc) · 1.36 KB
/
userId.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# -*- coding:utf-8 -*-
from pymysql import *
import re
'''
For each previously collected URL, extract the userID and its corresponding
mid, and store them in MySQL.
'''
# Compiled once: matches every maximal run of decimal digits in a URL.
_DIGIT_RUN = re.compile(r'\d+')


def _extract_ids(url):
    """Return ``(user_id, mid)`` parsed from *url*, or ``('', '')``.

    The ids are taken from the digit runs found in the URL: when there are
    exactly two, the first is the user id and the second is the mid. Any
    other count (including a missing/NULL URL) yields two empty strings.
    """
    numbers = _DIGIT_RUN.findall(url or '')
    if len(numbers) == 2:
        return numbers[0], numbers[1]
    return '', ''


def user_ID():
    """Fill in ``userId`` and ``mid`` for every row of the ``Media`` table.

    Reads ``iid`` and ``MainUrl`` for all rows, extracts the two ids from
    each URL, prints them, and hands them to ``Update_user`` together with
    the row's own ``iid``.

    Rows whose URL does not contain exactly two digit runs are updated with
    empty strings for both values.

    Raises:
        Whatever the database driver raises if connecting or the SELECT
        fails; the connection is closed before the error propagates.
    """
    db = connect(host="secret", port=3306, db="Spider", user="root", password="secret", charset="utf8")
    try:
        with db.cursor() as cursor:
            # Fetch the primary key alongside the URL instead of guessing it
            # from the row position: without ORDER BY the result order is
            # not guaranteed, and ids need not be contiguous from 1.
            cursor.execute('SELECT iid, MainUrl FROM Media')
            rows = cursor.fetchall()
    finally:
        # Update_user opens its own connection, so this one is only needed
        # for the read above.
        db.close()

    for iid, url in rows:
        user_id, mid = _extract_ids(url)
        print(user_id)
        print(mid)
        Update_user(user_id, mid, str(iid))
def Update_user(user_id, mid, n):
    """Store *user_id* and *mid* on the ``Media`` row whose ``iid`` is *n*.

    Opens a dedicated connection, runs a parameterized UPDATE and commits.
    The update is best-effort: if it fails, the transaction is rolled back,
    the error is printed, and the function returns normally so a caller
    processing many rows can continue with the next one.

    Args:
        user_id: Value written to the ``userId`` column.
        mid: Value written to the ``mid`` column.
        n: ``iid`` of the row to update.
    """
    params = [user_id, mid, n]
    db = connect(host="secret", port=3306, db="Spider", user="root", password="secret", charset="utf8")
    try:
        with db.cursor() as cursor:
            cursor.execute("""update Media set userId=%s,mid =%s WHERE iid=%s""", params)
        db.commit()
    except Exception as exc:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt/SystemExit still stop the script, and report
        # the failure instead of discarding it silently.
        db.rollback()
        print("Update_user failed for iid=%s: %s" % (n, exc))
    finally:
        # Always release the connection, even if commit or rollback raised.
        db.close()
# Script entry point: run the extraction only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    user_ID()