-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathget_poem.py
87 lines (79 loc) · 2.92 KB
/
get_poem.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import json
import time

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
# Location of the ChromeDriver binary used to launch the browser.
CHROME_DRIVER_PATH = "D:\\Program Files\\Chromedriver\\chromedriver.exe"
# Listing page from which poets are collected.
START_URL = 'http://www.shicimingju.com/category/all'
# Records are appended to this file, one JSON object per line.
OUTPUT_PATH = "poems.txt"

# Upper bounds on how many listing pages are visited per run / per poet.
MAX_POET_PAGES = 5
MAX_POEM_PAGES = 3

# Pixels scrolled after each failed click attempt.
SCROLL_STEP = 30
# Safety cap so a link that never becomes clickable cannot hang the run.
MAX_SCROLL_ATTEMPTS = 1000

# Poets that are never visited.
# NOTE(review): presumably these were already collected in an earlier run
# (the output file is opened in append mode) -- confirm before changing.
SKIPPED_POETS = frozenset({"[宋]李清照", "[先秦]诗经", "[宋]苏轼", "[唐]李白"})

EXPAND_LINK_TEXT = "展开全文"
COLLAPSE_LABEL = "收起"
NEXT_PAGE_LINK_TEXT = "下一页"


def clean_poem_text(text):
    """Return the poem text as a single string without line breaks.

    The element text is split on newlines; if the last line is the
    "collapse" label it is dropped, and the remaining lines are
    concatenated with no separator.

    Args:
        text: Raw text of a poem's content element.

    Returns:
        The concatenated poem lines.
    """
    lines = text.split("\n")
    # str.split always yields at least one item, so lines[-1] is safe.
    if lines[-1] == COLLAPSE_LABEL:
        lines.pop()
    return "".join(lines)


def build_record(poet, title, poem):
    """Serialize one poem as a compact JSON object followed by a newline.

    json.dumps is used instead of manual string formatting so that quotes,
    backslashes and control characters inside the scraped text are escaped
    and every output line stays valid JSON. For text without such
    characters the output matches the previous hand-built format exactly.

    Args:
        poet: Poet label as shown on the listing page.
        title: Poem title.
        poem: Poem body, already cleaned.

    Returns:
        A string of the form '{"poet":...,"title":...,"poem":...}' + "\\n".
    """
    record = {"poet": poet, "title": title, "poem": poem}
    return json.dumps(record, ensure_ascii=False, separators=(",", ":")) + "\n"


def scroll_down(driver):
    """Scroll the current window down by SCROLL_STEP pixels."""
    driver.execute_script("window.scrollBy(0,{});".format(SCROLL_STEP))


def click_next_page(driver):
    """Click the "next page" link in the current window.

    Returns:
        True if the link was found and clicked, False if locating or
        clicking it raised a WebDriver error.
    """
    try:
        driver.find_element_by_link_text(NEXT_PAGE_LINK_TEXT).click()
    except WebDriverException:
        return False
    return True


def open_poet_page(driver, poet):
    """Click the link whose text is *poet*, scrolling down between retries.

    Returns:
        True once a click succeeds, False if MAX_SCROLL_ATTEMPTS attempts
        all failed.
    """
    for _ in range(MAX_SCROLL_ATTEMPTS):
        try:
            driver.find_element_by_link_text(poet).click()
        except WebDriverException:
            # Locating or clicking failed; nudge the page and try again.
            scroll_down(driver)
        else:
            return True
    return False


def expand_poem(driver, content):
    """Click the "show full text" link inside *content*, if there is one.

    The page is scrolled before every click attempt. The function returns
    as soon as the link cannot be found, a click succeeds, or
    MAX_SCROLL_ATTEMPTS attempts have been made.
    """
    for _ in range(MAX_SCROLL_ATTEMPTS):
        try:
            more = content.find_element_by_link_text(EXPAND_LINK_TEXT)
            scroll_down(driver)
        except WebDriverException:
            # No expand link (or scrolling failed): nothing to expand.
            return
        try:
            more.click()
        except WebDriverException:
            continue
        return


def scrape_poems(driver, poet, out):
    """Write every poem on up to MAX_POEM_PAGES pages of the current window.

    Each record is echoed to stdout and appended to *out*. Paging stops
    early when the "next page" link cannot be clicked.
    """
    for _ in range(MAX_POEM_PAGES):
        for poem in driver.find_elements_by_class_name("shici_list_main"):
            title = poem.find_element_by_tag_name("h3").text
            content = poem.find_element_by_class_name("shici_content")
            expand_poem(driver, content)
            record = build_record(poet, title, clean_poem_text(content.text))
            print(record)
            out.write(record)
        if not click_next_page(driver):
            break


def scrape_poet(driver, poet, out):
    """Open *poet*'s page, scrape it, then return to the listing window.

    The code assumes that clicking the poet link opens a second window,
    which is closed again once scraping is done.
    """
    if not open_poet_page(driver, poet):
        print("could not open page for {}; skipping".format(poet))
        return
    driver.switch_to.window(driver.window_handles[1])
    scrape_poems(driver, poet, out)
    driver.close()
    driver.switch_to.window(driver.window_handles[0])


def main():
    """Walk the poet listing pages and append every scraped poem to OUTPUT_PATH."""
    driver = webdriver.Chrome(executable_path=CHROME_DRIVER_PATH)
    try:
        driver.get(START_URL)
        with open(OUTPUT_PATH, "a+", encoding="utf-8") as out:
            for _ in range(MAX_POET_PAGES):
                # Collect the names as plain strings first: the elements
                # are not reused after the browser switches windows.
                poets = []
                for item in driver.find_elements_by_class_name("zuozhe_list_item"):
                    name = item.text.split("\n")[0]
                    poets.append(name)
                    print(name)
                for poet in poets:
                    if poet in SKIPPED_POETS:
                        continue
                    scrape_poet(driver, poet, out)
                if not click_next_page(driver):
                    break
    finally:
        # Always shut the browser down, even when scraping fails midway.
        driver.quit()


if __name__ == "__main__":
    main()