-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathnhimmeo.py
71 lines (64 loc) · 2.03 KB
/
nhimmeo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import sys
import json
import re
from Lib.Epub import Epub
def find_src(k):
    """Extract the image source URL from a line of HTML containing ``<img``.

    Four patterns are tried in order, from the most specific tag layout to a
    bare ``src="..."`` attribute. The first pattern that matches wins.
    Diagnostic messages are printed when the later fallbacks are reached.

    Args:
        k: One line of chapter HTML, as a ``str``.

    Returns:
        The captured ``src`` value from the first matching pattern, or
        ``None`` when none of the patterns match.

    Raises:
        TypeError: If ``k`` is not a string (raised by ``re.search``).
    """
    # Explicit match checks are used instead of a catch-all ``except`` so
    # that only a genuine "no match" falls through to the next pattern;
    # interrupts and programming errors are no longer silently swallowed.

    # Layout 1: ``src`` is the first attribute and is followed by a space.
    match = re.search(r'''<img src="([\s\S]+?)" ''', k)
    if match is not None:
        return match.group(1)

    # Layout 2: lazy-loaded image, ``src`` follows ``loading="lazy"``.
    match = re.search(r'''<img loading="lazy" src="([\s\S]+?)" ''', k)
    if match is not None:
        return match.group(1)

    # Layout 3: ``alt`` comes first; the URL is the second capture group.
    match = re.search(r'''<img alt="([\s\S]+?)" src="([\s\S]+?)"''', k)
    if match is not None:
        return match.group(2)
    print(f"[WARNING]:\t failed using Senior regular expression \t {k}")

    # Last resort: any ``src="..."`` in the line, not anchored to ``<img``,
    # so it may pick up an attribute of a different tag.
    match = re.search(r'''src="([\s\S]+?)"''', k)
    if match is not None:
        print(f"[WARNING]:\t using Unsafety regular expression \t {k}")
        return match.group(1)

    print(
        f"[Critical]:\t can not find the src with all regular expression \t {k}")
    return None
# Script entry: expects exactly one command-line argument, the path to a
# JSON file describing the book, and feeds its chapters into an Epub object.
if len(sys.argv) == 2:
    json_path = sys.argv[1]
    with open(json_path, "r", encoding="utf-8") as json_file:
        book = json.load(json_file)

    # Book-level metadata passed to the Epub constructor.
    metadata = {
        "name": book["bookname"],
        "author": book["author"],
        "update_time": book["book_uptime"],
        "coverurl": book["cover"],
        "describe": book["details"].split("\n")
    }
    epub = Epub(describe=metadata)

    for volume in book["chapterList"]:
        volume_name = volume["name"]
        # "lists" is iterated as a mapping; only its values are used.
        for chapter in volume["lists"].values():
            raw_lines = chapter["content"].split("\n")
            uid = chapter["href"].split("/")[-1]
            title = f'{volume_name}-{chapter["name"]}'

            # Replace ideographic spaces first, then classify each line as
            # an image (contains "<img") or a plain paragraph.
            cleaned = (line.replace('\u3000', " ") for line in raw_lines)
            entries = [
                {"type": "img", "item": find_src(line)}
                if "<img" in line
                else {"type": "p", "item": line}
                for line in cleaned
            ]

            # One add_text call per chapter, with a single-element list.
            epub.add_text([{
                "Uid": uid,
                "title": title,
                "lines": entries
            }])

    epub.finish()
else:
    print("缺失json文件")