-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathTemplate.py
159 lines (141 loc) · 5.54 KB
/
Template.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
import argparse
import os
import re
import time
import json
# from bs4 import BeautifulSoup
from Lib.Network import Network
from Lib.ini import CONF
# Ensure the working directories exist before any download starts.
for _dir in ("txt", "Data"):
    if not os.path.exists(_dir):
        os.mkdir(_dir)

# Command-line interface: each option maps onto a template() constructor
# argument or a run() parameter.
parser = argparse.ArgumentParser(
    prog="Book Downloader",
    description='用于下载一些模板一致的网站',
    epilog='Phantom-sea © limited |∀` )',
)
parser.add_argument('-d', '--domain', type=str, help='网站域名')
parser.add_argument("-p", "--protocal", type=str,
                    help='网站支持协议', default="https://")
parser.add_argument("-i", "--ip", type=str, help='网站IP地址', default='False')
parser.add_argument("-s", "--start", type=int, help="起始点", default=0)
parser.add_argument("-e", "--end", type=int, help="终止点", default=10000)
parser.add_argument("-m", "--mode", type=str, help="模式", default="default")
parser.add_argument("-c", "--code", type=str, help="网页编码", default="gbk")
parser.add_argument("-x", "--x", type=str, help="高级设定", default="/txt/2")
args = parser.parse_args()
class Static:
    """Stateless regex helpers that extract the attributes of the
    'txt下载' (txt-download) anchor from a detail page's HTML."""

    @staticmethod
    def rematch(text):
        """Parse the download anchor by splitting its attributes on spaces.

        Fragile when an attribute value itself contains a space;
        rematch_test is the more tolerant variant.
        """
        tokens = re.findall(
            r'''</a> <a ([\s\S]+?)>txt下载</a>''', text)[0].split(" ")
        attrs = {}
        for token in tokens:
            key = token.split("=")[0]
            # Strip the 'key=' prefix and the surrounding double quotes.
            attrs[key] = token.replace(key + "=", "").replace("\"", "")
        return attrs

    @staticmethod
    def rematch_test(text):
        """Parse the download anchor with one regex per key="value" pair,
        stripping quotes and any embedded spaces from each value."""
        inner = re.findall(
            r'''</a> <a([\s\S]+?)>txt下载</a>''', text)[0]
        pairs = re.findall(r''' ([\s\S]+?=\"[\s\S]+?)\"''', inner)
        attrs = {}
        for pair in pairs:
            key = pair.split("=")[0]
            attrs[key] = pair.replace(key + "=", "").replace("\"", "").replace(" ", "")
        return attrs
class template():
    """Scraper for book sites that share the same page template.

    Walks numeric book IDs, resolves each book's txt-download URL, appends
    aria2-style "url / out=filename" pairs to txt/url.txt, and mirrors the
    results into the CONF store under Data/.
    """

    def __init__(self, domain, ip="False", protocal="https://", path="/txt/2", encoding="gbk") -> None:
        if ip == "False":
            ip = False  # concession for GitHub Actions: the CLI passes "False" as a string
        self.s = Network({domain: {"ip": ip}})
        self.c = CONF(domain, conf_path="Data")
        self.url = protocal + domain
        self.download = path
        self.encoding = encoding

    def get(self, path):
        """GET an absolute path on the configured site."""
        return self.s.get(self.url + path)

    def get_url(self, ID, method=Static.rematch_test, tryid=0):
        """Fetch the download page for *ID* and extract the anchor attributes.

        Retries up to 3 times (1 s apart) on a non-200 response, then raises.
        Returns False when the page has no parsable download anchor.
        """
        r = self.get(f"{self.download}-{ID}-0.html")
        r.encoding = self.encoding
        if r.status_code != 200:
            if tryid >= 3:
                raise Exception("ERROR")
            time.sleep(1)
            return self.get_url(ID, method, tryid + 1)
        try:
            return method(r.text)
        except Exception:
            return False

    def get_ori_url(self, url):
        """Follow one redirect hop manually and return the absolute target URL."""
        # NOTE(review): assumes the server answers with a redirect; a
        # non-redirect response would raise KeyError on "Location" — confirm.
        r = self.s.get(url, allow_redirects=False)
        return self.url + r.headers["Location"]

    def run(self, start=0, end=5000):
        """Scan IDs in [start, end], write url/out pairs to txt/url.txt and
        persist progress (done IDs, failed IDs, per-ID metadata)."""
        ID_list = self.c.load("Core", "ID")[0]
        ID_list = [] if ID_list is False else json.loads(ID_list)
        ERR_list = self.c.load("Core", "Error")[0]
        ERR_list = [] if ERR_list is False else json.loads(ERR_list)
        # BUGFIX: the handle was previously opened without with/finally and
        # leaked whenever the scan raised; "with" guarantees it is closed.
        with open(os.path.join("txt", "url.txt"), "w") as F:
            while start <= end:
                try:
                    fin = self.get_url(start)
                except Exception:  # was a bare except: keep Ctrl-C/SystemExit working
                    print(str(start) + "出现问题,请手动校对")
                    ERR_list.append(start)
                else:
                    if fin != False:
                        url = self.url + "/e/DownSys/" + fin["href"].split("/")[1]
                        url = self.get_ori_url(url)
                        F.write(url)
                        F.write("\n\tout=" + fin["download"] + "\n")
                        if start not in ID_list:
                            ID_list.append(start)
                        self.c.add(str(start), "Download", url)
                        # "/" in a filename would be treated as a path separator
                        self.c.add(str(start), "Filename", fin["download"].replace("/", "&"))
                start += 1
        self.c.add("Core", "ID", ID_list)
        self.c.add("Core", "Error", ERR_list)
        self.c.save()

    def run_local(self):
        """Rebuild txt/url.txt purely from previously persisted metadata."""
        ID_list = self.c.load("Core", "ID")[0]
        if ID_list is False:
            print("未发现有效配置文件,请手动生成")
            return  # BUGFIX: previously fell through to json.loads(False) -> TypeError
        ID_list = json.loads(ID_list)
        # NOTE(review): run() stores "Download"/"Filename" but this reads
        # "download"/"filename" — correct only if CONF option names are
        # case-insensitive (configparser-style); verify against Lib.ini.
        with open(os.path.join("txt", "url.txt"), "w") as f:
            for i in ID_list:
                f.write(self.c.load(str(i), "download")[0])
                f.write("\n\tout=" + self.c.load(str(i), "filename")[0] + "\n")
if __name__ == "__main__":
    # Entry point: require a domain, then dispatch on the requested mode.
    print(args)
    if args.domain is None:  # identity check is the idiomatic None test
        print("missing key domain")
    else:
        t = template(args.domain, args.ip, args.protocal, args.x, args.code)
        if args.mode == "default":
            t.run(args.start, args.end)
        elif args.mode == "local":
            t.run_local()