-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtitlegetter.py
249 lines (224 loc) · 9.17 KB
/
titlegetter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
#!/usr/bin/python
# -*- coding: UTF-8 -*-
### Modules Importing ###
from genericpath import exists
import os
import requests
from bs4 import BeautifulSoup
import toml
import argparse
### Modules Importing ###
class Main:
    '''
    Start-up helpers for the program.

    Builds the command-line parser, loads TOML files and prints the logo
    and the version string.  The script section at the bottom of this file
    creates one instance and calls these methods as soon as it starts.
    '''

    def GetParser(self):
        '''
        Build the command-line parser.

        Returns an ``argparse.ArgumentParser`` that understands
        ``-v/--version``, ``-f/--format``, ``-o/--output``, ``-u/--url``,
        ``-i/--input-file`` and ``-b/--batch-mode``.  Nothing is parsed
        here; the caller invokes ``parse_args()`` itself.
        '''
        parser = argparse.ArgumentParser(
            description='HELP', epilog='Have a nice day!')
        # Boolean switch: only prints the version and quits.
        parser.add_argument(
            '-v', '--version', help='Show the version.', action='store_true')
        parser.add_argument('-f', '--format', help='The format of output')
        parser.add_argument('-o', '--output', help='The filename of output')
        parser.add_argument(
            '-u', '--url', help='The url from which you want to get the Title')
        parser.add_argument(
            '-i', '--input-file',
            help='The original url list. It may be a *.txt file.')
        # Boolean switch: selects batch mode instead of single-URL mode.
        parser.add_argument(
            '-b', '--batch-mode',
            help='Get titles from multi URLs, a list file(*.txt) and an output-file are required.',
            action="store_true")
        return parser

    def LoadTheConfig(self, filename):
        '''
        Load a TOML configuration file.

        ``filename`` is the path of the TOML file to read (the script below
        passes a ``config.toml``).  The parsed content is returned exactly
        as ``toml.load`` produces it, and the other functions read their
        settings from that result.
        '''
        return toml.load(filename)

    def ShowLogo(self, config):
        '''
        Print the text logo stored in the configuration.

        ``config`` is the value returned by ``LoadTheConfig()``; the logo is
        read from ``config['Sign']['LOGO']``, for example:

            config = LoadTheConfig("config.toml")
            ShowLogo(config=config)
        '''
        print(config['Sign']['LOGO'])

    def LoadOutputs(self, filename):
        '''
        Load a TOML file and return its parsed content.

        Works exactly like ``LoadTheConfig()``.  The original notes describe
        it as the loader for a "lang.toml" file that normally needs no
        editing; nothing in the visible script calls it.
        '''
        return toml.load(filename)

    def ShowVersion(self, filename):
        '''
        Print "titlegetter version " followed by the content of ``filename``.

        The file content is printed unmodified, including any trailing
        newline it contains.
        '''
        # The context manager closes the handle; a bare open() leaked it.
        with open(filename) as version_file:
            version = version_file.read()
        print("titlegetter version " + version)
class Process:
    '''
    Fetch a web page, extract its title and print it in several formats.
    '''

    # Horizontal rule printed above and below every formatted entry.
    _RULE = '-' * 40

    def GetPage(self, headers, URL, session, config):
        '''
        Download ``URL`` and return the response body as text.

        headers -- request headers handed to ``session.get``.
        URL     -- address of the page to fetch.
        session -- object providing a ``proxies`` mapping and a ``get()``
                   method (the script passes a ``requests`` session).
        config  -- loaded configuration; ``config['Proxy']['Enable']``
                   switches proxying on and ``config['Proxy']['socks5']``
                   is then used for both http and https.

        Returns ``response.text`` when the status code is 200.
        Raises ``SystemExit`` with status 1 for any other status code,
        after printing a failure message.
        '''
        if config['Proxy']['Enable']:
            proxy_address = config['Proxy']['socks5']
            proxies = {'http': proxy_address, 'https': proxy_address}
        else:
            proxies = {}
        session.proxies.update(proxies)
        response = session.get(url=URL, headers=headers)
        if response.status_code == 200:
            print('\n' + URL + " --> " + str(response.status_code) + " OK")
            return response.text
        # status_code is an int: without str() this line raised TypeError
        # and the message was never shown.
        print(URL + "Get Page Failed:" + str(response.status_code))
        # SystemExit (unlike os._exit) flushes stdout, lets open files be
        # closed by their context managers, and reports a failing status.
        raise SystemExit(1)

    def GetTitle(self, page):
        '''
        Return the stripped text of the page's <title> element.

        ``page`` is the HTML document as a string.  An empty string is
        returned when the document has no <title>, or when the element has
        no single text child (empty title or nested markup).
        '''
        soup = BeautifulSoup(page, 'lxml')
        title = soup.find('title')
        # find() gives None for a missing tag, and .string is None unless
        # the tag holds exactly one string; both used to crash on .strip().
        text = title.string if title is not None else None
        return text.strip() if text else ''

    def PrintAsPureText(self, title, URL):
        '''Print the title and the link on two labelled lines.'''
        print(self._RULE)
        print('Title: ' + title)
        print('Link: ' + URL)
        print(self._RULE)

    def PrintAsMarkDown(self, title, URL):
        '''Print the entry as a Markdown link: [title](URL).'''
        print(self._RULE)
        print('[' + title + ']' + '(' + URL + ')')
        print(self._RULE)

    def PrintAsHTML(self, title, URL):
        '''Print the entry as an HTML anchor wrapped in <ul> tags.'''
        # NOTE(review): title and URL are inserted without HTML escaping.
        print(self._RULE)
        print("<ul><a href=" + "\"" + URL + "\"" + ">" + title + "</a></ul>")
        print(self._RULE)

    def PrintAsBBScode(self, title, URL):
        '''Print the entry as a BBCode link: [url=URL]title[/url].'''
        print(self._RULE)
        print("[url=" + URL + "]" + title + "[/url]")
        print(self._RULE)
# Here is the running area for the classes; everything is started from here.


def _find_config_file():
    '''Return the first existing config.toml among the known locations, or None.'''
    candidates = []
    # Unset (or empty) environment variables are skipped instead of being
    # turned into bogus paths such as "None/titlegetter/config.toml".
    xdg_config_home = os.getenv('XDG_CONFIG_HOME')
    if xdg_config_home:
        candidates.append(xdg_config_home + '/titlegetter/config.toml')
    home = os.getenv('HOME')
    if home:
        candidates.append(home + '/.config/titlegetter/config.toml')
    candidates.append('/etc/titlegetter/config.toml')
    candidates.append('config/config.toml')
    for candidate in candidates:
        if os.path.exists(candidate):
            return candidate
    return None


def _format_entry(fmt, title, url):
    '''Return the text written to the batch output file for one title/URL pair.'''
    if fmt == 'txt':
        return 'Title: ' + title + '\n' + 'Link: ' + url + '\n\n'
    if fmt == 'md':
        return '[' + title + ']' + '(' + url + ')' + '\n\n'
    if fmt == 'html':
        return "<ul><a href=" + "\"" + url + "\"" + ">" + title + "</a></ul>" + "\n"
    # 'bbscode': the trailing newline keeps every entry on its own line
    # (without it all entries were glued together).
    return "[url=" + url + "]" + title + "[/url]" + "\n"


# Step Zero, initialize everything.
Starting = Main()
Do = Process()

# Now it's time to load the config file. :)
_config_path = _find_config_file()
if _config_path is None:
    # Without this check the script died later with a NameError on `config`.
    raise SystemExit('titlegetter: no config.toml found in $XDG_CONFIG_HOME/titlegetter, '
                     '~/.config/titlegetter, /etc/titlegetter or ./config')
config = Starting.LoadTheConfig(_config_path)
# If the LOGO is printed correctly, the configuration file has been loaded successfully.
Starting.ShowLogo(config=config)
parser = Starting.GetParser()
args = parser.parse_args()
headers = config['headers']  # import the headers
session = requests.session()  # start a session

# Maps every supported --format value to the method that prints it.
_PRINTERS = {
    'txt': Do.PrintAsPureText,
    'md': Do.PrintAsMarkDown,
    'html': Do.PrintAsHTML,
    'bbscode': Do.PrintAsBBScode,
}

if args.version:
    if os.path.exists('/etc/titlegetter/.version'):
        Starting.ShowVersion('/etc/titlegetter/.version')
    elif os.path.exists('config/version'):
        Starting.ShowVersion('config/version')
    # SystemExit flushes stdout first; os._exit() could drop the version text.
    raise SystemExit(0)

# Step One, check the work mode.
# NOTE: parser.error() prints the usage line plus the message and exits with
# status 2, so no statement placed after it would ever run.
if not args.batch_mode:
    # Single-URL mode: we just need the URL and the format.
    URL = args.url
    if URL is None:
        parser.error('URL is required!')
    # Validate the format before touching the network.
    if args.format is None:
        parser.error('Format is required!\n')
    if args.format not in _PRINTERS:
        parser.error("'" + args.format + "'" +
                     ' is not a legal format that TitleGetter supports.\n')
    # Then get the title and print it in the requested format.
    Page = Do.GetPage(headers=headers, URL=URL, session=session, config=config)
    Title = Do.GetTitle(page=Page)
    _PRINTERS[args.format](title=Title, URL=URL)
else:
    # Batch mode: read a text (*.txt) file which contains some URLs and
    # write every result to the output file.
    InputFileName = args.input_file
    OutputFileName = args.output
    Format = args.format
    # If anything is missing, print the error and quit.
    if InputFileName is None:
        parser.error('Filename is required!')
    if OutputFileName is None:
        parser.error('Filename is required!')
    if Format is None:
        parser.error('Format is required!')
    if Format not in _PRINTERS:
        # An unknown format used to produce an empty output file silently.
        parser.error("'" + Format + "'" +
                     ' is not a legal format that TitleGetter supports.\n')
    # Read and check the whole list before creating the output file, so a
    # blank line can no longer leave a half-written file behind.
    with open(InputFileName) as URLList:
        PureURLs = [URL.strip() for URL in URLList]
    if '' in PureURLs:
        parser.error('URL can not be empty!')
    # If everything is ok.
    with open(OutputFileName, 'w', encoding='utf-8') as f:
        for PureURL in PureURLs:
            print('[Loaded] ' + PureURL)
            Page = Do.GetPage(headers=headers, URL=PureURL, session=session, config=config)
            Title = Do.GetTitle(page=Page)
            f.write(_format_entry(Format, Title, PureURL))
            _PRINTERS[Format](title=Title, URL=PureURL)
    # Tell the file to the user; abspath also handles an absolute --output.
    print('\n\n\n\n File saved as:' + os.path.abspath(OutputFileName))