This repository has been archived by the owner on Dec 23, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
classes.py
351 lines (307 loc) · 12.2 KB
/
classes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import time, psycopg2.extras
# todo add user class (include positions, urls, user_telegram_id)
# todo method should use inner attributes: _example
class bcolors:
    """ANSI escape sequences used to colour and style terminal output.

    Each constant is a raw escape string meant to be concatenated in front
    of the text it should affect; ``ENDC`` is the reset sequence to append
    afterwards.
    """

    # Foreground colours.
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'

    # Reset: switches all colours/attributes back off.
    ENDC = '\033[0m'

    # Text attributes.
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
class User:
    """A bot user together with the search links tied to that user.

    Every field is exposed read-only through a property. ``links`` starts
    as an empty list and has no setter, so it can only be filled in place
    through the list object the property returns.
    """

    def __init__(self, id: int, user_name: str, telegram_id: int, active: bool) -> None:
        """Store the user's identity fields and start with no links.

        Args:
            id: identifier of the user record.
            user_name: name of the user.
            telegram_id: Telegram identifier of the user.
            active: activity flag of the user.
        """
        # NOTE: ``id`` shadows the builtin, but the parameter name is part of
        # the public signature (keyword callers), so it is kept.
        self._id = id
        self._user_name = user_name
        self._telegram_id = telegram_id
        self._active = active
        self._links = []

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}(id={self._id!r}, user_name={self._user_name!r}, "
            f"telegram_id={self._telegram_id!r}, active={self._active!r})"
        )

    @property
    def id(self):
        """Identifier passed to the constructor."""
        return self._id

    @property
    def user_name(self):
        """User name passed to the constructor."""
        return self._user_name

    @property
    def telegram_id(self):
        """Telegram identifier passed to the constructor."""
        return self._telegram_id

    @property
    def active(self):
        """Activity flag passed to the constructor."""
        return self._active

    @property
    def links(self):
        """The live list of links attached to this user (initially empty)."""
        return self._links
class Link(BeautifulSoup):
    """A job-search results page parsed as a BeautifulSoup document.

    Base class for the per-site scrapers. Subclasses assign ``_url``,
    ``_host``, ``_target``, ``_user_id``, ``_is_new`` and ``_raw_html``
    themselves; this class only exposes them read-only and owns the
    ``_vacancies`` list. ``_driver``, ``_old_vacancies`` and ``_logger`` are
    declared here without a value, so they have to be supplied from outside
    before an instance is created.
    """

    _driver: type  # Chrome
    _old_vacancies: set  # known vacancies
    _logger: type  # Logger

    def __init__(self, raw_html):
        """Parse *raw_html* with ``html.parser`` and start an empty vacancy list.

        Raises:
            AttributeError: if ``driver`` cannot be read from the instance.
        """
        driver_available = hasattr(self, 'driver')
        if not driver_available:
            raise AttributeError('Chrome is not set')
        self._vacancies = []
        super().__init__(raw_html, features="html.parser")

    # --- externally supplied collaborators ---------------------------------

    @property
    def driver(self):
        """The browser driver used to load pages."""
        return self._driver

    @driver.setter
    def driver(self, value):
        self._driver = value

    @property
    def logger(self):
        """The logger used to report scraping errors."""
        return self._logger

    @property
    def old_vacancies(self):
        """Already-known vacancies, as fetched from the database."""
        return self._old_vacancies

    @old_vacancies.setter
    def old_vacancies(self, value):
        self._old_vacancies = value

    # --- read-only views of what the subclass stored -----------------------

    @property
    def url(self):
        """Page URL, e.g. "https://www.work.ua/jobs-kyiv-devops/"."""
        return self._url

    @property
    def host(self):
        """Site host, e.g. "work.ua"."""
        return self._host

    @property
    def vacancies(self):
        """List of ``Vacancy`` objects collected from the page."""
        return self._vacancies

    @property
    def raw_html(self):
        """Raw page markup, e.g. "<!DOCTYPE html><html><body>...".'"""
        return self._raw_html

    @property
    def target(self):
        """Search keyword, e.g. "devops"."""
        return self._target

    @property
    def user_id(self):
        """Telegram id of the user tied to this link, e.g. 123456789."""
        return self._user_id

    @property
    def prev_links(self):
        """All vacancy links found for this user.

        NOTE(review): ``_prev_links`` is not assigned anywhere in this
        class; presumably it is set by a caller - confirm.
        """
        return self._prev_links

    @property
    def is_new(self):
        """The ``is_new`` value the subclass constructor received."""
        return self._is_new
class Dou(Link):
    """Scraper for a jobs.dou.ua search-results page."""

    def __init__(self, page_url, target, user_id, is_new):
        """Open *page_url* in the shared driver and collect its vacancies.

        The page is expanded by clicking the "more vacancies" link until it
        can no longer be found or clicked, then the final page source is
        parsed and one ``Vacancy`` is created per ``.vacancy`` element.

        If the page cannot be opened the error is logged and the instance is
        left with an empty ``vacancies`` list and empty ``raw_html``.

        Args:
            page_url: URL of the search-results page.
            target: search keyword; stored and passed on to every Vacancy.
            user_id: id of the user this link belongs to.
            is_new: stored unchanged and exposed through ``is_new``.
        """
        self._host = 'jobs.dou.ua'  # I just hardcoded it. It was not necessary.
        self._url = page_url
        self._target = target
        self._user_id = user_id
        self._is_new = is_new
        try:
            self.driver.get(self._url)
        except Exception:
            self.logger.error(f'ERROR - could not get to {self._url}')
            # Do not leave the object half-built: callers reading
            # ``vacancies`` / ``raw_html`` get empty values instead of an
            # attribute failure.
            self._raw_html = ''
            self._vacancies = []
            return

        # Click the "more" link as many times as possible to show all
        # vacancies. Any failure to find or click it is treated as "no more
        # pages", which is the only way this loop ends - hence the
        # deliberately broad handler.
        try:
            while True:
                time.sleep(1.5)
                self.driver.find_element(By.LINK_TEXT, 'Больше вакансий').click()
        except Exception:
            pass

        self._raw_html = self.driver.page_source
        super().__init__(self._raw_html)

        for card in self.select('.vacancy'):
            title_link = card.select_one('.vt')
            vacancy_url = title_link.get('href')
            title = title_link.getText().strip()
            try:
                company = card.select_one('.company').getText().strip()
            except AttributeError:
                # select_one() returned None: company name is not specified.
                company = 'not specified'
            self.vacancies.append(Vacancy(self._host, vacancy_url, title, company, self._target, self))
class Rabota(Link):
    """Scraper for a rabota.ua search-results listing."""

    # Upper bound on how many result pages are collected per link.
    _MAX_PAGES = 3

    def __init__(self, page_url, target, user_id, is_new):
        """Open *page_url* in the shared driver and collect its vacancies.

        Up to ``_MAX_PAGES`` result pages are captured by clicking the
        ``nextbtn`` element; their sources are concatenated, parsed, and one
        ``Vacancy`` is created per ``.card-body`` element.

        If the page cannot be opened the error is logged and the instance is
        left with an empty ``vacancies`` list and empty ``raw_html``.

        Args:
            page_url: URL of the first search-results page.
            target: search keyword; stored and passed on to every Vacancy.
            user_id: id of the user this link belongs to.
            is_new: stored unchanged and exposed through ``is_new``.
        """
        self._host = 'rabota.ua'
        self._url = page_url
        self._target = target
        self._user_id = user_id
        self._is_new = is_new
        try:
            self.driver.get(self._url)
        except Exception:
            self.logger.error(f'ERROR - could not get to {self._url}')
            # Do not leave the object half-built: callers reading
            # ``vacancies`` / ``raw_html`` get empty values instead of an
            # attribute failure.
            self._raw_html = ''
            self._vacancies = []
            return

        # Collect the source of every page, moving on with the "next"
        # button. Failing to find or click the button means the pages ended.
        pages = []
        try:
            for _ in range(self._MAX_PAGES):
                time.sleep(1.5)
                pages.append(self.driver.page_source)
                # find_element(By...) instead of find_element_by_class_name:
                # the latter no longer exists in Selenium 4.3+, and the
                # resulting AttributeError would be swallowed below, silently
                # limiting the scrape to the first page.
                self.driver.find_element(By.CLASS_NAME, 'nextbtn').click()
        # todo exception is too broad
        except Exception:
            pass  # pages ended
        self._raw_html = ''.join(pages)

        super().__init__(self._raw_html)

        for card in self.select('.card-body'):
            title_link = card.select_one('.ga_listing')
            # NOTE(review): host + href yields a URL without a scheme; kept
            # as-is because the value is compared against previously stored
            # URLs to decide whether a vacancy is new.
            vacancy_url = self._host + title_link.get('href')
            title = title_link.getText().strip()
            try:
                company = card.select_one('.company-profile-name').getText().strip()
            except AttributeError:
                # select_one() returned None: company name is not specified.
                company = 'not specified'
            self._vacancies.append(Vacancy(self._host, vacancy_url, title, company, self._target, self))
class Headh(Link):
    """Scraper for an hh.ua search-results listing."""

    def __init__(self, page_url, target, user_id, is_new):
        """Open *page_url* in the shared driver and collect its vacancies.

        Every result page is captured by following the "дальше" link until
        a step of the pagination fails; the page sources are concatenated,
        parsed, and one ``Vacancy`` is created per ``.vacancy-serp-item``.

        If the page cannot be opened the error is logged and the instance is
        left with an empty ``vacancies`` list and empty ``raw_html``.

        Args:
            page_url: URL of the first search-results page.
            target: search keyword; stored and passed on to every Vacancy.
            user_id: id of the user this link belongs to.
            is_new: stored unchanged and exposed through ``is_new``.
        """
        self._host = 'hh.ua'
        self._url = page_url
        self._target = target
        self._user_id = user_id
        self._is_new = is_new
        try:
            self.driver.get(self._url)
        except Exception:
            self.logger.error(f'ERROR - could not get to {self._url}')
            # Do not leave the object half-built: callers reading
            # ``vacancies`` / ``raw_html`` get empty values instead of an
            # attribute failure.
            self._raw_html = ''
            self._vacancies = []
            return

        # Collect the source of every page, moving on with the "next" link.
        # find_element(By...) is used instead of the find_element_by_*
        # helpers: those no longer exist in Selenium 4.3+, and the resulting
        # AttributeError would be swallowed below, silently limiting the
        # scrape to the first page.
        pages = []
        try:
            while True:
                time.sleep(1.5)
                pages.append(self.driver.page_source)
                # Stop any loading still in progress before touching the DOM.
                self.driver.execute_script("window.stop();")
                # Hide the saved-search subscription wrapper before clicking
                # the pagination link.
                overlay = self.driver.find_element(By.CLASS_NAME, 'saved-search-subscription-wrapper')
                self.driver.execute_script("arguments[0].style.visibility='hidden'", overlay)
                self.driver.find_element(By.XPATH, "//a[contains(text(),'дальше')]").click()
        # todo exception is too broad. If selectors change, I will know about that.
        except Exception:
            pass  # Pages ended
        self._raw_html = ''.join(pages)

        super().__init__(self._raw_html)

        for card in self.select('.vacancy-serp-item'):
            title_link = card.select_one('.HH-LinkModifier')
            vacancy_url = title_link.get('href')
            title = title_link.getText().strip()
            try:
                company = card.select_one('.bloko-link_secondary').getText().strip()
            except AttributeError:
                # select_one() returned None: company name is not specified.
                company = 'not specified'
            self._vacancies.append(Vacancy(self._host, vacancy_url, title, company, self._target, self))
class Work(Link):
    """Scraper for a work.ua search-results listing."""

    def __init__(self, page_url, target, user_id, is_new):
        """Open *page_url* in the shared driver and collect its vacancies.

        Every result page is captured by clicking the link in the last
        pagination item until that fails; the page sources are concatenated,
        parsed, and one ``Vacancy`` is created per ``.job-link`` element.

        If the page cannot be opened the error is logged and the instance is
        left with an empty ``vacancies`` list and empty ``raw_html``.

        Args:
            page_url: URL of the first search-results page.
            target: search keyword; stored and passed on to every Vacancy.
            user_id: id of the user this link belongs to.
            is_new: stored unchanged and exposed through ``is_new``.
        """
        self._host = 'work.ua'
        self._url = page_url
        self._target = target
        self._user_id = user_id
        self._is_new = is_new
        try:
            self.driver.get(self._url)
        except Exception:
            self.logger.error(f'ERROR - could not get to {self._url}')
            # Do not leave the object half-built: callers reading
            # ``vacancies`` / ``raw_html`` get empty values instead of an
            # attribute failure.
            self._raw_html = ''
            self._vacancies = []
            return

        # Collect the source of every page, moving on with the last
        # pagination link. Failing to find or click it means the pages ended.
        pages = []
        try:
            while True:
                time.sleep(1.5)
                pages.append(self.driver.page_source)
                # find_element(By...) instead of find_element_by_css_selector:
                # the latter no longer exists in Selenium 4.3+, and the
                # resulting AttributeError would be swallowed below, silently
                # limiting the scrape to the first page.
                self.driver.find_element(By.CSS_SELECTOR, '.pagination li:last-child a').click()
        # todo too broad exception
        except Exception:
            pass
        self._raw_html = ''.join(pages)

        super().__init__(self._raw_html)

        for card in self.select('.job-link'):
            title_link = card.select_one('h2 a')
            # NOTE(review): host + href yields a URL without a scheme; kept
            # as-is because the value is compared against previously stored
            # URLs to decide whether a vacancy is new.
            vacancy_url = self._host + title_link.get('href')
            title = title_link.getText().strip()
            try:
                # The company name is read from the logo image's alt text.
                company = card.select_one('.job-link img').get('alt').strip()
            except AttributeError:
                # No image, or no alt attribute: company name is not specified.
                company = 'not specified'
            self._vacancies.append(Vacancy(self._host, vacancy_url, title, company, self._target, self))
class Vacancy:
    """A single job posting found on a scraped results page.

    ``_bot`` and ``_cursor`` are declared without a value, so a Telegram
    connection and a database cursor have to be supplied from outside
    before any instance is created; the constructor refuses to run
    otherwise.
    """

    _bot: type  # Telegram connection
    _cursor: type  # Connected Data Base's cursor

    def __init__(self, host, url, title, company, target, link_parrent):
        """Store the posting's fields and decide whether it is new.

        Args:
            host: site the posting comes from, e.g. 'jobs.dou.ua'.
            url: URL of the posting.
            title: posting title, e.g. 'DevOps Engineer'.
            company: company name, e.g. 'epam'.
            target: search keyword, e.g. 'devops'.
            link_parrent: the link object this posting was found on; its
                ``old_vacancies`` and ``user_id`` are read.

        Raises:
            AttributeError: if the database cursor or the Telegram
                connection has not been set.
        """
        if not hasattr(self, 'cursor'):
            raise AttributeError('Database cursor is not set')
        if not hasattr(self, 'bot'):
            raise AttributeError('Telegram connnection is not set')
        self._host = host
        self._url = url
        self._title = title
        self._applied = False  # for future use
        self._company = company
        self._target = target
        self._link_parrent = link_parrent  # it is a kind of conditional inheritance
        # The posting is new when its URL is not among the known ones.
        self._new = self._url not in self._link_parrent.old_vacancies

    @property
    def bot(self):
        """The Telegram connection used to send notifications."""
        return self._bot

    @bot.setter
    def bot(self, value):
        self._bot = value

    @property
    def cursor(self):
        """The database cursor used for inserts and updates."""
        # Must read the private attribute: returning ``self.cursor`` here
        # would re-enter this property and recurse without end.
        return self._cursor

    @cursor.setter
    def cursor(self, connection):
        # Takes a psycopg2 *connection* and stores a DictCursor opened on it.
        # Must write the private attribute for the same reason as the getter.
        self._cursor = connection.cursor(cursor_factory=psycopg2.extras.DictCursor)  # todo rewrite - make it easier

    @property
    def host(self):
        return self._host

    @property
    def url(self):
        return self._url

    @property
    def title(self):
        return self._title

    @property
    def applied(self):
        return self._applied

    @property
    def company(self):
        return self._company

    @property
    def target(self):
        return self._target

    @property
    def new(self):
        """True when the URL was not in the parent's ``old_vacancies``."""
        return self._new

    @property
    def link_parrent(self):
        return self._link_parrent

    def insert_to_db(self):
        """Insert the posting into ``positions`` if it is new.

        Returns:
            A ``(checked, inserted)`` tuple: ``checked`` is always 1,
            ``inserted`` is 1 when a row was inserted and 0 otherwise.
        """
        if not self.new:
            return 1, 0
        insert_query = 'INSERT INTO "public"."positions" ("host", "url", "title", "company", "target", "user_id")' \
                       ' VALUES (%(host)s, %(url)s,%(title)s, %(company)s, %(target)s, %(user_id)s) ;'
        substitution = {"host": self.host, "url": self.url, "title": self.title, "company": self.company,
                        "target": self.target, "user_id": self.link_parrent.user_id}
        self.cursor.execute(insert_query, substitution)
        return 1, 1

    def apply(self):
        # todo auto apply button
        pass

    def send_notification(self, user_telegram_id):
        """Send a Telegram message about this posting and mark it notified.

        When sending fails the error and the message are printed and the
        database is left untouched; the ``notified`` flag is only set after
        a successful send.

        Args:
            user_telegram_id: chat to notify; converted to ``str``.
        """
        notification_msg = 'Опубликована новая вакансия на {host}.\nНазвание: {title}\nКомпания: {company}\nСсылка: {url}' \
            .format(host=self.host, title=self.title, company=self.company, url=self.url)
        try:
            self.bot.send_message(chat_id=str(user_telegram_id), text=notification_msg)
        except Exception as exception:
            print(exception)
            print('Could not notify about : ' + notification_msg)  # todo high priority log
        else:
            # Parameterized instead of string-formatted: the URL comes from a
            # scraped page, so it must never be spliced into the SQL text.
            # user_id is passed as a string so it is still sent as a quoted
            # literal, exactly as the formatted query did.
            update_notified_query = 'UPDATE "positions" SET "notified" = TRUE ' \
                                    'WHERE ("user_id" = %(user_id)s AND "url" = %(url)s);'
            self.cursor.execute(update_notified_query,
                                {"user_id": str(self.link_parrent.user_id), "url": self.url})