-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcrawler.py
49 lines (37 loc) · 1.48 KB
/
crawler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from selenium import webdriver
from datetime import datetime
import json
from src.company_list import get_company_list
# Load the companies to crawl and report how many there are.
company_list = get_company_list()
print(f'[CRAWLER] Number of companies: {len(company_list)}')

# Reset jobs.json to an empty JSON object before the crawl starts.
with open('jobs.json', 'w') as f:
    f.write('{}')

# Set up a headless Chrome webdriver.
chrome_options = webdriver.ChromeOptions()
for chrome_flag in ('--no-sandbox', '--disable-dev-shm-usage', '--headless'):
    chrome_options.add_argument(chrome_flag)
driver = webdriver.Chrome(options=chrome_options)

# Running counters for open positions: overall total and per-company counts.
total_number_of_jobs: int = 0
current_jobs = {}
def print_and_collect_numbers(company_name: str, total: int):
    """Log one company's open-position count and add it to the run totals.

    Prints the count together with the current local date, adds ``total``
    to the module-level ``total_number_of_jobs`` counter and stores it in
    the module-level ``current_jobs`` dict under ``company_name``.

    Args:
        company_name: Key under which the count is recorded.
        total: Number of open positions found for that company.
    """
    global total_number_of_jobs

    today = datetime.now().date()
    print(f'[CRAWLER] Company {company_name} has {total} open positions on {today}')

    total_number_of_jobs += total
    # Item assignment mutates the dict in place, so no ``global`` is needed.
    current_jobs[company_name] = total
def write_numbers():
    """Log the overall total and dump the collected counts to current.json.

    Adds a "Total Jobs" entry to the module-level ``current_jobs`` dict
    before serializing it, so that dict is modified as a side effect.
    """
    today = datetime.now().date()
    print(f'[CRAWLER] In Total {total_number_of_jobs} of open positions on {today}')

    # The globals are only read or mutated in place here, so no ``global``
    # declaration is required.
    current_jobs["Total Jobs"] = total_number_of_jobs
    with open("current.json", "w") as out_file:
        json.dump(current_jobs, out_file, indent=4)
# Crawl every company and record how many open positions each one lists.
# quit() (rather than close()) ends the whole WebDriver session, and the
# finally clause guarantees that happens even when a scraper raises, so no
# browser/driver process is left behind.
try:
    for company in company_list:
        jobs_data = company.scraper_type().getJobs(
            driver, company.jobs_url, company.company_name
        )
        print_and_collect_numbers(company.company_name, len(jobs_data))
finally:
    driver.quit()

# Only reached when every company was crawled without an exception.
write_numbers()