diff --git a/.gitignore b/.gitignore index 4e55d39..e26a71b 100644 --- a/.gitignore +++ b/.gitignore @@ -11,4 +11,5 @@ test.ipynb driver/.DS_Store -interface \ No newline at end of file +interface +config\chromedriver.exes \ No newline at end of file diff --git a/Driver.py b/Driver.py index 9198ea4..cf93857 100644 --- a/Driver.py +++ b/Driver.py @@ -1,5 +1,6 @@ from selenium import webdriver from selenium.webdriver.chrome.options import Options +from selenium.common.exceptions import SessionNotCreatedException from Resolvers import PathResolver from platform import system @@ -51,10 +52,11 @@ def __init__( executable_path=self.driver_directory, options=chrome_options ) - + driver.set_window_size(1440, 900) self._driver = driver def driver(self): return self._driver + diff --git a/GoogleTrend.py b/GoogleTrend.py index df1fb75..113820f 100644 --- a/GoogleTrend.py +++ b/GoogleTrend.py @@ -3,6 +3,7 @@ from platform import system from selenium import webdriver +from selenium.common.exceptions import NoSuchElementException # from selenium.webdriver.chrome.options import Options # from selenium.webdriver.common.by import By import pandas as pd @@ -97,6 +98,8 @@ def _download(self, failed:int=0): print('Done (成功)') return True + except NoSuchElementException: + return self.no_error() except: s = randint(2, 5) print(f'Failed (失敗) ··· 休眠約 {s}s 後喚醒') diff --git a/Stock.py b/Stock.py index 62ae33c..6e05063 100644 --- a/Stock.py +++ b/Stock.py @@ -28,15 +28,15 @@ def scrapping_per_week(self): print(f'\n正在抓取 {self.origin_q} {self.key} {cy}年 週資料···') url = f'{self.url}?date={cy}-01-01%20{cy}-12-31&q={self.q}{geo_query}{cat_query}' self._toPage(url) - if self.no_error(): - self._download() + # if self.no_error(): + self._download() elif sequence_type == 'cross-year': print(f'\n正在抓取 {self.origin_q} {self.key} {cy}年 跨 {cy+1}年 週資料···') url = f'{self.url}?date={cy}-07-01%20{cy+1}-6-30&q={self.q}{geo_query}{cat_query}' self._toPage(url) - if self.no_error(): - self._download() + # if self.no_error(): + self._download() cy += 1 def merge_per_week(self): @@ -105,8 +105,8 @@ def _0_or_1(i: int): url += f'date={cy}-{iTs(sm)}-01%20{cy}-{iTs(sm+5)}-3{_0_or_1(sm+5)}&q={self.q}{geo_query}{cat_query}' sm += 6 self._toPage(url) - if self.no_error(): - self._download() + if not self._download(): + continue sleep(rd_ms()) cy += 1 sm = 1 @@ -163,8 +163,7 @@ def scrapping_per_month(self): print(f'\n正在抓取 {self.origin_q} {self.key} {2004}年 跨 {current_year}年 月資料···') url = f'{self.url}?date={start_date}%20{current_date}&q={self.q}{geo_query}{cat_query}' self._toPage(url) - if self.no_error(): - self._download() + self._download() def merge_per_month(self): print(f'\n正在合併月資料 ···') @@ -207,7 +206,7 @@ def has_set_ticker_detail(self) -> (bool): if text == self.origin_q: key = splited_elements print(key) - sleep(1) + sleep(0.5) if key: if self.origin_q == key[1]: diff --git a/Updater.py b/Updater.py index 03429ee..0e42a83 100644 --- a/Updater.py +++ b/Updater.py @@ -1,12 +1,30 @@ +from selenium import webdriver from Resolvers import PathResolver from Driver import Driver +from time import sleep +from selenium.common.exceptions import NoSuchElementException class DriverUpdater: def __init__(self): self._get_driver() - def _get_driver(self): + def _get_driver(self) -> (webdriver.chrome.webdriver.WebDriver): download_pr = PathResolver(['download'], mkdir=True) - self.driver = Driver( - download_directory=download_pr.path() - ) \ No newline at end of file + self.updater = Driver( + download_directory=download_pr.path(), + headless=False + ).driver() + + def to_page(self, url:str): + self.updater.get(url=url) + sleep(1) + + def check_version(self): + try: + judgment = self.updater.find_element_by_css_selector( + "#content-base > section.section-block.section-block-main-extra > div > div.content-block-main > div.judgment.judgment-bad") + print(judgment.text) + except NoSuchElementException: + self.updater.close(); + print('Done!') + diff --git a/cli.py b/cli.py index 97f3d5a..b98e0bc 100644 --- a/cli.py +++ b/cli.py @@ -7,7 +7,7 @@ from Resolvers import PathResolver from pandas import read_csv from selenium.common.exceptions import SessionNotCreatedException -# from Driver import DriverUpdater +from Updater import DriverUpdater class SVI_CLI: def __init__(self, config): @@ -211,18 +211,22 @@ def google_trend_cli(self): def taiwan_stock_cli(self): - # try: - while True: - if not self.google_trend_cli(): - break - # except SessionNotCreatedException: - # print('ChromeDriver 版本過期,請更新版本。') - # updater = DriverUpdater() + try: + while True: + if not self.google_trend_cli(): + break + except SessionNotCreatedException: + print('\n[ChromeDriver]: ChromeDriver版本過期,請更新版本。\n') + print('[Info]: \"Ctrl + 滑鼠左鍵\" 點擊以下網址,查看版本資訊') + print('https://www.whatismybrowser.com/detect/what-version-of-chrome-do-i-have\n') + print('[Info]: \"Ctrl + 滑鼠左鍵\" 點擊以下網址,下載最新版本的 chromedriver') + print('https://chromedriver.chromium.org/downloads\n') + input('輸入 Enter 結束') # handle driver updating... - # except: - # print('\n中斷爬蟲中') - # dot(1) - # self.save() + except: + print('\n中斷爬蟲中') + dot(1) + self.save() print('\n----- 系統將在 2 秒後自動關閉視窗,或是手動點擊右上角離開視窗 ··· -----') sleep(1) diff --git a/config/chromedriver.exe b/config/chromedriver.exe new file mode 100644 index 0000000..6ab0ba9 Binary files /dev/null and b/config/chromedriver.exe differ diff --git a/config/config.json b/config/config.json index 2b35046..084d2f3 100644 --- a/config/config.json +++ b/config/config.json @@ -1 +1 @@ -{"geo":{"0":"TW"},"txt":{"0":true},"cat":{"0":true},"day":{"0":true},"week":{"0":true},"month":{"0":true},"cross_year":{"0":true},"all_year_range":{"0":false},"prevent_spamming":{"0":true},"table.columns_name":{"0":["ticker","company","date","Raw_SVI","Adj_raw_SVI","median","Adjust_SVI"]},"median_range.week":{"0":8},"median_range.month":{"0":8}} \ No newline at end of file +{"geo":{"0":"TW"},"txt":{"0":true},"cat":{"0":true},"day":{"0":false},"week":{"0":true},"month":{"0":false},"cross_year":{"0":true},"all_year_range":{"0":false},"prevent_spamming":{"0":true},"table.columns_name":{"0":["ticker","company","date","Raw_SVI","Adj_raw_SVI","median","Adjust_SVI"]},"median_range.week":{"0":8},"median_range.month":{"0":8}} \ No newline at end of file diff --git a/main.py b/main.py index 0c01842..16ae38d 100644 --- a/main.py +++ b/main.py @@ -2,6 +2,8 @@ from config import tw_stock_config from datetime import datetime from utils import dot +from Updater import DriverUpdater +from selenium.common.exceptions import SessionNotCreatedException def main(): @@ -12,6 +14,13 @@ def main(): print('\n----- 若要背景執行大量資料爬蟲,請將系統 *螢幕保護 或 *休眠關閉,否則可能造成資訊中斷而損失資料 -----') dot(.5) + try: + updater = DriverUpdater() + updater.to_page(url="https://www.whatismybrowser.com/detect/what-version-of-chrome-do-i-have") + updater.check_version() + except SessionNotCreatedException: + print('session created failed~') + config = tw_stock_config() svi = SVI_CLI(config=config)