Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@ test.ipynb

driver/.DS_Store

interface
interface
config\chromedriver.exes
4 changes: 3 additions & 1 deletion Driver.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import SessionNotCreatedException
from Resolvers import PathResolver
from platform import system

Expand Down Expand Up @@ -51,10 +52,11 @@ def __init__(
executable_path=self.driver_directory,
options=chrome_options
)

driver.set_window_size(1440, 900)
self._driver = driver

def driver(self):
return self._driver


3 changes: 3 additions & 0 deletions GoogleTrend.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from platform import system

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
# from selenium.webdriver.chrome.options import Options
# from selenium.webdriver.common.by import By
import pandas as pd
Expand Down Expand Up @@ -97,6 +98,8 @@ def _download(self, failed:int=0):
print('Done (成功)')

return True
except NoSuchElementException:
return self.no_error()
except:
s = randint(2, 5)
print(f'Failed (失敗) ··· 休眠約 {s}s 後喚醒')
Expand Down
17 changes: 8 additions & 9 deletions Stock.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,15 @@ def scrapping_per_week(self):
print(f'\n正在抓取 {self.origin_q} {self.key} {cy}年 週資料···')
url = f'{self.url}?date={cy}-01-01%20{cy}-12-31&q={self.q}{geo_query}{cat_query}'
self._toPage(url)
if self.no_error():
self._download()
# if self.no_error():
self._download()

elif sequence_type == 'cross-year':
print(f'\n正在抓取 {self.origin_q} {self.key} {cy}年 跨 {cy+1}年 週資料···')
url = f'{self.url}?date={cy}-07-01%20{cy+1}-6-30&q={self.q}{geo_query}{cat_query}'
self._toPage(url)
if self.no_error():
self._download()
# if self.no_error():
self._download()
cy += 1

def merge_per_week(self):
Expand Down Expand Up @@ -105,8 +105,8 @@ def _0_or_1(i: int):
url += f'date={cy}-{iTs(sm)}-01%20{cy}-{iTs(sm+5)}-3{_0_or_1(sm+5)}&q={self.q}{geo_query}{cat_query}'
sm += 6
self._toPage(url)
if self.no_error():
self._download()
if not self._download():
continue
sleep(rd_ms())
cy += 1
sm = 1
Expand Down Expand Up @@ -163,8 +163,7 @@ def scrapping_per_month(self):
print(f'\n正在抓取 {self.origin_q} {self.key} {2004}年 跨 {current_year}年 月資料···')
url = f'{self.url}?date={start_date}%20{current_date}&q={self.q}{geo_query}{cat_query}'
self._toPage(url)
if self.no_error():
self._download()
self._download()

def merge_per_month(self):
print(f'\n正在合併月資料 ···')
Expand Down Expand Up @@ -207,7 +206,7 @@ def has_set_ticker_detail(self) -> (bool):
if text == self.origin_q:
key = splited_elements
print(key)
sleep(1)
sleep(0.5)

if key:
if self.origin_q == key[1]:
Expand Down
26 changes: 22 additions & 4 deletions Updater.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,30 @@
from selenium import webdriver
from Resolvers import PathResolver
from Driver import Driver
from time import sleep
from selenium.common.exceptions import NoSuchElementException

class DriverUpdater:
def __init__(self):
self._get_driver()

def _get_driver(self):
def _get_driver(self) -> (webdriver.chrome.webdriver.WebDriver):
download_pr = PathResolver(['download'], mkdir=True)
self.driver = Driver(
download_directory=download_pr.path()
)
self.updater = Driver(
download_directory=download_pr.path(),
headless=False
).driver()

def to_page(self, url:str):
self.updater.get(url=url)
sleep(1)

def check_version(self):
try:
judgment = self.updater.find_element_by_css_selector(
"#content-base > section.section-block.section-block-main-extra > div > div.content-block-main > div.judgment.judgment-bad")
print(judgment.text)
except NoSuchElementException:
self.updater.close();
print('Done!')

28 changes: 16 additions & 12 deletions cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from Resolvers import PathResolver
from pandas import read_csv
from selenium.common.exceptions import SessionNotCreatedException
# from Driver import DriverUpdater
from Updater import DriverUpdater

class SVI_CLI:
def __init__(self, config):
Expand Down Expand Up @@ -211,18 +211,22 @@ def google_trend_cli(self):


def taiwan_stock_cli(self):
# try:
while True:
if not self.google_trend_cli():
break
# except SessionNotCreatedException:
# print('ChromeDriver 版本過期,請更新版本。')
# updater = DriverUpdater()
try:
while True:
if not self.google_trend_cli():
break
except SessionNotCreatedException:
print('\n[ChromeDriver]: ChromeDriver版本過期,請更新版本。\n')
print('[Info]: \"Ctrl + 滑鼠左鍵\" 點擊以下網址,查看版本資訊')
print('https://www.whatismybrowser.com/detect/what-version-of-chrome-do-i-have\n')
print('[Info]: \"Ctrl + 滑鼠左鍵\" 點擊以下網址,下載最新版本的 chromedriver')
print('https://chromedriver.chromium.org/downloads\n')
input('輸入 Enter 結束')
# handle driver updating...
# except:
# print('\n中斷爬蟲中')
# dot(1)
# self.save()
except:
print('\n中斷爬蟲中')
dot(1)
self.save()

print('\n----- 系統將在 2 秒後自動關閉視窗,或是手動點擊右上角離開視窗 ··· -----')
sleep(1)
Expand Down
Binary file added config/chromedriver.exe
Binary file not shown.
2 changes: 1 addition & 1 deletion config/config.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"geo":{"0":"TW"},"txt":{"0":true},"cat":{"0":true},"day":{"0":true},"week":{"0":true},"month":{"0":true},"cross_year":{"0":true},"all_year_range":{"0":false},"prevent_spamming":{"0":true},"table.columns_name":{"0":["ticker","company","date","Raw_SVI","Adj_raw_SVI","median","Adjust_SVI"]},"median_range.week":{"0":8},"median_range.month":{"0":8}}
{"geo":{"0":"TW"},"txt":{"0":true},"cat":{"0":true},"day":{"0":false},"week":{"0":true},"month":{"0":false},"cross_year":{"0":true},"all_year_range":{"0":false},"prevent_spamming":{"0":true},"table.columns_name":{"0":["ticker","company","date","Raw_SVI","Adj_raw_SVI","median","Adjust_SVI"]},"median_range.week":{"0":8},"median_range.month":{"0":8}}
9 changes: 9 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
from config import tw_stock_config
from datetime import datetime
from utils import dot
from Updater import DriverUpdater
from selenium.common.exceptions import SessionNotCreatedException


def main():
Expand All @@ -12,6 +14,13 @@ def main():
print('\n----- 若要背景執行大量資料爬蟲,請將系統 *螢幕保護 或 *休眠關閉,否則可能造成資訊中斷而損失資料 -----')
dot(.5)

try:
updater = DriverUpdater()
updater.to_page(url="https://www.whatismybrowser.com/detect/what-version-of-chrome-do-i-have")
updater.check_version()
except SessionNotCreatedException:
print('session created failed~')

config = tw_stock_config()

svi = SVI_CLI(config=config)
Expand Down