-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: web_crawler 수정, post bookmark api 추가
- Loading branch information
Showing
9 changed files
with
62 additions
and
137 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,42 +1,30 @@ | ||
from selenium import webdriver | ||
from selenium.webdriver.chrome.service import Service | ||
from webdriver_manager.chrome import ChromeDriverManager | ||
# ChromeDriverManager 라인은 제거 | ||
from selenium.webdriver.common.by import By | ||
from selenium.webdriver.chrome.options import Options | ||
import time | ||
|
||
def get_title_and_content_selenium(url):
    """Load a page in headless Chrome and return its title and paragraph text.

    Args:
        url: Address of the page to load.

    Returns:
        A ``(title, content)`` tuple. ``title`` is the page title reported by
        the browser and ``content`` is the text of every ``<p>`` element
        joined with single spaces. If any step fails, the error is printed
        and ``(None, None)`` is returned instead of raising.
    """
    title = None
    content = None
    driver = None
    try:
        # Run Chrome headless (without a visible window).
        chrome_options = Options()
        chrome_options.add_argument("--headless")

        # Use the ChromeDriver binary at an explicit path. Selenium 4 takes
        # the path through a Service object; the executable_path keyword of
        # webdriver.Chrome is deprecated there and removed in later releases.
        service = Service(executable_path='/usr/bin/chromedriver')
        driver = webdriver.Chrome(service=service, options=chrome_options)

        # Navigate to the requested URL.
        driver.get(url)

        # Fixed pause to give the page time to finish loading; the duration
        # may need tuning for slow pages.
        time.sleep(5)

        title = driver.title

        # Extract the body text with Selenium itself (no BeautifulSoup).
        paragraphs = driver.find_elements(By.TAG_NAME, 'p')
        content = ' '.join(paragraph.text for paragraph in paragraphs)

        print("성공적으로 웹 페이지를 가져왔습니다.")
    except Exception as e:
        print(f"오류 발생: {e}")
        # Never hand back a half-filled result after a failure.
        title, content = None, None
    finally:
        # Single cleanup point: quit the browser exactly once, and only if it
        # was actually started.
        if driver is not None:
            driver.quit()
    return title, content