Skip to content

Commit

Permalink
feat: web_crawler 수정, post bookmark api 추가
Browse files Browse the repository at this point in the history
  • Loading branch information
mjkweon17 committed Mar 23, 2024
1 parent 2db8f11 commit 908ad3f
Show file tree
Hide file tree
Showing 9 changed files with 62 additions and 137 deletions.
60 changes: 45 additions & 15 deletions src/bookmarks/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,8 @@
from sqlalchemy.orm import Session

from database import get_db
# from models import
# from schemas import
# from serivce import
from models import Bookmark, Page
from bookmarks.schemas import BookmarkCreate, BookmarkResponse

router = APIRouter(
prefix="/bookmarks",
Expand All @@ -22,23 +21,54 @@

@router.post(
    "/",
    response_model=BookmarkResponse,
    status_code=status.HTTP_201_CREATED,
    description="북마크 추가",
    summary="북마크 추가",
    # FastAPI expects a plain string here; it becomes the description of the
    # route's success (201) response in the generated OpenAPI schema.
    response_description="북마크 추가 성공",
)
def create_bookmark(
    bookmark: BookmarkCreate,
    db: Session = Depends(get_db),
):
    """Create a bookmark for ``bookmark.url`` on behalf of ``bookmark.user_id``.

    If no ``Page`` row exists for the URL yet, a placeholder page (temporary
    title and summary) is inserted first so the bookmark has a page to point
    at.

    Args:
        bookmark: Request payload carrying ``user_id`` and ``url``.
        db: SQLAlchemy session provided by the ``get_db`` dependency.

    Returns:
        A ``BookmarkResponse`` combining the stored bookmark's ids and
        timestamp with the page's url, title and summary.
    """
    page = db.query(Page).filter(Page.url == bookmark.url).first()
    if page is None:
        # First bookmark for this URL: create a placeholder page.
        # TODO: replace the placeholder title/summary with crawled content.
        page = Page(
            title="temporay title",
            url=bookmark.url,
            summary="temporay summary",
            created_at=datetime.now(),
            state=1,
        )
        db.add(page)
        db.commit()
        # refresh() loads the generated primary key, so no second query is
        # needed to obtain page.page_id.
        db.refresh(page)

    db_bookmark = Bookmark(
        page_id=page.page_id,
        user_id=bookmark.user_id,
        created_at=datetime.now(),
        state=1,
    )
    db.add(db_bookmark)
    db.commit()
    db.refresh(db_bookmark)

    # Return the assembled schema rather than the ORM row: the response model
    # needs url/title/summarization, which are taken from the request and page.
    return BookmarkResponse(
        bookmark_id=db_bookmark.bookmark_id,
        page_id=db_bookmark.page_id,
        user_id=db_bookmark.user_id,
        created_at=db_bookmark.created_at,
        url=bookmark.url,
        title=page.title,
        summarization=page.summary,
    )
29 changes: 8 additions & 21 deletions src/bookmarks/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,35 +3,22 @@

from pydantic import BaseModel, Field

# 아래는 모두 example

class BookmarkBase(BaseModel):
    """Fields shared by the bookmark schemas: the owning user and the page URL."""

    # Non-negative id of the user who bookmarked the page.
    user_id: int = Field(
        ...,
        title="user_id",
        description="북마크한 사용자의 user_id",
        example=1,
        ge=0,
    )
    # URL of the bookmarked page.
    url: str = Field(
        ...,
        title="url",
        description="북마크한 페이지의 url",
        example="https://mindorizip.tistory.com",
    )

class ExampleBase(BaseModel):
    """Example base schema: a required URL plus optional user identifiers."""

    # URL of the bookmarked page (required).
    url_example: str = Field(
        ...,
        title="url",
        description="북마크한 페이지의 url",
        example="https://mindorizip.tistory.com",
    )
    # Optional non-negative id of a registered user.
    user_id: Optional[int] = Field(
        default=None,
        title="user_id",
        description="북마크한 사용자의 user_id",
        example=1,
        ge=0,
    )
    # Optional non-negative id of an anonymous user.
    anonymous_user_id: Optional[int] = Field(
        default=None,
        title="anonymous_user_id",
        description="북마크한 사용자의 익명 user_id",
        example=1,
        ge=0,
    )


class ExampleCreate(ExampleBase):
    """Example creation payload; adds nothing to ``ExampleBase``."""


class BookmarkCreate(BookmarkBase):
    """Request body for creating a bookmark; adds nothing to ``BookmarkBase``."""


class ExampleResponse(ExampleBase):
    """Example response schema with ids, URL, timestamps and page metadata."""

    bookmark_id: int = Field(..., title="bookmark_id", description="북마크 id", example=1, ge=1)
    page_id: int = Field(..., title="page_id", description="북마크한 페이지의 page_id", example=1, ge=1)
    url: str = Field(..., title="url", description="북마크한 페이지의 url", example="https://mindorizip.tistory.com")
    created_at: datetime = Field(..., title="created_at", description="북마크 생성일", example="2024-10-16 00:00:00")
    updated_at: datetime = Field(..., title="updated_at", description="북마크 수정일", example="2024-10-16 00:00:00")
    title: Optional[str] = Field(default=None, title="title", description="북마크한 페이지의 title", example="마음의 소리")
    summarization: Optional[str] = Field(default=None, title="summarization", description="북마크한 페이지의 summarization", example="마음의 소리")


class BookmarkResponse(BookmarkBase):
    """Response body describing a stored bookmark and its page.

    ``user_id`` and ``url`` are inherited from ``BookmarkBase``, so ``url`` is
    not redeclared here. There is no required ``updated_at`` field: the
    bookmark-creation endpoint builds this schema without one, and a required
    field would make that construction fail validation.
    """

    bookmark_id: int = Field(..., title="bookmark_id", description="북마크 id", example=1, ge=1)
    page_id: int = Field(..., title="page_id", description="북마크한 페이지의 page_id", example=1, ge=1)
    created_at: datetime = Field(..., title="created_at", description="북마크 생성일", example="2024-10-16 00:00:00")
    # Page metadata; optional because a page may not have it yet.
    title: Optional[str] = Field(default=None, title="title", description="북마크한 페이지의 title", example="마음의 소리")
    summarization: Optional[str] = Field(default=None, title="summarization", description="북마크한 페이지의 summarization", example="마음의 소리")


class ExampleList(BaseModel):
    """Example wrapper holding a list of ``ExampleResponse`` items."""

    # Items are carried under the "bookmarks" key.
    bookmarks: List[ExampleResponse]


class ExampleUpdate(ExampleBase):
    """Example update payload; adds nothing to ``ExampleBase``."""


class ExampleDelete(BaseModel):
    """Example delete payload identifying a bookmark by id."""

    # Id of the bookmark to delete; must be at least 1.
    bookmark_id: int = Field(
        ...,
        title="bookmark_id",
        description="북마크 id",
        example=1,
        ge=1,
    )
class BookmarkList(BaseModel):
    """Wrapper schema holding a list of ``BookmarkResponse`` items."""

    # Items are carried under the "bookmarks" key.
    bookmarks: List[BookmarkResponse]
4 changes: 1 addition & 3 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from keywords.router import router as keywords_router
from search.router import router as search_router
from users.router import router as users_router
from test.router import router as test_router

from utils.web_crawler import get_title_and_content_selenium

Expand All @@ -32,12 +31,11 @@

# 라우터 등록
# app.include_router(auth_router)
# app.include_router(bookmarks_router)
app.include_router(bookmarks_router)
# app.include_router(contents_router)
# app.include_router(keywords_router)
# app.include_router(search_router)
# app.include_router(users_router)
app.include_router(test_router)

# CORS 설정
origins = [
Expand Down
Empty file removed src/test/__init__.py
Empty file.
51 changes: 0 additions & 51 deletions src/test/router.py

This file was deleted.

9 changes: 0 additions & 9 deletions src/test/schemas.py

This file was deleted.

18 changes: 0 additions & 18 deletions src/test/service.py

This file was deleted.

Empty file removed src/test/utils.py
Empty file.
28 changes: 8 additions & 20 deletions src/utils/web_crawler.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,30 @@
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
# ChromeDriverManager 라인은 제거
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time

def get_title_and_content_selenium(
    url,
    *,
    wait_seconds=5,
    driver_path='/usr/bin/chromedriver',
):
    """Fetch a page with headless Chrome and return its title and paragraph text.

    Args:
        url: Address of the page to load.
        wait_seconds: Fixed delay after navigation before the page is read,
            giving client-side rendering time to finish.
        driver_path: Filesystem path of the ChromeDriver executable.

    Returns:
        A ``(title, content)`` tuple. ``content`` is the text of every ``<p>``
        element joined with single spaces. If any step fails, the error is
        printed and whichever value was not obtained is ``None`` (so a total
        failure yields ``(None, None)``) instead of raising.
    """
    # Pre-bind the results so the final return is valid even when the browser
    # fails to start or the page cannot be loaded.
    title, content = None, None
    driver = None
    try:
        # Run Chrome without a visible window.
        chrome_options = Options()
        chrome_options.add_argument("--headless")

        # Selenium 4 takes the driver location through a Service object; the
        # ``executable_path`` keyword on webdriver.Chrome is no longer accepted
        # by current releases.
        service = Service(executable_path=driver_path)
        driver = webdriver.Chrome(service=service, options=chrome_options)

        driver.get(url)
        time.sleep(wait_seconds)

        title = driver.title

        # Collect the visible text of all paragraph elements.
        paragraphs = driver.find_elements(By.TAG_NAME, 'p')
        content = ' '.join(element.text for element in paragraphs)

        print("성공적으로 웹 페이지를 가져왔습니다.")
    except Exception as e:
        # Best-effort crawler: report the problem and fall through to the
        # partial/empty result rather than propagating.
        print(f"오류 발생: {e}")
    finally:
        # Close the browser exactly once, and only if it was actually started.
        if driver is not None:
            driver.quit()
    return title, content

0 comments on commit 908ad3f

Please sign in to comment.