Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 144 additions & 0 deletions lambda_tem/lambda_schedule.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
import json
import re
import pymysql
import os
from datetime import datetime
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# RDS connection settings, overridable via environment variables.
# SECURITY NOTE(review): the hard-coded fallback values below (host, user,
# and especially DB_PASSWORD) are live credentials committed to source
# control — they should be removed and supplied exclusively via environment
# variables or AWS Secrets Manager.
DB_HOST = os.environ.get("DB_HOST", "oneteam.cfog0q0ku1kn.ap-northeast-2.rds.amazonaws.com")
DB_USER = os.environ.get("DB_USER", "admin")
DB_PASSWORD = os.environ.get("DB_PASSWORD", "Oneteam2025!")
DB_NAME = os.environ.get("DB_NAME", "oneteam_DB")
DB_PORT = int(os.environ.get("DB_PORT", 3306))
TABLE_NAME = "smu_schedule"

def crawl_schedule(driver, wait, year):
    """Scrape the SMU academic calendar listing for one calendar year.

    Args:
        driver: An already-started Selenium Chrome WebDriver.
        wait: A WebDriverWait bound to ``driver``.
        year (int): Calendar year inserted into the listing URL.

    Returns:
        list[dict]: One dict per schedule row with keys ``start_date`` and
        ``end_date`` (``"YYYY.MM.DD"`` strings, or ``None`` when the date
        range could not be parsed) and ``content``. Returns ``[]`` when the
        page times out or the expected table is missing.
    """
    url = f"https://www.smu.ac.kr/kor/life/academicCalendar.do?mode=list&srYear={year}&srMonth=1"
    driver.get(url)
    try:
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.schedule-list table")))
    except Exception:
        # Was a bare `except:`; narrow so SystemExit/KeyboardInterrupt pass through.
        return []

    soup = BeautifulSoup(driver.page_source, "html.parser")
    table = soup.select_one("div.year-schedule div.schedule-list table")
    if table is None:
        # Markup changed or the page only partially rendered — original
        # crashed here with AttributeError on `table.select`.
        return []

    data = []
    current_year, current_month = None, None

    for row in table.select("tbody tr"):
        tds = row.find_all("td")
        if not tds:
            continue  # separator/empty rows would otherwise raise IndexError

        if tds[0].has_attr("rowspan"):
            # First row of a month group carries a "YYYY년 M월" header cell
            # spanning the group; remember it for subsequent rows.
            m = re.match(r"(\d{4})년\s*(\d{1,2})월", tds[0].text.strip())
            if m:
                current_year = int(m.group(1))
                current_month = int(m.group(2))
            if len(tds) < 3:
                continue
            date_range = tds[1].get_text(strip=True)
            content = tds[2].get_text(strip=True)
        else:
            if len(tds) < 2:
                continue
            date_range = tds[0].get_text(strip=True)
            content = tds[1].get_text(strip=True)

        # "M.D(요일) ~ M.D(요일)" → concrete dates. A start month larger than
        # the header month means the range began in the previous year; an end
        # month smaller than the start month means it wraps into the next year.
        date_match = re.match(r"(\d{1,2})\.(\d{1,2})\([^\)]*\) ~ (\d{1,2})\.(\d{1,2})\([^\)]*\)", date_range)
        if date_match and current_year and current_month:
            sm, sd, em, ed = map(int, date_match.groups())
            start_year = current_year - 1 if sm > current_month else current_year
            end_year = start_year if em >= sm else start_year + 1
            start_date = f"{start_year}.{sm:02d}.{sd:02d}"
            end_date = f"{end_year}.{em:02d}.{ed:02d}"
        else:
            start_date, end_date = None, None

        data.append({
            "start_date": start_date,
            "end_date": end_date,
            "content": content.strip()
        })

    return data


def lambda_handler(event, context):
    """Lambda entry point: crawl the SMU academic calendar into RDS.

    Crawls the current and next calendar year, creates the target table if
    needed, skips rows already stored, and bulk-inserts the rest.

    Returns:
        dict: ``statusCode`` 200 with a JSON body containing the saved count
        on success, or 500 with the error message when anything fails
        (browser start, DB access, or the crawl itself).
    """
    driver = None
    conn = None
    cursor = None
    total_saved = 0  # defined up front so the success return cannot NameError

    try:
        # Headless Chrome suitable for the Lambda sandbox.
        options = Options()
        options.add_argument("--headless")
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        # Created inside the try so a browser start failure returns a 500
        # response instead of escaping as an unhandled exception (original
        # built the driver before the try block).
        driver = webdriver.Chrome(options=options)
        wait = WebDriverWait(driver, 10)

        # RDS connection
        conn = pymysql.connect(
            host=DB_HOST,
            user=DB_USER,
            password=DB_PASSWORD,
            database=DB_NAME,
            port=DB_PORT,
            charset='utf8mb4',
            cursorclass=pymysql.cursors.DictCursor
        )
        cursor = conn.cursor()

        # Ensure the target table exists. TABLE_NAME is a module constant,
        # not user input, so f-string interpolation is acceptable here.
        create_sql = f"""
        CREATE TABLE IF NOT EXISTS {TABLE_NAME} (
            id INT AUTO_INCREMENT PRIMARY KEY,
            start_date VARCHAR(10),
            end_date VARCHAR(10),
            content TEXT,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
        """
        cursor.execute(create_sql)

        # Load existing rows once so re-runs are idempotent.
        cursor.execute(f"SELECT start_date, end_date, content FROM {TABLE_NAME}")
        existing = set((row["start_date"], row["end_date"], row["content"]) for row in cursor.fetchall())

        # Crawl this year and next.
        current_year = datetime.now().year
        target_years = range(current_year, current_year + 2)

        for year in target_years:
            data = crawl_schedule(driver, wait, year)
            new_data = [
                (d["start_date"], d["end_date"], d["content"])
                for d in data if None not in (d["start_date"], d["end_date"], d["content"])
                and (d["start_date"], d["end_date"], d["content"]) not in existing
            ]
            if new_data:
                cursor.executemany(
                    f"INSERT INTO {TABLE_NAME} (start_date, end_date, content) VALUES (%s, %s, %s)",
                    new_data
                )
                conn.commit()
                total_saved += len(new_data)
                # Track what we just inserted so an entry listed on both
                # years' pages is not inserted twice within one run
                # (original only deduplicated against pre-existing rows).
                existing.update(new_data)

    except Exception as e:
        return {
            "statusCode": 500,
            "body": json.dumps({"error": str(e)})
        }

    finally:
        # Release DB handles and the browser on both success and failure.
        if cursor:
            cursor.close()
        if conn:
            conn.close()
        if driver is not None:
            driver.quit()

    return {
        "statusCode": 200,
        "body": json.dumps({
            "message": f"총 저장된 일정: {total_saved}개",
            "saved_count": total_saved
        })
    }
134 changes: 134 additions & 0 deletions lambda_tem/lambda_smu_alarm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import json
import re
import time
import os
import pymysql
from datetime import datetime, timedelta
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def parse_date(text):
    """Normalize a Korean relative/absolute timestamp to ``YYYY-MM-DD``.

    Handles "방금 전" and "N초/분/시간 전" (today), "N일 전" (N days ago),
    and an embedded ISO date; anything unrecognized falls back to today.
    """
    now = datetime.today()
    cleaned = text.strip()

    # Sub-day relative phrases all collapse to today's date.
    if any(marker in cleaned for marker in ("방금 전", "초 전", "분 전", "시간 전")):
        return now.strftime("%Y-%m-%d")

    days_ago = re.search(r"(\d+)\s*일\s*전", cleaned)
    if days_ago:
        return (now - timedelta(days=int(days_ago.group(1)))).strftime("%Y-%m-%d")

    iso_date = re.search(r"(\d{4}-\d{2}-\d{2})", cleaned)
    if iso_date:
        return iso_date.group(1)
    return now.strftime("%Y-%m-%d")

def lambda_handler(event, context):
    """Lambda entry point: scrape SMU portal notifications into RDS.

    Logs in to the SSO portal, opens the notification panel, parses up to 30
    (content, time) list-item pairs, and inserts those not already stored.

    Returns:
        dict: ``statusCode`` 200 with the saved count on success, or 500
        with the error message on failure.
    """
    # SECURITY NOTE(review): the hard-coded fallbacks below (a student login,
    # its password, and the DB password) are live credentials committed to
    # source control — they must be removed and supplied only via environment
    # variables or AWS Secrets Manager.
    USER_ID = os.environ.get("SMU_ID", "202210852")
    USER_PASSWORD = os.environ.get("SMU_PW", "manse1223~!")

    DB_HOST = os.environ.get("DB_HOST", "oneteam.cfog0q0ku1kn.ap-northeast-2.rds.amazonaws.com")
    DB_USER = os.environ.get("DB_USER", "admin")
    DB_PASSWORD = os.environ.get("DB_PASSWORD", "Oneteam2025!")
    DB_NAME = os.environ.get("DB_NAME", "oneteam_DB")
    DB_PORT = int(os.environ.get("DB_PORT", 3306))
    TABLE_NAME = "notifications_cleaned"

    # Headless Chrome suitable for the Lambda sandbox.
    options = Options()
    options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    driver = webdriver.Chrome(options=options)
    wait = WebDriverWait(driver, 10)

    conn = None
    cursor = None

    try:
        # --- portal login via SSO ---
        driver.get("https://smsso.smu.ac.kr/svc/tk/Auth.do?ac=Y&ifa=N&id=portal&")
        wait.until(EC.presence_of_element_located((By.ID, "user_id"))).send_keys(USER_ID)
        wait.until(EC.presence_of_element_located((By.ID, "user_password"))).send_keys(USER_PASSWORD)
        wait.until(EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), '로그인')]"))).click()
        wait.until(EC.url_contains("portal.smu.ac.kr/p/S00"))

        # --- open the notification panel and capture its markup ---
        wait.until(EC.element_to_be_clickable((By.ID, "countTag"))).click()
        time.sleep(2)  # panel content is injected asynchronously after the click

        html = driver.find_element(By.ID, "_notiList").get_attribute("innerHTML")
        soup = BeautifulSoup(html, "html.parser")
        all_lis = soup.select("li")

        # --- fetch existing rows so inserts are idempotent ---
        conn = pymysql.connect(
            host=DB_HOST,
            user=DB_USER,
            password=DB_PASSWORD,
            database=DB_NAME,
            port=DB_PORT,
            charset='utf8mb4'
        )
        cursor = conn.cursor()
        cursor.execute(f"CREATE TABLE IF NOT EXISTS {TABLE_NAME} ("
                       "id INT AUTO_INCREMENT PRIMARY KEY,"
                       "date VARCHAR(10),"
                       "content TEXT,"
                       "link VARCHAR(500),"
                       "created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP)")
        cursor.execute(f"SELECT date, content FROM {TABLE_NAME}")
        existing = set((row[0], row[1]) for row in cursor.fetchall())

        # --- parse (content <li>, time <li>) pairs, skipping duplicates ---
        results = []
        i = 0
        max_count = 30
        while i < len(all_lis) - 1 and len(results) < max_count:
            content_li = all_lis[i]
            time_li = all_lis[i + 1]

            # A notification is always followed by a <li class="time"> —
            # anything else means we are mid-stream; advance by one to resync.
            if "time" not in time_li.get("class", []):
                i += 1
                continue

            date = parse_date(time_li.get_text(strip=True))
            onclick = content_li.get("onclick", "")
            link_match = re.search(r"location\.href='([^']+)'", onclick) if onclick else None
            link = "https://www.smu.ac.kr" + link_match.group(1) if link_match else ""
            content = content_li.get_text(" ", strip=True)

            if (date, content) in existing:
                i += 2
                continue

            results.append((date, content, link))
            i += 2

        # --- persist new notifications ---
        saved_count = 0
        if results:
            insert_sql = f"INSERT INTO {TABLE_NAME} (date, content, link) VALUES (%s, %s, %s)"
            cursor.executemany(insert_sql, results)
            conn.commit()
            saved_count = cursor.rowcount

        return {
            "statusCode": 200,
            "body": json.dumps({
                "message": f"✅ 새 알림 저장: {saved_count}개",
                "saved_count": saved_count
            })
        }

    except Exception as e:
        return {
            "statusCode": 500,
            "body": json.dumps({
                "error": str(e)
            })
        }

    finally:
        # Cleanup runs on both paths — the original leaked the DB connection
        # and cursor whenever an exception occurred after connecting.
        if cursor:
            cursor.close()
        if conn:
            conn.close()
        driver.quit()