# ======================================================================
# File: lambda_tem/lambda_schedule.py
# ----------------------------------------------------------------------
# NOTE(review): the patch this was reconstructed from adds this module and
# lambda_tem/lambda_smu_alarm.py (second section below) as two separate
# files. Both define `lambda_handler`, so keep them in separate files when
# deploying; each section carries its own imports for that reason.
#
# bs4, pymysql and selenium are imported inside the functions that use
# them so the pure parsing helpers can be imported without the browser/DB
# stack being installed.
# ======================================================================
import json
import os
import re
from datetime import datetime

# RDS settings are read from the environment.
DB_HOST = os.environ.get("DB_HOST", "oneteam.cfog0q0ku1kn.ap-northeast-2.rds.amazonaws.com")
DB_USER = os.environ.get("DB_USER", "admin")
# Deliberately no fallback: a database password must not live in source.
DB_PASSWORD = os.environ.get("DB_PASSWORD")
DB_NAME = os.environ.get("DB_NAME", "oneteam_DB")
DB_PORT = int(os.environ.get("DB_PORT", 3306))
TABLE_NAME = "smu_schedule"

# "2025년 3월" style month header found in the rowspan cell.
_MONTH_HEADER_RE = re.compile(r"(\d{4})년\s*(\d{1,2})월")
# "MM.DD(day) ~ MM.DD(day)" style range found in the date cell.
_DATE_RANGE_RE = re.compile(
    r"(\d{1,2})\.(\d{1,2})\([^\)]*\) ~ (\d{1,2})\.(\d{1,2})\([^\)]*\)"
)


def _resolve_date_range(date_range, header_year, header_month):
    """Expand a 'MM.DD(x) ~ MM.DD(x)' cell into two 'YYYY.MM.DD' strings.

    Args:
        date_range: Text of the date cell.
        header_year: Year taken from the most recent month header, or None.
        header_month: Month taken from the most recent month header, or None.

    Returns:
        ``(start_date, end_date)``; ``(None, None)`` when the cell does not
        match the range pattern or no month header has been seen yet.
    """
    match = _DATE_RANGE_RE.match(date_range)
    if match is None or not header_year or not header_month:
        return None, None

    start_month, start_day, end_month, end_day = map(int, match.groups())
    # A range starting in a later month than its header is taken to have
    # begun in the previous year (e.g. a December start under January).
    start_year = header_year - 1 if start_month > header_month else header_year
    # An end month earlier than the start month means the range wraps
    # into the following year.
    end_year = start_year if end_month >= start_month else start_year + 1
    return (
        f"{start_year}.{start_month:02d}.{start_day:02d}",
        f"{end_year}.{end_month:02d}.{end_day:02d}",
    )


def crawl_schedule(driver, wait, year):
    """Scrape the academic-calendar list page for one year.

    Args:
        driver: Selenium WebDriver used to load the page.
        wait: WebDriverWait bound to ``driver``.
        year: Year placed in the ``srYear`` query parameter.

    Returns:
        A list of dicts with keys ``start_date``, ``end_date`` (both
        ``'YYYY.MM.DD'`` or ``None`` when the date cell is not a range) and
        ``content``. Empty when the table does not appear before the wait
        times out or cannot be located in the parsed page.

    Raises:
        Any non-timeout WebDriver error is propagated to the caller instead
        of being silently turned into an empty result.
    """
    from bs4 import BeautifulSoup
    from selenium.common.exceptions import TimeoutException
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support import expected_conditions as EC

    url = f"https://www.smu.ac.kr/kor/life/academicCalendar.do?mode=list&srYear={year}&srMonth=1"
    driver.get(url)
    try:
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.schedule-list table")))
    except TimeoutException:
        return []

    soup = BeautifulSoup(driver.page_source, "html.parser")
    # This selector is stricter than the one waited on above, so it can
    # legitimately find nothing even after the wait succeeded.
    table = soup.select_one("div.year-schedule div.schedule-list table")
    if table is None:
        return []

    data = []
    current_year = current_month = None

    for row in table.select("tbody tr"):
        tds = row.find_all("td")
        if not tds:
            # Rows without <td> cells carry no schedule entry.
            continue

        if tds[0].has_attr("rowspan"):
            # First row of a month group: leading cell is the month header.
            header = _MONTH_HEADER_RE.match(tds[0].text.strip())
            if header:
                current_year = int(header.group(1))
                current_month = int(header.group(2))
            cells = tds[1:]
        else:
            cells = tds

        if len(cells) < 2:
            continue

        date_range = cells[0].get_text(strip=True)
        content = cells[1].get_text(strip=True)
        start_date, end_date = _resolve_date_range(date_range, current_year, current_month)

        data.append({
            "start_date": start_date,
            "end_date": end_date,
            "content": content.strip()
        })

    return data


def lambda_handler(event, context):
    """Crawl this year's and next year's calendar and store new rows.

    ``event`` and ``context`` are accepted for the handler signature but
    are not read.

    Returns:
        ``{"statusCode": 200, "body": ...}`` with the number of inserted
        rows, or ``{"statusCode": 500, "body": ...}`` with an ``error``
        message when configuration is missing or any step fails.
    """
    if not DB_PASSWORD:
        return {
            "statusCode": 500,
            "body": json.dumps({"error": "Missing required environment variable: DB_PASSWORD"})
        }

    driver = None
    conn = None
    cursor = None
    total_saved = 0

    try:
        import pymysql
        from selenium import webdriver
        from selenium.webdriver.chrome.options import Options
        from selenium.webdriver.support.ui import WebDriverWait

        # Browser start-up lives inside the try so a failure here is
        # reported as a 500 response like every other error.
        options = Options()
        options.add_argument("--headless")
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        driver = webdriver.Chrome(options=options)
        wait = WebDriverWait(driver, 10)

        # Connect to the database.
        conn = pymysql.connect(
            host=DB_HOST,
            user=DB_USER,
            password=DB_PASSWORD,
            database=DB_NAME,
            port=DB_PORT,
            charset='utf8mb4',
            cursorclass=pymysql.cursors.DictCursor
        )
        cursor = conn.cursor()

        # Create the table on first run.
        create_sql = f"""
        CREATE TABLE IF NOT EXISTS {TABLE_NAME} (
            id INT AUTO_INCREMENT PRIMARY KEY,
            start_date VARCHAR(10),
            end_date VARCHAR(10),
            content TEXT,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
        """
        cursor.execute(create_sql)

        # Load the rows already stored so they are not inserted again.
        cursor.execute(f"SELECT start_date, end_date, content FROM {TABLE_NAME}")
        existing = {
            (row["start_date"], row["end_date"], row["content"])
            for row in cursor.fetchall()
        }

        # Crawl the current year and the one after it.
        current_year = datetime.now().year
        for year in range(current_year, current_year + 2):
            new_data = []
            for entry in crawl_schedule(driver, wait, year):
                key = (entry["start_date"], entry["end_date"], entry["content"])
                if None in key or key in existing:
                    continue
                # Remember the key immediately so the same entry appearing
                # again (same page or the next year's page) is skipped.
                existing.add(key)
                new_data.append(key)

            if new_data:
                cursor.executemany(
                    f"INSERT INTO {TABLE_NAME} (start_date, end_date, content) VALUES (%s, %s, %s)",
                    new_data
                )
                conn.commit()
                total_saved += len(new_data)

    except Exception as e:
        return {
            "statusCode": 500,
            "body": json.dumps({"error": str(e)})
        }

    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
        if driver is not None:
            driver.quit()

    return {
        "statusCode": 200,
        "body": json.dumps({
            "message": f"총 저장된 일정: {total_saved}개",
            "saved_count": total_saved
        })
    }


# ======================================================================
# File: lambda_tem/lambda_smu_alarm.py
# ----------------------------------------------------------------------
# Standalone module; see the note at the top about `lambda_handler`.
# ======================================================================
import json
import os
import re
import time
from datetime import datetime, timedelta

# "<number> <unit> 전" ("... ago") with unit in seconds/minutes/hours/days.
_RELATIVE_AGE_RE = re.compile(r"(\d+)\s*(초|분|시간|일)\s*전")
_ISO_DATE_RE = re.compile(r"(\d{4}-\d{2}-\d{2})")
_AGE_UNIT_TO_TIMEDELTA_ARG = {
    "초": "seconds",
    "분": "minutes",
    "시간": "hours",
    "일": "days",
}
# Phrases that mean "today" even when no number precedes them.
_SAME_DAY_MARKERS = ("방금 전", "초 전", "분 전", "시간 전")


def parse_date(text, today=None):
    """Convert a notification timestamp label into a 'YYYY-MM-DD' string.

    Handles, in order: a numeric relative age ("3시간 전", "2일 전"), a
    non-numeric same-day phrase ("방금 전"), and an embedded ISO date.
    Anything else falls back to the reference day.

    Args:
        text: Raw label text.
        today: Reference ``datetime``; defaults to ``datetime.today()``.
            NOTE(review): that default is the host's local clock; confirm
            it matches the portal's timezone in the deployed environment.

    Returns:
        The resolved date formatted as ``'%Y-%m-%d'``.
    """
    now = datetime.today() if today is None else today
    text = text.strip()

    age = _RELATIVE_AGE_RE.search(text)
    if age:
        # Subtract the real age so "3 hours ago" seen at 01:00 resolves to
        # yesterday instead of today.
        unit = _AGE_UNIT_TO_TIMEDELTA_ARG[age.group(2)]
        return (now - timedelta(**{unit: int(age.group(1))})).strftime("%Y-%m-%d")

    if any(marker in text for marker in _SAME_DAY_MARKERS):
        return now.strftime("%Y-%m-%d")

    iso = _ISO_DATE_RE.search(text)
    return iso.group(1) if iso else now.strftime("%Y-%m-%d")


def _collect_new_notifications(all_lis, existing, max_count=30):
    """Pair each content <li> with the following time <li> and drop known ones.

    Args:
        all_lis: Parsed ``<li>`` elements in document order.
        existing: Set of ``(date, content)`` pairs already stored; updated
            in place so duplicates inside this batch are dropped too.
        max_count: Upper bound on the number of rows returned.

    Returns:
        A list of ``(date, content, link)`` tuples.
    """
    results = []
    i = 0
    while i < len(all_lis) - 1 and len(results) < max_count:
        content_li = all_lis[i]
        time_li = all_lis[i + 1]

        # A content item must be followed by an <li class="time">; if not,
        # slide forward by one and try to re-synchronise.
        if "time" not in time_li.get("class", []):
            i += 1
            continue

        date = parse_date(time_li.get_text(strip=True))
        onclick = content_li.get("onclick", "")
        link_match = re.search(r"location\.href='([^']+)'", onclick) if onclick else None
        link = "https://www.smu.ac.kr" + link_match.group(1) if link_match else ""
        content = content_li.get_text(" ", strip=True)

        i += 2
        if (date, content) in existing:
            continue

        existing.add((date, content))
        results.append((date, content, link))

    return results


def lambda_handler(event, context):
    """Log in to the portal, read the notification list and store new items.

    ``event`` and ``context`` are accepted for the handler signature but
    are not read. Credentials are taken from the ``SMU_ID``, ``SMU_PW`` and
    ``DB_PASSWORD`` environment variables; the remaining DB settings may be
    overridden through ``DB_HOST``, ``DB_USER``, ``DB_NAME`` and ``DB_PORT``.

    Returns:
        ``{"statusCode": 200, "body": ...}`` with the number of saved
        notifications, or ``{"statusCode": 500, "body": ...}`` with an
        ``error`` message when configuration is missing or any step fails.
    """
    user_id = os.environ.get("SMU_ID")
    user_password = os.environ.get("SMU_PW")
    db_password = os.environ.get("DB_PASSWORD")

    required = (("SMU_ID", user_id), ("SMU_PW", user_password), ("DB_PASSWORD", db_password))
    missing = [name for name, value in required if not value]
    if missing:
        # Fail before starting a browser or touching the network.
        return {
            "statusCode": 500,
            "body": json.dumps({
                "error": "Missing required environment variables: " + ", ".join(missing)
            })
        }

    db_host = os.environ.get("DB_HOST", "oneteam.cfog0q0ku1kn.ap-northeast-2.rds.amazonaws.com")
    db_user = os.environ.get("DB_USER", "admin")
    db_name = os.environ.get("DB_NAME", "oneteam_DB")
    db_port = int(os.environ.get("DB_PORT", 3306))
    table_name = "notifications_cleaned"

    driver = None
    conn = None
    cursor = None
    saved_count = 0

    try:
        import pymysql
        from bs4 import BeautifulSoup
        from selenium import webdriver
        from selenium.webdriver.chrome.options import Options
        from selenium.webdriver.common.by import By
        from selenium.webdriver.support import expected_conditions as EC
        from selenium.webdriver.support.ui import WebDriverWait

        # Selenium setup.
        options = Options()
        options.add_argument("--headless")
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        driver = webdriver.Chrome(options=options)
        wait = WebDriverWait(driver, 10)

        # Log in.
        driver.get("https://smsso.smu.ac.kr/svc/tk/Auth.do?ac=Y&ifa=N&id=portal&")
        wait.until(EC.presence_of_element_located((By.ID, "user_id"))).send_keys(user_id)
        wait.until(EC.presence_of_element_located((By.ID, "user_password"))).send_keys(user_password)
        wait.until(EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), '로그인')]"))).click()
        wait.until(EC.url_contains("portal.smu.ac.kr/p/S00"))

        # Open the notification panel and give it time to populate.
        wait.until(EC.element_to_be_clickable((By.ID, "countTag"))).click()
        time.sleep(2)

        # Extract the notification markup.
        html = driver.find_element(By.ID, "_notiList").get_attribute("innerHTML")
        soup = BeautifulSoup(html, "html.parser")
        all_lis = soup.select("li")

        # Load what is already stored.
        conn = pymysql.connect(
            host=db_host,
            user=db_user,
            password=db_password,
            database=db_name,
            port=db_port,
            charset='utf8mb4'
        )
        cursor = conn.cursor()
        cursor.execute(f"CREATE TABLE IF NOT EXISTS {table_name} ("
                       "id INT AUTO_INCREMENT PRIMARY KEY,"
                       "date VARCHAR(10),"
                       "content TEXT,"
                       "link VARCHAR(500),"
                       "created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP)")
        cursor.execute(f"SELECT date, content FROM {table_name}")
        existing = {(row[0], row[1]) for row in cursor.fetchall()}

        # Parse the list and keep only unseen notifications.
        results = _collect_new_notifications(all_lis, existing, max_count=30)

        # Store the new rows.
        if results:
            insert_sql = f"INSERT INTO {table_name} (date, content, link) VALUES (%s, %s, %s)"
            cursor.executemany(insert_sql, results)
            conn.commit()
            saved_count = len(results)

    except Exception as e:
        return {
            "statusCode": 500,
            "body": json.dumps({
                "error": str(e)
            })
        }

    finally:
        # Runs on success and failure alike, so the DB connection is no
        # longer leaked when a step raises.
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
        if driver is not None:
            driver.quit()

    return {
        "statusCode": 200,
        "body": json.dumps({
            "message": f"✅ 새 알림 저장: {saved_count}개",
            "saved_count": saved_count
        })
    }