1+ import pandas as pd
12from elasticsearch import Elasticsearch
23from typing import List , Dict , Any , Tuple , Optional
3- from datetime import datetime
4+ from datetime import datetime , timezone
45
56
67class ElasticsearchService :
@@ -11,6 +12,7 @@ def __init__(self, host: str = "elasticsearch", port: int = 9200):
1112 self .index_name = "place_data"
1213 self .log_index_name = "chatbot_log"
1314 self .click_log_index_name = "click_log"
15+ self .search_log_index_name = "search_log"
1416
1517 def is_connected (self ) -> bool :
1618 """연결 상태 확인"""
@@ -19,7 +21,7 @@ def is_connected(self) -> bool:
1921 except :
2022 return False
2123
22- def search_places (self , query : str , max_results : int = 23 ) -> List [Dict [str , Any ]]:
24+ def search_places (self , query : str , max_results : int = 23 , user_id : Optional [ str ] = None ) -> List [Dict [str , Any ]]:
2325 """장소 검색"""
2426 search_body = {
2527 "query" : {
@@ -48,51 +50,19 @@ def search_places(self, query: str, max_results: int = 23) -> List[Dict[str, Any
4850 for hit in hits
4951 ]
5052
51- return places
52-
53- def search_places_chatbot (self , query : str , max_results : int = 100 ):
54- """챗봇 장소 검색"""
55- search_body = {
56- "query" : {
57- "match" : {
58- "name" : {
59- "query" : query ,
60- "fuzziness" : "AUTO"
61- }
62- }
63- },
64- "sort" : [{"_score" : {"order" : "desc" }}],
65- "size" : max_results ,
66- "_source" : ["uuid" , "name" , "category" , "subcategory" , "gu" , "dong" , "ro" , "station" , "location" , "opentime" , "breaktime" , "closedate" , "phone" , "alias" , "address" , "content" ]
67- }
68-
69- response = self .es .search (index = self .index_name , body = search_body )
70-
71- hits = response ['hits' ]['hits' ]
72- places = [
73- {
74- 'uuid' : hit ['_source' ]['uuid' ],
75- 'name' : hit ['_source' ]['name' ],
76- 'category' : hit ['_source' ]['category' ],
77- 'subcategory' : hit ['_source' ]['subcategory' ],
78- 'gu' : hit ['_source' ]['gu' ],
79- 'dong' : hit ['_source' ]['dong' ],
80- 'ro' : hit ['_source' ]['ro' ],
81- 'station' : hit ['_source' ]['station' ],
82- 'location' : hit ['_source' ]['location' ],
83- 'opentime' : hit ['_source' ]['opentime' ],
84- 'breaktime' : hit ['_source' ]['breaktime' ],
85- 'closedate' : hit ['_source' ]['closedate' ],
86- 'phone' : hit ['_source' ]['phone' ],
87- 'alias' : hit ['_source' ]['alias' ],
88- 'address' : hit ['_source' ]['address' ],
89- 'content' : hit ['_source' ]['content' ]
53+ if user_id :
54+ place_ids = [place ['uuid' ] for place in places ]
55+ log_data = {
56+ "userId" : user_id ,
57+ "query" : query ,
58+ "placeIds" : place_ids ,
59+ "timestamp" : datetime .now (timezone .utc )
9060 }
91- for hit in hits
92- ]
61+ self . insert_search_log ( log_data )
62+
9363 return places
9464
95- def search_places_for_llm_tool (self , region : str , categories : List [str ]) -> Tuple [List [str ], int ]:
65+ def search_places_for_llm_tool (self , region : str , categories : List [str ], user_id : Optional [ str ] = None ) -> Tuple [List [str ], int ]:
9666 """
9767 LLM 도구를 위한 장소 검색.
9868 지역과 카테고리 정보를 바탕으로 장소 uuid 목록과 총 개수를 반환합니다.
@@ -308,4 +278,130 @@ def get_click_count_by_place(self, place_id: str) -> int:
308278 return response .get ('count' , 0 )
309279 except Exception as e :
310280 print (f"클릭 수 조회 오류: { e } " )
311- return 0
281+ return 0
282+
283+ def create_search_log_index_if_not_exists (self ):
284+ """검색 로그 인덱스가 없으면 생성"""
285+ try :
286+ if not self .es .indices .exists (index = self .search_log_index_name ):
287+ # 검색 로그 인덱스 매핑 설정
288+ mapping = {
289+ "mappings" : {
290+ "properties" : {
291+ "userId" : {"type" : "keyword" },
292+ "query" : {"type" : "text" },
293+ "placeIds" : {"type" : "keyword" },
294+ "timestamp" : {"type" : "date" }
295+ }
296+ }
297+ }
298+ self .es .indices .create (index = self .search_log_index_name , body = mapping )
299+ print (f"검색 로그 인덱스 '{ self .search_log_index_name } ' 생성 완료" )
300+ except Exception as e :
301+ raise Exception (f"검색 로그 인덱스 생성 오류: { e } " )
302+
303+ def insert_search_log (self , log_data : dict ) -> Tuple [bool , Optional [str ]]:
304+ """검색 로그 데이터를 Elasticsearch에 삽입"""
305+ try :
306+ # 문서 ID 생성 (타임스탬프 + userId 조합)
307+ doc_id = f"{ log_data ['userId' ]} _{ int (datetime .now ().timestamp ())} "
308+
309+ # Elasticsearch에 문서 삽입
310+ response = self .es .index (
311+ index = self .search_log_index_name ,
312+ id = doc_id ,
313+ body = log_data
314+ )
315+
316+ # 삽입 성공 여부 확인
317+ if response .get ('result' ) in ['created' , 'updated' ]:
318+ return True , doc_id
319+ else :
320+ return False , None
321+
322+ except Exception as e :
323+ print (f"검색 로그 삽입 오류: { e } " )
324+ return False , None
325+
326+ def get_all_search_logs_by_user (self , user_id : str ) -> List [Dict ]:
327+ """사용자의 모든 검색 로그를 시간순으로 조회"""
328+ try :
329+ query = {
330+ "query" : {"term" : {"userId" : user_id }},
331+ "sort" : [{"timestamp" : {"order" : "asc" }}],
332+ "size" : 1000
333+ }
334+ response = self .es .search (index = self .search_log_index_name , body = query )
335+ return response ["hits" ]["hits" ]
336+ except Exception as e :
337+ print (f"사용자 검색 로그 조회 오류: { e } " )
338+ return []
339+
340+ def get_all_click_logs_by_user (self , user_id : str ) -> List [Dict ]:
341+ """사용자의 모든 클릭 로그를 시간순으로 조회"""
342+ try :
343+ query = {
344+ "query" : {"term" : {"userId" : user_id }},
345+ "sort" : [{"timestamp" : {"order" : "asc" }}],
346+ "size" : 10000
347+ }
348+ response = self .es .search (index = self .click_log_index_name , body = query )
349+ return response ["hits" ]["hits" ]
350+ except Exception as e :
351+ print (f"사용자 클릭 로그 조회 오류: { e } " )
352+ return []
353+
    def get_search_click_data_for_user(self, user_id: str) -> pd.DataFrame:
        """Build a labelled training DataFrame from a user's search and click logs.

        For each search log, every place id it returned becomes one row
        ``{"userid", "place_id", "yn"}`` where ``yn`` is 1 if the user clicked
        that place in the window between this search and the next search
        (the current time bounds the window of the most recent search), else 0.

        Returns an empty DataFrame when the user has no search logs, or when
        no search carried any place ids.
        """

        def _ensure_utc_aware(dt_str: str) -> datetime:
            """Parse an ISO-8601 timestamp string into a UTC-aware datetime."""
            # 'Z' suffix is rewritten because datetime.fromisoformat (pre-3.11)
            # cannot parse it directly.
            dt = datetime.fromisoformat(dt_str.replace('Z', '+00:00'))
            if dt.tzinfo is None:
                # Naive timestamps are treated as UTC.
                return dt.replace(tzinfo=timezone.utc)
            return dt

        search_logs = self.get_all_search_logs_by_user(user_id)
        click_logs = self.get_all_click_logs_by_user(user_id)

        if not search_logs:
            return pd.DataFrame()

        training_data = []
        # Both log lists are sorted by timestamp ascending (see the fetch
        # helpers), so clicks can be consumed in a single forward pass while
        # walking the searches — each click is examined at most once per window.
        click_iterator = iter(click_logs)
        current_click = next(click_iterator, None)

        for i, search_hit in enumerate(search_logs):
            search_log = search_hit["_source"]
            search_time = _ensure_utc_aware(search_log["timestamp"])

            # The click window for this search ends at the next search's
            # timestamp, or "now" for the most recent search.
            next_search_time = datetime.now(timezone.utc)
            if i + 1 < len(search_logs):
                next_search_log = search_logs[i + 1]["_source"]
                next_search_time = _ensure_utc_aware(next_search_log["timestamp"])

            clicked_in_window = set()
            while current_click:
                click_log = current_click["_source"]
                click_time = _ensure_utc_aware(click_log["timestamp"])

                if search_time <= click_time < next_search_time:
                    # Click falls inside this window: record it and advance.
                    clicked_in_window.add(click_log["placeId"])
                    current_click = next(click_iterator, None)
                elif click_time >= next_search_time:
                    # Click belongs to a later window; keep it for the next
                    # search iteration.
                    break
                else:  # click_time < search_time
                    # Click predates this window (e.g. before the first
                    # search); it can never match a later window, so skip it.
                    current_click = next(click_iterator, None)

            for place_id in search_log.get("placeIds", []):
                training_data.append({
                    "userid": user_id,
                    "place_id": place_id,
                    "yn": 1 if place_id in clicked_in_window else 0
                })

        if not training_data:
            return pd.DataFrame()

        return pd.DataFrame(training_data)
0 commit comments