배포 v1.3.5

skqorrla · web-flow · commit 2e0e73e1b90c · 2025-06-17T16:36:42.000+09:00
배포 v1.3.5
diff --git a/ELK/app/main.py b/ELK/app/main.py
@@ -1,8 +1,9 @@
-from fastapi import FastAPI, HTTPException
+from fastapi import FastAPI, HTTPException, Query
 from contextlib import asynccontextmanager
 from datetime import datetime
+from typing import List
 
-from app.schema.search_schemas import SearchResponse
+from app.schema.search_schemas import SearchResponse, LLMToolResponse
 from app.services.elasticsearch_service import ElasticsearchService
 from app.schema.log_schemas import LogRequest, LogResponse
 
@@ -38,6 +39,27 @@ async def search_places(query: str, max_results: int = 23):
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
     
+@app.get("/api/place/search/llm-tool", response_model=LLMToolResponse)
+async def search_places_for_llm_tool(region: str, categories: List[str] = Query(..., min_length=3)):
+    """Place search API for the LLM tool: returns matching place uuids and the total hit count."""
+    try:
+        if not elasticsearch_service.is_connected():
+            raise HTTPException(status_code=503, detail="Elasticsearch 연결 실패")
+        uuids, total = elasticsearch_service.search_places_for_llm_tool(
+            region=region,
+            categories=categories
+        )
+        return LLMToolResponse(
+            success=True,
+            uuids=uuids,
+            total=total
+        )
+    except HTTPException:
+        # Re-raise untouched so the 503 above is not rewrapped as a 500 by the handler below.
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+    
 @app.post("/api/chatbot")
 async def insert_chatbot_log(log_data: LogRequest):
     """챗봇 로그 삽입"""
diff --git a/ELK/app/schema/search_schemas.py b/ELK/app/schema/search_schemas.py
@@ -12,3 +12,8 @@ class SearchResponse(BaseModel):
     success: bool
     places: List[Place]
     total: int
+
+class LLMToolResponse(BaseModel):  # response body of the /api/place/search/llm-tool endpoint
+    success: bool  # the endpoint sets this to True when the search completes
+    uuids: List[str]  # uuid values of the places returned by the search
+    total: int  # total hit count reported by the search
diff --git a/ELK/app/services/elasticsearch_service.py b/ELK/app/services/elasticsearch_service.py
@@ -1,5 +1,5 @@
 from elasticsearch import Elasticsearch
-from typing import List, Dict, Any
+from typing import List, Dict, Any, Tuple
 from datetime import datetime
 
 
@@ -8,7 +8,7 @@ class ElasticsearchService:
     
     def __init__(self, host: str = "elasticsearch", port: int = 9200):
         self.es = Elasticsearch([{'host': host, 'port': port, 'scheme': 'http'}])
-        self.index_name = "place_data_v3"
+        self.index_name = "place_data"
         self.log_index_name = "chatbot_log"
         
     def is_connected(self) -> bool:
@@ -48,6 +48,91 @@ def search_places(self, query: str, max_results: int = 23) -> List[str]:
         ]
         
         return places
+    
+    def search_places_chatbot(self, query: str, max_results: int = 100):
+        """Chatbot place search: fuzzy-match `query` against the name field and return the hits as dicts."""
+        search_body = {
+            "query": {
+                "match": {
+                    "name": {
+                        "query": query,
+                        "fuzziness": "AUTO"
+                    }
+                }
+            },
+            "sort": [{"_score": {"order": "desc"}}],  # highest-scoring hits first
+            "size": max_results,  # upper bound on the number of hits returned
+            "_source": ["uuid", "name", "category", "subcategory", "gu", "dong", "ro", "station", "location", "opentime", "breaktime", "closedate", "phone", "alias", "address", "content"]
+        }
+        
+        response = self.es.search(index=self.index_name, body=search_body)
+        
+        hits = response['hits']['hits']
+        places = [  # NOTE(review): direct indexing raises KeyError if a hit lacks any of these fields - confirm the index always has them
+            {
+                'uuid': hit['_source']['uuid'],
+                'name': hit['_source']['name'], 
+                'category': hit['_source']['category'],
+                'subcategory': hit['_source']['subcategory'],
+                'gu': hit['_source']['gu'],
+                'dong': hit['_source']['dong'],
+                'ro': hit['_source']['ro'],
+                'station': hit['_source']['station'],
+                'location': hit['_source']['location'],
+                'opentime': hit['_source']['opentime'],
+                'breaktime': hit['_source']['breaktime'],
+                'closedate': hit['_source']['closedate'],
+                'phone': hit['_source']['phone'],
+                'alias': hit['_source']['alias'],
+                'address': hit['_source']['address'],
+                'content': hit['_source']['content']
+            }
+            for hit in hits
+        ]
+        return places
+
+    def search_places_for_llm_tool(self, region: str, categories: List[str]) -> Tuple[List[str], int]:
+        """
+        Place search for the LLM tool.
+        Returns the list of place uuids and the total hit count for the given region and categories.
+        """
+        query_body = {
+            "query": {
+                "bool": {
+                    "must": [],
+                    "filter": []
+                }
+            },
+            "size": 10000,  # at most 10000 hits come back, so len(uuids) can be smaller than total
+            "_source": ["uuid"],  # only the uuid field is needed from each hit
+            "track_total_hits": True
+        }
+
+        if region:  # an empty region adds no clause
+            query_body["query"]["bool"]["must"].append({
+                "multi_match": {
+                    "query": region,
+                    "fields": ["gu", "dong", "ro", "station", "address"]
+                }
+            })
+
+        if categories:  # an empty list adds no category filter
+            query_body["query"]["bool"]["filter"].append({
+                "bool": {
+                    "should": [
+                        {"terms": {"category.keyword": categories}},
+                        {"terms": {"subcategory.keyword": categories}}
+                    ],
+                    "minimum_should_match": 1  # a hit must match category or subcategory
+                }
+            })
+            
+        response = self.es.search(index=self.index_name, body=query_body)
+        
+        uuids = [hit['_source']['uuid'] for hit in response['hits']['hits']]
+        total = response['hits']['total']['value']
+        
+        return uuids, total
 
     def create_log_index_if_not_exists(self):
         """로그 인덱스가 없으면 생성"""
@@ -57,30 +142,30 @@ def create_log_index_if_not_exists(self):
                 mapping = {
                     "mappings": {
                         "properties": {
-                                "userId": {"type": "keyword"},
-                                "question": {"type": "text", "analyzer": "nori"},
-                                "answer": {
-                                    "properties": {
-                                        "title": {"type": "text"},
-                                        "placeList": {
-                                            "properties": {
-                                                "placeId": {"type": "keyword"},
-                                                "name": {"type": "text", "analyzer": "nori"},
-                                                "address": {"type": "text", "analyzer": "nori"},
-                                                "imgUrl": {"type": "keyword"},
-                                                "location": {
-                                                    "lat": {"type": "float"},
-                                                    "lng": {"type": "float"}
-                                                }
+                            "userId": {"type": "keyword"},
+                            "question": {"type": "text", "analyzer": "nori"},
+                            "answer": {
+                                "properties": {
+                                    "title": {"type": "text"},
+                                    "placeList": {
+                                        "properties": {
+                                            "placeId": {"type": "keyword"},
+                                            "name": {"type": "text", "analyzer": "nori"},
+                                            "address": {"type": "text", "analyzer": "nori"},
+                                            "imgUrl": {"type": "keyword"},
+                                            "location": {
+                                                "lat": {"type": "float"},
+                                                "lng": {"type": "float"}
                                             }
-                                        },
-                                        "detail": {"type": "text", "analyzer": "nori"}
-                                    }
-                                },
-                                "createAt": {"type": "date"}
-                            }
+                                        }
+                                    },
+                                    "detail": {"type": "text", "analyzer": "nori"}
+                                }
+                            },
+                            "createAt": {"type": "date"}
                         }
                     }
+                }
                 self.es.indices.create(index=self.log_index_name, body=mapping)
                 print(f"로그 인덱스 '{self.log_index_name}' 생성 완료")
         except Exception as e:
diff --git a/data/congestion_preprocessing.py b/data/congestion_preprocessing.py
@@ -0,0 +1,57 @@
+import json
+import pandas as pd
+from datetime import datetime
+
+class CrowdPreprocessor:
+    def __init__(self):
+        self.df = None  # replaced by the one-row DataFrame built in process_json
+
+    def safe_float_convert(self, value, default=0.0):
+        try:
+            return float(value)
+        except (ValueError, TypeError):  # unparsable or None input falls back to default
+            return default
+
+    def process_json(self, json_data):
+        try:
+            ppltn = json_data['CITYDATA']['LIVE_PPLTN_STTS'][0]
+            road = json_data['CITYDATA']['ROAD_TRAFFIC_STTS']['AVG_ROAD_DATA']
+            weather = json_data['CITYDATA']['WEATHER_STTS'][0]
+        except (KeyError, IndexError, TypeError):
+            return None  # a missing or malformed section yields no result; self.df is left unchanged
+
+        # Extract the base fields
+        data = {
+            'AREA_NM': ppltn.get('AREA_NM'),
+            'AREA_CONGEST_LVL': ppltn.get('AREA_CONGEST_LVL'),
+            'ROAD_TRAFFIC_IDX': road.get('ROAD_TRAFFIC_IDX'),
+            'PPLTN_RATE_20': self.safe_float_convert(ppltn.get('PPLTN_RATE_20')),
+            'PPLTN_RATE_30': self.safe_float_convert(ppltn.get('PPLTN_RATE_30')),
+            'PPLTN_RATE_40': self.safe_float_convert(ppltn.get('PPLTN_RATE_40')),
+            'TEMP': self.safe_float_convert(weather.get('TEMP')),
+            'HUMIDITY': self.safe_float_convert(weather.get('HUMIDITY')),
+            'ROAD_TRAFFIC_SPD': self.safe_float_convert(road.get('ROAD_TRAFFIC_SPD')),
+            'CALL_API_TIME': datetime.now()  # naive local time at which this method runs
+        }
+        
+        # Build a single-row DataFrame
+        self.df = pd.DataFrame([data])
+        
+        # Derive time features from CALL_API_TIME
+        self.df['hour'] = self.df['CALL_API_TIME'].dt.hour
+        self.df['day_of_week'] = self.df['CALL_API_TIME'].dt.dayofweek
+        self.df['is_rush_hour'] = ((self.df['hour'].between(7, 9)) | 
+                                  (self.df['hour'].between(17, 19))).astype(int)  # 1 for hours 7-9 or 17-19, else 0
+        
+        return self.df
+
+# Usage example
+if __name__ == "__main__":
+    # Read the JSON file
+    with open('서울대공원_20250520_000003.json', 'r', encoding='utf-8') as f:
+        json_data = json.load(f)
+    
+    # Process the data (prints None when the payload lacks an expected section)
+    preprocessor = CrowdPreprocessor()
+    processed_data = preprocessor.process_json(json_data)
+    print(processed_data)
diff --git a/data/src/utils/es_place_upload.py b/data/src/utils/es_place_upload.py