Skip to content

Commit 8577c46

Browse files
authored
배포 v1.6.1
배포 v1.6.1
2 parents 39a7934 + d064620 commit 8577c46

File tree

2 files changed

+166
-49
lines changed

2 files changed

+166
-49
lines changed

ELK/app/main.py

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,20 +15,22 @@ async def lifespan(app: FastAPI):
1515
"""애플리케이션 시작 시 실행"""
1616
elasticsearch_service.create_log_index_if_not_exists()
1717
elasticsearch_service.create_click_log_index_if_not_exists()
18+
elasticsearch_service.create_search_log_index_if_not_exists()
1819
yield
1920

2021
app = FastAPI(title="ELK Search API", version="1.0.0", lifespan=lifespan)
2122

2223
@app.get("/api/place/search", response_model=SearchResponse)
23-
async def search_places(query: str, max_results: int = 23):
24+
async def search_places(query: str, max_results: int = 23, user_id: str | None = None):
2425
"""장소 검색 API"""
2526
try:
2627
if not elasticsearch_service.is_connected():
2728
raise HTTPException(status_code=503, detail="Elasticsearch 연결 실패")
2829

2930
places = elasticsearch_service.search_places(
3031
query=query,
31-
max_results=max_results
32+
max_results=max_results,
33+
user_id=user_id
3234
)
3335

3436
# 딕셔너리를 Place 객체로 변환
@@ -44,15 +46,16 @@ async def search_places(query: str, max_results: int = 23):
4446
raise HTTPException(status_code=500, detail=str(e))
4547

4648
@app.get("/api/place/search/llm-tool", response_model=LLMToolResponse)
47-
async def search_places_for_llm_tool(region: str, categories: List[str] = Query(..., min_length=1, max_length=3)):
49+
async def search_places_for_llm_tool(region: str, categories: List[str] = Query(..., min_length=1), user_id: str | None = None):
4850
"""LLM 도구를 위한 장소 검색 API"""
4951
try:
5052
if not elasticsearch_service.is_connected():
5153
raise HTTPException(status_code=503, detail="Elasticsearch 연결 실패")
5254

5355
uuids, total = elasticsearch_service.search_places_for_llm_tool(
5456
region=region,
55-
categories=categories
57+
categories=categories,
58+
user_id=user_id
5659
)
5760

5861
return LLMToolResponse(
@@ -187,5 +190,23 @@ async def get_place_click_count(place_id: str):
187190
"clickCount": count
188191
}
189192

193+
except Exception as e:
194+
raise HTTPException(status_code=500, detail=str(e))
195+
196+
@app.get("/api/training-data/{user_id}")
async def get_training_data(user_id: str):
    """Return the search/click training rows for one user (DeepCTR model training).

    Args:
        user_id: Identifier of the user whose search and click logs are combined.

    Returns:
        A dict with ``success``, ``userId`` and ``data``; ``data`` is a list of
        row dicts built from the service result.

    Raises:
        HTTPException: 503 when Elasticsearch is not reachable, 500 for any
            other failure while building the data.
    """
    try:
        if not elasticsearch_service.is_connected():
            raise HTTPException(status_code=503, detail="Elasticsearch 연결 실패")

        data = elasticsearch_service.get_search_click_data_for_user(user_id)

        # The service returns a pandas DataFrame, which is not a plain JSON
        # type; hand the response layer a list of row dicts instead.
        if hasattr(data, "to_dict"):
            data = data.to_dict(orient="records")

        return {
            "success": True,
            "userId": user_id,
            "data": data
        }

    except HTTPException:
        # Keep deliberate HTTP errors (e.g. the 503 above) intact instead of
        # letting the generic handler below rewrite them as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

ELK/app/services/elasticsearch_service.py

Lines changed: 141 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1+
import pandas as pd
12
from elasticsearch import Elasticsearch
23
from typing import List, Dict, Any, Tuple, Optional
3-
from datetime import datetime
4+
from datetime import datetime, timezone
45

56

67
class ElasticsearchService:
@@ -11,6 +12,7 @@ def __init__(self, host: str = "elasticsearch", port: int = 9200):
1112
self.index_name = "place_data"
1213
self.log_index_name = "chatbot_log"
1314
self.click_log_index_name = "click_log"
15+
self.search_log_index_name = "search_log"
1416

1517
def is_connected(self) -> bool:
1618
"""연결 상태 확인"""
@@ -19,7 +21,7 @@ def is_connected(self) -> bool:
1921
except:
2022
return False
2123

22-
def search_places(self, query: str, max_results: int = 23) -> List[Dict[str, Any]]:
24+
def search_places(self, query: str, max_results: int = 23, user_id: Optional[str] = None) -> List[Dict[str, Any]]:
2325
"""장소 검색"""
2426
search_body = {
2527
"query": {
@@ -48,51 +50,19 @@ def search_places(self, query: str, max_results: int = 23) -> List[Dict[str, Any
4850
for hit in hits
4951
]
5052

51-
return places
52-
53-
def search_places_chatbot(self, query: str, max_results: int = 100):
54-
"""챗봇 장소 검색"""
55-
search_body = {
56-
"query": {
57-
"match": {
58-
"name": {
59-
"query": query,
60-
"fuzziness": "AUTO"
61-
}
62-
}
63-
},
64-
"sort": [{"_score": {"order": "desc"}}],
65-
"size": max_results,
66-
"_source": ["uuid", "name", "category", "subcategory", "gu", "dong", "ro", "station", "location", "opentime", "breaktime", "closedate", "phone", "alias", "address", "content"]
67-
}
68-
69-
response = self.es.search(index=self.index_name, body=search_body)
70-
71-
hits = response['hits']['hits']
72-
places = [
73-
{
74-
'uuid': hit['_source']['uuid'],
75-
'name': hit['_source']['name'],
76-
'category': hit['_source']['category'],
77-
'subcategory': hit['_source']['subcategory'],
78-
'gu': hit['_source']['gu'],
79-
'dong': hit['_source']['dong'],
80-
'ro': hit['_source']['ro'],
81-
'station': hit['_source']['station'],
82-
'location': hit['_source']['location'],
83-
'opentime': hit['_source']['opentime'],
84-
'breaktime': hit['_source']['breaktime'],
85-
'closedate': hit['_source']['closedate'],
86-
'phone': hit['_source']['phone'],
87-
'alias': hit['_source']['alias'],
88-
'address': hit['_source']['address'],
89-
'content': hit['_source']['content']
53+
if user_id:
54+
place_ids = [place['uuid'] for place in places]
55+
log_data = {
56+
"userId": user_id,
57+
"query": query,
58+
"placeIds": place_ids,
59+
"timestamp": datetime.now(timezone.utc)
9060
}
91-
for hit in hits
92-
]
61+
self.insert_search_log(log_data)
62+
9363
return places
9464

95-
def search_places_for_llm_tool(self, region: str, categories: List[str]) -> Tuple[List[str], int]:
65+
def search_places_for_llm_tool(self, region: str, categories: List[str], user_id: Optional[str] = None) -> Tuple[List[str], int]:
9666
"""
9767
LLM 도구를 위한 장소 검색.
9868
지역과 카테고리 정보를 바탕으로 장소 uuid 목록과 총 개수를 반환합니다.
@@ -308,4 +278,130 @@ def get_click_count_by_place(self, place_id: str) -> int:
308278
return response.get('count', 0)
309279
except Exception as e:
310280
print(f"클릭 수 조회 오류: {e}")
311-
return 0
281+
return 0
282+
283+
def create_search_log_index_if_not_exists(self):
    """Create the search-log index when it does not exist yet.

    The index is created with an explicit mapping: ``userId`` and ``placeIds``
    as keywords, ``query`` as text and ``timestamp`` as a date. Nothing is
    done when the index is already present.

    Raises:
        Exception: If the existence check or the creation fails. The original
            error is attached as ``__cause__`` so its traceback is not lost.
    """
    try:
        if self.es.indices.exists(index=self.search_log_index_name):
            return

        # Mapping for the search-log index
        mapping = {
            "mappings": {
                "properties": {
                    "userId": {"type": "keyword"},
                    "query": {"type": "text"},
                    "placeIds": {"type": "keyword"},
                    "timestamp": {"type": "date"}
                }
            }
        }
        self.es.indices.create(index=self.search_log_index_name, body=mapping)
        print(f"검색 로그 인덱스 '{self.search_log_index_name}' 생성 완료")
    except Exception as e:
        raise Exception(f"검색 로그 인덱스 생성 오류: {e}") from e
302+
303+
def insert_search_log(self, log_data: dict) -> Tuple[bool, Optional[str]]:
    """Insert one search-log document into Elasticsearch.

    Args:
        log_data: Document to store. Must contain ``userId``, which is used
            as the prefix of the document id.

    Returns:
        ``(True, doc_id)`` when Elasticsearch reports the document as created
        or updated, otherwise ``(False, None)``. Any error (including a
        missing ``userId``) is printed and reported as ``(False, None)``.
    """
    import uuid  # local import: only needed to build the document id

    try:
        # Document id: userId + epoch seconds + random suffix. The suffix
        # keeps two searches by the same user within the same second from
        # sharing an id and overwriting each other.
        doc_id = (
            f"{log_data['userId']}_{int(datetime.now().timestamp())}"
            f"_{uuid.uuid4().hex}"
        )

        # Store the document
        response = self.es.index(
            index=self.search_log_index_name,
            id=doc_id,
            body=log_data
        )

        # Report success only for the results Elasticsearch uses for a write
        if response.get('result') in ['created', 'updated']:
            return True, doc_id
        return False, None

    except Exception as e:
        print(f"검색 로그 삽입 오류: {e}")
        return False, None
325+
326+
def get_all_search_logs_by_user(self, user_id: str) -> List[Dict]:
    """Fetch a user's search-log hits, oldest first.

    At most 1000 hits are requested. Returns the raw hit dicts from the
    response, or an empty list when the lookup fails (the error is printed).
    """
    request_body = {
        "query": {"term": {"userId": user_id}},
        "sort": [{"timestamp": {"order": "asc"}}],
        "size": 1000,
    }
    try:
        result = self.es.search(index=self.search_log_index_name, body=request_body)
        hits = result["hits"]["hits"]
    except Exception as exc:
        print(f"사용자 검색 로그 조회 오류: {exc}")
        return []
    return hits
339+
340+
def get_all_click_logs_by_user(self, user_id: str) -> List[Dict]:
    """Fetch a user's click-log hits, oldest first.

    At most 10000 hits are requested. Returns the raw hit dicts from the
    response, or an empty list when the lookup fails (the error is printed).
    """
    request_body = {
        "query": {"term": {"userId": user_id}},
        "sort": [{"timestamp": {"order": "asc"}}],
        "size": 10000,
    }
    try:
        result = self.es.search(index=self.click_log_index_name, body=request_body)
        hits = result["hits"]["hits"]
    except Exception as exc:
        print(f"사용자 클릭 로그 조회 오류: {exc}")
        return []
    return hits
353+
354+
def get_search_click_data_for_user(self, user_id: str) -> pd.DataFrame:
    """Build training rows from a user's search and click logs.

    Each search owns the time window from its own timestamp up to (but not
    including) the next search's timestamp; the last search's window ends at
    the current UTC time. For every place id returned by a search, one row
    ``{"userid", "place_id", "yn"}`` is emitted, with ``yn`` set to 1 when
    that place was clicked inside the search's window and 0 otherwise.

    Both log lists are consumed in the order the lookup helpers return them,
    and the click list is walked once, in step with the searches.

    Returns:
        A DataFrame of the rows, or an empty DataFrame when there are no
        search logs or no rows were produced.
    """

    def _parse_utc(raw: str) -> datetime:
        """Parse an ISO timestamp; naive values are taken to be UTC."""
        parsed = datetime.fromisoformat(raw.replace('Z', '+00:00'))
        return parsed if parsed.tzinfo is not None else parsed.replace(tzinfo=timezone.utc)

    searches = self.get_all_search_logs_by_user(user_id)
    clicks = self.get_all_click_logs_by_user(user_id)

    if not searches:
        return pd.DataFrame()

    rows = []
    remaining_clicks = iter(clicks)
    pending = next(remaining_clicks, None)
    last_position = len(searches) - 1

    for position, hit in enumerate(searches):
        source = hit["_source"]
        window_start = _parse_utc(source["timestamp"])

        # The window closes at the next search, or "now" for the last one.
        if position < last_position:
            window_end = _parse_utc(searches[position + 1]["_source"]["timestamp"])
        else:
            window_end = datetime.now(timezone.utc)

        clicked = set()
        while pending:
            click = pending["_source"]
            clicked_at = _parse_utc(click["timestamp"])

            if clicked_at >= window_end:
                # Belongs to a later search; keep it pending for that window.
                break
            if clicked_at >= window_start:
                clicked.add(click["placeId"])
            # Clicks before the window start are dropped without a match.
            pending = next(remaining_clicks, None)

        rows.extend(
            {
                "userid": user_id,
                "place_id": place_id,
                "yn": 1 if place_id in clicked else 0,
            }
            for place_id in source.get("placeIds", [])
        )

    if rows:
        return pd.DataFrame(rows)
    return pd.DataFrame()

0 commit comments

Comments
 (0)