diff --git a/ELK/app/services/elasticsearch_service.py b/ELK/app/services/elasticsearch_service.py index 746a4db..445d7a3 100644 --- a/ELK/app/services/elasticsearch_service.py +++ b/ELK/app/services/elasticsearch_service.py @@ -66,7 +66,7 @@ def __init__(self): raise ValueError("ES_PORT 환경 변수는 반드시 숫자여야 합니다.") self.es = Elasticsearch([{'host': host, 'port': port, 'scheme': 'http'}]) - self.index_name = "place_data" + self.index_name = "place_data_v2" self.log_index_name = "chatbot_log" self.click_log_index_name = "click_log" self.search_log_index_name = "search_log" @@ -79,14 +79,19 @@ def is_connected(self) -> bool: return False def search_places(self, query: str, max_results: int = 23, user_id: Optional[str] = None) -> List[Dict[str, Any]]: - """장소 검색""" + """장소 검색 (copy_to 필드와 multi_match로 고도화)""" search_body = { "query": { - "match": { - "name": { - "query": query, - "fuzziness": "AUTO" - } + "multi_match": { + "query": query, + "fields": [ + "name^4", + "alias^3", + "categories^2", + "addresses^2", + "content" + ], + "fuzziness": "AUTO" } }, "sort": [{"_score": {"order": "desc"}}], @@ -124,20 +129,16 @@ def search_places_for_llm_tool(self, region: str, categories: List[str], user_id LLM 도구를 위한 장소 검색. 지역과 카테고리 정보를 바탕으로 장소 uuid 목록과 총 개수를 반환합니다. """ - query_body = { - "query": { - "bool": { - "must": [], - "filter": [] - } - }, - "size": 100, - "_source": ["uuid"], - "track_total_hits": True + # 1. bool 쿼리를 기본 쿼리로 정의 + base_query = { + "bool": { + "must": [], + "filter": [] + } } if region: - query_body["query"]["bool"]["must"].append({ + base_query["bool"]["must"].append({ "multi_match": { "query": region, "fields": ["gu", "dong", "ro", "station", "address"] @@ -145,7 +146,7 @@ def search_places_for_llm_tool(self, region: str, categories: List[str], user_id }) if categories: - query_body["query"]["bool"]["filter"].append({ + base_query["bool"]["filter"].append({ "bool": { "should": [ {"terms": {"category.keyword": categories}}, @@ -155,6 +156,24 @@ def search_places_for_llm_tool(self, region: str, categories: List[str], user_id } }) + # 2. function_score 쿼리로 기본 쿼리를 감싸고, random_score 함수를 추가 + query_body = { + "query": { + "function_score": { + "query": base_query, + "functions": [ + { + "random_score": {} + } + ], + "boost_mode": "multiply" # 원래 점수와 랜덤 점수를 곱하여 자연스럽게 섞음 + } + }, + "size": 100, + "_source": ["uuid"], + "track_total_hits": True + } + response = self.es.search(index=self.index_name, body=query_body) uuids = [hit['_source']['uuid'] for hit in response['hits']['hits']] diff --git a/data/src/utils/es_place_upload.py b/data/src/utils/es_place_upload.py index b63cffc..36814ea 100644 --- a/data/src/utils/es_place_upload.py +++ b/data/src/utils/es_place_upload.py @@ -95,6 +95,7 @@ def create_korean_content_index(self, index_name: str) -> bool: "type": "text", "analyzer": "my_nori_analyzer", "search_analyzer": "my_nori_analyzer", + "copy_to": "categories", "fields": { "keyword": { "type": "keyword", @@ -106,6 +107,7 @@ def create_korean_content_index(self, index_name: str) -> bool: "type": "text", "analyzer": "my_nori_analyzer", "search_analyzer": "my_nori_analyzer", + "copy_to": "categories", "fields": { "keyword": { "type": "keyword", @@ -117,21 +119,25 @@ def create_korean_content_index(self, index_name: str) -> bool: "type": "text", "analyzer": "my_nori_analyzer", "search_analyzer": "my_nori_analyzer", + "copy_to": "addresses", }, "dong": { "type": "text", "analyzer": "my_nori_analyzer", "search_analyzer": "my_nori_analyzer", + "copy_to": "addresses", }, "ro": { "type": "text", "analyzer": "my_nori_analyzer", "search_analyzer": "my_nori_analyzer", + "copy_to": "addresses", }, "station": { "type": "text", "analyzer": "my_nori_analyzer", "search_analyzer": "my_nori_analyzer", + "copy_to": "addresses", "fields": { "keyword": { "type": "keyword", @@ -311,7 +317,7 @@ def insert_data_from_json(self, index_name: str, json_file_path: str) -> bool: es_client = KoreanContentElasticsearch() # 인덱스 이름 설정 - INDEX_NAME = "place_data" + INDEX_NAME = "place_data_v2" # JSON 파일 경로 설정 JSON_FILE_PATH = "data/place_json_preprocessing.json"