From 1ca4960572cd79bf198fcb136d41b8a756d2026b Mon Sep 17 00:00:00 2001 From: skqorrla Date: Wed, 25 Jun 2025 14:43:48 +0900 Subject: [PATCH 1/3] =?UTF-8?q?feat:=20=EC=83=89=EC=9D=B8=20=EA=B3=BC?= =?UTF-8?q?=EC=A0=95=EC=97=90=EC=84=9C=20copy=5Fto=20=EC=B6=94=EA=B0=80=20?= =?UTF-8?q?=EB=B0=8F=20=EC=9D=B8=EB=8D=B1=EC=8A=A4=20=EC=9D=B4=EB=A6=84=20?= =?UTF-8?q?=EC=97=85=EB=8D=B0=EC=9D=B4=ED=8A=B8=20(place=5Fdata=5Fv2)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- data/src/utils/es_place_upload.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/data/src/utils/es_place_upload.py b/data/src/utils/es_place_upload.py index b63cffc..36814ea 100644 --- a/data/src/utils/es_place_upload.py +++ b/data/src/utils/es_place_upload.py @@ -95,6 +95,7 @@ def create_korean_content_index(self, index_name: str) -> bool: "type": "text", "analyzer": "my_nori_analyzer", "search_analyzer": "my_nori_analyzer", + "copy_to": "categories", "fields": { "keyword": { "type": "keyword", @@ -106,6 +107,7 @@ def create_korean_content_index(self, index_name: str) -> bool: "type": "text", "analyzer": "my_nori_analyzer", "search_analyzer": "my_nori_analyzer", + "copy_to": "categories", "fields": { "keyword": { "type": "keyword", @@ -117,21 +119,25 @@ def create_korean_content_index(self, index_name: str) -> bool: "type": "text", "analyzer": "my_nori_analyzer", "search_analyzer": "my_nori_analyzer", + "copy_to": "addresses", }, "dong": { "type": "text", "analyzer": "my_nori_analyzer", "search_analyzer": "my_nori_analyzer", + "copy_to": "addresses", }, "ro": { "type": "text", "analyzer": "my_nori_analyzer", "search_analyzer": "my_nori_analyzer", + "copy_to": "addresses", }, "station": { "type": "text", "analyzer": "my_nori_analyzer", "search_analyzer": "my_nori_analyzer", + "copy_to": "addresses", "fields": { "keyword": { "type": "keyword", @@ -311,7 +317,7 @@ def insert_data_from_json(self, index_name: str, json_file_path: str) -> bool: es_client = KoreanContentElasticsearch() # 인덱스 이름 설정 - INDEX_NAME = "place_data" + INDEX_NAME = "place_data_v2" # JSON 파일 경로 설정 JSON_FILE_PATH = "data/place_json_preprocessing.json" From bbe6b2b323377d64c08f8ff9df2b9444abacd9a0 Mon Sep 17 00:00:00 2001 From: skqorrla Date: Wed, 25 Jun 2025 15:11:55 +0900 Subject: [PATCH 2/3] =?UTF-8?q?feat:=20=EC=9E=A5=EC=86=8C=20=EA=B2=80?= =?UTF-8?q?=EC=83=89=20=EA=B8=B0=EB=8A=A5=20=EA=B3=A0=EB=8F=84=ED=99=94=20?= =?UTF-8?q?=EB=B0=8F=20=EC=9D=B8=EB=8D=B1=EC=8A=A4=20=EC=9D=B4=EB=A6=84?= =?UTF-8?q?=EC=9D=84=20place=5Fdata=5Fv2=EB=A1=9C=20=EB=B3=80=EA=B2=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ELK/app/services/elasticsearch_service.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/ELK/app/services/elasticsearch_service.py b/ELK/app/services/elasticsearch_service.py index 746a4db..f5b8086 100644 --- a/ELK/app/services/elasticsearch_service.py +++ b/ELK/app/services/elasticsearch_service.py @@ -66,7 +66,7 @@ def __init__(self): raise ValueError("ES_PORT 환경 변수는 반드시 숫자여야 합니다.") self.es = Elasticsearch([{'host': host, 'port': port, 'scheme': 'http'}]) - self.index_name = "place_data" + self.index_name = "place_data_v2" self.log_index_name = "chatbot_log" self.click_log_index_name = "click_log" self.search_log_index_name = "search_log" @@ -79,14 +79,19 @@ def is_connected(self) -> bool: return False def search_places(self, query: str, max_results: int = 23, user_id: Optional[str] = None) -> List[Dict[str, Any]]: - """장소 검색""" + """장소 검색 (copy_to 필드와 multi_match로 고도화)""" search_body = { "query": { - "match": { - "name": { - "query": query, - "fuzziness": "AUTO" - } + "multi_match": { + "query": query, + "fields": [ + "name^4", + "alias^3", + "categories^2", + "addresses^2", + "content" + ], + "fuzziness": "AUTO" } }, "sort": [{"_score": {"order": "desc"}}], From ad9b0f283710856d06cfdf637ef4901ed18822d7 Mon Sep 17 00:00:00 2001 From: skqorrla Date: Wed, 25 Jun 2025 15:25:56 +0900 Subject: [PATCH 3/3] =?UTF-8?q?feat:=20=EC=9E=A5=EC=86=8C=20=EA=B2=80?= =?UTF-8?q?=EC=83=89=20=EC=BF=BC=EB=A6=AC=20=EA=B0=9C=EC=84=A0=20=EA=B0=80?= =?UTF-8?q?=EC=A4=91=EC=B9=98=20=EB=B6=80=EC=97=AC=20=EB=B0=8F=20llm=20sea?= =?UTF-8?q?rch=20tool=20=EB=9E=9C=EB=8D=A4=20=EC=A0=90=EC=88=98=20?= =?UTF-8?q?=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ELK/app/services/elasticsearch_service.py | 38 ++++++++++++++++------- 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/ELK/app/services/elasticsearch_service.py b/ELK/app/services/elasticsearch_service.py index f5b8086..445d7a3 100644 --- a/ELK/app/services/elasticsearch_service.py +++ b/ELK/app/services/elasticsearch_service.py @@ -129,20 +129,16 @@ def search_places_for_llm_tool(self, region: str, categories: List[str], user_id LLM 도구를 위한 장소 검색. 지역과 카테고리 정보를 바탕으로 장소 uuid 목록과 총 개수를 반환합니다. """ - query_body = { - "query": { - "bool": { - "must": [], - "filter": [] - } - }, - "size": 100, - "_source": ["uuid"], - "track_total_hits": True + # 1. bool 쿼리를 기본 쿼리로 정의 + base_query = { + "bool": { + "must": [], + "filter": [] + } } if region: - query_body["query"]["bool"]["must"].append({ + base_query["bool"]["must"].append({ "multi_match": { "query": region, "fields": ["gu", "dong", "ro", "station", "address"] @@ -150,7 +146,7 @@ def search_places_for_llm_tool(self, region: str, categories: List[str], user_id }) if categories: - query_body["query"]["bool"]["filter"].append({ + base_query["bool"]["filter"].append({ "bool": { "should": [ {"terms": {"category.keyword": categories}}, @@ -160,6 +156,24 @@ def search_places_for_llm_tool(self, region: str, categories: List[str], user_id } }) + # 2. function_score 쿼리로 기본 쿼리를 감싸고, random_score 함수를 추가 + query_body = { + "query": { + "function_score": { + "query": base_query, + "functions": [ + { + "random_score": {} + } + ], + "boost_mode": "multiply" # 원래 점수와 랜덤 점수를 곱하여 자연스럽게 섞음 + } + }, + "size": 100, + "_source": ["uuid"], + "track_total_hits": True + } + response = self.es.search(index=self.index_name, body=query_body) uuids = [hit['_source']['uuid'] for hit in response['hits']['hits']]