Skip to content

Commit 87aae79

Browse files
authored
Feat/es 검색엔진 고도화
2 parents 619aa05 + ad9b0f2 commit 87aae79

File tree

2 files changed

+45
-20
lines changed

ELK/app/services/elasticsearch_service.py

Lines changed: 38 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ def __init__(self):
6666
raise ValueError("ES_PORT 환경 변수는 반드시 숫자여야 합니다.")
6767

6868
self.es = Elasticsearch([{'host': host, 'port': port, 'scheme': 'http'}])
69-
self.index_name = "place_data"
69+
self.index_name = "place_data_v2"
7070
self.log_index_name = "chatbot_log"
7171
self.click_log_index_name = "click_log"
7272
self.search_log_index_name = "search_log"
@@ -79,14 +79,19 @@ def is_connected(self) -> bool:
7979
return False
8080

8181
def search_places(self, query: str, max_results: int = 23, user_id: Optional[str] = None) -> List[Dict[str, Any]]:
82-
"""장소 검색"""
82+
"""장소 검색 (copy_to 필드와 multi_match로 고도화)"""
8383
search_body = {
8484
"query": {
85-
"match": {
86-
"name": {
87-
"query": query,
88-
"fuzziness": "AUTO"
89-
}
85+
"multi_match": {
86+
"query": query,
87+
"fields": [
88+
"name^4",
89+
"alias^3",
90+
"categories^2",
91+
"addresses^2",
92+
"content"
93+
],
94+
"fuzziness": "AUTO"
9095
}
9196
},
9297
"sort": [{"_score": {"order": "desc"}}],
@@ -124,28 +129,24 @@ def search_places_for_llm_tool(self, region: str, categories: List[str], user_id
124129
LLM 도구를 위한 장소 검색.
125130
지역과 카테고리 정보를 바탕으로 장소 uuid 목록과 총 개수를 반환합니다.
126131
"""
127-
query_body = {
128-
"query": {
129-
"bool": {
130-
"must": [],
131-
"filter": []
132-
}
133-
},
134-
"size": 100,
135-
"_source": ["uuid"],
136-
"track_total_hits": True
132+
# 1. bool 쿼리를 기본 쿼리로 정의
133+
base_query = {
134+
"bool": {
135+
"must": [],
136+
"filter": []
137+
}
137138
}
138139

139140
if region:
140-
query_body["query"]["bool"]["must"].append({
141+
base_query["bool"]["must"].append({
141142
"multi_match": {
142143
"query": region,
143144
"fields": ["gu", "dong", "ro", "station", "address"]
144145
}
145146
})
146147

147148
if categories:
148-
query_body["query"]["bool"]["filter"].append({
149+
base_query["bool"]["filter"].append({
149150
"bool": {
150151
"should": [
151152
{"terms": {"category.keyword": categories}},
@@ -155,6 +156,24 @@ def search_places_for_llm_tool(self, region: str, categories: List[str], user_id
155156
}
156157
})
157158

159+
# 2. function_score 쿼리로 기본 쿼리를 감싸고, random_score 함수를 추가
160+
query_body = {
161+
"query": {
162+
"function_score": {
163+
"query": base_query,
164+
"functions": [
165+
{
166+
"random_score": {}
167+
}
168+
],
169+
"boost_mode": "multiply" # 원래 점수와 랜덤 점수를 곱하여 자연스럽게 섞음
170+
}
171+
},
172+
"size": 100,
173+
"_source": ["uuid"],
174+
"track_total_hits": True
175+
}
176+
158177
response = self.es.search(index=self.index_name, body=query_body)
159178

160179
uuids = [hit['_source']['uuid'] for hit in response['hits']['hits']]

data/src/utils/es_place_upload.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ def create_korean_content_index(self, index_name: str) -> bool:
9595
"type": "text",
9696
"analyzer": "my_nori_analyzer",
9797
"search_analyzer": "my_nori_analyzer",
98+
"copy_to": "categories",
9899
"fields": {
99100
"keyword": {
100101
"type": "keyword",
@@ -106,6 +107,7 @@ def create_korean_content_index(self, index_name: str) -> bool:
106107
"type": "text",
107108
"analyzer": "my_nori_analyzer",
108109
"search_analyzer": "my_nori_analyzer",
110+
"copy_to": "categories",
109111
"fields": {
110112
"keyword": {
111113
"type": "keyword",
@@ -117,21 +119,25 @@ def create_korean_content_index(self, index_name: str) -> bool:
117119
"type": "text",
118120
"analyzer": "my_nori_analyzer",
119121
"search_analyzer": "my_nori_analyzer",
122+
"copy_to": "addresses",
120123
},
121124
"dong": {
122125
"type": "text",
123126
"analyzer": "my_nori_analyzer",
124127
"search_analyzer": "my_nori_analyzer",
128+
"copy_to": "addresses",
125129
},
126130
"ro": {
127131
"type": "text",
128132
"analyzer": "my_nori_analyzer",
129133
"search_analyzer": "my_nori_analyzer",
134+
"copy_to": "addresses",
130135
},
131136
"station": {
132137
"type": "text",
133138
"analyzer": "my_nori_analyzer",
134139
"search_analyzer": "my_nori_analyzer",
140+
"copy_to": "addresses",
135141
"fields": {
136142
"keyword": {
137143
"type": "keyword",
@@ -311,7 +317,7 @@ def insert_data_from_json(self, index_name: str, json_file_path: str) -> bool:
311317
es_client = KoreanContentElasticsearch()
312318

313319
# 인덱스 이름 설정
314-
INDEX_NAME = "place_data"
320+
INDEX_NAME = "place_data_v2"
315321

316322
# JSON 파일 경로 설정
317323
JSON_FILE_PATH = "data/place_json_preprocessing.json"

0 commit comments

Comments
 (0)