From 3b388d277ef655c4697b8e917977217bc799df24 Mon Sep 17 00:00:00 2001 From: dmori Date: Fri, 6 Feb 2026 21:03:02 +0900 Subject: [PATCH 01/21] =?UTF-8?q?chore:=20=EC=9B=90=ED=99=9C=ED=95=9C=20?= =?UTF-8?q?=ED=85=8C=EC=8A=A4=ED=8A=B8=20=EC=A7=84=ED=96=89=EC=9D=84=20?= =?UTF-8?q?=EC=9C=84=ED=95=B4=20rate=20limiter=20=EC=84=A4=EC=A0=95=20?= =?UTF-8?q?=EC=99=84=ED=99=94?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/test/resources/application-integrationtest.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/resources/application-integrationtest.yml b/src/test/resources/application-integrationtest.yml index c2e3c7c..854ecce 100644 --- a/src/test/resources/application-integrationtest.yml +++ b/src/test/resources/application-integrationtest.yml @@ -128,13 +128,13 @@ resilience4j: ratelimiter: configs: default: - limit-for-period: 37 + limit-for-period: 60 limit-refresh-period: 1m timeout-duration: 10s instances: llmSummary: base-config: default - limit-for-period: 37 + limit-for-period: 60 limit-refresh-period: 1m timeout-duration: 15s llmEmbedding: From 83418e8f1dd9e213c93c14f44f1e13b5e999fcae Mon Sep 17 00:00:00 2001 From: dmori Date: Fri, 6 Feb 2026 21:03:20 +0900 Subject: [PATCH 02/21] =?UTF-8?q?fix:=20ScrapPost=20=EC=A4=91=EB=B3=B5=20?= =?UTF-8?q?=EC=A0=9C=EA=B1=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../setup/UserDataSetupAndExporter.java | 31 +++++++++++++++---- 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/src/test/java/com/techfork/domain/recommendation/setup/UserDataSetupAndExporter.java b/src/test/java/com/techfork/domain/recommendation/setup/UserDataSetupAndExporter.java index 6d5c301..bdbf663 100644 --- a/src/test/java/com/techfork/domain/recommendation/setup/UserDataSetupAndExporter.java +++ b/src/test/java/com/techfork/domain/recommendation/setup/UserDataSetupAndExporter.java @@ -55,7 +55,7 @@ 
public class UserDataSetupAndExporter extends IntegrationTestBase { @Autowired private FileExporter fileExporter; - private static final int USER_COUNT = 5; + private static final int USER_COUNT = 15; private static final int READ_POST_COUNT = 80; // 프로필 구성용 (읽은 글) - 1100개 데이터셋 기준 (약 7%) private static final int HOLDOUT_COUNT = 30; // Ground Truth (평가용, 숨김) - 평가 샘플 (약 2.7%) @@ -86,18 +86,37 @@ void step1_LoadPostFixtures() { @Test @Order(2) - @DisplayName("STEP 2: 테스트 사용자 5명 생성 (임베딩 포함)") + @DisplayName("STEP 2: 테스트 사용자 15명 생성 (임베딩 포함)") @Transactional @Commit void step2_CreateTestUsers() throws IOException { log.info("===== STEP 2: 테스트 사용자 생성 ====="); List> interestCombos = Arrays.asList( - Arrays.asList(EInterestCategory.BACKEND), - Arrays.asList(EInterestCategory.FRONTEND), - Arrays.asList(EInterestCategory.AI_ML), + // Backend 중심 (4명) Arrays.asList(EInterestCategory.BACKEND, EInterestCategory.DATABASE), - Arrays.asList(EInterestCategory.AI_ML, EInterestCategory.DATA_SCIENCE) + Arrays.asList(EInterestCategory.BACKEND, EInterestCategory.ARCHITECTURE), + Arrays.asList(EInterestCategory.BACKEND, EInterestCategory.SECURITY), + Arrays.asList(EInterestCategory.BACKEND, EInterestCategory.FRONTEND), + + // Frontend 중심 (3명) + Arrays.asList(EInterestCategory.FRONTEND, EInterestCategory.PRODUCT_UX), + Arrays.asList(EInterestCategory.FRONTEND, EInterestCategory.ARCHITECTURE), + Arrays.asList(EInterestCategory.FRONTEND), + + // Data & AI (3명) + Arrays.asList(EInterestCategory.AI_ML, EInterestCategory.DATA_SCIENCE), + Arrays.asList(EInterestCategory.DATA_ENGINEERING, EInterestCategory.DATABASE), + Arrays.asList(EInterestCategory.AI_ML, EInterestCategory.CLOUD), + + // DevOps & Infrastructure (3명) + Arrays.asList(EInterestCategory.DEVOPS, EInterestCategory.CLOUD), + Arrays.asList(EInterestCategory.CLOUD, EInterestCategory.ARCHITECTURE), + Arrays.asList(EInterestCategory.SYSTEMS_OS, EInterestCategory.NETWORKING), + + // Mobile (2명) + Arrays.asList(EInterestCategory.IOS, 
EInterestCategory.ANDROID), + Arrays.asList(EInterestCategory.IOS, EInterestCategory.PRODUCT_UX) ); Map> userGroundTruthMap = new HashMap<>(); From e9c1f931fe5dbcae9c736919de8abff62185a8cc Mon Sep 17 00:00:00 2001 From: dmori Date: Fri, 6 Feb 2026 21:04:13 +0900 Subject: [PATCH 03/21] =?UTF-8?q?improve:=20=ED=85=8C=EC=8A=A4=ED=8A=B8=20?= =?UTF-8?q?=EA=B2=B0=EA=B3=BC=EC=97=90=20=EB=94=B0=EB=9D=BC=20=EA=B0=80?= =?UTF-8?q?=EC=A4=91=EC=B9=98=20=EB=B3=80=EA=B2=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../recommendation/config/RecommendationProperties.java | 6 +++--- src/main/resources/application.yml | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/java/com/techfork/domain/recommendation/config/RecommendationProperties.java b/src/main/java/com/techfork/domain/recommendation/config/RecommendationProperties.java index f85e398..0b97f20 100644 --- a/src/main/java/com/techfork/domain/recommendation/config/RecommendationProperties.java +++ b/src/main/java/com/techfork/domain/recommendation/config/RecommendationProperties.java @@ -34,9 +34,9 @@ public class RecommendationProperties { @NoArgsConstructor @AllArgsConstructor public static class EmbeddingWeights { - private Float title = 0.5f; - private Float summary = 0.5f; - private Float content = 0.0f; + private Float title = 0.4f; + private Float summary = 0.4f; + private Float content = 0.2f; } @Getter diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index cd30af7..001fe86 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -101,9 +101,9 @@ recommendation: active-user-hours: 24 # 임베딩 가중치 설정 (합계 1.0) embedding-weights: - title: 0.5 # 제목 중요도 50% - summary: 0.5 # 요약 중요도 50% - content: 0.0 # 콘텐츠 청크 중요도 0% (제외) + title: 0.4 # 제목 중요도 40% + summary: 0.4 # 요약 중요도 40% + content: 0.2 # 콘텐츠 청크 중요도 20% # 시간 감쇠 가중치 설정 time-decay: days-7: 1.3 # 최근 7일: +30% From
797b98942549ef3e8368bb477b1978a2b62d26fb Mon Sep 17 00:00:00 2001 From: dmori Date: Fri, 6 Feb 2026 21:07:48 +0900 Subject: [PATCH 04/21] =?UTF-8?q?refactor:=20Retrieval=EC=97=90=EC=84=9C?= =?UTF-8?q?=EB=8A=94=20=EC=A2=8B=EC=9D=80=20=ED=9B=84=EB=B3=B4=EA=B5=B0=20?= =?UTF-8?q?=EC=B6=94=EC=B6=9C=EC=9D=84=20=EC=9C=84=ED=95=B4=20=EB=9E=9C?= =?UTF-8?q?=EB=8D=A4=EC=84=B1=20=EC=A0=9C=EA=B1=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../service/LlmRecommendationService.java | 24 ++++--------------- .../query/VectorQueryBuilder.java | 9 ------- .../query/VectorSearchQueryBuilder.java | 17 ------------- 3 files changed, 4 insertions(+), 46 deletions(-) diff --git a/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java b/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java index 6a0ed90..bee2a29 100644 --- a/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java +++ b/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java @@ -182,10 +182,6 @@ private List searchCandidatesWithCustomReadHistory( // 가중치 가져오기 RecommendationProperties.EmbeddingWeights weights = properties.getEmbeddingWeights(); - // 랜덤 시드 생성 (현재 시간 기반) - long randomSeed = System.currentTimeMillis(); - double randomWeight = 0.0; - // 1. 읽은 글 제외 필터 쿼리 생성 (Pre-filtering) Query filterQuery = createExcludeFilter(readPostIds); @@ -203,17 +199,13 @@ private List searchCandidatesWithCustomReadHistory( filterQuery ); - // 3. 
랜덤 요소 추가 (function_score) - Query randomQuery = vectorQueryBuilder.createRandomScoreQuery(randomSeed, randomWeight); - - log.debug("ES k-NN 검색 실행 (Train/Test Split) - 가중치 [title:{}, summary:{}], 랜덤가중치: {}", - weights.getTitle(), weights.getSummary(), randomWeight); + log.debug("ES k-NN 검색 실행 (Train/Test Split) - 가중치 [title:{}, summary:{}, content:{}]", + weights.getTitle(), weights.getSummary(), weights.getContent()); long startTime = System.currentTimeMillis(); SearchResponse response = elasticsearchClient.search(s -> s .index(POSTS_INDEX) .knn(knnSearches) // k-NN 검색 (관련성 + 필터링) - .query(randomQuery) // 랜덤 점수 추가 .size(properties.getKnnSearchSize()) , PostDocument.class @@ -246,10 +238,6 @@ private List searchCandidates(float[] userProfileVector, User user // 가중치 가져오기 RecommendationProperties.EmbeddingWeights weights = properties.getEmbeddingWeights(); - // 랜덤 시드 생성 (현재 시간 기반) - long randomSeed = System.currentTimeMillis(); - double randomWeight = 0.0; // 랜덤 가중치 20% - // 1. 읽은 글 제외 필터 쿼리 생성 (Pre-filtering) Query filterQuery = createExcludeFilter(readPostIds); @@ -267,17 +255,13 @@ private List searchCandidates(float[] userProfileVector, User user filterQuery ); - // 3. 
랜덤 요소 추가 (function_score) - Query randomQuery = vectorQueryBuilder.createRandomScoreQuery(randomSeed, randomWeight); - - log.debug("ES k-NN 검색 실행 - 가중치 [title:{}, summary:{}], 랜덤시드: {}, 랜덤가중치: {}", - weights.getTitle(), weights.getSummary(), randomSeed, randomWeight); + log.debug("ES k-NN 검색 실행 - 가중치 [title:{}, summary:{}, content:{}]", + weights.getTitle(), weights.getSummary(), weights.getContent()); long startTime = System.currentTimeMillis(); SearchResponse response = elasticsearchClient.search(s -> s .index(POSTS_INDEX) .knn(knnSearches) // k-NN 검색 (관련성 + 필터링) - .query(randomQuery) // 랜덤 점수 추가 .size(properties.getKnnSearchSize()) , PostDocument.class diff --git a/src/main/java/com/techfork/global/elasticsearch/query/VectorQueryBuilder.java b/src/main/java/com/techfork/global/elasticsearch/query/VectorQueryBuilder.java index c435993..90f0035 100644 --- a/src/main/java/com/techfork/global/elasticsearch/query/VectorQueryBuilder.java +++ b/src/main/java/com/techfork/global/elasticsearch/query/VectorQueryBuilder.java @@ -38,13 +38,4 @@ List createKnnSearches( int numCandidates, Query filter ); - - /** - * 랜덤 점수를 위한 function_score 쿼리 생성 - * - * @param randomSeed 랜덤 시드 - * @param randomWeight 랜덤 가중치 - * @return function_score 쿼리 - */ - Query createRandomScoreQuery(long randomSeed, double randomWeight); } \ No newline at end of file diff --git a/src/main/java/com/techfork/global/elasticsearch/query/VectorSearchQueryBuilder.java b/src/main/java/com/techfork/global/elasticsearch/query/VectorSearchQueryBuilder.java index 3a978bf..990800e 100644 --- a/src/main/java/com/techfork/global/elasticsearch/query/VectorSearchQueryBuilder.java +++ b/src/main/java/com/techfork/global/elasticsearch/query/VectorSearchQueryBuilder.java @@ -81,21 +81,4 @@ public List createKnnSearches( return knnSearches; } - - @Override - public Query createRandomScoreQuery(long randomSeed, double randomWeight) { - return Query.of(q -> q - .functionScore(fs -> fs - .query(mq -> mq.matchAll(m -> m)) - 
.functions(fn -> fn - .randomScore(rs -> rs - .seed(String.valueOf(randomSeed)) - .field("_seq_no") - ) - .weight(randomWeight) - ) - .boostMode(FunctionBoostMode.Sum) - ) - ); - } } \ No newline at end of file From b79fa3f1936e32af76b3d8a3111094e48f2f361a Mon Sep 17 00:00:00 2001 From: dmori Date: Fri, 6 Feb 2026 21:11:42 +0900 Subject: [PATCH 05/21] =?UTF-8?q?refactor:=20=EC=9D=BD=EC=9D=80=20?= =?UTF-8?q?=EA=B8=80=20=EC=A0=9C=EC=99=B8=20=EC=BF=BC=EB=A6=AC=20=EC=83=9D?= =?UTF-8?q?=EC=84=B1=20=EB=A9=94=EC=84=9C=EB=93=9C=EB=A5=BC=20VectorQueryB?= =?UTF-8?q?uilder=EB=A1=9C=20=EC=9D=B4=EB=8F=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../service/LlmRecommendationService.java | 29 ++----------------- .../query/VectorQueryBuilder.java | 9 ++++++ .../query/VectorSearchQueryBuilder.java | 24 +++++++++++++++ 3 files changed, 35 insertions(+), 27 deletions(-) diff --git a/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java b/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java index bee2a29..499ac6b 100644 --- a/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java +++ b/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java @@ -1,7 +1,6 @@ package com.techfork.domain.recommendation.service; import co.elastic.clients.elasticsearch.ElasticsearchClient; -import co.elastic.clients.elasticsearch._types.FieldValue; import co.elastic.clients.elasticsearch._types.KnnSearch; import co.elastic.clients.elasticsearch._types.query_dsl.Query; import co.elastic.clients.elasticsearch.core.SearchResponse; @@ -183,7 +182,7 @@ private List searchCandidatesWithCustomReadHistory( RecommendationProperties.EmbeddingWeights weights = properties.getEmbeddingWeights(); // 1. 
읽은 글 제외 필터 쿼리 생성 (Pre-filtering) - Query filterQuery = createExcludeFilter(readPostIds); + Query filterQuery = vectorQueryBuilder.createExcludeFilter(readPostIds); // 2. 네이티브 k-NN 검색 객체 리스트 생성 (Title + Summary + Content) List knnSearches = vectorQueryBuilder.createKnnSearches( @@ -239,7 +238,7 @@ private List searchCandidates(float[] userProfileVector, User user RecommendationProperties.EmbeddingWeights weights = properties.getEmbeddingWeights(); // 1. 읽은 글 제외 필터 쿼리 생성 (Pre-filtering) - Query filterQuery = createExcludeFilter(readPostIds); + Query filterQuery = vectorQueryBuilder.createExcludeFilter(readPostIds); // 2. 네이티브 k-NN 검색 객체 리스트 생성 (Title + Summary + Content) List knnSearches = vectorQueryBuilder.createKnnSearches( @@ -277,30 +276,6 @@ private List searchCandidates(float[] userProfileVector, User user .toList(); } - /** - * 읽은 글 제외를 위한 필터 쿼리 생성 - */ - private Query createExcludeFilter(Set readPostIds) { - if (readPostIds == null || readPostIds.isEmpty()) { - return null; - } - - List excludeValues = readPostIds.stream() - .map(FieldValue::of) - .toList(); - - return Query.of(q -> q - .bool(b -> b - .mustNot(mn -> mn - .terms(t -> t - .field("postId") - .terms(v -> v.value(excludeValues)) - ) - ) - ) - ); - } - /** * PostDocument를 MmrCandidate로 변환 * 시간 감쇠 가중치를 유사도 점수에 적용 diff --git a/src/main/java/com/techfork/global/elasticsearch/query/VectorQueryBuilder.java b/src/main/java/com/techfork/global/elasticsearch/query/VectorQueryBuilder.java index 90f0035..7d0c0c7 100644 --- a/src/main/java/com/techfork/global/elasticsearch/query/VectorQueryBuilder.java +++ b/src/main/java/com/techfork/global/elasticsearch/query/VectorQueryBuilder.java @@ -3,6 +3,7 @@ import co.elastic.clients.elasticsearch._types.KnnSearch; import co.elastic.clients.elasticsearch._types.query_dsl.Query; import java.util.List; +import java.util.Set; /** * Elasticsearch 벡터 검색 쿼리 빌더 인터페이스 @@ -10,6 +11,14 @@ */ public interface VectorQueryBuilder { + /** + * 읽은 글 제외를 위한 필터 쿼리 생성 (Pre-filtering용) 
+ * + * @param readPostIds 제외할 게시글 ID 목록 + * @return Elasticsearch Query 객체 + */ + Query createExcludeFilter(Set readPostIds); + /** * 네이티브 k-NN 검색 객체 리스트 생성 * (title, summary, content 필드에 대한 k-NN 검색) diff --git a/src/main/java/com/techfork/global/elasticsearch/query/VectorSearchQueryBuilder.java b/src/main/java/com/techfork/global/elasticsearch/query/VectorSearchQueryBuilder.java index 990800e..3c04b8a 100644 --- a/src/main/java/com/techfork/global/elasticsearch/query/VectorSearchQueryBuilder.java +++ b/src/main/java/com/techfork/global/elasticsearch/query/VectorSearchQueryBuilder.java @@ -1,5 +1,6 @@ package com.techfork.global.elasticsearch.query; +import co.elastic.clients.elasticsearch._types.FieldValue; import co.elastic.clients.elasticsearch._types.KnnSearch; import co.elastic.clients.elasticsearch._types.query_dsl.FunctionBoostMode; import co.elastic.clients.elasticsearch._types.query_dsl.Query; @@ -9,6 +10,7 @@ import java.util.ArrayList; import java.util.List; +import java.util.Set; /** * Elasticsearch 벡터 검색 쿼리 빌더 구현체 @@ -18,6 +20,28 @@ @NoArgsConstructor(access = AccessLevel.PRIVATE) public class VectorSearchQueryBuilder implements VectorQueryBuilder { + @Override + public Query createExcludeFilter(Set readPostIds) { + if (readPostIds == null || readPostIds.isEmpty()) { + return null; + } + + List excludeValues = readPostIds.stream() + .map(FieldValue::of) + .toList(); + + return Query.of(q -> q + .bool(b -> b + .mustNot(mn -> mn + .terms(t -> t + .field("postId") + .terms(v -> v.value(excludeValues)) + ) + ) + ) + ); + } + @Override public List createKnnSearches( String titleField, From bb41f04cf7a286168cf7895c36fb26e808baf6eb Mon Sep 17 00:00:00 2001 From: dmori Date: Fri, 6 Feb 2026 21:12:50 +0900 Subject: [PATCH 06/21] =?UTF-8?q?chore:=20=EB=A9=94=EC=84=9C=EB=93=9C=20?= =?UTF-8?q?=EC=9C=84=EC=B9=98=20=EB=B3=80=EA=B2=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
.../service/LlmRecommendationService.java | 115 +++++++++--------- 1 file changed, 55 insertions(+), 60 deletions(-) diff --git a/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java b/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java index 499ac6b..755d806 100644 --- a/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java +++ b/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java @@ -126,13 +126,64 @@ public int generateRecommendationsForUser(User user) { } } + /** + * Elasticsearch 네이티브 k-NN 검색으로 초기 후보군 조회 + * - 이미 읽은 글 제외 + */ + private List searchCandidates(float[] userProfileVector, User user) throws IOException { + // 이미 읽은 글 ID 목록 + Set readPostIds = readPostRepository.findRecentReadPostsByUserIdWithMinDuration(user.getId(), PageRequest.of(0, 1000)) + .stream() + .map(readPost -> readPost.getPost().getId()) + .collect(Collectors.toSet()); + + log.debug("사용자 {}의 읽은 게시글 {} 개 제외", user.getId(), readPostIds.size()); + + // 가중치 가져오기 + RecommendationProperties.EmbeddingWeights weights = properties.getEmbeddingWeights(); + + // 1. 읽은 글 제외 필터 쿼리 생성 (Pre-filtering) + Query filterQuery = vectorQueryBuilder.createExcludeFilter(readPostIds); + + // 2. 
네이티브 k-NN 검색 객체 리스트 생성 (Title + Summary + Content) + List knnSearches = vectorQueryBuilder.createKnnSearches( + TITLE_EMBEDDING_FIELD, + SUMMARY_EMBEDDING_FIELD, + CONTENT_CHUNKS_EMBEDDING_FIELD, + userProfileVector, + weights.getTitle(), + weights.getSummary(), + weights.getContent(), + properties.getKnnSearchSize(), + properties.getNumCandidates(), + filterQuery + ); + + log.debug("ES k-NN 검색 실행 - 가중치 [title:{}, summary:{}, content:{}]", + weights.getTitle(), weights.getSummary(), weights.getContent()); + + long startTime = System.currentTimeMillis(); + SearchResponse response = elasticsearchClient.search(s -> s + .index(POSTS_INDEX) + .knn(knnSearches) // k-NN 검색 (관련성 + 필터링) + .size(properties.getKnnSearchSize()) + , + PostDocument.class + ); + long duration = System.currentTimeMillis() - startTime; + log.info("추천 후보군 검색 완료: {} 개, 소요 시간: {}ms", response.hits().hits().size(), duration); + + // 결과를 MmrCandidate로 변환 + return response.hits().hits().stream() + .filter(hit -> hit.source() != null) + .map(this::mapToMmrCandidate) + .filter(candidate -> candidate.getSummaryVector() != null) + .toList(); + } + /** * 추천 생성 (평가 전용 - Train/Test Split 지원) * 특정 읽은 글 목록(Train Set)만 제외하고 추천 생성 - * - * @param user 사용자 - * @param trainPostIds Train Set 게시글 ID 목록 (제외할 글) - * @return 추천된 게시글 ID 리스트 */ public List generateRecommendationsForEvaluation(User user, Set trainPostIds) { // 1. 
사용자 프로필 벡터 조회 @@ -220,62 +271,6 @@ private List searchCandidatesWithCustomReadHistory( .toList(); } - /** - * Elasticsearch 네이티브 k-NN 검색으로 초기 후보군 조회 - * - 이미 읽은 글 제외 - * - 랜덤 시드를 사용하여 매번 다른 후보군 생성 - */ - private List searchCandidates(float[] userProfileVector, User user) throws IOException { - // 이미 읽은 글 ID 목록 - Set readPostIds = readPostRepository.findRecentReadPostsByUserIdWithMinDuration(user.getId(), PageRequest.of(0, 1000)) - .stream() - .map(readPost -> readPost.getPost().getId()) - .collect(Collectors.toSet()); - - log.debug("사용자 {}의 읽은 게시글 {} 개 제외", user.getId(), readPostIds.size()); - - // 가중치 가져오기 - RecommendationProperties.EmbeddingWeights weights = properties.getEmbeddingWeights(); - - // 1. 읽은 글 제외 필터 쿼리 생성 (Pre-filtering) - Query filterQuery = vectorQueryBuilder.createExcludeFilter(readPostIds); - - // 2. 네이티브 k-NN 검색 객체 리스트 생성 (Title + Summary + Content) - List knnSearches = vectorQueryBuilder.createKnnSearches( - TITLE_EMBEDDING_FIELD, - SUMMARY_EMBEDDING_FIELD, - CONTENT_CHUNKS_EMBEDDING_FIELD, - userProfileVector, - weights.getTitle(), - weights.getSummary(), - weights.getContent(), - properties.getKnnSearchSize(), - properties.getNumCandidates(), - filterQuery - ); - - log.debug("ES k-NN 검색 실행 - 가중치 [title:{}, summary:{}, content:{}]", - weights.getTitle(), weights.getSummary(), weights.getContent()); - - long startTime = System.currentTimeMillis(); - SearchResponse response = elasticsearchClient.search(s -> s - .index(POSTS_INDEX) - .knn(knnSearches) // k-NN 검색 (관련성 + 필터링) - .size(properties.getKnnSearchSize()) - , - PostDocument.class - ); - long duration = System.currentTimeMillis() - startTime; - log.info("추천 후보군 검색 완료: {} 개, 소요 시간: {}ms", response.hits().hits().size(), duration); - - // 결과를 MmrCandidate로 변환 - return response.hits().hits().stream() - .filter(hit -> hit.source() != null) - .map(this::mapToMmrCandidate) - .filter(candidate -> candidate.getSummaryVector() != null) - .toList(); - } - /** * PostDocument를 MmrCandidate로 변환 * 시간 감쇠 
가중치를 유사도 점수에 적용 From 4fe2283f230b6e7d7b7baa45dc1777bec1619bed Mon Sep 17 00:00:00 2001 From: dmori Date: Fri, 6 Feb 2026 21:43:54 +0900 Subject: [PATCH 07/21] =?UTF-8?q?refactor:=20=ED=8F=89=EA=B0=80=EB=A5=BC?= =?UTF-8?q?=20=EC=9C=84=ED=95=9C=20=EC=B6=94=EC=B2=9C=20=EB=A9=94=EC=84=9C?= =?UTF-8?q?=EB=93=9C=EB=A5=BC=20test=20=ED=8C=A8=ED=82=A4=EC=A7=80?= =?UTF-8?q?=EB=A1=9C=20=EB=B6=84=EB=A6=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../service/LlmRecommendationService.java | 168 ++---------------- .../query/VectorSearchQueryBuilder.java | 25 ++- .../RecommendationEvaluationService.java | 116 ++++++++++++ .../evaluation/RecommendationTestBase.java | 69 ++----- 4 files changed, 159 insertions(+), 219 deletions(-) create mode 100644 src/test/java/com/techfork/domain/recommendation/evaluation/RecommendationEvaluationService.java diff --git a/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java b/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java index 755d806..c48ee89 100644 --- a/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java +++ b/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java @@ -34,10 +34,6 @@ /** * MMR 알고리즘 기반 추천 전략 구현 - * - Elasticsearch k-NN 검색으로 초기 후보군 수집 - * - MMR 알고리즘으로 다양성 보장 - * - 읽은 글 제외 필터링 (Pre-filtering) - * - 시간 감쇠 가중치 적용 (최신 게시글 우선) */ @Slf4j @Service @@ -65,7 +61,6 @@ public class LlmRecommendationService implements RecommendationService { public int generateRecommendationsForUser(User user) { log.info("사용자 {} 추천 생성 시작", user.getId()); - // 1. 사용자 프로필 벡터 조회 Optional profileOpt = userProfileDocumentRepository.findByUserId(user.getId()); if (profileOpt.isEmpty() || profileOpt.get().getProfileVector() == null) { log.warn("사용자 {}의 프로필 또는 벡터를 찾을 수 없음. 
추천 생성 스킵.", user.getId()); @@ -84,14 +79,11 @@ public int generateRecommendationsForUser(User user) { return 0; } - log.info("사용자 {} 추천 후보 {} 개 발견", user.getId(), candidates.size()); - // 3. MMR 적용하여 최종 추천 선택 List mmrResults = mmrService.applyMmr(candidates); - // 4. 기존 추천을 이력으로 보관 (오늘 생성된 추천 포함) + // 4. 기존 추천을 이력으로 보관 List oldRecommendations = recommendedPostRepository.findByUserOrderByRankAsc(user); - if (!oldRecommendations.isEmpty()) { List histories = oldRecommendations.stream() .map(RecommendationHistory::fromRecommendedPost) @@ -104,20 +96,12 @@ public int generateRecommendationsForUser(User user) { List recommendations = new ArrayList<>(); for (MmrResult result : mmrResults) { Post post = postRepository.getReferenceById(result.getPostId()); - RecommendedPost recommendedPost = RecommendedPost.create( - user, - post, - result.getSimilarityScore(), - result.getMmrScore(), - result.getRank() - ); - recommendations.add(recommendedPost); + recommendations.add(RecommendedPost.create( + user, post, result.getSimilarityScore(), result.getMmrScore(), result.getRank() + )); } recommendedPostRepository.saveAll(recommendations); - - log.info("사용자 {} 추천 생성 완료: {} 개", user.getId(), recommendations.size()); - return recommendations.size(); } catch (Exception e) { @@ -126,144 +110,33 @@ public int generateRecommendationsForUser(User user) { } } - /** - * Elasticsearch 네이티브 k-NN 검색으로 초기 후보군 조회 - * - 이미 읽은 글 제외 - */ private List searchCandidates(float[] userProfileVector, User user) throws IOException { - // 이미 읽은 글 ID 목록 Set readPostIds = readPostRepository.findRecentReadPostsByUserIdWithMinDuration(user.getId(), PageRequest.of(0, 1000)) .stream() .map(readPost -> readPost.getPost().getId()) .collect(Collectors.toSet()); - log.debug("사용자 {}의 읽은 게시글 {} 개 제외", user.getId(), readPostIds.size()); - - // 가중치 가져오기 RecommendationProperties.EmbeddingWeights weights = properties.getEmbeddingWeights(); - - // 1. 
읽은 글 제외 필터 쿼리 생성 (Pre-filtering) Query filterQuery = vectorQueryBuilder.createExcludeFilter(readPostIds); - // 2. 네이티브 k-NN 검색 객체 리스트 생성 (Title + Summary + Content) List knnSearches = vectorQueryBuilder.createKnnSearches( - TITLE_EMBEDDING_FIELD, - SUMMARY_EMBEDDING_FIELD, - CONTENT_CHUNKS_EMBEDDING_FIELD, - userProfileVector, - weights.getTitle(), - weights.getSummary(), - weights.getContent(), - properties.getKnnSearchSize(), - properties.getNumCandidates(), - filterQuery + TITLE_EMBEDDING_FIELD, SUMMARY_EMBEDDING_FIELD, CONTENT_CHUNKS_EMBEDDING_FIELD, + userProfileVector, weights.getTitle(), weights.getSummary(), weights.getContent(), + properties.getKnnSearchSize(), properties.getNumCandidates(), filterQuery ); - log.debug("ES k-NN 검색 실행 - 가중치 [title:{}, summary:{}, content:{}]", - weights.getTitle(), weights.getSummary(), weights.getContent()); - long startTime = System.currentTimeMillis(); SearchResponse response = elasticsearchClient.search(s -> s .index(POSTS_INDEX) - .knn(knnSearches) // k-NN 검색 (관련성 + 필터링) + .knn(knnSearches) .size(properties.getKnnSearchSize()) , PostDocument.class ); - long duration = System.currentTimeMillis() - startTime; - log.info("추천 후보군 검색 완료: {} 개, 소요 시간: {}ms", response.hits().hits().size(), duration); - - // 결과를 MmrCandidate로 변환 - return response.hits().hits().stream() - .filter(hit -> hit.source() != null) - .map(this::mapToMmrCandidate) - .filter(candidate -> candidate.getSummaryVector() != null) - .toList(); - } - - /** - * 추천 생성 (평가 전용 - Train/Test Split 지원) - * 특정 읽은 글 목록(Train Set)만 제외하고 추천 생성 - */ - public List generateRecommendationsForEvaluation(User user, Set trainPostIds) { - // 1. 사용자 프로필 벡터 조회 - Optional profileOpt = userProfileDocumentRepository.findByUserId(user.getId()); - if (profileOpt.isEmpty() || profileOpt.get().getProfileVector() == null) { - log.warn("사용자 {}의 프로필 또는 벡터를 찾을 수 없음. 
추천 생성 스킵.", user.getId()); - return Collections.emptyList(); - } - - float[] userProfileVector = profileOpt.get().getProfileVector(); - - try { - // 2. k-NN 검색으로 초기 후보군 가져오기 (Train Set만 제외) - List candidates = searchCandidatesWithCustomReadHistory(userProfileVector, user, trainPostIds); - - if (candidates.isEmpty()) { - log.debug("사용자 {}의 추천 후보군을 찾을 수 없음 (Train Set {} 개 제외)", user.getId(), trainPostIds.size()); - return Collections.emptyList(); - } - - // 3. MMR 적용하여 최종 추천 선택 - List mmrResults = mmrService.applyMmr(candidates); - - // 4. 추천된 게시글 ID 리스트 반환 - return mmrResults.stream() - .map(MmrResult::getPostId) - .toList(); - - } catch (Exception e) { - log.error("사용자 {} 추천 생성 실패 (Train/Test Split 평가용)", user.getId(), e); - return Collections.emptyList(); - } - } - - /** - * Elasticsearch 네이티브 k-NN 검색으로 초기 후보군 조회 (커스텀 읽은 글 목록) - * Train/Test Split 평가를 위해 Train Set만 제외 - */ - private List searchCandidatesWithCustomReadHistory( - float[] userProfileVector, - User user, - Set readPostIds) throws IOException { - - log.debug("사용자 {}의 읽은 게시글 {} 개 제외 (Train Set)", user.getId(), readPostIds.size()); - - // 가중치 가져오기 - RecommendationProperties.EmbeddingWeights weights = properties.getEmbeddingWeights(); - - // 1. 읽은 글 제외 필터 쿼리 생성 (Pre-filtering) - Query filterQuery = vectorQueryBuilder.createExcludeFilter(readPostIds); - - // 2. 
네이티브 k-NN 검색 객체 리스트 생성 (Title + Summary + Content) - List knnSearches = vectorQueryBuilder.createKnnSearches( - TITLE_EMBEDDING_FIELD, - SUMMARY_EMBEDDING_FIELD, - CONTENT_CHUNKS_EMBEDDING_FIELD, - userProfileVector, - weights.getTitle(), - weights.getSummary(), - weights.getContent(), - properties.getKnnSearchSize(), - properties.getNumCandidates(), - filterQuery - ); - log.debug("ES k-NN 검색 실행 (Train/Test Split) - 가중치 [title:{}, summary:{}, content:{}]", - weights.getTitle(), weights.getSummary(), weights.getContent()); + log.info("후보군 검색 완료: {} 개, 소요 시간: {}ms", + response.hits().hits().size(), System.currentTimeMillis() - startTime); - long startTime = System.currentTimeMillis(); - SearchResponse response = elasticsearchClient.search(s -> s - .index(POSTS_INDEX) - .knn(knnSearches) // k-NN 검색 (관련성 + 필터링) - .size(properties.getKnnSearchSize()) - , - PostDocument.class - ); - long duration = System.currentTimeMillis() - startTime; - log.info("추천 후보군 검색 완료 (Evaluation): {} 개, 소요 시간: {}ms", response.hits().hits().size(), duration); - - // 결과를 MmrCandidate로 변환 return response.hits().hits().stream() .filter(hit -> hit.source() != null) .map(this::mapToMmrCandidate) @@ -271,29 +144,16 @@ private List searchCandidatesWithCustomReadHistory( .toList(); } - /** - * PostDocument를 MmrCandidate로 변환 - * 시간 감쇠 가중치를 유사도 점수에 적용 - */ private MmrCandidate mapToMmrCandidate(Hit hit) { PostDocument doc = hit.source(); double score = Objects.requireNonNullElse(hit.score(), 0.0); - - // 시간 감쇠 가중치 적용 double timeDecayWeight = timeDecayStrategy.calculateWeight(Objects.requireNonNull(doc).getPublishedAt()); - double adjustedScore = score * timeDecayWeight; - - log.trace("게시글 {} 점수 조정: 원본={}, 시간가중치={}, 최종={}", - doc.getPostId(), score, timeDecayWeight, adjustedScore); - - float[] titleVector = VectorUtil.convertToFloatArray(doc.getTitleEmbedding()); - float[] summaryVector = VectorUtil.convertToFloatArray(doc.getSummaryEmbedding()); - + return MmrCandidate.builder() .postId(doc.getPostId()) 
- .titleVector(titleVector) - .summaryVector(summaryVector) - .similarityScore(adjustedScore) + .titleVector(VectorUtil.convertToFloatArray(doc.getTitleEmbedding())) + .summaryVector(VectorUtil.convertToFloatArray(doc.getSummaryEmbedding())) + .similarityScore(score * timeDecayWeight) .build(); } } diff --git a/src/main/java/com/techfork/global/elasticsearch/query/VectorSearchQueryBuilder.java b/src/main/java/com/techfork/global/elasticsearch/query/VectorSearchQueryBuilder.java index 3c04b8a..0b06d72 100644 --- a/src/main/java/com/techfork/global/elasticsearch/query/VectorSearchQueryBuilder.java +++ b/src/main/java/com/techfork/global/elasticsearch/query/VectorSearchQueryBuilder.java @@ -2,7 +2,6 @@ import co.elastic.clients.elasticsearch._types.FieldValue; import co.elastic.clients.elasticsearch._types.KnnSearch; -import co.elastic.clients.elasticsearch._types.query_dsl.FunctionBoostMode; import co.elastic.clients.elasticsearch._types.query_dsl.Query; import lombok.AccessLevel; import lombok.NoArgsConstructor; @@ -64,10 +63,10 @@ public List createKnnSearches( if (titleWeight > 0) { knnSearches.add(KnnSearch.of(ks -> { ks.field(titleField) - .queryVector(vectorList) - .k(k) - .numCandidates(numCandidates) - .boost(titleWeight); + .queryVector(vectorList) + .k(k) + .numCandidates(numCandidates) + .boost(titleWeight); if (filter != null) { ks.filter(filter); } @@ -78,10 +77,10 @@ public List createKnnSearches( if (summaryWeight > 0) { knnSearches.add(KnnSearch.of(ks -> { ks.field(summaryField) - .queryVector(vectorList) - .k(k) - .numCandidates(numCandidates) - .boost(summaryWeight); + .queryVector(vectorList) + .k(k) + .numCandidates(numCandidates) + .boost(summaryWeight); if (filter != null) { ks.filter(filter); } @@ -92,10 +91,10 @@ public List createKnnSearches( if (contentWeight > 0 && contentField != null) { knnSearches.add(KnnSearch.of(ks -> { ks.field(contentField) - .queryVector(vectorList) - .k(k) - .numCandidates(numCandidates) - .boost(contentWeight); + 
.queryVector(vectorList) + .k(k) + .numCandidates(numCandidates) + .boost(contentWeight); if (filter != null) { ks.filter(filter); } diff --git a/src/test/java/com/techfork/domain/recommendation/evaluation/RecommendationEvaluationService.java b/src/test/java/com/techfork/domain/recommendation/evaluation/RecommendationEvaluationService.java new file mode 100644 index 0000000..3c825fd --- /dev/null +++ b/src/test/java/com/techfork/domain/recommendation/evaluation/RecommendationEvaluationService.java @@ -0,0 +1,116 @@ +package com.techfork.domain.recommendation.evaluation; + +import co.elastic.clients.elasticsearch.ElasticsearchClient; +import co.elastic.clients.elasticsearch.core.SearchResponse; +import co.elastic.clients.elasticsearch.core.search.Hit; +import co.elastic.clients.elasticsearch._types.query_dsl.Query; +import co.elastic.clients.elasticsearch._types.KnnSearch; +import com.techfork.domain.post.document.PostDocument; +import com.techfork.domain.recommendation.config.RecommendationProperties; +import com.techfork.domain.recommendation.service.MmrService; +import com.techfork.domain.recommendation.service.MmrService.MmrCandidate; +import com.techfork.domain.recommendation.service.MmrService.MmrResult; +import com.techfork.domain.user.document.UserProfileDocument; +import com.techfork.domain.user.entity.User; +import com.techfork.domain.user.repository.UserProfileDocumentRepository; +import com.techfork.global.elasticsearch.query.VectorQueryBuilder; +import com.techfork.global.util.TimeDecayStrategy; +import com.techfork.global.util.VectorUtil; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.stereotype.Service; + +import java.io.IOException; +import java.util.*; +import java.util.stream.Collectors; + +/** + * 추천 시스템 성능 평가를 위한 전용 서비스 + */ +@Slf4j +@Service +@RequiredArgsConstructor +public class RecommendationEvaluationService { + + private final ElasticsearchClient elasticsearchClient; + private final 
UserProfileDocumentRepository userProfileDocumentRepository; + private final VectorQueryBuilder vectorQueryBuilder; + private final TimeDecayStrategy timeDecayStrategy; + + private static final String POSTS_INDEX = "posts"; + private static final String TITLE_EMBEDDING_FIELD = "titleEmbedding"; + private static final String SUMMARY_EMBEDDING_FIELD = "summaryEmbedding"; + private static final String CONTENT_CHUNKS_EMBEDDING_FIELD = "contentChunks.embedding"; + + /** + * 추천 생성 (평가 전용 - Train/Test Split 지원) + */ + public List generateRecommendationsForEvaluation(User user, Set trainPostIds, RecommendationProperties properties) { + Optional profileOpt = userProfileDocumentRepository.findByUserId(user.getId()); + if (profileOpt.isEmpty() || profileOpt.get().getProfileVector() == null) { + return Collections.emptyList(); + } + + float[] userProfileVector = profileOpt.get().getProfileVector(); + List keywords = profileOpt.get().getInterests(); + + try { + List candidates = searchCandidatesWithCustomReadHistory(userProfileVector, keywords, user, trainPostIds, properties); + + if (candidates.isEmpty()) { + return Collections.emptyList(); + } + + // MMR 적용 (테스트용 properties 사용) + MmrService mmrService = new MmrService(properties); + List mmrResults = mmrService.applyMmr(candidates); + + return mmrResults.stream() + .map(MmrResult::getPostId) + .toList(); + + } catch (Exception e) { + log.error("사용자 {} 평가용 추천 생성 실패", user.getId(), e); + return Collections.emptyList(); + } + } + + private List searchCandidatesWithCustomReadHistory( + float[] userProfileVector, + List keywords, + User user, + Set readPostIds, + RecommendationProperties properties) throws IOException { + + RecommendationProperties.EmbeddingWeights weights = properties.getEmbeddingWeights(); + Query filterQuery = vectorQueryBuilder.createExcludeFilter(readPostIds); + + List knnSearches = vectorQueryBuilder.createKnnSearches( + TITLE_EMBEDDING_FIELD, SUMMARY_EMBEDDING_FIELD, CONTENT_CHUNKS_EMBEDDING_FIELD, + 
userProfileVector, weights.getTitle(), weights.getSummary(), weights.getContent(), + properties.getKnnSearchSize(), properties.getNumCandidates(), filterQuery + ); + + SearchResponse vectorResponse = elasticsearchClient.search(s -> s + .index(POSTS_INDEX).knn(knnSearches).size(properties.getKnnSearchSize()), + PostDocument.class + ); + + return vectorResponse.hits().hits().stream() + .filter(hit -> hit.source() != null) + .map(hit -> mapToMmrCandidate(hit, hit.score() != null ? hit.score() : 0.0)) + .filter(candidate -> candidate.getSummaryVector() != null) + .toList(); + } + + private MmrCandidate mapToMmrCandidate(Hit hit, double score) { + PostDocument doc = hit.source(); + double timeDecayWeight = timeDecayStrategy.calculateWeight(Objects.requireNonNull(doc).getPublishedAt()); + return MmrCandidate.builder() + .postId(doc.getPostId()) + .titleVector(VectorUtil.convertToFloatArray(doc.getTitleEmbedding())) + .summaryVector(VectorUtil.convertToFloatArray(doc.getSummaryEmbedding())) + .similarityScore(score * timeDecayWeight) + .build(); + } +} \ No newline at end of file diff --git a/src/test/java/com/techfork/domain/recommendation/evaluation/RecommendationTestBase.java b/src/test/java/com/techfork/domain/recommendation/evaluation/RecommendationTestBase.java index 2d9b0ed..c36580a 100644 --- a/src/test/java/com/techfork/domain/recommendation/evaluation/RecommendationTestBase.java +++ b/src/test/java/com/techfork/domain/recommendation/evaluation/RecommendationTestBase.java @@ -1,21 +1,11 @@ package com.techfork.domain.recommendation.evaluation; -import co.elastic.clients.elasticsearch.ElasticsearchClient; import com.techfork.domain.activity.repository.ReadPostRepository; import com.techfork.domain.post.repository.PostDocumentRepository; -import com.techfork.domain.post.repository.PostRepository; import com.techfork.domain.recommendation.config.RecommendationProperties; -import com.techfork.domain.recommendation.repository.RecommendationHistoryRepository; -import 
com.techfork.domain.recommendation.repository.RecommendedPostRepository; -import com.techfork.domain.recommendation.service.LlmRecommendationService; -import com.techfork.domain.recommendation.service.MmrService; import com.techfork.domain.recommendation.util.EvaluationFixtureLoader; import com.techfork.domain.user.entity.User; -import com.techfork.domain.user.enums.EInterestCategory; -import com.techfork.domain.user.repository.UserProfileDocumentRepository; import com.techfork.global.common.IntegrationTestBase; -import com.techfork.global.elasticsearch.query.VectorQueryBuilder; -import com.techfork.global.util.TimeDecayStrategy; import com.techfork.global.util.VectorUtil; import lombok.AllArgsConstructor; import lombok.Builder; @@ -25,7 +15,6 @@ import org.junit.jupiter.api.TestInstance; import org.springframework.beans.factory.annotation.Autowired; -import java.io.IOException; import java.util.*; /** @@ -36,9 +25,9 @@ public abstract class RecommendationTestBase extends IntegrationTestBase { // 테스트 상수 - protected static final int K_FIRST_ROW = 4; // 첫 줄 - protected static final int K_FIRST_SCREEN = 8; // 첫 화면 - protected static final int K_DEEP_EXPLORE = 30; // 깊은 탐색 + protected static final int K_FIRST_ROW = 4; + protected static final int K_FIRST_SCREEN = 8; + protected static final int K_DEEP_EXPLORE = 30; protected static final float DEFAULT_TITLE_WEIGHT = 0.4f; protected static final float DEFAULT_SUMMARY_WEIGHT = 0.4f; @@ -50,15 +39,9 @@ public abstract class RecommendationTestBase extends IntegrationTestBase { @Autowired protected EvaluationFixtureLoader fixtureLoader; @Autowired protected RecommendationQualityService qualityService; + @Autowired protected RecommendationEvaluationService evaluationService; // 새로운 서비스 주입 @Autowired protected PostDocumentRepository postDocumentRepository; - @Autowired protected ElasticsearchClient elasticsearchClient; - @Autowired protected UserProfileDocumentRepository userProfileDocumentRepository; - @Autowired protected 
RecommendedPostRepository recommendedPostRepository; - @Autowired protected RecommendationHistoryRepository recommendationHistoryRepository; @Autowired protected ReadPostRepository readPostRepository; - @Autowired protected PostRepository postRepository; - @Autowired protected TimeDecayStrategy timeDecayStrategy; - @Autowired protected VectorQueryBuilder vectorQueryBuilder; @Autowired protected com.techfork.domain.user.repository.UserRepository userRepository; protected static List cachedTestUsers; @@ -115,10 +98,6 @@ protected List getTestUsers() { return cachedTestUsers; } - protected double calculateCompositeScore(double recall, double ndcg, double ild) { - return recall * RECALL_WEIGHT + ndcg * NDCG_WEIGHT + ild * ILD_WEIGHT; - } - protected RecommendationProperties createProperties(float tw, float sw, float cw, double lambda) { RecommendationProperties props = new RecommendationProperties(); props.setKnnSearchSize(100); @@ -132,15 +111,6 @@ protected RecommendationProperties createProperties(float tw, float sw, float cw return props; } - protected LlmRecommendationService createRecommendationService(RecommendationProperties props) { - MmrService mmrService = new MmrService(props); - return new LlmRecommendationService( - elasticsearchClient, userProfileDocumentRepository, recommendedPostRepository, - recommendationHistoryRepository, readPostRepository, postRepository, - mmrService, timeDecayStrategy, props, vectorQueryBuilder - ); - } - protected EvaluationResult calculateAverageMetrics(String configName, List metrics) { double r4 = metrics.stream().mapToDouble(UserMetrics::getRecall4).average().orElse(0.0); double n4 = metrics.stream().mapToDouble(UserMetrics::getNdcg4).average().orElse(0.0); @@ -149,7 +119,8 @@ protected EvaluationResult calculateAverageMetrics(String configName, List evaluateUserWithGroundTruth(User user, LlmRecommendationService service) { + protected Optional evaluateUserWithGroundTruth(User user, RecommendationProperties props) { try { 
Map groundTruth = cachedGroundTruth.get(user.getId()); if (groundTruth == null || groundTruth.isEmpty()) return Optional.empty(); @@ -171,7 +139,8 @@ protected Optional evaluateUserWithGroundTruth(User user, LlmRecomm Set readIds = readPostRepository.findRecentReadPostsByUserIdWithMinDuration(user.getId(), org.springframework.data.domain.PageRequest.of(0, 10000)) .stream().map(rp -> rp.getPost().getId()).collect(java.util.stream.Collectors.toSet()); - List recIds = service.generateRecommendationsForEvaluation(user, readIds); + // 새로운 서비스 사용 + List recIds = evaluationService.generateRecommendationsForEvaluation(user, readIds, props); if (recIds.isEmpty()) return Optional.empty(); double r4 = qualityService.calculateRecall(recIds, groundTruth.keySet(), K_FIRST_ROW); @@ -187,10 +156,7 @@ protected Optional evaluateUserWithGroundTruth(User user, LlmRecomm } } - /** - * ILD 포함 평가 (Lambda 최적화용) - */ - protected Optional evaluateUserWithGroundTruthAndILD(User user, LlmRecommendationService service) { + protected Optional evaluateUserWithGroundTruthAndILD(User user, RecommendationProperties props) { try { Map groundTruth = cachedGroundTruth.get(user.getId()); if (groundTruth == null || groundTruth.isEmpty()) return Optional.empty(); @@ -198,7 +164,8 @@ protected Optional evaluateUserWithGroundTruthAndILD(User user, Llm Set readIds = readPostRepository.findRecentReadPostsByUserIdWithMinDuration(user.getId(), org.springframework.data.domain.PageRequest.of(0, 10000)) .stream().map(rp -> rp.getPost().getId()).collect(java.util.stream.Collectors.toSet()); - List recIds = service.generateRecommendationsForEvaluation(user, readIds); + // 새로운 서비스 사용 + List recIds = evaluationService.generateRecommendationsForEvaluation(user, readIds, props); if (recIds.isEmpty()) return Optional.empty(); double r4 = qualityService.calculateRecall(recIds, groundTruth.keySet(), K_FIRST_ROW); @@ -221,11 +188,10 @@ protected Optional evaluateUserWithGroundTruthAndILD(User user, Llm } protected 
EvaluationResult evaluateConfigWithGroundTruth(ConfigCombo config, List testUsers) { - LlmRecommendationService service = createRecommendationService( - createProperties(config.getTitleWeight(), config.getSummaryWeight(), config.getContentWeight(), config.getMmrLambda())); + RecommendationProperties props = createProperties(config.getTitleWeight(), config.getSummaryWeight(), config.getContentWeight(), config.getMmrLambda()); List metrics = testUsers.stream() - .map(user -> evaluateUserWithGroundTruth(user, service)) + .map(user -> evaluateUserWithGroundTruth(user, props)) .filter(Optional::isPresent) .map(Optional::get) .toList(); @@ -234,11 +200,10 @@ protected EvaluationResult evaluateConfigWithGroundTruth(ConfigCombo config, Lis } protected EvaluationResult evaluateConfigWithGroundTruthAndILD(ConfigCombo config, List testUsers) { - LlmRecommendationService service = createRecommendationService( - createProperties(config.getTitleWeight(), config.getSummaryWeight(), config.getContentWeight(), config.getMmrLambda())); + RecommendationProperties props = createProperties(config.getTitleWeight(), config.getSummaryWeight(), config.getContentWeight(), config.getMmrLambda()); List metrics = testUsers.stream() - .map(user -> evaluateUserWithGroundTruthAndILD(user, service)) + .map(user -> evaluateUserWithGroundTruthAndILD(user, props)) .filter(Optional::isPresent) .map(Optional::get) .toList(); @@ -268,4 +233,4 @@ protected void printLambdaOptimizationResult(EvaluationResult result) { log.info(String.format("%-25s | %.4f | %.4f | %.4f | %.4f", result.getConfigName(), result.getAvgRecall8(), result.getAvgNdcg8(), result.getAvgIld(), result.getCompositeScore())); } -} +} \ No newline at end of file From 7a3781387b5be2511e59c32c07cdf784308b2ec4 Mon Sep 17 00:00:00 2001 From: dmori Date: Fri, 6 Feb 2026 22:34:42 +0900 Subject: [PATCH 08/21] =?UTF-8?q?improve:=20LLM=20=ED=94=84=EB=A1=9C?= =?UTF-8?q?=ED=95=84=20=EC=83=9D=EC=84=B1=20=EC=8B=9C=20=ED=82=A4=EC=9B=8C?= 
=?UTF-8?q?=EB=93=9C=EB=8F=84=20=EC=B6=94=EC=B6=9C=ED=95=98=EB=8F=84?= =?UTF-8?q?=EB=A1=9D=20=EB=B3=80=EA=B2=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../user/document/UserProfileDocument.java | 9 +- .../user/service/UserProfileService.java | 88 ++++++++++++++----- 2 files changed, 72 insertions(+), 25 deletions(-) diff --git a/src/main/java/com/techfork/domain/user/document/UserProfileDocument.java b/src/main/java/com/techfork/domain/user/document/UserProfileDocument.java index 716c96c..b23c774 100644 --- a/src/main/java/com/techfork/domain/user/document/UserProfileDocument.java +++ b/src/main/java/com/techfork/domain/user/document/UserProfileDocument.java @@ -36,28 +36,33 @@ public class UserProfileDocument { @Field(type = FieldType.Keyword) private List interests; + @Field(type = FieldType.Keyword) + private List keyKeywords; + @Field(type = FieldType.Date) @Transient private LocalDateTime generatedAt; @Builder private UserProfileDocument(Long userId, String profileText, float[] profileVector, - List interests, LocalDateTime generatedAt) { + List interests, List keyKeywords, LocalDateTime generatedAt) { this.id = String.valueOf(userId); this.userId = userId; this.profileText = profileText; this.profileVector = profileVector; this.interests = interests; + this.keyKeywords = keyKeywords; this.generatedAt = generatedAt; } public static UserProfileDocument create(Long userId, String profileText, float[] profileVector, - List interests) { + List interests, List keyKeywords) { return UserProfileDocument.builder() .userId(userId) .profileText(profileText) .profileVector(profileVector) .interests(interests) + .keyKeywords(keyKeywords) .generatedAt(LocalDateTime.now()) .build(); } diff --git a/src/main/java/com/techfork/domain/user/service/UserProfileService.java b/src/main/java/com/techfork/domain/user/service/UserProfileService.java index 7a93d49..a45ad9c 100644 --- 
a/src/main/java/com/techfork/domain/user/service/UserProfileService.java +++ b/src/main/java/com/techfork/domain/user/service/UserProfileService.java @@ -25,6 +25,7 @@ import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; +import java.util.Arrays; import java.util.List; import java.util.stream.Collectors; @@ -57,14 +58,17 @@ public void generateUserProfile(Long userId) { public void generateUserProfileSync(Long userId) { try { UserActivityData activityData = collectUserActivityData(userId); - String profileText = generateProfileTextWithLLM(activityData); - float[] profileVector = generateEmbeddingVector(profileText); + String llmResponse = generateProfileTextWithLLM(activityData); + + ProfileAndKeywords parsed = parseProfileAndKeywords(llmResponse); + float[] profileVector = generateEmbeddingVector(parsed.profileText); UserProfileDocument profileDocument = UserProfileDocument.create( userId, - profileText, + parsed.profileText, profileVector, - activityData.interests + activityData.interests, + parsed.keyKeywords ); userProfileDocumentRepository.save(profileDocument); @@ -143,7 +147,7 @@ private String generateProfileTextWithLLM(UserActivityData data) { private String buildProfileGenerationPrompt(UserActivityData data) { return String.format(""" - 아래 사용자의 활동 데이터를 분석하여 검색 고도화와 포스트 추천에 최적화된 프로필을 생성해주세요. + 아래 사용자의 활동 데이터를 분석하여 검색 리랭킹과 포스트 추천에 최적화된 프로필을 생성해주세요. ## 사용자 데이터 @@ -161,28 +165,29 @@ private String buildProfileGenerationPrompt(UserActivityData data) { ## 요구사항 - 다음 형식으로 구조화된 프로필을 생성해주세요: - - 1. **기술적 관심사 요약** (2-3문장) - - 사용자가 주로 관심을 갖는 기술 스택, 프레임워크, 도구 - - 선호하는 개발 분야 (백엔드, 프론트엔드, AI, 인프라 등) + 반드시 아래 형식으로 응답해주세요: - 2. **콘텐츠 선호 패턴** (2-3문장) - - 읽은 포스트와 스크랩한 포스트를 분석하여 선호하는 주제와 기술 파악 - - 선호하는 회사/팀이나 콘텐츠 유형 (튜토리얼, 아키텍처, 트러블슈팅 등) + ### PROFILE + 사용자의 기술적 관심사, 학습 패턴, 선호도를 의미 밀도 높고 풍부하게 표현한 텍스트를 작성하세요 (200-300자 정도). - 3. 
**검색 의도 분석** (2-3문장) - - 검색 기록에서 드러나는 학습 목적이나 해결하려는 문제 - - 반복되는 검색 주제나 패턴 + 다음 내용을 모두 포함하되 자연스러운 문장으로 작성: + 1. 주요 관심 기술 스택과 개발 분야 (백엔드/프론트엔드/인프라/AI 등) + 2. 선호하는 주제와 학습 방향 (아키텍처 설계, 성능 최적화, 트러블슈팅, 신기술 탐구 등) + 3. 읽은 포스트와 검색 기록에서 드러나는 구체적인 관심사 + 4. 현재 해결하려는 문제나 학습 중인 영역 + 5. 콘텐츠 선호 패턴 (심화 기술, 실전 경험, 튜토리얼 등) - 4. **추천 키워드** (쉼표로 구분된 15-20개의 키워드) - - 검색 쿼리 확장에 사용할 관련 기술 용어 - - 유사한 관심사를 가진 사용자가 찾을 만한 키워드 - - 영문과 한글 키워드 모두 포함 + 주의사항: + - 마크다운 없이 순수 텍스트로만 작성 (볼드, 이탤릭, 리스트, 번호 금지) + - 구체적인 기술 용어를 많이 사용하여 임베딩 품질 향상 + - "관심이 있습니다", "선호합니다" 같은 메타 표현 대신 직접적인 기술 용어 나열 - 5. **프로필 요약** (1-2문장, 벡터 임베딩 최적화용) - - 사용자의 기술적 페르소나를 한 줄로 압축 - - 추천 시스템이 유사 사용자를 찾는데 활용할 핵심 설명 + ### KEYWORDS + 사용자의 현재 관심사를 가장 잘 대표하는 핵심 키워드 3-5개를 쉼표로 구분하여 나열하세요. + - 구체적이고 검색 의도가 명확한 키워드만 선택 + - BM25 검색에 사용되므로 검색어로 자주 쓰일 만한 용어 선택 + - 예: Kubernetes, React hooks, 분산 트랜잭션, 성능 최적화, MSA + - 영문과 한글 혼용 가능 데이터가 부족한 경우 관심 기술 스택을 기반으로 일반적인 프로필을 생성해주세요. """, @@ -247,6 +252,43 @@ private String convertReadingDurationToNaturalLanguage(Integer durationSeconds) } } + private ProfileAndKeywords parseProfileAndKeywords(String llmResponse) { + String profileText = ""; + List keyKeywords = List.of(); + + try { + // PROFILE 섹션 추출 + int profileStart = llmResponse.indexOf("### PROFILE"); + int keywordsStart = llmResponse.indexOf("### KEYWORDS"); + + if (profileStart != -1 && keywordsStart != -1) { + profileText = llmResponse.substring(profileStart + "### PROFILE".length(), keywordsStart) + .trim(); + + String keywordsSection = llmResponse.substring(keywordsStart + "### KEYWORDS".length()) + .trim(); + + // 쉼표로 구분된 키워드 파싱 + keyKeywords = Arrays.stream(keywordsSection.split(",")) + .map(String::trim) + .filter(s -> !s.isEmpty()) + .limit(5) // 최대 5개 + .toList(); + } else { + // 파싱 실패 시 전체 텍스트를 프로필로 사용 + log.warn("Failed to parse LLM response sections, using full text as profile"); + profileText = llmResponse; + } + } catch (Exception e) { + log.error("Error parsing LLM response", e); + profileText = llmResponse; + } + + return new 
ProfileAndKeywords(profileText, keyKeywords); + } + + private record ProfileAndKeywords(String profileText, List keyKeywords) {} + private record UserActivityData( List interests, List readPostData, From e27eeb16e248705f164b84e2a8defa1bbe690430 Mon Sep 17 00:00:00 2001 From: dmori Date: Fri, 6 Feb 2026 22:36:01 +0900 Subject: [PATCH 09/21] =?UTF-8?q?improve:=20=EC=B6=94=EC=B2=9C=20=EB=A1=9C?= =?UTF-8?q?=EC=A7=81=EC=97=90=EC=84=9C=20BM25=20=EA=B2=80=EC=83=89?= =?UTF-8?q?=EB=8F=84=20=EC=A7=84=ED=96=89=ED=95=9C=20=EB=92=A4=20RRF?= =?UTF-8?q?=EB=A1=9C=20=EA=B2=B0=ED=95=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../service/LlmRecommendationService.java | 70 ++++++++++++++-- .../query/VectorQueryBuilder.java | 11 +++ .../query/VectorSearchQueryBuilder.java | 41 +++++++++ .../com/techfork/global/util/RrfScorer.java | 48 +++++++++++ .../RecommendationEvaluationService.java | 83 +++++++++++++------ 5 files changed, 217 insertions(+), 36 deletions(-) create mode 100644 src/main/java/com/techfork/global/util/RrfScorer.java diff --git a/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java b/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java index c48ee89..433b5ad 100644 --- a/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java +++ b/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java @@ -20,6 +20,7 @@ import com.techfork.domain.user.document.UserProfileDocument; import com.techfork.domain.user.entity.User; import com.techfork.domain.user.repository.UserProfileDocumentRepository; +import com.techfork.global.util.RrfScorer; import com.techfork.global.util.TimeDecayStrategy; import com.techfork.global.util.VectorUtil; import lombok.RequiredArgsConstructor; @@ -116,9 +117,13 @@ private List searchCandidates(float[] userProfileVector, User user .map(readPost -> readPost.getPost().getId()) 
.collect(Collectors.toSet()); + Optional profileOpt = userProfileDocumentRepository.findByUserId(user.getId()); + List keyKeywords = profileOpt.map(UserProfileDocument::getKeyKeywords).orElse(List.of()); + RecommendationProperties.EmbeddingWeights weights = properties.getEmbeddingWeights(); Query filterQuery = vectorQueryBuilder.createExcludeFilter(readPostIds); + // 1. kNN 검색 List knnSearches = vectorQueryBuilder.createKnnSearches( TITLE_EMBEDDING_FIELD, SUMMARY_EMBEDDING_FIELD, CONTENT_CHUNKS_EMBEDDING_FIELD, userProfileVector, weights.getTitle(), weights.getSummary(), weights.getContent(), @@ -126,7 +131,7 @@ private List searchCandidates(float[] userProfileVector, User user ); long startTime = System.currentTimeMillis(); - SearchResponse response = elasticsearchClient.search(s -> s + SearchResponse vectorResponse = elasticsearchClient.search(s -> s .index(POSTS_INDEX) .knn(knnSearches) .size(properties.getKnnSearchSize()) @@ -134,26 +139,73 @@ private List searchCandidates(float[] userProfileVector, User user PostDocument.class ); - log.info("후보군 검색 완료: {} 개, 소요 시간: {}ms", - response.hits().hits().size(), System.currentTimeMillis() - startTime); + List> vectorHits = vectorResponse.hits().hits(); + + // 2. BM25 검색 + Query bm25Query = vectorQueryBuilder.createBm25Query( + keyKeywords, weights.getTitle(), weights.getSummary(), weights.getContent() + ); + + SearchResponse keywordResponse = elasticsearchClient.search(s -> s + .index(POSTS_INDEX) + .query(q -> q.bool(b -> { + b.must(bm25Query); + if (filterQuery != null) b.filter(filterQuery); + return b; + })) + .size(properties.getKnnSearchSize()), + PostDocument.class + ); + List> keywordHits = keywordResponse.hits().hits(); + + log.info("후보군 검색 완료: kNN {} 개, BM25 {} 개, 소요 시간: {}ms", + vectorHits.size(), keywordHits.size(), System.currentTimeMillis() - startTime); + + // 3. 
RRF로 결합 + return applyRrf(vectorHits, keywordHits); + } - return response.hits().hits().stream() + protected List applyRrf(List> vectorHits, List> keywordHits) { + // Post ID 리스트 추출 (null 체크) + List vectorPostIds = vectorHits.stream() .filter(hit -> hit.source() != null) - .map(this::mapToMmrCandidate) + .map(hit -> hit.source().getPostId()) + .toList(); + + List keywordPostIds = keywordHits.stream() + .filter(hit -> hit.source() != null) + .map(hit -> hit.source().getPostId()) + .toList(); + + // RRF 스코어 계산 + Map rrfScores = RrfScorer.calculateRrfScores(vectorPostIds, keywordPostIds); + + // Hit을 postId 기준으로 맵핑 + Map> hitMap = new HashMap<>(); + vectorHits.stream() + .filter(hit -> hit.source() != null) + .forEach(hit -> hitMap.putIfAbsent(hit.source().getPostId(), hit)); + keywordHits.stream() + .filter(hit -> hit.source() != null) + .forEach(hit -> hitMap.putIfAbsent(hit.source().getPostId(), hit)); + + // RRF 스코어 순으로 정렬하여 MMR Candidate 생성 + return rrfScores.entrySet().stream() + .sorted(Map.Entry.comparingByValue().reversed()) + .map(entry -> mapToMmrCandidate(hitMap.get(entry.getKey()), entry.getValue())) .filter(candidate -> candidate.getSummaryVector() != null) .toList(); } - private MmrCandidate mapToMmrCandidate(Hit hit) { + protected MmrCandidate mapToMmrCandidate(Hit hit, double rrfScore) { PostDocument doc = hit.source(); - double score = Objects.requireNonNullElse(hit.score(), 0.0); double timeDecayWeight = timeDecayStrategy.calculateWeight(Objects.requireNonNull(doc).getPublishedAt()); - + return MmrCandidate.builder() .postId(doc.getPostId()) .titleVector(VectorUtil.convertToFloatArray(doc.getTitleEmbedding())) .summaryVector(VectorUtil.convertToFloatArray(doc.getSummaryEmbedding())) - .similarityScore(score * timeDecayWeight) + .similarityScore(rrfScore * timeDecayWeight) .build(); } } diff --git a/src/main/java/com/techfork/global/elasticsearch/query/VectorQueryBuilder.java 
b/src/main/java/com/techfork/global/elasticsearch/query/VectorQueryBuilder.java index 7d0c0c7..16a2c98 100644 --- a/src/main/java/com/techfork/global/elasticsearch/query/VectorQueryBuilder.java +++ b/src/main/java/com/techfork/global/elasticsearch/query/VectorQueryBuilder.java @@ -47,4 +47,15 @@ List createKnnSearches( int numCandidates, Query filter ); + + /** + * BM25 키워드 검색 쿼리 생성 + * + * @param keywords 검색할 키워드 리스트 + * @param titleBoost 제목 필드 가중치 + * @param summaryBoost 요약 필드 가중치 + * @param contentBoost 본문 필드 가중치 + * @return BM25 검색 Query 객체 (키워드가 없으면 null) + */ + Query createBm25Query(List keywords, float titleBoost, float summaryBoost, float contentBoost); } \ No newline at end of file diff --git a/src/main/java/com/techfork/global/elasticsearch/query/VectorSearchQueryBuilder.java b/src/main/java/com/techfork/global/elasticsearch/query/VectorSearchQueryBuilder.java index 0b06d72..33e977e 100644 --- a/src/main/java/com/techfork/global/elasticsearch/query/VectorSearchQueryBuilder.java +++ b/src/main/java/com/techfork/global/elasticsearch/query/VectorSearchQueryBuilder.java @@ -104,4 +104,45 @@ public List createKnnSearches( return knnSearches; } + + @Override + public Query createBm25Query(List keywords, float titleBoost, float summaryBoost, float contentBoost) { + if (keywords == null || keywords.isEmpty()) { + return null; + } + + String combinedKeywords = String.join(" ", keywords); + + return Query.of(q -> q + .bool(b -> b + .should(s -> s + .match(m -> m + .field("title") + .query(combinedKeywords) + .boost(titleBoost) + ) + ) + .should(s -> s + .match(m -> m + .field("summary") + .query(combinedKeywords) + .boost(summaryBoost) + ) + ) + .should(s -> s + .nested(n -> n + .path("contentChunks") + .query(nq -> nq + .match(m -> m + .field("contentChunks.text") + .query(combinedKeywords) + ) + ) + .boost(contentBoost) + ) + ) + .minimumShouldMatch("1") + ) + ); + } } \ No newline at end of file diff --git a/src/main/java/com/techfork/global/util/RrfScorer.java 
b/src/main/java/com/techfork/global/util/RrfScorer.java new file mode 100644 index 0000000..2360f43 --- /dev/null +++ b/src/main/java/com/techfork/global/util/RrfScorer.java @@ -0,0 +1,48 @@ +package com.techfork.global.util; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Reciprocal Rank Fusion (RRF) 스코어 계산 유틸리티 + * 여러 검색 결과를 결합하여 하나의 통합 점수를 생성 + * k=60 고정 사용 + */ +public class RrfScorer { + + private static final int K = 60; + + /** + * RRF 스코어 계산 (k=60 고정) + * + * @param resultsLists 여러 검색 결과 리스트 (각 리스트는 순위대로 정렬되어 있어야 함) + * @param 결과 항목의 타입 + * @return 각 항목의 ID와 RRF 스코어 맵 + */ + public static Map calculateRrfScores(List> resultsLists) { + Map rrfScores = new HashMap<>(); + + for (List results : resultsLists) { + for (int rank = 0; rank < results.size(); rank++) { + T item = results.get(rank); + double score = 1.0 / (K + rank + 1); + rrfScores.merge(item, score, Double::sum); + } + } + + return rrfScores; + } + + /** + * 두 개의 검색 결과를 RRF로 결합 (k=60 고정) + * + * @param firstResults 첫 번째 검색 결과 + * @param secondResults 두 번째 검색 결과 + * @param 결과 항목의 타입 + * @return 각 항목의 ID와 RRF 스코어 맵 + */ + public static Map calculateRrfScores(List firstResults, List secondResults) { + return calculateRrfScores(List.of(firstResults, secondResults)); + } +} \ No newline at end of file diff --git a/src/test/java/com/techfork/domain/recommendation/evaluation/RecommendationEvaluationService.java b/src/test/java/com/techfork/domain/recommendation/evaluation/RecommendationEvaluationService.java index 3c825fd..8ab4ea3 100644 --- a/src/test/java/com/techfork/domain/recommendation/evaluation/RecommendationEvaluationService.java +++ b/src/test/java/com/techfork/domain/recommendation/evaluation/RecommendationEvaluationService.java @@ -5,8 +5,15 @@ import co.elastic.clients.elasticsearch.core.search.Hit; import co.elastic.clients.elasticsearch._types.query_dsl.Query; import co.elastic.clients.elasticsearch._types.KnnSearch; +import 
com.techfork.domain.activity.repository.ReadPostRepository; import com.techfork.domain.post.document.PostDocument; +import com.techfork.domain.post.entity.Post; +import com.techfork.domain.post.repository.PostRepository; import com.techfork.domain.recommendation.config.RecommendationProperties; +import com.techfork.domain.recommendation.entity.RecommendedPost; +import com.techfork.domain.recommendation.repository.RecommendationHistoryRepository; +import com.techfork.domain.recommendation.repository.RecommendedPostRepository; +import com.techfork.domain.recommendation.service.LlmRecommendationService; import com.techfork.domain.recommendation.service.MmrService; import com.techfork.domain.recommendation.service.MmrService.MmrCandidate; import com.techfork.domain.recommendation.service.MmrService.MmrResult; @@ -15,33 +22,49 @@ import com.techfork.domain.user.repository.UserProfileDocumentRepository; import com.techfork.global.elasticsearch.query.VectorQueryBuilder; import com.techfork.global.util.TimeDecayStrategy; -import com.techfork.global.util.VectorUtil; -import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springframework.stereotype.Service; import java.io.IOException; import java.util.*; -import java.util.stream.Collectors; /** * 추천 시스템 성능 평가를 위한 전용 서비스 + * LlmRecommendationService를 상속하여 RRF, MMR 로직 재사용 */ @Slf4j @Service -@RequiredArgsConstructor -public class RecommendationEvaluationService { +public class RecommendationEvaluationService extends LlmRecommendationService { - private final ElasticsearchClient elasticsearchClient; private final UserProfileDocumentRepository userProfileDocumentRepository; private final VectorQueryBuilder vectorQueryBuilder; - private final TimeDecayStrategy timeDecayStrategy; + private final ElasticsearchClient elasticsearchClient; private static final String POSTS_INDEX = "posts"; private static final String TITLE_EMBEDDING_FIELD = "titleEmbedding"; private static final String 
SUMMARY_EMBEDDING_FIELD = "summaryEmbedding"; private static final String CONTENT_CHUNKS_EMBEDDING_FIELD = "contentChunks.embedding"; + public RecommendationEvaluationService( + ElasticsearchClient elasticsearchClient, + UserProfileDocumentRepository userProfileDocumentRepository, + RecommendedPostRepository recommendedPostRepository, + RecommendationHistoryRepository recommendationHistoryRepository, + ReadPostRepository readPostRepository, + PostRepository postRepository, + MmrService mmrService, + TimeDecayStrategy timeDecayStrategy, + RecommendationProperties properties, + VectorQueryBuilder vectorQueryBuilder + ) { + super(elasticsearchClient, userProfileDocumentRepository, recommendedPostRepository, + recommendationHistoryRepository, readPostRepository, postRepository, + mmrService, timeDecayStrategy, properties, vectorQueryBuilder); + this.elasticsearchClient = elasticsearchClient; + this.userProfileDocumentRepository = userProfileDocumentRepository; + this.vectorQueryBuilder = vectorQueryBuilder; + } + /** * 추천 생성 (평가 전용 - Train/Test Split 지원) */ @@ -51,11 +74,12 @@ public List generateRecommendationsForEvaluation(User user, Set trai return Collections.emptyList(); } - float[] userProfileVector = profileOpt.get().getProfileVector(); - List keywords = profileOpt.get().getInterests(); + UserProfileDocument profile = profileOpt.get(); + float[] userProfileVector = profile.getProfileVector(); + List keyKeywords = profile.getKeyKeywords(); try { - List candidates = searchCandidatesWithCustomReadHistory(userProfileVector, keywords, user, trainPostIds, properties); + List candidates = searchCandidatesWithCustomReadHistory(userProfileVector, keyKeywords, trainPostIds, properties); if (candidates.isEmpty()) { return Collections.emptyList(); @@ -77,14 +101,14 @@ public List generateRecommendationsForEvaluation(User user, Set trai private List searchCandidatesWithCustomReadHistory( float[] userProfileVector, - List keywords, - User user, + List keyKeywords, Set 
readPostIds, RecommendationProperties properties) throws IOException { RecommendationProperties.EmbeddingWeights weights = properties.getEmbeddingWeights(); Query filterQuery = vectorQueryBuilder.createExcludeFilter(readPostIds); + // 1. kNN 검색 List knnSearches = vectorQueryBuilder.createKnnSearches( TITLE_EMBEDDING_FIELD, SUMMARY_EMBEDDING_FIELD, CONTENT_CHUNKS_EMBEDDING_FIELD, userProfileVector, weights.getTitle(), weights.getSummary(), weights.getContent(), @@ -96,21 +120,26 @@ private List searchCandidatesWithCustomReadHistory( PostDocument.class ); - return vectorResponse.hits().hits().stream() - .filter(hit -> hit.source() != null) - .map(hit -> mapToMmrCandidate(hit, hit.score() != null ? hit.score() : 0.0)) - .filter(candidate -> candidate.getSummaryVector() != null) - .toList(); - } + List> vectorHits = vectorResponse.hits().hits(); + + // 2. BM25 검색 + Query bm25Query = vectorQueryBuilder.createBm25Query( + keyKeywords, weights.getTitle(), weights.getSummary(), weights.getContent() + ); + + SearchResponse keywordResponse = elasticsearchClient.search(s -> s + .index(POSTS_INDEX) + .query(q -> q.bool(b -> { + b.must(bm25Query); + if (filterQuery != null) b.filter(filterQuery); + return b; + })) + .size(properties.getKnnSearchSize()), + PostDocument.class + ); + List> keywordHits = keywordResponse.hits().hits(); - private MmrCandidate mapToMmrCandidate(Hit hit, double score) { - PostDocument doc = hit.source(); - double timeDecayWeight = timeDecayStrategy.calculateWeight(Objects.requireNonNull(doc).getPublishedAt()); - return MmrCandidate.builder() - .postId(doc.getPostId()) - .titleVector(VectorUtil.convertToFloatArray(doc.getTitleEmbedding())) - .summaryVector(VectorUtil.convertToFloatArray(doc.getSummaryEmbedding())) - .similarityScore(score * timeDecayWeight) - .build(); + // 3. 
RRF로 결합 (부모 클래스의 protected 메서드 사용) + return applyRrf(vectorHits, keywordHits); } } \ No newline at end of file From ee65ac3449e373a837fb1f7d2dba4b907f049b23 Mon Sep 17 00:00:00 2001 From: dmori Date: Fri, 6 Feb 2026 22:36:18 +0900 Subject: [PATCH 10/21] =?UTF-8?q?refactor:=20RRF=20=EB=A1=9C=EC=A7=81?= =?UTF-8?q?=EC=9D=84=20=EA=B3=B5=ED=86=B5=20=ED=81=B4=EB=9E=98=EC=8A=A4?= =?UTF-8?q?=EC=97=90=EC=84=9C=20=EC=88=98=ED=96=89=ED=95=98=EB=8F=84?= =?UTF-8?q?=EB=A1=9D=20=EB=B3=80=EA=B2=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../search/service/SearchServiceImpl.java | 72 ++++++------------- 1 file changed, 23 insertions(+), 49 deletions(-) diff --git a/src/main/java/com/techfork/domain/search/service/SearchServiceImpl.java b/src/main/java/com/techfork/domain/search/service/SearchServiceImpl.java index ab7ca46..7a465ca 100644 --- a/src/main/java/com/techfork/domain/search/service/SearchServiceImpl.java +++ b/src/main/java/com/techfork/domain/search/service/SearchServiceImpl.java @@ -14,6 +14,7 @@ import com.techfork.domain.user.document.UserProfileDocument; import com.techfork.domain.user.repository.UserProfileDocumentRepository; import com.techfork.global.llm.EmbeddingClient; +import com.techfork.global.util.RrfScorer; import com.techfork.global.util.VectorUtil; import java.io.IOException; import java.util.ArrayList; @@ -245,21 +246,29 @@ private KnnSearch createKnnSearch(String field, List vector, int k, int n } private List calculateRRF(List> lexicalHits, List> semanticHits) { - Map lexicalRankMap = new HashMap<>(); - AtomicInteger rank = new AtomicInteger(1); - lexicalHits.forEach(hit -> lexicalRankMap.put(hit.id(), rank.getAndIncrement())); - - Map semanticRankMap = new HashMap<>(); - rank.set(1); - semanticHits.forEach(hit -> semanticRankMap.put(hit.id(), rank.getAndIncrement())); - - Map combinedResults = new HashMap<>(); - Map rrfScores = new HashMap<>(); - - processHitsForRRF(lexicalHits, lexicalRankMap, 
rrfScores, combinedResults); - processHitsForRRF(semanticHits, semanticRankMap, rrfScores, combinedResults); + // Hit ID 리스트 추출 + List lexicalIds = lexicalHits.stream().map(Hit::id).toList(); + List semanticIds = semanticHits.stream().map(Hit::id).toList(); + + // RRF 스코어 계산 + Map rrfScores = RrfScorer.calculateRrfScores(lexicalIds, semanticIds); + + // Hit을 docId 기준으로 맵핑 (semantic 우선 - 벡터 포함 보장) + Map> hitMap = new HashMap<>(); + lexicalHits.forEach(hit -> hitMap.put(hit.id(), hit)); + semanticHits.forEach(hit -> hitMap.put(hit.id(), hit)); // semantic 결과로 덮어쓰기 (벡터 포함) + + // SearchResult로 변환 + Map resultMap = new HashMap<>(); + for (Map.Entry> entry : hitMap.entrySet()) { + String docId = entry.getKey(); + Hit hit = entry.getValue(); + SearchResult result = mapToSearchResult(hit); + resultMap.put(docId, result); + } - return combinedResults.values().stream() + // 최종 스코어 적용 및 정렬 + return resultMap.values().stream() .map(searchResult -> { double finalScore = rrfScores.get(searchResult.getPostId().toString()); return searchResult.toBuilder() @@ -272,41 +281,6 @@ private List calculateRRF(List> lexicalHits, Lis .collect(Collectors.toList()); } - private void processHitsForRRF(List> hits, - Map rankMap, - Map rrfScores, - Map combinedResults) { - hits.forEach(hit -> { - String docId = hit.id(); - double score = 1.0 / (generalSearchProperties.getRRF_K() + rankMap.get(docId)); - rrfScores.merge(docId, score, Double::sum); - - SearchResult newResult = mapToSearchResult(hit); - - if (!combinedResults.containsKey(docId)) { - combinedResults.put(docId, newResult); - } else { - SearchResult existing = combinedResults.get(docId); - boolean needUpdate = false; - SearchResult.SearchResultBuilder builder = existing.toBuilder(); - - if (existing.getTitleVector() == null && newResult.getTitleVector() != null) { - builder.titleVector(newResult.getTitleVector()); - needUpdate = true; - } - - if (existing.getSummaryVector() == null && newResult.getSummaryVector() != null) { - 
builder.summaryVector(newResult.getSummaryVector()); - needUpdate = true; - } - - if (needUpdate) { - combinedResults.put(docId, builder.build()); - } - } - }); - } - private SearchResult mapToSearchResult(Hit hit) { PostDocument doc = hit.source(); double score = Objects.requireNonNullElse(hit.score(), 0.0); From 432fc1af22264aeeda466492ec3c7abf337b0c48 Mon Sep 17 00:00:00 2001 From: dmori Date: Sat, 7 Feb 2026 00:16:50 +0900 Subject: [PATCH 11/21] =?UTF-8?q?fix:=20ScrapPost=20=EC=83=9D=EC=84=B1=20?= =?UTF-8?q?=EC=8B=9C=20readPosts=EC=9D=98=20=EC=A4=91=EB=B3=B5=EC=9D=84=20?= =?UTF-8?q?=EC=A0=9C=EA=B1=B0=ED=95=98=EC=97=AC=20unique=20=EC=A0=9C?= =?UTF-8?q?=EC=95=BD=20=EB=A7=8C=EC=A1=B1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../setup/components/UserTestDataBuilder.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/test/java/com/techfork/domain/recommendation/setup/components/UserTestDataBuilder.java b/src/test/java/com/techfork/domain/recommendation/setup/components/UserTestDataBuilder.java index e014771..aa91a50 100644 --- a/src/test/java/com/techfork/domain/recommendation/setup/components/UserTestDataBuilder.java +++ b/src/test/java/com/techfork/domain/recommendation/setup/components/UserTestDataBuilder.java @@ -20,10 +20,8 @@ import org.springframework.stereotype.Component; import java.time.LocalDateTime; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.UUID; +import java.util.*; +import java.util.stream.Collectors; @Slf4j @Component @@ -97,13 +95,15 @@ public void createReadPosts(User user, List posts) { public void createScrapPosts(User user, List readPosts, int scrapCount) { LocalDateTime now = LocalDateTime.now(); - List scrabPosts = new ArrayList<>(); - List postsToScrap = new ArrayList<>(readPosts); + List postsToScrap = readPosts.stream() + .distinct() + .collect(Collectors.toList()); 
Collections.shuffle(postsToScrap); int actualScrapCount = Math.min(scrapCount, postsToScrap.size()); + List scrabPosts = new ArrayList<>(); for (int i = 0; i < actualScrapCount; i++) { Post post = postsToScrap.get(i); ScrabPost scrabPost = ScrabPost.create( From bfa7c3c2635b7ce0842291f32be6c7a31e171dce Mon Sep 17 00:00:00 2001 From: dmori Date: Sat, 7 Feb 2026 00:17:50 +0900 Subject: [PATCH 12/21] =?UTF-8?q?improve:=20LlmRecommendationService=20?= =?UTF-8?q?=EA=B0=99=EC=9D=80=20=EC=9D=B4=EB=A6=84=EC=9D=98=20=EB=B9=88=20?= =?UTF-8?q?2=EA=B0=9C=20=ED=95=B4=EA=B2=B0=EC=9D=84=20=EC=9C=84=ED=95=B4?= =?UTF-8?q?=20Primary=20=EC=96=B4=EB=85=B8=ED=85=8C=EC=9D=B4=EC=85=98=20?= =?UTF-8?q?=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../domain/recommendation/service/LlmRecommendationService.java | 2 ++ .../recommendation/evaluation/RecommendationTestBase.java | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java b/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java index 433b5ad..67f3bc2 100644 --- a/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java +++ b/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java @@ -25,6 +25,7 @@ import com.techfork.global.util.VectorUtil; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; +import org.springframework.context.annotation.Primary; import org.springframework.data.domain.PageRequest; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; @@ -38,6 +39,7 @@ */ @Slf4j @Service +@Primary @Transactional @RequiredArgsConstructor public class LlmRecommendationService implements RecommendationService { diff --git 
a/src/test/java/com/techfork/domain/recommendation/evaluation/RecommendationTestBase.java b/src/test/java/com/techfork/domain/recommendation/evaluation/RecommendationTestBase.java index c36580a..44f5b50 100644 --- a/src/test/java/com/techfork/domain/recommendation/evaluation/RecommendationTestBase.java +++ b/src/test/java/com/techfork/domain/recommendation/evaluation/RecommendationTestBase.java @@ -39,7 +39,7 @@ public abstract class RecommendationTestBase extends IntegrationTestBase { @Autowired protected EvaluationFixtureLoader fixtureLoader; @Autowired protected RecommendationQualityService qualityService; - @Autowired protected RecommendationEvaluationService evaluationService; // 새로운 서비스 주입 + @Autowired protected RecommendationEvaluationService evaluationService; // 새로운 서비스 @Autowired protected PostDocumentRepository postDocumentRepository; @Autowired protected ReadPostRepository readPostRepository; @Autowired protected com.techfork.domain.user.repository.UserRepository userRepository; From 628ecec45bf0435e5fa57a5a9bfc5202b830ba51 Mon Sep 17 00:00:00 2001 From: dmori Date: Sat, 7 Feb 2026 00:18:36 +0900 Subject: [PATCH 13/21] =?UTF-8?q?improve:=20keyKeywords=EB=A5=BC=20?= =?UTF-8?q?=ED=85=8C=EC=8A=A4=ED=8A=B8=20=EC=9C=A0=EC=A0=80=20=ED=94=84?= =?UTF-8?q?=EB=A1=9C=ED=95=84=20=EC=83=9D=EC=84=B1=20=EB=A1=9C=EC=A7=81?= =?UTF-8?q?=EC=97=90=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../domain/recommendation/setup/UserDataSetupAndExporter.java | 1 + .../domain/recommendation/util/EvaluationFixtureLoader.java | 2 ++ 2 files changed, 3 insertions(+) diff --git a/src/test/java/com/techfork/domain/recommendation/setup/UserDataSetupAndExporter.java b/src/test/java/com/techfork/domain/recommendation/setup/UserDataSetupAndExporter.java index bdbf663..e777701 100644 --- a/src/test/java/com/techfork/domain/recommendation/setup/UserDataSetupAndExporter.java +++ 
b/src/test/java/com/techfork/domain/recommendation/setup/UserDataSetupAndExporter.java @@ -327,6 +327,7 @@ private Map convertUserProfileToDto(UserProfileDocument profile) } dto.put("interests", profile.getInterests()); + dto.put("keyKeywords", profile.getKeyKeywords()); return dto; } diff --git a/src/test/java/com/techfork/domain/recommendation/util/EvaluationFixtureLoader.java b/src/test/java/com/techfork/domain/recommendation/util/EvaluationFixtureLoader.java index 9fc365c..6a3fb85 100644 --- a/src/test/java/com/techfork/domain/recommendation/util/EvaluationFixtureLoader.java +++ b/src/test/java/com/techfork/domain/recommendation/util/EvaluationFixtureLoader.java @@ -335,6 +335,7 @@ private int loadUserProfiles(Map userMap) throws IOException { Long originalUserId = ((Number) dto.get("userId")).longValue(); String profileText = (String) dto.get("profileText"); List interests = (List) dto.get("interests"); + List keyKeywords = (List) dto.get("keyKeywords"); // JSON의 원래 User ID를 실제 DB User ID로 매핑 User user = userMap.get(originalUserId); @@ -359,6 +360,7 @@ private int loadUserProfiles(Map userMap) throws IOException { .profileText(profileText) .profileVector(profileVector) .interests(interests) + .keyKeywords(keyKeywords) .build(); userProfileDocumentRepository.save(profile); From 85eccf26cca9b6a7da0059c5cccf171ec99db96a Mon Sep 17 00:00:00 2001 From: dmori Date: Sat, 7 Feb 2026 00:47:02 +0900 Subject: [PATCH 14/21] =?UTF-8?q?refactor:=20BM25=20=EC=BF=BC=EB=A6=AC?= =?UTF-8?q?=EB=9E=91=20kNN=20=EC=BF=BC=EB=A6=AC=EB=A5=BC=20=EB=B3=91?= =?UTF-8?q?=EB=A0=AC=EC=A0=81=EC=9C=BC=EB=A1=9C=20=EC=88=98=ED=96=89?= =?UTF-8?q?=ED=95=98=EB=8F=84=EB=A1=9D=20=EB=B3=80=EA=B2=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../service/LlmRecommendationService.java | 72 ++++++++++++------- .../RecommendationEvaluationService.java | 66 +++++++++++------ 2 files changed, 92 insertions(+), 46 deletions(-) diff --git 
a/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java b/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java index 67f3bc2..b8eb8a5 100644 --- a/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java +++ b/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java @@ -32,6 +32,7 @@ import java.io.IOException; import java.util.*; +import java.util.concurrent.CompletableFuture; import java.util.stream.Collectors; /** @@ -125,45 +126,66 @@ private List searchCandidates(float[] userProfileVector, User user RecommendationProperties.EmbeddingWeights weights = properties.getEmbeddingWeights(); Query filterQuery = vectorQueryBuilder.createExcludeFilter(readPostIds); - // 1. kNN 검색 + // 1. kNN 검색 쿼리 준비 List knnSearches = vectorQueryBuilder.createKnnSearches( TITLE_EMBEDDING_FIELD, SUMMARY_EMBEDDING_FIELD, CONTENT_CHUNKS_EMBEDDING_FIELD, userProfileVector, weights.getTitle(), weights.getSummary(), weights.getContent(), properties.getKnnSearchSize(), properties.getNumCandidates(), filterQuery ); - long startTime = System.currentTimeMillis(); - SearchResponse vectorResponse = elasticsearchClient.search(s -> s - .index(POSTS_INDEX) - .knn(knnSearches) - .size(properties.getKnnSearchSize()) - , - PostDocument.class - ); - - List> vectorHits = vectorResponse.hits().hits(); - - // 2. BM25 검색 + // 2. BM25 검색 쿼리 준비 Query bm25Query = vectorQueryBuilder.createBm25Query( keyKeywords, weights.getTitle(), weights.getSummary(), weights.getContent() ); - SearchResponse keywordResponse = elasticsearchClient.search(s -> s - .index(POSTS_INDEX) - .query(q -> q.bool(b -> { - b.must(bm25Query); - if (filterQuery != null) b.filter(filterQuery); - return b; - })) - .size(properties.getKnnSearchSize()), - PostDocument.class - ); - List> keywordHits = keywordResponse.hits().hits(); + long startTime = System.currentTimeMillis(); + + // 3. 
kNN과 BM25 검색 병렬 실행 + CompletableFuture>> vectorSearchFuture = CompletableFuture.supplyAsync(() -> { + try { + SearchResponse response = elasticsearchClient.search(s -> s + .index(POSTS_INDEX) + .knn(knnSearches) + .size(properties.getKnnSearchSize()), + PostDocument.class + ); + return response.hits().hits(); + } catch (IOException e) { + log.error("kNN 검색 실패", e); + return Collections.emptyList(); + } + }); + + CompletableFuture>> keywordSearchFuture = CompletableFuture.supplyAsync(() -> { + try { + SearchResponse response = elasticsearchClient.search(s -> s + .index(POSTS_INDEX) + .query(q -> q.bool(b -> { + b.must(bm25Query); + if (filterQuery != null) b.filter(filterQuery); + return b; + })) + .size(properties.getKnnSearchSize()), + PostDocument.class + ); + return response.hits().hits(); + } catch (IOException e) { + log.error("BM25 검색 실패", e); + return Collections.emptyList(); + } + }); + + // 4. 두 검색 완료 대기 + CompletableFuture allSearches = CompletableFuture.allOf(vectorSearchFuture, keywordSearchFuture); + allSearches.join(); + + List> vectorHits = vectorSearchFuture.join(); + List> keywordHits = keywordSearchFuture.join(); log.info("후보군 검색 완료: kNN {} 개, BM25 {} 개, 소요 시간: {}ms", vectorHits.size(), keywordHits.size(), System.currentTimeMillis() - startTime); - // 3. RRF로 결합 + // 5. 
RRF로 결합 return applyRrf(vectorHits, keywordHits); } diff --git a/src/test/java/com/techfork/domain/recommendation/evaluation/RecommendationEvaluationService.java b/src/test/java/com/techfork/domain/recommendation/evaluation/RecommendationEvaluationService.java index 8ab4ea3..63f9ea5 100644 --- a/src/test/java/com/techfork/domain/recommendation/evaluation/RecommendationEvaluationService.java +++ b/src/test/java/com/techfork/domain/recommendation/evaluation/RecommendationEvaluationService.java @@ -27,6 +27,7 @@ import java.io.IOException; import java.util.*; +import java.util.concurrent.CompletableFuture; /** * 추천 시스템 성능 평가를 위한 전용 서비스 @@ -108,38 +109,61 @@ private List searchCandidatesWithCustomReadHistory( RecommendationProperties.EmbeddingWeights weights = properties.getEmbeddingWeights(); Query filterQuery = vectorQueryBuilder.createExcludeFilter(readPostIds); - // 1. kNN 검색 + // 1. kNN 검색 쿼리 준비 List knnSearches = vectorQueryBuilder.createKnnSearches( TITLE_EMBEDDING_FIELD, SUMMARY_EMBEDDING_FIELD, CONTENT_CHUNKS_EMBEDDING_FIELD, userProfileVector, weights.getTitle(), weights.getSummary(), weights.getContent(), properties.getKnnSearchSize(), properties.getNumCandidates(), filterQuery ); - SearchResponse vectorResponse = elasticsearchClient.search(s -> s - .index(POSTS_INDEX).knn(knnSearches).size(properties.getKnnSearchSize()), - PostDocument.class - ); - - List> vectorHits = vectorResponse.hits().hits(); - - // 2. BM25 검색 + // 2. BM25 검색 쿼리 준비 Query bm25Query = vectorQueryBuilder.createBm25Query( keyKeywords, weights.getTitle(), weights.getSummary(), weights.getContent() ); - SearchResponse keywordResponse = elasticsearchClient.search(s -> s - .index(POSTS_INDEX) - .query(q -> q.bool(b -> { - b.must(bm25Query); - if (filterQuery != null) b.filter(filterQuery); - return b; - })) - .size(properties.getKnnSearchSize()), - PostDocument.class - ); - List> keywordHits = keywordResponse.hits().hits(); + // 3. 
kNN과 BM25 검색 병렬 실행 + CompletableFuture>> vectorSearchFuture = CompletableFuture.supplyAsync(() -> { + try { + SearchResponse response = elasticsearchClient.search(s -> s + .index(POSTS_INDEX) + .knn(knnSearches) + .size(properties.getKnnSearchSize()), + PostDocument.class + ); + return response.hits().hits(); + } catch (IOException e) { + log.error("kNN 검색 실패", e); + return Collections.emptyList(); + } + }); + + CompletableFuture>> keywordSearchFuture = CompletableFuture.supplyAsync(() -> { + try { + SearchResponse response = elasticsearchClient.search(s -> s + .index(POSTS_INDEX) + .query(q -> q.bool(b -> { + b.must(bm25Query); + if (filterQuery != null) b.filter(filterQuery); + return b; + })) + .size(properties.getKnnSearchSize()), + PostDocument.class + ); + return response.hits().hits(); + } catch (IOException e) { + log.error("BM25 검색 실패", e); + return Collections.emptyList(); + } + }); + + // 4. 두 검색 완료 대기 + CompletableFuture allSearches = CompletableFuture.allOf(vectorSearchFuture, keywordSearchFuture); + allSearches.join(); + + List> vectorHits = vectorSearchFuture.join(); + List> keywordHits = keywordSearchFuture.join(); - // 3. RRF로 결합 (부모 클래스의 protected 메서드 사용) + // 5. 
RRF로 결합 (부모 클래스의 protected 메서드 사용) return applyRrf(vectorHits, keywordHits); } } \ No newline at end of file From 5d9e0bfdd8a7df1615215cf855562eda09dcaea2 Mon Sep 17 00:00:00 2001 From: dmori Date: Sat, 7 Feb 2026 01:23:11 +0900 Subject: [PATCH 15/21] =?UTF-8?q?improve:=20=EC=BD=98=ED=85=90=EC=B8=A0=20?= =?UTF-8?q?=EC=B3=A5=ED=81=AC=EC=97=90=20=EB=8C=80=ED=95=B4=EC=84=9C=20BM2?= =?UTF-8?q?5=20=EA=B2=80=EC=83=89=20=ED=95=A0=20=EB=95=8C=20scoreMode?= =?UTF-8?q?=EB=A5=BC=20Max=EB=A1=9C=20=EC=A7=80=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../global/elasticsearch/query/VectorSearchQueryBuilder.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/com/techfork/global/elasticsearch/query/VectorSearchQueryBuilder.java b/src/main/java/com/techfork/global/elasticsearch/query/VectorSearchQueryBuilder.java index 33e977e..7b49748 100644 --- a/src/main/java/com/techfork/global/elasticsearch/query/VectorSearchQueryBuilder.java +++ b/src/main/java/com/techfork/global/elasticsearch/query/VectorSearchQueryBuilder.java @@ -2,6 +2,7 @@ import co.elastic.clients.elasticsearch._types.FieldValue; import co.elastic.clients.elasticsearch._types.KnnSearch; +import co.elastic.clients.elasticsearch._types.query_dsl.ChildScoreMode; import co.elastic.clients.elasticsearch._types.query_dsl.Query; import lombok.AccessLevel; import lombok.NoArgsConstructor; @@ -132,6 +133,7 @@ public Query createBm25Query(List keywords, float titleBoost, float summ .should(s -> s .nested(n -> n .path("contentChunks") + .scoreMode(ChildScoreMode.Max) .query(nq -> nq .match(m -> m .field("contentChunks.text") From b41c19fcf365b61ee5398fcb4c8710466f53003b Mon Sep 17 00:00:00 2001 From: dmori Date: Sat, 7 Feb 2026 01:48:57 +0900 Subject: [PATCH 16/21] =?UTF-8?q?refactor:=20mmr=EC=97=90=20=EB=84=98?= =?UTF-8?q?=EA=B8=B0=EB=8A=94=20=ED=9B=84=EB=B3=B4=EA=B5=B0=EC=9D=98=20?= 
=?UTF-8?q?=EC=88=98=EB=A5=BC=2080=EA=B0=9C=EB=A1=9C=20=EC=A1=B0=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../domain/recommendation/config/RecommendationProperties.java | 2 ++ .../domain/recommendation/service/LlmRecommendationService.java | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/main/java/com/techfork/domain/recommendation/config/RecommendationProperties.java b/src/main/java/com/techfork/domain/recommendation/config/RecommendationProperties.java index 0b97f20..a2b8eb4 100644 --- a/src/main/java/com/techfork/domain/recommendation/config/RecommendationProperties.java +++ b/src/main/java/com/techfork/domain/recommendation/config/RecommendationProperties.java @@ -19,6 +19,8 @@ public class RecommendationProperties { private Integer numCandidates = 200; + private Integer mmrCandidateSize = 80; + private Integer mmrFinalSize = 30; private Double lambda = 0.95; diff --git a/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java b/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java index b8eb8a5..f034438 100644 --- a/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java +++ b/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java @@ -214,8 +214,10 @@ protected List applyRrf(List> vectorHits, List hitMap.putIfAbsent(hit.source().getPostId(), hit)); // RRF 스코어 순으로 정렬하여 MMR Candidate 생성 + // MMR 성능을 위해 상위 N개만 선택 (MMR은 O(n²)이므로 후보 수 제한 필요) return rrfScores.entrySet().stream() .sorted(Map.Entry.comparingByValue().reversed()) + .limit(properties.getMmrCandidateSize()) .map(entry -> mapToMmrCandidate(hitMap.get(entry.getKey()), entry.getValue())) .filter(candidate -> candidate.getSummaryVector() != null) .toList(); From 439f7524244833eb3e6e7feaf7cc5ae4b6c99283 Mon Sep 17 00:00:00 2001 From: dmori Date: Sat, 7 Feb 2026 02:06:20 +0900 Subject: [PATCH 17/21] 
=?UTF-8?q?improve:=20MMR=20=EC=95=8C=EA=B3=A0?= =?UTF-8?q?=EB=A6=AC=EC=A6=98=EC=97=90=EC=84=9C=20top-k=20=EC=83=98?= =?UTF-8?q?=ED=94=8C=EB=A7=81=EC=9C=BC=EB=A1=9C=20=EB=9E=9C=EB=8D=A4?= =?UTF-8?q?=EC=84=B1=20=EB=8F=84=EC=9E=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../recommendation/service/MmrService.java | 59 ++++++++++++------- 1 file changed, 37 insertions(+), 22 deletions(-) diff --git a/src/main/java/com/techfork/domain/recommendation/service/MmrService.java b/src/main/java/com/techfork/domain/recommendation/service/MmrService.java index af72fa6..90565d7 100644 --- a/src/main/java/com/techfork/domain/recommendation/service/MmrService.java +++ b/src/main/java/com/techfork/domain/recommendation/service/MmrService.java @@ -10,6 +10,7 @@ import java.util.ArrayList; import java.util.List; +import java.util.Random; /** * MMR (Maximal Marginal Relevance) 알고리즘 구현 @@ -21,6 +22,7 @@ public class MmrService { private final RecommendationProperties properties; + private final Random random = new Random(); @Getter @Builder @@ -40,6 +42,18 @@ public static class MmrResult { private int rank; } + private static class ScoredCandidate { + MmrCandidate candidate; + double mmrScore; + int originalIndex; + + ScoredCandidate(MmrCandidate candidate, double mmrScore, int originalIndex) { + this.candidate = candidate; + this.mmrScore = mmrScore; + this.originalIndex = originalIndex; + } + } + /** * MMR 알고리즘을 적용하여 다양성을 보장하는 추천 결과 생성 * @@ -61,8 +75,10 @@ public List applyMmr(List candidates) { log.debug("MMR 선택 시작: candidates={}, finalSize={}, lambda={}", candidates.size(), finalSize, lambda); - // 첫 번째는 가장 유사도가 높은 문서 선택 - MmrCandidate first = remainingCandidates.remove(0); + // 첫 번째는 상위 K개 중에서 랜덤하게 선택 (다양성 증가) + int topK = Math.min(5, remainingCandidates.size()); + int randomIndex = random.nextInt(topK); + MmrCandidate first = remainingCandidates.remove(randomIndex); selectedResults.add(MmrResult.builder() 
.postId(first.getPostId()) .similarityScore(first.getSimilarityScore()) @@ -70,32 +86,31 @@ public List applyMmr(List candidates) { .rank(1) .build()); - // 나머지 문서들을 MMR 점수 기반으로 선택 + // 나머지 문서들을 MMR 점수 기반으로 선택 (Top-K 샘플링으로 랜덤성 추가) while (selectedResults.size() < finalSize && !remainingCandidates.isEmpty()) { - MmrCandidate bestCandidate = null; - double bestMmrScore = Double.NEGATIVE_INFINITY; - int bestIndex = -1; - + // 모든 후보의 MMR 점수 계산 + List scoredCandidates = new ArrayList<>(); for (int i = 0; i < remainingCandidates.size(); i++) { MmrCandidate candidate = remainingCandidates.get(i); double mmrScore = calculateMmrScore(candidate, selectedResults, lambda, candidates); - - if (mmrScore > bestMmrScore) { - bestMmrScore = mmrScore; - bestCandidate = candidate; - bestIndex = i; - } + scoredCandidates.add(new ScoredCandidate(candidate, mmrScore, i)); } - if (bestCandidate != null) { - remainingCandidates.remove(bestIndex); - selectedResults.add(MmrResult.builder() - .postId(bestCandidate.getPostId()) - .similarityScore(bestCandidate.getSimilarityScore()) - .mmrScore(bestMmrScore) - .rank(selectedResults.size() + 1) - .build()); - } + // MMR 점수 내림차순 정렬 + scoredCandidates.sort((a, b) -> Double.compare(b.mmrScore, a.mmrScore)); + + // 상위 K개 중에서 랜덤 선택 + int topKForSelection = Math.min(3, scoredCandidates.size()); + int randomIdx = random.nextInt(topKForSelection); + ScoredCandidate selected = scoredCandidates.get(randomIdx); + + remainingCandidates.remove(selected.originalIndex); + selectedResults.add(MmrResult.builder() + .postId(selected.candidate.getPostId()) + .similarityScore(selected.candidate.getSimilarityScore()) + .mmrScore(selected.mmrScore) + .rank(selectedResults.size() + 1) + .build()); } log.info("MMR 선택 완료: 전체 {} 후보 중 {} 개 선택", From d8f8564f37f65772908b579fa0997fc31478eefb Mon Sep 17 00:00:00 2001 From: dmori Date: Sat, 7 Feb 2026 02:27:34 +0900 Subject: [PATCH 18/21] =?UTF-8?q?improve:=20BM25=20=EA=B2=80=EC=83=89?= 
=?UTF-8?q?=EC=97=90=EC=84=9C=20=EA=B2=B0=EA=B3=BC=EA=B0=80=20null?= =?UTF-8?q?=EC=9D=BC=EB=95=8C=20=EC=B2=98=EB=A6=AC=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../recommendation/service/LlmRecommendationService.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java b/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java index f034438..4161aed 100644 --- a/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java +++ b/src/main/java/com/techfork/domain/recommendation/service/LlmRecommendationService.java @@ -157,6 +157,11 @@ private List searchCandidates(float[] userProfileVector, User user }); CompletableFuture>> keywordSearchFuture = CompletableFuture.supplyAsync(() -> { + // 키워드가 없으면 BM25 검색 생략 + if (bm25Query == null) { + log.debug("키워드가 없어 BM25 검색 생략"); + return Collections.emptyList(); + } try { SearchResponse response = elasticsearchClient.search(s -> s .index(POSTS_INDEX) From 67946594604fa829b904e2decc3f5351ca073176 Mon Sep 17 00:00:00 2001 From: dmori Date: Sat, 7 Feb 2026 04:27:00 +0900 Subject: [PATCH 19/21] =?UTF-8?q?test:=20=ED=85=8C=EC=8A=A4=ED=8A=B8=20?= =?UTF-8?q?=EC=BB=A8=ED=85=8C=EC=9D=B4=EB=84=88=20=EC=A4=91=20ES=EC=9D=98?= =?UTF-8?q?=20=EB=A9=94=EB=AA=A8=EB=A6=AC=EB=A5=BC=202GB=EB=A1=9C=20?= =?UTF-8?q?=EC=84=A4=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../techfork/global/configuration/IntegrationTestConfig.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/com/techfork/global/configuration/IntegrationTestConfig.java b/src/test/java/com/techfork/global/configuration/IntegrationTestConfig.java index 0002fed..b0862de 100644 --- a/src/test/java/com/techfork/global/configuration/IntegrationTestConfig.java +++ 
b/src/test/java/com/techfork/global/configuration/IntegrationTestConfig.java @@ -15,7 +15,7 @@ public class IntegrationTestConfig { new ElasticsearchContainer("docker.elastic.co/elasticsearch/elasticsearch:8.18.0") .withEnv("xpack.security.enabled", "false") .withEnv("discovery.type", "single-node") - .withEnv("ES_JAVA_OPTS", "-Xms256m -Xmx256m"); + .withEnv("ES_JAVA_OPTS", "-Xms2g -Xmx2g"); private static final MySQLContainer mysql = new MySQLContainer<>("mysql:8.0.36"); From bea94c1b9512b38ac66bff38cca8f7edb7fa2fea Mon Sep 17 00:00:00 2001 From: dmori Date: Sat, 7 Feb 2026 04:27:47 +0900 Subject: [PATCH 20/21] =?UTF-8?q?improve:=20=EC=B6=94=EC=B2=9C=20=ED=8F=89?= =?UTF-8?q?=EA=B0=80=20=ED=85=8C=EC=8A=A4=ED=8A=B8=20=EC=A4=91=20=EA=B0=81?= =?UTF-8?q?=20=EB=8B=A8=EA=B3=84=EC=9D=98=20=EC=86=8C=EC=9A=94=EC=8B=9C?= =?UTF-8?q?=EA=B0=84=20=ED=99=95=EC=9D=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../RecommendationEvaluationService.java | 35 +++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/src/test/java/com/techfork/domain/recommendation/evaluation/RecommendationEvaluationService.java b/src/test/java/com/techfork/domain/recommendation/evaluation/RecommendationEvaluationService.java index 63f9ea5..cf61246 100644 --- a/src/test/java/com/techfork/domain/recommendation/evaluation/RecommendationEvaluationService.java +++ b/src/test/java/com/techfork/domain/recommendation/evaluation/RecommendationEvaluationService.java @@ -70,6 +70,8 @@ public RecommendationEvaluationService( * 추천 생성 (평가 전용 - Train/Test Split 지원) */ public List generateRecommendationsForEvaluation(User user, Set trainPostIds, RecommendationProperties properties) { + long totalStartTime = System.currentTimeMillis(); + Optional profileOpt = userProfileDocumentRepository.findByUserId(user.getId()); if (profileOpt.isEmpty() || profileOpt.get().getProfileVector() == null) { return Collections.emptyList(); @@ -87,13 +89,21 @@ 
public List generateRecommendationsForEvaluation(User user, Set trai } // MMR 적용 (테스트용 properties 사용) + long mmrStartTime = System.currentTimeMillis(); MmrService mmrService = new MmrService(properties); List mmrResults = mmrService.applyMmr(candidates); + long mmrElapsedTime = System.currentTimeMillis() - mmrStartTime; + log.info("[EVAL] MMR 실행 시간: {}ms (후보 {}개 → 결과 {}개)", mmrElapsedTime, candidates.size(), mmrResults.size()); - return mmrResults.stream() + List result = mmrResults.stream() .map(MmrResult::getPostId) .toList(); + long totalElapsedTime = System.currentTimeMillis() - totalStartTime; + log.info("[EVAL] 전체 추천 로직 실행 시간: {}ms (사용자 ID: {})", totalElapsedTime, user.getId()); + + return result; + } catch (Exception e) { log.error("사용자 {} 평가용 추천 생성 실패", user.getId(), e); return Collections.emptyList(); @@ -122,14 +132,19 @@ private List searchCandidatesWithCustomReadHistory( ); // 3. kNN과 BM25 검색 병렬 실행 + long searchStartTime = System.currentTimeMillis(); + CompletableFuture>> vectorSearchFuture = CompletableFuture.supplyAsync(() -> { try { + long knnStartTime = System.currentTimeMillis(); SearchResponse response = elasticsearchClient.search(s -> s .index(POSTS_INDEX) .knn(knnSearches) .size(properties.getKnnSearchSize()), PostDocument.class ); + long knnElapsedTime = System.currentTimeMillis() - knnStartTime; + log.info("[EVAL] kNN 검색 실행 시간: {}ms", knnElapsedTime); return response.hits().hits(); } catch (IOException e) { log.error("kNN 검색 실패", e); @@ -138,7 +153,13 @@ private List searchCandidatesWithCustomReadHistory( }); CompletableFuture>> keywordSearchFuture = CompletableFuture.supplyAsync(() -> { + // 키워드가 없으면 BM25 검색 생략 + if (bm25Query == null) { + log.debug("[EVAL] 키워드가 없어 BM25 검색 생략"); + return Collections.emptyList(); + } try { + long bm25StartTime = System.currentTimeMillis(); SearchResponse response = elasticsearchClient.search(s -> s .index(POSTS_INDEX) .query(q -> q.bool(b -> { @@ -149,6 +170,8 @@ private List 
searchCandidatesWithCustomReadHistory( .size(properties.getKnnSearchSize()), PostDocument.class ); + long bm25ElapsedTime = System.currentTimeMillis() - bm25StartTime; + log.info("[EVAL] BM25 검색 실행 시간: {}ms", bm25ElapsedTime); return response.hits().hits(); } catch (IOException e) { log.error("BM25 검색 실패", e); @@ -163,7 +186,15 @@ private List searchCandidatesWithCustomReadHistory( List> vectorHits = vectorSearchFuture.join(); List> keywordHits = keywordSearchFuture.join(); + long searchTotalTime = System.currentTimeMillis() - searchStartTime; + log.info("[EVAL] 검색 총 소요 시간: {}ms (kNN: {}개, BM25: {}개)", searchTotalTime, vectorHits.size(), keywordHits.size()); + // 5. RRF로 결합 (부모 클래스의 protected 메서드 사용) - return applyRrf(vectorHits, keywordHits); + long rrfStartTime = System.currentTimeMillis(); + List candidates = applyRrf(vectorHits, keywordHits); + long rrfElapsedTime = System.currentTimeMillis() - rrfStartTime; + log.info("[EVAL] RRF 결합 실행 시간: {}ms (결과: {}개)", rrfElapsedTime, candidates.size()); + + return candidates; } } \ No newline at end of file From 4cff1b5034757ede0bba661389bb9b2cf1772acc Mon Sep 17 00:00:00 2001 From: dmori Date: Sat, 7 Feb 2026 04:29:22 +0900 Subject: [PATCH 21/21] =?UTF-8?q?improve:=20=EC=B6=94=EC=B2=9C=20=EC=86=8C?= =?UTF-8?q?=EC=9A=94=20=EC=8B=9C=EA=B0=84=20=EB=8B=A8=EC=B6=95=EC=9D=84=20?= =?UTF-8?q?=EC=9C=84=ED=95=B4=20=ED=9B=84=EB=B3=B4=EA=B5=B0=20=EC=A1=B0?= =?UTF-8?q?=ED=9A=8C=20=EC=82=AC=EC=9D=B4=EC=A6=88=20=EB=B3=80=EA=B2=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../config/RecommendationProperties.java | 4 +- src/main/resources/application.yml | 4 +- .../evaluation/KValueComparisonTest.java | 250 ++++++++++++++++++ 3 files changed, 254 insertions(+), 4 deletions(-) create mode 100644 src/test/java/com/techfork/domain/recommendation/evaluation/KValueComparisonTest.java diff --git a/src/main/java/com/techfork/domain/recommendation/config/RecommendationProperties.java 
b/src/main/java/com/techfork/domain/recommendation/config/RecommendationProperties.java index a2b8eb4..2505bf8 100644 --- a/src/main/java/com/techfork/domain/recommendation/config/RecommendationProperties.java +++ b/src/main/java/com/techfork/domain/recommendation/config/RecommendationProperties.java @@ -15,9 +15,9 @@ @ConfigurationProperties(prefix = "recommendation") public class RecommendationProperties { - private Integer knnSearchSize = 100; + private Integer knnSearchSize = 80; - private Integer numCandidates = 200; + private Integer numCandidates = 180; private Integer mmrCandidateSize = 80; diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index 001fe86..41491fe 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -94,8 +94,8 @@ webhook: url: ${DISCORD_WEBHOOK_URL} recommendation: - knn-search-size: 100 - num-candidates: 200 + knn-search-size: 80 + num-candidates: 180 mmr-final-size: 30 lambda: 0.95 active-user-hours: 24 diff --git a/src/test/java/com/techfork/domain/recommendation/evaluation/KValueComparisonTest.java b/src/test/java/com/techfork/domain/recommendation/evaluation/KValueComparisonTest.java new file mode 100644 index 0000000..278f60a --- /dev/null +++ b/src/test/java/com/techfork/domain/recommendation/evaluation/KValueComparisonTest.java @@ -0,0 +1,250 @@ +package com.techfork.domain.recommendation.evaluation; + +import com.techfork.domain.recommendation.config.RecommendationProperties; +import com.techfork.domain.user.entity.User; +import lombok.Builder; +import lombok.Getter; +import lombok.extern.slf4j.Slf4j; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; + +import java.util.*; + +/** + * kNN 검색 크기(k) 값에 따른 성능 및 품질 비교 테스트 + */ +@Tag("evaluation") +@Slf4j +public class KValueComparisonTest extends RecommendationTestBase { + + @Test + @DisplayName("knnSearchSize와 numCandidates 값 비교 평가") + void 
compareKValues() { + log.info("===== K 값에 따른 성능 및 품질 비교 ====="); + log.info("Ground-Truth: {} 명 사용자", cachedGroundTruth.size()); + + List kConfigs = createKConfigs(); + List testUsers = getTestUsers(); + log.info("테스트 사용자: {} 명", testUsers.size()); + + printKComparisonHeader(); + List results = evaluateAllKConfigs(kConfigs, testUsers); + printBestKResult(results); + } + + /** + * 테스트할 K 값 설정 생성 + */ + private List createKConfigs() { + return Arrays.asList( + // 현재 기본값 + KConfig.builder().name("현재 (50/100)") + .knnSearchSize(50).numCandidates(100).build(), + + KConfig.builder().name("중간-하 (60/120)") + .knnSearchSize(60).numCandidates(120).build(), + + // 중간 값 + KConfig.builder().name("중간 (70/150)") + .knnSearchSize(70).numCandidates(150).build(), + + KConfig.builder().name("중간-상 (80/180)") + .knnSearchSize(80).numCandidates(180).build(), + + // 이전 값 + KConfig.builder().name("이전 (100/200)") + .knnSearchSize(100).numCandidates(200).build() + ); + } + + /** + * 모든 K 설정에 대해 평가 수행 + */ + private List evaluateAllKConfigs(List kConfigs, List testUsers) { + List results = new ArrayList<>(); + + for (KConfig kConfig : kConfigs) { + long startTime = System.currentTimeMillis(); + + // Properties 생성 + RecommendationProperties properties = new RecommendationProperties(); + properties.setKnnSearchSize(kConfig.knnSearchSize); + properties.setNumCandidates(kConfig.numCandidates); + properties.setMmrCandidateSize(80); + properties.setMmrFinalSize(30); + properties.setLambda(1.0); // 다양성 제외, 관련성만 + + // 가중치는 최적값으로 고정 (제목+요약 중심) + RecommendationProperties.EmbeddingWeights weights = new RecommendationProperties.EmbeddingWeights(); + weights.setTitle(0.4f); + weights.setSummary(0.4f); + weights.setContent(0.2f); + properties.setEmbeddingWeights(weights); + + // 평가 수행 - UserMetrics 수집 + List userMetrics = testUsers.stream() + .map(user -> evaluateUserWithGroundTruth(user, properties)) + .filter(Optional::isPresent) + .map(Optional::get) + .toList(); + + // 평균 메트릭 계산 + KMetrics avgMetrics 
= calculateAverageKMetrics(userMetrics); + + long elapsedTime = System.currentTimeMillis() - startTime; + + KResult result = KResult.builder() + .name(kConfig.name) + .knnSearchSize(kConfig.knnSearchSize) + .numCandidates(kConfig.numCandidates) + .metrics(avgMetrics) + .executionTimeMs(elapsedTime) + .build(); + + results.add(result); + printKResult(result); + } + + return results; + } + + private KMetrics calculateAverageKMetrics(List userMetrics) { + double r4 = userMetrics.stream().mapToDouble(UserMetrics::getRecall4).average().orElse(0.0); + double n4 = userMetrics.stream().mapToDouble(UserMetrics::getNdcg4).average().orElse(0.0); + double r8 = userMetrics.stream().mapToDouble(UserMetrics::getRecall8).average().orElse(0.0); + double n8 = userMetrics.stream().mapToDouble(UserMetrics::getNdcg8).average().orElse(0.0); + double r30 = userMetrics.stream().mapToDouble(UserMetrics::getRecall30).average().orElse(0.0); + double n30 = userMetrics.stream().mapToDouble(UserMetrics::getNdcg30).average().orElse(0.0); + + return KMetrics.builder() + .recallAt4(r4) + .ndcgAt4(n4) + .recallAt8(r8) + .ndcgAt8(n8) + .recallAt30(r30) + .ndcgAt30(n30) + .build(); + } + + private void printKComparisonHeader() { + log.info(""); + log.info("설정 | K값 | Candidates | R@4 | R@8 | R@30 | nDCG@4 | nDCG@8 | nDCG@30 | 실행시간"); + log.info("----------------------------------------------------------------------------------------------"); + } + + private void printKResult(KResult result) { + log.info(String.format("%-30s | %-9s | %-10s | %.4f | %.4f | %.4f | %.4f | %.4f | %.4f | %dms", + result.name, + result.knnSearchSize, + result.numCandidates, + result.metrics.recallAt4, + result.metrics.recallAt8, + result.metrics.recallAt30, + result.metrics.ndcgAt4, + result.metrics.ndcgAt8, + result.metrics.ndcgAt30, + result.executionTimeMs + )); + } + + private void printBestKResult(List results) { + log.info(""); + log.info("===== 최고 성능 K 값 조합 ====="); + + // Recall@8 최고 + KResult bestRecall = 
results.stream() + .max(Comparator.comparing(r -> r.metrics.recallAt8)) + .orElse(null); + + // nDCG@8 최고 + KResult bestNdcg = results.stream() + .max(Comparator.comparing(r -> r.metrics.ndcgAt8)) + .orElse(null); + + // 균형 점수 최고 (Recall@8 + nDCG@8 평균) + KResult bestBalance = results.stream() + .max(Comparator.comparing(r -> (r.metrics.recallAt8 + r.metrics.ndcgAt8) / 2.0)) + .orElse(null); + + // 실행 시간 최단 + KResult fastest = results.stream() + .min(Comparator.comparing(r -> r.executionTimeMs)) + .orElse(null); + + log.info(""); + log.info("[Recall@8 최고]"); + if (bestRecall != null) { + log.info(String.format("%s (K=%d, C=%d) | R@8: %.4f | nDCG@8: %.4f | 시간: %dms", + bestRecall.name, bestRecall.knnSearchSize, bestRecall.numCandidates, + bestRecall.metrics.recallAt8, bestRecall.metrics.ndcgAt8, bestRecall.executionTimeMs)); + } + + log.info(""); + log.info("[nDCG@8 최고]"); + if (bestNdcg != null) { + log.info(String.format("%s (K=%d, C=%d) | R@8: %.4f | nDCG@8: %.4f | 시간: %dms", + bestNdcg.name, bestNdcg.knnSearchSize, bestNdcg.numCandidates, + bestNdcg.metrics.recallAt8, bestNdcg.metrics.ndcgAt8, bestNdcg.executionTimeMs)); + } + + log.info(""); + log.info("[균형 점수 최고 (R@8 + nDCG@8 평균)]"); + if (bestBalance != null) { + double balanceScore = (bestBalance.metrics.recallAt8 + bestBalance.metrics.ndcgAt8) / 2.0; + log.info(String.format("%s (K=%d, C=%d) | R@8: %.4f | nDCG@8: %.4f | 균형: %.4f | 시간: %dms", + bestBalance.name, bestBalance.knnSearchSize, bestBalance.numCandidates, + bestBalance.metrics.recallAt8, bestBalance.metrics.ndcgAt8, balanceScore, + bestBalance.executionTimeMs)); + } + + log.info(""); + log.info("[실행 시간 최단]"); + if (fastest != null) { + log.info(String.format("%s (K=%d, C=%d) | R@8: %.4f | nDCG@8: %.4f | 시간: %dms", + fastest.name, fastest.knnSearchSize, fastest.numCandidates, + fastest.metrics.recallAt8, fastest.metrics.ndcgAt8, fastest.executionTimeMs)); + } + + log.info(""); + log.info("===== 성능/품질 트레이드오프 분석 ====="); + results.forEach(r -> { + 
double efficiency = (r.metrics.recallAt8 + r.metrics.ndcgAt8) / 2.0 / (r.executionTimeMs / 1000.0); + log.info(String.format("%s: 효율성 지수 = %.4f (품질: %.4f, 시간: %.1fs)", + r.name, + efficiency, + (r.metrics.recallAt8 + r.metrics.ndcgAt8) / 2.0, + r.executionTimeMs / 1000.0 + )); + }); + } + + @Getter + @Builder + private static class KConfig { + private String name; + private int knnSearchSize; + private int numCandidates; + } + + @Getter + @Builder + private static class KMetrics { + private double recallAt4; + private double ndcgAt4; + private double recallAt8; + private double ndcgAt8; + private double recallAt30; + private double ndcgAt30; + } + + @Getter + @Builder + private static class KResult { + private String name; + private int knnSearchSize; + private int numCandidates; + private KMetrics metrics; + private long executionTimeMs; + } +} \ No newline at end of file