@@ -161,8 +161,11 @@ class DomainDictAgent(
161161 insights.domainConcepts.size
162162 )
163163
164+ // Analyze package structure to find important business packages
165+ val importantPackages = analyzePackageStructure(insights, onProgress)
166+
164167 // Extract meaningful names from hot files
165- val codebaseNames = extractMeaningfulNames(insights, onProgress)
168+ val codebaseNames = extractMeaningfulNames(insights, onProgress, importantPackages )
166169 onProgress(" 📋 Found ${codebaseNames.size} candidate names" )
167170
168171 // Filter out existing terms
@@ -185,11 +188,21 @@ class DomainDictAgent(
185188 // ============= Step 2: Generate Entries =============
186189 onProgress(" \n ## Step 2/3: Generating Entries" )
187190
188- val namesToProcess = newNames.take(500 )
189- onProgress(" 💭 Translating ${namesToProcess.size} terms (of ${newNames.size} total)..." )
191+ // Process in smaller batches for faster LLM responses
192+ val batchSize = 100
193+ val maxBatches = 3 // Process at most 3 batches = 300 terms
194+ val namesToProcess = newNames.take(batchSize * maxBatches)
195+ val allNewEntries = mutableListOf<DomainEntry >()
196+
197+ namesToProcess.chunked(batchSize).forEachIndexed { index, batch ->
198+ onProgress(" 💭 Batch ${index + 1 } : Translating ${batch.size} terms..." )
199+ val batchEntries = generateEntries(batch, callbacks)
200+ allNewEntries.addAll(batchEntries)
201+ onProgress(" ✅ Batch ${index + 1 } : Got ${batchEntries.size} entries" )
202+ }
190203
191- val newEntries = generateEntries(namesToProcess, callbacks)
192- onProgress(" ✅ Generated ${newEntries.size} entries" )
204+ val newEntries = allNewEntries
205+ onProgress(" 📊 Total: ${newEntries.size} entries from ${namesToProcess.size} terms " )
193206
194207 // Show generated entries
195208 newEntries.take(10 ).forEach { entry ->
@@ -274,21 +287,95 @@ class DomainDictAgent(
274287 return result
275288 }
276289
277- suspend fun extractMeaningfulNames (
/**
 * Identifies the most frequently changed business packages among the hot files.
 *
 * Every hot file contributes its change count to the package derived from its
 * path by [extractPackagePath]. Packages that look like infrastructure are
 * dropped, and the 20 packages with the highest totals are returned. Up to five
 * of them are reported through [onProgress].
 *
 * A package is treated as infrastructure when one of its path segments (or a
 * part of a segment separated by '-', '_' or '.') equals an infrastructure name,
 * or when a segment is a camel-case test source set such as "commonTest".
 * Whole segments are compared instead of substrings so that packages such as
 * "commonMain/kotlin/...", ".../builder" or ".../latest" are not discarded just
 * because they contain "common", "build" or "test".
 *
 * @param insights codebase insights whose hot files are analyzed
 * @param onProgress callback receiving human-readable progress messages
 * @return the top business packages, iterated in descending change-count order
 */
private fun analyzePackageStructure(
    insights: CodebaseInsightsResult,
    onProgress: (String) -> Unit
): Set<String> {
    onProgress(" 📦 Analyzing package structure...")

    // Sum change counts per package; files without a directory part are ignored.
    val packageCounts = mutableMapOf<String, Int>()
    for (file in insights.hotFiles) {
        val packagePath = extractPackagePath(file.path)
        if (packagePath.isNotEmpty()) {
            packageCounts[packagePath] = (packageCounts[packagePath] ?: 0) + file.changeCount
        }
    }

    val infrastructureNames = setOf(
        "test", "config", "util", "utils", "helper", "common",
        "generated", "build", "gradle", "node_modules", "target"
    )

    fun isInfrastructure(pkg: String): Boolean = pkg.split('/').any { segment ->
        val lowerSegment = segment.lowercase()
        // Whole segment first, so names containing a delimiter ("node_modules") still match.
        lowerSegment in infrastructureNames ||
            // Camel-case test source sets: "commonTest", "jvmTest", "androidTests", ...
            segment.endsWith("Test") || segment.endsWith("Tests") ||
            // Delimited directory names such as "test-utils" or "build_tools".
            lowerSegment.split('-', '_', '.').any { it in infrastructureNames }
    }

    // Highest change count first; the list order is preserved by toSet() below.
    val topPackages = packageCounts.entries
        .filterNot { isInfrastructure(it.key) }
        .sortedByDescending { it.value }
        .take(20)
        .map { it.key }

    if (topPackages.isNotEmpty()) {
        onProgress(" 📁 Top business packages:")
        topPackages.take(5).forEach { pkg ->
            val count = packageCounts[pkg] ?: 0
            onProgress(" • $pkg (${count} changes)")
        }
    }

    return topPackages.toSet()
}
340+
/**
 * Derives a package-like directory path from a source file path.
 *
 * Examples:
 * - "src/main/kotlin/cc/unitmesh/agent/Tool.kt" -> "cc/unitmesh/agent"
 * - "mpp-core/src/commonMain/kotlin/cc/unitmesh/agent/Tool.kt" -> "cc/unitmesh/agent"
 * - "src/test/kotlin/cc/unitmesh/ToolTest.kt" -> "test/kotlin/cc/unitmesh"
 *
 * Only production source roots ("main", "common" and Kotlin Multiplatform
 * "<target>Main" source sets) are stripped together with their language
 * directory. For any other layout just the leading ".../src/" is removed, so a
 * "test" directory stays visible in the result.
 *
 * @param filePath file path; both "/" and "\" separators are accepted
 * @return the directory part below the source root, or "" when the cleaned
 *         path has no directory component
 */
private fun extractPackagePath(filePath: String): String {
    // "main", "common", plus multiplatform source sets such as "commonMain" / "jvmMain".
    val sourceSet = """(main|common|\w+Main)"""
    val language = "(kotlin|java|scala)"

    val cleanPath = filePath
        // Normalize Windows separators so the patterns below apply.
        .replace('\\', '/')
        .replace(Regex("^.*/src/$sourceSet/$language/"), "")
        .replace(Regex("^.*/src/"), "")
        .replace(Regex("^src/$sourceSet/$language/"), "")
        .replace(Regex("^src/"), "")

    // Drop the file name; a path without any "/" has no package part.
    return cleanPath.substringBeforeLast("/", "")
}
358+
359+ suspend fun extractMeaningfulNames (
360+ insights : CodebaseInsightsResult ,
361+ onProgress : (String ) -> Unit ,
362+ importantPackages : Set <String > = emptySet()
280363 ): List <String > {
281364 val hotFileNames = mutableSetOf<String >()
282365 val allConceptNames = mutableSetOf<String >()
283- // since it's lowly we just disable it
284- // if (codeParser != null) {
285- // onProgress(" 🌲 Using TreeSitter to parse hot files...")
286- // val hotFilesWithCode = parseHotFilesWithTreeSitter(insights.hotFiles, onProgress)
287- // hotFileNames.addAll(hotFilesWithCode)
288- // }
366+
367+ // Prioritize files from important packages
368+ val prioritizedFiles = if (importantPackages.isNotEmpty()) {
369+ insights.hotFiles.sortedByDescending { file ->
370+ val pkg = extractPackagePath(file.path)
371+ if (importantPackages.any { pkg.startsWith(it) || it.startsWith(pkg) }) 2 else 1
372+ }
373+ } else {
374+ insights.hotFiles
375+ }
289376
290377 // Also extract from hot file names
291- for (file in insights.hotFiles ) {
378+ for (file in prioritizedFiles ) {
292379 val fileName = file.path.substringAfterLast(" /" ).substringBeforeLast(" ." )
293380 val domainName = extractDomainFromFileName(fileName)
294381 if (domainName != null && isValidDomainName(domainName)) {
@@ -327,19 +414,26 @@ class DomainDictAgent(
327414 }
328415
329416 /* *
330- * Less strict validation for domain concepts (already extracted from code)
417+ * Validation for domain concepts - must be compound names (like "DomainDict", not "Agent")
418+ * Single words are too generic and don't provide business context
331419 */
332420 private fun isValidDomainConceptName (name : String ): Boolean {
333421 if (name.length < 3 ) return false
334422 if (name.length > 60 ) return false
335423
336424 val lowerName = name.lowercase()
337425
338- // Skip very common/generic names
426+ // Skip only pure technical/programming terms (let AI decide business relevance)
339427 val skipExact = setOf (
340- " unknown" , " init" , " test" , " main" , " app" , " get" , " set" , " is" , " has" ,
428+ // Language keywords & primitives
429+ " unknown" , " init" , " test" , " main" , " get" , " set" , " is" , " has" ,
341430 " string" , " int" , " list" , " map" , " object" , " class" , " function" ,
342- " true" , " false" , " null" , " void" , " return" , " if" , " else" , " for" , " while"
431+ " true" , " false" , " null" , " void" , " return" , " if" , " else" , " for" , " while" ,
432+ // Pure infrastructure patterns
433+ " impl" , " util" , " utils" , " helper" , " helpers" , " base" , " abstract" ,
434+ " interface" , " default" , " common" , " internal" , " private" , " public" ,
435+ // Build/test artifacts
436+ " spec" , " mock" , " stub" , " fake" , " gradle" , " build" , " index"
343437 )
344438 if (lowerName in skipExact) return false
345439
@@ -349,6 +443,11 @@ class DomainDictAgent(
349443 // Skip special characters
350444 if (name.contains(" <" ) || name.contains(" >" ) || name.contains(" $" )) return false
351445
446+ // IMPORTANT: Require at least 2 capital letters (compound name)
447+ // This ensures we get "DomainDict" not "Agent"
448+ val capitalCount = name.count { it.isUpperCase() }
449+ if (capitalCount < 2 ) return false
450+
352451 return true
353452 }
354453
@@ -496,65 +595,34 @@ class DomainDictAgent(
496595
497596 val lowerName = name.lowercase()
498597
499- // Skip generic/common terms ( infrastructure, not domain)
598+ // Skip only pure technical/ infrastructure terms
500599 val skipTerms = setOf (
501- // Testing
600+ // Testing artifacts
502601 " test" , " tests" , " spec" , " mock" , " stub" , " fake" ,
503- // Implementation details
504- " impl" , " util" , " utils" , " helper" , " helpers" , " factory" ,
505- " base" , " abstract" , " interface" , " default" , " common" ,
506- // Build/config
507- " main" , " app" , " application" , " index" ,
508- " run" , " build" , " gradle" , " config" , " settings" ,
509- // Generic programming concepts (too common)
510- " activity" , " action" , " event" , " listener" , " handler" , " callback" ,
511- " model" , " data" , " item" , " entry" , " node" , " element" ,
512- " list" , " map" , " set" , " array" , " collection" , " queue" ,
513- " context" , " state" , " status" , " type" , " kind" , " mode" ,
514- " info" , " detail" , " result" , " response" , " request" ,
515- " color" , " border" , " icon" , " image" , " font" , " style" ,
516- " file" , " path" , " name" , " key" , " value" , " id" ,
517- " size" , " width" , " height" , " offset" , " padding" , " margin" ,
518- " consumer" , " producer" , " provider" , " service" , " manager" ,
519- " builder" , " creator" , " generator" , " loader" , " reader" , " writer" ,
520- " parser" , " formatter" , " converter" , " adapter" , " wrapper" ,
521- " view" , " panel" , " dialog" , " screen" , " page" , " component" ,
522- " button" , " text" , " label" , " field" , " input" , " output" ,
523- " editor" , " renderer" , " painter" , " drawer" ,
524- " exception" , " error" , " warning" , " message" ,
525- " checks" , " diff" , " check" , " unknown"
602+ // Pure implementation details
603+ " impl" , " util" , " utils" , " helper" , " helpers" ,
604+ " base" , " abstract" , " interface" , " default" , " common" , " internal" ,
605+ // Build/config files
606+ " main" , " index" , " build" , " gradle"
526607 )
527608
528609 // Exact match skip
529610 if (lowerName in skipTerms) return false
530611
531- // Skip IntelliJ platform concepts (infrastructure )
612+ // Skip IntelliJ/JetBrains platform internals (framework-specific, not business )
532613 val platformTerms = setOf (
533- " anaction" , " applicationmanager" , " project" , " psifile" , " psielement" ,
534- " virtualfile" , " document" , " editor" , " intention" , " inspection" ,
535- " psiclass" , " psimethod" , " psifield" , " psitype" , " psivariable" ,
536- " language" , " filetype" , " module" , " facet" , " artifact" ,
537- " toolwindow" , " notification" , " progress" , " indicator" ,
538- " runnable" , " callable" , " future" , " promise" , " deferred" ,
539- // JetBrains specific
614+ " anaction" , " psifile" , " psielement" , " psiclass" , " psimethod" ,
615+ " psifield" , " psitype" , " psivariable" , " virtualfile" ,
616+ // JetBrains UI components
540617 " jbcolor" , " jbinsets" , " jbui" , " jbpopup" , " jblist" ,
541- // Java Swing/AWT
542- " jcomponent" , " jpanel" , " jbutton" , " jlabel" , " jframe" ,
543- " swing" , " awt" , " graphics"
618+ // Java Swing/AWT internals
619+ " jcomponent" , " jpanel" , " jbutton" , " jlabel" , " jframe"
544620 )
545621 if (platformTerms.any { lowerName.contains(it) }) return false
546622
547- // Skip technical suffixes that indicate infrastructure
623+ // Skip pure infrastructure suffixes
548624 val technicalSuffixes = setOf (
549- " controller" , " service" , " repository" , " dao" , " mapper" ,
550- " dto" , " vo" , " po" , " entity" , " request" , " response" ,
551- " config" , " configuration" , " settings" , " properties" ,
552- " handler" , " listener" , " callback" , " adapter" , " wrapper" ,
553- " factory" , " builder" , " provider" , " manager" , " registry" ,
554- " helper" , " util" , " utils" , " tool" , " tools" ,
555- " impl" , " implementation" , " abstract" , " base" , " default" ,
556- " exception" , " error" , " filter" , " interceptor" ,
557- " capable" , " aware" , " enabled" , " disabled"
625+ " impl" , " implementation" , " dto" , " vo" , " po"
558626 )
559627 if (technicalSuffixes.any { lowerName.endsWith(it) }) return false
560628
@@ -588,29 +656,27 @@ class DomainDictAgent(
588656
589657 val namesList = names.joinToString(" \n " ) { " - $it " }
590658
591- // DDD-focused prompt, inspired by indexer.vm
659+ // DDD-focused prompt - extract compound domain concepts only
592660 val prompt = """
593- 你是一个 DDD(领域驱动设计)专家,负责构建业务导向的中英文词典。请从以下代码名称中提取重要的业务概念 。
661+ 你是一个 DDD(领域驱动设计)专家,负责构建业务导向的中英文词典。请从以下代码名称中提取**复合业务概念** 。
594662
595- **提取原则: **
663+ **核心规则:只提取复合词(至少包含2个有意义的单词) **
596664
597- ✅ 应该提取的内容:
598- - 核心业务实体(如:Blog、Comment、Payment、User 等名词)
599- - 业务概念和领域模型(如:Member、Points、Order)
600- - 难以理解的词汇或拼音缩写
601- - 领域特定术语
665+ ✅ 应该提取的内容(复合词示例) :
666+ - DomainDict(领域词典)- 由 Domain + Dict 组成
667+ - CodeReview(代码审查)- 由 Code + Review 组成
668+ - ChatContext(聊天上下文)- 由 Chat + Context 组成
669+ - AgentTask(代理任务)- 由 Agent + Task 组成
602670
603- ❌ 应该排除的内容:
604- 1. 技术词汇:Controller、Service、Repository、Mapper、DTO、VO、PO、Entity、Request、Response、Config 等
605- 2. 实现细节和数据传输对象:包含 "Request"、"Response"、"Dto"、"Entity" 后缀的条目
606- 3. 技术操作动词:validate、check、convert、deserialize、serialize、encode、decode 等
607- 4. 方法名中的技术操作:如 "checkIfVipAccount" 应只提取 "VIP Account"
608- 5. 通用库 API(如 Spring、OkHttp)和通用类名(如 List、Map)
671+ ❌ 绝对不要提取的内容(单个通用词):
672+ - Agent、Chat、Code、Task、Model、Service、Config、Handler、Manager
673+ - File、Path、Node、Item、Event、Action、State、Context、Message
674+ - User、Role、Session、Token、Request、Response、Error、Result
675+ - 任何只有一个单词的通用技术术语
609676
610- **处理规则:**
611- 1. 如果提取的条目包含技术后缀(如 "CreateCommentDto"),转换为纯业务概念(如 "Comment")
612- 2. 如果方法名包含技术操作(如 "checkIfVipAccount"),提取业务含义("VIP Account")
613- 3. 如果类名包含技术词汇后缀,移除后缀再添加到词典
677+ ❌ 也要排除:
678+ 1. 技术后缀词:Controller、Service、Repository、Mapper、DTO、Handler 等
679+ 2. 通用库 API 和框架类名
614680
615681## 要分析的名称:
616682$namesList
@@ -619,17 +685,17 @@ $namesList
619685```json
620686{
621687 "entries": [
622- {"chinese": "博客 ", "codeTranslation": "Blog ", "description": "博客文章 "}
688+ {"chinese": "领域词典 ", "codeTranslation": "DomainDict ", "description": "业务术语词典 "}
623689 ]
624690}
625691```
626692
627693## 输出规则:
628- 1. chinese: 简洁的中文术语(2-6个字 )
629- 2. codeTranslation: 纯业务概念名(移除技术后缀 )
630- 3. description: 一句话业务描述(不超过20字)
631- 4. 只输出有意义的业务概念,跳过技术实现细节
632- 5. 如果无法理解或太通用,直接跳过不输出
694+ 1. codeTranslation 必须是**复合词**(包含至少2个大写字母开头的单词 )
695+ 2. 不要拆分复合词!保持原样(如 AgentTask 不要拆成 Agent 和 Task )
696+ 3. 如果输入是单个通用词,直接跳过不输出
697+ 4. chinese: 简洁的中文术语(2-6个字)
698+ 5. description: 一句话业务描述(不超过20字)
633699
634700请直接输出JSON,不要其他解释。
635701 """ .trimIndent()
@@ -674,14 +740,32 @@ $namesList
674740 val code = match.groupValues[2 ].trim()
675741 val desc = match.groupValues[3 ].trim()
676742
677- if (chinese.isNotBlank() && code.isNotBlank()) {
743+ if (chinese.isNotBlank() && code.isNotBlank() && isValidOutputEntry(code) ) {
678744 entries.add(DomainEntry (chinese, code, desc))
679745 }
680746 }
681747
682748 return entries
683749 }
684750
/**
 * Validates a code name returned by the LLM before it is stored as an entry.
 *
 * The name must be a compound of at least two words. Words are detected from
 * capitalization: a capital letter starts a word when it is the first
 * character, follows a non-capital, or is the last capital of an acronym run
 * directly followed by a lowercase letter. Consequently "DomainDict",
 * "HTTPClient" and "VIP Account" are compounds, while "Agent" and pure
 * acronyms such as "DTO" or "API" count as a single word and are rejected.
 * Counting raw capital letters instead would accept those acronyms.
 *
 * Names that are, ignoring case, one of a few purely technical terms are
 * rejected as well.
 *
 * @param code the code name produced by the LLM
 * @return true when the name is a compound, non-technical term
 */
private fun isValidOutputEntry(code: String): Boolean {
    val wordCount = code.indices.count { i ->
        code[i].isUpperCase() && (
            i == 0 ||
                !code[i - 1].isUpperCase() ||
                // End of an acronym run: the "C" in "HTTPClient".
                (i + 1 < code.length && code[i + 1].isLowerCase())
            )
    }
    if (wordCount < 2) return false

    // Skip only pure technical terms (the LLM decides business relevance otherwise).
    val technicalSkip = setOf(
        "impl", "util", "utils", "helper", "helpers",
        "test", "tests", "spec", "mock", "stub", "fake"
    )
    return code.lowercase() !in technicalSkip
}
768+
685769 // ============= Step 3: Save =============
686770
687771 private fun parseExistingTerms (csv : String ): Set <String > {
0 commit comments