Skip to content

Commit 637dea7

Browse files
committed
fix(domain-dict): limit batch size and clean up headers
Reduce the number of new terms processed from 1000 to 500, and improve CSV header handling by removing blank lines and duplicate header rows (keeping only the first header).
1 parent 42aaa64 commit 637dea7

File tree

1 file changed

+21
-7
lines changed

1 file changed

+21
-7
lines changed

mpp-core/src/commonMain/kotlin/cc/unitmesh/agent/subagent/DomainDictAgent.kt

Lines changed: 21 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -185,7 +185,7 @@ class DomainDictAgent(
185185
// ============= Step 2: Generate Entries =============
186186
onProgress("\n## Step 2/3: Generating Entries")
187187

188-
val namesToProcess = newNames.take(1000)
188+
val namesToProcess = newNames.take(500)
189189
onProgress(" 💭 Translating ${namesToProcess.size} terms (of ${newNames.size} total)...")
190190

191191
val newEntries = generateEntries(namesToProcess, callbacks)
@@ -699,14 +699,28 @@ $namesList
699699
private fun mergeEntries(currentDict: String, newEntries: List<DomainEntry>): String {
700700
val existingLines = currentDict.lines().toMutableList()
701701

702-
// Ensure header exists
703-
if (existingLines.isEmpty() || !existingLines[0].contains("Chinese")) {
704-
existingLines.add(0, "Chinese,Code Translation,Description")
702+
// Remove empty lines and duplicate headers
703+
existingLines.removeAll { it.isBlank() }
704+
705+
// Remove duplicate headers (keep only the first one)
706+
val headerLine = "Chinese,Code Translation,Description"
707+
var foundHeader = false
708+
existingLines.removeAll { line ->
709+
if (line.trim().equals(headerLine, ignoreCase = true)) {
710+
if (foundHeader) {
711+
true // remove duplicate header
712+
} else {
713+
foundHeader = true
714+
false // keep first header
715+
}
716+
} else {
717+
false
718+
}
705719
}
706720

707-
// Remove empty first line if exists
708-
if (existingLines.isNotEmpty() && existingLines[0].isBlank()) {
709-
existingLines.removeAt(0)
721+
// Ensure header exists at the beginning
722+
if (existingLines.isEmpty() || !existingLines[0].contains("Chinese", ignoreCase = true)) {
723+
existingLines.add(0, headerLine)
710724
}
711725

712726
// Get existing code translations to avoid duplicates

0 commit comments

Comments
 (0)