-
Notifications
You must be signed in to change notification settings - Fork 14
Performance: Optimize mapSentenceEndingsToWords with two-pointer approach #176
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -271,33 +271,48 @@ export class SentenceBoundaryDetector { | |||||||||||||
|
|
||||||||||||||
| mapSentenceEndingsToWords(sentences, originalWords, wordPositions) { | ||||||||||||||
| const sentenceEndingWords = []; | ||||||||||||||
| let wordIdx = 0; | ||||||||||||||
| const numWords = wordPositions.length; | ||||||||||||||
|
|
||||||||||||||
| sentences.forEach((sentence) => { | ||||||||||||||
| for (let i = 0; i < sentences.length; i++) { | ||||||||||||||
| const sentence = sentences[i]; | ||||||||||||||
| const sentenceEndPos = sentence.endPos; | ||||||||||||||
| let closestWordIndex = -1; | ||||||||||||||
| let minDistance = Infinity; | ||||||||||||||
|
|
||||||||||||||
| wordPositions.forEach((wordPos) => { | ||||||||||||||
| while (wordIdx < numWords) { | ||||||||||||||
| const wordPos = wordPositions[wordIdx]; | ||||||||||||||
| const distance = sentenceEndPos - wordPos.textEndPos; | ||||||||||||||
| if (distance >= 0 && distance < minDistance) { | ||||||||||||||
| minDistance = distance; | ||||||||||||||
| closestWordIndex = wordPos.wordIndex; | ||||||||||||||
|
|
||||||||||||||
| if (distance >= 0) { | ||||||||||||||
| if (distance < minDistance) { | ||||||||||||||
| minDistance = distance; | ||||||||||||||
| closestWordIndex = wordPos.wordIndex; | ||||||||||||||
| } | ||||||||||||||
| wordIdx++; | ||||||||||||||
| } else { | ||||||||||||||
| break; | ||||||||||||||
| } | ||||||||||||||
| }); | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| if (closestWordIndex === -1) { | ||||||||||||||
| if (this.config.debug) { | ||||||||||||||
| console.warn( | ||||||||||||||
| `[SentenceDetector] Could not find a word ending before sentence end position ${sentenceEndPos}. Falling back to absolute closest match.`, | ||||||||||||||
| ); | ||||||||||||||
| } | ||||||||||||||
| wordPositions.forEach((wordPos) => { | ||||||||||||||
| for (let j = 0; j < numWords; j++) { | ||||||||||||||
| const wordPos = wordPositions[j]; | ||||||||||||||
| const distance = Math.abs(sentenceEndPos - wordPos.textEndPos); | ||||||||||||||
| if (distance < minDistance) { | ||||||||||||||
| minDistance = distance; | ||||||||||||||
| closestWordIndex = wordPos.wordIndex; | ||||||||||||||
| } | ||||||||||||||
| }); | ||||||||||||||
| } | ||||||||||||||
|
Comment on lines
+304
to
+311
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since if (numWords > 0) {
closestWordIndex = wordPositions[0].wordIndex;
} |
||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| if (wordIdx > 0 && wordIdx < numWords) { | ||||||||||||||
| wordIdx--; | ||||||||||||||
| } | ||||||||||||||
|
Comment on lines
+314
to
316
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The condition
Suggested change
Comment on lines
+314
to
316
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The step-back fires incorrectly when the while loop didn't advance Two issues with this block:
🔧 Proposed fixTrack whether the while loop advanced + let advancedInLoop = false;
while (wordIdx < numWords) {
const wordPos = wordPositions[wordIdx];
const distance = sentenceEndPos - wordPos.textEndPos;
if (distance >= 0) {
closestWordIndex = wordPos.wordIndex;
wordIdx++;
+ advancedInLoop = true;
} else {
break;
}
}
...fallback block...
- if (wordIdx > 0 && wordIdx < numWords) {
- wordIdx--;
- }
+ // Step back so the next sentence re-examines the last boundary word,
+ // avoiding a needless fallback when two consecutive sentence ends share
+ // the same closest word.
+ if (advancedInLoop && wordIdx < numWords) {
+ wordIdx--;
+ }🤖 Prompt for AI Agents |
||||||||||||||
|
|
||||||||||||||
| if (closestWordIndex !== -1 && closestWordIndex < originalWords.length) { | ||||||||||||||
|
|
@@ -311,7 +326,7 @@ export class SentenceBoundaryDetector { | |||||||||||||
| }, | ||||||||||||||
| }); | ||||||||||||||
| } | ||||||||||||||
| }); | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| return sentenceEndingWords; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Incorrect date on the new learning entry.
The header reads
## 2024-11-20but the PR was opened in May 2026. This date duplicates one of the immediately preceding entries and will mislead anyone scanning the log chronologically.✏️ Proposed fix
📝 Committable suggestion
🤖 Prompt for AI Agents