Skip to content

Commit

Permalink
Work Area: Fix Concordancer - Generation Settings - Width unit - Character
Browse files Browse the repository at this point in the history
  • Loading branch information
BLKSerene committed Jul 23, 2023
1 parent 632c216 commit 0e6057a
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 1 deletion.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@
### ✨ Improvements
- Utils: Update Wordless's sentence and sentence segment splitters

### 📌 Bugfixes
- Work Area: Fix Concordancer - Generation Settings - Width unit - Character

### ❌ Removals
- Utils: Remove PyThaiNLP's perceptron part-of-speech tagger (LST20)

Expand Down
9 changes: 8 additions & 1 deletion wordless/wl_concordancer.py
Original file line number Diff line number Diff line change
Expand Up @@ -777,7 +777,7 @@ def run(self):
else:
context_right.append(token_next)

len_context_right += len(token_next)
len_context_right += len_token_next

# Search in Results (Left & Right)
text_search_left = copy.deepcopy(context_left)
Expand All @@ -786,6 +786,13 @@ def run(self):
if not settings['token_settings']['punc_marks']:
context_left = text.tokens_flat_punc_marks_merged[i - len(context_left): i]
context_right = text.tokens_flat_punc_marks_merged[i + len_search_term : i + len_search_term + len(context_right)]

# Modify the first token in left context and the last token in right context
len_left_extra = len_context_left - sum((len(token) for token in context_left))
len_right_missing = len(context_right[-1])

context_left[0] = context_left[0][len_left_extra:]
context_right[-1] = context_right[-1][:len_right_missing]
elif settings['generation_settings']['width_unit'] == self.tr('Token'):
width_left_token = settings['generation_settings']['width_left_token']
width_right_token = settings['generation_settings']['width_right_token']
Expand Down

0 comments on commit 0e6057a

Please sign in to comment.