Skip to content

Commit

Permalink
Add dashes around HTML entities in escape_lemma
Browse files: browse the repository at this point in the history
Part of #39
  • Loading branch information
goodmami committed Oct 18, 2024
1 parent 0a94425 commit 23fa3b3
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 1 deletion.
2 changes: 1 addition & 1 deletion scripts/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def escape_lemma(lemma: str) -> str:
elif c in _custom_char_escapes:
chars.append(_custom_char_escapes[c])
elif codepoint in codepoint2name:
chars.append(codepoint2name[codepoint])
chars.append(f"-{codepoint2name[codepoint]}-")
else:
esc = f'-{codepoint:04X}-'
warnings.warn(f'no escape character defined for {c!r}; using {esc}')
Expand Down
Empty file added tests/__init__.py
Empty file.
10 changes: 10 additions & 0 deletions tests/test_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from scripts.util import escape_lemma

def test_escape_lemma():
    """Check escape_lemma against a table of (lemma, expected escape) pairs."""
    expectations = (
        # Plain letters and dots come back unchanged.
        ("abc", "abc"),
        ("a.b.c", "a.b.c"),
        # Accented letters are expected to come back unchanged as well.
        ("protégé", "protégé"),
        # Spaces become underscores.
        ("a b c", "a_b_c"),
        # Colons and hyphens get dash-delimited / doubled escapes.
        ("a:b:c", "a-colon-b-colon-c"),
        ("a-b-c", "a--b--c"),
        # A character with a named entity is wrapped in dashes.
        ("a´b´c", "a-acute-b-acute-c"),
    )
    for lemma, expected in expectations:
        assert escape_lemma(lemma) == expected

0 comments on commit 23fa3b3

Please sign in to comment.