-
Notifications
You must be signed in to change notification settings - Fork 388
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
CLDR-18197 kbd: update spec to mention abnf
- add keyboard abnf and sample files and automated tests
- Loading branch information
Showing
6 changed files
with
274 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
; Copyright (c) 2025 Unicode, Inc.
; For terms of use, see http://www.unicode.org/copyright.html
; SPDX-License-Identifier: Unicode-3.0
; CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)

; This is an ABNF grammar for the CLDR Keyboard spec transform match syntax.
; Note that there are sample matching/failing data files in tools/scripts/keyboard-abnf-tests/

; An entire <transform from="..." /> string.
; Note that the empty string is not a match.
; Also note that a string may match this ABNF but be invalid according to the spec - which see.
; This must stay the FIRST rule: it is the grammar's entry point.

from-match = start-context atoms / atoms

start-context = "^"

; an empty match is not allowed: at least one atom is required.
atoms = atom *(disjunction atom / atom)

disjunction = "|"

; the longer alternative is listed first so that a trailing quantifier
; is consumed together with its quark.
atom = quark quantifier / quark

quark = non-group / group

non-group = simple-matcher / codepointseq / variable

variable = string-variable / set-variable

string-variable = "${" var-id "}"
set-variable = "$[" var-id "]"

group = capturing-group / non-capturing-group

quantifier = bounded-quantifier / optional-quantifier

; \u{...} holding one or more space-separated code points
codepointseq = backslash "u" "{" cphexseq "}"
; \u{...} holding exactly one code point (used as the edge of a range)
codepoint = backslash "u" "{" cphex "}"

; {n,m} - both bounds are a single digit and both are required
bounded-quantifier = "{" DIGIT "," DIGIT "}"
optional-quantifier = "?"

non-capturing-group = "(" "?" ":" atoms ")"

; a capturing group may not contain other capturing groups.
capturing-group = "(" catoms ")"

; capturing atoms can't include any groups
catoms = catom *(catom)
; same shape as "atom": the quantified form is listed first so that it is
; tried before the bare form.
catom = cquark quantifier / cquark

; capturing atoms can't include groups
cquark = non-group

; multiple hex codepoints, separated by a single space
cphexseq = cphex *(SP cphex)

; one hex codepoint (1-6 digits)
cphex = 1*6LHEXDIG

simple-matcher = text-char / class / match-any-codepoint / match-marker

match-any-codepoint = "."

match-marker = match-any-marker / match-named-marker
match-any-marker = "\m{.}"
match-named-marker = "\m{" marker-id "}"

; marker id is nmtoken, but may be UAX31 in the future.
marker-id = NMTOKEN
; variable ID: 1 to 32 characters of IDCHAR
var-id = 1*32IDCHAR

class = fixed-class / set-class

fixed-class = backslash fixed-class-char

fixed-class-char = "s" / "S" / "t" / "r" / "n" / "f" / "v" / backslash / "$" / "d" / "w" / "D" / "W" / "0"

set-class = "[" set-negator set-members "]"
set-members = set-member *(set-member)
set-member = text-char / char-range / match-marker
char-range = range-edge "-" range-edge
range-edge = codepoint / range-char
; the empty alternative makes the negator optional
set-negator = "^" / ""

; Restrictions on characters in various contexts

; normal text
text-char = content-char / ws / escaped-char / "-" / ":"
; text in a range sequence
range-char = content-char / ws / escaped-char / "." / "|" / "{" / "}"
; group for everything BUT syntax chars.
content-char = ASCII-CTRLS / ASCII-PUNCT / ALPHA / DIGIT / NON-ASCII

; Character escapes
escaped-char = backslash ( backslash / "{" / "|" / "}" )

backslash = %x5C ; U+005C REVERSE SOLIDUS "\"
ws = SP / HTAB / CR / LF / %x3000

IDCHAR = ALPHA / DIGIT / "_"
ASCII-CTRLS = %x01-08 ; omit NULL (%x00), HTAB (%x09) and LF (%x0A)
            / %x0B-0C ; omit CR (%x0D)
            / %x0E-1F ; omit SP (%x20)
ASCII-PUNCT = %x21-23 ; omit DOLLAR (%x24)
            / %x25-27 ; omit ( ) * + (%x28-2B)
            / %x2C    ; omit - (%x2D) and . (%x2E)
            / %x2F    ; skip over digits (%x30-39) and : (%x3A)
            / %x3B-3E ; omit ? (%x3F) and @ (%x40)
            / %x5F    ; skips upper A-Z (ALPHA) and omits [ \ ] ^ (%x5B-5E)
            / %x60    ; skips a-z (ALPHA) and omits { | } (%x7B-7D)
            / %x7E-7F ; just for completeness
NON-ASCII = %x80-D7FF     ; starts after ASCII (%x7E-7F is in ASCII-PUNCT); omit surrogates
          / %xE000-10FFFF ; that's the rest. (TODO: omit other non-characters)

; from STD-68
DIGIT = %x30-39 ; 0-9
ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
SP = %x20
HTAB = %x09 ; horizontal tab
LF = %x0A ; linefeed
CR = %x0D ; carriage return
HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
; like HEXDIG but lowercase also
LHEXDIG = HEXDIG / "a" / "b" / "c" / "d" / "e" / "f"

; from XML
NAMESTARTCHAR = ":" / ALPHA / "_" / %xC0-D6 / %xD8-F6 / %xF8-2FF / %x370-37D / %x37F-1FFF / %x200C-200D / %x2070-218F / %x2C00-2FEF / %x3001-D7FF / %xF900-FDCF / %xFDF0-FFFD / %x10000
; TODO: / %x10000-EFFFE
NAMECHAR = NAMESTARTCHAR / "-" / "." / DIGIT / %xB7 / %x0300-036F / %x203F-2040
; NAME = NAMESTARTCHAR *(NAMECHAR)
NMTOKEN = 1*NAMECHAR
; NMTOKENS = NMTOKEN *(SP NMTOKEN)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
#!/bin/bash
#
# Check the keyboard ABNF grammars and run their sample-string test suites.
#
# For every ${ABNF_DIR}/NAME.abnf this script:
#   1. syntax-checks the grammar with abnf_check;
#   2. if ${TEST_DIR}/NAME.d exists, feeds each line of every *.pass.txt file
#      to abnf_test (it must parse) and each line of every *.fail.txt file
#      (it must be rejected).
# In the sample files a line starting with '#' is a comment. Any other line,
# including an empty one, is a sample string.
#
# Paths are relative to the current working directory.
# Exit status: 0 when everything behaved as expected, 1 on the first problem.

ABNF_DIR=keyboards/abnf
TEST_DIR=tools/scripts/keyboard-abnf-tests
# Arrays keep each word a separate argument without unquoted word splitting.
abnf_check=(npx --package=abnf abnf_check)
abnf_test=(npx --package=abnf abnf_test)

#######################################
# Run every sample line of one file through abnf_test.
# Globals:   abnf_test (read)
# Arguments: $1 - grammar (.abnf) file
#            $2 - sample file, one candidate string per line
#            $3 - expected outcome, "pass" or "fail"
# Outputs:   echoes each tested string; abnf_test's stderr is shown and its
#            stdout is discarded
# Returns:   exits the whole script with status 1 on the first sample whose
#            outcome differs from $3
#######################################
run_samples() {
  local grammar=$1 samples=$2 expect=$3
  local str
  # The "|| [[ -n ... ]]" part keeps a last line with no trailing newline.
  while IFS= read -r str || [[ -n "${str}" ]]; do
    # Skip comment lines. An empty line is NOT skipped: it tests "".
    if [[ "${str}" == '#'* ]]; then
      continue
    fi
    printf "# '%s'\n" "${str}"
    # stdin is /dev/null so the tool cannot consume the rest of the sample
    # file this loop is reading. "2>&1 >/dev/null" shows stderr only.
    if "${abnf_test[@]}" "${grammar}" -t "${str}" 2>&1 >/dev/null </dev/null; then
      if [[ "${expect}" == fail ]]; then
        # Must run in the current shell: an exit inside ( ... ) would only
        # leave the subshell and the failure would go unnoticed.
        echo "ERROR should have failed" >&2
        exit 1
      fi
    elif [[ "${expect}" == pass ]]; then
      echo "ERROR should have passed" >&2
      exit 1
    fi
  done <"${samples}"
}

echo "-- checking ABNF --"

for abnf in "${ABNF_DIR}"/*.abnf; do
  echo "Validating ${abnf}"
  "${abnf_check[@]}" "${abnf}" || exit 1
done

echo "-- running test suites --"

for abnf in "${ABNF_DIR}"/*.abnf; do
  echo "Testing ${abnf}"
  base=$(basename "${abnf}" .abnf)
  SUITEDIR=${TEST_DIR}/${base}.d
  if [[ ! -d "${SUITEDIR}" ]]; then
    echo " Warning: ${SUITEDIR} did not exist"
    continue
  fi

  echo " Test suite ${SUITEDIR}"
  for testf in "${SUITEDIR}"/*.pass.txt; do
    # An unmatched glob stays as the literal pattern; skip it.
    [[ -e "${testf}" ]] || continue
    # NOTE(review): the file-name prefix is only printed; it is not passed
    # to abnf_test as a start rule - confirm that is intended.
    start=$(basename "${testf}" .pass.txt)
    echo " Testing PASS ${testf} for ${start}"
    run_samples "${abnf}" "${testf}" pass
  done
  for testf in "${SUITEDIR}"/*.fail.txt; do
    [[ -e "${testf}" ]] || continue
    start=$(basename "${testf}" .fail.txt)
    echo " Testing FAIL ${testf} for ${start}"
    run_samples "${abnf}" "${testf}" fail
  done
done

echo "All OK"
exit 0
|
29 changes: 29 additions & 0 deletions
29
tools/scripts/keyboard-abnf-tests/transform-from-required.d/from-match.fail.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# this is a comment | ||
# only innermost group isn't the capturing | ||
(?:foo(?:bar(baz(?:bat))?)) | ||
(?:foo(?:bar(baz(bat))?)) | ||
((a|b|c)|(d|e|f)) | ||
# disallowed features | ||
# empty string | ||
|
||
# props | ||
\p{Greek} | ||
# backreferences | ||
([abc])-\1 \k<something> | ||
# unbounded quantifiers | ||
.* | ||
(abc)* | ||
e(abc)+g | ||
(abc){1,} | ||
(abc){0,} | ||
(abc)*? | ||
(abc)+? | ||
# named capture groups | ||
(?<something>) | ||
# Assertions | ||
\b | ||
\B | ||
(?<!abc) | ||
# end marker | ||
Foo$ | ||
^Foo$ |
24 changes: 24 additions & 0 deletions
24
tools/scripts/keyboard-abnf-tests/transform-from-required.d/from-match.pass.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
#This is a comment. | ||
abc | ||
abc 𐒵 | ||
#def? | ||
(def)? | ||
ab(cd\u{1234}ef){2,3} | ||
\u{1234} \u{012A} \u{22} \u{012a} \u{1234A} \u{123 456} | ||
(?:foo(?:bar(baz)?)) | ||
([abc])([def\m{w}]) | ||
(?:thismatches) | ||
(?:[abc]([def]))|(?:[ghi]) | ||
abc|def | ||
# NO: nested! | ||
##(?:foo(?:bar(baz(?:bat))?)) | ||
##(?:foo(?:bar(baz(bat))?)) | ||
\m{q}: | ||
#\m{q}L | ||
#\m{q}। | ||
\m{q}ড\m{.} | ||
#\m{q}ঢ | ||
#\m{q}ত | ||
#\m{q}য | ||
#\m{q}র | ||
#\m{q}ল |