Skip to content

Commit 121aab0

Browse files
committed
CLDR-18197 kbd: update spec to mention abnf
- add basic abnf
1 parent 9bbbc77 commit 121aab0

File tree

5 files changed

+210
-1
lines changed

5 files changed

+210
-1
lines changed

.github/workflows/keyboard.yml

+2
Original file line numberDiff line numberDiff line change
@@ -37,5 +37,7 @@ jobs:
3737
run: npm install -g @keymanapp/kmc
3838
- name: Compile Keyboards
3939
run: kmc --error-reporting build keyboards/3.0/*.xml
40+
- name: Check ABNF
41+
run: bash tools/scripts/keyboard-abnf-tests/check-keyboard-abnf.sh
4042
- name: Run Kbd Charts
4143
run: 'cd docs/charts/keyboards && npm ci && npm run build'

docs/ldml/tr35-keyboards.md

+28-1
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ The LDML specification is divided into the following parts:
121121
* [Additional Features](#additional-features)
122122
* [Disallowed Regex Features](#disallowed-regex-features)
123123
* [Replacement syntax](#replacement-syntax)
124+
* [Transform ABNF](#transform-abnf)
124125
* [Element: reorder](#element-reorder)
125126
* [Using `<import>` with `<reorder>` elements](#using-import-with-reorder-elements)
126127
* [Example Post-reorder transforms](#example-post-reorder-transforms)
@@ -2412,6 +2413,31 @@ Used in the `to=`
24122413

24132414
Emits the named mark. Also see [Markers](#markers).
24142415

2416+
#### Transform ABNF
2417+
2418+
The grammar for the transform rules is formally defined
2419+
using the ABNF notation [[STD68](https://www.rfc-editor.org/info/std68)],
2420+
including the modifications found in [RFC 7405](https://www.rfc-editor.org/rfc/rfc7405).
2421+
2422+
RFC 7405 extends ABNF with support for case-sensitive string literals.
2423+
Some ABNF tools are only compatible with the specification found in
2424+
[RFC 5234](https://www.rfc-editor.org/rfc/rfc5234).
2425+
2426+
The ABNF files are located in the `keyboards/abnf` directory in the CLDR source directory:
2427+
2428+
* `transform-from-required.abnf`
2429+
This is a partial ABNF for the `<transform from=""/>` attribute. The `from=` attribute MUST match this ABNF. Not all strings which match this ABNF are valid; see the next file.
2430+
2431+
* `transform-from-invalid.abnf`
2432+
This is an additional ABNF for the `<transform from=""/>` attribute showing patterns that are not valid. The `from=` attribute MUST NOT match this ABNF.
2433+
2434+
* `transform-to-required.abnf`
2435+
This is a partial ABNF for the `<transform to=""/>` attribute. The `to=` attribute MUST match this ABNF. Not all strings which match this ABNF are valid; see the next file.
2436+
2437+
* `transform-to-invalid.abnf`
2438+
This is an additional ABNF for the `<transform to=""/>` attribute showing patterns that are not valid. The `to=` attribute MUST NOT match this ABNF.
2439+
2440+
24152441
* * *
24162442

24172443
### Element: reorder
@@ -2873,6 +2899,7 @@ The following are the design principles for the IDs.
28732899

28742900
* * *
28752901

2902+
28762903
© 2024–2024 Unicode, Inc.
28772904
This publication is protected by copyright, and permission must be obtained from Unicode, Inc.
28782905
prior to any reproduction, modification, or other use not permitted by the [Terms of Use](https://www.unicode.org/copyright.html).
@@ -2885,4 +2912,4 @@ The authors, contributors, and publishers have taken care in the preparation of
28852912
but make no express or implied representation or warranty of any kind and assume no responsibility or liability for errors or omissions or for consequential or incidental damages that may arise therefrom.
28862913
This publication is provided “AS-IS” without charge as a convenience to users.
28872914

2888-
Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the United States and other countries.
2915+
Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the United States and other countries.
+124
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
from-match = atoms
2+
3+
; an empty match is not allowed.
4+
atoms = atom *(atom)
5+
6+
atom = simple-matcher / codepoint
7+
8+
codepoint = backslash "u" "{" cphex "}"
9+
10+
cphex = 1*6LHEXDIG
11+
12+
; TODO
13+
simple-matcher = DIGIT / ALPHA / SP
14+
15+
; message = simple-message / complex-message
16+
17+
; simple-message = o [simple-start pattern]
18+
; simple-start = simple-start-char / escaped-char / placeholder
19+
; pattern = *(text-char / escaped-char / placeholder)
20+
; placeholder = expression / markup
21+
22+
; complex-message = o *(declaration o) complex-body o
23+
; declaration = input-declaration / local-declaration
24+
; complex-body = quoted-pattern / matcher
25+
26+
; input-declaration = input o variable-expression
27+
; local-declaration = local s variable o "=" o expression
28+
29+
; quoted-pattern = "{{" pattern "}}"
30+
31+
; matcher = match-statement s variant *(o variant)
32+
; match-statement = match 1*(s selector)
33+
; selector = variable
34+
; variant = key *(s key) o quoted-pattern
35+
; key = literal / "*"
36+
37+
; ; Expressions
38+
; expression = literal-expression
39+
; / variable-expression
40+
; / function-expression
41+
; literal-expression = "{" o literal [s function] *(s attribute) o "}"
42+
; variable-expression = "{" o variable [s function] *(s attribute) o "}"
43+
; function-expression = "{" o function *(s attribute) o "}"
44+
45+
; markup = "{" o "#" identifier *(s option) *(s attribute) o ["/"] "}" ; open and standalone
46+
; / "{" o "/" identifier *(s option) *(s attribute) o "}" ; close
47+
48+
; ; Expression and literal parts
49+
; function = ":" identifier *(s option)
50+
; option = identifier o "=" o (literal / variable)
51+
52+
; attribute = "@" identifier [o "=" o literal]
53+
54+
; variable = "$" name
55+
56+
; literal = quoted-literal / unquoted-literal
57+
; quoted-literal = "|" *(quoted-char / escaped-char) "|"
58+
; unquoted-literal = name / number-literal
59+
; ; number-literal matches JSON number (https://www.rfc-editor.org/rfc/rfc8259#section-6)
60+
; number-literal = ["-"] (%x30 / (%x31-39 *DIGIT)) ["." 1*DIGIT] [%i"e" ["-" / "+"] 1*DIGIT]
61+
62+
; ; Keywords; Note that these are case-sensitive
63+
; input = %s".input"
64+
; local = %s".local"
65+
; match = %s".match"
66+
67+
; ; Names and identifiers
68+
; ; identifier matches https://www.w3.org/TR/REC-xml-names/#NT-QName
69+
; ; name matches https://www.w3.org/TR/REC-xml-names/#NT-NCName but excludes U+FFFD and U+061C
70+
; identifier = [namespace ":"] name
71+
; namespace = name
72+
; name = [bidi] name-start *name-char [bidi]
73+
; name-start = ALPHA / "_"
74+
; / %xC0-D6 / %xD8-F6 / %xF8-2FF
75+
; / %x370-37D / %x37F-61B / %x61D-1FFF / %x200C-200D
76+
; / %x2070-218F / %x2C00-2FEF / %x3001-D7FF
77+
; / %xF900-FDCF / %xFDF0-FFFC / %x10000-EFFFF
78+
; name-char = name-start / DIGIT / "-" / "."
79+
; / %xB7 / %x300-36F / %x203F-2040
80+
81+
; ; Restrictions on characters in various contexts
82+
; simple-start-char = content-char / "@" / "|"
83+
; text-char = content-char / ws / "." / "@" / "|"
84+
; quoted-char = content-char / ws / "." / "@" / "{" / "}"
85+
; content-char = %x01-08 ; omit NULL (%x00), HTAB (%x09) and LF (%x0A)
86+
; / %x0B-0C ; omit CR (%x0D)
87+
; / %x0E-1F ; omit SP (%x20)
88+
; / %x21-2D ; omit . (%x2E)
89+
; / %x2F-3F ; omit @ (%x40)
90+
; / %x41-5B ; omit \ (%x5C)
91+
; / %x5D-7A ; omit { | } (%x7B-7D)
92+
; / %x7E-2FFF ; omit IDEOGRAPHIC SPACE (%x3000)
93+
; / %x3001-10FFFF ; allowing surrogates is intentional
94+
95+
; ; Character escapes
96+
; escaped-char = backslash ( backslash / "{" / "|" / "}" )
97+
backslash = %x5C ; U+005C REVERSE SOLIDUS "\"
98+
99+
100+
; ; Required whitespace
101+
; s = *bidi ws o
102+
103+
; ; Optional whitespace
104+
; o = *(ws / bidi)
105+
106+
; ; Bidirectional marks and isolates
107+
; ; ALM / LRM / RLM / LRI, RLI, FSI & PDI
108+
; bidi = %x061C / %x200E / %x200F / %x2066-2069
109+
110+
; Whitespace characters
111+
; ws = SP / HTAB / CR / LF / %x3000
112+
113+
; uppercase rules are omitted from the AST by default
114+
115+
; from STD-68
116+
DIGIT = %x30-39 ; 0-9
117+
ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
118+
SP = %x20
119+
; HTAB = %x09 ; horizontal tab
120+
; LF = %x0A ; linefeed
121+
; CR = %x0D ; carriage return
122+
HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
123+
; like HEXDIG but lowercase also
124+
LHEXDIG = HEXDIG / "a" / "b" / "c" / "d" / "e" / "f"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
#!/bin/bash
#
# Validates the keyboard ABNF grammars and runs their test suites.
#
# Usage: bash tools/scripts/keyboard-abnf-tests/check-keyboard-abnf.sh
#
# All paths below are relative, so run this from the directory that contains
# both keyboards/ and tools/. The checks are run through npx using the
# abnf_check and abnf_test commands of the "abnf" package.
#
# For every ${ABNF_DIR}/NAME.abnf:
#   1. the grammar itself is validated with abnf_check;
#   2. if ${TEST_DIR}/NAME.d exists, every line of every *.pass.txt file in it
#      is fed to abnf_test. Lines beginning with '#' are skipped.
#
# Exits 0 if everything passed, 1 on the first failure.

set -u
# An unmatched glob must expand to nothing, not to the literal pattern.
shopt -s nullglob

ABNF_DIR=keyboards/abnf
TEST_DIR=tools/scripts/keyboard-abnf-tests
# Commands are kept as arrays so they can be expanded safely when quoted.
abnf_check=(npx --package=abnf abnf_check)
abnf_test=(npx --package=abnf abnf_test)

abnf_files=("${ABNF_DIR}"/*.abnf)
if (( ${#abnf_files[@]} == 0 )); then
  # Fail loudly: with nothing to check, "All OK" would be misleading.
  echo "Error: no .abnf files found in ${ABNF_DIR}" >&2
  exit 1
fi

echo "-- checking ABNF --"

for abnf in "${abnf_files[@]}"; do
  echo "Validating ${abnf}"
  "${abnf_check[@]}" "${abnf}" || exit 1
done

echo "-- running test suites --"

for abnf in "${abnf_files[@]}"; do
  echo "Testing ${abnf}"
  base=${abnf##*/}
  base=${base%.abnf}
  suite_dir="${TEST_DIR}/${base}.d"

  if [[ ! -d "${suite_dir}" ]]; then
    echo " Warning: ${suite_dir} did not exist"
    continue
  fi

  echo " Test suite ${suite_dir}"
  suite_files=("${suite_dir}"/*.pass.txt)
  if (( ${#suite_files[@]} == 0 )); then
    echo " Warning: no *.pass.txt files in ${suite_dir}" >&2
    continue
  fi

  for testf in "${suite_files[@]}"; do
    # The part of the file name before .pass.txt is only reported here.
    start=${testf##*/}
    start=${start%.pass.txt}
    echo " Testing ${testf} for ${start}"

    if [[ ! -r "${testf}" ]]; then
      echo "Error: cannot read ${testf}" >&2
      exit 1
    fi

    # '|| [[ -n ... ]]' keeps a final line that has no trailing newline.
    while IFS= read -r str || [[ -n "${str}" ]]; do
      # Lines starting with '#' are comments / disabled test cases.
      if [[ "${str}" == '#'* ]]; then
        continue
      fi
      echo "# '${str}'"
      "${abnf_test[@]}" "${abnf}" -t "${str}" || exit 1
    done < "${testf}"
  done
done

echo "All OK"
exit 0
43+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
abc
2+
#abc 𐒵
3+
def
4+
\u{1234} \u{012A} \u{22} \u{012a} \u{1234A}
5+
#\m{q}:
6+
#\m{q}L
7+
#\m{q}।
8+
#\m{q}ড
9+
#\m{q}ঢ
10+
#\m{q}ত
11+
#\m{q}য
12+
#\m{q}র
13+
#\m{q}ল

0 commit comments

Comments
 (0)