-
Notifications
You must be signed in to change notification settings - Fork 24
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add ABNF snippets for language tag and media type (#437)
Extracted/derived from the RFCs referenced in the GEDCOM spec. Signed-off-by: Dave Thaler <[email protected]> Co-authored-by: Dave Thaler <[email protected]>
- Loading branch information
Showing
4 changed files
with
125 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
; Core Rules extracted from RFC 5234 section B.1 | ||
; This is a subset of the RFC's core rules: ALPHA, SP, HTAB, DQUOTE and VCHAR are | ||
; the terminals referenced by the language-tag and media-type grammars. | ||
ALPHA = %x41-5A / %x61-7A ; A-Z / a-z | ||
; NOTE(review): DIGIT is commented out in this file, while the language-tag and | ||
; media-type grammars reference the rule name "digit". ABNF rule names are | ||
; case-insensitive, so "digit" is presumably supplied by the grammar that includes | ||
; these snippets - confirm before using this file on its own. | ||
;DIGIT = %x30-39 ; 0-9 | ||
SP = %x20 | ||
HTAB = %x09 ; horizontal tab | ||
DQUOTE = %x22 ; " (Double Quote) | ||
VCHAR = %x21-7E ; visible (printing) characters |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
; ABNF derived from RFC 5646 section 2.1 | ||
; Language-Tag is the top-level rule; every other rule here defines one of its parts. | ||
; Quoted strings in ABNF match case-insensitively, so "x", "i-ami", etc. also match | ||
; their upper- and mixed-case spellings. | ||
; NOTE(review): "digit" is referenced below but not defined in this file; ABNF rule | ||
; names are case-insensitive, so it must resolve to a digit/DIGIT rule supplied by | ||
; the grammar that includes this snippet - confirm. | ||
Language-Tag = langtag ; normal language tags | ||
/ privateuse ; private use tag | ||
/ grandfathered ; grandfathered tags | ||
; langtag: a language subtag, then an optional script, an optional region, any number | ||
; of variants, any number of extensions and an optional private-use part, in that | ||
; order, each introduced by "-". | ||
langtag = language | ||
["-" script] | ||
["-" region] | ||
*("-" variant) | ||
*("-" extension) | ||
["-" privateuse] | ||
|
||
language = 2*3ALPHA ; shortest ISO 639 code | ||
["-" extlang] ; sometimes followed by | ||
; extended language subtags | ||
/ 4ALPHA ; or reserved for future use | ||
/ 5*8ALPHA ; or registered language subtag | ||
|
||
; extlang: one 3-letter subtag, optionally followed by up to two more 3-letter | ||
; subtags, each preceded by "-". | ||
extlang = 3ALPHA ; selected ISO 639 codes | ||
*2("-" 3ALPHA) ; permanently reserved | ||
|
||
script = 4ALPHA ; ISO 15924 code | ||
|
||
region = 2ALPHA ; ISO 3166-1 code | ||
/ 3digit ; UN M.49 code | ||
|
||
; variant: 5 to 8 alphanumerics, or exactly 4 characters when the first is a digit. | ||
variant = 5*8alphanum ; registered variants | ||
/ (digit 3alphanum) | ||
|
||
; extension: one singleton followed by one or more "-"-separated subtags of 2 to 8 | ||
; alphanumerics each. | ||
extension = singleton 1*("-" (2*8alphanum)) | ||
|
||
; Single alphanumerics | ||
; "x" reserved for private use | ||
; The letter ranges below skip %x58 ("X") and %x78 ("x") in both cases. | ||
singleton = digit ; 0 - 9 | ||
/ %x41-57 ; A - W | ||
/ %x59-5A ; Y - Z | ||
/ %x61-77 ; a - w | ||
/ %x79-7A ; y - z | ||
|
||
; privateuse: "x" followed by one or more "-"-separated subtags of 1 to 8 | ||
; alphanumerics each. | ||
privateuse = "x" 1*("-" (1*8alphanum)) | ||
|
||
grandfathered = irregular ; non-redundant tags registered | ||
/ regular ; during the RFC 3066 era | ||
|
||
; irregular: a closed list of literal tags; each alternative is a complete tag. | ||
irregular = "en-GB-oed" ; irregular tags do not match | ||
/ "i-ami" ; the 'langtag' production and | ||
/ "i-bnn" ; would not otherwise be | ||
/ "i-default" ; considered 'well-formed' | ||
/ "i-enochian" ; These tags are all valid, | ||
/ "i-hak" ; but most are deprecated | ||
/ "i-klingon" ; in favor of more modern | ||
/ "i-lux" ; subtags or subtag | ||
/ "i-mingo" ; combination | ||
/ "i-navajo" | ||
/ "i-pwn" | ||
/ "i-tao" | ||
/ "i-tay" | ||
/ "i-tsu" | ||
/ "sgn-BE-FR" | ||
/ "sgn-BE-NL" | ||
/ "sgn-CH-DE" | ||
|
||
; regular: a closed list of literal tags; each alternative is a complete tag. | ||
regular = "art-lojban" ; these tags match the 'langtag' | ||
/ "cel-gaulish" ; production, but their subtags | ||
/ "no-bok" ; are not extended language | ||
/ "no-nyn" ; or variant subtags: their meaning | ||
/ "zh-guoyu" ; is defined by their registration | ||
/ "zh-hakka" ; and all of these are deprecated | ||
/ "zh-min" ; in favor of a more modern | ||
/ "zh-min-nan" ; subtag or sequence of subtags | ||
/ "zh-xiang" | ||
|
||
alphanum = (ALPHA / digit) ; letters and numbers |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
; ABNF derived from RFC 2045 section 5.1 | ||
; ietf-token and iana-token are defined here in terms of type-name and subtype-name, | ||
; which come from the RFC 6838 rules further down; token comes from the RFC 9110 rules. | ||
; NOTE(review): "digit" is referenced in this file but not defined in it; ABNF rule | ||
; names are case-insensitive, so it must resolve to a digit/DIGIT rule supplied by | ||
; the grammar that includes this snippet - confirm. | ||
type = discrete-type / composite-type | ||
discrete-type = "text" / "image" / "audio" / "video" / | ||
"application" / extension-token | ||
composite-type = "message" / "multipart" / extension-token | ||
extension-token = ietf-token / x-token | ||
ietf-token = type-name | ||
; x-token: the literal "x-" (ABNF string literals match case-insensitively, so "X-" | ||
; also matches) immediately followed by a token. | ||
x-token = "x-" token | ||
subtype = extension-token / iana-token | ||
iana-token = subtype-name | ||
|
||
; ABNF derived from RFC 6838 section 4.2 | ||
type-name = restricted-name | ||
subtype-name = restricted-name | ||
|
||
; restricted-name: one leading letter or digit followed by at most 126 further | ||
; characters, i.e. 1 to 127 characters in total. | ||
restricted-name = restricted-name-first *126restricted-name-chars | ||
restricted-name-first = ALPHA / digit | ||
restricted-name-chars = ALPHA / digit / "!" / "#" / | ||
"$" / "&" / "-" / "^" / "_" | ||
; The two "=/" rules below add "." and "+" as further alternatives of | ||
; restricted-name-chars ("=/" is ABNF's incremental-alternatives operator). | ||
restricted-name-chars =/ "." ; Characters before first dot always | ||
; specify a facet name | ||
restricted-name-chars =/ "+" ; Characters after last plus always | ||
; specify a structured syntax suffix | ||
|
||
; ABNF derived from RFC 9110 section 5.6 | ||
; parameters: zero or more ";"-introduced items with optional whitespace on either | ||
; side of the ";"; the parameter after a ";" may be omitted. | ||
parameters = *( OWS ";" OWS [ parameter ] ) | ||
parameter = parameter-name "=" parameter-value | ||
parameter-name = token | ||
parameter-value = ( token / quoted-string ) | ||
token = 1*tchar | ||
tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" | ||
/ "+" / "-" / "." / "^" / "_" / "`" / "|" / "~" | ||
/ digit / ALPHA | ||
; any VCHAR, except delimiters | ||
OWS = *( SP / HTAB ) | ||
; optional whitespace | ||
; quoted-string: text between double quotes; qdtext excludes %x22 (DQUOTE) and %x5C | ||
; (backslash), which can only appear inside the quotes as part of a quoted-pair. | ||
quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE | ||
qdtext = HTAB / SP / %x21 / %x23-5B / %x5D-7E / obs-text | ||
obs-text = %x80-FF | ||
; quoted-pair: a backslash followed by one tab, space, visible character or obs-text | ||
; octet. ABNF has no escape syntax inside quoted strings, so "\" is a one-character literal. | ||
quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text ) |