Skip to content

Commit

Permalink
first stab.
Browse files Browse the repository at this point in the history
  • Loading branch information
toots committed Mar 22, 2023
1 parent b069236 commit 4fef44a
Show file tree
Hide file tree
Showing 19 changed files with 71,959 additions and 14,682 deletions.
8 changes: 8 additions & 0 deletions src/dune
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,14 @@
%{target}
"\nlet prefix = \"%{env:CAMOMILE_PREFIX=/usr}/share/camomile\"\nlet datadir = Filename.concat prefix \"database\"\nlet localedir = Filename.concat prefix \"locales\"\nlet charmapdir = Filename.concat prefix \"charmaps\"\nlet unimapdir = Filename.concat prefix \"mappings\"\n")))

; Generate script_type.ml by running tools/gen_unidata.exe with
; --gen-script-type and capturing its standard output into the target.
; unidata/Scripts.txt is declared as a dependency, so the rule is re-run
; when that file changes (presumably the tool reads it; the path is not
; passed on the command line -- confirm against gen_unidata).
(rule
(deps tools/gen_unidata.exe unidata/Scripts.txt)
(target script_type.ml)
(action
(with-stdout-to
%{target}
(run tools/gen_unidata.exe --gen-script-type))))

(library
(name camomileLib)
(public_name camomile.lib)
Expand Down
243 changes: 5 additions & 238 deletions src/internal/unidata.ml
Original file line number Diff line number Diff line change
Expand Up @@ -74,52 +74,7 @@ module type Type = sig
(** [num_of_cat c] is the integer code of general category [c]. *)
val num_of_cat : general_category_type -> int

(** [cat_of_num n] is the general category with integer code [n]
    (presumably the inverse of [num_of_cat] -- TODO confirm). *)
val cat_of_num : int -> general_category_type

type script_type =
[ `Common
| `Inherited
| `Latin
| `Greek
| `Cyrillic
| `Armenian
| `Hebrew
| `Arabic
| `Syriac
| `Thaana
| `Devanagari
| `Bengali
| `Gurmukhi
| `Gujarati
| `Oriya
| `Tamil
| `Telugu
| `Kannada
| `Malayalam
| `Sinhala
| `Thai
| `Lao
| `Tibetan
| `Myanmar
| `Georgian
| `Hangul
| `Ethiopic
| `Cherokee
| `Canadian_Aboriginal
| `Ogham
| `Runic
| `Khmer
| `Mongolian
| `Hiragana
| `Katakana
| `Bopomofo
| `Han
| `Yi
| `Old_Italic
| `Gothic
| `Deseret
| `Tagalog
| `Hanunoo
| `Buhid
| `Tagbanwa ]
(** Script of a character. Alias of [Script_type.t]; [script_type.ml] is
    produced at build time by the [gen_unidata.exe --gen-script-type] rule
    in [src/dune], so the set of scripts is not spelled out here. *)
type script_type = Script_type.t

(** [script_of_name name] is the script called [name].
    NOTE(review): behaviour on an unknown [name] depends on the generated
    [Script_type] module and is not visible here -- confirm. *)
val script_of_name : string -> script_type

(** [script_of_num n] is the script with integer code [n]. *)
val script_of_num : int -> script_type
Expand Down Expand Up @@ -329,199 +284,11 @@ module Make (Config : Config.Type) = struct

(* [cat_of_num i] is the [i]-th entry of [cat_of_num_tbl]; the array access
   raises [Invalid_argument] when [i] is out of bounds. *)
let cat_of_num i = cat_of_num_tbl.(i)

type script_type =
[ `Common
| `Inherited
| `Latin
| `Greek
| `Cyrillic
| `Armenian
| `Hebrew
| `Arabic
| `Syriac
| `Thaana
| `Devanagari
| `Bengali
| `Gurmukhi
| `Gujarati
| `Oriya
| `Tamil
| `Telugu
| `Kannada
| `Malayalam
| `Sinhala
| `Thai
| `Lao
| `Tibetan
| `Myanmar
| `Georgian
| `Hangul
| `Ethiopic
| `Cherokee
| `Canadian_Aboriginal
| `Ogham
| `Runic
| `Khmer
| `Mongolian
| `Hiragana
| `Katakana
| `Bopomofo
| `Han
| `Yi
| `Old_Italic
| `Gothic
| `Deseret
| `Tagalog
| `Hanunoo
| `Buhid
| `Tagbanwa ]

let script_of_name name =
match String.lowercase_ascii name with
| "common" -> `Common
| "inherited" -> `Inherited
| "latin" -> `Latin
| "greek" -> `Greek
| "cyrillic" -> `Cyrillic
| "armenian" -> `Armenian
| "hebrew" -> `Hebrew
| "arabic" -> `Arabic
| "syriac" -> `Syriac
| "thaana" -> `Thaana
| "devanagari" -> `Devanagari
| "bengali" -> `Bengali
| "gurmukhi" -> `Gurmukhi
| "gujarati" -> `Gujarati
| "oriya" -> `Oriya
| "tamil" -> `Tamil
| "telugu" -> `Telugu
| "kannada" -> `Kannada
| "malayalam" -> `Malayalam
| "sinhala" -> `Sinhala
| "thai" -> `Thai
| "lao" -> `Lao
| "tibetan" -> `Tibetan
| "myanmar" -> `Myanmar
| "georgian" -> `Georgian
| "hangul" -> `Hangul
| "ethiopic" -> `Ethiopic
| "cherokee" -> `Cherokee
| "canadian_aboriginal" -> `Canadian_Aboriginal
| "ogham" -> `Ogham
| "runic" -> `Runic
| "khmer" -> `Khmer
| "mongolian" -> `Mongolian
| "hiragana" -> `Hiragana
| "katakana" -> `Katakana
| "bopomofo" -> `Bopomofo
| "han" -> `Han
| "yi" -> `Yi
| "old_italic" -> `Old_Italic
| "gothic" -> `Gothic
| "deseret" -> `Deseret
| "tagalog" -> `Tagalog
| "hanunoo" -> `Hanunoo
| "buhid" -> `Buhid
| "tagbanwa" -> `Tagbanwa
| _ -> raise Not_found

let num_of_script = function
| `Common -> 0
| `Inherited -> 1
| `Latin -> 2
| `Greek -> 3
| `Cyrillic -> 4
| `Armenian -> 5
| `Hebrew -> 6
| `Arabic -> 7
| `Syriac -> 8
| `Thaana -> 9
| `Devanagari -> 10
| `Bengali -> 11
| `Gurmukhi -> 12
| `Gujarati -> 13
| `Oriya -> 14
| `Tamil -> 15
| `Telugu -> 16
| `Kannada -> 17
| `Malayalam -> 18
| `Sinhala -> 19
| `Thai -> 20
| `Lao -> 21
| `Tibetan -> 22
| `Myanmar -> 23
| `Georgian -> 24
| `Hangul -> 25
| `Ethiopic -> 26
| `Cherokee -> 27
| `Canadian_Aboriginal -> 28
| `Ogham -> 29
| `Runic -> 30
| `Khmer -> 31
| `Mongolian -> 32
| `Hiragana -> 33
| `Katakana -> 34
| `Bopomofo -> 35
| `Han -> 36
| `Yi -> 37
| `Old_Italic -> 38
| `Gothic -> 39
| `Deseret -> 40
| `Tagalog -> 41
| `Hanunoo -> 42
| `Buhid -> 43
| `Tagbanwa -> 44

let script_tbl =
[|
`Common;
`Inherited;
`Latin;
`Greek;
`Cyrillic;
`Armenian;
`Hebrew;
`Arabic;
`Syriac;
`Thaana;
`Devanagari;
`Bengali;
`Gurmukhi;
`Gujarati;
`Oriya;
`Tamil;
`Telugu;
`Kannada;
`Malayalam;
`Sinhala;
`Thai;
`Lao;
`Tibetan;
`Myanmar;
`Georgian;
`Hangul;
`Ethiopic;
`Cherokee;
`Canadian_Aboriginal;
`Ogham;
`Runic;
`Khmer;
`Mongolian;
`Hiragana;
`Katakana;
`Bopomofo;
`Han;
`Yi;
`Old_Italic;
`Gothic;
`Deseret;
`Tagalog;
`Hanunoo;
`Buhid;
`Tagbanwa;
|]
(* Alias of [Script_type.t]; [script_type.ml] is generated at build time by
   the [gen_unidata.exe --gen-script-type] rule in [src/dune]. *)
type script_type = Script_type.t

let script_of_num i = script_tbl.(i)
(* Script conversions are thin aliases for the corresponding functions of the
   [Script_type] module.
   NOTE(review): the name-lookup function is called [script_type_of_name],
   unlike [num_of_script] / [script_of_num] -- confirm it matches what the
   generator emits.
   NOTE(review): the hand-written [script_of_name] this replaces lowercased
   its argument and raised [Not_found] on unknown names; confirm that
   [Script_type.script_type_of_name] keeps that contract. *)
let script_of_name = Script_type.script_type_of_name
let num_of_script = Script_type.num_of_script
let script_of_num = Script_type.script_of_num

type decomposition_type =
[ `Canon
Expand Down
47 changes: 1 addition & 46 deletions src/internal/unidata.mli
Original file line number Diff line number Diff line change
Expand Up @@ -74,52 +74,7 @@ module type Type = sig
(** [num_of_cat c] is the integer code of general category [c]. *)
val num_of_cat : general_category_type -> int

(** [cat_of_num n] is the general category with integer code [n]
    (presumably the inverse of [num_of_cat] -- TODO confirm). *)
val cat_of_num : int -> general_category_type

type script_type =
[ `Common
| `Inherited
| `Latin
| `Greek
| `Cyrillic
| `Armenian
| `Hebrew
| `Arabic
| `Syriac
| `Thaana
| `Devanagari
| `Bengali
| `Gurmukhi
| `Gujarati
| `Oriya
| `Tamil
| `Telugu
| `Kannada
| `Malayalam
| `Sinhala
| `Thai
| `Lao
| `Tibetan
| `Myanmar
| `Georgian
| `Hangul
| `Ethiopic
| `Cherokee
| `Canadian_Aboriginal
| `Ogham
| `Runic
| `Khmer
| `Mongolian
| `Hiragana
| `Katakana
| `Bopomofo
| `Han
| `Yi
| `Old_Italic
| `Gothic
| `Deseret
| `Tagalog
| `Hanunoo
| `Buhid
| `Tagbanwa ]
(** Script of a character. Alias of [Script_type.t]; [script_type.ml] is
    produced at build time by the [gen_unidata.exe --gen-script-type] rule
    in [src/dune], so the set of scripts is not spelled out here. *)
type script_type = Script_type.t

(** [script_of_name name] is the script called [name].
    NOTE(review): behaviour on an unknown [name] depends on the generated
    [Script_type] module and is not visible here -- confirm. *)
val script_of_name : string -> script_type

(** [script_of_num n] is the script with integer code [n]. *)
val script_of_num : int -> script_type
Expand Down
47 changes: 1 addition & 46 deletions src/public/uCharInfo.ml
Original file line number Diff line number Diff line change
Expand Up @@ -112,52 +112,7 @@ module type Type = sig
(** [load_property_set p] is a [USet.t] for character property [p]
    (presumably the set of characters that have [p] -- TODO confirm). *)
val load_property_set : character_property_type -> USet.t

(** [load_property_set_by_name name] is a [USet.t] selected by the string
    [name] (presumably the property's name, as a by-name variant of
    [load_property_set] -- TODO confirm). *)
val load_property_set_by_name : string -> USet.t

type script_type =
[ `Common
| `Inherited
| `Latin
| `Greek
| `Cyrillic
| `Armenian
| `Hebrew
| `Arabic
| `Syriac
| `Thaana
| `Devanagari
| `Bengali
| `Gurmukhi
| `Gujarati
| `Oriya
| `Tamil
| `Telugu
| `Kannada
| `Malayalam
| `Sinhala
| `Thai
| `Lao
| `Tibetan
| `Myanmar
| `Georgian
| `Hangul
| `Ethiopic
| `Cherokee
| `Canadian_Aboriginal
| `Ogham
| `Runic
| `Khmer
| `Mongolian
| `Hiragana
| `Katakana
| `Bopomofo
| `Han
| `Yi
| `Old_Italic
| `Gothic
| `Deseret
| `Tagalog
| `Hanunoo
| `Buhid
| `Tagbanwa ]
(** Script of a character. Alias of [Script_type.t]; [script_type.ml] is
    produced at build time by the [gen_unidata.exe --gen-script-type] rule
    in [src/dune], so the set of scripts is not spelled out here. *)
type script_type = Script_type.t

(** [script u] is the script of the character [u]. *)
val script : UChar.t -> script_type

(** [load_script_map ()] is a [UMap.t] whose values are [script_type]s
    (presumably mapping each character to its script -- TODO confirm). *)
val load_script_map : unit -> script_type UMap.t
Expand Down
Loading

0 comments on commit 4fef44a

Please sign in to comment.