diff --git a/g2p/mappings/langs/hur/config-g2p.yaml b/g2p/mappings/langs/hur/config-g2p.yaml new file mode 100644 index 00000000..6b9f5f11 --- /dev/null +++ b/g2p/mappings/langs/hur/config-g2p.yaml @@ -0,0 +1,27 @@ +<<: &shared + language_name: Halkomelem +mappings: + - display_name: Halkomelem APA to Hul’q’umi’num’ (Island) practical orthography + in_lang: hur_apa + out_lang: hur_orthog + authors: + - Zack Gilkison + type: mapping + rules_path: hur_apa_to_hur_orthog.json + prevent_feeding: false + rule_ordering: apply-longest-first + case_sensitive: false + norm_form: NFD + # <<: &shared + - display_name: Hul’q’umi’num’ (Island) practical orthography to Halkomelem APA + in_lang: hur_orthog + out_lang: hur_apa + authors: + - Zack Gilkison + type: mapping + rules_path: hur_orthog_to_hur_apa.json + prevent_feeding: true + rule_ordering: apply-longest-first + case_sensitive: false + norm_form: NFD + language_name: Halkomelem diff --git a/g2p/mappings/langs/hur/hur_apa_to_hur_orthog.json b/g2p/mappings/langs/hur/hur_apa_to_hur_orthog.json new file mode 100644 index 00000000..35bcc9d4 --- /dev/null +++ b/g2p/mappings/langs/hur/hur_apa_to_hur_orthog.json @@ -0,0 +1,83 @@ +[ + {"in": "k̓ʷ", "out": "kw’"}, + {"in": "q̓ʷ", "out": "qw’"}, + {"in": "t̓ᶿ", "out": "tth’"}, + {"in": "č̓", "out": "ch’"}, + {"in": "x̌ʷ", "out": "xw"}, + {"in": "ə́", "out": "ú"}, + {"in": "ə̀", "out": "u"}, + {"in": "à", "out": "à"}, + {"in": "ɛ́", "out": "é"}, + {"in": "a:", "out": "aa"}, + {"in": "e:", "out": "ee"}, + {"in": "i:", "out": "ii"}, + {"in": "u:", "out": "oo"}, + {"in": "p̓", "out": "p’"}, + {"in": "t̓", "out": "t’"}, + {"in": "kʷ", "out": "kw"}, + {"in": "q̓", "out": "q’"}, + {"in": "qʷ", "out": "qw"}, + {"in": "tᶿ", "out": "tth"}, + {"in": "ts", "out": "t-s"}, + {"in": "c̓", "out": "ts’"}, + {"in": "č", "out": "ch"}, + {"in": "ƛ̓", "out": "tl’"}, + {"in": "š", "out": "sh"}, + {"in": "sh", "out": "s-h"}, + {"in": "xʷ", "out": "hw"}, + {"in": "x̌", "out": "x"}, + {"in": "y̓", "out": 
"’y", "context_before": "e", "context_after": "i", "prevent_feeding": true}, + {"in": "y̓", "out": "’y", "context_before": "e", "context_after": "a", "prevent_feeding": true}, + {"in": "y̓", "out": "’y", "context_before": "e", "context_after": "ə", "prevent_feeding": true}, + {"in": "y̓", "out": "’y", "context_before": "i", "context_after": "a", "prevent_feeding": true}, + {"in": "y̓", "out": "’y", "context_before": "i", "context_after": "ə", "prevent_feeding": true}, + {"in": "y̓", "out": "’y", "context_before": "a", "context_after": "ə", "prevent_feeding": true}, + {"in": "m̓", "out": "’m", "context_before": "e", "context_after": "i", "prevent_feeding": true}, + {"in": "m̓", "out": "’m", "context_before": "e", "context_after": "a", "prevent_feeding": true}, + {"in": "m̓", "out": "’m", "context_before": "e", "context_after": "ə", "prevent_feeding": true}, + {"in": "m̓", "out": "’m", "context_before": "i", "context_after": "a", "prevent_feeding": true}, + {"in": "m̓", "out": "’m", "context_before": "i", "context_after": "ə", "prevent_feeding": true}, + {"in": "m̓", "out": "’m", "context_before": "a", "context_after": "ə", "prevent_feeding": true}, + {"in": "w̓", "out": "’w", "context_before": "e", "context_after": "i", "prevent_feeding": true}, + {"in": "w̓", "out": "’w", "context_before": "e", "context_after": "a", "prevent_feeding": true}, + {"in": "w̓", "out": "’w", "context_before": "e", "context_after": "ə", "prevent_feeding": true}, + {"in": "w̓", "out": "’w", "context_before": "i", "context_after": "a", "prevent_feeding": true}, + {"in": "w̓", "out": "’w", "context_before": "i", "context_after": "ə", "prevent_feeding": true}, + {"in": "w̓", "out": "’w", "context_before": "a", "context_after": "ə", "prevent_feeding": true}, + {"in": "n̓", "out": "’n", "context_before": "e", "context_after": "i", "prevent_feeding": true}, + {"in": "n̓", "out": "’n", "context_before": "e", "context_after": "a", "prevent_feeding": true}, + {"in": "n̓", "out": "’n", 
"context_before": "e", "context_after": "ə", "prevent_feeding": true}, + {"in": "n̓", "out": "’n", "context_before": "i", "context_after": "a", "prevent_feeding": true}, + {"in": "n̓", "out": "’n", "context_before": "i", "context_after": "ə", "prevent_feeding": true}, + {"in": "n̓", "out": "’n", "context_before": "a", "context_after": "ə", "prevent_feeding": true}, + {"in": "l̕", "out": "’l", "context_before": "e", "context_after": "i", "prevent_feeding": true}, + {"in": "l̕", "out": "’l", "context_before": "e", "context_after": "a", "prevent_feeding": true}, + {"in": "l̕", "out": "’l", "context_before": "e", "context_after": "ə", "prevent_feeding": true}, + {"in": "l̕", "out": "’l", "context_before": "i", "context_after": "a", "prevent_feeding": true}, + {"in": "l̕", "out": "’l", "context_before": "i", "context_after": "ə", "prevent_feeding": true}, + {"in": "l̕", "out": "’l", "context_before": "a", "context_after": "ə", "prevent_feeding": true}, + {"in": "l̓", "out": "’l", "context_before": "e", "context_after": "i", "prevent_feeding": true}, + {"in": "l̓", "out": "’l", "context_before": "e", "context_after": "a", "prevent_feeding": true}, + {"in": "l̓", "out": "’l", "context_before": "e", "context_after": "ə", "prevent_feeding": true}, + {"in": "l̓", "out": "’l", "context_before": "i", "context_after": "a", "prevent_feeding": true}, + {"in": "l̓", "out": "’l", "context_before": "i", "context_after": "ə", "prevent_feeding": true}, + {"in": "l̓", "out": "’l", "context_before": "a", "context_after": "ə", "prevent_feeding": true}, + {"in": "y̓", "out": "y’"}, + {"in": "m̓", "out": "m’"}, + {"in": "w̓", "out": "w’"}, + {"in": "n̓", "out": "n’"}, + {"in": "l̕", "out": "l’"}, + {"in": "l̓", "out": "l’"}, + {"in": "ɛ", "out": "ɛ"}, + {"in": "=", "out": "‘"}, + {"in": "·", "out": "·"}, + {"in": "ʔ", "out": "’"}, + {"in": "ł", "out": "lh"}, + {"in": "ɫ", "out": "lh"}, + {"in": "u", "out": "ou"}, + {"in": "q", "out": "q"}, + {"in": "ə", "out": "u"}, + {"in": "c", "out": 
"ts"}, + {"in": "θ", "out": "th"}, + {"in": "h", "out": "h"} +] diff --git a/g2p/mappings/langs/hur/hur_orthog_to_hur_apa.json b/g2p/mappings/langs/hur/hur_orthog_to_hur_apa.json new file mode 100644 index 00000000..bbbd75f8 --- /dev/null +++ b/g2p/mappings/langs/hur/hur_orthog_to_hur_apa.json @@ -0,0 +1,52 @@ +[ + {"in": "tth’", "out": "t̓ᶿ"}, + {"in": "kw’", "out": "k̓ʷ"}, + {"in": "qw’", "out": "q̓ʷ"}, + {"in": "tth", "out": "tᶿ"}, + {"in": "t-s", "out": "ts"}, + {"in": "ts’", "out": "c̓"}, + {"in": "ch’", "out": "č̓"}, + {"in": "tl’", "out": "ƛ̓"}, + {"in": "s-h", "out": "sh"}, + {"in": "ú", "out": "ə́"}, + {"in": "ù", "out": "ə̀"}, + {"in": "à", "out": "à"}, + {"in": "é", "out": "é"}, + {"in": "lh", "out": "ł"}, + {"in": "aa", "out": "a:"}, + {"in": "ee", "out": "e:"}, + {"in": "ii", "out": "i:"}, + {"in": "ou", "out": "u"}, + {"in": "oo", "out": "u:"}, + {"in": "p’", "out": "p̓"}, + {"in": "t’", "out": "t̓"}, + {"in": "kw", "out": "kʷ"}, + {"in": "q’", "out": "q̓"}, + {"in": "qw", "out": "qʷ"}, + {"in": "ts", "out": "c"}, + {"in": "ch", "out": "č"}, + {"in": "th", "out": "θ"}, + {"in": "sh", "out": "š"}, + {"in": "hw", "out": "xʷ"}, + {"in": "xw", "out": "x̌ʷ"}, + {"in": "’y", "out": "y̓"}, + {"in": "y’", "out": "y̓"}, + {"in": "’w", "out": "w̓"}, + {"in": "w’", "out": "w̓"}, + {"in": "’m", "out": "m̓"}, + {"in": "m’", "out": "m̓"}, + {"in": "’l", "out": "l̓"}, + {"in": "l’", "out": "l̓"}, + {"in": "’l", "out": "l̕"}, + {"in": "l’", "out": "l̕"}, + {"in": "’n", "out": "n̓"}, + {"in": "n’", "out": "n̓"}, + {"in": "ɛ", "out": "ɛ"}, + {"in": "‘", "out": "="}, + {"in": "·", "out": "·"}, + {"in": "’", "out": "ʔ"}, + {"in": "q", "out": "q"}, + {"in": "u", "out": "ə"}, + {"in": "h", "out": "h"}, + {"in": "x", "out": "x̌"} +] diff --git a/g2p/mappings/langs/network.pkl b/g2p/mappings/langs/network.pkl new file mode 100644 index 00000000..765dad56 Binary files /dev/null and b/g2p/mappings/langs/network.pkl differ diff --git a/g2p/tests/public/data/hur.psv 
b/g2p/tests/public/data/hur.psv new file mode 100644 index 00000000..40569feb --- /dev/null +++ b/g2p/tests/public/data/hur.psv @@ -0,0 +1,136 @@ +hur-apa|hur-orthog|im̓ə|i’mu +hur-apa|hur-orthog|am̓ə|a’mu +hur-apa|hur-orthog|əm̓ə|um’u +hur-apa|hur-orthog|ey̓ə|e’yu +hur-apa|hur-orthog|iy̓ə|i’yu +hur-apa|hur-orthog|ay̓ə|a’yu +hur-apa|hur-orthog|əy̓ə|uy’u +hur-apa|hur-orthog|el̓ə|e’lu +hur-apa|hur-orthog|il̓ə|i’lu +hur-apa|hur-orthog|al̓ə|a’lu +hur-apa|hur-orthog|əl̓ə|ul’u +hur-apa|hur-orthog|ey̓ə|e’yu +hur-apa|hur-orthog|iy̓ə|i’yu +hur-apa|hur-orthog|ay̓ə|a’yu +hur-apa|hur-orthog|əy̓ə|uy’u +hur-apa|hur-orthog|ew̓ə|e’wu +hur-apa|hur-orthog|iw̓ə|i’wu +hur-apa|hur-orthog|aw̓ə|a’wu +hur-apa|hur-orthog|əw̓ə|uw’u +hur-apa|hur-orthog|ey̓ə|e’yu +hur-apa|hur-orthog|iy̓ə|i’yu +hur-apa|hur-orthog|ay̓ə|a’yu +hur-apa|hur-orthog|əy̓ə|uy’u +hur-apa|hur-orthog|el̓ə|e’lu +hur-apa|hur-orthog|il̓ə|i’lu +hur-apa|hur-orthog|al̓ə|a’lu +hur-apa|hur-orthog|əl̓ə|ul’u +hur-apa|hur-orthog|ey̓ə|e’yu +hur-apa|hur-orthog|iy̓ə|i’yu +hur-apa|hur-orthog|ay̓ə|a’yu +hur-apa|hur-orthog|əy̓ə|uy’u +hur-apa|hur-orthog|en̓ə|e’nu +hur-apa|hur-orthog|in̓ə|i’nu +hur-apa|hur-orthog|an̓ə|a’nu +hur-apa|hur-orthog|ən̓ə|un’u +hur-apa|hur-orthog|ey̓ə|e’yu +hur-apa|hur-orthog|iy̓ə|i’yu +hur-apa|hur-orthog|ay̓ə|a’yu +hur-apa|hur-orthog|əy̓ə|uy’u +hur-apa|hur-orthog|el̓ə|e’lu +hur-apa|hur-orthog|il̓ə|i’lu +hur-apa|hur-orthog|al̓ə|a’lu +hur-apa|hur-orthog|əl̓ə|ul’u +hur-apa|hur-orthog|ey̓ə|e’yu +hur-apa|hur-orthog|iy̓ə|i’yu +hur-apa|hur-orthog|ay̓ə|a’yu +hur-apa|hur-orthog|əy̓ə|uy’u +hur-apa|hur-orthog|ew̓ə|e’wu +hur-apa|hur-orthog|iw̓ə|i’wu +hur-apa|hur-orthog|aw̓ə|a’wu +hur-apa|hur-orthog|əw̓ə|uw’u +hur-apa|hur-orthog|ey̓ə|e’yu +hur-apa|hur-orthog|iy̓ə|i’yu +hur-apa|hur-orthog|ay̓ə|a’yu +hur-apa|hur-orthog|əy̓ə|uy’u +hur-apa|hur-orthog|el̓ə|e’lu +hur-apa|hur-orthog|il̓ə|i’lu +hur-apa|hur-orthog|al̓ə|a’lu +hur-apa|hur-orthog|əl̓ə|ul’u +hur-apa|hur-orthog|ey̓ə|e’yu +hur-apa|hur-orthog|iy̓ə|i’yu 
+hur-apa|hur-orthog|ay̓ə|a’yu +hur-apa|hur-orthog|əy̓ə|uy’u +hur-orthog|hur-apa|i’mu|im̓ə +hur-orthog|hur-apa|a’mu|am̓ə +hur-orthog|hur-apa|um’u|əm̓ə +hur-orthog|hur-apa|e’yu|ey̓ə +hur-orthog|hur-apa|i’yu|iy̓ə +hur-orthog|hur-apa|a’yu|ay̓ə +hur-orthog|hur-apa|uy’u|əy̓ə +hur-orthog|hur-apa|e’lu|el̓ə +hur-orthog|hur-apa|i’lu|il̓ə +hur-orthog|hur-apa|a’lu|al̓ə +hur-orthog|hur-apa|ul’u|əl̓ə +hur-orthog|hur-apa|e’yu|ey̓ə +hur-orthog|hur-apa|i’yu|iy̓ə +hur-orthog|hur-apa|a’yu|ay̓ə +hur-orthog|hur-apa|uy’u|əy̓ə +hur-orthog|hur-apa|e’wu|ew̓ə +hur-orthog|hur-apa|i’wu|iw̓ə +hur-orthog|hur-apa|a’wu|aw̓ə +hur-orthog|hur-apa|uw’u|əw̓ə +hur-orthog|hur-apa|e’yu|ey̓ə +hur-orthog|hur-apa|i’yu|iy̓ə +hur-orthog|hur-apa|a’yu|ay̓ə +hur-orthog|hur-apa|uy’u|əy̓ə +hur-orthog|hur-apa|e’lu|el̓ə +hur-orthog|hur-apa|i’lu|il̓ə +hur-orthog|hur-apa|a’lu|al̓ə +hur-orthog|hur-apa|ul’u|əl̓ə +hur-orthog|hur-apa|e’yu|ey̓ə +hur-orthog|hur-apa|i’yu|iy̓ə +hur-orthog|hur-apa|a’yu|ay̓ə +hur-orthog|hur-apa|uy’u|əy̓ə +hur-orthog|hur-apa|e’nu|en̓ə +hur-orthog|hur-apa|i’nu|in̓ə +hur-orthog|hur-apa|a’nu|an̓ə +hur-orthog|hur-apa|un’u|ən̓ə +hur-orthog|hur-apa|e’yu|ey̓ə +hur-orthog|hur-apa|i’yu|iy̓ə +hur-orthog|hur-apa|a’yu|ay̓ə +hur-orthog|hur-apa|uy’u|əy̓ə +hur-orthog|hur-apa|e’lu|el̓ə +hur-orthog|hur-apa|i’lu|il̓ə +hur-orthog|hur-apa|a’lu|al̓ə +hur-orthog|hur-apa|ul’u|əl̓ə +hur-orthog|hur-apa|e’yu|ey̓ə +hur-orthog|hur-apa|i’yu|iy̓ə +hur-orthog|hur-apa|a’yu|ay̓ə +hur-orthog|hur-apa|uy’u|əy̓ə +hur-orthog|hur-apa|e’wu|ew̓ə +hur-orthog|hur-apa|i’wu|iw̓ə +hur-orthog|hur-apa|a’wu|aw̓ə +hur-orthog|hur-apa|uw’u|əw̓ə +hur-orthog|hur-apa|e’yu|ey̓ə +hur-orthog|hur-apa|i’yu|iy̓ə +hur-orthog|hur-apa|a’yu|ay̓ə +hur-orthog|hur-apa|uy’u|əy̓ə +hur-orthog|hur-apa|e’lu|el̓ə +hur-orthog|hur-apa|i’lu|il̓ə +hur-orthog|hur-apa|a’lu|al̓ə +hur-orthog|hur-apa|ul’u|əl̓ə +hur-orthog|hur-apa|e’yu|ey̓ə +hur-orthog|hur-apa|i’yu|iy̓ə +hur-orthog|hur-apa|a’yu|ay̓ə +hur-orthog|hur-apa|uy’u|əy̓ə +hur-orthog|hur-apa|e’mu|em̓ə 
+hur-orthog|hur-apa|i’mu|im̓ə +hur-orthog|hur-apa|a’mu|am̓ə +hur-orthog|hur-apa|um’u|əm̓ə +hur-apa|hur-orthog|em̓ə|e’mu +hur-apa|hur-orthog|im̓ə|i’mu +hur-apa|hur-orthog|am̓ə|a’mu +hur-apa|hur-orthog|əm̓ə|um’u +hur-orthog|hur-apa|hun’lhultnamut-s|hən̓łəltnaməts +hur-orthog|hur-apa|tl’lim’|ƛ̓lim̓