From f2bd2149328ee217bbc075662fc49ac58eed3201 Mon Sep 17 00:00:00 2001 From: Christopher Cox Date: Tue, 18 Jul 2023 14:28:43 -0600 Subject: [PATCH] Initial (and long overdue) support for noun morphology --- src/fst/Makefile.am | 17 +- src/fst/affixes/noun_affixes.lexc | 305 ++++++++++++++++++++++++++++++ src/fst/affixes/nouns.lexc | 32 ---- src/fst/root.lexc | 285 +++++++--------------------- src/fst/stems/noun_stems.lexc | 58 ++++++ src/fst/stems/nouns.lexc | 55 ------ src/fst/stems/verbs.lexc | 47 ----- 7 files changed, 436 insertions(+), 363 deletions(-) create mode 100644 src/fst/affixes/noun_affixes.lexc delete mode 100644 src/fst/affixes/nouns.lexc create mode 100644 src/fst/stems/noun_stems.lexc delete mode 100644 src/fst/stems/nouns.lexc delete mode 100644 src/fst/stems/verbs.lexc diff --git a/src/fst/Makefile.am b/src/fst/Makefile.am index f18fb930..3669dc7f 100644 --- a/src/fst/Makefile.am +++ b/src/fst/Makefile.am @@ -29,11 +29,11 @@ endif GT_LEXC_ROOT=$(srcdir)/root.lexc # Set this to the names of all regular lexc source files: -GT_LEXC_SRCS_L1_L2= -# stems/words.lexc \ -# affixes/nouns.lexc \ -# affixes/symbols.lexc \ -# stems/nouns.lexc +GT_LEXC_SRCS_L1_L2= affixes/noun_affixes.lexc \ + stems/noun_stems.lexc \ + affixes/symbols.lexc +# affixes/propernouns.lexc +# stems/words.lexc # If you are building an error-detecting L2 analyser, specify the lexc files # that differ between the regular L1 and the L2 analysers below, in L1 and @@ -85,10 +85,11 @@ GT_LOCAL_XFSCRIPT_SRCS= # Define here any additional sources just included in the distro: GT_DISTRO_SRCS=affixes/verb_inner_affixes.lexc \ - affixes/verb_middle_affixes.lexc \ - affixes/verb_outer_affixes.lexc \ - affixes/verb_oblique_affixes.lexc \ + affixes/verb_middle_affixes.lexc \ + affixes/verb_outer_affixes.lexc \ + affixes/verb_oblique_affixes.lexc \ affixes/verb_tags.lexc \ + affixes/postverbal_affixes.lexc \ stems/verb_stems.lexc ### BEGIN: Local processing: ### diff --git 
a/src/fst/affixes/noun_affixes.lexc b/src/fst/affixes/noun_affixes.lexc new file mode 100644 index 00000000..95a80e10 --- /dev/null +++ b/src/fst/affixes/noun_affixes.lexc @@ -0,0 +1,305 @@ +! Tsuut'ina nouns can appear in inflected forms that reflect two features: +! possession (person and number of possessor, with some entanglement with +! the person and number of the subject of the utterance in cases where the +! possessed noun isn't itself the subject) and number (singular vs. plural, +! only in the case of a closed set of nouns): +! +! 1. Possession: Tsuut'ina nouns can be divided into three classes on the +! basis of possession: +! +! 1. Nouns that always appear in unpossessed form (e.g., nàk'us "cloud"). +! This class includes both common and proper nouns. +! +! 2. Nouns that always appear in possessed form (e.g., sitsì "my head", nitò +! "your father"). This class is generally limited to body parts and +! kinship terms. +! +! 3. Nouns that may appear in either possessed or unpossessed form (e.g., +! más "knife" ~ simázà "my knife", guuniizh "story" ~ máa guunijà +! "his/her/its story"). +! +! Nouns in the third class typically have different possessed and unpossessed +! stem forms (e.g., más [unpossessed] vs. -mázà [possessed] "knife", násʔághá +! [unpossessed] vs. -násʔághà [possessed] "house"). +! +! Nouns that have possessed forms may indicate possession morphosyntactically +! in two different ways: +! +! A. With the addition of a possessive prefix (e.g., si- "1SG.POSS"). There +! are three phonologically distinct sets of these prefixes: +! +! i. Mid-tone possessive prefixes: Possessive prefixes appear with a +! short, mid-tone vowel of the form si-, ni-, mi- (e.g., sinásʔághà +! "my house"). These are the most common possessive prefix forms +! in Tsuut'ina. +! +! ii. High-tone possessive prefixes: Possessive prefixes appear with a +! short, high-tone vowel of the form sí-, ní-, mí- (e.g., sítsí +! "my nose", sídá "my older sister"). 
This high tone derives +! historically from a nasalized vowel in this prefix, and only +! appears in a small, closed set of lexemes in present-day Tsuut'ina. +! +! iii. Mid-rising-tone possessive prefixes: Possessive prefixes appear +! with a long, mid-to-high rising tone vowel of the form sií-, +! nií-, mií- (e.g., siízá "my son [woman speaking]", siíts'a "my +! daughter [woman speaking]"). These appear only in a very limited, +! closed set of lexemes. +! +! B. With an inflected postposition of the form sáa, náa, máa, that appears +! immediately before the possessed noun (e.g., sáa guunijà "my story" +! [alongside siguunijà], sáa 'uncle' "my uncle"). This strategy for +! indicating possession is often used with loanwords from languages like +! English, and the inflected postposition is sometimes written as a prefix +! on the possessed noun (i.e., without any whitespace separating the +! postposition from the possessed noun). +! +! While most possessed nouns only appear in one of these possessive +! constructions, a small number are attested in both (e.g., máa guunijà ~ +! miguunijà "his/her/its story"). +! +! +! 2. Number: Most nouns in Tsuut'ina are not inflected for number (singular +! vs. plural), and do not provide any morphological indications of a number +! distinction. That is, the same noun form can have both singular and plural +! interpretations, based on the context in which it appears (e.g., diná +! "person, people"; tłìk'í más "one knife" vs. akíyí más "two knives"). +! +! A small, closed set of Tsuut'ina nouns have distinct singular and plural +! forms. These can be divided into two classes: +! +! 1. Nouns that may appear with the suffix -ká to indicate plural number +! (e.g., tłích'á "dog" ~ tłích'áká "dogs", xaní "cow" ~ xaníká/xanáká +! "cows"). +! +! This set is limited to a small number of nouns referring to living +! beings, generally domesticated and semi-domesticated animals (e.g., +! 
most livestock, dogs, cats) and, for some speakers, some kinship +! terms (e.g., grandchildren, younger sisters and brothers, etc.). +! This plural suffix appears with both possessed and unpossessed forms +! of these nouns (e.g., ístłíká "horses" ~ silích'àká "my horses"). +! +! It should be noted that +! +! i. A number of these nouns whose singular forms contain /i/ come to +! have assimilated pronunciations with /a/ when -ká is added. This +! results in irregular-seeming plural forms (e.g., ístłí "horse" ~ +! ástłáká "horses", alongside ístłíká). +! +! ii. Some Tsuut'ina speakers report that -ká may only be used with +! animals and not with kinship terms (cf. TLL-20230619), while +! others accept -ká with both. +! +! 2. Nouns that have suppletive plural forms (e.g., ts'ìdoótsa "girl" ~ +! it'óókúwá "girls", k'àt'íní/k'òt'íní "man" ~ k'àt'únághá/k'òt'únághá +! "men"). +! +! Many of these nouns can be analyzed as incorporating historical plural +! suffixes with several forms, including: +! +! i. -íghá/ághá (e.g., xàkíjí "chief" ~ xàkújághá "chiefs"); +! +! ii. -kúwá (e.g., ts'ootsa "old woman" ~ ts'ookúwá "old women", +! alongside ts'ooká). +! +! iii. -ká (in cases where the stem is shorter in the plural form than +! in the singular, suggesting that this is not an instance of +! regular suffixation of -ká as discussed above; e.g., isgiyá +! "young man" ~ isgááká "young men", xàlítsa "old man" ~ xàlíká +! "old men", ts'ootsa "old woman" ~ ts'ooká "old women"). +! +! For the purposes of FST modelling, all of these historical sub-classes +! can be treated synchronically as involving the same kind of suppletive +! relationship between singular and plural forms. +! +! There is also a separate post-nominal marker, yiná, that marks associative +! plurals referring to groups of human beings (e.g., Harold yiná "Harold and +! them", sídá yiná "my older sisters [as a group]"). It would be possible to +! analyze this as part of the nominal morphology surrounding nouns in +! 
Tsuut'ina, but, since yiná is often written as a separate word (or separated +! by a hyphen), we treat it here as an independent word form. +! +! The following lexc definitions use "+Sg" and "+Pl" tags to indicate the +! value of the number feature of nouns that distinguish singular and plural +! forms (both with the suffix -ká and by suppletion). Nouns that do not +! distinguish singular and plural forms are not marked for this feature. +! +! +! The Big Crow and Big Plume dialect forms of the possessive prefixes defined +! below are widely attested in documentation of Tsuut'ina, and are drawn from +! several sources (e.g., inflected forms found in TLL-20220825). Crowchief +! dialect possessive prefix forms are less well documented; the forms defined +! here are taken from Onespot & Young (1939: MSS 672 BC, Box 1, Folder 22a, +! p. 67; Edward Onespot, speaker), which provides a full list of prefix forms. +! Other attestations of Crowchief dialect possessive prefix forms may exist +! (e.g., in the grammatical elicitation sections of Goddard's unpublished +! Tsuut'ina fieldnotes, particularly in consultation with Annie Onespot, a +! Crowchief dialect speaker), and it may be valuable for those sources to be +! consulted further in future work. 
+LEXICON NounPrefixes + UnpossessedStems; + miPossessivePrefixes; + miHPossessivePrefixes; + miiHPossessivePrefixes; + maaPossessivePrefixes; + +LEXICON miPossessivePrefixes + +@U.PX.SG1@:@U.PX.SG1@si miPossessedStems; +@U.PX.SG2@:@U.PX.SG2@ni miPossessedStems; +@U.PX.SG3@:@U.PX.SG3@mi miPossessedStems; +@U.PX.SG3SBJSG3@:@U.PX.SG3SBJSG3@yi miPossessedStems; +@U.PX.SG3SBJSG3COREF@:@U.PX.SG3SBJSG3COREF@di miPossessedStems; +@U.PX.SG3SBJPL3@:@U.PX.SG3SBJPL3@giyi miPossessedStems; +@U.PX.SG4@:@U.PX.SG4@gu miPossessedStems; + +@U.PX.PL1@ miPossessivePrefixes12PL; +@U.PX.PL2@ miPossessivePrefixes12PL; +@U.PX.PL3@:@U.PX.PL3@gimi miPossessedStems; +@U.PX.PL3SBJ3@:@U.PX.PL3SBJ3@gu miPossessedStems; +@U.PX.PL3SBJ3COREF@:@U.PX.PL3SBJ3COREF@gidi miPossessedStems; + +@U.PX.AREAL@:@U.PX.AREAL@gu miPossessedStems; +@U.PX.GIVEN@:@U.PX.GIVEN@ miPossessedStems; +@U.PX.INDEF@:@U.PX.INDEF@i miPossessedStems; +@U.PX.RECIP@ miPossessivePrefixesRecip; + +LEXICON miPossessivePrefixes12PL +@U.DIALECT.BIGCROW@:@U.DIALECT.BIGCROW@nihi miPossessedStems; +@U.DIALECT.BIGPLUME@:@U.DIALECT.BIGPLUME@nahi miPossessedStems; +@U.DIALECT.CROWCHIEF@:@U.DIALECT.CROWCHIEF@nahi miPossessedStems; + +LEXICON miPossessivePrefixesRecip +@U.DIALECT.BIGCROW@:@U.DIALECT.BIGCROW@ítłi miPossessedStems; +@U.DIALECT.BIGPLUME@:@U.DIALECT.BIGPLUME@átłi miPossessedStems; +@U.DIALECT.BIGPLUME@:@U.DIALECT.BIGPLUME@ástłi miPossessedStems; +@U.DIALECT.CROWCHIEF@:@U.DIALECT.CROWCHIEF@átłi miPossessedStems; + + +LEXICON miHPossessivePrefixes + +@U.PX.SG1@:@U.PX.SG1@sí miHPossessedStems; +@U.PX.SG2@:@U.PX.SG2@ní miHPossessedStems; +@U.PX.SG3@:@U.PX.SG3@mí miHPossessedStems; +@U.PX.SG3SBJSG3@:@U.PX.SG3SBJSG3@yí miHPossessedStems; +@U.PX.SG3SBJSG3COREF@:@U.PX.SG3SBJSG3COREF@dí miHPossessedStems; +@U.PX.SG3SBJPL3@:@U.PX.SG3SBJPL3@giyí miHPossessedStems; +@U.PX.SG4@:@U.PX.SG4@gú miHPossessedStems; + +@U.PX.PL1@ miHPossessivePrefixes12PL; +@U.PX.PL2@ miHPossessivePrefixes12PL; +@U.PX.PL3@:@U.PX.PL3@gimí miHPossessedStems; 
+@U.PX.PL3SBJ3@:@U.PX.PL3SBJ3@gú miHPossessedStems; +@U.PX.PL3SBJ3COREF@:@U.PX.PL3SBJ3COREF@gidí miHPossessedStems; + +@U.PX.AREAL@:@U.PX.AREAL@gú miHPossessedStems; +@U.PX.GIVEN@:@U.PX.GIVEN@ miHPossessedStems; +@U.PX.INDEF@:@U.PX.INDEF@í miHPossessedStems; +@U.PX.RECIP@ miHPossessivePrefixesRecip; + +LEXICON miHPossessivePrefixes12PL +@U.DIALECT.BIGCROW@:@U.DIALECT.BIGCROW@nihí miHPossessedStems; +@U.DIALECT.BIGPLUME@:@U.DIALECT.BIGPLUME@nahí miHPossessedStems; +@U.DIALECT.CROWCHIEF@:@U.DIALECT.CROWCHIEF@nahí miHPossessedStems; + +LEXICON miHPossessivePrefixesRecip +@U.DIALECT.BIGCROW@:@U.DIALECT.BIGCROW@ítłí miHPossessedStems; +@U.DIALECT.BIGPLUME@:@U.DIALECT.BIGPLUME@átłí miHPossessedStems; +@U.DIALECT.BIGPLUME@:@U.DIALECT.BIGPLUME@ástłí miHPossessedStems; +@U.DIALECT.CROWCHIEF@:@U.DIALECT.CROWCHIEF@átłí miHPossessedStems; + + +LEXICON miiHPossessivePrefixes + +@U.PX.SG1@:@U.PX.SG1@sií miiHPossessedStems; +@U.PX.SG2@:@U.PX.SG2@nií miiHPossessedStems; +@U.PX.SG3@:@U.PX.SG3@mií miiHPossessedStems; +@U.PX.SG3SBJSG3@:@U.PX.SG3SBJSG3@yií miiHPossessedStems; +@U.PX.SG3SBJSG3COREF@:@U.PX.SG3SBJSG3COREF@dií miiHPossessedStems; +@U.PX.SG3SBJPL3@:@U.PX.SG3SBJPL3@giyií miiHPossessedStems; +@U.PX.SG4@:@U.PX.SG4@guú miiHPossessedStems; + +@U.PX.PL1@ miiHPossessivePrefixes12PL; +@U.PX.PL2@ miiHPossessivePrefixes12PL; +@U.PX.PL3@:@U.PX.PL3@gimií miiHPossessedStems; +@U.PX.PL3SBJ3@:@U.PX.PL3SBJ3@guú miiHPossessedStems; +@U.PX.PL3SBJ3COREF@:@U.PX.PL3SBJ3COREF@gidií miiHPossessedStems; + +@U.PX.AREAL@:@U.PX.AREAL@guú miiHPossessedStems; +@U.PX.GIVEN@:@U.PX.GIVEN@mií miiHPossessedStems; ! CHECK +@U.PX.INDEF@:@U.PX.INDEF@ií miiHPossessedStems; ! 
CHECK +@U.PX.RECIP@ miiHPossessivePrefixesRecip; + +LEXICON miiHPossessivePrefixes12PL +@U.DIALECT.BIGCROW@:@U.DIALECT.BIGCROW@nihií miiHPossessedStems; +@U.DIALECT.BIGPLUME@:@U.DIALECT.BIGPLUME@nahií miiHPossessedStems; +@U.DIALECT.CROWCHIEF@:@U.DIALECT.CROWCHIEF@nahií miiHPossessedStems; + +LEXICON miiHPossessivePrefixesRecip +@U.DIALECT.BIGCROW@:@U.DIALECT.BIGCROW@ítłií miiHPossessedStems; +@U.DIALECT.BIGPLUME@:@U.DIALECT.BIGPLUME@átłií miiHPossessedStems; +@U.DIALECT.BIGPLUME@:@U.DIALECT.BIGPLUME@ástłií miiHPossessedStems; +@U.DIALECT.CROWCHIEF@:@U.DIALECT.CROWCHIEF@átłií miiHPossessedStems; + + +LEXICON maaPossessivePrefixes + +@U.PX.SG1@:@U.PX.SG1@sáa% maaPossessedStems; +@U.PX.SG2@:@U.PX.SG2@náa% maaPossessedStems; +@U.PX.SG3@:@U.PX.SG3@máa% maaPossessedStems; +@U.PX.SG3SBJSG3@:@U.PX.SG3SBJSG3@yáa% maaPossessedStems; +@U.PX.SG3SBJSG3COREF@:@U.PX.SG3SBJSG3COREF@dáa% maaPossessedStems; +@U.PX.SG3SBJPL3@:@U.PX.SG3SBJPL3@giyáa% maaPossessedStems; +@U.PX.SG4@:@U.PX.SG4@gwáa% maaPossessedStems; + +@U.PX.PL1@ maaPossessivePrefixes12PL; +@U.PX.PL2@ maaPossessivePrefixes12PL; +@U.PX.PL3@:@U.PX.PL3@gimáa% maaPossessedStems; +@U.PX.PL3SBJ3@:@U.PX.PL3SBJ3@gwáa% maaPossessedStems; +@U.PX.PL3SBJ3COREF@:@U.PX.PL3SBJ3COREF@gidáa% maaPossessedStems; + +@U.PX.AREAL@:@U.PX.AREAL@gwáa% maaPossessedStems; +@U.PX.GIVEN@:@U.PX.GIVEN@áa% maaPossessedStems; +@U.PX.INDEF@:@U.PX.INDEF@áa% maaPossessedStems; ! 
CHECK +@U.PX.RECIP@ maaPossessivePrefixesRecip; + +LEXICON maaPossessivePrefixes12PL +@U.DIALECT.BIGCROW@:@U.DIALECT.BIGCROW@niháa% maaPossessedStems; +@U.DIALECT.BIGPLUME@:@U.DIALECT.BIGPLUME@naháa% maaPossessedStems; +@U.DIALECT.CROWCHIEF@:@U.DIALECT.CROWCHIEF@naháa% maaPossessedStems; + +LEXICON maaPossessivePrefixesRecip +@U.DIALECT.BIGCROW@:@U.DIALECT.BIGCROW@ítłáa% maaPossessedStems; +@U.DIALECT.BIGPLUME@:@U.DIALECT.BIGPLUME@átłáa% maaPossessedStems; +@U.DIALECT.BIGPLUME@:@U.DIALECT.BIGPLUME@ástłáa% maaPossessedStems; +@U.DIALECT.CROWCHIEF@:@U.DIALECT.CROWCHIEF@átłáa% maaPossessedStems; + + +LEXICON Noun ++N:0 Tags; + +LEXICON SingularNoun ++N+Sg:0 Tags; + +LEXICON PluralNoun ++N+Pl:0 Tags; + +LEXICON Tags +@D.PX@ #; +@R.PX.SG1@+PxSg1:@R.PX.SG1@ #; +@R.PX.SG2@+PxSg2:@R.PX.SG2@ #; +@R.PX.SG3@+PxSg3:@R.PX.SG3@ #; +@R.PX.SG3SBJSG3@+PxSg3+SbjSg3:@R.PX.SG3SBJSG3@ #; +@R.PX.SG3SBJSG3COREF@+PxSg3+SbjSg3+CR:@R.PX.SG3SBJSG3COREF@ #; +@R.PX.SG3SBJPL3@+PxSg3+SbjPl3:@R.PX.SG3SBJPL3@ #; +@R.PX.SG4@+PxSg4:@R.PX.SG4@ #; + +@R.PX.PL1@+PxPl1:@R.PX.PL1@ #; +@R.PX.PL2@+PxPl2:@R.PX.PL2@ #; +@R.PX.PL3@+PxPl3:@R.PX.PL3@ #; +@R.PX.PL3SBJ3@+PxPl3+Sbj3:@R.PX.PL3SBJ3@ #; +@R.PX.PL3SBJ3COREF@+PxPl3+Sbj3+CR:@R.PX.PL3SBJ3COREF@ #; + +@R.PX.AREAL@+PxAreal:@R.PX.AREAL@ #; +@R.PX.GIVEN@+PxGiven:@R.PX.GIVEN@ #; +@R.PX.INDEF@+PxIndef:@R.PX.INDEF@ #; +@R.PX.RECIP@+PxRecip:@R.PX.RECIP@ #; diff --git a/src/fst/affixes/nouns.lexc b/src/fst/affixes/nouns.lexc deleted file mode 100644 index 7e12584f..00000000 --- a/src/fst/affixes/nouns.lexc +++ /dev/null @@ -1,32 +0,0 @@ -!! ## Tsuut'ina Noun inflection -! --------------- - -!! ## Classification. -!! 1. Always unpossessed nouns: nàk'ús "cloud" -!! 1. Always possessed nouns: sitsì "my head" (body parts, kinship terms) -!! 1. Possessed or unpossessed: tłích'á "dog" vs. silích'à "my dog" - -!! (see explanation in the affixes file) - -!! 
## Lexicons - -LEXICON NounPrefixes !!= * @CODE@ Splitting in 3 -N+: AlwaysUnpossessedNouns ; - AlwaysPossessedNounPrefixes ; - VariablyPossessedNouns ; - -LEXICON AlwaysPossessedNounPrefixes !!= * @CODE@ Px -PxSg1+N+:si%< AlwaysPossessedNouns ; -PxSg2+N+:ni%< AlwaysPossessedNouns ; -PxSg3+N+:mi%< AlwaysPossessedNouns ; -PxSg4+N+:gi%< AlwaysPossessedNouns ; - -LEXICON VariablyPossessedNouns !!= * @CODE@ Px or not -N+: UnpossessedNouns ; -PxSg1+N+:si%< PossessedNouns ; -PxSg2+N+:ni%< PossessedNouns ; -PxSg3+N+:mi%< PossessedNouns ; -PxSg4+N+:gi%< PossessedNouns ; - -! vim: set ft=xfst-lexc: - diff --git a/src/fst/root.lexc b/src/fst/root.lexc index 9f6f7143..56b71638 100644 --- a/src/fst/root.lexc +++ b/src/fst/root.lexc @@ -1,4 +1,4 @@ -! Divvun & Giellatekno - open source grammars for Sarsi language +! Divvun & Giellatekno - open source grammars for Tsuut'ina language ! Copyright © 2015 The University of Tromsø & the Norwegian Sámi Parliament ! http://giellatekno.uit.no & http://divvun.no ! @@ -13,229 +13,72 @@ ! giellatekno@uit.no or feedback@divvun.no ! ========================================================================== ! -!! # Tsuut'ina morphological analyser ! +!! # Tsuut'ina morphological analyser ! ! ========================================================================== ! -!! INTRODUCTION TO THE MORPHOLOGICAL ANALYSER OF Tsuut'ina. - -Multichar_Symbols !!≈ # Definitions for @CODE@ - -!! ## Analysis symbols -! ---------------- -!! The morphological analyses of wordforms of Tsuut'ina are presented -!! in this system in terms of the following symbols. -!! (It is highly suggested to follow existing standards when adding new tags). - -! 
Selected punctuation tags -+CLB -+PUNCT -+LEFT -+RIGHT - -+Asp !!= * @CODE@ asp, aspect -+Dem !!= * @CODE@ D, demonstrative -+Dim !!= * @CODE@ dim, diminutive -+Du !!= * @CODE@ du, dual -+Err/Orth !!= * @CODE@ Substandard, not implemented -+Foc !!= * @CODE@ foc, focus -+Hab !!= * @CODE@ hab, habitual -+Imprs !!= * @CODE@ impers, impersonal (+Impers?) -+Inc !!= * @CODE@ inc, inceptive (Incpt?) -+Inch !!= * @CODE@ incho, inchoative -+Mod !!= * @CODE@ M mode (this seems more like category than property) -+Mom !!= * @CODE@ momentaneous -N+ !!= * @CODE@ N, noun -+Neg !!= * @CODE@ neg, negative -+Num !!= * @CODE@ num, Numeral -+PI+ !!= * @CODE@ postposition incorporation (this is not a Morphosyn tag :-( -+PNS !!= * @CODE@ possessed noun suffix (this is not a Morphosyn tag :-( -+Part !!= * @CODE@ Part, particle -+Pl !!= * @CODE@ pl, Plural -+Po !!= * @CODE@ P, Postposition -+Prt !!= * @CODE@ T, tense (past) -+Qst !!= * @CODE@ Q, question marker -+Qt !!= * @CODE@ Qt, quantifier -+Sem/Hum !!= * @CODE@ Human -+Sem/Obj !!= * @CODE@ O, Object (have a look at this) -V+ !!= * @CODE@ V, verb -12Du+ !!= * @CODE@ 12, first person dual inclusive -13Du+ !!= * @CODE@ 13, first person dual exclusive -1Pl+ !!= * @CODE@ 12, first person plural inclusive -1PlO+ !!= * @CODE@ 12, first person plural inclusive O -1Sg+ !!= * @CODE@ 1, first person singular -1SgO+ -2Du+ !!= * @CODE@ 22, second person dual -2Pl+ -2PlO+ -2Sg+ !!= * @CODE@ 2 -2SgO+ -3Du+ !!= * @CODE@ 33, third person dual -3Pl+ -3Sg+ !!= * @CODE@ 3 -3SgO+ -4Sg+ !!= * @CODE@ 4, the other -4SgO+ - -PxSg1+ !!= * @CODE@ Px -PxSg2+ !!= * @CODE@ Px -PxSg3+ !!= * @CODE@ Px -PxSg4+ !!= * @CODE@ Px - -Adv+ -Ar+ !!= * @CODE@ 5, areal subject, it (place, condition, weather) -Cl/0+ -Cl/?+ -Cl/d+ -Cl/l+ -Distr+ -Gen1+ -Gen2+ -Gen3+ -Impf+ !!= * @CODE@ imp, imperfective (or prefix?) 
-Incept+ -Iter+ !!= * @CODE@ iter, iterative -NI+ !!= * @CODE@ NI, noun incorporation (probably not a tag) -Opt+ !!= * @CODE@ opt, optative -Prf+ !!= * @CODE@ perf, perfective -Recipr+ !!= * @CODE@ rec, reciprocal -Refl+ !!= * @CODE@ refl, reflexive -Semel+ -Ser+ -Th+ !!= * @CODE@ Th, thematic prefix (probably not a tag) -Unspec+ !!= * @CODE@ 0, Unspecified person -UnspecO+ !!= * @CODE@ 0, Unspecified person -UnspecS+ !!= * @CODE@ 0, Unspecified person -+Symbol !!≈ * @CODE@ = independent symbols in the text stream, like £, €, © - -Areal+ -ArealS+ -ArealO+ - -!! ## Prefixes - -Pref/xà+ -Pref/di+ -Pref/zi+ -Pref/na+ -Pref/ni+ - -!! ## our flags - -@U.asp.perf@ !!= * @CODE@ -@U.asp.ipfv@ !!= * @CODE@ - -@U.xaH.ON@ !!= * @CODE@ -@R.xaH.ON@ !!= * @CODE@ -@D.xaH@ !!= * @CODE@ - -@U.xaM.ON@ !!= * @CODE@ -@R.xaM.ON@ !!= * @CODE@ -@D.xaM@ !!= * @CODE@ - -@U.xaL.ON@ !!= * @CODE@ -@R.xaL.ON@ !!= * @CODE@ -@D.xaL@ !!= * @CODE@ - -@U.di.ON@ !!= * @CODE@ -@R.di.ON@ !!= * @CODE@ -@D.di@ !!= * @CODE@ - -@U.zi.ON@ !!= * @CODE@ -@R.zi.ON@ !!= * @CODE@ -@D.zi@ !!= * @CODE@ - -@U.na.ON@ !!= * @CODE@ -@R.na.ON@ !!= * @CODE@ -@D.na@ !!= * @CODE@ - -@U.ni.ON@ !!= * @CODE@ -@R.ni.ON@ !!= * @CODE@ -@D.ni@ !!= * @CODE@ - -@R.TV.ON@ !!= * @CODE@ -@U.TV.ON@ !!= * @CODE@ -@U.TV.OFF@ !!= * @CODE@ - -!! ## Archphonemes (multi-character definitions) - -%^VH !!= * @CODE@ denoting floating high tone - -!! ## Border - -%< !!= * @CODE@ prefix border -%> !!= * @CODE@ suffix border - -!! ## Flag diacritics -!! We have manually optimised the structure of our lexicon using following -!! flag diacritics to restrict morhpological combinatorics - only allow compounds -!! with verbs if the verb is further derived into a noun again: - @P.NeedNoun.ON@ !!≈ | @CODE@ | (Dis)allow compounds with verbs unless nominalised - @D.NeedNoun.ON@ !!≈ | @CODE@ | (Dis)allow compounds with verbs unless nominalised - @C.NeedNoun@ !!≈ | @CODE@ | (Dis)allow compounds with verbs unless nominalised -!! -!! 
For languages that allow compounding, the following flag diacritics are needed -!! to control position-based compounding restrictions for nominals. Their use is -!! handled automatically if combined with +CmpN/xxx tags. If not used, they will -!! do no harm. - @P.CmpFrst.FALSE@ !!≈ | @CODE@ | Require that words tagged as such only appear first - @D.CmpPref.TRUE@ !!≈ | @CODE@ | Block such words from entering ENDLEX - @P.CmpPref.FALSE@ !!≈ | @CODE@ | Block these words from making further compounds - @D.CmpLast.TRUE@ !!≈ | @CODE@ | Block such words from entering R - @D.CmpNone.TRUE@ !!≈ | @CODE@ | Combines with the next tag to prohibit compounding - @U.CmpNone.FALSE@ !!≈ | @CODE@ | Combines with the prev tag to prohibit compounding - @P.CmpOnly.TRUE@ !!≈ | @CODE@ | Sets a flag to indicate that the word has passed R - @D.CmpOnly.FALSE@ !!≈ | @CODE@ | Disallow words coming directly from root. -!! -!! Use the following flag diacritics to control downcasing of derived proper -!! nouns (e.g. Finnish Pariisi -> pariisilainen). See e.g. North Sámi for how to use -!! these flags. There exists a ready-made regex that will do the actual down-casing -!! given the proper use of these flags. - @U.Cap.Obl@ !!≈ | @CODE@ | Allowing downcasing of derived names: deatnulasj. - @U.Cap.Opt@ !!≈ | @CODE@ | Allowing downcasing of derived names: deatnulasj. - - - -!Undeclared? 
(tt) - - -@D.PREFIX@ -@P.DISTRIBUTIVE.OFF@ -@P.DISTRIBUTIVE.ON@ -@P.PREFIX.INNER@ -@P.PREFIX.MIDDLE@ -@P.PREFIX.OUTER@ -@R.PREFIX.INNER@ -@R.PREFIX.MIDDLE@ -@R.PREFIX.OUTER@ -@R.SUBJECTNUMBER.PL@ -@R.SUBJECTNUMBER.SG@ -@R.SUBJECTPERSON.3@ -@R.SUBJECTPERSON.4@ -@U.OBJECTNUMBER.PL@ -@U.PREFIX.MIDDLE@ -@U.PREFIX.OUTER@ -@U.SUBJECTNUMBER.PL@ -@U.SUBJECTNUMBER.SG@ -@U.SUBJECTPERSON.1@ -@U.SUBJECTPERSON.2@ -@U.SUBJECTPERSON.3@ -@U.SUBJECTPERSON.4@ -@U.TAMA.0s@ -@U.TAMA.nii@ -@U.TAMA.nis@ -@U.TAMA.si@ -@U.TAMA.sis@ -@U.TAMA.yi-a@ -@U.TAMA.yii-a@ -@U.TAMA.yis-y@ -@U.TAMA.yis@ -@U.VALENCE.TRANSITIVE@ +Multichar_Symbols + +@U.DIALECT.BIGCROW@ +@U.DIALECT.BIGPLUME@ +@U.DIALECT.CROWCHIEF@ + +@D.PX@ +@U.PX.SG1@ +@U.PX.SG2@ +@U.PX.SG3@ +@U.PX.SG3SBJSG3@ +@U.PX.SG3SBJSG3COREF@ +@U.PX.SG3SBJPL3@ +@U.PX.SG4@ +@U.PX.PL1@ +@U.PX.PL2@ +@U.PX.PL3@ +@U.PX.PL3SBJ3@ +@U.PX.PL3SBJ3COREF@ +@U.PX.AREAL@ +@U.PX.GIVEN@ +@U.PX.INDEF@ +@U.PX.RECIP@ +@R.PX.SG1@ +@R.PX.SG2@ +@R.PX.SG3@ +@R.PX.SG3SBJSG3@ +@R.PX.SG3SBJSG3COREF@ +@R.PX.SG3SBJPL3@ +@R.PX.SG4@ +@R.PX.PL1@ +@R.PX.PL2@ +@R.PX.PL3@ +@R.PX.PL3SBJ3@ +@R.PX.PL3SBJ3COREF@ +@R.PX.AREAL@ +@R.PX.GIVEN@ +@R.PX.INDEF@ +@R.PX.RECIP@ + ++PxSg1 ++PxSg2 ++PxSg3 ++PxSg4 ++PxPl1 ++PxPl2 ++PxPl3 ++PxAreal ++PxGiven ++PxIndef ++PxRecip ++Sbj3 ++SbjSg3 ++SbjPl3 ++CR ++N ++Sg ++Pl LEXICON Root -!! The word forms in Tsuut'ina start from noun and verb prefixes -NounPrefixes ; -!Words ; -Punctuation ; -Symbols ; +NounPrefixes ; +Punctuation ; +Symbols ; +!Words ; diff --git a/src/fst/stems/noun_stems.lexc b/src/fst/stems/noun_stems.lexc new file mode 100644 index 00000000..68041f9f --- /dev/null +++ b/src/fst/stems/noun_stems.lexc @@ -0,0 +1,58 @@ +! +! Tsuut'ina noun stems. +! + +! Unpossessed stem forms. 
+LEXICON UnpossessedStems + UnpossessedSgStems; + UnpossessedPlStems; + UnpossessedNoNumStems; + +LEXICON UnpossessedSgStems +ístłí SingularNoun; +tłích'á SingularNoun; + +LEXICON UnpossessedPlStems +ístłí:ístłíká PluralNoun; +ístłí:ástłáká PluralNoun; +tłích'á:tłích'áká PluralNoun; + +LEXICON UnpossessedNoNumStems +guuniizh Noun; +más Noun; + + +! Possessed stem forms with mi- possessive prefixes. +LEXICON miPossessedStems + miPossessedSgStems; + miPossessedPlStems; + miPossessedNoNumStems; + +LEXICON miPossessedSgStems +ístłí:lích'à SingularNoun; +tłích'á:lích'à SingularNoun; +tłích'á:tłích'à SingularNoun; + +LEXICON miPossessedPlStems +ístłí:lích'àká PluralNoun; +tłích'á:lích'àká PluralNoun; +tłích'á:tłích'àká PluralNoun; + +LEXICON miPossessedNoNumStems +guuniizh:guunijà Noun; +más:mázà Noun; + + +! Possessed stem forms with mí- possessive prefixes. +LEXICON miHPossessedStems +mídá:dá Noun; +mítsí:tsí Noun; + +! Possessed stem forms with mii- possessive prefixes. +LEXICON miiHPossessedStems +miíts'a:ts'a Noun; +miízá:zá Noun; + +! Possessed stem forms with máa possessive prefixes. +LEXICON maaPossessedStems +guuniizh:guunijà Noun; diff --git a/src/fst/stems/nouns.lexc b/src/fst/stems/nouns.lexc deleted file mode 100644 index 6492c12d..00000000 --- a/src/fst/stems/nouns.lexc +++ /dev/null @@ -1,55 +0,0 @@ -!! # Tsuut'ina Nouns -! ----- -!! ## Classification -!! 1. Always unpossessed nouns: nàk'ús "cloud" -!! 1. Always possessed nouns: sitsì "my head" (body parts, kinship terms) -!! 1. Possessed or unpossessed: tłích'á "dog" vs. silích'à "my dog" - -!! Three (phonological) cases for the possessive prefixes: -!! # Consonant-initial stem: si- "1SG" tsì "head" -> sitsì "my head" -!! # Preceding H-tone stem: si- "1SG" V́tsí "nose" -> sítsí "my nose" -!! # Vowel-initial stem: si- "1SG" óó "mother" -> sóó "my mother" \\ -!! (cf. ʔinóó "mother" , ʔi- "UNSPEC.POSS" (n)óó; -!! ```gu- "SOMEONE" óó "mother" -> gwóó gu > gw / _ [oa] ) Cu[oa] > Cw[oa]?``` - -!! 
Periphrastic / non-morphological constructions are used for always -!! unpossessed nouns: sá(à) nàk'ús "my cloud", ná(à) ʔidínít'ùgù -!! yiitł'áłí "your vehicle") - -!! ## Lexicons - -LEXICON AlwaysUnpossessedNouns !!= * @CODE@ never Px -diná # "man" ; -gútł'ìs # "mud" ; -ʔidínít'ùgù% yiitł'áłí # "automobile" ; -nàk'ús # "cloud" ; -ts'ìká # "woman" ; - - -LEXICON AlwaysPossessedNouns !!= * @CODE@ always Px, body part, kinship -tsì # "head" ; -0tsí:%^VHtsí # "nose" ; -kòlà # "husband" ; -ts'òyá # "wife" ; ! CHECK: unpossessed form, as well? - - -LEXICON UnpossessedNouns !!= * @CODE@ Px or not, here not. Cf. PossessedNouns -kù # "fire" ; -más # "knife" ; -tìs # "cane, walking stick" ; -tłích'á # "dog" ; -tú # "water" ; -xoní # "buffalo" ; -zos # "snow; year" ; - -LEXICON PossessedNouns !!= * @CODE@ Px or not, here Px. Cf. UnpossessedNouns -kùwà # "fire" ; ! CHECK -lích'à # "dog" ; -mázà # "knife" ; -tìsà # "cane" ; -tùwà # "water" ; -xonà # "buffalo" ; -zosà # "snow; year" ; - - -! vim: set ft=xfst-lexc: diff --git a/src/fst/stems/verbs.lexc b/src/fst/stems/verbs.lexc deleted file mode 100644 index 5e35974d..00000000 --- a/src/fst/stems/verbs.lexc +++ /dev/null @@ -1,47 +0,0 @@ - -!! # Tsuut'ina verb stems - -!! ## Intransitive Verbs - -LEXICON PERF_CLASS_1 !!= * @CODE@ -@U.asp.perf@Prf+V+:@U.asp.perf@ PERF_STEMS_1 ; - -LEXICON IPFV_CLASS_1a !!= * @CODE@ -@U.asp.ipfv@Impf+V+:@U.asp.ipfv@ IPFV_STEMS_1a ; - -LEXICON IPFV_CLASS_1b !!= * @CODE@ -@U.asp.ipfv@Impf+V+:@U.asp.ipfv@ IPFV_STEMS_1b ; - -LEXICON IPFV_CLASS_2 !!= * @CODE@ -@U.asp.ipfv@Impf+V+:@U.asp.ipfv@ IPFV_STEMS_2 ; - -LEXICON PERF_STEMS_1 !!= * @CODE@ the list of most verbs in perf -tsiy:tsày endlex ; -gàd:gàd endlex ; - -LEXICON IPFV_STEMS_1a !!= * @CODE@ same list in ipfv, but no prefix -tsiy:tsiy endlex ; - -LEXICON IPFV_STEMS_1b !!= * @CODE@ same list in ipfv, but with pref -@R.di.ON@didús:@R.di.ON@dús # ; ! "crawl" -@R.zi.ON@zisghá:@R.zi.ON@ghá # ; ! "kill" -@R.ni.ON@nidò:@R.ni.ON@ghá # ; ! 
"sit down" - -LEXICON IPFV_STEMS_2 ! ... and the same again type 2, but ipfv -endlex ; - -!! ## Transitive Verbs - -LEXICON T_IPFV_CLASS_1a !!= * @CODE@ type 1a -@R.TV.ON@yisxòł:@R.TV.ON@xòł # ; ! hit, whip - -@R.xaL.ON@V+xà_gàd:@R.xaL.ON@gàd # ; - -!! ## Endlex cleanup - -LEXICON endlex !! @CODE@ gives all D flags blocking unwanted forms -@D.xaL@@D.di@@D.zi@@D.na@@D.ni@ # ; - - - -! vim: set ft=xfst-lexc: