From 500b914695407a7f9ff68afbcce47fed46ed3172 Mon Sep 17 00:00:00 2001 From: "antti.arppe@iki.fi" Date: Wed, 6 Mar 2024 16:47:42 -0700 Subject: [PATCH] Created a XFSCRIPT for demoing morpheme boundaries. --- src/fst/srs-bound-demo.xfscript | 154 ++++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 src/fst/srs-bound-demo.xfscript diff --git a/src/fst/srs-bound-demo.xfscript b/src/fst/srs-bound-demo.xfscript new file mode 100644 index 0000000..4913f6d --- /dev/null +++ b/src/fst/srs-bound-demo.xfscript @@ -0,0 +1,154 @@ +# Demo script on morpheme boundary marking + +read lexc morphology/stems/verb_stems.lexc +define Stems + +read lexc morphology/affixes/verb_inner_affixes.lexc +define InnerAffixes + +read lexc morphology/affixes/verb_middle_affixes.lexc +define MiddleAffixes + +read lexc morphology/affixes/verb_outer_affixes.lexc +define OuterAffixes + +read lexc morphology/affixes/verb_oblique_affixes.lexc +define ObliqueAffixes + +read lexc morphology/affixes/postverbal_affixes.lexc +define PostverbalAffixes + +# Mark prefix type (insert corresponding flags to the +# original prefixes we know to be present from the lexical entry. +# ab=cd_ef.gh => ab@P.PREFIX.OUTER@=cd@P.PREFIX.MIDDLE@_ef@P.PREFIX.INNER@.gh + +define MarkPrefixes ~$["@P.PREFIX.OUTER@"|"@P.PREFIX.INNER@"|"@P.PREFIX.MIDDLE@"] .o. + "=" -> "@P.PREFIX.OUTER@" "=" , + "_" -> "@P.PREFIX.MIDDLE@" "_" , + "." -> "@P.PREFIX.INNER@" "."; + +# Insert . (inner), _ (middle), and = (outer) if missing in the intermediate rep: +# tsiy > =_.tsiy +# ts'á=zíd > ts'á=_.zíd +# tsí=di.tł'á > tsí=_di.tł'á +# gu.blah > =_gu.blah +# gu_blah > =gu_.blah + +# (1) No . > insert . after last marker (= or _), or in the beginning if none exists +# (2) No _ > insert _ (a) after =, if one exists, or (b) beginning +# (3) No = > insert at beginning + +define InsInner [..] -> "." || "_" _ ~$"." .#. .o. + [..] -> "." || "=" _ ~$"." .#. .o. + [..] -> "." || .#. _ ~$"." .#. ; + +define InsMiddle [..] -> "_" || "=" _ ~$["_"] .#. .o. + [..] -> "_" || .#. _ ~$["_"] .#. ; + +define InsOuter [..] -> "=" || .#. _ ~$["="] .#. ; + +# Some lexical entries include a "^L" immediately before the inner-prefix +# boundary marker "." (e.g., xá=_^L.ʔò "take it [solid object] out", or +# ta=_di^L.ʔò "lift/pick/hold it [solid object] up"), with or without +# any other inner prefixes present. In these (rare) cases, Tsuut'ina uses +# middle prefix TAMA allomorphs, rather than outer or inner prefix forms, e.g. +# +# 2SG xáaʔò "you (sg.) will carry it (solid object) past" +# (not *xaniʔò or something similar if 0-IPFV outer or no +# preceding prefix allomorphs were used) +# 1PL xáasaàʔò "we will carry it (solid object) past" +# (not *xaàʔò or something similar if 0-IPFV inner prefix +# allomorphs were used) +# +# 2SG tadiʔò "you (sg.) will lift it (solid object) up" +# (not *tadiniʔò if 0-IPFV outer or no preceding prefix +# allomorphs were used) +# 1PL tadìsaàʔò "we will lift it (solid object) up" +# (not *tadaàʔò or something similar if 0-IPFV inner +# prefix allomorphs were used) +# +# This rule therefore turns @P.PREFIX.INNER@ into @P.PREFIX.MIDDLE@ to ensure +# that middle-prefix allomorphs are used, then sets an additional flag (@P. +# LOWTONE.ON@) to help differentiate between this situation and all other +# middle-prefix contexts (for use in 'affixes/verb_inner_affixes.lexc'). +define LInnerPrefixAllomorphs "@P.PREFIX.INNER@" -> + "@P.PREFIX.MIDDLE@" "@P.LOWTONE.ON@" || "^L" _ ; + +# Some middle and inner prefixes (e.g., middle íH- conative/half-transitive, +# inner ná-) appear with outer prefix TAMA chunk allomorphs. In order to +# get this right, we append the symbol "^O" after the vowel in the lexical +# entry (e.g., í^H^O-, ná^O-), then turn that into a flag that requires outer +# prefix allomorphs. +define RequireOuterAllomorphs "^O" "@P.PREFIX.INNER@" -> "@P.PREFIX.OUTER@" .o. + "^O" "@P.PREFIX.MIDDLE@" -> "@P.PREFIX.OUTER@" .o. + "^O" -> "@P.PREFIX.OUTER@"; + +# We temporarily keep the boundary symbol for inner affixes ("."), middle +# affixes ("_"), and outer affixes ("=") in place so that we can target +# morphophonology in each position more easily (especially when aiming to drop +# the "weak" /i/ vowels that appear in inner lexical prefixes). +read regex [Stems PostverbalAffixes] .o. + MarkPrefixes .o. + InsInner .o. InsMiddle .o. InsOuter .o. + LInnerPrefixAllomorphs .o. RequireOuterAllomorphs .o. + "." -> "." InnerAffixes "@P.PREFIX.INNER@" , + "_" -> "_" MiddleAffixes "@P.PREFIX.MIDDLE@" , + "=" -> "=" OuterAffixes "@P.PREFIX.OUTER@" ; +define WordForms; + +# Rewrite rule for resurrecting the prefix boundary markers + +define ShowBoundaries [ "@P.PREFIX.INNER@" -> "." , + "@P.PREFIX.MIDDLE@" -> "_" , + "@P.PREFIX.OUTER@" -> "=" +]; + +define SimplifyBoundaries [ [ "=" "=" -> "=" , "_" "_" -> "_" , "." "." -> "." ] +.o. "=" "_" "." -> "=" +.o. "_" "." -> "_" +.o. "=" "_" -> "=" +.o. [ "=" | "_" | "." ] -> 0 || .#. _ +]; + +# Concatenate ObliqueAffixes and the other inflectional FSTs. +read regex WordForms .o. [..] -> ObliqueAffixes || .#. _; + +twosided flag-diacritics +define Grammar; + +# Morphophonology + +source morphology/phonology.xfscript +define MorphoPhonology; + +read lexc morphology/affixes/verb_tags.lexc +define Tags; + +# To prevent morphophonology to be tripped up be intervening flags +set flag-is-epsilon ON + +# regex Grammar Tags; + +# Tentative code for dealing with Morphophonology as read for a separate +# file. Though we would want to figure a way to compose each rewrite rule +# the the morphological component one-by-one for faster compilation. + +regex [Grammar Tags] .o. MorphoPhonology ; +define VerbModel + +# regex [Grammar Tags] .o. deletePrefixI .o. hToneSpreading .o. hToneSpreadingCleanup .o. deleteBoundarySymbol .o. uBeforeA .o. aBeforeI .o. lowABeforeI .o. iBeforeA .o. iBeforeO .o. uBeforeO .o. lInitialStemsSbjPl2 .o. lInitialStemsSbjPl2Cleanup .o. slDissimilation .o. zhDevoicing .o. zDevoicing; + +# Make flags visible, so that they can be converted to explicit boundary markers +set flag-is-epsilon OFF + +# Output boundary markers based on flags + +regex VerbModel .o. ShowBoundaries ; +define VerbModelWithBound + +# Make flags invisible again, so that they will not intervene in removing excessive boundary markers +set flag-is-epsilon ON + +regex VerbModelWithBound .o. SimplifyBoundaries ; + +twosided flag-diacritics