Skip to content

Commit

Permalink
Created a XFSCRIPT for demoing morpheme boundaries.
Browse files Browse the repository at this point in the history
  • Loading branch information
aarppe committed Mar 6, 2024
1 parent 8d05817 commit 500b914
Showing 1 changed file with 154 additions and 0 deletions.
154 changes: 154 additions & 0 deletions src/fst/srs-bound-demo.xfscript
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
# Demo script on morpheme boundary marking

read lexc morphology/stems/verb_stems.lexc
define Stems

read lexc morphology/affixes/verb_inner_affixes.lexc
define InnerAffixes

read lexc morphology/affixes/verb_middle_affixes.lexc
define MiddleAffixes

read lexc morphology/affixes/verb_outer_affixes.lexc
define OuterAffixes

read lexc morphology/affixes/verb_oblique_affixes.lexc
define ObliqueAffixes

read lexc morphology/affixes/postverbal_affixes.lexc
define PostverbalAffixes

# Mark prefix type (insert corresponding flags to the
# original prefixes we know to be present from the lexical entry.
# ab=cd_ef.gh => ab@P.PREFIX.OUTER@=cd@P.PREFIX.MIDDLE@_ef@P.PREFIX.INNER@.gh

define MarkPrefixes ~$["@P.PREFIX.OUTER@"|"@P.PREFIX.INNER@"|"@P.PREFIX.MIDDLE@"] .o.
"=" -> "@P.PREFIX.OUTER@" "=" ,
"_" -> "@P.PREFIX.MIDDLE@" "_" ,
"." -> "@P.PREFIX.INNER@" ".";

# Insert . (inner), _ (middle), and = (outer) if missing in the intermediate rep:
# tsiy > =_.tsiy
# ts'á=zíd > ts'á=_.zíd
# tsí=di.tł'á > tsí=_di.tł'á
# gu.blah > =_gu.blah
# gu_blah > =gu_.blah

# (1) No . > insert . after last marker (= or _), or in the beginning if none exists
# (2) No _ > insert _ (a) after =, if one exists, or (b) beginning
# (3) No = > insert at beginning

define InsInner [..] -> "." || "_" _ ~$"." .#. .o.
[..] -> "." || "=" _ ~$"." .#. .o.
[..] -> "." || .#. _ ~$"." .#. ;

define InsMiddle [..] -> "_" || "=" _ ~$["_"] .#. .o.
[..] -> "_" || .#. _ ~$["_"] .#. ;

define InsOuter [..] -> "=" || .#. _ ~$["="] .#. ;

# Some lexical entries include a "^L" immediately before the inner-prefix
# boundary marker "." (e.g., xá=_^L.ʔò "take it [solid object] out", or
# ta=_di^L.ʔò "lift/pick/hold it [solid object] up"), with or without
# any other inner prefixes present. In these (rare) cases, Tsuut'ina uses
# middle prefix TAMA allomorphs, rather than outer or inner prefix forms, e.g.
#
# 2SG xáaʔò "you (sg.) will carry it (solid object) past"
# (not *xaniʔò or something similar if 0-IPFV outer or no
# preceding prefix allomorphs were used)
# 1PL xáasaàʔò "we will carry it (solid object) past"
# (not *xaàʔò or something similar if 0-IPFV inner prefix
# allomorphs were used)
#
# 2SG tadiʔò "you (sg.) will lift it (solid object) up"
# (not *tadiniʔò if 0-IPFV outer or no preceding prefix
# allomorphs were used)
# 1PL tadìsaàʔò "we will lift it (solid object) up"
# (not *tadaàʔò or something similar if 0-IPFV inner
# prefix allomorphs were used)
#
# This rule therefore turns @P.PREFIX.INNER@ into @P.PREFIX.MIDDLE@ to ensure
# that middle-prefix allomorphs are used, then sets an additional flag (@P.
# LOWTONE.ON@) to help differentiate between this situation and all other
# middle-prefix contexts (for use in 'affixes/verb_inner_affixes.lexc').
define LInnerPrefixAllomorphs "@P.PREFIX.INNER@" ->
"@P.PREFIX.MIDDLE@" "@P.LOWTONE.ON@" || "^L" _ ;

# Some middle and inner prefixes (e.g., middle íH- conative/half-transitive,
# inner ná-) appear with outer prefix TAMA chunk allomorphs. In order to
# get this right, we append the symbol "^O" after the vowel in the lexical
# entry (e.g., í^H^O-, ná^O-), then turn that into a flag that requires outer
# prefix allomorphs.
define RequireOuterAllomorphs "^O" "@P.PREFIX.INNER@" -> "@P.PREFIX.OUTER@" .o.
"^O" "@P.PREFIX.MIDDLE@" -> "@P.PREFIX.OUTER@" .o.
"^O" -> "@P.PREFIX.OUTER@";

# We temporarily keep the boundary symbol for inner affixes ("."), middle
# affixes ("_"), and outer affixes ("=") in place so that we can target
# morphophonology in each position more easily (especially when aiming to drop
# the "weak" /i/ vowels that appear in inner lexical prefixes).
read regex [Stems PostverbalAffixes] .o.
MarkPrefixes .o.
InsInner .o. InsMiddle .o. InsOuter .o.
LInnerPrefixAllomorphs .o. RequireOuterAllomorphs .o.
"." -> "." InnerAffixes "@P.PREFIX.INNER@" ,
"_" -> "_" MiddleAffixes "@P.PREFIX.MIDDLE@" ,
"=" -> "=" OuterAffixes "@P.PREFIX.OUTER@" ;
define WordForms;

# Rewrite rule for resurrecting the prefix boundary markers

define ShowBoundaries [ "@P.PREFIX.INNER@" -> "." ,
"@P.PREFIX.MIDDLE@" -> "_" ,
"@P.PREFIX.OUTER@" -> "="
];

define SimplifyBoundaries [ [ "=" "=" -> "=" , "_" "_" -> "_" , "." "." -> "." ]
.o. "=" "_" "." -> "="
.o. "_" "." -> "_"
.o. "=" "_" -> "="
.o. [ "=" | "_" | "." ] -> 0 || .#. _
];

# Concatenate ObliqueAffixes and the other inflectional FSTs.
read regex WordForms .o. [..] -> ObliqueAffixes || .#. _;

twosided flag-diacritics
define Grammar;

# Morphophonology

source morphology/phonology.xfscript
define MorphoPhonology;

read lexc morphology/affixes/verb_tags.lexc
define Tags;

# To prevent morphophonology to be tripped up be intervening flags
set flag-is-epsilon ON

# regex Grammar Tags;

# Tentative code for dealing with Morphophonology as read for a separate
# file. Though we would want to figure a way to compose each rewrite rule
# the the morphological component one-by-one for faster compilation.

regex [Grammar Tags] .o. MorphoPhonology ;
define VerbModel

# regex [Grammar Tags] .o. deletePrefixI .o. hToneSpreading .o. hToneSpreadingCleanup .o. deleteBoundarySymbol .o. uBeforeA .o. aBeforeI .o. lowABeforeI .o. iBeforeA .o. iBeforeO .o. uBeforeO .o. lInitialStemsSbjPl2 .o. lInitialStemsSbjPl2Cleanup .o. slDissimilation .o. zhDevoicing .o. zDevoicing;

# Make flags visible, so that they can be converted to explicit boundary markers
set flag-is-epsilon OFF

# Output boundary markers based on flags

regex VerbModel .o. ShowBoundaries ;
define VerbModelWithBound

# Make flags invisible again, so that they will not intervene in removing excessive boundary markers
set flag-is-epsilon ON

regex VerbModelWithBound .o. SimplifyBoundaries ;

twosided flag-diacritics

0 comments on commit 500b914

Please sign in to comment.