From 3f28aa2707382250d21df51b175da5476220fd34 Mon Sep 17 00:00:00 2001 From: Flammie A Pirinen Date: Thu, 5 Oct 2023 12:14:29 +0200 Subject: [PATCH] cleanups --- src/filters/Makefile.am | 6 +++- src/filters/remove-DNorm-tags.regex | 18 +++++++++++ .../remove-derivation-position-tags.regex | 10 ++++++ src/filters/remove-norm-comp-tags.regex | 23 +++++++++++++ src/filters/rename-POS_before_Der-tags.regex | 32 +++++++++++++++++++ 5 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 src/filters/remove-DNorm-tags.regex create mode 100644 src/filters/remove-derivation-position-tags.regex create mode 100644 src/filters/remove-norm-comp-tags.regex create mode 100644 src/filters/rename-POS_before_Der-tags.regex diff --git a/src/filters/Makefile.am b/src/filters/Makefile.am index 4100d44a..0189bba2 100644 --- a/src/filters/Makefile.am +++ b/src/filters/Makefile.am @@ -22,7 +22,11 @@ GIELLA_FILTER_LOCAL_REGEX_SRCS=\ downcase_UCletters.regex\ allow_uppercase.regex\ - remove-bracketed-english-gloss.regex + remove-bracketed-english-gloss.regex\ + remove-DNorm-tags.regex \ + remove-derivation-position-tags.regex \ + remove-norm-comp-tags.regex \ + rename-POS_before_Der-tags.regex # List any local filter xfscript files here: GIELLA_FILTER_LOCAL_XFSCRIPT_SRCS= diff --git a/src/filters/remove-DNorm-tags.regex b/src/filters/remove-DNorm-tags.regex new file mode 100644 index 00000000..456f3ed6 --- /dev/null +++ b/src/filters/remove-DNorm-tags.regex @@ -0,0 +1,18 @@ +! Divvun & Giellatekno - open source grammars for Sámi and other languages +! Copyright © 2000-2010 The University of Tromsø & the Norwegian Sámi Parliament +! http://giellatekno.uit.no & http://divvun.no +! +! This program is free software; you can redistribute and/or modify +! this file under the terms of the GNU General Public License as published by +! the Free Software Foundation, either version 3 of the License, or +! (at your option) any later version. The GNU General Public License +! 
is found at http://www.gnu.org/licenses/gpl.html. It is +! also available in the file $GTHOME/LICENSE.txt. +! +! Other licensing options are available upon request, please contact +! giellatekno@uit.no or feedback@divvun.no + +# This filter removes the +Use/NotDNorm and +Use/DNorm tags. + +0 <- %+Use%/NotDNorm, +0 <- %+Use%/DNorm; diff --git a/src/filters/remove-derivation-position-tags.regex b/src/filters/remove-derivation-position-tags.regex new file mode 100644 index 00000000..7f86484e --- /dev/null +++ b/src/filters/remove-derivation-position-tags.regex @@ -0,0 +1,10 @@ +# This regex removes tags we do not want in the ordinary $(GTLANG).fst +# That is, we do not give them as output from analysis + +# delete the tags governing normative/speller derivation restrictions +0 <- %+Der, +0 <- %+Der1, +0 <- %+Der2, +0 <- %+Der3, +0 <- %+Der4, +0 <- %+Der5; diff --git a/src/filters/remove-norm-comp-tags.regex b/src/filters/remove-norm-comp-tags.regex new file mode 100644 index 00000000..8f83ca91 --- /dev/null +++ b/src/filters/remove-norm-comp-tags.regex @@ -0,0 +1,23 @@ +# This regex removes tags we do not want in the ordinary $(GTLANG).fst +# That is, we do not give them as output from analysis + +# delete normative tags governing compound behaviour +# -N- in the tag name stands for Normative +0 <- %+CmpNP%/All, +0 <- %+CmpNP%/First, +0 <- %+CmpNP%/Pref, +0 <- %+CmpNP%/Last, +0 <- %+CmpNP%/Suff, +0 <- %+CmpNP%/None, +0 <- %+CmpNP%/Only, +0 <- %+CmpN%/SgN, +0 <- %+CmpN%/SgG, +0 <- %+CmpN%/PlG, +0 <- %+CmpN%/SgNomLeft, +0 <- %+CmpN%/SgGenLeft, +0 <- %+CmpN%/PlGenLeft, +0 <- %+CmpN%/Def, +0 <- %+CmpN%/DefSgNom, +0 <- %+CmpN%/DefSgGen, +0 <- %+CmpN%/DefPlGen +; diff --git a/src/filters/rename-POS_before_Der-tags.regex b/src/filters/rename-POS_before_Der-tags.regex new file mode 100644 index 00000000..9395a0c5 --- /dev/null +++ b/src/filters/rename-POS_before_Der-tags.regex @@ -0,0 +1,32 @@ +# This script renames POS tags when in front of derivations, such that +# only the final POS tag 
remains. This allows disambiguation to work +# properly also for derived words, without having to resort to post-processing +# hacks. The script is language specific. + +# POS tags changes: +[ "+Ex/Adv" <- "+Adv" , + "+Ex/N" <- "+N" , + "+Ex/A" <- "+A" , + "+Ex/Attr" <- "+Attr", + "+Ex/V" <- "+V" || +# change always and only when followed by a derivation tag, with no +Cmp in between: + _ \[ "+Cmp" ]* + [ "+Der" + | "+Der1" + | "+Der2" + | "+Der3" + | "+Der4" + | "+Der5" + ] +] .o. + +# Transitivity tag changes: +[ "+Ex/TV" <- "+TV" , + "+Ex/IV" <- "+IV" || +# change only within the same stem (no intervening +Cmp), and only when followed by +# a later +TV or +IV tag, i.e. a verbal derivation: + _ \[ "+Cmp" ]* + [ "+TV" + | "+IV" + ] +] ;