Skip to content

Commit

Permalink
cleanups
Browse files Browse the repository at this point in the history
  • Loading branch information
flammie committed Oct 5, 2023
1 parent 3ef9a4f commit 3f28aa2
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 1 deletion.
6 changes: 5 additions & 1 deletion src/filters/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,11 @@
GIELLA_FILTER_LOCAL_REGEX_SRCS=\
downcase_UCletters.regex\
allow_uppercase.regex\
remove-bracketed-english-gloss.regex
remove-bracketed-english-gloss.regex\
remove-DNorm-tags.regex \
remove-derivation-position-tags.regex \
remove-norm-comp-tags.regex \
rename-POS_before_Der-tags.regex

# List any local filter xfscript files here:
GIELLA_FILTER_LOCAL_XFSCRIPT_SRCS=
Expand Down
18 changes: 18 additions & 0 deletions src/filters/remove-DNorm-tags.regex
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
! Divvun & Giellatekno - open source grammars for Sámi and other languages
! Copyright © 2000-2010 The University of Tromsø & the Norwegian Sámi Parliament
! http://giellatekno.uit.no & http://divvun.no
!
! This program is free software; you can redistribute and/or modify
! this file under the terms of the GNU General Public License as published by
! the Free Software Foundation, either version 3 of the License, or
! (at your option) any later version. The GNU General Public License
! is found at http://www.gnu.org/licenses/gpl.html. It is
! also available in the file $GTHOME/LICENSE.txt.
!
! Other licensing options are available upon request, please contact
! giellatekno@uit.no or feedback@divvun.no

# This filter removes the +Use/NotDNorm and +Use/DNorm tags: each rule below
# rewrites the tag on the right of `<-` to the empty string (0).
# `%` escapes the following `+` and `/` so that they are read as literal
# characters of the tag name rather than as regex operators.

0 <- %+Use%/NotDNorm,
0 <- %+Use%/DNorm;
10 changes: 10 additions & 0 deletions src/filters/remove-derivation-position-tags.regex
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# This regex removes tags that we do not want in the ordinary $(GTLANG).fst,
# i.e. tags that should not be given as output from analysis.

# Delete the tags governing normative/speller derivation restrictions.
# Each rule rewrites one tag (+Der, +Der1 ... +Der5) to the empty string (0);
# the rules are comma-separated and closed by a single `;`.
# `%` escapes the `+` so that it is read as part of the tag name.
0 <- %+Der,
0 <- %+Der1,
0 <- %+Der2,
0 <- %+Der3,
0 <- %+Der4,
0 <- %+Der5;
23 changes: 23 additions & 0 deletions src/filters/remove-norm-comp-tags.regex
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# This regex removes tags that we do not want in the ordinary $(GTLANG).fst,
# i.e. tags that should not be given as output from analysis.

# Delete the normative tags governing compound behaviour.
# The N following "Cmp" in the tag names stands for Normative.
# Two tag families are removed: +CmpNP/... and +CmpN/...
# Each rule rewrites one tag to the empty string (0); `%` escapes the `+`
# and `/` so that they are read as literal characters of the tag name.
0 <- %+CmpNP%/All,
0 <- %+CmpNP%/First,
0 <- %+CmpNP%/Pref,
0 <- %+CmpNP%/Last,
0 <- %+CmpNP%/Suff,
0 <- %+CmpNP%/None,
0 <- %+CmpNP%/Only,
0 <- %+CmpN%/SgN,
0 <- %+CmpN%/SgG,
0 <- %+CmpN%/PlG,
0 <- %+CmpN%/SgNomLeft,
0 <- %+CmpN%/SgGenLeft,
0 <- %+CmpN%/PlGenLeft,
0 <- %+CmpN%/Def,
0 <- %+CmpN%/DefSgNom,
0 <- %+CmpN%/DefSgGen,
0 <- %+CmpN%/DefPlGen
;
32 changes: 32 additions & 0 deletions src/filters/rename-POS_before_Der-tags.regex
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# This script renames POS tags when they occur in front of derivations, such
# that only the final POS tag remains unchanged. This allows disambiguation to
# work properly also for derived words, without having to resort to
# post-processing hacks. The script is language specific.
#
# The script consists of two rule sets joined by composition (.o.): the first
# renames POS tags, the second renames transitivity tags. In every rule the
# tag on the right of `<-` is renamed to the "+Ex/..." tag on the left.

# POS tag changes:
[ "+Ex/Adv" <- "+Adv" ,
"+Ex/N" <- "+N" ,
"+Ex/A" <- "+A" ,
"+Ex/Attr" <- "+Attr",
"+Ex/V" <- "+V" ||
# Change always and only when followed by a derivation tag. The stretch
# between the POS tag and the derivation tag may contain any symbols except
# "+Cmp" (\[ "+Cmp" ]*), so a match never reaches across a "+Cmp" tag:
_ \[ "+Cmp" ]*
[ "+Der"
| "+Der1"
| "+Der2"
| "+Der3"
| "+Der4"
| "+Der5"
]
] .o.

# Transitivity tag changes:
[ "+Ex/TV" <- "+TV" ,
"+Ex/IV" <- "+IV" ||
# Change only within the same stem (no "+Cmp" tag in between), and only when
# followed by another transitivity tag ("+TV" or "+IV") - presumably the one
# contributed by a later verbal derivation; confirm against the lexicon:
_ \[ "+Cmp" ]*
[ "+TV"
| "+IV"
]
] ;

0 comments on commit 3f28aa2

Please sign in to comment.