Skip to content

Commit

Permalink
Test files adapted to apu
Browse files Browse the repository at this point in the history
  • Loading branch information
Trondtr committed May 18, 2024
1 parent 5fe4eb6 commit 0a9c1ea
Show file tree
Hide file tree
Showing 12 changed files with 371 additions and 0 deletions.
35 changes: 35 additions & 0 deletions devtools/fem_minip.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/bin/bash

# Generate a mini paradigm for the noun lemmas selected by PATTERN.
#
# Every line of src/fst/morphology/stems/nouns.lexc (comments after "!"
# removed) that matches PATTERN contributes its lemma.  Each tag string
# listed in test/data/testfemparadigm.txt is appended to the lemma and the
# result is piped to the hfst generator.
#
# Usage, from the top directory of lang-apu:
#   sh devtools/fem_minip.sh PATTERN
# Examples:
#   sh devtools/fem_minip.sh n_21 | less
#   sh devtools/fem_minip.sh järvenpää
# Only get the lemma you ask for:
#   sh devtools/fem_minip.sh '^pää[ :+]'
#
# PATTERN is an extended regular expression (grep -E).
#
# Required environment:
#   HLOOKUP  lookup command, optionally followed by flags
#   GTLANGS  directory that contains lang-apu
#
# Only POSIX sh constructs are used, because the documented way of running
# this script is through "sh".

PATTERN=${1-}
LEXC_FILE="src/fst/morphology/stems/nouns.lexc"
P_FILE="test/data/testfemparadigm.txt"

if [ -z "$PATTERN" ]; then
  echo "Usage: $0 PATTERN" >&2
  exit 2
fi

if [ -z "${HLOOKUP:-}" ] || [ -z "${GTLANGS:-}" ]; then
  echo "ERROR: both \$HLOOKUP and \$GTLANGS must be set." >&2
  exit 1
fi

for needed in "$LEXC_FILE" "$P_FILE"; do
  if [ ! -r "$needed" ]; then
    echo "ERROR: cannot read $needed (run this from the top of lang-apu)." >&2
    exit 1
  fi
done

# To generate with xfst instead, use
#   $LOOKUP $GTLANGS/lang-apu/src/fst/generator-gt-norm.xfst
# in place of the $HLOOKUP call below.
GENERATOR="$GTLANGS/lang-apu/src/fst/generator-gt-norm.hfstol"

# Lemma extraction:
#   1. drop "!" comments and keep the lines matching PATTERN (the pattern
#      is quoted, so it may contain spaces);
#   2. strip leading whitespace and turn the lexc escape "% " into a bare
#      "%" so that the space inside a multi-word lemma survives step 3;
#   3. cut at the first space, tab, "+" or ":" - what is left is the lemma;
#   4. turn every "%" back into a space.
# The lemmas are read line by line, which keeps multi-word lemmas in one
# piece, and no temporary file is left behind in the working directory.
cut -d '!' -f1 "$LEXC_FILE" |
  grep -E -e "$PATTERN" |
  sed -e 's/^[[:space:]]*//' -e 's/% /%/g' |
  tr ' +\t' ':::' |
  cut -d ':' -f1 |
  sed 's/%/% /g' |
  tr -d '%' |
  while IFS= read -r lemma; do
    [ -n "$lemma" ] || continue
    # "|| [ -n ... ]" also handles a last line without trailing newline.
    while IFS= read -r form || [ -n "$form" ]; do
      [ -n "$form" ] || continue
      # $HLOOKUP is unquoted on purpose: flags stored in the variable must
      # be passed as separate arguments.
      # shellcheck disable=SC2086
      printf '%s\n' "${lemma}${form}" | $HLOOKUP "$GENERATOR"
    done < "$P_FILE"
  done


53 changes: 53 additions & 0 deletions devtools/generate-fem-wordforms.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/bin/bash

# A short shell script to test word form generation for all continuation
# lexicons except the ones listed in the exception list.
#
# It locates giella-core, sets the user variables below and hands them to
# giella-core/scripts/generate-wordforms-for-cont_lexes.sh.

# Path to $GIELLA_CORE - we don't use Autotools for these scripts.
# Search order: ../giella-core, $GTLANGS/giella-core, $GIELLA_CORE, $GTCORE.
if [ -d "../giella-core" ]; then
  giella_core="$(pwd)/../giella-core"
elif [ -n "${GTLANGS:-}" ] && [ -d "$GTLANGS/giella-core" ]; then
  giella_core=$GTLANGS/giella-core
elif [ -n "${GIELLA_CORE:-}" ] && [ -d "$GIELLA_CORE" ]; then
  giella_core=$GIELLA_CORE
elif [ -n "${GTCORE:-}" ] && [ -d "$GTCORE" ]; then
  giella_core=$GTCORE
else
  # "\$" prints the variable names literally; "$$" would expand to the
  # process id of the shell.
  echo "ERROR: Neither of \$GIELLA_CORE, \$GTCORE or \$GTLANGS defined, and nothing found within the parent folder." >&2
  exit 1
fi

######### USER Variables - change these to your liking: #########
# Codes for the word forms to be generated - list as many or few as needed.
# The continuation lines are deliberately not indented: inside the double
# quotes any indentation would become part of the value.
morf_codes="+N+Fem+Sg+NPossd+Nom \
+N+Fem+Sg+NPossd+Aff \
+N+Fem+Sg+NPossd+All \
+N+Fem+Sg+NPossd+Cau \
+N+Fem+Sg+NPossd+Com \
+N+Fem+Sg+NPossd+Dat \
+N+Fem+Sg+NPossd+Loc \
+N+Fem+Sg+NPossd+Prx"

# Lexicon source file for lexicons and lemmas:
source_file=src/fst/morphology/stems/nouns.lexc

# Lexicons that should NOT be used to extract lemmas (egrep expression):
exception_lexicons="(flagK)"

# FST used for generation, MINUS suffix:
generator_file=src/fst/generator-gt-norm

# How many lemmas maximally for each lexicon:
lemmacount=10

# Specify path to the dir containing the script used for generation:
script_dir="$giella_core/scripts"

################## DO NOT CHANGE BELOW HERE!!! ##################
"$script_dir/generate-wordforms-for-cont_lexes.sh" \
  "$giella_core" \
  "$morf_codes" \
  "$source_file" \
  "$generator_file" \
  "$lemmacount" \
  "$exception_lexicons"
55 changes: 55 additions & 0 deletions devtools/generate-iv-wordforms.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#!/bin/bash

# A short shell script to test word form generation for all continuation
# lexicons except the ones listed in the exception list.
#
# It locates giella-core, sets the user variables below and hands them to
# giella-core/scripts/generate-wordforms-for-cont_lexes.sh.

# Path to $GIELLA_CORE - we don't use Autotools for these scripts.
# Search order: ../giella-core, $GTLANGS/giella-core, $GIELLA_CORE, $GTCORE.
if [ -d "../giella-core" ]; then
  giella_core="$(pwd)/../giella-core"
elif [ -n "${GTLANGS:-}" ] && [ -d "$GTLANGS/giella-core" ]; then
  giella_core=$GTLANGS/giella-core
elif [ -n "${GIELLA_CORE:-}" ] && [ -d "$GIELLA_CORE" ]; then
  giella_core=$GIELLA_CORE
elif [ -n "${GTCORE:-}" ] && [ -d "$GTCORE" ]; then
  giella_core=$GTCORE
else
  # "\$" prints the variable names literally; "$$" would expand to the
  # process id of the shell.
  echo "ERROR: Neither of \$GIELLA_CORE, \$GTCORE or \$GTLANGS defined, and nothing found within the parent folder." >&2
  exit 1
fi

######### USER Variables - change these to your liking: #########
# Codes for the word forms to be generated - list as many or few as needed.
# The continuation lines are deliberately not indented: inside the double
# quotes any indentation would become part of the value.
# NOTE(review): "+V+Ind+Prs+pl3c" is written with a lower-case "pl", unlike
# the other person/number tags in this list - confirm that this is the
# spelling used in the lexc sources.
morf_codes="+V+Ind+Prs \
+V+Ind+Prs+Sg1 \
+V+Ind+Prs+Sg2 \
+V+Ind+Prs+Sg3 \
+V+Ind+Prs+Sg3c \
+V+Ind+Prs+Pl1 \
+V+Ind+Prs+Pl2 \
+V+Ind+Prs+pl3c \
+V+Ind+Pst+Imprf+Sg1 \
+V+Ind+Pst+Imprf+Rep+Sg1"

# Lexicon source file for lexicons and lemmas:
source_file=src/fst/morphology/stems/verbs.lexc

# Lexicons that should NOT be used to extract lemmas (egrep expression):
exception_lexicons="(flagK)"

# FST used for generation, MINUS suffix:
generator_file=src/fst/generator-gt-norm

# How many lemmas maximally for each lexicon:
lemmacount=30

# Specify path to the dir containing the script used for generation:
script_dir="$giella_core/scripts"

################## DO NOT CHANGE BELOW HERE!!! ##################
"$script_dir/generate-wordforms-for-cont_lexes.sh" \
  "$giella_core" \
  "$morf_codes" \
  "$source_file" \
  "$generator_file" \
  "$lemmacount" \
  "$exception_lexicons"
54 changes: 54 additions & 0 deletions devtools/generate-msc-wordforms.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/bin/bash

# A short shell script to test word form generation for all continuation
# lexicons except the ones listed in the exception list.
#
# It locates giella-core, sets the user variables below and hands them to
# giella-core/scripts/generate-wordforms-for-cont_lexes.sh.

# Path to $GIELLA_CORE - we don't use Autotools for these scripts.
# Search order: ../giella-core, $GTLANGS/giella-core, $GIELLA_CORE, $GTCORE.
if [ -d "../giella-core" ]; then
  giella_core="$(pwd)/../giella-core"
elif [ -n "${GTLANGS:-}" ] && [ -d "$GTLANGS/giella-core" ]; then
  giella_core=$GTLANGS/giella-core
elif [ -n "${GIELLA_CORE:-}" ] && [ -d "$GIELLA_CORE" ]; then
  giella_core=$GIELLA_CORE
elif [ -n "${GTCORE:-}" ] && [ -d "$GTCORE" ]; then
  giella_core=$GTCORE
else
  # "\$" prints the variable names literally; "$$" would expand to the
  # process id of the shell.
  echo "ERROR: Neither of \$GIELLA_CORE, \$GTCORE or \$GTLANGS defined, and nothing found within the parent folder." >&2
  exit 1
fi

######### USER Variables - change these to your liking: #########
# Codes for the word forms to be generated - list as many or few as needed.
# The continuation lines are deliberately not indented: inside the double
# quotes any indentation would become part of the value.
morf_codes="+N+Msc+Sg+NPossd+Nom \
+N+Msc+Sg+NPossd+Aff \
+N+Msc+Sg+NPossd+All \
+N+Msc+Sg+NPossd+Cau \
+N+Msc+Sg+NPossd+Com \
+N+Msc+Sg+NPossd+Dat \
+N+Msc+Sg+NPossd+Loc \
+N+Msc+Sg+NPossd+Prx"


# Lexicon source file for lexicons and lemmas:
source_file=src/fst/morphology/stems/nouns.lexc

# Lexicons that should NOT be used to extract lemmas (egrep expression):
exception_lexicons="(flagK)"

# FST used for generation, MINUS suffix:
generator_file=src/fst/generator-gt-norm

# How many lemmas maximally for each lexicon:
lemmacount=10

# Specify path to the dir containing the script used for generation:
script_dir="$giella_core/scripts"

################## DO NOT CHANGE BELOW HERE!!! ##################
"$script_dir/generate-wordforms-for-cont_lexes.sh" \
  "$giella_core" \
  "$morf_codes" \
  "$source_file" \
  "$generator_file" \
  "$lemmacount" \
  "$exception_lexicons"
55 changes: 55 additions & 0 deletions devtools/generate-tv-wordforms.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#!/bin/bash

# A short shell script to test word form generation for all continuation
# lexicons except the ones listed in the exception list.
#
# It locates giella-core, sets the user variables below and hands them to
# giella-core/scripts/generate-wordforms-for-cont_lexes.sh.

# Path to $GIELLA_CORE - we don't use Autotools for these scripts.
# Search order: ../giella-core, $GTLANGS/giella-core, $GIELLA_CORE, $GTCORE.
if [ -d "../giella-core" ]; then
  giella_core="$(pwd)/../giella-core"
elif [ -n "${GTLANGS:-}" ] && [ -d "$GTLANGS/giella-core" ]; then
  giella_core=$GTLANGS/giella-core
elif [ -n "${GIELLA_CORE:-}" ] && [ -d "$GIELLA_CORE" ]; then
  giella_core=$GIELLA_CORE
elif [ -n "${GTCORE:-}" ] && [ -d "$GTCORE" ]; then
  giella_core=$GTCORE
else
  # "\$" prints the variable names literally; "$$" would expand to the
  # process id of the shell.
  echo "ERROR: Neither of \$GIELLA_CORE, \$GTCORE or \$GTLANGS defined, and nothing found within the parent folder." >&2
  exit 1
fi

######### USER Variables - change these to your liking: #########
# Codes for the word forms to be generated - list as many or few as needed.
# The continuation lines are deliberately not indented: inside the double
# quotes any indentation would become part of the value.
# NOTE(review): "+V+Ind+Prs+pl3c" is written with a lower-case "pl", unlike
# the other person/number tags in this list - confirm that this is the
# spelling used in the lexc sources.
morf_codes="+V+Ind+Prs \
+V+Ind+Prs+Sg1 \
+V+Ind+Prs+Sg2 \
+V+Ind+Prs+Sg3 \
+V+Ind+Prs+Sg3c \
+V+Ind+Prs+Pl1 \
+V+Ind+Prs+Pl2 \
+V+Ind+Prs+pl3c \
+V+Ind+Pst+Imprf+Sg1 \
+V+Ind+Pst+Imprf+Rep+Sg1"

# Lexicon source file for lexicons and lemmas:
source_file=src/fst/morphology/stems/verbs.lexc

# Lexicons that should NOT be used to extract lemmas (egrep expression):
exception_lexicons="(flagK)"

# FST used for generation, MINUS suffix:
generator_file=src/fst/generator-gt-norm

# How many lemmas maximally for each lexicon:
lemmacount=30

# Specify path to the dir containing the script used for generation:
script_dir="$giella_core/scripts"

################## DO NOT CHANGE BELOW HERE!!! ##################
"$script_dir/generate-wordforms-for-cont_lexes.sh" \
  "$giella_core" \
  "$morf_codes" \
  "$source_file" \
  "$generator_file" \
  "$lemmacount" \
  "$exception_lexicons"
29 changes: 29 additions & 0 deletions devtools/iv_minip.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/bash

# Generate a mini paradigm for the verb lemmas selected by PATTERN.
#
# Every line of src/fst/morphology/stems/verbs.lexc (comments after "!"
# removed) that matches PATTERN contributes its lemma.  Each tag string
# listed in test/data/testivparadigm.txt is appended to the lemma and the
# result is piped to the hfst generator.
#
# Usage, from the top directory of lang-apu:
#   sh devtools/iv_minip.sh PATTERN
# Examples:
#   sh devtools/iv_minip.sh 2SYLL_OD | less
#   sh devtools/iv_minip.sh kihlođ
#
# PATTERN is an extended regular expression (grep -E).
#
# Required environment:
#   HLOOKUP  lookup command, optionally followed by flags
#   GTLANGS  directory that contains lang-apu
#
# Only POSIX sh constructs are used, because the documented way of running
# this script is through "sh".

PATTERN=${1-}
LEXC_FILE="src/fst/morphology/stems/verbs.lexc"
# NOTE(review): make sure the paradigm file exists under exactly this name
# (with the .txt suffix) and lists one tag string per line; an empty file
# produces no output at all.
P_FILE="test/data/testivparadigm.txt"

if [ -z "$PATTERN" ]; then
  echo "Usage: $0 PATTERN" >&2
  exit 2
fi

if [ -z "${HLOOKUP:-}" ] || [ -z "${GTLANGS:-}" ]; then
  echo "ERROR: both \$HLOOKUP and \$GTLANGS must be set." >&2
  exit 1
fi

for needed in "$LEXC_FILE" "$P_FILE"; do
  if [ ! -r "$needed" ]; then
    echo "ERROR: cannot read $needed (run this from the top of lang-apu)." >&2
    exit 1
  fi
done

# To generate with xfst instead, use
#   $LOOKUP $GTLANGS/lang-apu/src/fst/generator-gt-norm.xfst
# in place of the $HLOOKUP call below.
GENERATOR="$GTLANGS/lang-apu/src/fst/generator-gt-norm.hfstol"

# Lemma extraction:
#   1. drop "!" comments and keep the lines matching PATTERN (the pattern
#      is quoted, so it may contain spaces);
#   2. strip leading whitespace and turn the lexc escape "% " into a bare
#      "%" so that the space inside a multi-word lemma survives step 3;
#   3. cut at the first space, tab, "+" or ":" - what is left is the lemma.
#      Cutting at whitespace as well keeps the continuation lexicon and the
#      final ";" of entries without a colon out of the lemma list;
#   4. turn every "%" back into a space.
# The lemmas are read line by line, which keeps multi-word lemmas in one
# piece, and no temporary file is left behind in the working directory.
cut -d '!' -f1 "$LEXC_FILE" |
  grep -E -e "$PATTERN" |
  sed -e 's/^[[:space:]]*//' -e 's/% /%/g' |
  tr ' +\t' ':::' |
  cut -d ':' -f1 |
  sed 's/%/% /g' |
  tr -d '%' |
  while IFS= read -r lemma; do
    [ -n "$lemma" ] || continue
    # "|| [ -n ... ]" also handles a last line without trailing newline.
    while IFS= read -r form || [ -n "$form" ]; do
      [ -n "$form" ] || continue
      # $HLOOKUP is unquoted on purpose: flags stored in the variable must
      # be passed as separate arguments.
      # shellcheck disable=SC2086
      printf '%s\n' "${lemma}${form}" | $HLOOKUP "$GENERATOR"
    done < "$P_FILE"
  done

35 changes: 35 additions & 0 deletions devtools/msc_minip.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/bin/bash

# Generate a mini paradigm for the noun lemmas selected by PATTERN.
#
# Every line of src/fst/morphology/stems/nouns.lexc (comments after "!"
# removed) that matches PATTERN contributes its lemma.  Each tag string
# listed in test/data/testmscparadigm.txt is appended to the lemma and the
# result is piped to the hfst generator.
#
# Usage, from the top directory of lang-apu:
#   sh devtools/msc_minip.sh PATTERN
# Examples:
#   sh devtools/msc_minip.sh n_21 | less
#   sh devtools/msc_minip.sh järvenpää
# Only get the lemma you ask for:
#   sh devtools/msc_minip.sh '^pää[ :+]'
#
# PATTERN is an extended regular expression (grep -E).
#
# Required environment:
#   HLOOKUP  lookup command, optionally followed by flags
#   GTLANGS  directory that contains lang-apu
#
# Only POSIX sh constructs are used, because the documented way of running
# this script is through "sh".

PATTERN=${1-}
LEXC_FILE="src/fst/morphology/stems/nouns.lexc"
P_FILE="test/data/testmscparadigm.txt"

if [ -z "$PATTERN" ]; then
  echo "Usage: $0 PATTERN" >&2
  exit 2
fi

if [ -z "${HLOOKUP:-}" ] || [ -z "${GTLANGS:-}" ]; then
  echo "ERROR: both \$HLOOKUP and \$GTLANGS must be set." >&2
  exit 1
fi

for needed in "$LEXC_FILE" "$P_FILE"; do
  if [ ! -r "$needed" ]; then
    echo "ERROR: cannot read $needed (run this from the top of lang-apu)." >&2
    exit 1
  fi
done

# To generate with xfst instead, use
#   $LOOKUP $GTLANGS/lang-apu/src/fst/generator-gt-norm.xfst
# in place of the $HLOOKUP call below.
GENERATOR="$GTLANGS/lang-apu/src/fst/generator-gt-norm.hfstol"

# Lemma extraction:
#   1. drop "!" comments and keep the lines matching PATTERN (the pattern
#      is quoted, so it may contain spaces);
#   2. strip leading whitespace and turn the lexc escape "% " into a bare
#      "%" so that the space inside a multi-word lemma survives step 3;
#   3. cut at the first space, tab, "+" or ":" - what is left is the lemma;
#   4. turn every "%" back into a space.
# The lemmas are read line by line, which keeps multi-word lemmas in one
# piece, and no temporary file is left behind in the working directory.
cut -d '!' -f1 "$LEXC_FILE" |
  grep -E -e "$PATTERN" |
  sed -e 's/^[[:space:]]*//' -e 's/% /%/g' |
  tr ' +\t' ':::' |
  cut -d ':' -f1 |
  sed 's/%/% /g' |
  tr -d '%' |
  while IFS= read -r lemma; do
    [ -n "$lemma" ] || continue
    # "|| [ -n ... ]" also handles a last line without trailing newline.
    while IFS= read -r form || [ -n "$form" ]; do
      [ -n "$form" ] || continue
      # $HLOOKUP is unquoted on purpose: flags stored in the variable must
      # be passed as separate arguments.
      # shellcheck disable=SC2086
      printf '%s\n' "${lemma}${form}" | $HLOOKUP "$GENERATOR"
    done < "$P_FILE"
  done


29 changes: 29 additions & 0 deletions devtools/tv_minip.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/bash

# Generate a mini paradigm for the verb lemmas selected by PATTERN.
#
# Every line of src/fst/morphology/stems/verbs.lexc (comments after "!"
# removed) that matches PATTERN contributes its lemma.  Each tag string
# listed in test/data/testtvparadigm.txt is appended to the lemma and the
# result is piped to the hfst generator.
#
# Usage, from the top directory of lang-apu:
#   sh devtools/tv_minip.sh PATTERN
# Examples:
#   sh devtools/tv_minip.sh 2SYLL_OD | less
#   sh devtools/tv_minip.sh kihlođ
#
# PATTERN is an extended regular expression (grep -E).
#
# Required environment:
#   HLOOKUP  lookup command, optionally followed by flags
#   GTLANGS  directory that contains lang-apu
#
# Only POSIX sh constructs are used, because the documented way of running
# this script is through "sh".

PATTERN=${1-}
LEXC_FILE="src/fst/morphology/stems/verbs.lexc"
# NOTE(review): make sure the paradigm file exists under exactly this name
# (with the .txt suffix) and lists one tag string per line; an empty file
# produces no output at all.
P_FILE="test/data/testtvparadigm.txt"

if [ -z "$PATTERN" ]; then
  echo "Usage: $0 PATTERN" >&2
  exit 2
fi

if [ -z "${HLOOKUP:-}" ] || [ -z "${GTLANGS:-}" ]; then
  echo "ERROR: both \$HLOOKUP and \$GTLANGS must be set." >&2
  exit 1
fi

for needed in "$LEXC_FILE" "$P_FILE"; do
  if [ ! -r "$needed" ]; then
    echo "ERROR: cannot read $needed (run this from the top of lang-apu)." >&2
    exit 1
  fi
done

# To generate with xfst instead, use
#   $LOOKUP $GTLANGS/lang-apu/src/fst/generator-gt-norm.xfst
# in place of the $HLOOKUP call below.
GENERATOR="$GTLANGS/lang-apu/src/fst/generator-gt-norm.hfstol"

# Lemma extraction:
#   1. drop "!" comments and keep the lines matching PATTERN (the pattern
#      is quoted, so it may contain spaces);
#   2. strip leading whitespace and turn the lexc escape "% " into a bare
#      "%" so that the space inside a multi-word lemma survives step 3;
#   3. cut at the first space, tab, "+" or ":" - what is left is the lemma.
#      Cutting at whitespace as well keeps the continuation lexicon and the
#      final ";" of entries without a colon out of the lemma list;
#   4. turn every "%" back into a space.
# The lemmas are read line by line, which keeps multi-word lemmas in one
# piece, and no temporary file is left behind in the working directory.
cut -d '!' -f1 "$LEXC_FILE" |
  grep -E -e "$PATTERN" |
  sed -e 's/^[[:space:]]*//' -e 's/% /%/g' |
  tr ' +\t' ':::' |
  cut -d ':' -f1 |
  sed 's/%/% /g' |
  tr -d '%' |
  while IFS= read -r lemma; do
    [ -n "$lemma" ] || continue
    # "|| [ -n ... ]" also handles a last line without trailing newline.
    while IFS= read -r form || [ -n "$form" ]; do
      [ -n "$form" ] || continue
      # $HLOOKUP is unquoted on purpose: flags stored in the variable must
      # be passed as separate arguments.
      # shellcheck disable=SC2086
      printf '%s\n' "${lemma}${form}" | $HLOOKUP "$GENERATOR"
    done < "$P_FILE"
  done

13 changes: 13 additions & 0 deletions test/data/testfemparadigm.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
+N+Sg+Nom
+N+Sg+Abl
+N+Sg+Com
+N+Sg+Dat
+N+Sg+Loc
+N+Coll+Nom
+N+Sg+Nom+PxSg2
+N+Sg+Abl+PxSg2
+N+Sg+Com+PxSg2
+N+Sg+Dat+PxSg2
+N+Sg+Loc+PxSg2
+N+Coll+Nom+PxSg2

Empty file added test/data/testivparadigm
Empty file.
13 changes: 13 additions & 0 deletions test/data/testmscparadigm.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
+N+Sg+Nom
+N+Sg+Abl
+N+Sg+Com
+N+Sg+Dat
+N+Sg+Loc
+N+Coll+Nom
+N+Sg+Nom+PxSg2
+N+Sg+Abl+PxSg2
+N+Sg+Com+PxSg2
+N+Sg+Dat+PxSg2
+N+Sg+Loc+PxSg2
+N+Coll+Nom+PxSg2

Empty file added test/data/testtvparadigm
Empty file.

0 comments on commit 0a9c1ea

Please sign in to comment.