-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
12 changed files
with
371 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
#!/bin/bash

# Generate a mini paradigm for noun lemmas: every lemma selected from
# src/fst/morphology/stems/nouns.lexc is combined with every tag string
# listed in test/data/testfemparadigm.txt, and each combination is sent
# to the hfst generator.
#
# Usage (the paths are relative, so run it from the language directory):
#   sh devtools/noun_minip.sh PATTERN
# PATTERN is an extended regular expression matched against the lexc lines
# after everything from the first '!' (the comment) has been cut off.
# Examples, when you are in apu:
#   sh devtools/noun_minip.sh n_21 | less
#   sh devtools/noun_minip.sh järvenpää
# Only get the lemma you ask for:
#   sh devtools/noun_minip.sh '^pää[ :+]'
#
# Environment:
#   HLOOKUP  lookup command used with the .hfstol generator; it is expanded
#            unquoted, so it may carry options
#   GTLANGS  directory that contains lang-apu

PATTERN=${1:-}
if [ -z "$PATTERN" ]; then
  printf 'Usage: %s PATTERN\n' "$0" >&2
  exit 2
fi

: "${HLOOKUP:?HLOOKUP must be set to the hfst lookup command}"
: "${GTLANGS:?GTLANGS must be set}"

LEXC_FILE="src/fst/morphology/stems/nouns.lexc"
P_FILE="test/data/testfemparadigm.txt"
GENERATOR="$GTLANGS/lang-apu/src/fst/generator-gt-norm.hfstol"

if [ ! -r "$P_FILE" ]; then
  printf 'ERROR: cannot read paradigm file %s\n' "$P_FILE" >&2
  exit 1
fi

# Lemma extraction:
#   1. drop lexc comments (everything after '!')
#   2. keep the lines matching PATTERN (quoted, so a pattern containing
#      spaces or brackets reaches grep as a single argument)
#   3. hide escaped spaces ("% " -> "%") so they survive step 4
#   4. turn blanks and '+' into ':' and keep the first ':'-separated field
#   5. restore the escaped spaces and drop the '%' escape character
# The lemmas are streamed straight into the loop, so no scratch file is
# left behind, and they are read line by line so that a lemma containing
# a space stays in one piece.
cut -d '!' -f1 "$LEXC_FILE" \
  | grep -E -e "$PATTERN" \
  | sed 's/% /%/g' \
  | tr ' \t+' ':::' \
  | cut -d ':' -f1 \
  | sed 's/%/% /g' \
  | tr -d '%' \
  | while read -r lemma; do
      [ -n "$lemma" ] || continue
      while read -r form || [ -n "$form" ]; do
        [ -n "$form" ] || continue
        # xfst alternative:
        # printf '%s\n' "${lemma}${form}" | $LOOKUP "$GTLANGS/lang-apu/src/fst/generator-gt-norm.xfst"
        # shellcheck disable=SC2086  # HLOOKUP may hold a command plus options
        printf '%s\n' "${lemma}${form}" | $HLOOKUP "$GENERATOR"
      done < "$P_FILE"
    done
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
#!/bin/bash

# A short shell script to test word form generation for all continuation
# lexicons except the ones listed in the exception list.
# It only locates giella-core and collects the settings below; the work is
# handed over to generate-wordforms-for-cont_lexes.sh in giella-core/scripts.

# Path to $GIELLA_CORE - we don't use Autotools for these scripts.
# Candidates are tried in this order: ../giella-core, $GTLANGS/giella-core,
# $GIELLA_CORE, $GTCORE.
if [ -d "../giella-core" ]; then
  giella_core="$(pwd)/../giella-core"
elif [ -n "$GTLANGS" ] && [ -d "$GTLANGS/giella-core" ]; then
  giella_core=$GTLANGS/giella-core
elif [ -n "$GIELLA_CORE" ] && [ -d "$GIELLA_CORE" ]; then
  giella_core=$GIELLA_CORE
elif [ -n "$GTCORE" ] && [ -d "$GTCORE" ]; then
  giella_core=$GTCORE
else
  # The backslashes keep the variable names literal; an unescaped "$$"
  # would be replaced by the process ID of the shell.
  echo "ERROR: Neither of \$GIELLA_CORE, \$GTCORE or \$GTLANGS defined, and nothing found within the parent folder." >&2
  exit 1
fi

######### USER Variables - change these to your liking: #########
# Codes for the word forms to be generated - list as many or few as needed.
# The trailing backslashes join everything into one space-separated string.
morf_codes="+N+Fem+Sg+NPossd+Nom \
+N+Fem+Sg+NPossd+Aff \
+N+Fem+Sg+NPossd+All \
+N+Fem+Sg+NPossd+Cau \
+N+Fem+Sg+NPossd+Com \
+N+Fem+Sg+NPossd+Dat \
+N+Fem+Sg+NPossd+Loc \
+N+Fem+Sg+NPossd+Prx"

# Lexicon source file for lexicons and lemmas:
source_file=src/fst/morphology/stems/nouns.lexc

# Lexicons that should NOT be used to extract lemmas (egrep expression):
exception_lexicons="(flagK)"

# FST used for generation, MINUS suffix:
generator_file=src/fst/generator-gt-norm

# How many lemmas maximally for each lexicon:
lemmacount=10

# Specify path to the dir containing the script used for generation:
script_dir="$giella_core/scripts"

################## DO NOT CHANGE BELOW HERE!!! ##################
"$script_dir/generate-wordforms-for-cont_lexes.sh" \
  "$giella_core" \
  "$morf_codes" \
  "$source_file" \
  "$generator_file" \
  "$lemmacount" \
  "$exception_lexicons"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
#!/bin/bash

# A short shell script to test word form generation for all continuation
# lexicons except the ones listed in the exception list.
# It only locates giella-core and collects the settings below; the work is
# handed over to generate-wordforms-for-cont_lexes.sh in giella-core/scripts.

# Path to $GIELLA_CORE - we don't use Autotools for these scripts.
# Candidates are tried in this order: ../giella-core, $GTLANGS/giella-core,
# $GIELLA_CORE, $GTCORE.
if [ -d "../giella-core" ]; then
  giella_core="$(pwd)/../giella-core"
elif [ -n "$GTLANGS" ] && [ -d "$GTLANGS/giella-core" ]; then
  giella_core=$GTLANGS/giella-core
elif [ -n "$GIELLA_CORE" ] && [ -d "$GIELLA_CORE" ]; then
  giella_core=$GIELLA_CORE
elif [ -n "$GTCORE" ] && [ -d "$GTCORE" ]; then
  giella_core=$GTCORE
else
  # The backslashes keep the variable names literal; an unescaped "$$"
  # would be replaced by the process ID of the shell.
  echo "ERROR: Neither of \$GIELLA_CORE, \$GTCORE or \$GTLANGS defined, and nothing found within the parent folder." >&2
  exit 1
fi

######### USER Variables - change these to your liking: #########
# Codes for the word forms to be generated - list as many or few as needed.
# The trailing backslashes join everything into one space-separated string.
# NOTE(review): "+pl3c" is lower-case while the other plural tags are
# written "Pl" - confirm against the tag definitions of the analyser.
morf_codes="+V+Ind+Prs \
+V+Ind+Prs+Sg1 \
+V+Ind+Prs+Sg2 \
+V+Ind+Prs+Sg3 \
+V+Ind+Prs+Sg3c \
+V+Ind+Prs+Pl1 \
+V+Ind+Prs+Pl2 \
+V+Ind+Prs+pl3c \
+V+Ind+Pst+Imprf+Sg1 \
+V+Ind+Pst+Imprf+Rep+Sg1"

# Lexicon source file for lexicons and lemmas:
source_file=src/fst/morphology/stems/verbs.lexc

# Lexicons that should NOT be used to extract lemmas (egrep expression):
exception_lexicons="(flagK)"

# FST used for generation, MINUS suffix:
generator_file=src/fst/generator-gt-norm

# How many lemmas maximally for each lexicon:
lemmacount=30

# Specify path to the dir containing the script used for generation:
script_dir="$giella_core/scripts"

################## DO NOT CHANGE BELOW HERE!!! ##################
"$script_dir/generate-wordforms-for-cont_lexes.sh" \
  "$giella_core" \
  "$morf_codes" \
  "$source_file" \
  "$generator_file" \
  "$lemmacount" \
  "$exception_lexicons"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
#!/bin/bash

# A short shell script to test word form generation for all continuation
# lexicons except the ones listed in the exception list.
# It only locates giella-core and collects the settings below; the work is
# handed over to generate-wordforms-for-cont_lexes.sh in giella-core/scripts.

# Path to $GIELLA_CORE - we don't use Autotools for these scripts.
# Candidates are tried in this order: ../giella-core, $GTLANGS/giella-core,
# $GIELLA_CORE, $GTCORE.
if [ -d "../giella-core" ]; then
  giella_core="$(pwd)/../giella-core"
elif [ -n "$GTLANGS" ] && [ -d "$GTLANGS/giella-core" ]; then
  giella_core=$GTLANGS/giella-core
elif [ -n "$GIELLA_CORE" ] && [ -d "$GIELLA_CORE" ]; then
  giella_core=$GIELLA_CORE
elif [ -n "$GTCORE" ] && [ -d "$GTCORE" ]; then
  giella_core=$GTCORE
else
  # The backslashes keep the variable names literal; an unescaped "$$"
  # would be replaced by the process ID of the shell.
  echo "ERROR: Neither of \$GIELLA_CORE, \$GTCORE or \$GTLANGS defined, and nothing found within the parent folder." >&2
  exit 1
fi

######### USER Variables - change these to your liking: #########
# Codes for the word forms to be generated - list as many or few as needed.
# The trailing backslashes join everything into one space-separated string.
morf_codes="+N+Msc+Sg+NPossd+Nom \
+N+Msc+Sg+NPossd+Aff \
+N+Msc+Sg+NPossd+All \
+N+Msc+Sg+NPossd+Cau \
+N+Msc+Sg+NPossd+Com \
+N+Msc+Sg+NPossd+Dat \
+N+Msc+Sg+NPossd+Loc \
+N+Msc+Sg+NPossd+Prx"

# Lexicon source file for lexicons and lemmas:
source_file=src/fst/morphology/stems/nouns.lexc

# Lexicons that should NOT be used to extract lemmas (egrep expression):
exception_lexicons="(flagK)"

# FST used for generation, MINUS suffix:
generator_file=src/fst/generator-gt-norm

# How many lemmas maximally for each lexicon:
lemmacount=10

# Specify path to the dir containing the script used for generation:
script_dir="$giella_core/scripts"

################## DO NOT CHANGE BELOW HERE!!! ##################
"$script_dir/generate-wordforms-for-cont_lexes.sh" \
  "$giella_core" \
  "$morf_codes" \
  "$source_file" \
  "$generator_file" \
  "$lemmacount" \
  "$exception_lexicons"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
#!/bin/bash

# A short shell script to test word form generation for all continuation
# lexicons except the ones listed in the exception list.
# It only locates giella-core and collects the settings below; the work is
# handed over to generate-wordforms-for-cont_lexes.sh in giella-core/scripts.

# Path to $GIELLA_CORE - we don't use Autotools for these scripts.
# Candidates are tried in this order: ../giella-core, $GTLANGS/giella-core,
# $GIELLA_CORE, $GTCORE.
if [ -d "../giella-core" ]; then
  giella_core="$(pwd)/../giella-core"
elif [ -n "$GTLANGS" ] && [ -d "$GTLANGS/giella-core" ]; then
  giella_core=$GTLANGS/giella-core
elif [ -n "$GIELLA_CORE" ] && [ -d "$GIELLA_CORE" ]; then
  giella_core=$GIELLA_CORE
elif [ -n "$GTCORE" ] && [ -d "$GTCORE" ]; then
  giella_core=$GTCORE
else
  # The backslashes keep the variable names literal; an unescaped "$$"
  # would be replaced by the process ID of the shell.
  echo "ERROR: Neither of \$GIELLA_CORE, \$GTCORE or \$GTLANGS defined, and nothing found within the parent folder." >&2
  exit 1
fi

######### USER Variables - change these to your liking: #########
# Codes for the word forms to be generated - list as many or few as needed.
# The trailing backslashes join everything into one space-separated string.
# NOTE(review): "+pl3c" is lower-case while the other plural tags are
# written "Pl" - confirm against the tag definitions of the analyser.
morf_codes="+V+Ind+Prs \
+V+Ind+Prs+Sg1 \
+V+Ind+Prs+Sg2 \
+V+Ind+Prs+Sg3 \
+V+Ind+Prs+Sg3c \
+V+Ind+Prs+Pl1 \
+V+Ind+Prs+Pl2 \
+V+Ind+Prs+pl3c \
+V+Ind+Pst+Imprf+Sg1 \
+V+Ind+Pst+Imprf+Rep+Sg1"

# Lexicon source file for lexicons and lemmas:
source_file=src/fst/morphology/stems/verbs.lexc

# Lexicons that should NOT be used to extract lemmas (egrep expression):
exception_lexicons="(flagK)"

# FST used for generation, MINUS suffix:
generator_file=src/fst/generator-gt-norm

# How many lemmas maximally for each lexicon:
lemmacount=30

# Specify path to the dir containing the script used for generation:
script_dir="$giella_core/scripts"

################## DO NOT CHANGE BELOW HERE!!! ##################
"$script_dir/generate-wordforms-for-cont_lexes.sh" \
  "$giella_core" \
  "$morf_codes" \
  "$source_file" \
  "$generator_file" \
  "$lemmacount" \
  "$exception_lexicons"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
#!/bin/bash

# Generate a mini paradigm for verb lemmas: every lemma selected from
# src/fst/morphology/stems/verbs.lexc is combined with every tag string
# listed in test/data/testivparadigm.txt, and each combination is sent
# to the hfst generator.
#
# PATTERN (the only argument) is an extended regular expression matched
# against the lexc lines after everything from the first '!' has been cut.
# Command, when you are in apu (the paths are relative to that directory):
#   sh devtools/iv_minip.sh 2SYLL_OD | less
#   sh devtools/iv_minip.sh kihlođ
#
# Environment:
#   HLOOKUP  lookup command used with the .hfstol generator; it is expanded
#            unquoted, so it may carry options
#   GTLANGS  directory that contains lang-apu

PATTERN=${1:-}
if [ -z "$PATTERN" ]; then
  printf 'Usage: %s PATTERN\n' "$0" >&2
  exit 2
fi

: "${HLOOKUP:?HLOOKUP must be set to the hfst lookup command}"
: "${GTLANGS:?GTLANGS must be set}"

LEXC_FILE="src/fst/morphology/stems/verbs.lexc"
P_FILE="test/data/testivparadigm.txt"
GENERATOR="$GTLANGS/lang-apu/src/fst/generator-gt-norm.hfstol"

if [ ! -r "$P_FILE" ]; then
  printf 'ERROR: cannot read paradigm file %s\n' "$P_FILE" >&2
  exit 1
fi

# Drop lexc comments, keep the lines matching PATTERN (quoted, so the
# pattern reaches grep as a single argument), then keep whatever precedes
# the first '+' or ':'. The result is held in a variable, so no scratch
# file is created.
lemmas=$(cut -d '!' -f1 "$LEXC_FILE" \
  | grep -E -e "$PATTERN" \
  | tr '+' ':' \
  | cut -d ':' -f1)

# The lemma list is split on whitespace on purpose: an entry without '+'
# or ':' comes through as the whole line, and splitting is what isolates
# its first word. Globbing is switched off so that no word is expanded
# into file names.
set -f
# shellcheck disable=SC2086
for lemma in $lemmas; do
  while read -r form || [ -n "$form" ]; do
    [ -n "$form" ] || continue
    # xfst alternative:
    # printf '%s\n' "${lemma}${form}" | $LOOKUP "$GTLANGS/lang-apu/src/fst/generator-gt-norm.xfst"
    # shellcheck disable=SC2086  # HLOOKUP may hold a command plus options
    printf '%s\n' "${lemma}${form}" | $HLOOKUP "$GENERATOR"
  done < "$P_FILE"
done
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
#!/bin/bash

# Generate a mini paradigm for noun lemmas: every lemma selected from
# src/fst/morphology/stems/nouns.lexc is combined with every tag string
# listed in test/data/testmscparadigm.txt, and each combination is sent
# to the hfst generator.
#
# Usage (the paths are relative, so run it from the language directory):
#   sh devtools/noun_minip.sh PATTERN
# PATTERN is an extended regular expression matched against the lexc lines
# after everything from the first '!' (the comment) has been cut off.
# Examples, when you are in apu:
#   sh devtools/noun_minip.sh n_21 | less
#   sh devtools/noun_minip.sh järvenpää
# Only get the lemma you ask for:
#   sh devtools/noun_minip.sh '^pää[ :+]'
#
# Environment:
#   HLOOKUP  lookup command used with the .hfstol generator; it is expanded
#            unquoted, so it may carry options
#   GTLANGS  directory that contains lang-apu

PATTERN=${1:-}
if [ -z "$PATTERN" ]; then
  printf 'Usage: %s PATTERN\n' "$0" >&2
  exit 2
fi

: "${HLOOKUP:?HLOOKUP must be set to the hfst lookup command}"
: "${GTLANGS:?GTLANGS must be set}"

LEXC_FILE="src/fst/morphology/stems/nouns.lexc"
P_FILE="test/data/testmscparadigm.txt"
GENERATOR="$GTLANGS/lang-apu/src/fst/generator-gt-norm.hfstol"

if [ ! -r "$P_FILE" ]; then
  printf 'ERROR: cannot read paradigm file %s\n' "$P_FILE" >&2
  exit 1
fi

# Lemma extraction:
#   1. drop lexc comments (everything after '!')
#   2. keep the lines matching PATTERN (quoted, so a pattern containing
#      spaces or brackets reaches grep as a single argument)
#   3. hide escaped spaces ("% " -> "%") so they survive step 4
#   4. turn blanks and '+' into ':' and keep the first ':'-separated field
#   5. restore the escaped spaces and drop the '%' escape character
# The lemmas are streamed straight into the loop, so no scratch file is
# left behind, and they are read line by line so that a lemma containing
# a space stays in one piece.
cut -d '!' -f1 "$LEXC_FILE" \
  | grep -E -e "$PATTERN" \
  | sed 's/% /%/g' \
  | tr ' \t+' ':::' \
  | cut -d ':' -f1 \
  | sed 's/%/% /g' \
  | tr -d '%' \
  | while read -r lemma; do
      [ -n "$lemma" ] || continue
      while read -r form || [ -n "$form" ]; do
        [ -n "$form" ] || continue
        # xfst alternative:
        # printf '%s\n' "${lemma}${form}" | $LOOKUP "$GTLANGS/lang-apu/src/fst/generator-gt-norm.xfst"
        # shellcheck disable=SC2086  # HLOOKUP may hold a command plus options
        printf '%s\n' "${lemma}${form}" | $HLOOKUP "$GENERATOR"
      done < "$P_FILE"
    done
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
#!/bin/bash

# Generate a mini paradigm for verb lemmas: every lemma selected from
# src/fst/morphology/stems/verbs.lexc is combined with every tag string
# listed in test/data/testtvparadigm.txt, and each combination is sent
# to the hfst generator.
#
# PATTERN (the only argument) is an extended regular expression matched
# against the lexc lines after everything from the first '!' has been cut.
# Command, when you are in apu (the paths are relative to that directory):
#   sh devtools/tv_minip.sh 2SYLL_OD | less
#   sh devtools/tv_minip.sh kihlođ
#
# Environment:
#   HLOOKUP  lookup command used with the .hfstol generator; it is expanded
#            unquoted, so it may carry options
#   GTLANGS  directory that contains lang-apu

PATTERN=${1:-}
if [ -z "$PATTERN" ]; then
  printf 'Usage: %s PATTERN\n' "$0" >&2
  exit 2
fi

: "${HLOOKUP:?HLOOKUP must be set to the hfst lookup command}"
: "${GTLANGS:?GTLANGS must be set}"

LEXC_FILE="src/fst/morphology/stems/verbs.lexc"
P_FILE="test/data/testtvparadigm.txt"
GENERATOR="$GTLANGS/lang-apu/src/fst/generator-gt-norm.hfstol"

if [ ! -r "$P_FILE" ]; then
  printf 'ERROR: cannot read paradigm file %s\n' "$P_FILE" >&2
  exit 1
fi

# Drop lexc comments, keep the lines matching PATTERN (quoted, so the
# pattern reaches grep as a single argument), then keep whatever precedes
# the first '+' or ':'. The result is held in a variable, so no scratch
# file is created.
lemmas=$(cut -d '!' -f1 "$LEXC_FILE" \
  | grep -E -e "$PATTERN" \
  | tr '+' ':' \
  | cut -d ':' -f1)

# The lemma list is split on whitespace on purpose: an entry without '+'
# or ':' comes through as the whole line, and splitting is what isolates
# its first word. Globbing is switched off so that no word is expanded
# into file names.
set -f
# shellcheck disable=SC2086
for lemma in $lemmas; do
  while read -r form || [ -n "$form" ]; do
    [ -n "$form" ] || continue
    # xfst alternative:
    # printf '%s\n' "${lemma}${form}" | $LOOKUP "$GTLANGS/lang-apu/src/fst/generator-gt-norm.xfst"
    # shellcheck disable=SC2086  # HLOOKUP may hold a command plus options
    printf '%s\n' "${lemma}${form}" | $HLOOKUP "$GENERATOR"
  done < "$P_FILE"
done
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
+N+Sg+Nom | ||
+N+Sg+Abl | ||
+N+Sg+Com | ||
+N+Sg+Dat | ||
+N+Sg+Loc | ||
+N+Coll+Nom | ||
+N+Sg+Nom+PxSg2 | ||
+N+Sg+Abl+PxSg2 | ||
+N+Sg+Com+PxSg2 | ||
+N+Sg+Dat+PxSg2 | ||
+N+Sg+Loc+PxSg2 | ||
+N+Coll+Nom+PxSg2 | ||
|
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
+N+Sg+Nom | ||
+N+Sg+Abl | ||
+N+Sg+Com | ||
+N+Sg+Dat | ||
+N+Sg+Loc | ||
+N+Coll+Nom | ||
+N+Sg+Nom+PxSg2 | ||
+N+Sg+Abl+PxSg2 | ||
+N+Sg+Com+PxSg2 | ||
+N+Sg+Dat+PxSg2 | ||
+N+Sg+Loc+PxSg2 | ||
+N+Coll+Nom+PxSg2 | ||
|
Empty file.