Skip to content

Commit

Permalink
Created some helper scripts, for extracting flags and tags from LEXC …
Browse files Browse the repository at this point in the history
…files, and aggregating multiple LEXC files into a single one.
  • Loading branch information
aarppe committed Jun 9, 2024
1 parent 668367a commit 8b88d6c
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 0 deletions.
3 changes: 3 additions & 0 deletions src/fst/morphology/incoming/resplit/aggregate-lexc.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/sh

# aggregate-lexc.sh
#
# Concatenate the split LEXC source files, in the order listed below, into a
# single lexicon.lexc.  All paths are relative to the current directory.

cat \
  root.lexc \
  noun_prefixes.lexc \
  noun_stems.lexc \
  noun_suffixes.lexc \
  verb_prefixes.lexc \
  prevn.lexc \
  verb_stems.lexc \
  verb_suffixes.lexc \
  > lexicon.lexc
18 changes: 18 additions & 0 deletions src/fst/morphology/incoming/resplit/extract-flags.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/bin/sh

# extract-flags.sh (LEXC)
#
# Print every distinct @...@ flag found in LEXC source, one per line, sorted
# in ascending byte order.
#
# Usage:
#   cat lexicon.lexc | ./extract-flags.sh
#   ./extract-flags.sh lexicon.lexc [more.lexc ...]

# Reads the files given as arguments (or stdin when there are none) and writes
# the sorted, de-duplicated flag list to stdout.
extract_flags() {
  awk '
    {
      # Drop everything from the first "!" to end of line (treated as a comment).
      sub(/!.*/, "")

      # Peel off the leftmost @...@ span until none is left.  The span is cut
      # out by position (RSTART/RLENGTH) and never re-used as a regex, so a
      # flag containing ".", "+", "^", "(" and the like can neither mismatch
      # nor leave the line unchanged and spin this loop forever.
      while (match($0, /@[^@]+@/)) {
        seen[substr($0, RSTART, RLENGTH)] = 1
        $0 = substr($0, 1, RSTART - 1) substr($0, RSTART + RLENGTH)
      }
    }
    END {
      # Sort bytewise so the order does not depend on the locale.
      sorter = "LC_ALL=C sort"
      for (flag in seen) print flag | sorter
      close(sorter)
    }
  ' "$@"
}

extract_flags "$@"
20 changes: 20 additions & 0 deletions src/fst/morphology/incoming/resplit/extract-tags.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/sh

# extract-tags.sh (LEXC)
#
# Print every distinct +tag found in LEXC source, one per line, sorted in
# ascending byte order.  A tag is a "+" followed by everything up to the next
# "+" or the end of the comment-stripped line.  Each tag is printed with its
# leading plus escaped, i.e. as "\+Tag".
#
# NOTE(review): the leading "\" looks like a leftover of regex escaping in the
# earlier implementation; it is kept so existing consumers see the same
# output -- confirm whether plain "+Tag" is wanted before dropping it.
#
# Usage:
#   cat lexicon.lexc | ./extract-tags.sh
#   ./extract-tags.sh lexicon.lexc [more.lexc ...]

# Reads the files given as arguments (or stdin when there are none) and writes
# the sorted, de-duplicated tag list to stdout.
extract_tags() {
  awk '
    {
      # Drop everything from the first "!" to end of line (treated as a comment).
      sub(/!.*/, "")

      # Peel off the leftmost +... span until none is left.  The span is cut
      # out by position (RSTART/RLENGTH) and never re-used as a regex, so a
      # tag containing "^", ".", "(", "[" and the like can neither mismatch
      # nor leave the line unchanged and spin this loop forever.
      while (match($0, /[+][^+]+/)) {
        seen["\\" substr($0, RSTART, RLENGTH)] = 1
        $0 = substr($0, 1, RSTART - 1) substr($0, RSTART + RLENGTH)
      }
    }
    END {
      # Sort bytewise so the order does not depend on the locale.
      sorter = "LC_ALL=C sort"
      for (tag in seen) print tag | sorter
      close(sorter)
    }
  ' "$@"
}

extract_tags "$@"

0 comments on commit 8b88d6c

Please sign in to comment.