Skip to content

Commit

Permalink
Revised script to work at all, and with both prefixal and suffixal tags.
Browse files Browse the repository at this point in the history
  • Loading branch information
aarppe committed Jun 9, 2024
1 parent 8b88d6c commit e2d6f1b
Showing 1 changed file with 12 additions and 8 deletions.
20 changes: 12 additions & 8 deletions src/fst/morphology/incoming/resplit/extract-tags.sh
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@
#!/bin/sh

# extract-tags.sh (LEXC)
#
# Prints every distinct tag found before the colon of "upper:lower" LEXC
# entries, one tag per line, sorted as strings.
#
# Usage:
#   cat lexicon.lexc | ./extract-tags.sh
#   ./extract-tags.sh lexicon.lexc [more.lexc ...]
#
# Requires GNU awk (gawk): PROCINFO["sorted_in"] is a gawk extension.

# Reads LEXC text from the files given as arguments (or from stdin when
# there are none) and writes the sorted list of distinct tags to stdout.
extract_tags() {
  gawk '{
    # Drop everything from the first "!" to the end of the line
    # (LEXC starts comments with "!").
    sub("!.*$", "");

    # Only lines that split into exactly two fields on ":" are examined;
    # tags are collected from the part before the colon.
    if (split($0, lexc, ":") != 2)
      next;
    upper = lexc[1];

    # A tag is either prefixal (a run of characters followed by "+") or
    # suffixal ("+" followed by a run of characters). Neither "+" nor "@"
    # may occur inside the run.
    while (match(upper, "([^\\+@]+\\+)|(\\+[^\\+@]+)") != 0) {
      tags[substr(upper, RSTART, RLENGTH)]++;
      # Cut the match out by position rather than re-using the tag as a
      # regular expression: tags may contain regex metacharacters, which
      # would remove the wrong text or nothing at all and stall the loop.
      upper = substr(upper, 1, RSTART - 1) substr(upper, RSTART + RLENGTH);
    }
  }
  END {
    # Traverse the array in ascending string order of its indices.
    PROCINFO["sorted_in"] = "@ind_str_asc";
    for (tt in tags)
      print tt;
  }' "$@"
}

extract_tags "$@"

0 comments on commit e2d6f1b

Please sign in to comment.