Skip to content

Commit

Permalink
Deal with raw apertium tags in giella
Browse files Browse the repository at this point in the history
Add some case tags and +PxSP3
Or do we want +Px3
@flammie
  • Loading branch information
rueter committed Nov 8, 2023
1 parent 9d0846f commit 8ffaf2d
Show file tree
Hide file tree
Showing 6 changed files with 115 additions and 65 deletions.
7 changes: 4 additions & 3 deletions src/fst/affixes/nouns.lexc
Original file line number Diff line number Diff line change
Expand Up @@ -281,12 +281,13 @@ LEXICON NOUN_COMP

LEXICON ACRO_SGNOM_0

%<sg%>%<nom%>:0 # ;
!%<sg%>%<nom%>:0 # ;
+Sg+Nom:0 # ;

LEXICON ACRO_CASES

%<sg%>%<gen%>:n # ;

!%<sg%>%<gen%>:n # ;
+Sg+Gen:n # ;



Expand Down
103 changes: 67 additions & 36 deletions src/fst/affixes/verbs.lexc
Original file line number Diff line number Diff line change
Expand Up @@ -251,40 +251,55 @@ LEXICON VERB_COND_PASSIVE_TÄIS
LEXICON AUX_PCP
!! **LEXICON @LEXNAME@**
!! FIXME
%<pp%>:t # ;
%<pp%>+Sg%<gen%>:n # ;
!%<pp%>:t # ;
!%<pp%>+Sg%<gen%>:n # ;
+PrfPrc:t # ;
+PrfPrc+Sg+Gen:n # ;

LEXICON AUX_3SG_PI
!! **LEXICON @LEXNAME@**
+Act%<pri%>%<p3%>+Sg:pi # ;
!+Act%<pri%>%<p3%>+Sg:pi # ;
+Act+Ind+Prs+Sg3:pi # ;

LEXICON AUX_3SG_0
!! **LEXICON @LEXNAME@**
! Zero-ending third person singular present entry. The doc header above
! is given once only: a repeated `!! **LEXICON @LEXNAME@**` line would be
! extracted twice into the generated documentation.
+Act%<pri%>%<p3%>+Sg:0 # ;
!+Act%<pri%>%<p3%>+Sg:0 # ;
+Act+Ind+Prs+Sg3:0 # ;

LEXICON AUX_3SG_Y
!! **LEXICON @LEXNAME@**
+Act%<pri%>%<p3%>+Sg:y # ;
!+Act%<pri%>%<p3%>+Sg:y # ;
+Act+Ind+Prs+Sg3:y # ;


LEXICON AUX_CONNEG_0
!! **LEXICON @LEXNAME@**
+Act%<pri%>%<conneg%>:0 # ;
!+Act%<pri%>%<conneg%>:0 # ;
+Act+Ind+Prs+ConNeg:0 # ;

LEXICON AUX_PRES_FRONT
!! **LEXICON @LEXNAME@**
+Act%<pri%>%<p1%>+Sg:n # ;
+Act%<pri%>%<p2%>+Sg:t # ;
+Act%<pri%>%<p1%>%<pl%>:mmä # ;
+Act%<pri%>%<p2%>%<pl%>:ttä # ;
!+Act%<pri%>%<p1%>+Sg:n # ;
!+Act%<pri%>%<p2%>+Sg:t # ;
!+Act%<pri%>%<p1%>%<pl%>:mmä # ;
!+Act%<pri%>%<p2%>%<pl%>:ttä # ;
+Act+Ind+Prs+Sg1:n # ;
+Act+Ind+Prs+Sg2:t # ;
+Act+Ind+Prs+Pl1:mmä # ;
+Act+Ind+Prs+Pl2:ttä # ;

LEXICON AUX_PRES_BACK
!! **LEXICON @LEXNAME@**
! Present indicative person endings n, t, tko, mma, tta.
! Lines containing %<...%> carry the Apertium-style tag strings; the
! +Tag lines are the Giella-style analyses for the same surface forms.
+Act%<pri%>%<p1%>+Sg:n # ;
+Act%<pri%>%<p2%>+Sg:t # ;
+Act%<pri%>%<p2%>+Sg%<qst%>:tko # ;
+Act%<pri%>%<p1%>%<pl%>:mma # ;
+Act%<pri%>%<p2%>%<pl%>:tta # ;
!+Act%<pri%>%<p1%>+Sg:n # ;
!+Act%<pri%>%<p2%>+Sg:t # ;
!+Act%<pri%>%<p2%>+Sg%<qst%>:tko # ;
!+Act%<pri%>%<p1%>%<pl%>:mma # ;
!+Act%<pri%>%<p2%>%<pl%>:tta # ;
+Act+Ind+Prs+Sg1:n # ;
+Act+Ind+Prs+Sg2:t # ;
! The %<qst%> of the Apertium string must survive as +Qst (the same tag
! AUX_COND uses for isko); without it the tko form is analysed exactly
! like the plain Sg2 form and generation of +Sg2 becomes ambiguous.
+Act+Ind+Prs+Sg2+Qst:tko # ;
+Act+Ind+Prs+Pl1:mma # ;
+Act+Ind+Prs+Pl2:tta # ;

!!!LEXICON AUX_PAST_WEAK_FRONT
!!!
Expand All @@ -295,31 +310,44 @@ LEXICON AUX_PRES_BACK

LEXICON AUX_PAST_WEAK_BACK
!! **LEXICON @LEXNAME@**
! Person endings in, it, imma, itta for first and second person,
! singular and plural.
! NOTE(review): the Apertium strings below use %<pri%>, which other
! lexicons in this file convert to +Prs (e.g. AUX_PRES_BACK), but here
! the Giella tags are +Prt. Presumably an intentional correction to
! match the lexicon name -- confirm with the author.
+Act%<pri%>%<p1%>+Sg:in # ;
+Act%<pri%>%<p2%>+Sg:it # ;
+Act%<pri%>%<p1%>%<pl%>:imma # ;
+Act%<pri%>%<p2%>%<pl%>:itta # ;
!+Act%<pri%>%<p1%>+Sg:in # ;
!+Act%<pri%>%<p2%>+Sg:it # ;
!+Act%<pri%>%<p1%>%<pl%>:imma # ;
!+Act%<pri%>%<p2%>%<pl%>:itta # ;
+Act+Ind+Prt+Sg1:in # ;
+Act+Ind+Prt+Sg2:it # ;
+Act+Ind+Prt+Pl1:imma # ;
+Act+Ind+Prt+Pl2:itta # ;

LEXICON AUX_PAST_3SG_0
!! **LEXICON @LEXNAME@**
+Act%<past%>%<p3%>+Sg:0 # ;
!+Act%<past%>%<p3%>+Sg:0 # ;
+Act+Ind+Prt+Sg3:0 # ;

LEXICON AUX_PASSIVE_H

! Present-tense h ending: third person plural active and passive.
! The Apertium strings carry %<pri%>, so the Giella tags must be +Prs;
! +Prt belongs to AUX_PAST_PASSIVE_H, whose originals carry %<past%>.
! With +Prt here the two lexicons would produce identical analyses.
+Act%<pri%>%<p3%>%<pl%>:h # ;
%<pasv%>%<pri%>:h # ;
!+Act%<pri%>%<p3%>%<pl%>:h # ;
!%<pasv%>%<pri%>:h # ;
+Act+Ind+Prs+Pl3:h # ;
+Pss+Ind+Prs:h # ;

LEXICON AUX_PAST_PASSIVE_H

+Act%<past%>%<p3%>%<pl%>:h # ;
%<pasv%>%<past%>:h # ;
!+Act%<past%>%<p3%>%<pl%>:h # ;
!%<pasv%>%<past%>:h # ;
+Act+Ind+Prt+Pl3:h # ;
+Pss+Ind+Prt:h # ;

LEXICON AUX_COND

! Conditional endings: isin (Sg1), isit (Sg2), is (Sg3), isko (Sg3
! question form). Lines containing %<...%> carry the Apertium-style tag
! strings; the +Tag lines are the Giella-style analyses.
+Act%<cni%>%<p1%>+Sg:isin # ;
+Act%<cni%>%<p2%>+Sg:isit # ;
+Act%<cni%>%<p3%>+Sg:is # ;
+Act%<cni%>%<p3%>+Sg%<qst%>:isko # ;
!+Act%<cni%>%<p1%>+Sg:isin # ;
!+Act%<cni%>%<p2%>+Sg:isit # ;
!+Act%<cni%>%<p3%>+Sg:is # ;
!+Act%<cni%>%<p3%>+Sg%<qst%>:isko # ;
+Act+Cond+Sg1:isin # ;
! Sg2 surfaces as isit, as in the Apertium entry; isin is the Sg1 form.
+Act+Cond+Sg2:isit # ;
+Act+Cond+Sg3:is # ;
+Act+Cond+Sg3+Qst:isko # ;

LEXICON AUX_INF_A

Expand All @@ -332,7 +360,8 @@ LEXICON AUX_INF_Ä
LEXICON AUX_INF_E

+Inf+Ine:ešša # ;
+Inf+Ine%<px3sp%>:eššah # ;
!+Inf+Ine%<px3sp%>:eššah # ;
+Inf+Ine+PxSP3:eššah # ;

LEXICON AUX_INF_MA

Expand All @@ -353,16 +382,18 @@ LEXICON AUX_INF_MÄ

LEXICON AUX_PCP_TY

%<pasv%>%<pp%>+Sg%<nom%>:ty # ;
%<pasv%>%<pp%>+Sg%<par%>%<px3sp%>:työh # ;
!%<pasv%>%<pp%>+Sg%<nom%>:ty # ;
!%<pasv%>%<pp%>+Sg%<par%>%<px3sp%>:työh # ;
+Pss+PrfPrc+Sg+Nom:ty # ;
+Pss+PrfPrc+Sg+Par+PxSP3:työh # ;


LEXICON AUX_PCP_N

! Two analyses of the n ending: the active perfect participle and a
! past-tense connegative.
! NOTE(review): the connegative analysis keeps a bare +Sg (no person),
! carried over from %<past%>+Sg%<conneg%>, whereas the finite forms in
! this file use +Sg1/+Sg2/+Sg3 -- confirm that bare +Sg is intended.
+Act%<pp%>:n # ;
+Act%<past%>+Sg%<conneg%>:n # ;


!+Act%<pp%>:n # ;
!+Act%<past%>+Sg%<conneg%>:n # ;
+Act+PrfPrc:n # ;
+Act+Ind+Prt+Sg+ConNeg:n # ;

! vim: set ft=xfst-lexc:

Expand Down
9 changes: 7 additions & 2 deletions src/fst/root.lexc
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ Multichar_Symbols !!≈ # Definitions for @CODE@
!! The parts-of-speech are:
+N +A +Adv +V !!≈ * **@CODE@**
+Pron +CS +CC +Adp +Po +Pr +Interj +Pcle +Num !!≈ * **@CODE@**

+C !!≈ * **@CODE@** TYÖÖÖÖ! This needs deprecating :-) 2023-11-08 Jaska

!! The parts of speech are further split up into:
+Prop +Pers +Dem +Interr +Refl +Recipr +Rel +Indef !!≈ * **@CODE@**

Expand All @@ -45,14 +46,18 @@ Multichar_Symbols !!≈ # Definitions for @CODE@
!! The nominals are inflected in the following Case and Number
+Sg +Du +Pl !!≈ * **@CODE@**
+Ess +Nom +Gen +Acc +Ill +Loc +Com +Com/Sh !!≈ * **@CODE@**
+Ine +Ill +Ela +Par +Tra +Ins

!! The possession is marked as such:
+PxSg1 +PxSg2 +PxSg3 +PxDu1 +PxDu2 +PxDu3 +PxPl1 +PxPl2 +PxPl3 !!≈ * **@CODE@**
+PxSP3
!! The comparative forms are:
+Comp +Superl !!≈ * **@CODE@**
+Pos +Comp +Superl !!≈ * **@CODE@**
!! Numerals are classified under:
+Attr +Card !!≈ * **@CODE@**
+Ord !!≈ * **@CODE@**
!! Verb voices are:
+Act +Pss
!! Verb moods are:
+Ind +Prs +Prt +Pot +Cond +Imprt !!≈ * **@CODE@**
!! Verb personal forms are:
Expand Down
3 changes: 2 additions & 1 deletion src/fst/stems/adpositions.lexc
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,8 @@ eh postposition ;


LEXICON postposition
+Post: # ;
!+Post: # ;
+Adp+Po: # ;

LEXICON preposition
+Pr: # ;
Expand Down
22 changes: 11 additions & 11 deletions src/fst/stems/numerals.lexc
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,17 @@ kymmenen+Num+Card:kymmene NUM_KYMMENE/N ;
miljon+Num+Card:miljon NUM_MILJON/A ;
miljardi+Num+Card:miljard NUM_MILJARD/I ;

enšimäini+Adj+Ord:enšimäi NUM_ENŠIMMÄI/NI ;
enšimmäini+Adj+Ord:enšimmäi NUM_ENŠIMMÄI/NI ;
toini+Adj+Ord:toi NUM_TOI/NI ;
kolmaš+Adj+Ord:kolma NUM_KOLMA/Š ;
nelläš+Adj+Ord:nellä NUM_NELLÄ/Š ;
viiješ+Adj+Ord:viije NUM_NELLÄ/Š ;
kuuvveš+Adj+Ord:kuuvve NUM_KOLMA/Š ;
seiččemeš+Adj+Ord:seiččeme NUM_NELLÄ/Š ;
kahekšaš+Adj+Ord:kahekša NUM_KOLMA/Š ;
yhekšäš+Adj+Ord:yhekšä NUM_NELLÄ/Š ;
kymmeneš+Adj+Ord:kymmene NUM_NELLÄ/Š ;
enšimäini+A+Ord:enšimäi NUM_ENŠIMMÄI/NI ;
enšimmäini+A+Ord:enšimmäi NUM_ENŠIMMÄI/NI ;
toini+A+Ord:toi NUM_TOI/NI ;
kolmaš+A+Ord:kolma NUM_KOLMA/Š ;
nelläš+A+Ord:nellä NUM_NELLÄ/Š ;
viiješ+A+Ord:viije NUM_NELLÄ/Š ;
kuuvveš+A+Ord:kuuvve NUM_KOLMA/Š ;
seiččemeš+A+Ord:seiččeme NUM_NELLÄ/Š ;
kahekšaš+A+Ord:kahekša NUM_KOLMA/Š ;
yhekšäš+A+Ord:yhekšä NUM_NELLÄ/Š ;
kymmeneš+A+Ord:kymmene NUM_NELLÄ/Š ;

pari+Num+Card:pari NUM_PARI ;
puolitoista+Num+Card:puolitoista # ;
Expand Down
36 changes: 24 additions & 12 deletions src/fst/stems/pronouns.lexc
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ LEXICON PRON_MI/E

0:e PRON_SGNOM_0 ;
0:u PRON_CASES_SG_BACK ;
%<sg%>%<par%>:lma # ;
!%<sg%>%<par%>:lma # ;
+Sg+Par:lma # ;

LEXICON PRON_H/IÄN
!! **LEXICON @LEXNAME@**
Expand All @@ -82,7 +83,8 @@ LEXICON PRON_TÄ/MÄ
0 PRON_CASES_SG_FRONT ;
0 PRON_SGPAR_TÄ ;
0:mä PRON_SGGEN_N ;
%<sg%>%<ill%>:hä # ;
!%<sg%>%<ill%>:hä # ;
+Sg+Ill:hä # ;

LEXICON PRON_NÄ/MÄ
!! **LEXICON @LEXNAME@**
Expand All @@ -104,19 +106,27 @@ LEXICON PRON_Š/E
!! **LEXICON @LEXNAME@**
0:e PRON_SGNOM_0 ;
0:i PRON_CASES_SG_FRONT ;
%<sg%>%<gen%>:en # ;
%<sg%>%<ine%>:iinä # ;
%<sg%>%<ela%>:iitä # ;
%<sg%>%<ill%>:iih # ;
%<sg%>%<ill%>:inne # ;
%<sg%>%<par%>:itä # ;
!%<sg%>%<gen%>:en # ;
!%<sg%>%<ine%>:iinä # ;
!%<sg%>%<ela%>:iitä # ;
!%<sg%>%<ill%>:iih # ;
!%<sg%>%<ill%>:inne # ;
!%<sg%>%<par%>:itä # ;
+Sg+Gen:en # ;
+Sg+Ine:iinä # ;
+Sg+Ela:iitä # ;
+Sg+Ill:iih # ;
+Sg+Ill:inne # ;
+Sg+Par:itä # ;

LEXICON PRON_N/E
!! **LEXICON @LEXNAME@**
0:e PRON_PLNOM_0 ;
0:i PRON_CASES_PL_FRONT ;
%<pl%>%<gen%>:iijen # ;
%<pl%>%<ill%>:iihi # ;
!%<pl%>%<gen%>:iijen # ;
!%<pl%>%<ill%>:iihi # ;
+Pl+Gen:iijen # ;
+Pl+Ill:iihi # ;

LEXICON PRON_IČ/E
!! **LEXICON @LEXNAME@**
Expand All @@ -130,8 +140,10 @@ LEXICON PRON_KAI/KKI
0:kki PRON_SGNOM_0 ;
0:ke PRON_CASES_SG_WEAK_BACK ;
0:k PRON_CASES_PL_WEAK_BACK ;
%<pl%>%<ins%>:kin # ;
%<pl%>%<com%>:kkine # ;
!%<pl%>%<ins%>:kin # ;
!%<pl%>%<com%>:kkine # ;
+Pl+Ins:kin # ;
+Pl+Com:kkine # ;

LEXICON PRON_KU/DAI
!! **LEXICON @LEXNAME@**
Expand Down

0 comments on commit 8ffaf2d

Please sign in to comment.