diff --git a/tools/grammarcheckers/grammarchecker.cg3 b/tools/grammarcheckers/grammarchecker.cg3 new file mode 100644 index 0000000..f242ed0 --- /dev/null +++ b/tools/grammarcheckers/grammarchecker.cg3 @@ -0,0 +1,638 @@ +# -*- cg-pre-pipe: "hfst-tokenise -g ../../tools/tokenisers/tokeniser-gramcheck-gt-desc.pmhfst" -*- +# Divvun & Giellatekno - open source grammars for Sámi and other languages +# Copyright © 2000-2017 UiT The arctic University of Norway +# http://giellatekno.uit.no & http://divvun.no +# +# This program is free software; you can redistribute and/or modify +# this file under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. The GNU General Public License +# is found at http://www.gnu.org/licenses/gpl.html. It is +# also available in the file $GTHOME/LICENSE.txt. +# +# Other licensing options are available upon request, please contact +# giellatekno@uit.no or feedback@divvun.no + +# ==================================================================== # +#!! # __UNDEFINED__ G R A M M A R C H E C K E R +# ==================================================================== # +# ==================================================================== + +# Sámi language technology 2000-2017, UiT The arctic University of Norway # + +# ---------------------- # +# Development setup: # +# ---------------------- # +# +# cd $GTHOME/langs/??? # replace ??? with your language code +# ./autogen.sh +# ./configure --enable-apertium --with-hfst --enable-syntax --enable-grammarchecker --enable-tokenisers --enable-alignment --enable-reversed-intersect +# make +# cd tools/grammarcheckers +# make dev +# +# ---------------------- # +# Then edit/test as: # +# ---------------------- # +# +# emacs grammarchecker.cg3 # and C-c C-i / C-c C-c if you have cg-mode installed +# echo "Sun ij puátá." | sh modes/???gram.mode # from the terminal, replacing ??? with your language code +# +# ---------------------- # +# Other debug-pipes: # +# ---------------------- # +# +# ls modes/ +# +# ---------------------- # + + +# The pipeline (but use modes, above) +# echo "Sun ij puátá." | hfst-tokenise —giella-cg tools/tokenisers/tokeniser-gramcheck-gt-desc.pmhfst | vislcg3 -g src/cg3/disambiguator.cg3 -t | vislcg3 -g tools/grammarcheckers/grammarchecker.cg3 -t + + + + # =========================================== # + # Short table of contents # + # =========================================== # + + # Delimiters + # Tags and sets + # Grammar checking rules + +# =============== # +#!! # DELIMITERS +# =============== # + +DELIMITERS = "<.>" "" "" "<...>" "<¶>" sent ; + +# ================== # +#!! # TAGS AND SETS +# ================== # + +SETS + + +#!! ## Tags + + +#!! This section lists all the tags inherited from the fst, and used as tags +#!! in the syntactic analysis. The next section, **Sets**, contains sets defined +#!! on the basis of the tags listed here, those set names are not visible in the output. + + + +# Tags declared as single-membered LISTs +# ====================================== + +#!! ### Beginning and end of sentence +LIST BOS = (>>>) () ; #!! BOS +LIST EOS = (<<<) () ; #!! EOS + +# We define end of clause and beginning of clause in a way so that the file +# may be read both by the CG-2 and the vislcg formalisms. +# CG3 doesn´t function without >>> and <<< ! + + +#!! ### Parts of speech tags + +LIST N = N ; #!! N +LIST A = A ; #!! A +LIST Adv = Adv ; #!! Adv +LIST V = V ; #!! V +LIST Pron = Pron ; #!! Pron +LIST CS = CS ; #!! CS +LIST CC = CC ; #!! CC +SET CC-CS = CC OR CS ; #!! CC-CS +LIST Po = Po ; #!! Po +LIST Pr = Pr ; #!! Pr +LIST Pcle = Pcle ; #!! Pcle +LIST Num = Num ; #!! Num +LIST Interj = Interj ; #!! Interj +LIST ABBR = ABBR ; #!! ABBR +LIST ACR = ACR ; #!! ACR +LIST CLB = CLB ; #!! CLB +LIST LEFT = LEFT ; #!! LEFT +LIST RIGHT = RIGHT ; #!! RIGHT +LIST WEB = WEB ; #!! WEB +LIST QMARK = """ ; # " #!! QMARK +LIST PPUNCT = PUNCT ; #!! PPUNCT +SET PUNCT = PPUNCT - QMARK ; #!! PUNCT + +LIST COMMA = "," ; #!! COMMA +LIST ¶ = ¶; #!! ¶ + + + +#!! ### Tags for POS sub-categories + +LIST Pers = Pers ; #!! Pers +LIST Dem = Dem ; #!! Dem +LIST Interr = Interr ; #!! Interr +LIST Indef = Indef ; #!! Indef +LIST Recipr = Recipr ; #!! Recipr +LIST Refl = Refl ; #!! Refl +LIST Rel = Rel ; #!! Rel +LIST Coll = Coll ; #!! Coll +LIST NomAg = NomAg ; #!! NomAg +LIST Prop = Prop ; #!! Prop +LIST Allegro = Allegro ; #!! Allegro +LIST Arab = Arab ; #!! Arab +LIST Rom = Rom ; #!! Romertall + + +#!! ### Tags for morphosyntactic properties + +LIST Nom = Nom ; #!! Nom +LIST Acc = Acc ; #!! Acc +LIST Gen = Gen ; #!! Gen +LIST Ill = Ill ; #!! Ill +LIST Loc = Loc ; #!! Loc +LIST Com = Com ; #!! Com +LIST Ess = Ess ; #!! Ess +LIST Par = Par ; #!! Ess +LIST Sg = Sg ; #!! Sg +LIST Du = Du ; #!! Du +LIST Pl = Pl ; #!! Pl +LIST Cmp/SplitR = Cmp/SplitR ; #!! Cmp/SplitR +LIST Cmp/SgNom = Cmp/SgNom ; #!! Cmp/SgNom Cmp/SgGen +LIST Cmp/SgGen = Cmp/SgGen ; #!! Cmp/SgGen +LIST PxSg1 = PxSg1 ; #!! PxSg1 +LIST PxSg2 = PxSg2 ; #!! PxSg2 +LIST PxSg3 = PxSg3 ; #!! PxSg3 +LIST PxDu1 = PxDu1 ; #!! PxDu1 +LIST PxDu2 = PxDu2 ; #!! PxDu2 +LIST PxDu3 = PxDu3 ; #!! PxDu3 +LIST PxPl1 = PxPl1 ; #!! PxPl1 +LIST PxPl2 = PxPl2 ; #!! PxPl2 +LIST PxPl3 = PxPl3 ; #!! PxPl3 +LIST Px = PxSg1 PxSg2 PxSg3 PxDu1 PxDu2 PxDu3 PxPl1 PxPl2 PxPl3 ; #!! Px + +LIST Comp = Comp ; #!! Comp +LIST Superl = Superl ; #!! Superl +LIST Attr = Attr ; #!! Attr +LIST Ord = Ord ; #!! Ord +LIST Qst = Qst ; #!! Qst +LIST IV = IV ; #!! IV +LIST TV = TV ; #!! TV +LIST Prt = Prt; #!! Prt +LIST Prs = Prs ; #!! Prs +LIST Ind = Ind ; #!! Ind +LIST Pot = Pot ; #!! Pot +LIST Cond = Cond ; #!! Cond +LIST Imprt = Imprt ; #!! Imprt +LIST ImprtII = ImprtII ; #!! ImprtII +LIST Sg1 = Sg1 (p1 sg) ; #!! Sg1 +LIST Sg2 = Sg2 (p2 sg) ; #!! Sg2 +LIST Sg3 = Sg3 (p3 sg) ; #!! Sg3 +LIST Du1 = Du1 (p1 du) ; #!! Du1 +LIST Du2 = Du2 (p2 du) ; #!! Du2 +LIST Du3 = Du3 (p3 du) ; #!! Du3 +LIST Pl1 = Pl1 (p1 pl) ; #!! Pl1 +LIST Pl2 = Pl2 (p2 pl); #!! Pl2 +LIST Pl3 = Pl3 (p3 pl); #!! Pl3 +LIST Inf = Inf ; #!! Inf +LIST ConNeg = ConNeg ; #!! ConNeg +LIST Neg = Neg ; #!! Neg +LIST PrfPrc = PrfPrc ; #!! PrfPrc +LIST VGen = VGen ; #!! VGen +LIST PrsPrc = PrsPrc ; #!! PrsPrc +LIST Ger = Ger ; #!! Ger +LIST Sup = Sup ; #!! Sup +LIST Actio = Actio ; #!! Actio +LIST VAbess = VAbess ; #!! VAbess + + + +LIST Err/Orth = Err/Orth Err/Orth-a/á Err/Orth-nom/gen Err/Orth-nom/acc Err/DerSub Err/CmpSub Err/UnspaceCmp Err/HyphSub Err/SpaceCmp Err/Spellrelax err_orth_mt ; #!! Err/Orth + + + +#!! ### Semantic tags + +LIST Sem/Act = Sem/Act ; #!! Sem/Act +LIST Sem/Ani = Sem/Ani ; #!! Sem/Ani +LIST Sem/Atr = Sem/Atr ; #!! Sem/Atr +LIST Sem/Body = Sem/Body ; #!! Sem/Body +LIST Sem/Clth = Sem/Clth ; #!! Sem/Clth +LIST Sem/Domain = Sem/Domain ; #!! Sem/Domain +LIST Sem/Feat-phys = Sem/Feat-phys ; #!! Sem/Feat-phys +LIST Sem/Fem = Sem/Fem ; #!! Sem/Fem +LIST Sem/Group = Sem/Group ; #!! Sem/Group +LIST Sem/Lang = Sem/Lang ; #!! Sem/Lang +LIST Sem/Mal = Sem/Mal ; #!! Sem/Mal +LIST Sem/Measr = Sem/Measr ; #!! Sem/Measr +LIST Sem/Money = Sem/Money ; #!! Sem/Money +LIST Sem/Obj = Sem/Obj ; #!! Sem/Obj +LIST Sem/Obj-el = Sem/Obj-el ; #!! Sem/Obj-el +LIST Sem/Org = Sem/Org ; #!! Sem/Org +LIST Sem/Perc-emo = Sem/Perc-emo ; #!! Sem/Perc-emo +LIST Sem/Plc = Sem/Plc ; #!! Sem/Plc +LIST Sem/Sign = Sem/Sign ; #!! Sem/Sign +LIST Sem/State-sick = Sem/State-lang-sick ; #!! Sem/State-sick +LIST Sem/Sur = Sem/Sur ; #!! Sem/Sur +LIST Sem/Time = Sem/Time ; #!! Sem/Time +LIST Sem/Txt = Sem/Txt ; #!! Sem/Txt + +LIST HUMAN = Sem/Fem Sem/Mal Sem/Sur ; #!! HUMAN + +SET HAB-ACTOR = HUMAN ; # Goal: make this like the sme one. #!! HAB-ACTOR +SET HAB-ACTOR-NOT-HUMAN = Sem/Org ; # make this like the sme one #!! HAB-ACTOR-NOT-HUMAN + + +LIST PROP-ATTR = Sem/Mal Sem/Sur Sem/Fem ; #!! PROP-ATTR +LIST PROP-SUR = Sem/Sur Sem/Mal Sem/Fem ; #!! PROP-SUR + + + +SET TIME-N-SET = N + Sem/Time ; #!! TIME-N-SET + + +#!! ### Syntactic tags + +LIST @+FAUXV = @+FAUXV ; #!! @+FAUXV +LIST @+FMAINV = @+FMAINV ; #!! @+FMAINV +LIST @-FAUXV = @-FAUXV ; #!! @-FAUXV +LIST @-FMAINV = @-FMAINV ; #!! @-FMAINV +LIST @-FSUBJ> = @-FSUBJ> ; #!! @-FSUBJ> +LIST @-F = @-FOBJ> ; #!! @-FOBJ> +LIST @SPRED = @-FADVL> ; #!! @-FADVL> +LIST @-F = @-FSPRED> ; #!! @-FSPRED> +LIST @-FOPRED> = @-FOPRED> ; #!! @-FOPRED> +SET FOBJ = @-F ; +SET FMAINV = @-FMAINV OR @+FMAINV ; +SET FAUXV = @-FAUXV OR @+FAUXV ; +LIST @>ADVL = @>ADVL ; #!! @>ADVL +LIST @ADVL< = @ADVL< ; #!! @ADVL< +LIST @ = @ADVL> ; #!! @ADVL> +LIST ADVL = @ADVL @ADVL> @ADVL< @ADVL @-F ; #!! @ADVL +LIST @HAB> = @HAB> ; #!! @HAB> +LIST @ ; +LIST @>N = @>N ; #!! @>N +LIST @Interj = @Interj ; #!! @Interj +LIST @N< = @N< ; #!! @N< +LIST @>A = @>A ; #!! @>A +LIST @P< = @P< ; #!! @P< +LIST @>P = @>P ; #!! @>P +LIST @HNOUN = @HNOUN ; #!! @HNOUN +LIST @INTERJ = @INTERJ ; #!! @INTERJ +LIST @>Num = @>Num; #!! @>Num +LIST @Pron< = @Pron< ; #!! @Pron< +LIST @>Pron = @>Pron ; #!! @>Pron +LIST @Num< = @Num< ; #!! @Num< +LIST @OBJ = @OBJ ; #!! @OBJ +LIST @ = @OBJ> ; #!! @OBJ> +LIST @OPRED = @OPRED ; #!! @OPRED +LIST @ = @OPRED> ; #!! @OPRED> +LIST @PCLE = @PCLE ; #!! @PCLE +LIST @COMP-CS< = @COMP-CS< ; #!! @COMP-CS< +LIST @SPRED = @SPRED ; #!! @SPRED +LIST @ = @SPRED> ; #!! @SPRED> +LIST @SUBJ = @SUBJ ; #!! @SUBJ +LIST @ = @SUBJ> ; #!! @SUBJ> +SET SUBJ = @ OR @SUBJ ; #!! SUBJ +SET SPRED = @ OR @SPRED ; #!! SPRED +SET OPRED = @ OR @OPRED ; #!! OPRED +LIST @PPRED = @PPRED ; #!! @PPRED +LIST @APP = @APP ; #!! @APP +LIST @APP-N< = @APP-N< ; #!! @APP-N< +LIST @APP-Pron< = @APP-Pron< ; #!! @APP-Pron< +LIST @APP>Pron = @APP>Pron ; #!! @APP>Pron +LIST @APP-Num< = @APP-Num< ; #!! @APP-Num< +LIST @APP-ADVL< = @APP-ADVL< ; #!! @APP-ADVL< +LIST @VOC = @VOC ; #!! @VOC +LIST @CVP = @CVP ; #!! @CVP +LIST @CNP = @CNP ; #!! @CNP +SET OBJ = (@) OR (@OBJ) OR (@-F) ; #!! OBJ +LIST = @OBJ> @-FOBJ> ; #!! OBJ> +SET -OTHERS = OBJ> OR (Gen) OR (Nom) OR (Ess) OR (Loc) OR (Adv) ; #!! OBJ>-OTHERS +SET NOT-FAUXV = FMAINV OR OBJ + V OR ADVL + V ; +# Works after the mapping rules for verbs. +SET SYN-V = FMAINV OR FAUXV OR V + SUBJ OR OBJ + V OR ADVL + V OR (V @>N) OR (V @N<) OR (V @A<) ; #!! SYN-V +LIST @X = @X ; #!! @X + + + +# ======== + SETS +# ======== + + +#!! ## Sets containing sets of lists and tags + +#!! This part of the file lists a large number of sets based partly upon the tags defined above, and +#!! partly upon lexemes drawn from the lexicon. +#!! See the sourcefile itself to inspect the sets, what follows here is an overview of the set types. + + + +#!! ### Sets for Single-word sets + +LIST INITIAL = "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" + "n" "o" "p" "q" "r" "s" "t" "u" "v" "w" "x" "y" "z" + "á" "æ" "ø" "å" "ö" "ä" ; #!! INITIAL + + +#!! ### Sets for word or not + +LIST WORD = N A Adv V Pron CS CC Po Pr Interj Pcle Num ABBR ACR \? ; #!! WORD + # any word +# SET REAL-WORD = WORD - Num - Ord ; #!! REAL-WORD +SET REAL-WORD-NOT-ABBR = WORD - Num - Ord - (ABBR N) ; # This is former REALWORD-NOTABBR #!! REAL-WORD-NOT-ABBR +SET NOT-COMMA = WORD - COMMA ; #!! NOT-COMMA + + +#!! ### Case sets +# --------- + +LIST ADVLCASE = Ill Loc Com Ess ; #!! ADLVCASE + +LIST CASE-AGREEMENT = Nom Acc Gen (Pl Ill) Loc Com Ess ; #!! CASE-AGREEMENT +LIST CASE = Nom Acc Gen Ill Loc Com Ess ; #!! CASE + +SET NOT-NOM = CASE - Nom ; #!! NOT-NOM +SET NOT-GEN = CASE - Gen ; #!! NOT-GEN +SET NOT-ACC = CASE - Acc ; #!! NOT-ACC + +#!! ### Verb sets +# --------- + +# Verbs and their complements +# - - - - - - - - - - - - - - + +SET NOT-V = WORD - V ; #!! NOT-V + +#!! ### Sets for finiteness and mood +# - - - - - - - - - - + +SET REAL-NEG = Neg - Sup ; #!! REAL-NEG + +SET MOOD-V = Ind OR Pot OR Imprt OR ImprtII OR Cond OR (Neg Sup) ; #!! MOOD-V + +SET NOT-PRFPRC = WORD - PrfPrc ; #!! NOT-PRFPRC + + +#!! ### Sets for person +# - - - - + +LIST SG1-V = (V Sg1) ; #!! SG1-V +LIST SG2-V = (V Sg2) ; #!! SG2-V +LIST SG3-V = (V Sg3) ; #!! SG3-V +LIST DU1-V = (V Du1) ; #!! DU1-V +LIST DU2-V = (V Du2) ; #!! DU2-V +LIST DU3-V = (V Du3) ; #!! DU3-V +LIST PL1-V = (V Pl1) ; #!! PL1-V +LIST PL2-V = (V Pl2) ; #!! PL2-V +LIST PL3-V = (V Pl3) ; #!! PL3-V + + # Note that imperative verbs are not included in these sets! + +# Some subsets of the VFIN sets +# - - - - - - - - - - - - - - - +SET SG-V = SG1-V OR SG2-V OR SG3-V ; +SET DU-V = DU1-V OR DU2-V OR DU3-V ; +SET PL-V = PL1-V OR PL2-V OR PL3-V ; + +SET DU-PL-V = DU1-V OR DU2-V OR DU3-V OR PL1-V OR PL2-V OR PL3-V ; + +SET 1-2-V = SG1-V OR SG2-V OR DU1-V OR DU2-V OR PL1-V OR PL2-V ; + +#!! ### Pronoun sets +# ------------ + +LIST MUN = (Pron Pers Sg1 Nom) (pers p1 sg nom) ; +LIST DON = (Pron Pers Sg2 Nom) (pers p2 sg nom) ; +LIST SON = (Pron Pers Sg3 Nom) (pers p3 sg nom) ; +LIST MOAI = (Pron Pers Du1 Nom) (pers p1 du nom) ; +LIST DOAI = (Pron Pers Du2 Nom) (pers p2 du nom) ; +LIST SOAI = (Pron Pers Du3 Nom) (pers p3 du nom) ; +LIST MII-PERS = (Pron Pers Pl1 Nom) (pers p1 pl nom) ; +LIST DII = (Pron Pers Pl2 Nom) (pers p2 pl nom) ; +LIST SII = (Pron Pers Pl3 Nom) (pers p3 pl nom) ; + +SET PPRON-NOM-NOT-DAT = MUN OR DON OR SON OR MOAI OR DOAI OR SOAI OR MII-PERS OR DII OR SII ; + +SET PPRON-DU-PL = MOAI OR DOAI OR SOAI OR MII-PERS OR DII OR SII ; +SET PPRON-PL = MII-PERS OR DII OR SII ; + +SET PRON-DU = MOAI OR DOAI OR SOAI ; + +SET PPRON-NOT-SII = MUN OR DON OR SON OR MOAI OR DOAI OR SOAI OR MII-PERS OR DII ; + +LIST PPRON-GEN = (Sg1 Gen) (Sg2 Gen) (Sg3 Gen) (Du1 Gen) (Du2 Gen) (Du3 Gen) + (Pl1 Gen) (Pl2 Gen) (Pl3 Gen) (p1 sg gen) (p2 sg gen) (p3 sg gen) (p1 du gen) (p2 du gen) (p3 du gen) + (p1 pl gen) (p2 pl gen) (p3 pl gen) ; + +SET PPRON-NOT-GEN = (Pron Pers) - PPRON-GEN ; + +LIST SG-DEM = (Pron Dem Sg Nom); +LIST PL-DEM = (Pron Dem Pl Nom); +SET NOT-DEM = WORD - Dem ; + +LIST SG-PRON = (Pron Sg1) (Pron Sg2) (Pron Sg3) (Pron Sg) (Pron PxSg1) (Pron PxSg2) (Pron PxSg3) (pron p1 sg) (pron p2 sg) (pron p3 sg); +LIST DU-PRON = (Pron Du1) (Pron Du2) (Pron Du3) (Pron PxDu1) (Pron PxDu2) (Pron PxDu3) (pron p1 du) (pron p2 du) (pron p3 du); +LIST PL-PRON = (Pron Pl1) (Pron Pl2) (Pron Pl3) (Pron Pl) (Pron PxPl1) (Pron PxPl2) (Pron PxPl3) (pron p1 pl) (pron p2 pl) (pron p3 pl); +LIST DU-PRON-NOTPX = (Pron Du1) (Pron Du2) (Pron Du3) (pron p1 du) (pron p2 du) (pron p3 du) ; + + +LIST FIRST-PX = PxSg1 PxDu1 PxPl1 ; +LIST SECOND-PX = PxSg2 PxDu2 PxPl2 ; +LIST THIRD-PX = PxSg3 PxDu3 PxPl3 ; + +LIST SG-PX = PxSg1 PxSg2 PxSg3 ; +LIST DU-PX = PxDu1 PxDu2 PxDu3 ; +LIST PL-PX = PxPl1 PxPl2 PxPl3 ; + +LIST PX = PxSg1 PxDu1 PxPl1 PxSg2 PxDu2 PxPl2 PxSg3 PxDu3 PxPl3 ; + +LIST DU-NR = Du1 Du2 Du3 ; + +SET NOT-SG-PRON = DU-PRON OR PL-PRON ; + + +#!! ### Adjectival sets and their complements +# ------------------------------------- +SET A-CASE = A - Attr - Adv ; + +LIST A-CC = A CC ; + +SET NOT-A = WORD - A ; # This is former NOT-ADJ +SET NOT-A-COMMA = WORD - A - COMMA ; +SET NOT-Attr = WORD - Attr ; +SET NOT-A-PCLE = WORD - A - Pcle ; +SET NOT-A-CC = WORD - A-CC ; + + # and many others + +#!! ### Adverbial sets and their complements +# ------------------------------------ +SET LEX-ADV = Adv - (AA) ; + +SET NOT-ADV-DE = WORD - Adv ; +SET NOT-ADV = NOT-ADV-DE OR CLB ; +SET NOT-ADV-N = NOT-ADV - N; +SET NOT-ADV-PCLE = NOT-ADV - Pcle ; +SET NOT-ADV-INDEF = NOT-ADV - Indef ; +SET NOT-ADV-PCLE-ILL-LOC-COM = WORD - Adv - Pcle - Ill - Loc - Com; +SET NOT-A-ADV-PCLE = WORD - Pcle - A - Adv ; # + + + +#!! ### Sets of elements with common syntactic behaviour +# ================================================ + +SETS + +#!! ### NP sets defined according to their morphosyntactic features +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +LIST N-SG-NOM = (N Sg Nom); + +LIST PROP = (N Prop); + +SET COMMON-N = N - Prop; + +SET HEAD-N = N - Cmp/SplitR ; + +SET HEAD-N-NOM = (N Nom) - Cmp/SplitR ; + +# SET SUBJECTHEAD = N OR A OR Pron - Refl ; # These, can be subject heads + +SET NP = N OR A ; # anything that can take except numerals +SET NP-HEAD = Pron OR HEAD-N ; +SET NP-HEAD-SG = SG-PRON OR (N Sg) OR (A Sg) - Cmp/SplitR ; +SET NP-HEAD-PL = PL-PRON OR (N Pl) OR (A Pl) - Cmp/SplitR ; +SET NP-HEAD-SG-NOM = SG-PRON + Nom OR (N Sg Nom) OR (A Sg Nom) - Cmp/SplitR ; +SET NP-HEAD-PL-NOM = PL-PRON + Nom OR (N Pl Nom) OR (N Coll Nom) OR (A Pl Nom) - Cmp/SplitR ; +SET NP-HEAD-NOM = NP-HEAD-SG-NOM OR NP-HEAD-PL-NOM ; +SET NP-HEAD-ACC = (Pron Acc) OR (N Acc) OR (A Acc) - Cmp/SplitR - (Dem Attr); +SET NP-HEAD-GEN = (Pron Gen) OR (N Gen) OR (A Gen) - Cmp/SplitR - (Dem Attr) ; + +#!! ### The PRE-NP-HEAD family of sets + +#!! These sets model noun phrases (NPs). The idea is to first define whatever can +#!! occur in front of the head of the NP, and thereafter negate that with the +#!! expression **WORD - premodifiers**. + + +SET PRE-NP-HEAD = (Prop Attr) OR (Prop @>N) OR (A Attr) OR (ABBR Attr) + OR (Pron Pers Gen) OR (N Gen) OR + Num OR Cmp/SplitR OR CC OR (Pron Dem) OR (Pron Refl Gen) OR (Indef Attr) OR + (PrfPrc @>N) OR PrsPrc OR (A Ord) ; + # The strict version of items that can only be premodifiers, not parts of the predicate + +SET PRE-A-N = (Pron Pers Gen) OR (Pron Pers Acc) OR (Pron Indef) OR Num OR (A Ord) OR (Pron Dem) OR (Pron Refl Gen) OR (Pron Refl Acc) ; + +SET NOT-PRE-A-N = WORD - PRE-A-N ; + +LIST PUNCT-LEFT = (PUNCT LEFT) ; +LIST PUNCT-RIGHT = (PUNCT RIGHT) ; + + + +SET NOT-NPMOD = WORD - PRE-NP-HEAD OR ABBR OR @CVP ; +# This is the previous NPNH (npnh) set. +# NOT-NPMOD = "NOT-PRE-NP-HEAD" + +SET NOT-NPMOD-ACC = NOT-NPMOD - Acc OR ABBR ; +SET NOT-NPMOD-ACC-ADV = NOT-NPMOD - Acc - Adv OR ABBR ; + +SET NOT-NPMODADV = WORD - PRE-NP-HEAD - Adv ; +SET NOT-NPMODADV-INDEF = WORD - PRE-NP-HEAD - Adv - Indef ; + + +SET NOT-N = WORD - N ; + +SET NOT-N-A = WORD - N - A ; + +SET NOT-NP = Inf OR Pcle OR Interj OR CS ; + +SET NUM = Num ; + +SET NOT-NUM = WORD - Num ; + +SET NOT-CC = WORD - CC ; + +SET NOT-PCLE = WORD - Pcle ; + +SET REAL-CLB = CLB - COMMA ; + + +#!! ### Border sets and their complements +# --------------------------------- + +SET CP = (Pron Interr) OR (Pron Rel) ; + +LIST BOUNDARYSYMBOLS = "\;" ":" "-" "–" ; + +SET S-BOUNDARY = BOUNDARYSYMBOLS OR (@CVP) ; + +SET BOC = S-BOUNDARY OR BOS ; +SET BOC-PUNCT = BOC - ("-") - ("–") ; +SET EOC = S-BOUNDARY OR EOS ; + +SET NP-BOUNDARY = BOS OR EOS OR REAL-CLB OR Inf OR VGen OR Sup OR PPRON-NOT-GEN OR Recipr OR Po OR Pr OR Pcle OR Interj OR CS OR CP OR @CVP ; + +SET SV-BOUNDARY = S-BOUNDARY OR Inf OR Sup OR FMAINV ; + +SET CCCOMMA = CC OR COMMA ; + # remember that those are potential sentence boundaries, too + + + + +#!! ### Grammarchecker sets + +LIST &err-agr = &err-agr ; +LIST &err-gen-po = &err-gen-po ; +LIST &err-no-conneg = &err-no-conneg ; +LIST &err-orrood-inf = &err-orrood-inf ; +LIST &err-ext-agr = &err-ext-agr ; +LIST &err-ext-vagr = &err-ext-vagr ; + +MAPPING-PREFIX = & ; + +SECTION + +# Speller suggestions rule – add &SUGGESTWF to any spelling suggestion +# that we actually want to suggest to the user. +# The simplest is to just add it to all spelled words: +ADD:spell-it-all (&typo &SUGGESTWF) () ; +# But perhaps you want to only suggest spellings of words that are not inside "quotes": +ADD:spell-conservatively (&typo &SUGGESTWF) () IF (NEGATE -1 QMARK LINK 2 QMARK); + +# A simple grammar checker rule without suggestions: Ensure preceding nominal agrees with the verb +ADD:err-agr (&err-agr) TARGET V IF (-1C Sg + Nom) (0 V) (NOT 0 Sg3); +ADD:err-agr (&err-agr) TARGET V IF (-1C Pron + Sg1 + Nom) (0 V) (NOT 0 Sg1); +ADD:err-agr (&err-agr) TARGET V IF (-1C Pron + Sg2 + Nom) (0 V) (NOT 0 Sg2); +ADD:err-agr (&err-agr) TARGET V IF (-1C Pron + Pl1 + Nom) (0 V) (NOT 0 Pl1); +ADD:err-agr (&err-agr) TARGET V IF (-1C Pron + Pl2 + Nom) (0 V) (NOT 0 Pl2); + +# And one with a suggestion where we simply change the tag Sg into Attr: +ADD:attr-not-pred (&attr-not-pred) TARGET (A Sg) IF (NOT 0 Attr OR Gen) (1 CC LINK *1 COMMON-N) ; +COPY:attr-not-pred (Attr &SUGGEST) EXCEPT (Sg) TARGET (&attr-not-pred) ; +# The method is: Add &SUGGEST to a copied reading to *generate* a +# suggestion form from that reading. The copy should contain the error +# tag too – &-prefixed error tags are ignored when generating, but +# used to create human-readable messages. + +# Simple punctuation rules showing how to change the lemma in the suggestions: +ADD:use-guillemets (&guillemets) TARGET ("""); +COPY:left-guillemet ("«" &SUGGEST) EXCEPT (""") TARGET (&guillemets) ; +COPY:right-guillemet ("»" &SUGGEST) EXCEPT (""") TARGET (&guillemets) ; + +ADD:use-ellipsis (&ellipsis) TARGET ("..."); +COPY:use-ellipsis ("…" &SUGGEST) EXCEPT ("...") TARGET (&ellipsis) ; diff --git a/tools/grammarcheckers/spellchecker.cg3 b/tools/grammarcheckers/spellchecker.cg3 new file mode 100644 index 0000000..3bb6fcf --- /dev/null +++ b/tools/grammarcheckers/spellchecker.cg3 @@ -0,0 +1,9 @@ +# An alternative to grammarchecker.cg3 that just does spelling. + +MAPPING-PREFIX = & ; +DELIMITERS = "<.>" "" "" "<...>" "<¶>" "<\n\n>" ; + +ADD:spelled (&typo &SUGGESTWF) () ; + +ADD:Err/Orth (&typo) (Err/Orth) (NOT 0 (*) - (Err/Orth)) ; +COPY:Err/Orth (&SUGGEST) EXCEPT (Ex/N Ex/V Ex/A Ex/IV Ex/TV Err/Orth) TARGET (Err/Orth &typo) ;