From d4ff2a848164e882dd2336e19c7fa56dcef8f34c Mon Sep 17 00:00:00 2001
From: Flammie A Pirinen
Date: Wed, 27 Mar 2024 14:31:52 +0100
Subject: [PATCH] lost bits

---
 src/fst/Makefile.am | 144 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 144 insertions(+)

diff --git a/src/fst/Makefile.am b/src/fst/Makefile.am
index 3f1f9fc..c185200 100644
--- a/src/fst/Makefile.am
+++ b/src/fst/Makefile.am
@@ -79,6 +79,150 @@ endif # CAN_FOMA
 #################################################
 #### Add language-specific build rules here: ####
 
+# Hfst - add weights to compounds if using tropical-semiring fst format:
+if WITH_OFST_TROPICAL
+.generated/generator-raw-gt-desc.hfst: .generated/generator-raw-gt-desc.tmp.hfst
+	$(AM_V_REWEIGHT)$(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) \
+		-S '+Cmp' -a 10 --arcs-only -i $< \
+		> $@
+endif
+
+
+# We need to add processing of language-specific tags in the analyser:
+.generated/analyser-gt-desc.%: .generated/analyser-gt-desc.tmp.% \
+					 filters/remove-norm-comp-tags.% \
+					 filters/remove-acute-accent.% \
+					 filters/remove-usage-tags.%
+	$(AM_V_XFST_TOOL)$(PRINTF) "read regex \
+				@\"filters/remove-usage-tags.$*\" \
+			.o. @\"filters/remove-norm-comp-tags.$*\" \
+			.o. @\"$<\" \
+			.o. @\"filters/remove-acute-accent.$*\" \
+			;\n\
+		 $(INVERT_HFST)\
+		 save stack $@\n\
+		 quit\n" | $(XFST_TOOL)
+
+# And also for the normative analyser (foma and hfst):
+.generated/analyser-gt-norm.%: .generated/analyser-gt-norm.tmp.% \
+					 filters/remove-illegal-derivation-strings-flagbased.% \
+					 filters/insert-default-compounding-tags.% \
+					 filters/insert-default_left_compounding-tags.% \
+					 filters/block-illegal_compound-strings.% \
+					 filters/split-CmpN-tags.% \
+					 filters/convert_to_flags-CmpNP-tags.% \
+					 filters/split-CmpNP-tags.% \
+					 filters/remove-acute-accent.% \
+					 filters/remove-usage-tags.%
+	$(AM_V_XFST_TOOL)$(PRINTF) "read regex \
+				@\"filters/remove-usage-tags.$*\" \
+			.o. @\"filters/block-illegal_compound-strings.$*\" \
+			.o. @\"filters/split-CmpN-tags.$*\" \
+			.o. @\"filters/insert-default_left_compounding-tags.$*\" \
+			.o. @\"filters/insert-default-compounding-tags.$*\" \
+			.o. @\"filters/remove-illegal-derivation-strings-flagbased.$*\" \
+			.o. @\"filters/convert_to_flags-CmpNP-tags.$*\" \
+			.o. @\"filters/split-CmpNP-tags.$*\" \
+			.o. @\"$<\" \
+			.o. @\"filters/remove-acute-accent.$*\" \
+			;\n\
+		 twosided flag-diacritics\n\
+		 $(INVERT_HFST)\
+		 save stack $@\n\
+		 quit\n" | $(XFST_TOOL)
+
+# The operation 'twosided flag-diacritics' crashes Xerox badly, so we make do with
+# a simpler, less restrictive normative fst when building with Xerox:
+.generated/analyser-gt-norm.xfst: .generated/analyser-gt-norm.tmp.xfst \
+					 filters/remove-norm-comp-tags.xfst \
+					 filters/remove-illegal-derivation-strings.xfst \
+					 filters/remove-acute-accent.xfst \
+					 filters/remove-usage-tags.xfst
+	$(AM_V_XFST)$(PRINTF) "read regex \
+				@\"filters/remove-usage-tags.xfst\" \
+			.o. @\"filters/remove-norm-comp-tags.xfst\" \
+			.o. @\"filters/remove-illegal-derivation-strings.xfst\" \
+			.o. @\"$<\" \
+			.o. @\"filters/remove-acute-accent.xfst\" \
+			;\n\
+		 save stack $@\n\
+		 quit\n" | $(XFST) $(VERBOSITY)
+
+
+# We need special treatment of the disamb fst going to further pmatch processing
+# mainly due to the target pattern, thus listed here. The rule body and the
+# dependencies should be the same as the regular disamb analysers below:
+.generated/analyser-pmatchdisamb-gt-desc.hfst: .generated/analyser-pmatchdisamb-gt-desc.tmp.hfst \
+					 filters/remove-norm-comp-tags.hfst \
+					 filters/remove-orig_lang-tags.hfst \
+					 filters/remove-usage-tags.hfst
+	$(AM_V_HXFST)$(PRINTF) "read regex \
+				@\"filters/remove-usage-tags.hfst\" \
+			.o. @\"filters/remove-norm-comp-tags.hfst\" \
+			.o. @\"filters/remove-orig_lang-tags.hfst\" \
+			.o. @\"$<\" \
+			;\n\
+		 invert net\n\
+		 save stack $@\n\
+		 quit\n" | $(HFST_XFST) -p $(MORE_VERBOSITY)
+
+
+# Special case for the disamb analyser, since it follows the same filename
+# pattern as the raw fst:
+.generated/analyser-disamb-gt-desc.%: .generated/analyser-disamb-gt-desc.tmp.% \
+					 filters/remove-norm-comp-tags.% \
+					 filters/remove-orig_lang-tags.% \
+					 filters/remove-dialect-tags.% \
+					 filters/remove-homonymy-tags.% \
+					 filters/remove-acute-accent.% \
+					 filters/remove-usage-tags.%
+	$(AM_V_XFST_TOOL)$(PRINTF) "read regex \
+				@\"filters/remove-usage-tags.$*\" \
+			.o. @\"filters/remove-orig_lang-tags.$*\" \
+			.o. @\"filters/remove-dialect-tags.$*\" \
+			.o. @\"filters/remove-homonymy-tags.$*\" \
+			.o. @\"filters/remove-norm-comp-tags.$*\" \
+			.o. @\"$<\" \
+			.o. @\"filters/remove-acute-accent.$*\" \
+			;\n\
+		 $(INVERT_HFST)\
+		 save stack $@\n\
+		 quit\n" | $(XFST_TOOL)
+
+# We need to add processing of language-specific tags in the generator:
+define giella_generators
+.generated/generator-gt-%.$(1): .generated/generator-gt-%.tmp.$(1) \
+					 filters/remove-norm-comp-tags.$(1) \
+					 filters/remove-acute-accent.$(1) \
+					 filters/remove-usage-tags.$(1)
+	$$(AM_V_XFST_TOOL)$$(PRINTF) "read regex \
+				@\"filters/remove-usage-tags.$(1)\" \
+			.o. @\"filters/remove-norm-comp-tags.$(1)\" \
+			.o. @\"$$<\" \
+			.o. @\"filters/remove-acute-accent.$(1)\" \
+			;\n\
+		 $$(INVERT_XFST)$$(INVERT_FOMA)\
+		 save stack $$@\n\
+		 quit\n" | $$(XFST_TOOL)
+endef
+$(foreach fst,hfst xfst foma,$(eval $(call giella_generators,$(fst))))
+
+# Do NOT apply the accent removal filters to the normative
+# dictionary generator:
+.generated/generator-dict-gt-norm.%: .generated/generator-dict-gt-norm.tmp.% \
+					 filters/remove-norm-comp-tags.% \
+					 filters/remove-usage-tags.%
+	$(AM_V_XFST_TOOL)$(PRINTF) "read regex \
+				@\"filters/remove-usage-tags.$*\" \
+			.o. @\"filters/remove-norm-comp-tags.$*\" \
+			.o. @\"$<\" \
+			;\n\
+		 $(INVERT_XFST)$(INVERT_FOMA)\
+		 save stack $@\n\
+		 quit\n" | $(XFST_TOOL)
+
+
+
 ##################################################################
 #### END: Add local processing instructions ABOVE this line ######
 ##################################################################