From c7caf7db43bb4bb07a0a76c91088faa555909dfe Mon Sep 17 00:00:00 2001 From: Hai Liang Wang Date: Sat, 24 Mar 2018 19:16:42 +0800 Subject: [PATCH] #5 add default ACTION in standard oracle with i == len(sent) --- admin/standard.ewt.train.sh | 10 ++++---- app/common/deps.py | 5 ++-- app/transitionparser/configurations.py | 4 ++++ app/transitionparser/oracles.py | 2 +- app/transitionparser/parsers.py | 6 ++--- app/transitionparser/standard.py | 4 ++-- test/fixtures/standard.recursion.conllu | 31 +++++++++++++++++++++++++ test/standard.recursion.sh | 26 +++++++++++++++++++++ 8 files changed, 75 insertions(+), 13 deletions(-) create mode 100644 test/fixtures/standard.recursion.conllu create mode 100755 test/standard.recursion.sh diff --git a/admin/standard.ewt.train.sh b/admin/standard.ewt.train.sh index df65c2b..9fc9a54 100755 --- a/admin/standard.ewt.train.sh +++ b/admin/standard.ewt.train.sh @@ -12,12 +12,12 @@ baseDir=$(cd `dirname "$0"`;pwd) # variables ####################### PY=$baseDir/../app/transitionparser/standard.py -TRAIN_DATA=$baseDir/../data/conll.example -# TRAIN_DATA=$baseDir/../data/UD_English-EWT/en-ud-train.conllu -# MODEL=$baseDir/../tmp/standard.ewt.model -MODEL=$baseDir/../tmp/standard.example.model +# TRAIN_DATA=$baseDir/../data/conll.example +TRAIN_DATA=$baseDir/../data/UD_English-EWT/en-ud-train.conllu +MODEL=$baseDir/../tmp/standard.ewt.model +# MODEL=$baseDir/../tmp/standard.example.model EPOCH=1 -LOG_VERBOSITY=1 # info +LOG_VERBOSITY=0 # info # functions diff --git a/app/common/deps.py b/app/common/deps.py index a01888f..172ceaa 100644 --- a/app/common/deps.py +++ b/app/common/deps.py @@ -39,7 +39,9 @@ def add(self, parent, child): if (not rc) or child['id'] > rc['id']: self._right_child[parent['id']] = child - #{{{ remove + def __len__(self): + return len(self.deps) + def remove(self, parent, child): pid = parent['id'] cid = child['id'] @@ -75,7 +77,6 @@ def remove_right_children(self, parent): def remove_parent(self, child): self.remove(self.parent(child), child) - #}}} def annotate(self, sent): for tok in sent: diff --git a/app/transitionparser/configurations.py b/app/transitionparser/configurations.py index 3cb0ee8..c2568c6 100644 --- a/app/transitionparser/configurations.py +++ b/app/transitionparser/configurations.py @@ -103,7 +103,9 @@ def do_shift(self): self.i += 1 def do_reduceR(self): + logging.debug("ArcStandardConfiguration do_reduceR") if len(self.stack) < 2: + print("ArcStandardConfiguration do_reduceR error.") raise IllegalActionException() self.actions.append(REDUCE_R) self._features = [] @@ -116,7 +118,9 @@ def do_reduceR(self): stack.append(tokt1) def do_reduceL(self): + logging.debug("ArcStandardConfiguration do_reduceL") if len(self.stack) < 2: + print("ArcStandardConfiguration do_reduceL error.") raise IllegalActionException() self.actions.append(REDUCE_L) self._features = [] diff --git a/app/transitionparser/oracles.py b/app/transitionparser/oracles.py index 18885d7..53d79f4 100644 --- a/app/transitionparser/oracles.py +++ b/app/transitionparser/oracles.py @@ -79,7 +79,7 @@ def next_action(self, stack, deps, sent, i): pass # else if len(sent) <= i: - pass + return REDUCE_L return SHIFT class ArcEagerParsingOracle: # {{{ diff --git a/app/transitionparser/parsers.py b/app/transitionparser/parsers.py index 0d58385..87b71fe 100644 --- a/app/transitionparser/parsers.py +++ b/app/transitionparser/parsers.py @@ -67,14 +67,14 @@ def parse(self, sent): while not conf.is_in_finish_state(): logging.debug("parse: not finish") next_actions = self.decide(conf) - logging.debug("parse: next_actions") for act in next_actions: try: - logging.debug("parse: next_actions 1") + logging.debug("parse: next_actions [%s]", act) conf.do_action(act) + logging.debug("parse: stack len(%s), deps(%s), buffer len(%s)", len(conf.stack), len(conf.deps), (len(conf.sent) - conf.i)) break except IllegalActionException, e: - logging.debug("parse: next_actions 2 %s", e) + logging.debug("parse: next_actions error - %s", e) pass logging.debug("parse: finish") return conf.deps # ,conf.chunks diff --git a/app/transitionparser/standard.py b/app/transitionparser/standard.py index 35b51ab..61fcdc5 100755 --- a/app/transitionparser/standard.py +++ b/app/transitionparser/standard.py @@ -89,7 +89,7 @@ import ml import random from pio import io -from transitionparser import * +from transitionparser.parsers import * def transform_conll_sents(conll_file_path): ''' @@ -120,7 +120,7 @@ def train(): featExt) p = ArcStandardParser2(trainer) total = len(sents) - random.seed("seed") + # random.seed("seed") for x in xrange(FLAGS.epoch): random.shuffle(sents) logging.info("iter %s/%s", x + 1, FLAGS.epoch) diff --git a/test/fixtures/standard.recursion.conllu b/test/fixtures/standard.recursion.conllu new file mode 100644 index 0000000..ad77106 --- /dev/null +++ b/test/fixtures/standard.recursion.conllu @@ -0,0 +1,31 @@ +# sent_id = email-enronsent39_01-0078 +# text = Enron needs to use this situation to quickly get our viewpoints up into the PUCT and ERCOT ISO on what is driving these problems and our proposed fixes. +1 Enron Enron PROPN NNP Number=Sing 2 nsubj 2:nsubj _ +2 needs need VERB VBZ Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin 0 root 0:root _ +3 to to PART TO _ 4 mark 4:mark _ +4 use use VERB VB VerbForm=Inf 2 xcomp 2:xcomp _ +5 this this DET DT Number=Sing|PronType=Dem 6 det 6:det _ +6 situation situation NOUN NN Number=Sing 4 obj 4:obj _ +7 to to PART TO _ 9 mark 9:mark _ +8 quickly quickly ADV RB _ 9 advmod 9:advmod _ +9 get get VERB VB VerbForm=Inf 4 advcl 4:advcl _ +10 our we PRON PRP$ Number=Plur|Person=1|Poss=Yes|PronType=Prs 11 nmod:poss 11:nmod:poss _ +11 viewpoints viewpoint NOUN NNS Number=Plur 9 obj 9:obj _ +12 up up ADV RB _ 9 advmod 9:advmod _ +13 into into ADP IN _ 18 case 18:case _ +14 the the DET DT Definite=Def|PronType=Art 18 det 18:det _ +15 PUCT PUCT PROPN NNP Number=Sing 18 compound 18:compound _ +16 and and CCONJ CC _ 17 cc 17:cc _ +17 ERCOT ERCOT PROPN NNP Number=Sing 11 conj 11:conj _ +18 ISO iso NOUN NN Number=Sing 9 obl 9:obl _ +19 on on SCONJ IN _ 20 case 20:case _ +20 what what PRON WP PronType=Int 9 obl 9:obl _ +21 is be AUX VBZ Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin 22 aux 22:aux _ +22 driving drive VERB VBG Tense=Pres|VerbForm=Part 20 acl:relcl 20:acl:relcl _ +23 these these DET DT Number=Plur|PronType=Dem 24 det 24:det _ +24 problems problem NOUN NNS Number=Plur 22 obj 22:obj _ +25 and and CCONJ CC _ 28 cc 28:cc _ +26 our we PRON PRP$ Number=Plur|Person=1|Poss=Yes|PronType=Prs 28 nmod:poss 28:nmod:poss _ +27 proposed propose VERB VBN Tense=Past|VerbForm=Part 28 amod 28:amod _ +28 fixes fix NOUN NNS Number=Plur 20 conj 20:conj SpaceAfter=No +29 . . PUNCT . _ 2 punct 2:punct _ diff --git a/test/standard.recursion.sh b/test/standard.recursion.sh new file mode 100755 index 0000000..6e13f84 --- /dev/null +++ b/test/standard.recursion.sh @@ -0,0 +1,26 @@ +#! /bin/bash +########################################### +# +########################################### + +# constants +baseDir=$(cd `dirname "$0"`;pwd) +. $baseDir/../admin/util.sh + + +####################### +# variables +####################### +PY=$baseDir/../app/transitionparser/standard.py +TRAIN_DATA=$baseDir/../test/fixtures/standard.recursion.conllu +MODEL=$baseDir/../tmp/standard.fixtures.model +EPOCH=1 +LOG_VERBOSITY=1 # info + +# functions + + +# main +[ -z "${BASH_SOURCE[0]}" -o "${BASH_SOURCE[0]}" = "$0" ] || return +set -x +train $PY $LOG_VERBOSITY $MODEL $TRAIN_DATA $EPOCH