7 changes: 6 additions & 1 deletion barchybrid/src/arc_hybrid.py
@@ -32,6 +32,8 @@ def __init__(self, words, pos, rels, w2i, options):
 self.rlFlag = options.rlFlag
 self.k = options.window

+self.cposFlag = options.cposFlag
+
 self.nnvecs = (1 if self.headFlag else 0) + (2 if self.rlFlag or self.rlMostFlag else 0)

 self.external_embedding = None
@@ -171,7 +173,10 @@ def getWordEmbeddings(self, sentence, train):
 c = float(self.wordsCount.get(root.norm, 0))
 dropFlag = not train or (random.random() < (c/(0.25+c)))
 root.wordvec = self.wlookup[int(self.vocab.get(root.norm, 0)) if dropFlag else 0]
-root.posvec = self.plookup[int(self.pos[root.pos])] if self.pdims > 0 else None
+if self.cposFlag:
+    root.posvec = self.plookup[int(self.pos[root.cpos])] if self.pdims > 0 else None
+else:
+    root.posvec = self.plookup[int(self.pos[root.pos])] if self.pdims > 0 else None

 if self.external_embedding is not None:
     #if not dropFlag and random.random() < 0.5:
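Note: the two entry attributes this change switches between correspond to the two part-of-speech columns of the treebank: cpos is the coarse/universal tag (CPOSTAG in CoNLL-X, UPOS in CoNLL-U, column 4) and pos is the fine-grained tag (POSTAG/XPOS, column 5). A minimal sketch of that mapping; only the column layout comes from the CoNLL format, the token line itself is made up:

# ID  FORM  LEMMA  UPOS  XPOS  FEATS        HEAD  DEPREL  DEPS  MISC
line = '1\tDogs\tdog\tNOUN\tNNS\tNumber=Plur\t2\tnsubj\t_\t_'
cols = line.split('\t')

cpos = cols[3]  # 'NOUN' -- coarse/universal tag, read when --cpos is passed
pos = cols[4]   # 'NNS'  -- fine-grained tag, the previous (and default) behaviour

print cpos, pos  # NOUN NNS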
11 changes: 6 additions & 5 deletions barchybrid/src/parser.py
@@ -31,6 +31,7 @@
 parser.add_option("--userl", action="store_true", dest="rlMostFlag", default=False)
 parser.add_option("--predict", action="store_true", dest="predictFlag", default=False)
 parser.add_option("--dynet-mem", type="int", dest="cnn_mem", default=512)
+parser.add_option("--cpos", action="store_true", help="To use CPOS/UPOS field instead of POS/XPOS", dest="cposFlag", default=False)

 (options, args) = parser.parse_args()
 print 'Using external embedding:', options.external_embedding
@@ -41,7 +42,7 @@
     sys.exit()

 print 'Preparing vocab'
-words, w2i, pos, rels = utils.vocab(options.conll_train)
+words, w2i, pos, rels = utils.vocab(options.conll_train, options.cposFlag)

 with open(os.path.join(options.output, options.params), 'w') as paramsfp:
     pickle.dump((words, w2i, pos, rels, options), paramsfp)
@@ -73,16 +74,16 @@
 parser = ArcHybridLSTM(words, pos, rels, w2i, stored_opt)
 parser.Load(options.model)
 conllu = (os.path.splitext(options.conll_test.lower())[1] == '.conllu')
-tespath = os.path.join(options.output, 'test_pred.conll' if not conllu else 'test_pred.conllu')
+testpath = os.path.join(options.output, 'test_pred.conll' if not conllu else 'test_pred.conllu')
 ts = time.time()
 pred = list(parser.Predict(options.conll_test))
 te = time.time()
-utils.write_conll(tespath, pred)
+utils.write_conll(testpath, pred)

 if not conllu:
-    os.system('perl src/utils/eval.pl -g ' + options.conll_test + ' -s ' + tespath + ' > ' + tespath + '.txt')
+    os.system('perl src/utils/eval.pl -g ' + options.conll_test + ' -s ' + testpath + ' > ' + testpath + '.txt')
 else:
-    os.system('python src/utils/evaluation_script/conll17_ud_eval.py -v -w src/utils/evaluation_script/weights.clas ' + options.conll_test + ' ' + tespath + ' > ' + testpath + '.txt')
+    os.system('python src/utils/evaluation_script/conll17_ud_eval.py -v -w src/utils/evaluation_script/weights.clas ' + options.conll_test + ' ' + testpath + ' > ' + testpath + '.txt')

 print 'Finished predicting test',te-ts

7 changes: 5 additions & 2 deletions barchybrid/src/utils.py
@@ -69,15 +69,18 @@ def isProj(sentence):
     return len(forest.roots) == 1


-def vocab(conll_path):
+def vocab(conll_path, cposFlag):
     wordsCount = Counter()
     posCount = Counter()
     relCount = Counter()

     with open(conll_path, 'r') as conllFP:
         for sentence in read_conll(conllFP, True):
             wordsCount.update([node.norm for node in sentence if isinstance(node, ConllEntry)])
-            posCount.update([node.pos for node in sentence if isinstance(node, ConllEntry)])
+            if cposFlag:
+                posCount.update([node.cpos for node in sentence if isinstance(node, ConllEntry)])
+            else:
+                posCount.update([node.pos for node in sentence if isinstance(node, ConllEntry)])
             relCount.update([node.relation for node in sentence if isinstance(node, ConllEntry)])

     return (wordsCount, {w: i for i, w in enumerate(wordsCount.keys())}, posCount.keys(), relCount.keys())
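Note: threading the flag through vocab() keeps the tag inventory consistent with the column used for the embedding lookups above. A quick usage sketch of the updated helper; the training file name is hypothetical:

# words: Counter of word forms, w2i: word-to-index map,
# pos: tag inventory, rels: dependency relation inventory
words, w2i, pos, rels = vocab('train.conllu', cposFlag=True)
# cposFlag=True  -> pos holds UPOS/CPOSTAG values such as 'NOUN', 'VERB'
# cposFlag=False -> pos holds XPOS/POSTAG values such as 'NNS', 'VBD' (the old behaviour)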
11 changes: 9 additions & 2 deletions bmstparser/src/mstlstm.py
@@ -30,6 +30,7 @@ def __init__(self, vocab, pos, rels, w2i, options):
 self.rels = {word: ind for ind, word in enumerate(rels)}
 self.irels = rels

+self.cposFlag = options.cposFlag

 self.external_embedding, self.edim = None, 0
 if options.external_embedding is not None:
@@ -146,7 +147,10 @@ def Predict(self, conll_path):

 for entry in conll_sentence:
     wordvec = self.wlookup[int(self.vocab.get(entry.norm, 0))] if self.wdims > 0 else None
-    posvec = self.plookup[int(self.pos[entry.pos])] if self.pdims > 0 else None
+    if self.cposFlag:
+        posvec = self.plookup[int(self.pos[entry.cpos])] if self.pdims > 0 else None
+    else:
+        posvec = self.plookup[int(self.pos[entry.pos])] if self.pdims > 0 else None
     evec = self.elookup[int(self.extrnd.get(entry.form, self.extrnd.get(entry.norm, 0)))] if self.external_embedding is not None else None
     entry.vec = concatenate(filter(None, [wordvec, posvec, evec]))

@@ -234,7 +238,10 @@ def Train(self, conll_path):
 c = float(self.wordsCount.get(entry.norm, 0))
 dropFlag = (random.random() < (c/(0.25+c)))
 wordvec = self.wlookup[int(self.vocab.get(entry.norm, 0)) if dropFlag else 0] if self.wdims > 0 else None
-posvec = self.plookup[int(self.pos[entry.pos])] if self.pdims > 0 else None
+if self.cposFlag:
+    posvec = self.plookup[int(self.pos[entry.cpos])] if self.pdims > 0 else None
+else:
+    posvec = self.plookup[int(self.pos[entry.pos])] if self.pdims > 0 else None
 evec = None

 if self.external_embedding is not None:
11 changes: 6 additions & 5 deletions bmstparser/src/parser.py
@@ -28,6 +28,7 @@
 parser.add_option("--disablecostaug", action="store_false", dest="costaugFlag", default=True)
 parser.add_option("--dynet-seed", type="int", dest="seed", default=0)
 parser.add_option("--dynet-mem", type="int", dest="mem", default=0)
+parser.add_option("--cpos", action="store_true", help="To use CPOS/UPOS field instead of POS/XPOS", dest="cposFlag", default=False)

 (options, args) = parser.parse_args()

@@ -44,21 +45,21 @@

     parser.Load(options.model)
     conllu = (os.path.splitext(options.conll_test.lower())[1] == '.conllu')
-    tespath = os.path.join(options.output, 'test_pred.conll' if not conllu else 'test_pred.conllu')
+    testpath = os.path.join(options.output, 'test_pred.conll' if not conllu else 'test_pred.conllu')

     ts = time.time()
     test_res = list(parser.Predict(options.conll_test))
     te = time.time()
     print 'Finished predicting test.', te-ts, 'seconds.'
-    utils.write_conll(tespath, test_res)
+    utils.write_conll(testpath, test_res)

     if not conllu:
-        os.system('perl src/utils/eval.pl -g ' + options.conll_test + ' -s ' + tespath + ' > ' + tespath + '.txt')
+        os.system('perl src/utils/eval.pl -g ' + options.conll_test + ' -s ' + testpath + ' > ' + testpath + '.txt')
     else:
-        os.system('python src/utils/evaluation_script/conll17_ud_eval.py -v -w src/utils/evaluation_script/weights.clas ' + options.conll_test + ' ' + tespath + ' > ' + testpath + '.txt')
+        os.system('python src/utils/evaluation_script/conll17_ud_eval.py -v -w src/utils/evaluation_script/weights.clas ' + options.conll_test + ' ' + testpath + ' > ' + testpath + '.txt')
 else:
     print 'Preparing vocab'
-    words, w2i, pos, rels = utils.vocab(options.conll_train)
+    words, w2i, pos, rels = utils.vocab(options.conll_train, options.cposFlag)

     with open(os.path.join(options.output, options.params), 'w') as paramsfp:
         pickle.dump((words, w2i, pos, rels, options), paramsfp)
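Note: in the barchybrid script above, the full parsed options object (including cposFlag) is pickled next to the vocabulary at training time and passed back to the model as stored_opt in the predict branch; the bmstparser script presumably follows the same pattern. So the tag column chosen with --cpos travels with the saved parameters. A minimal sketch of that round trip, with a hypothetical params.pickle path and a stand-in options object:

import pickle

class Options(object):   # stand-in for the parsed optparse options
    cposFlag = True      # what --cpos sets at training time

# training side (mirrors the pickle.dump calls above)
with open('params.pickle', 'wb') as paramsfp:
    pickle.dump((['dogs'], {'dogs': 0}, ['NOUN'], ['nsubj'], Options()), paramsfp)

# prediction side (mirrors the pickle.load into stored_opt in the predict branch)
with open('params.pickle', 'rb') as paramsfp:
    words, w2i, pos, rels, stored_opt = pickle.load(paramsfp)

print stored_opt.cposFlag   # True -- the loaded parser keeps reading the same tag column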
7 changes: 5 additions & 2 deletions bmstparser/src/utils.py
@@ -25,15 +25,18 @@ def __str__(self):
     return '\t'.join(['_' if v is None else v for v in values])


-def vocab(conll_path):
+def vocab(conll_path, cposFlag):
     wordsCount = Counter()
     posCount = Counter()
     relCount = Counter()

     with open(conll_path, 'r') as conllFP:
         for sentence in read_conll(conllFP):
             wordsCount.update([node.norm for node in sentence if isinstance(node, ConllEntry)])
-            posCount.update([node.pos for node in sentence if isinstance(node, ConllEntry)])
+            if cposFlag:
+                posCount.update([node.cpos for node in sentence if isinstance(node, ConllEntry)])
+            else:
+                posCount.update([node.pos for node in sentence if isinstance(node, ConllEntry)])
             relCount.update([node.relation for node in sentence if isinstance(node, ConllEntry)])

     return (wordsCount, {w: i for i, w in enumerate(wordsCount.keys())}, posCount.keys(), relCount.keys())