Skip to content

Commit

Permalink
Allow entity types with dashes (resolves explosion#1967)
Browse files Browse the repository at this point in the history
  • Loading branch information
ines committed Mar 28, 2018
1 parent 9615ed5 commit 3eb67bb
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 3 deletions.
4 changes: 1 addition & 3 deletions spacy/syntax/ner.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,7 @@ cdef class BiluoPushDown(TransitionSystem):
for (ids, words, tags, heads, labels, biluo), _ in sents:
for i, ner_tag in enumerate(biluo):
if ner_tag != 'O' and ner_tag != '-':
if ner_tag.count('-') != 1:
raise ValueError(ner_tag)
_, label = ner_tag.split('-')
_, label = ner_tag.split('-', 1)
if label not in seen_entities:
seen_entities.add(label)
for move_str in ('B', 'I', 'L', 'U'):
Expand Down
15 changes: 15 additions & 0 deletions spacy/tests/regression/test_issue1967.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# coding: utf8
from __future__ import unicode_literals

import pytest

from ...pipeline import EntityRecognizer
from ...vocab import Vocab


@pytest.mark.parametrize('label', ['U-JOB-NAME'])
def test_issue1967(label):
    """Regression test for issue #1967: entity types that contain dashes.

    Builds a minimal gold parse (one sentence, one token) whose only NER
    tag is `label`, then hands it to `get_actions` on the entity
    recognizer's transition system. There is no explicit assertion: the
    test passes as long as that call completes without raising.
    """
    # Six parallel per-token lists; `label` sits in the last slot, where
    # the NER tags are read from.
    annotations = ([0], ['word'], ['tag'], [0], ['dep'], [label])
    # A sentence pairs the annotation tuple with a second element that is
    # not needed here, so it is left as None.
    sentences = [(annotations, None)]
    recognizer = EntityRecognizer(Vocab())
    # The first element of each gold-parse pair is likewise left as None.
    recognizer.moves.get_actions(gold_parses=[(None, sentences)])

0 comments on commit 3eb67bb

Please sign in to comment.