forked from explosion/spaCy
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_issue2800.py
36 lines (24 loc) · 907 Bytes
/
test_issue2800.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
'''Test issue that arises when too many labels are added to NER model.'''
from __future__ import unicode_literals
import random
from ...lang.en import English
def train_model(train_data, entity_types):
nlp = English(pipeline=[])
ner = nlp.create_pipe("ner")
nlp.add_pipe(ner)
for entity_type in list(entity_types):
ner.add_label(entity_type)
optimizer = nlp.begin_training()
# Start training
for i in range(20):
losses = {}
index = 0
random.shuffle(train_data)
for statement, entities in train_data:
nlp.update([statement], [entities], sgd=optimizer, losses=losses, drop=0.5)
return nlp
def test_train_with_many_entity_types():
train_data = []
train_data.extend([("One sentence", {"entities": []})])
entity_types = [str(i) for i in range(1000)]
model = train_model(train_data, entity_types)