Skip to content

Commit 34d240c

Browse files
authored
Release 1.3.0
2 parents d39944b + c2c9f80 commit 34d240c

File tree

94 files changed

+3224
-2556
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

94 files changed

+3224
-2556
lines changed

deeppavlov/_meta.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
__version__ = '1.2.0'
1+
__version__ = '1.3.0'
22
__author__ = 'Neural Networks and Deep Learning lab, MIPT'
33
__description__ = 'An open source library for building end-to-end dialog systems and training chatbots.'
44
__keywords__ = ['NLP', 'NER', 'SQUAD', 'Intents', 'Chatbot']
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,56 @@
1+
{
2+
"chainer": {
3+
"in": ["texts", "dataset"],
4+
"in_y": ["y_true"],
5+
"pipe": [
6+
{
7+
"class_name": "dnnc_pair_generator",
8+
"in": ["texts", "dataset"],
9+
"out": ["x", "x_support", "x_populated", "y_support"],
10+
"bidirectional": true
11+
},
12+
{
13+
"class_name": "torch_transformers_preprocessor",
14+
"in": ["x_populated", "x_support"],
15+
"out": ["bert_features"],
16+
"vocab_file": "{BASE_MODEL}",
17+
"do_lower_case": true,
18+
"max_seq_length": 128
19+
},
20+
{
21+
"class_name": "torch_transformers_classifier",
22+
"main": true,
23+
"in": ["bert_features"],
24+
"out": ["simmilarity_scores"],
25+
"n_classes": 2,
26+
"return_probas": true,
27+
"pretrained_bert": "{BASE_MODEL}",
28+
"save_path": "{MODEL_PATH}/model",
29+
"load_path": "{MODEL_PATH}/model",
30+
"is_binary": "{BINARY_CLASSIFICATION}"
31+
},
32+
{
33+
"class_name": "dnnc_proba2labels",
34+
"is_binary": "{BINARY_CLASSIFICATION}",
35+
"in": ["simmilarity_scores", "x", "x_populated", "x_support", "y_support"],
36+
"out": ["y_pred"],
37+
"confidence_threshold": 0.0
38+
}
39+
],
40+
"out": ["y_pred"]
41+
},
42+
"metadata": {
43+
"variables": {
44+
"ROOT_PATH": "~/.deeppavlov",
45+
"MODEL_PATH": "{ROOT_PATH}/models/fewshot/roberta_nli_mrpc_1_10",
46+
"BINARY_CLASSIFICATION": true,
47+
"BASE_MODEL": "roberta-base"
48+
},
49+
"download": [
50+
{
51+
"url": "http://files.deeppavlov.ai/v1/classifiers/fewshot/roberta_nli_mrpc_1_10.tar.gz",
52+
"subdir": "{MODEL_PATH}"
53+
}
54+
]
55+
}
56+
}

deeppavlov/configs/classifiers/glue/glue_cola_roberta.json

+18-52
Original file line number | Diff line number | Diff line change
@@ -14,47 +14,29 @@
1414
"seed": 42
1515
},
1616
"chainer": {
17-
"in": [
18-
"x"
19-
],
20-
"in_y": [
21-
"y"
22-
],
17+
"in": ["x"],
18+
"in_y": ["y"],
2319
"pipe": [
2420
{
2521
"class_name": "torch_transformers_preprocessor",
2622
"vocab_file": "{BASE_MODEL}",
2723
"do_lower_case": false,
2824
"max_seq_length": 64,
29-
"in": [
30-
"x"
31-
],
32-
"out": [
33-
"bert_features"
34-
]
25+
"in": ["x"],
26+
"out": ["bert_features"]
3527
},
3628
{
3729
"id": "classes_vocab",
3830
"class_name": "simple_vocab",
39-
"fit_on": [
40-
"y"
41-
],
31+
"fit_on": ["y"],
4232
"save_path": "{MODEL_PATH}/classes.dict",
4333
"load_path": "{MODEL_PATH}/classes.dict",
44-
"in": [
45-
"y"
46-
],
47-
"out": [
48-
"y_ids"
49-
]
34+
"in": ["y"],
35+
"out": ["y_ids"]
5036
},
5137
{
52-
"in": [
53-
"y_ids"
54-
],
55-
"out": [
56-
"y_onehot"
57-
],
38+
"in": ["y_ids"],
39+
"out": ["y_onehot"],
5840
"class_name": "one_hotter",
5941
"depth": "#classes_vocab.len",
6042
"single_vector": true
@@ -72,42 +54,26 @@
7254
},
7355
"learning_rate_drop_patience": 3,
7456
"learning_rate_drop_div": 2.0,
75-
"in": [
76-
"bert_features"
77-
],
78-
"in_y": [
79-
"y_ids"
80-
],
81-
"out": [
82-
"y_pred_probas"
83-
]
57+
"in": ["bert_features"],
58+
"in_y": ["y_ids"],
59+
"out": ["y_pred_probas"]
8460
},
8561
{
86-
"in": [
87-
"y_pred_probas"
88-
],
89-
"out": [
90-
"y_pred_ids"
91-
],
62+
"in": ["y_pred_probas"],
63+
"out": ["y_pred_ids"],
9264
"class_name": "proba2labels",
9365
"max_proba": true
9466
},
9567
{
96-
"in": [
97-
"y_pred_ids"
98-
],
99-
"out": [
100-
"y_pred_labels"
101-
],
68+
"in": ["y_pred_ids"],
69+
"out": ["y_pred_labels"],
10270
"ref": "classes_vocab"
10371
}
10472
],
105-
"out": [
106-
"y_pred_labels"
107-
]
73+
"out": ["y_pred_labels"]
10874
},
10975
"train": {
110-
"batch_size": 128,
76+
"batch_size": 32,
11177
"metrics": ["matthews_correlation"],
11278
"validation_patience": 10,
11379
"val_every_n_batches": 250,

deeppavlov/configs/classifiers/glue/glue_mnli_roberta.json

+27-68
Original file line number | Diff line number | Diff line change
@@ -1,65 +1,42 @@
11
{
22
"dataset_reader": {
33
"class_name": "huggingface_dataset_reader",
4-
"path": "glue",
5-
"name": "mnli",
4+
"path": "{COMPETITION}",
5+
"name": "{TASK}",
66
"train": "train",
77
"valid": "validation_matched",
88
"test": "test_matched"
99
},
1010
"dataset_iterator": {
1111
"class_name": "huggingface_dataset_iterator",
12-
"features": [
13-
"hypothesis",
14-
"premise"
15-
],
12+
"features": ["hypothesis", "premise"],
1613
"label": "label",
1714
"seed": 42
1815
},
1916
"chainer": {
20-
"in": [
21-
"hypothesis",
22-
"premise"
23-
],
24-
"in_y": [
25-
"y"
26-
],
17+
"in": ["hypothesis", "premise"],
18+
"in_y": ["y"],
2719
"pipe": [
2820
{
2921
"class_name": "torch_transformers_preprocessor",
3022
"vocab_file": "{BASE_MODEL}",
3123
"do_lower_case": false,
3224
"max_seq_length": 128,
33-
"in": [
34-
"hypothesis",
35-
"premise"
36-
],
37-
"out": [
38-
"bert_features"
39-
]
25+
"in": ["hypothesis", "premise"],
26+
"out": ["bert_features"]
4027
},
4128
{
4229
"id": "classes_vocab",
4330
"class_name": "simple_vocab",
44-
"fit_on": [
45-
"y"
46-
],
31+
"fit_on": ["y"],
4732
"save_path": "{MODEL_PATH}/classes.dict",
4833
"load_path": "{MODEL_PATH}/classes.dict",
49-
"in": [
50-
"y"
51-
],
52-
"out": [
53-
"y_ids"
54-
]
34+
"in": ["y"],
35+
"out": ["y_ids"]
5536
},
5637
{
57-
"in": [
58-
"y_ids"
59-
],
60-
"out": [
61-
"y_onehot"
62-
],
38+
"in": ["y_ids"],
39+
"out": ["y_onehot"],
6340
"class_name": "one_hotter",
6441
"depth": "#classes_vocab.len",
6542
"single_vector": true
@@ -77,68 +54,50 @@
7754
},
7855
"learning_rate_drop_patience": 3,
7956
"learning_rate_drop_div": 2.0,
80-
"in": [
81-
"bert_features"
82-
],
83-
"in_y": [
84-
"y_ids"
85-
],
86-
"out": [
87-
"y_pred_probas"
88-
]
57+
"in": ["bert_features"],
58+
"in_y": ["y_ids"],
59+
"out": ["y_pred_probas"]
8960
},
9061
{
91-
"in": [
92-
"y_pred_probas"
93-
],
94-
"out": [
95-
"y_pred_ids"
96-
],
62+
"in": ["y_pred_probas"],
63+
"out": ["y_pred_ids"],
9764
"class_name": "proba2labels",
9865
"max_proba": true
9966
},
10067
{
101-
"in": [
102-
"y_pred_ids"
103-
],
104-
"out": [
105-
"y_pred_labels"
106-
],
68+
"in": ["y_pred_ids"],
69+
"out": ["y_pred_labels"],
10770
"ref": "classes_vocab"
10871
}
10972
],
110-
"out": [
111-
"y_pred_labels"
112-
]
73+
"out": ["y_pred_labels"]
11374
},
11475
"train": {
11576
"batch_size": 4,
116-
"metrics": [
117-
"accuracy"
118-
],
77+
"metrics": ["accuracy"],
11978
"validation_patience": 10,
12079
"val_every_n_batches": 250,
12180
"log_every_n_batches": 250,
12281
"show_examples": false,
123-
"evaluation_targets": [
124-
"valid"
125-
],
82+
"evaluation_targets": ["valid"],
12683
"class_name": "torch_trainer",
12784
"tensorboard_log_dir": "{MODEL_PATH}/",
12885
"pytest_max_batches": 2
12986
},
13087
"metadata": {
13188
"variables": {
132-
"ROOT_PATH": "~/.deeppavlov",
13389
"BASE_MODEL": "roberta-large",
90+
"ROOT_PATH": "~/.deeppavlov",
13491
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
13592
"MODELS_PATH": "{ROOT_PATH}/models",
136-
"MODEL_PATH": "{MODELS_PATH}/classifiers/glue_mnli_{BASE_MODEL}"
93+
"COMPETITION": "glue",
94+
"TASK": "mnli",
95+
"MODEL_PATH": "{MODELS_PATH}/{COMPETITION}/{TASK}/{BASE_MODEL}"
13796
},
13897
"download": [
13998
{
14099
"url": "http://files.deeppavlov.ai/0.16/classifiers/glue_mnli.tar.gz",
141-
"subdir": "{MODELS_PATH}"
100+
"subdir": "{MODEL_PATH}"
142101
}
143102
]
144103
}

deeppavlov/configs/classifiers/glue/glue_qnli_roberta.json

+1-1
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,7 @@
7373
"out": ["y_pred_labels"]
7474
},
7575
"train": {
76-
"batch_size": 64,
76+
"batch_size": 16,
7777
"metrics": ["accuracy"],
7878
"validation_patience": 10,
7979
"val_every_n_batches": 250,

deeppavlov/configs/classifiers/glue/glue_qqp_roberta.json

+1-1
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,7 @@
6060
"out": ["y_pred_ids"]
6161
},
6262
"train": {
63-
"batch_size": 64,
63+
"batch_size": 16,
6464
"metrics": [
6565
"f1",
6666
"accuracy"

0 commit comments

Comments
 (0)