diff --git a/deeppavlov/configs/classifiers/glue/glue_mnli_roberta.json b/deeppavlov/configs/classifiers/glue/glue_mnli_roberta.json
index 7ff348e303..cbddd9b488 100644
--- a/deeppavlov/configs/classifiers/glue/glue_mnli_roberta.json
+++ b/deeppavlov/configs/classifiers/glue/glue_mnli_roberta.json
@@ -1,4 +1,19 @@
 {
+  "metadata": {
+    "variables": {
+      "ROOT_PATH": "~/.deeppavlov",
+      "BASE_MODEL": "roberta-large",
+      "DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
+      "MODELS_PATH": "{ROOT_PATH}/models",
+      "MODEL_PATH": "{MODELS_PATH}/classifiers/glue_mnli_{BASE_MODEL}"
+    },
+    "download": [
+      {
+        "url": "https://files.deeppavlov.ai/0.16/classifiers/glue_mnli.tar.gz",
+        "subdir": "{MODELS_PATH}"
+      }
+    ]
+  },
   "dataset_reader": {
     "class_name": "huggingface_dataset_reader",
     "path": "glue",
@@ -30,6 +45,8 @@
         "vocab_file": "{BASE_MODEL}",
         "do_lower_case": false,
         "max_seq_length": 128,
+        "truncation": "longest_first",
+        "padding": "longest",
         "in": [
           "hypothesis",
           "premise"
@@ -127,20 +144,5 @@
     "class_name": "torch_trainer",
     "tensorboard_log_dir": "{MODEL_PATH}/",
     "pytest_max_batches": 2
-  },
-  "metadata": {
-    "variables": {
-      "ROOT_PATH": "~/.deeppavlov",
-      "BASE_MODEL": "roberta-large",
-      "DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
-      "MODELS_PATH": "{ROOT_PATH}/models",
-      "MODEL_PATH": "{MODELS_PATH}/classifiers/glue_mnli_{BASE_MODEL}"
-    },
-    "download": [
-      {
-        "url": "https://files.deeppavlov.ai/0.16/classifiers/glue_mnli.tar.gz",
-        "subdir": "{MODELS_PATH}"
-      }
-    ]
   }
 }
diff --git a/deeppavlov/configs/classifiers/glue/glue_mnli_xlm_roberta.json b/deeppavlov/configs/classifiers/glue/glue_mnli_xlm_roberta.json
new file mode 100644
index 0000000000..f9423fe948
--- /dev/null
+++ b/deeppavlov/configs/classifiers/glue/glue_mnli_xlm_roberta.json
@@ -0,0 +1,141 @@
+{
+  "metadata": {
+    "variables": {
+      "ROOT_PATH": "~/.deeppavlov",
+      "BASE_MODEL": "DeepPavlov/xlm-roberta-large-en-ru",
+      "DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
+      "MODELS_PATH": "{ROOT_PATH}/models",
+      "MODEL_PATH": "{MODELS_PATH}/classifiers/glue_mnli/{BASE_MODEL}"
+    }
+  },
+  "dataset_reader": {
+    "class_name": "huggingface_dataset_reader",
+    "path": "glue",
+    "name": "mnli",
+    "train": "train",
+    "valid": "validation_matched",
+    "test": "test_matched"
+  },
+  "dataset_iterator": {
+    "class_name": "huggingface_dataset_iterator",
+    "features": [
+      "hypothesis",
+      "premise"
+    ],
+    "label": "label",
+    "seed": 42
+  },
+  "chainer": {
+    "in": [
+      "hypothesis",
+      "premise"
+    ],
+    "in_y": [
+      "y"
+    ],
+    "pipe": [
+      {
+        "class_name": "torch_transformers_preprocessor",
+        "vocab_file": "{BASE_MODEL}",
+        "do_lower_case": false,
+        "max_seq_length": 192,
+        "truncation": "longest_first",
+        "padding": "longest",
+        "in": [
+          "hypothesis",
+          "premise"
+        ],
+        "out": [
+          "bert_features"
+        ]
+      },
+      {
+        "id": "classes_vocab",
+        "class_name": "simple_vocab",
+        "fit_on": [
+          "y"
+        ],
+        "save_path": "{MODEL_PATH}/classes.dict",
+        "load_path": "{MODEL_PATH}/classes.dict",
+        "in": [
+          "y"
+        ],
+        "out": [
+          "y_ids"
+        ]
+      },
+      {
+        "in": [
+          "y_ids"
+        ],
+        "out": [
+          "y_onehot"
+        ],
+        "class_name": "one_hotter",
+        "depth": "#classes_vocab.len",
+        "single_vector": true
+      },
+      {
+        "class_name": "torch_transformers_classifier",
+        "n_classes": "#classes_vocab.len",
+        "return_probas": true,
+        "pretrained_bert": "{BASE_MODEL}",
+        "save_path": "{MODEL_PATH}/model",
+        "load_path": "{MODEL_PATH}/model",
+        "optimizer": "AdamW",
+        "optimizer_parameters": {
+          "lr": 5e-06
+        },
+        "learning_rate_drop_patience": 4,
+        "learning_rate_drop_div": 2.0,
+        "in": [
+          "bert_features"
+        ],
+        "in_y": [
+          "y_ids"
+        ],
+        "out": [
+          "y_pred_probas"
+        ]
+      },
+      {
+        "in": [
+          "y_pred_probas"
+        ],
+        "out": [
+          "y_pred_ids"
+        ],
+        "class_name": "proba2labels",
+        "max_proba": true
+      },
+      {
+        "in": [
+          "y_pred_ids"
+        ],
+        "out": [
+          "y_pred_labels"
+        ],
+        "ref": "classes_vocab"
+      }
+    ],
+    "out": [
+      "y_pred_labels"
+    ]
+  },
+  "train": {
+    "batch_size": 64,
+    "metrics": [
+      "accuracy"
+    ],
+    "validation_patience": 10,
+    "val_every_n_batches": 3066,
+    "log_every_n_batches": 3066,
+    "show_examples": false,
+    "evaluation_targets": [
+      "valid"
+    ],
+    "class_name": "torch_trainer",
+    "tensorboard_log_dir": "{MODEL_PATH}/",
+    "pytest_max_batches": 2
+  }
+}
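A config like the new glue_mnli_xlm_roberta.json above can be exercised end-to-end through DeepPavlov's Python API. The sketch below is illustrative only: it assumes the config ships at the repo-relative path shown, and that you train the model yourself, since this config (unlike glue_mnli_roberta.json) declares no "download" entry.

    from deeppavlov import build_model, train_model

    config_path = "deeppavlov/configs/classifiers/glue/glue_mnli_xlm_roberta.json"

    # Fine-tune according to the config; weights and classes.dict land in MODEL_PATH.
    model = train_model(config_path)
    # Or load already-trained weights instead: model = build_model(config_path)

    # The chainer's "in" is [hypothesis, premise], so pass two parallel lists.
    print(model(["The man is asleep."], ["A man is inspecting a uniform."]))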
+ "y_ids" + ], + "out": [ + "y_pred_probas" + ] + }, + { + "in": [ + "y_pred_probas" + ], + "out": [ + "y_pred_ids" + ], + "class_name": "proba2labels", + "max_proba": true + }, + { + "in": [ + "y_pred_ids" + ], + "out": [ + "y_pred_labels" + ], + "ref": "classes_vocab" + } + ], + "out": [ + "y_pred_labels" + ] + }, + "train": { + "batch_size": 64, + "metrics": [ + "accuracy" + ], + "validation_patience": 10, + "val_every_n_batches": 3066, + "log_every_n_batches": 3066, + "show_examples": false, + "evaluation_targets": [ + "valid" + ], + "class_name": "torch_trainer", + "tensorboard_log_dir": "{MODEL_PATH}/", + "pytest_max_batches": 2 + } +} diff --git a/deeppavlov/configs/classifiers/superglue/russian_superglue_danetqa_xlm_roberta_mnli.json b/deeppavlov/configs/classifiers/superglue/russian_superglue_danetqa_xlm_roberta_mnli.json new file mode 100644 index 0000000000..414411b177 --- /dev/null +++ b/deeppavlov/configs/classifiers/superglue/russian_superglue_danetqa_xlm_roberta_mnli.json @@ -0,0 +1,146 @@ +{ + "metadata": { + "variables": { + "BASE_MODEL": "DeepPavlov/xlm-roberta-large-en-ru-mnli", + "ROOT_PATH": "~/.deeppavlov", + "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", + "MODELS_PATH": "{ROOT_PATH}/models", + "MODEL_PATH": "{MODELS_PATH}/classifiers/russian_superglue_boolq/{BASE_MODEL}", + "BINARY_CLASSIFICATION": true + } + }, + "dataset_reader": { + "class_name": "huggingface_dataset_reader", + "path": "russian_super_glue", + "name": "danetqa", + "train": "train", + "valid": "validation", + "test": "test", + "dev_percentage": 50 + }, + "dataset_iterator": { + "class_name": "huggingface_dataset_iterator", + "features": [ + "question", + "passage" + ], + "label": "label", + "seed": 42 + }, + "chainer": { + "in": [ + "question", + "passage" + ], + "in_y": [ + "y" + ], + "pipe": [ + { + "class_name": "torch_transformers_preprocessor", + "vocab_file": "{BASE_MODEL}", + "do_lower_case": false, + "max_seq_length": 256, + "in": [ + "question", + "passage" + ], + "out": [ + "bert_features" + ] + }, + { + "id": "classes_vocab", + "class_name": "simple_vocab", + "fit_on": [ + "y" + ], + "save_path": "{MODEL_PATH}/classes.dict", + "load_path": "{MODEL_PATH}/classes.dict", + "in": [ + "y" + ], + "out": [ + "y_ids" + ] + }, + { + "in": [ + "y_ids" + ], + "out": [ + "y_onehot" + ], + "class_name": "one_hotter", + "depth": "#classes_vocab.len", + "single_vector": true + }, + { + "class_name": "torch_transformers_classifier", + "n_classes": "#classes_vocab.len", + "return_probas": true, + "pretrained_bert": "{BASE_MODEL}", + "is_binary": "{BINARY_CLASSIFICATION}", + "save_path": "{MODEL_PATH}/model", + "load_path": "{MODEL_PATH}/model", + "optimizer": "AdamW", + "optimizer_parameters": { + "lr": 2e-05, + "weight_decay": 0.1 + }, + "learning_rate_drop_patience": 3, + "learning_rate_drop_div": 2.0, + "in": [ + "bert_features" + ], + "in_y": [ + "y_ids" + ], + "out": [ + "y_pred_probas" + ] + }, + { + "in": [ + "y_pred_probas" + ], + "out": [ + "y_pred_ids" + ], + "class_name": "proba2labels", + "is_binary": "{BINARY_CLASSIFICATION}", + "confidence_threshold": 0.5 + }, + { + "in": [ + "y_pred_ids" + ], + "out": [ + "y_pred_labels" + ], + "ref": "classes_vocab" + } + ], + "out": [ + "y_pred_labels" + ] + }, + "train": { + "batch_size": 24, + "metrics": [ + "accuracy" + ], + "validation_patience": 10, + "val_every_n_epochs": 1, + "log_every_n_epochs": 1, + "show_examples": false, + "evaluation_targets": [ + "train", + "valid" + ], + "class_name": "torch_trainer", + "tensorboard_log_dir": "{MODEL_PATH}/", 
+ "pytest_max_batches": 2, + "pytest_batch_size": 2 + } +} diff --git a/deeppavlov/configs/classifiers/superglue/superglue_boolq_xlm_roberta_mnli.json b/deeppavlov/configs/classifiers/superglue/superglue_boolq_xlm_roberta_mnli.json new file mode 100644 index 0000000000..5a79f2c967 --- /dev/null +++ b/deeppavlov/configs/classifiers/superglue/superglue_boolq_xlm_roberta_mnli.json @@ -0,0 +1,146 @@ +{ + "metadata": { + "variables": { + "BASE_MODEL": "DeepPavlov/xlm-roberta-large-en-ru-mnli", + "ROOT_PATH": "~/.deeppavlov", + "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", + "MODELS_PATH": "{ROOT_PATH}/models", + "MODEL_PATH": "{MODELS_PATH}/classifiers/superglue_boolq/{BASE_MODEL}", + "BINARY_CLASSIFICATION": true + } + }, + "dataset_reader": { + "class_name": "huggingface_dataset_reader", + "path": "super_glue", + "name": "boolq", + "train": "train", + "valid": "validation", + "test": "test", + "dev_percentage": 50 + }, + "dataset_iterator": { + "class_name": "huggingface_dataset_iterator", + "features": [ + "question", + "passage" + ], + "label": "label", + "seed": 42 + }, + "chainer": { + "in": [ + "question", + "passage" + ], + "in_y": [ + "y" + ], + "pipe": [ + { + "class_name": "torch_transformers_preprocessor", + "vocab_file": "{BASE_MODEL}", + "do_lower_case": false, + "max_seq_length": 256, + "in": [ + "question", + "passage" + ], + "out": [ + "bert_features" + ] + }, + { + "id": "classes_vocab", + "class_name": "simple_vocab", + "fit_on": [ + "y" + ], + "save_path": "{MODEL_PATH}/classes.dict", + "load_path": "{MODEL_PATH}/classes.dict", + "in": [ + "y" + ], + "out": [ + "y_ids" + ] + }, + { + "in": [ + "y_ids" + ], + "out": [ + "y_onehot" + ], + "class_name": "one_hotter", + "depth": "#classes_vocab.len", + "single_vector": true + }, + { + "class_name": "torch_transformers_classifier", + "n_classes": "#classes_vocab.len", + "return_probas": true, + "pretrained_bert": "{BASE_MODEL}", + "is_binary": "{BINARY_CLASSIFICATION}", + "save_path": "{MODEL_PATH}/model", + "load_path": "{MODEL_PATH}/model", + "optimizer": "AdamW", + "optimizer_parameters": { + "lr": 2e-05, + "weight_decay": 0.1 + }, + "learning_rate_drop_patience": 3, + "learning_rate_drop_div": 2.0, + "in": [ + "bert_features" + ], + "in_y": [ + "y_ids" + ], + "out": [ + "y_pred_probas" + ] + }, + { + "in": [ + "y_pred_probas" + ], + "out": [ + "y_pred_ids" + ], + "class_name": "proba2labels", + "is_binary": "{BINARY_CLASSIFICATION}", + "confidence_threshold": 0.5 + }, + { + "in": [ + "y_pred_ids" + ], + "out": [ + "y_pred_labels" + ], + "ref": "classes_vocab" + } + ], + "out": [ + "y_pred_labels" + ] + }, + "train": { + "batch_size": 24, + "metrics": [ + "accuracy" + ], + "validation_patience": 10, + "val_every_n_epochs": 1, + "log_every_n_epochs": 1, + "show_examples": false, + "evaluation_targets": [ + "train", + "valid" + ], + "class_name": "torch_trainer", + "tensorboard_log_dir": "{MODEL_PATH}/", + "pytest_max_batches": 2, + "pytest_batch_size": 2 + } +} diff --git a/deeppavlov/configs/classifiers/superglue/superglue_copa_roberta.json b/deeppavlov/configs/classifiers/superglue/superglue_copa_roberta.json index 1a9fda443d..16063abc46 100644 --- a/deeppavlov/configs/classifiers/superglue/superglue_copa_roberta.json +++ b/deeppavlov/configs/classifiers/superglue/superglue_copa_roberta.json @@ -1,4 +1,13 @@ { + "metadata": { + "variables": { + "BASE_MODEL": "roberta-large", + "ROOT_PATH": "~/.deeppavlov", + "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", + "MODELS_PATH": "{ROOT_PATH}/models", + "MODEL_PATH": 
"{MODELS_PATH}/classifiers/superglue_copa/{BASE_MODEL}" + } + }, "dataset_reader": { "class_name": "huggingface_dataset_reader", "path": "super_glue", @@ -22,6 +31,8 @@ "vocab_file": "{BASE_MODEL}", "do_lower_case": false, "max_seq_length": 64, + "truncation": "longest_first", + "padding": "longest", "in": ["contexts_list", "choices_list"], "out": ["bert_features"] }, @@ -84,14 +95,5 @@ "tensorboard_log_dir": "{MODEL_PATH}/", "pytest_max_batches": 2, "pytest_batch_size": 2 - }, - "metadata": { - "variables": { - "BASE_MODEL": "roberta-large", - "ROOT_PATH": "~/.deeppavlov", - "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", - "MODELS_PATH": "{ROOT_PATH}/models", - "MODEL_PATH": "{MODELS_PATH}/classifiers/superglue_copa_{BASE_MODEL}" - } } } diff --git a/deeppavlov/configs/classifiers/superglue/superglue_copa_xlm_roberta.json b/deeppavlov/configs/classifiers/superglue/superglue_copa_xlm_roberta.json new file mode 100644 index 0000000000..6ba2e4a1df --- /dev/null +++ b/deeppavlov/configs/classifiers/superglue/superglue_copa_xlm_roberta.json @@ -0,0 +1,99 @@ +{ + "metadata": { + "variables": { + "BASE_MODEL": "DeepPavlov/xlm-roberta-large-en-ru-mnli", + "ROOT_PATH": "~/.deeppavlov", + "DOWNLOADS_PATH": "{ROOT_PATH}/downloads", + "MODELS_PATH": "{ROOT_PATH}/models", + "MODEL_PATH": "{MODELS_PATH}/classifiers/superglue_copa/{BASE_MODEL}" + } + }, + "dataset_reader": { + "class_name": "huggingface_dataset_reader", + "path": "super_glue", + "name": "copa", + "train": "train", + "valid": "validation", + "test": "test" + }, + "dataset_iterator": { + "class_name": "huggingface_dataset_iterator", + "features": ["contexts", "choices"], + "label": "label", + "seed": 100 + }, + "chainer": { + "in": ["contexts_list", "choices_list"], + "in_y": ["y"], + "pipe": [ + { + "class_name": "torch_transformers_multiplechoice_preprocessor", + "vocab_file": "{BASE_MODEL}", + "do_lower_case": false, + "max_seq_length": 64, + "truncation": "longest_first", + "padding": "longest", + "in": ["contexts_list", "choices_list"], + "out": ["bert_features"] + }, + { + "id": "classes_vocab", + "class_name": "simple_vocab", + "fit_on": ["y"], + "save_path": "{MODEL_PATH}/classes.dict", + "load_path": "{MODEL_PATH}/classes.dict", + "in": ["y"], + "out": ["y_ids"] + }, + { + "in": ["y_ids"], + "out": ["y_onehot"], + "class_name": "one_hotter", + "depth": "#classes_vocab.len", + "single_vector": true + }, + { + "class_name": "torch_transformers_multiplechoice", + "n_classes": "#classes_vocab.len", + "return_probas": true, + "pretrained_bert": "{BASE_MODEL}", + "save_path": "{MODEL_PATH}/model", + "load_path": "{MODEL_PATH}/model", + "optimizer": "AdamW", + "optimizer_parameters": { + "lr": 1e-05 + }, + "learning_rate_drop_patience": 3, + "learning_rate_drop_div": 2.0, + "in": ["bert_features"], + "in_y": ["y_ids"], + "out": ["y_pred_probas"] + }, + { + "in": ["y_pred_probas"], + "out": ["y_pred_ids"], + "class_name": "proba2labels", + "max_proba": true + }, + { + "in": ["y_pred_ids"], + "out": ["y_pred_labels"], + "ref": "classes_vocab" + } + ], + "out": ["y_pred_labels"] + }, + "train": { + "batch_size": 28, + "metrics": ["accuracy"], + "validation_patience": 20, + "val_every_n_epochs": 1, + "log_every_n_epochs": 1, + "show_examples": false, + "evaluation_targets": ["train", "valid"], + "class_name": "torch_trainer", + "tensorboard_log_dir": "{MODEL_PATH}/", + "pytest_max_batches": 2, + "pytest_batch_size": 2 + } +} diff --git a/deeppavlov/configs/classifiers/superglue/superglue_record_xlm_roberta.json 
diff --git a/deeppavlov/configs/classifiers/superglue/superglue_record_xlm_roberta.json b/deeppavlov/configs/classifiers/superglue/superglue_record_xlm_roberta.json
new file mode 100644
index 0000000000..9d356d4699
--- /dev/null
+++ b/deeppavlov/configs/classifiers/superglue/superglue_record_xlm_roberta.json
@@ -0,0 +1,170 @@
+{
+  "metadata": {
+    "variables": {
+      "BASE_MODEL": "DeepPavlov/xlm-roberta-large-en-ru",
+      "TASK": "record",
+      "ROOT_PATH": "~/.deeppavlov",
+      "DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
+      "MODELS_PATH": "{ROOT_PATH}/models",
+      "MODEL_PATH": "{MODELS_PATH}/classifiers/superglue_record_{BASE_MODEL}",
+      "BINARY_CLASSIFICATION": false,
+      "SEED": 42
+    },
+    "download": [
+      {
+        "url": "http://files.deeppavlov.ai/0.17/classifiers/superglue/superglue_record_roberta.tar.gz",
+        "subdir": "{MODEL_PATH}"
+      }
+    ]
+  },
+  "dataset_reader": {
+    "class_name": "huggingface_dataset_reader",
+    "path": "super_glue",
+    "name": "{TASK}",
+    "train": "train",
+    "valid": "validation",
+    "seed": "{SEED}",
+    "downsample_ratio": [
+      1.8,
+      1.8
+    ],
+    "do_index_correction": false
+  },
+  "dataset_iterator": {
+    "class_name": "huggingface_dataset_iterator",
+    "features": [
+      "idx",
+      "query",
+      "passage",
+      "entities",
+      "num_examples"
+    ],
+    "label": "label",
+    "seed": "{SEED}",
+    "use_label_name": false
+  },
+  "chainer": {
+    "in": [
+      "idx",
+      "query",
+      "passage",
+      "entities",
+      "num_examples"
+    ],
+    "in_y": [
+      "y"
+    ],
+    "pipe": [
+      {
+        "class_name": "torch_transformers_preprocessor",
+        "vocab_file": "{BASE_MODEL}",
+        "do_lower_case": false,
+        "max_seq_length": 320,
+        "in": [
+          "query",
+          "passage"
+        ],
+        "out": [
+          "bert_features"
+        ]
+      },
+      {
+        "class_name": "torch_transformers_classifier",
+        "n_classes": 2,
+        "return_probas": true,
+        "is_binary": "{BINARY_CLASSIFICATION}",
+        "pretrained_bert": "{BASE_MODEL}",
+        "save_path": "{MODEL_PATH}/model",
+        "load_path": "{MODEL_PATH}/model",
+        "optimizer": "AdamW",
+        "optimizer_parameters": {
+          "lr": 2e-05,
+          "weight_decay": 0.1
+        },
+        "learning_rate_drop_patience": 3,
+        "learning_rate_drop_div": 2.0,
+        "in": [
+          "bert_features"
+        ],
+        "in_y": [
+          "y"
+        ],
+        "out": [
+          "y_pred_probas"
+        ]
+      },
+      {
+        "class_name": "proba2labels",
+        "in": [
+          "y_pred_probas"
+        ],
+        "out": [
+          "y_pred_ids"
+        ],
+        "is_binary": "{BINARY_CLASSIFICATION}",
+        "max_proba": true
+      },
+      {
+        "class_name": "torch_record_postprocessor",
+        "is_binary": "{BINARY_CLASSIFICATION}",
+        "in": [
+          "idx",
+          "y",
+          "y_pred_probas",
+          "entities",
+          "num_examples"
+        ],
+        "out": [
+          "record_examples"
+        ]
+      }
+    ],
+    "out": [
+      "y_pred_probas"
+    ]
+  },
+  "train": {
+    "batch_size": 24,
+    "train_metrics": [
+      {
+        "name": "accuracy",
+        "inputs": [
+          "y",
+          "y_pred_ids"
+        ]
+      }
+    ],
+    "metrics": [
+      {
+        "name": "record_em_score",
+        "inputs": [
+          "record_examples"
+        ]
+      },
+      {
+        "name": "record_f1_score",
+        "inputs": [
+          "record_examples"
+        ]
+      },
+      {
+        "name": "accuracy",
+        "inputs": [
+          "y",
+          "y_pred_ids"
+        ]
+      }
+    ],
+    "validation_patience": 10,
+    "val_every_n_batches": 3066,
+    "log_every_n_batches": 3066,
+    "show_examples": false,
+    "class_name": "torch_trainer",
+    "evaluation_targets": [
+      "valid"
+    ],
+    "tensorboard_log_dir": "{MODEL_PATH}/",
+    "pytest_max_batches": 2,
+    "pytest_batch_size": 2
+  }
+}
diff --git a/deeppavlov/core/models/torch_model.py b/deeppavlov/core/models/torch_model.py
index 67bfee27ab..f1b7811428 100644
--- a/deeppavlov/core/models/torch_model.py
+++ b/deeppavlov/core/models/torch_model.py
@@ -16,7 +16,7 @@
 from copy import deepcopy
 from logging import getLogger
 from pathlib import Path
-from typing import Optional
+from typing import Optional, Tuple

 import torch
 from overrides import overrides
@@ -70,9 +70,12 @@ def __init__(self,
                  device: str = "gpu",
                  learning_rate_drop_div: Optional[float] = None,
                  load_before_drop: bool = True,
                  min_learning_rate: float = 0.,
-                 *args, **kwargs):
+                 multi_gpu: bool = True,
+                 *args,
+                 **kwargs):
         super().__init__(*args, **kwargs)
         self.device = torch.device("cuda" if torch.cuda.is_available() and device == "gpu" else "cpu")
+        self.is_multi_gpu = multi_gpu
         self.model = None
         self.optimizer = None
         self.lr_scheduler = None
@@ -230,3 +233,27 @@ def process_event(self, event_name: str, data: dict) -> None:
     @abstractmethod
     def train_on_batch(self, x: list, y: list):
         pass
+
+    @property
+    def accepted_keys(self) -> Tuple[str]:
+        if self.model is None:
+            raise AttributeError
+        if self.is_data_parallel:
+            accepted_keys = self.model.module.forward.__code__.co_varnames
+        else:
+            accepted_keys = self.model.forward.__code__.co_varnames
+        return accepted_keys
+
+    @property
+    def is_data_parallel(self) -> bool:
+        if self.model is None:
+            raise AttributeError
+        return isinstance(self.model, torch.nn.DataParallel)
+
+    def _make_data_parallel(self):
+        if self.device.type == "cuda" and torch.cuda.device_count() > 1:
+            if self.model is None:
+                raise AttributeError
+            self.model = torch.nn.DataParallel(self.model)
+        else:
+            self.is_multi_gpu = False
diff --git a/deeppavlov/dataset_readers/huggingface_dataset_reader.py b/deeppavlov/dataset_readers/huggingface_dataset_reader.py
index 8311e5319b..9fc0e587b6 100644
--- a/deeppavlov/dataset_readers/huggingface_dataset_reader.py
+++ b/deeppavlov/dataset_readers/huggingface_dataset_reader.py
@@ -73,7 +73,7 @@ def read(self,
         dataset = load_dataset(path=path, name=name, split=list(split_mapping.values()), **kwargs)
         if path == "super_glue" and name == "copa":
             dataset = [dataset_split.map(preprocess_copa, batched=True) for dataset_split in dataset]
-        elif path == "super_glue" and name == "boolq":
+        elif (path == "super_glue" and name == "boolq") or (path == "russian_super_glue" and name == "danetqa"):
             dataset = load_dataset(path=path,
                                    name=name,
                                    split=interleave_splits(splits=list(split_mapping.values()),
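The three members added to TorchModel above (accepted_keys, is_data_parallel, _make_data_parallel) were previously duplicated in torch_transformers_classifier.py and torch_transformers_squad.py; the hunks further down delete those copies. The introspection trick they rely on is plain Python: forward.__code__.co_varnames lists the parameter names of forward(), which lets a batch dict be filtered down to the keys the wrapped model actually accepts. A standalone sketch:

    import torch

    class TinyModel(torch.nn.Module):
        def forward(self, input_ids, attention_mask=None):
            return input_ids

    model = TinyModel()
    accepted = model.forward.__code__.co_varnames  # parameter names (plus locals) of forward()
    batch = {"input_ids": torch.ones(2, 4), "attention_mask": torch.ones(2, 4),
             "token_type_ids": torch.zeros(2, 4)}
    filtered = {k: v for k, v in batch.items() if k in accepted}
    print(sorted(filtered))  # ['attention_mask', 'input_ids']

    # Under DataParallel the real module lives at .module, hence the branch in accepted_keys.
    wrapped = torch.nn.DataParallel(model)
    assert wrapped.module.forward.__code__.co_varnames == accepted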
diff --git a/deeppavlov/models/preprocessors/torch_transformers_preprocessor.py b/deeppavlov/models/preprocessors/torch_transformers_preprocessor.py
index ed690a959a..e62afe68b2 100644
--- a/deeppavlov/models/preprocessors/torch_transformers_preprocessor.py
+++ b/deeppavlov/models/preprocessors/torch_transformers_preprocessor.py
@@ -58,13 +58,16 @@ def __init__(self,
                  do_lower_case: bool = True,
                  max_seq_length: int = 512,
                  return_tokens: bool = False,
+                 truncation: Union[bool, str] = 'longest_first',
+                 padding: Union[bool, str] = 'longest',
                  **kwargs) -> None:
         self.max_seq_length = max_seq_length
         self.return_tokens = return_tokens
+        self.truncation = truncation
+        self.padding = padding
         if Path(vocab_file).is_file():
             vocab_file = str(expand_path(vocab_file))
-            self.tokenizer = AutoTokenizer(vocab_file=vocab_file,
-                                           do_lower_case=do_lower_case)
+            self.tokenizer = AutoTokenizer(vocab_file=vocab_file, do_lower_case=do_lower_case)
         else:
             self.tokenizer = AutoTokenizer.from_pretrained(vocab_file,
                                                            do_lower_case=do_lower_case)
@@ -79,17 +82,20 @@ def tokenize_mc_examples(self,
         examples = []
         for context_list, choice_list in zip(contexts, choices):
             for context, choice in zip(context_list, choice_list):
-                tokenized_input = self.tokenizer.encode_plus(text=context,
-                                                             text_pair=choice,
-                                                             return_attention_mask=True,
-                                                             add_special_tokens=True,
-                                                             truncation=True)
+                tokenized_input = self.tokenizer(
+                    text=context,
+                    text_pair=choice,
+                    return_attention_mask=True,
+                    add_special_tokens=True,
+                    truncation=self.truncation,
+                    max_length=self.max_seq_length,
+                )
                 examples.append(tokenized_input)

         padded_examples = self.tokenizer.pad(
             examples,
-            padding=True,
+            padding=self.padding,
             max_length=self.max_seq_length,
             return_tensors='pt',
         )
@@ -140,9 +146,13 @@ def __init__(self,
                  do_lower_case: bool = True,
                  max_seq_length: int = 512,
                  return_tokens: bool = False,
+                 truncation: Union[bool, str] = 'longest_first',
+                 padding: Union[bool, str] = 'longest',
                  **kwargs) -> None:
         self.max_seq_length = max_seq_length
         self.return_tokens = return_tokens
+        self.truncation = truncation
+        self.padding = padding
         if Path(vocab_file).is_file():
             vocab_file = str(expand_path(vocab_file))
             self.tokenizer = AutoTokenizer(vocab_file=vocab_file,
@@ -170,14 +180,16 @@ def __call__(self, texts_a: List[str], texts_b: Optional[List[str]] = None) -> U

         if isinstance(texts_a, tuple):
             texts_a = list(texts_a)

-        input_features = self.tokenizer(text=texts_a,
-                                        text_pair=texts_b,
-                                        add_special_tokens=True,
-                                        max_length=self.max_seq_length,
-                                        padding='max_length',
-                                        return_attention_mask=True,
-                                        truncation=True,
-                                        return_tensors='pt')
+        input_features = self.tokenizer(
+            text=texts_a,
+            text_pair=texts_b,
+            add_special_tokens=True,
+            max_length=self.max_seq_length,
+            padding=self.padding,
+            return_attention_mask=True,
+            truncation=self.truncation,
+            return_tensors='pt'
+        )

         return input_features
diff --git a/deeppavlov/models/torch_bert/torch_transformers_classifier.py b/deeppavlov/models/torch_bert/torch_transformers_classifier.py
index 3bf8077518..f44415d510 100644
--- a/deeppavlov/models/torch_bert/torch_transformers_classifier.py
+++ b/deeppavlov/models/torch_bert/torch_transformers_classifier.py
@@ -15,7 +15,7 @@
 import re
 from logging import getLogger
 from pathlib import Path
-from typing import List, Dict, Union, Optional, Tuple
+from typing import List, Dict, Union, Optional

 import numpy as np
 import torch
@@ -177,20 +177,6 @@ def __call__(self, features: Dict[str, torch.tensor]) -> Union[List[int], List[L

         return pred

-    # TODO move to the super class
-    @property
-    def accepted_keys(self) -> Tuple[str]:
-        if self.is_data_parallel:
-            accepted_keys = self.model.module.forward.__code__.co_varnames
-        else:
-            accepted_keys = self.model.forward.__code__.co_varnames
-        return accepted_keys
-
-    # TODO move to the super class
-    @property
-    def is_data_parallel(self) -> bool:
-        return isinstance(self.model, torch.nn.DataParallel)
-
     # TODO this method requires massive refactoring
     @overrides
     def load(self, fname=None):
@@ -240,9 +226,8 @@ def load(self, fname=None):
         else:
             raise ConfigError("No pre-trained BERT model is given.")

-        # TODO that should probably be parametrized in config
-        if self.device.type == "cuda" and torch.cuda.device_count() > 1:
-            self.model = torch.nn.DataParallel(self.model)
+        if self.is_multi_gpu:
+            self._make_data_parallel()

         self.model.to(self.device)
@@ -250,7 +235,8 @@ def load(self, fname=None):
         self.optimizer = getattr(torch.optim, self.optimizer_name)(
             self.model.parameters(), **self.optimizer_parameters)
         if self.lr_scheduler_name is not None:
             self.lr_scheduler = getattr(torch.optim.lr_scheduler, self.lr_scheduler_name)(
-                self.optimizer, **self.lr_scheduler_parameters)
+                self.optimizer, **self.lr_scheduler_parameters
+            )

         if self.load_path:
             log.info(f"Load path {self.load_path} is given.")
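The preprocessor changes above replace the hard-coded truncation=True / padding='max_length' (and padding=True in the multiple-choice path) with values taken from the config, which is what lets the new configs pad each batch only to its longest member. A quick demonstration with a stock Hugging Face tokenizer (roberta-base as a small stand-in):

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("roberta-base")
    texts_a = ["A short premise.", "A noticeably longer premise that dominates this batch."]
    texts_b = ["Hypothesis one.", "Hypothesis two."]

    # padding='longest' pads only to the longest sequence in this batch...
    dynamic = tokenizer(text=texts_a, text_pair=texts_b, truncation="longest_first",
                        padding="longest", max_length=128, return_tensors="pt")
    # ...while padding='max_length' reproduces the old fixed-width behaviour.
    fixed = tokenizer(text=texts_a, text_pair=texts_b, truncation="longest_first",
                      padding="max_length", max_length=128, return_tensors="pt")
    print(dynamic["input_ids"].shape, fixed["input_ids"].shape)  # e.g. (2, 17) vs (2, 128)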
diff --git a/deeppavlov/models/torch_bert/torch_transformers_multiplechoice.py b/deeppavlov/models/torch_bert/torch_transformers_multiplechoice.py
index efaae6c206..39c73e42ec 100644
--- a/deeppavlov/models/torch_bert/torch_transformers_multiplechoice.py
+++ b/deeppavlov/models/torch_bert/torch_transformers_multiplechoice.py
@@ -165,8 +165,12 @@ def load(self, fname = None):
         if self.pretrained_bert:
             log.info(f"From pretrained {self.pretrained_bert}.")
-            config = AutoConfig.from_pretrained(self.pretrained_bert, num_labels=self.n_classes,
-                                                output_attentions=False, output_hidden_states=False)
+            config = AutoConfig.from_pretrained(
+                self.pretrained_bert,
+                num_labels=self.n_classes,
+                output_attentions=False,
+                output_hidden_states=False,
+            )

             self.model = AutoModelForMultipleChoice.from_pretrained(self.pretrained_bert, config=config)
diff --git a/deeppavlov/models/torch_bert/torch_transformers_squad.py b/deeppavlov/models/torch_bert/torch_transformers_squad.py
index 9506ce924e..b8807a8948 100644
--- a/deeppavlov/models/torch_bert/torch_transformers_squad.py
+++ b/deeppavlov/models/torch_bert/torch_transformers_squad.py
@@ -155,18 +155,6 @@ def train_on_batch(self, features: List[InputFeatures], y_st: List[List[int]], y

         return {'loss': loss.item()}

-    @property
-    def accepted_keys(self) -> Tuple[str]:
-        if self.is_data_parallel:
-            accepted_keys = self.model.module.forward.__code__.co_varnames
-        else:
-            accepted_keys = self.model.forward.__code__.co_varnames
-        return accepted_keys
-
-    @property
-    def is_data_parallel(self) -> bool:
-        return isinstance(self.model, torch.nn.DataParallel)
-
     def __call__(self, features: List[InputFeatures]) -> Tuple[List[int], List[int], List[float], List[float]]:
         """get predictions using features as input