Release 1.0.2

IgnatovFedor · web-flow · commit a1e9477bf4c2 · 2023-01-10T13:47:00.000+06:00
diff --git a/README.md b/README.md
@@ -181,6 +181,9 @@ from deeppavlov import evaluate_model
 model = evaluate_model(<config_path>, install=True, download=True)
 ```
 
+DeepPavlov also [lets you](https://docs.deeppavlov.ai/en/master/intro/python.html) build a model from components for
+inference using Python.
+
 ## License
 
 DeepPavlov is Apache 2.0 - licensed.
diff --git a/deeppavlov/_meta.py b/deeppavlov/_meta.py
@@ -1,4 +1,4 @@
-__version__ = '1.0.1'
+__version__ = '1.0.2'
 __author__ = 'Neural Networks and Deep Learning lab, MIPT'
 __description__ = 'An open source library for building end-to-end dialog systems and training chatbots.'
 __keywords__ = ['NLP', 'NER', 'SQUAD', 'Intents', 'Chatbot']
diff --git a/docs/index.rst b/docs/index.rst
@@ -9,6 +9,7 @@ Welcome to DeepPavlov's documentation!
    QuickStart <intro/quick_start>
    General concepts <intro/overview>
    Configuration file <intro/configuration>
+   Python pipelines <intro/python.ipynb>
    Models overview <features/overview>
 
 
diff --git a/docs/intro/python.ipynb b/docs/intro/python.ipynb
@@ -0,0 +1,141 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "6d5cd16b",
+   "metadata": {},
+   "source": [
+    "#### Python pipelines"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "da10fd80",
+   "metadata": {},
+   "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/deeppavlov/DeepPavlov/blob/master/docs/intro/python.ipynb)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d55ebe35",
+   "metadata": {},
+   "source": [
+    "Models can be built and used directly in Python, without .json configuration files.\n",
+    "\n",
+    "The code below is an alternative to building the [insults_kaggle_bert](https://github.com/deeppavlov/DeepPavlov/blob/master/deeppavlov/configs/classifiers/insults_kaggle_bert.json) model from its configuration file:\n",
+    "\n",
+    "```python\n",
+    "from deeppavlov import build_model\n",
+    "\n",
+    "model = build_model('insults_kaggle_bert', download=True)\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fa1db63b",
+   "metadata": {},
+   "source": [
+    "First, define variables for the model components and download the model data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9d6671e2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from deeppavlov.core.commands.utils import expand_path\n",
+    "from deeppavlov.download import download_resource\n",
+    "\n",
+    "\n",
+    "classifiers_path = expand_path('~/.deeppavlov/models/classifiers')\n",
+    "model_path = classifiers_path / 'insults_kaggle_torch_bert'\n",
+    "transformer_name = 'bert-base-uncased'\n",
+    "\n",
+    "download_resource(\n",
+    "    'http://files.deeppavlov.ai/deeppavlov_data/classifiers/insults_kaggle_torch_bert_v5.tar.gz',\n",
+    "    {classifiers_path}\n",
+    ")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "332d644e",
+   "metadata": {},
+   "source": [
+    "Then, initialize model components."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "809c31ad",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from deeppavlov.core.data.simple_vocab import SimpleVocabulary\n",
+    "from deeppavlov.models.classifiers.proba2labels import Proba2Labels\n",
+    "from deeppavlov.models.preprocessors.torch_transformers_preprocessor import TorchTransformersPreprocessor\n",
+    "from deeppavlov.models.torch_bert.torch_transformers_classifier import TorchTransformersClassifierModel\n",
+    "\n",
+    "\n",
+    "preprocessor = TorchTransformersPreprocessor(\n",
+    "    vocab_file=transformer_name,\n",
+    "    max_seq_length=64\n",
+    ")\n",
+    "\n",
+    "classes_vocab = SimpleVocabulary(\n",
+    "    load_path=model_path/'classes.dict',\n",
+    "    save_path=model_path/'classes.dict'\n",
+    ")\n",
+    "\n",
+    "classifier =  TorchTransformersClassifierModel(\n",
+    "    n_classes=classes_vocab.len,\n",
+    "    return_probas=True,\n",
+    "    pretrained_bert=transformer_name,\n",
+    "    save_path=model_path/'model',\n",
+    "    optimizer_parameters={'lr': 1e-05}\n",
+    ")\n",
+    "\n",
+    "proba2labels = Proba2Labels(max_proba=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "87e8ec20",
+   "metadata": {},
+   "source": [
+    "Finally, create the model from the components. ``Element`` is a wrapper for a component: it receives the component and the names of its incoming and outgoing arguments. ``Model`` combines ``Element``s into a pipeline."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "acfe29de",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from deeppavlov import Element, Model\n",
+    "\n",
+    "model = Model(\n",
+    "    x=['x'],\n",
+    "    out=['y_pred_labels'],\n",
+    "    pipe=[\n",
+    "        Element(component=preprocessor, x=['x'], out=['bert_features']),\n",
+    "        Element(component=classifier, x=['bert_features'], out=['y_pred_probas']),\n",
+    "        Element(component=proba2labels, x=['y_pred_probas'], out=['y_pred_ids']),\n",
+    "        Element(component=classes_vocab, x=['y_pred_ids'], out=['y_pred_labels'])\n",
+    "    ]\n",
+    ")\n",
+    "\n",
+    "model(['you are stupid', 'you are smart'])"
+   ]
+  }
+ ],
+ "metadata": {},
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/requirements.txt b/requirements.txt
@@ -2,7 +2,7 @@ aio-pika>=3.2.2,<6.9.0
 fastapi>=0.47.0,<0.78.0
 filelock>=3.0.0,<3.8.0
 nltk>=3.2.5,<3.8.0
-numpy
+numpy<1.24
 overrides==4.1.2
 pandas>=1.0.0,<1.5.0
 prometheus-client>=0.13.0,<0.15.0
diff --git a/utils/prepare/upload.py b/utils/prepare/upload.py
@@ -13,8 +13,7 @@
 # limitations under the License.
 
 import argparse
-import os
-import shutil
+import pathlib
 import tarfile
 from pathlib import Path
 
@@ -23,45 +22,48 @@
 from hashes import main
 
 
-def upload(config_in_file):
+def upload(config_in_file: str, tar_name: str, tar_output_dir: Path):
+    """Pack the files under a config's ``MODEL_PATH`` into a ``.tar.gz`` archive and hash it.
+
+    Args:
+        config_in_file: path to the model config; it must exist on disk.
+        tar_name: archive name without the ``.tar.gz`` extension. When ``None``,
+            the config file's stem is used.
+        tar_output_dir: existing directory in which the archive is created.
+
+    Raises:
+        RuntimeError: if ``tar_output_dir`` or the config does not exist, or if
+            the target archive already exists (it is never overwritten).
+        FileNotFoundError: if the config's ``MODEL_PATH`` does not exist.
+    """
+    # Imported locally so that ``os.sep`` does not have to be reached through
+    # ``pathlib.os``, which is an implementation detail of the pathlib module.
+    import os
+
+    if not tar_output_dir.exists():
+        raise RuntimeError(f'A folder {tar_output_dir} does not exist')
+
+    print(f'Config: {config_in_file}')
+    if not Path(config_in_file).exists():
+        raise RuntimeError(f'A config {config_in_file} does not exist')
 
-    print(config_in_file)
     config_in = parse_config(config_in_file)
     config_in_file = find_config(config_in_file)
 
     model_path = Path(config_in['metadata']['variables']['MODEL_PATH']).expanduser()
-    models_path = Path(config_in['metadata']['variables']['MODELS_PATH']).expanduser()
     model_name, class_name = config_in_file.stem, config_in_file.parent.name
-    
-    if str(model_name) not in str(model_path):
-        raise(f'{model_name} is not the path of the {model_path}')
-    
-    arcname = str(model_path).split("models/")[1]
-    tar_path = models_path/model_name
-    tmp_folder = f'/tmp/'
-    tmp_tar = tmp_folder + f'{model_name}.tar.gz'
 
-    print("model_path", model_path)
-    print("class_name", class_name)
-    print("model_name", model_name)
-    
-    print("Start tarring")
-    archive = tarfile.open(tmp_tar, "w|gz")
-    archive.add(model_path, arcname=arcname)
-    archive.close()
+    if tar_name is None:
+        tar_name = model_name
+        print(f'tar_name set to {tar_name}')
+
+    # tar_output_dir is a Path, so the joined result is already a Path.
+    full_tar_name = tar_output_dir / f'{tar_name}.tar.gz'
+    if full_tar_name.exists():
+        raise RuntimeError(f'An archive {full_tar_name} already exists')
+
+    print(f'model_path: {model_path}')
+    print(f'class_name: {class_name}')
+    print(f'model_name: {model_name}')
+    # Check before opening the archive: tarfile.open(..., "w|gz") creates the file
+    # immediately, so a missing model path would leave a stub archive behind that
+    # trips the "already exists" check on the next run.
+    if not model_path.exists():
+        raise FileNotFoundError(f'A model path {model_path} does not exist')
+    print(f'Start tarring to {full_tar_name}')
+    with tarfile.open(str(full_tar_name), "w|gz") as archive:
+        # arcname=os.sep places the contents of model_path at the archive root.
+        archive.add(model_path, arcname=os.sep)
+
     print("Stop tarring")
+    print(f'Tar archive: {full_tar_name} has been created')
 
     print("Calculating hash")
-    main(tmp_tar)
-
-    print("tmp_tar", tmp_tar)
-    command = f'scp -r {tmp_folder}{model_name}* share.ipavlov.mipt.ru:/home/export/v1/{class_name}'
-    donwload_url = f'http://files.deeppavlov.ai/v1/{class_name}/{model_name}.tar.gz'
-    print(command, donwload_url, sep='\n')
+    main(full_tar_name)
 
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
-    parser.add_argument("config_in", help="path to a config", type=str)
+    # --config_in stays mandatory (it used to be a positional argument): without it
+    # upload() would receive None and fail with a TypeError instead of a usage error.
+    parser.add_argument('-c', '--config_in', help='path to a config', required=True, type=str)
+    parser.add_argument('-n', '--tar_name', help='name of the tar archive (without tar.gz extension)',
+                        default=None, required=False, type=str)
+    parser.add_argument('-o', '--tar_output_dir', help='dir to save a tar archive', default='./',
+                        required=False, type=Path)
     args = parser.parse_args()
-    upload(args.config_in)
+    upload(args.config_in, args.tar_name, args.tar_output_dir)

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-__version__ = '1.0.1'`
	`1`	`+__version__ = '1.0.2'`
`2`	`2`	`__author__ = 'Neural Networks and Deep Learning lab, MIPT'`
`3`	`3`	`__description__ = 'An open source library for building end-to-end dialog systems and training chatbots.'`
`4`	`4`	`__keywords__ = ['NLP', 'NER', 'SQUAD', 'Intents', 'Chatbot']`