From 2bacba61b89e3add49539dde4799c66f6331207c Mon Sep 17 00:00:00 2001
From: nick863 <30440255+nick863@users.noreply.github.com>
Date: Wed, 17 Apr 2024 18:43:40 -0700
Subject: [PATCH] Add notebook demonstrating saving and uploading of
 evaluators. (#2769)

# Description
In this PR we add a notebook demonstrating how to save the standard evaluators and upload them to an Azure workspace or registry. At the end of the notebook we introduce a function that uploads all the evaluators in a given namespace.

# All Promptflow Contribution checklist:
- [x] **The pull request does not introduce [breaking changes].**
- [x] **CHANGELOG is updated for new features, bug fixes or other significant changes.**
- [x] **I have read the [contribution guidelines](../CONTRIBUTING.md).**
- [x] **Create an issue and link to the pull request to get dedicated review from promptflow team. Learn more: [suggested workflow](../CONTRIBUTING.md#suggested-workflow).**

## General Guidelines and Best Practices
- [x] Title of the pull request is clear and informative.
- [x] There are a small number of commits, each of which has an informative message. This means that previously merged commits do not appear in the history of the PR. For more information on cleaning up the commits in your PR, [see this page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md).

### Testing Guidelines
- [x] Pull request includes test coverage for the included changes.
---
 .gitignore                                    |   4 +
 .../samples/EvaluatorUpload.ipynb             | 494 ++++++++++++++++++
 2 files changed, 498 insertions(+)
 create mode 100644 src/promptflow-evals/samples/EvaluatorUpload.ipynb

diff --git a/.gitignore b/.gitignore
index 74df3834b44..36c50ebf596 100644
--- a/.gitignore
+++ b/.gitignore
@@ -191,3 +191,7 @@ config.json
 poetry.lock
 # promptflow subpackages __init__
 src/promptflow-*/promptflow/__init__.py
+
+# Eclipse project files
+**/.project
+**/.pydevproject
diff --git a/src/promptflow-evals/samples/EvaluatorUpload.ipynb b/src/promptflow-evals/samples/EvaluatorUpload.ipynb
new file mode 100644
index 00000000000..aaf81325c63
--- /dev/null
+++ b/src/promptflow-evals/samples/EvaluatorUpload.ipynb
@@ -0,0 +1,494 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "6018523f-3425-4bb3-9810-d31b8912991c",
+   "metadata": {},
+   "source": [
+    "# Upload of evaluators\n",
+    "In this notebook we demonstrate saving the standard evaluators and uploading them to an Azure workspace or registry.\n",
+    "\n",
+    "### Imports"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d3e21ceb-1e58-4ba7-884a-5e103aea7ecc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "import os\n",
+    "import shutil\n",
+    "import uuid\n",
+    "\n",
+    "import pandas as pd\n",
+    "import yaml\n",
+    "\n",
+    "from azure.ai.ml import MLClient\n",
+    "from azure.ai.ml.entities import Model\n",
+    "from azure.identity import DefaultAzureCredential\n",
+    "\n",
+    "from promptflow.client import PFClient\n",
+    "from promptflow.entities import Run\n",
+    "from promptflow.evals.evaluators import F1ScoreEvaluator"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "846babb1-59b5-4d38-bb3a-d6eebd39ebee",
+   "metadata": {},
+   "source": [
+    "## End-to-end demonstration of evaluator saving and uploading to Azure\n",
+    "### Saving the standard evaluators in the flex format\n",
+    "First we create the promptflow client, which will be used to save the existing flows.\n",
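+    "\n",
+    "As a side note, `pf.flows.save` (used below) works for any callable evaluator class, not only the standard ones. A minimal sketch with a hypothetical custom evaluator (the class name and metric are illustrative, not part of promptflow):\n",
+    "```python\n",
+    "class ExactMatchEvaluator:\n",
+    "    \"\"\"Toy evaluator: returns 1.0 when the answer matches the ground truth exactly.\"\"\"\n",
+    "\n",
+    "    def __call__(self, *, answer: str, ground_truth: str):\n",
+    "        return {\"exact_match\": float(answer.strip() == ground_truth.strip())}\n",
+    "\n",
+    "# pf.flows.save(ExactMatchEvaluator, path='./exact_match')\n",
+    "```"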
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3b03e557-19bd-4a2a-ae3f-bbaf6846fb33",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pf = PFClient()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e3fcaf38-6caa-4c1b-ada5-479686232cd1",
+   "metadata": {},
+   "source": [
+    "We will use the F1 score evaluator from the standard evaluator set and save it to a local directory."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "66fae04d-f6d0-4cc3-b149-a6058158c797",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pf.flows.save(F1ScoreEvaluator, path='./f1_score')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "03552c5a-3ecc-4154-a0bd-e3fe2831e323",
+   "metadata": {},
+   "source": [
+    "Let us inspect what has been saved."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0a27602c-e2c8-49c2-8d00-1eb5a11e55a1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print('\\n'.join(os.listdir('f1_score')))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7dc4dbfe-faa0-44f9-a53e-310c946d91fe",
+   "metadata": {},
+   "source": [
+    "The file defining the entry point of our model is called `flow.flex.yaml`; let us display it."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7c30c3e3-8113-4e0f-9210-b062e7354099",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with open(os.path.join('f1_score', 'flow.flex.yaml')) as fp:\n",
+    "    flex_definition = yaml.safe_load(fp)\n",
+    "print(f\"The evaluator entrypoint is {flex_definition['entry']}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d9bb9a12-8bdd-4679-9957-998f3c7ceb75",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional: submit a batch run against the saved flow.\n",
+    "# Note: this requires data.jsonl, which is created later in this notebook.\n",
+    "run = Run(\n",
+    "    flow='f1_score',\n",
+    "    data='data.jsonl',\n",
+    "    name=f'test_{uuid.uuid1()}'\n",
+    ")\n",
+    "run = pf.runs.create_or_update(run=run)\n",
+    "pf.stream(run)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6e7ee69f-4db3-4f5f-874a-5dc95d2e2f7c",
+   "metadata": {},
+   "source": [
+    "**Hack for standard evaluators.** If we try to load our evaluator directly, we will get an error, because the generated entry script modifies the `__path__` variable. That works inside the Python package, but fails when the evaluator is run as a standalone script. To fix it, we remove this line from the code.\n",
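+    "\n",
+    "For reference, the removed line is a namespace-package declaration; in the saved entry script it looks roughly like the following (the exact form is an assumption, shown only for illustration):\n",
+    "```python\n",
+    "# Only valid inside a package; breaks standalone execution, so fix_evaluator_code() below strips it.\n",
+    "__path__ = __import__('pkgutil').extend_path(__path__, __name__)\n",
+    "```\n",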
+    "\n",
+    "**This step is not needed for custom user evaluators!**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "918d39c2-dfb1-4660-b664-1830adf2708c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def fix_evaluator_code(flex_dir: str) -> None:\n",
+    "    \"\"\"\n",
+    "    Read the flow.flex.yaml file from flex_dir and remove the line modifying __path__.\n",
+    "\n",
+    "    :param flex_dir: The directory with the evaluator saved in the flex format.\n",
+    "    \"\"\"\n",
+    "    with open(os.path.join(flex_dir, 'flow.flex.yaml')) as fp:\n",
+    "        flex_definition = yaml.safe_load(fp)\n",
+    "    entry_script = flex_definition['entry'].split(':')[0] + '.py'\n",
+    "    if entry_script == '__init__.py':\n",
+    "        # Copy the entry script line by line, skipping the __path__ manipulation,\n",
+    "        # then atomically replace the original file.\n",
+    "        with open(os.path.join(flex_dir, entry_script)) as f:\n",
+    "            with open(os.path.join(flex_dir, '_' + entry_script), 'w') as new_f:\n",
+    "                for line in f:\n",
+    "                    if '__path__' not in line:\n",
+    "                        new_f.write(line)\n",
+    "        os.replace(os.path.join(flex_dir, '_' + entry_script), os.path.join(flex_dir, entry_script))\n",
+    "\n",
+    "fix_evaluator_code('f1_score')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "88fbd6bf-6977-457b-aa55-f6dad9ec1f73",
+   "metadata": {},
+   "source": [
+    "Now let us test the flow with a simple dataset consisting of one ground truth and one actual sentence."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fb73f698-7cab-4b30-8947-411c2060560c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data = pd.DataFrame({\n",
+    "    \"ground_truth\": [\"January is the coldest winter month.\"],\n",
+    "    \"answer\": [\"June is the coldest summer month.\"]\n",
+    "})\n",
+    "in_file = 'data.jsonl'\n",
+    "data.to_json(in_file, orient='records', lines=True, index=False)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bd3589c8-0df8-489f-b5a8-beb5ae2aec6a",
+   "metadata": {},
+   "source": [
+    "Load the evaluator in the FLEX format and test it."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "841d6109-23b9-45c5-b709-b588f932f29d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "flow_result = pf.test(flow='f1_score', inputs='data.jsonl')\n",
+    "print(f\"Flow outputs: {flow_result}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4a94aab5-73f8-4c7d-a7e0-a92853db0198",
+   "metadata": {},
+   "source": [
+    "Now we have all the tools to upload our model to Azure.\n",
+    "### Uploading the evaluator to Azure\n",
+    "First we need to authenticate to Azure. For this purpose we will use a configuration file with the following structure:\n",
+    "```json\n",
+    "{\n",
+    "    \"resource_group_name\": \"resource-group-name\",\n",
+    "    \"workspace_name\": \"ws-name\",\n",
+    "    \"subscription_id\": \"subscription-uuid\",\n",
+    "    \"registry_name\": \"registry-name\"\n",
+    "}\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7188cb7d-d7c1-460f-9f3e-91546d8b8b09",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with open('config.json') as f:\n",
+    "    configuration = json.load(f)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "397e6627-67d8-43c3-b491-e3a8802197b2",
+   "metadata": {},
+   "source": [
+    "#### Uploading to the workspace\n",
+    "In this scenario we do not need the `registry_name` in our configuration.\n",
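+    "\n",
+    "Once `ml_client` is created below, you can optionally check whether the evaluator name already has versions before uploading a new one. A minimal sketch (assuming the evaluator operations expose a `list` method, as the other azure-ai-ml asset operations do):\n",
+    "```python\n",
+    "# An empty list means the upload below will create version 1 of the asset.\n",
+    "existing = list(ml_client.evaluators.list('f1_score_uploaded'))\n",
+    "print(f'{len(existing)} existing version(s) found')\n",
+    "```"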
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "36338014-05f9-4f37-9fb0-726bb1c137b8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "config_ws = configuration.copy()\n",
+    "del config_ws[\"registry_name\"]\n",
+    "\n",
+    "credential = DefaultAzureCredential()\n",
+    "ml_client = MLClient(\n",
+    "    credential=credential,\n",
+    "    **config_ws\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6fe4a912-047f-4614-85a7-cfff86874303",
+   "metadata": {},
+   "source": [
+    "We will use the evaluator operations API to upload our model to the workspace."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4a4edb19-f0b8-498c-908d-c7e23ba7b30d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "eval_model = Model(\n",
+    "    path=\"f1_score\",\n",
+    "    name='f1_score_uploaded',\n",
+    "    description=\"F1 score evaluator.\",\n",
+    ")\n",
+    "ml_client.evaluators.create_or_update(eval_model)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d6423eb6-415c-463c-839d-da0cf70bf245",
+   "metadata": {},
+   "source": [
+    "Now we will retrieve the model and check that it is functional."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e18dd491-ee43-4dda-8a5b-d5317f8cb64d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ml_client.evaluators.download('f1_score_uploaded', version='1', download_path='f1_score_downloaded')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a15468f0-681a-49fe-a883-0da44f68293f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "flow_result = pf.test(flow=os.path.join('f1_score_downloaded', 'f1_score_uploaded', 'f1_score'), inputs='data.jsonl')\n",
+    "print(f\"Flow outputs: {flow_result}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d0708cd5-66e7-46f2-a8d0-b41e82278a54",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "shutil.rmtree('f1_score_downloaded')\n",
+    "assert not os.path.isdir('f1_score_downloaded')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fd4a4588-a0b7-4bd9-adc6-6595084da3b7",
+   "metadata": {},
+   "source": [
+    "#### Uploading to the registry\n",
+    "In this scenario we do not need the `workspace_name` in our configuration."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "75b57845-77f0-4a2e-9b2f-ccb3fb825da0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "config_reg = configuration.copy()\n",
+    "del config_reg[\"workspace_name\"]\n",
+    "\n",
+    "ml_client = MLClient(\n",
+    "    credential=credential,\n",
+    "    **config_reg\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7eb0f024-c6e5-4e51-aaaa-021eaa4c14c4",
+   "metadata": {},
+   "source": [
+    "We create a new `Model` object here, because `create_or_update` changes the model in place, adding a link to the workspace, which does not exist in the registry."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b488f4ff-b97f-43e6-82ab-a78a2e9e2da8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "eval_model = Model(\n",
+    "    path=\"f1_score\",\n",
+    "    name='f1_score_uploaded',\n",
+    "    description=\"F1 score evaluator.\",\n",
+    ")\n",
+    "ml_client.evaluators.create_or_update(eval_model)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c6603c7d-7eaf-454e-a59a-2dd01fd3afc6",
+   "metadata": {},
+   "source": [
+    "Now we will perform the same sanity check that we did for the workspace.\n",
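+    "\n",
+    "As before, the downloaded copy should behave exactly like the local one; a small sketch of that check, runnable after the download in the next cell completes:\n",
+    "```python\n",
+    "# Compare outputs of the local flow and the registry-downloaded flow on the same input.\n",
+    "local_result = pf.test(flow='f1_score', inputs='data.jsonl')\n",
+    "downloaded_result = pf.test(\n",
+    "    flow=os.path.join('f1_score_downloaded', 'f1_score_uploaded', 'f1_score'),\n",
+    "    inputs='data.jsonl')\n",
+    "assert local_result == downloaded_result, 'Downloaded evaluator diverged from the local copy'\n",
+    "```"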
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ccfc3d3b-db1a-4a5a-97c5-4ff701051695",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ml_client.evaluators.download('f1_score_uploaded', version='1', download_path='f1_score_downloaded')\n",
+    "flow_result = pf.test(flow=os.path.join('f1_score_downloaded', 'f1_score_uploaded', 'f1_score'), inputs='data.jsonl')\n",
+    "print(f\"Flow outputs: {flow_result}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6e5dc1c9-9c7a-40f7-9b23-9b30bedd5dd4",
+   "metadata": {},
+   "source": [
+    "Finally, we will do the cleanup."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a90b52e8-1f46-454d-a6a5-2ad725e927fe",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "shutil.rmtree('f1_score_downloaded')\n",
+    "assert not os.path.isdir('f1_score_downloaded')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "57f3ce40-5b55-459a-829a-5e4373c82218",
+   "metadata": {},
+   "source": [
+    "### Uploading all the standard evaluators\n",
+    "We take evaluators from two different namespaces, `evaluators` and `evaluators.content_safety`, and upload every evaluator class found in them."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6058cc1a-ac9e-4d49-a7dd-7fce0641aed0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import inspect\n",
+    "import tempfile\n",
+    "\n",
+    "from promptflow.evals import evaluators\n",
+    "from promptflow.evals.evaluators import content_safety\n",
+    "\n",
+    "def upload_models(namespace):\n",
+    "    \"\"\"\n",
+    "    Upload all the evaluators found in a namespace.\n",
+    "\n",
+    "    :param namespace: The namespace to take evaluators from.\n",
+    "    \"\"\"\n",
+    "    for name, obj in inspect.getmembers(namespace):\n",
+    "        if inspect.isclass(obj):\n",
+    "            try:\n",
+    "                # Use the class docstring (or its constructor's) as the asset description.\n",
+    "                description = inspect.getdoc(obj) or inspect.getdoc(getattr(obj, '__init__'))\n",
+    "                with tempfile.TemporaryDirectory() as d:\n",
+    "                    artifact_dir = os.path.join(d, name)\n",
+    "                    os.makedirs(artifact_dir, exist_ok=True)\n",
+    "                    pf.flows.save(obj, path=artifact_dir)\n",
+    "                    # Prompt-based evaluators ship a jinja2 template; surface it in the UI.\n",
+    "                    default_display_file = os.path.join(name, 'flow', 'prompt.jinja2')\n",
+    "                    properties = None\n",
+    "                    if os.path.isfile(os.path.join(d, default_display_file)):\n",
+    "                        properties = {'_default-display-file': default_display_file}\n",
+    "                    eval_model = Model(\n",
+    "                        path=artifact_dir,\n",
+    "                        name=name,\n",
+    "                        description=description,\n",
+    "                        properties=properties\n",
+    "                    )\n",
+    "                    ml_client.evaluators.create_or_update(eval_model)\n",
+    "                print(f'{name} saved')\n",
+    "            except Exception as e:\n",
+    "                print(f'Failed to save {name}. Error: {e}')\n",
+    "\n",
+    "\n",
+    "upload_models(evaluators)\n",
+    "upload_models(content_safety)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "python311",
+   "language": "python",
+   "name": "python311"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}