diff --git a/.gitignore b/.gitignore
index 5788b4be7e5..6236b9c9db3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -182,3 +182,7 @@ scripts/docs/_build/
 .pdfs
 config.json
 src/promptflow/promptflow/azure/_restclient/swagger.json
+
+# chat-with-pdf's prebuilt index
+!.pdfs/
+!.index/
diff --git a/README.md b/README.md
index 1e3802a472a..d04aeb5ab9a 100644
--- a/README.md
+++ b/README.md
@@ -93,7 +93,7 @@ We also offer a VS Code extension (a flow designer) for an interactive flow deve
 You can install it from the visualstudio marketplace.

-#### Deep delve into flow development
+#### Deep dive into flow development

 [Getting Started with Prompt Flow](https://microsoft.github.io/promptflow/how-to-guides/quick-start.html): A step by step guidance to invoke your first flow run.
diff --git a/docs/cloud/azureai/quick-start.md b/docs/cloud/azureai/quick-start.md
index 2ceacc85b61..4c4e708664b 100644
--- a/docs/cloud/azureai/quick-start.md
+++ b/docs/cloud/azureai/quick-start.md
@@ -126,7 +126,7 @@ pf = PFClient(
 # load flow
 flow = "web-classification"
 data = "web-classification/data.jsonl"
-runtime = "demo-mir" # assume you have existing runtime with this name provisioned
+runtime = "example-runtime-ci" # assume you have existing runtime with this name provisioned
 # runtime = None # un-comment to use automatic runtime

 # create run
diff --git a/docs/dev/dev_setup.md b/docs/dev/dev_setup.md
index d908dc77ad1..fca3b81cc83 100644
--- a/docs/dev/dev_setup.md
+++ b/docs/dev/dev_setup.md
@@ -6,7 +6,7 @@
   `conda create -n <env_name> python=3.9`.
 - Activate the env you created.
 - Set environment variable `PYTHONPATH` in your new conda environment.
-  `conda env config vars set PYTHONPATH=<path-to-promptflow>`.
+  `conda env config vars set PYTHONPATH=<path-to-promptflow>\promptflow`.
   Once you have set the environment variable, you have to reactivate your environment.
   `conda activate <env_name>`.
 - In root folder, run `python scripts/building/dev_setup.py --promptflow-extra-deps azure` to install the package and dependencies.
diff --git a/docs/how-to-guides/init-and-test-a-flow.md b/docs/how-to-guides/init-and-test-a-flow.md
index 4afaeff38d5..5a01c352cce 100644
--- a/docs/how-to-guides/init-and-test-a-flow.md
+++ b/docs/how-to-guides/init-and-test-a-flow.md
@@ -46,8 +46,8 @@ Alternatively, you can use the "Create new flow" action on the Prompt flow pane
 Structure of flow folder:
 - **flow.dag.yaml**: The flow definition with inputs/outputs, nodes, tools and variants for authoring purpose.
-- **.promptflow/flow.tools.json**: It contains all package tools meta that references in `flow.dag.yaml`.
-- **Source code files (.py, .jinja2)**: User managed, the code scripts that references by tools.
+- **.promptflow/flow.tools.json**: It contains tools meta referenced in `flow.dag.yaml`.
+- **Source code files (.py, .jinja2)**: User managed, the code scripts referenced by tools.
 - **requirements.txt**: Python package dependencies for this flow.

 ![init_flow_folder](../media/how-to-guides/init-and-test-a-flow/flow_folder.png)
diff --git a/docs/how-to-guides/quick-start.md b/docs/how-to-guides/quick-start.md
index 29052344d91..311edacb017 100644
--- a/docs/how-to-guides/quick-start.md
+++ b/docs/how-to-guides/quick-start.md
@@ -36,7 +36,7 @@ pf -v

 ## Understand what's a flow

-A flow, represented as a YAML file, is a DAG of functions, which connected via input/output dependencies, and executed based on the topology by Prompt flow executor. See [Flows](../../concepts/concept-flows.md) for more details.
+A flow, represented as a YAML file, is a DAG of functions, which is connected via input/output dependencies, and executed based on the topology by Prompt flow executor. See [Flows](../../concepts/concept-flows.md) for more details.

 ### Get the flow sample

@@ -55,8 +55,8 @@ cd promptflow/examples/flows/standard/web-classification
 A flow directory is a directory that contains all contents of a flow. Structure of flow folder:
 - **flow.dag.yaml**: The flow definition with inputs/outputs, nodes, tools and variants for authoring purpose.
-- **.promptflow/flow.tools.json**: It contains all package tools meta that references in `flow.dag.yaml`.
-- **Source code files (.py, .jinja2)**: User managed, the code scripts that references by tools.
+- **.promptflow/flow.tools.json**: It contains tools meta referenced in `flow.dag.yaml`.
+- **Source code files (.py, .jinja2)**: User managed, the code scripts referenced by tools.
 - **requirements.txt**: Python package dependencies for this flow.

@@ -69,7 +69,7 @@ pip install -r requirements.txt
 ```

 ### Understand the flow yaml
-The entry file of a flow directory is [`flow.dag.yaml`](https://github.com/microsoft/promptflow/blob/main/examples/flows/standard/web-classification/flow.dag.yaml) which describes the `DAG(Directed Acyclic Graph)` of a flow. The flow dag of this sample likes below:
+The entry file of a flow directory is [`flow.dag.yaml`](https://github.com/microsoft/promptflow/blob/main/examples/flows/standard/web-classification/flow.dag.yaml) which describes the `DAG(Directed Acyclic Graph)` of a flow. Below is a sample of flow DAG:

 ![flow_dag](../media/how-to-guides/quick-start/flow_dag.png)
diff --git a/docs/how-to-guides/set-global-configs.md b/docs/how-to-guides/set-global-configs.md
index 727ba75b8cb..9db9cca796f 100644
--- a/docs/how-to-guides/set-global-configs.md
+++ b/docs/how-to-guides/set-global-configs.md
@@ -13,7 +13,7 @@ pf config set <config_name>=<config_value>
 ```
 For example:
 ```shell
-pf config set connection.provider="azureml:/subscriptions/<your-subscription>/resourceGroups/<your-resource-group>/providers/Microsoft.MachineLearningServices/workspaces/<your-workspace>"
+pf config set connection.provider="azureml://subscriptions/<your-subscription>/resourceGroups/<your-resource-group>/providers/Microsoft.MachineLearningServices/workspaces/<your-workspace>"
 ```

 ## Show config
@@ -25,7 +25,7 @@ After running the above config set command, show command will return the following result:
 ```json
 {
   "connection": {
-    "provider": "azureml:/subscriptions/<your-subscription>/resourceGroups/<your-resource-group>/providers/Microsoft.MachineLearningServices/workspaces/<your-workspace>"
+    "provider": "azureml://subscriptions/<your-subscription>/resourceGroups/<your-resource-group>/providers/Microsoft.MachineLearningServices/workspaces/<your-workspace>"
   }
 }
 ```
@@ -40,7 +40,7 @@ Connections will be saved locally. `PFClient`(or `pf connection` commands) will
 #### full azure machine learning workspace resource id
 Set connection provider to a specific workspace with:
 ```
-connection.provider=azureml:/subscriptions/<your-subscription>/resourceGroups/<your-resource-group>/providers/Microsoft.MachineLearningServices/workspaces/<your-workspace>
+connection.provider=azureml://subscriptions/<your-subscription>/resourceGroups/<your-resource-group>/providers/Microsoft.MachineLearningServices/workspaces/<your-workspace>
 ```
 When `get` or `list` connections, `PFClient`(or `pf connection` commands) will return workspace connections, and flow will be executed using these workspace connections.
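Once the `connection.provider` config above points at a workspace, a plain local `PFClient` resolves connections from that workspace instead of the local store, as the doc text notes. A minimal sketch of what that looks like from Python, assuming the config has been set as shown and you are authenticated to Azure:

```python
from promptflow import PFClient

# Picks up connection.provider from ~/.promptflow/pf.yaml (set via `pf config set`).
pf = PFClient()

# `get`/`list` now resolve against the workspace rather than the local store.
for connection in pf.connections.list():
    print(connection.name, connection.type)
```

diff --git a/examples/flows/chat/chat-with-pdf/README.md b/examples/flows/chat/chat-with-pdf/README.md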
diff --git a/examples/flows/chat/chat-with-pdf/README.md b/examples/flows/chat/chat-with-pdf/README.md index ba1759e7219..2a36d62745d 100644 --- a/examples/flows/chat/chat-with-pdf/README.md +++ b/examples/flows/chat/chat-with-pdf/README.md @@ -33,6 +33,9 @@ fi #### Run flow +**Note**: this sample uses [predownloaded PDFs](./chat_with_pdf/.pdfs/) and [prebuilt FAISS Index](./chat_with_pdf/.index/) to speed up execution time. +You can remove the folders to start a fresh run. + ```bash # test with default input value in flow.dag.yaml pf flow test --flow . diff --git a/examples/flows/chat/chat-with-pdf/chat-with-pdf.ipynb b/examples/flows/chat/chat-with-pdf/chat-with-pdf.ipynb index 1f9692e57f4..6b9d57e6075 100644 --- a/examples/flows/chat/chat-with-pdf/chat-with-pdf.ipynb +++ b/examples/flows/chat/chat-with-pdf/chat-with-pdf.ipynb @@ -91,7 +91,10 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 2. Test the flow" + "## 2. Test the flow\n", + "\n", + "**Note**: this sample uses [predownloaded PDFs](./chat_with_pdf/.pdfs/) and [prebuilt FAISS Index](./chat_with_pdf/.index/) to speed up execution time.\n", + "You can remove the folders to start a fresh run." ] }, { @@ -135,7 +138,7 @@ " \"MAX_COMPLETION_TOKENS\": 256,\n", " \"VERBOSE\": True,\n", " \"CHUNK_SIZE\": 256,\n", - " \"CHUNK_OVERLAP\": 32,\n", + " \"CHUNK_OVERLAP\": 64,\n", "}\n", "\n", "column_mapping = {\n", @@ -246,7 +249,7 @@ " \"MAX_COMPLETION_TOKENS\": 256,\n", " \"VERBOSE\": True,\n", " \"CHUNK_SIZE\": 256,\n", - " \"CHUNK_OVERLAP\": 32,\n", + " \"CHUNK_OVERLAP\": 64,\n", "}\n", "\n", "run_3k_context = pf.run(flow=flow_path, data=data_path, column_mapping=column_mapping)\n", @@ -312,7 +315,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.17" + "version": "3.9.12" } }, "nbformat": 4, diff --git a/examples/flows/chat/chat-with-pdf/chat_with_pdf/.index/.pdfs/https___arxiv.org_pdf_1810.04805.pdf.pdf.index_1024_64/index.faiss b/examples/flows/chat/chat-with-pdf/chat_with_pdf/.index/.pdfs/https___arxiv.org_pdf_1810.04805.pdf.pdf.index_1024_64/index.faiss new file mode 100644 index 00000000000..898bfdee3ae Binary files /dev/null and b/examples/flows/chat/chat-with-pdf/chat_with_pdf/.index/.pdfs/https___arxiv.org_pdf_1810.04805.pdf.pdf.index_1024_64/index.faiss differ diff --git a/examples/flows/chat/chat-with-pdf/chat_with_pdf/.index/.pdfs/https___arxiv.org_pdf_1810.04805.pdf.pdf.index_1024_64/index.pkl b/examples/flows/chat/chat-with-pdf/chat_with_pdf/.index/.pdfs/https___arxiv.org_pdf_1810.04805.pdf.pdf.index_1024_64/index.pkl new file mode 100644 index 00000000000..2fda0b5a794 Binary files /dev/null and b/examples/flows/chat/chat-with-pdf/chat_with_pdf/.index/.pdfs/https___arxiv.org_pdf_1810.04805.pdf.pdf.index_1024_64/index.pkl differ diff --git a/examples/flows/chat/chat-with-pdf/chat_with_pdf/.index/.pdfs/https___grs.pku.edu.cn_docs_2018-03_20180301083100898652.pdf.pdf.index_1024_64/index.faiss b/examples/flows/chat/chat-with-pdf/chat_with_pdf/.index/.pdfs/https___grs.pku.edu.cn_docs_2018-03_20180301083100898652.pdf.pdf.index_1024_64/index.faiss new file mode 100644 index 00000000000..cfe93c5ace5 Binary files /dev/null and b/examples/flows/chat/chat-with-pdf/chat_with_pdf/.index/.pdfs/https___grs.pku.edu.cn_docs_2018-03_20180301083100898652.pdf.pdf.index_1024_64/index.faiss differ diff --git a/examples/flows/chat/chat-with-pdf/chat_with_pdf/.index/.pdfs/https___grs.pku.edu.cn_docs_2018-03_20180301083100898652.pdf.pdf.index_1024_64/index.pkl 
b/examples/flows/chat/chat-with-pdf/chat_with_pdf/.index/.pdfs/https___grs.pku.edu.cn_docs_2018-03_20180301083100898652.pdf.pdf.index_1024_64/index.pkl new file mode 100644 index 00000000000..a1666ef9d86 Binary files /dev/null and b/examples/flows/chat/chat-with-pdf/chat_with_pdf/.index/.pdfs/https___grs.pku.edu.cn_docs_2018-03_20180301083100898652.pdf.pdf.index_1024_64/index.pkl differ diff --git a/examples/flows/chat/chat-with-pdf/chat_with_pdf/.pdfs/https___grs.pku.edu.cn_docs_2018-03_20180301083100898652.pdf.pdf b/examples/flows/chat/chat-with-pdf/chat_with_pdf/.pdfs/https___grs.pku.edu.cn_docs_2018-03_20180301083100898652.pdf.pdf new file mode 100644 index 00000000000..e22a1c5ff85 Binary files /dev/null and b/examples/flows/chat/chat-with-pdf/chat_with_pdf/.pdfs/https___grs.pku.edu.cn_docs_2018-03_20180301083100898652.pdf.pdf differ diff --git a/examples/flows/chat/chat-with-pdf/chat_with_pdf/build_index.py b/examples/flows/chat/chat-with-pdf/chat_with_pdf/build_index.py index c1aadce4bf3..d96765adb3f 100644 --- a/examples/flows/chat/chat-with-pdf/chat_with_pdf/build_index.py +++ b/examples/flows/chat/chat-with-pdf/chat_with_pdf/build_index.py @@ -2,10 +2,13 @@ import faiss import os +from pathlib import Path + from utils.oai import OAIEmbedding from utils.index import FAISSIndex from utils.logging import log from utils.lock import acquire_lock +from constants import INDEX_DIR def create_faiss_index(pdf_path: str) -> str: @@ -13,7 +16,9 @@ def create_faiss_index(pdf_path: str) -> str: chunk_overlap = int(os.environ.get("CHUNK_OVERLAP")) log(f"Chunk size: {chunk_size}, chunk overlap: {chunk_overlap}") - index_persistent_path = ".index/" + pdf_path + f".index_{chunk_size}_{chunk_overlap}" + file_name = Path(pdf_path).name + f".index_{chunk_size}_{chunk_overlap}" + index_persistent_path = Path(INDEX_DIR) / file_name + index_persistent_path = index_persistent_path.resolve().as_posix() lock_path = index_persistent_path + ".lock" log("Index path: " + os.path.abspath(index_persistent_path)) diff --git a/examples/flows/chat/chat-with-pdf/chat_with_pdf/constants.py b/examples/flows/chat/chat-with-pdf/chat_with_pdf/constants.py new file mode 100644 index 00000000000..cc937d43c39 --- /dev/null +++ b/examples/flows/chat/chat-with-pdf/chat_with_pdf/constants.py @@ -0,0 +1,5 @@ +import os + +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) +PDF_DIR = os.path.join(BASE_DIR, ".pdfs") +INDEX_DIR = os.path.join(BASE_DIR, ".index/.pdfs/") diff --git a/examples/flows/chat/chat-with-pdf/chat_with_pdf/download.py b/examples/flows/chat/chat-with-pdf/chat_with_pdf/download.py index dca91f430fd..999b935e82c 100644 --- a/examples/flows/chat/chat-with-pdf/chat_with_pdf/download.py +++ b/examples/flows/chat/chat-with-pdf/chat_with_pdf/download.py @@ -4,11 +4,12 @@ from utils.lock import acquire_lock from utils.logging import log +from constants import PDF_DIR # Download a pdf file from a url and return the path to the file def download(url: str) -> str: - path = ".pdfs/" + normalize_filename(url) + ".pdf" + path = os.path.join(PDF_DIR, normalize_filename(url) + ".pdf") lock_path = path + ".lock" with acquire_lock(lock_path): diff --git a/examples/flows/chat/chat-with-pdf/chat_with_pdf/main.py b/examples/flows/chat/chat-with-pdf/chat_with_pdf/main.py index 58bbee1134b..e50ce66de61 100644 --- a/examples/flows/chat/chat-with-pdf/chat_with_pdf/main.py +++ b/examples/flows/chat/chat-with-pdf/chat_with_pdf/main.py @@ -8,14 +8,15 @@ from build_index import create_faiss_index from download import download from 
utils.lock import acquire_lock
+from constants import PDF_DIR, INDEX_DIR


 def chat_with_pdf(question: str, pdf_url: str, history: list):
     with acquire_lock("create_folder.lock"):
-        if not os.path.exists(".pdfs"):
-            os.mkdir(".pdfs")
-        if not os.path.exists(".index/.pdfs"):
-            os.makedirs(".index/.pdfs")
+        if not os.path.exists(PDF_DIR):
+            os.mkdir(PDF_DIR)
+        if not os.path.exists(INDEX_DIR):
+            os.makedirs(INDEX_DIR)

     pdf_path = download(pdf_url)
     index_path = create_faiss_index(pdf_path)
diff --git a/examples/flows/chat/chat-with-pdf/flow.dag.yaml b/examples/flows/chat/chat-with-pdf/flow.dag.yaml
index 68fe91839c6..c052801f787 100644
--- a/examples/flows/chat/chat-with-pdf/flow.dag.yaml
+++ b/examples/flows/chat/chat-with-pdf/flow.dag.yaml
@@ -18,7 +18,7 @@ inputs:
     PROMPT_TOKEN_LIMIT: 3000
     MAX_COMPLETION_TOKENS: 1024
     VERBOSE: true
-    CHUNK_SIZE: 256
+    CHUNK_SIZE: 1024
     CHUNK_OVERLAP: 64
 outputs:
   answer:
diff --git a/examples/flows/chat/chat-with-pdf/setup_env.py b/examples/flows/chat/chat-with-pdf/setup_env.py
index 06066f08e51..6b231b878c9 100644
--- a/examples/flows/chat/chat-with-pdf/setup_env.py
+++ b/examples/flows/chat/chat-with-pdf/setup_env.py
@@ -6,6 +6,8 @@

 from chat_with_pdf.utils.lock import acquire_lock

+BASE_DIR = os.path.dirname(os.path.abspath(__file__)) + "/chat_with_pdf/"
+

 @tool
 def setup_env(connection: Union[AzureOpenAIConnection, OpenAIConnection], config: dict):
@@ -26,10 +28,10 @@ def setup_env(connection: Union[AzureOpenAIConnection, OpenAIConnection], config
     for key in config:
         os.environ[key] = str(config[key])

-    with acquire_lock("create_folder.lock"):
-        if not os.path.exists(".pdfs"):
-            os.mkdir(".pdfs")
-        if not os.path.exists(".index/.pdfs"):
-            os.makedirs(".index/.pdfs")
+    with acquire_lock(BASE_DIR + "create_folder.lock"):
+        if not os.path.exists(BASE_DIR + ".pdfs"):
+            os.mkdir(BASE_DIR + ".pdfs")
+        if not os.path.exists(BASE_DIR + ".index/.pdfs"):
+            os.makedirs(BASE_DIR + ".index/.pdfs")

     return "Ready"
diff --git a/examples/flows/chat/chat-with-pdf/tests/base_test.py b/examples/flows/chat/chat-with-pdf/tests/base_test.py
index b7612afaa66..b65341743ea 100644
--- a/examples/flows/chat/chat-with-pdf/tests/base_test.py
+++ b/examples/flows/chat/chat-with-pdf/tests/base_test.py
@@ -25,7 +25,7 @@ def setUp(self):
             "MAX_COMPLETION_TOKENS": 256,
             "VERBOSE": True,
             "CHUNK_SIZE": 1024,
-            "CHUNK_OVERLAP": 32,
+            "CHUNK_OVERLAP": 64,
         }
         self.config_2k_context = {
             "EMBEDDING_MODEL_DEPLOYMENT_NAME": "text-embedding-ada-002",
@@ -34,7 +34,7 @@ def setUp(self):
             "MAX_COMPLETION_TOKENS": 256,
             "VERBOSE": True,
             "CHUNK_SIZE": 1024,
-            "CHUNK_OVERLAP": 32,
+            "CHUNK_OVERLAP": 64,
         }

         # Switch current working directory to the folder of this file
diff --git a/examples/flows/standard/basic-with-connection/README.md b/examples/flows/standard/basic-with-connection/README.md
index e35d1fd3bed..351c342ed5c 100644
--- a/examples/flows/standard/basic-with-connection/README.md
+++ b/examples/flows/standard/basic-with-connection/README.md
@@ -101,5 +101,5 @@ Run flow with connection `open_ai_connection`.
 az account set -s <your_subscription_id>
 az configure --defaults group=<your_resource_group_name> workspace=<your_workspace_name>

-pfazure run create --flow . --data ./data.jsonl --connections llm.connection=open_ai_connection --stream --runtime demo-mir
+pfazure run create --flow . --data ./data.jsonl --connections llm.connection=open_ai_connection --stream --runtime example-runtime-ci
 ```
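The cloud run that the CLI block above creates can also be submitted from the Python SDK. A minimal sketch, assuming `promptflow[azure]` is installed; the subscription, resource group, and workspace values are placeholders, and the `connections` mapping mirrors `--connections llm.connection=open_ai_connection`:

```python
from azure.identity import DefaultAzureCredential
from promptflow.azure import PFClient

pf = PFClient(
    credential=DefaultAzureCredential(),
    subscription_id="<your_subscription_id>",
    resource_group_name="<your_resource_group_name>",
    workspace_name="<your_workspace_name>",
)

# Override the `connection` input of the `llm` node, like the CLI flag above.
run = pf.run(
    flow=".",
    data="./data.jsonl",
    runtime="example-runtime-ci",
    connections={"llm": {"connection": "open_ai_connection"}},
)
pf.stream(run)  # equivalent of --stream
```

As in the CLI samples, leaving `runtime` unset should fall back to the automatic runtime.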
diff --git a/examples/flows/standard/basic/README.md b/examples/flows/standard/basic/README.md
index 8642d513ad7..43ae4c41d74 100644
--- a/examples/flows/standard/basic/README.md
+++ b/examples/flows/standard/basic/README.md
@@ -107,9 +107,9 @@ az configure --defaults group=<your_resource_group_name> workspace=<your_workspace_name>

-pfazure run create --flow . --data ./data.jsonl --stream --runtime demo-mir --subscription <your_subscription_id> -g <your_resource_group_name> -w <your_workspace_name>
+pfazure run create --flow . --data ./data.jsonl --stream --runtime example-runtime-ci --subscription <your_subscription_id> -g <your_resource_group_name> -w <your_workspace_name>
 # pfazure run create --flow . --data ./data.jsonl --stream # automatic runtime

 # set default workspace
 az account set -s <your_subscription_id>
 az configure --defaults group=<your_resource_group_name> workspace=<your_workspace_name>

-pfazure run create --file run.yml --runtime demo-mir --stream
+pfazure run create --file run.yml --runtime example-runtime-ci --stream
 # pfazure run create --file run.yml --stream # automatic runtime
 ```
diff --git a/examples/flows/standard/web-classification/README.md b/examples/flows/standard/web-classification/README.md
index a19fed957c6..448610725a6 100644
--- a/examples/flows/standard/web-classification/README.md
+++ b/examples/flows/standard/web-classification/README.md
@@ -96,14 +96,14 @@ az account set -s <your_subscription_id>
 az configure --defaults group=<your_resource_group_name> workspace=<your_workspace_name>

 # create run
-pfazure run create --flow . --data ./data.jsonl --stream --runtime demo-mir
+pfazure run create --flow . --data ./data.jsonl --stream --runtime example-runtime-ci
 # pfazure run create --flow . --data ./data.jsonl --stream # automatic runtime

 # (Optional) create a new random run name for further use
 run_name="web_classification_"$(openssl rand -hex 12)

 # create run using yaml file, --name is optional
-pfazure run create --file run.yml --runtime demo-mir --name $run_name
+pfazure run create --file run.yml --runtime example-runtime-ci --name $run_name
 # pfazure run create --file run.yml --stream --name $run_name # automatic runtime

@@ -117,8 +117,8 @@ prev_run_name=$run_name
 run_name="classification_accuracy_"$(openssl rand -hex 12)

 # create evaluation run, --name is optional
-pfazure run create --flow ../../evaluation/eval-classification-accuracy --data ./data.jsonl --column-mapping groundtruth='${data.answer}' prediction='${run.outputs.category}' --run $prev_run_name --runtime demo-mir
-pfazure run create --file run_evaluation.yml --run $prev_run_name --stream --name $run_name --runtime demo-mir
+pfazure run create --flow ../../evaluation/eval-classification-accuracy --data ./data.jsonl --column-mapping groundtruth='${data.answer}' prediction='${run.outputs.category}' --run $prev_run_name --runtime example-runtime-ci
+pfazure run create --file run_evaluation.yml --run $prev_run_name --stream --name $run_name --runtime example-runtime-ci

 pfazure run stream --name $run_name
 pfazure run show --name $run_name
diff --git a/examples/tools/tool-package-quickstart/my_tool_package/tools/tool_with_dynamic_list_input.py b/examples/tools/tool-package-quickstart/my_tool_package/tools/tool_with_dynamic_list_input.py
new file mode 100644
index 00000000000..496530c2acb
--- /dev/null
+++ b/examples/tools/tool-package-quickstart/my_tool_package/tools/tool_with_dynamic_list_input.py
@@ -0,0 +1,36 @@
+from promptflow import tool
+from typing import List, Union, Dict
+
+
+def my_list_func(prefix: str = "", size: int = 10, **kwargs) -> List[Dict[str, Union[str, int, float, list, Dict]]]:
+    """This is a dummy function to generate a list of items.
+
+    :param prefix: prefix to add to each item.
+    :param size: number of items to generate.
+    :param kwargs: other parameters.
+    :return: a list of items. Each item is a dict with the following keys:
+        - value: for backend use. Required.
+        - display_value: for UI display. Optional.
+        - hyperlink: external link. Optional.
+        - description: information icon tip. Optional.
+    """
+    import random
+
+    words = ["apple", "banana", "cherry", "date", "elderberry", "fig", "grape", "honeydew", "kiwi", "lemon"]
+    result = []
+    for i in range(size):
+        random_word = f"{random.choice(words)}{i}"
+        cur_item = {
+            "value": random_word,
+            "display_value": f"{prefix}_{random_word}",
+            "hyperlink": f'https://www.google.com/search?q={random_word}',
+            "description": f"this is {i} item",
+        }
+        result.append(cur_item)
+
+    return result
+
+
+@tool
+def my_tool(input_text: list, input_prefix: str) -> str:
+    return f"Hello {input_prefix} {','.join(input_text)}"
diff --git a/examples/tools/tool-package-quickstart/my_tool_package/yamls/tool_with_dynamic_list_input.yaml b/examples/tools/tool-package-quickstart/my_tool_package/yamls/tool_with_dynamic_list_input.yaml
new file mode 100644
index 00000000000..81a7e29e3c2
--- /dev/null
+++ b/examples/tools/tool-package-quickstart/my_tool_package/yamls/tool_with_dynamic_list_input.yaml
@@ -0,0 +1,35 @@
+my_tool_package.tools.tool_with_dynamic_list_input.my_tool:
+  function: my_tool
+  inputs:
+    input_text:
+      type:
+      - list
+      dynamic_list:
+        # UX sends dynamic_list content to backend.
+        # specifies the function to generate dynamic list. format: <module>.<func>.
+        func_path: my_tool_package.tools.tool_with_dynamic_list_input.my_list_func
+        func_kwargs:
+        - name: prefix # Argument name to be passed to the function
+          type:
+          - string
+          # if optional is not specified, default to false.
+          # this is for UX pre-validation. If optional is false but there is no input, UX can throw an error in advance.
+          optional: true
+          reference: ${inputs.input_prefix} # Dynamic reference to another input parameter
+        - name: size # Another argument name to be passed to the function
+          type:
+          - int
+          optional: true
+          default: 10
+      # enum and dynamic list may need the below setting, default false.
+      # allow user to enter the input value manually.
+ allow_manual_entry: true + is_multi_select: true + # used to filter + input_prefix: + type: + - string + module: my_tool_package.tools.tool_with_dynamic_list_input + name: My Tool with Dynamic List Input + description: This is my tool with dynamic list input + type: python diff --git a/examples/tools/tool-package-quickstart/setup.py b/examples/tools/tool-package-quickstart/setup.py index 2c111fb8a8d..901026da85c 100644 --- a/examples/tools/tool-package-quickstart/setup.py +++ b/examples/tools/tool-package-quickstart/setup.py @@ -4,7 +4,7 @@ setup( name=PACKAGE_NAME, - version="0.0.3", + version="0.0.4", description="This is my tools package", packages=find_packages(), entry_points={ diff --git a/examples/tools/tool-package-quickstart/tests/test_tool_with_dynamic_input.py b/examples/tools/tool-package-quickstart/tests/test_tool_with_dynamic_input.py new file mode 100644 index 00000000000..cae210ceb8b --- /dev/null +++ b/examples/tools/tool-package-quickstart/tests/test_tool_with_dynamic_input.py @@ -0,0 +1,12 @@ +from my_tool_package.tools.tool_with_dynamic_list_input import my_tool, my_list_func + + +def test_my_tool(): + result = my_tool(input_text=["apple", "banana"], input_prefix="My") + assert result == 'Hello My apple,banana' + + +def test_my_list_func(): + result = my_list_func(prefix="My") + assert len(result) == 10 + assert "value" in result[0] diff --git a/src/promptflow-tools/connections.json.example b/src/promptflow-tools/connections.json.example index 27da4ed0937..75fdfc95bcc 100644 --- a/src/promptflow-tools/connections.json.example +++ b/src/promptflow-tools/connections.json.example @@ -51,6 +51,16 @@ "endpoint_api_key" ] }, + "open_source_llm_ws_service_connection": { + "type": "CustomConnection", + "value": { + "service_credential": "service-credential" + }, + "module": "promptflow.connections", + "secret_keys": [ + "service_credential" + ] + }, "open_ai_connection": { "type": "OpenAIConnection", "value": { diff --git a/src/promptflow-tools/promptflow/tools/aoai.py b/src/promptflow-tools/promptflow/tools/aoai.py index 3104fcbafbf..7d0ce8ccdca 100644 --- a/src/promptflow-tools/promptflow/tools/aoai.py +++ b/src/promptflow-tools/promptflow/tools/aoai.py @@ -112,7 +112,8 @@ def chat( frequency_penalty: float = 0, logit_bias: dict = {}, user: str = "", - function_call: str = None, + # function_call can be of type str or dict. + function_call: object = None, functions: list = None, **kwargs, ) -> [str, dict]: @@ -218,7 +219,7 @@ def chat( frequency_penalty: float = 0, logit_bias: dict = {}, user: str = "", - function_call: str = None, + function_call: object = None, functions: list = None, **kwargs, ) -> str: diff --git a/src/promptflow-tools/promptflow/tools/common.py b/src/promptflow-tools/promptflow/tools/common.py index d998578efae..37f242917b0 100644 --- a/src/promptflow-tools/promptflow/tools/common.py +++ b/src/promptflow-tools/promptflow/tools/common.py @@ -218,24 +218,15 @@ def process_function_call(function_call): common_tsg = f"Here is a valid example: {function_call_example}. See the guide at " \ "https://platform.openai.com/docs/api-reference/chat/create#chat/create-function_call " \ "or view sample 'How to call functions with chat models' in our gallery." - try: - param = json.loads(function_call) - except json.JSONDecodeError: - raise ChatAPIInvalidFunctions( - message=f"function_call parameter '{function_call}' is an invalid json. 
{common_tsg}") - except TypeError: - raise ChatAPIInvalidFunctions( - message=f"function_call parameter '{function_call}' must be str, bytes or bytearray" - f", but not {type(function_call)}. {common_tsg}" - ) + param = function_call if not isinstance(param, dict): raise ChatAPIInvalidFunctions( - message=f"function_call parameter '{function_call}' must be a dict, but not {type(param)}. {common_tsg}" + message=f"function_call parameter '{param}' must be a dict, but not {type(function_call)}. {common_tsg}" ) else: - if "name" not in param: + if "name" not in function_call: raise ChatAPIInvalidFunctions( - message=f'function_call parameter {function_call} must contain "name" field. {common_tsg}' + message=f'function_call parameter {json.dumps(param)} must contain "name" field. {common_tsg}' ) return param diff --git a/src/promptflow-tools/promptflow/tools/open_source_llm.py b/src/promptflow-tools/promptflow/tools/open_source_llm.py index c05b7bf202c..1c3bb8fc7c6 100644 --- a/src/promptflow-tools/promptflow/tools/open_source_llm.py +++ b/src/promptflow-tools/promptflow/tools/open_source_llm.py @@ -22,7 +22,7 @@ ChatAPIInvalidRole ) -VALID_LLAMA_ROLES = {"user", "assistant"} +VALID_LLAMA_ROLES = {"system", "user", "assistant"} REQUIRED_CONFIG_KEYS = ["endpoint_url", "model_family"] REQUIRED_SECRET_KEYS = ["endpoint_api_key"] DEFAULT_ENDPOINT_NAME = "-- please enter an endpoint name --" @@ -140,7 +140,7 @@ def get_deployment_from_endpoint(endpoint_name: str, deployment_name: str = None return (endpoint_uri, endpoint_key, model) -def get_deployment_from_connection(connection: CustomConnection, deployment_name: str = None) -> Tuple[str, str, str]: +def get_deployment_from_connection(connection: CustomConnection) -> Tuple[str, str, str]: conn_dict = dict(connection) for key in REQUIRED_CONFIG_KEYS: if key not in conn_dict: @@ -352,17 +352,7 @@ def get_content_formatter( class AzureMLOnlineEndpoint: - """Azure ML Online Endpoint models. - - Example: - .. code-block:: python - - azure_llm = AzureMLModel( - endpoint_url="https://..inference.ml.azure.com/score", - endpoint_api_key="my-api-key", - content_formatter=content_formatter, - ) - """ # noqa: E501 + """Azure ML Online Endpoint models.""" endpoint_url: str = "" """URL of pre-existing Endpoint. Should be passed to constructor or specified as @@ -385,6 +375,7 @@ def __init__( endpoint_url: str, endpoint_api_key: str, content_formatter: ContentFormatterBase, + model_family: ModelFamily, deployment_name: Optional[str] = None, model_kwargs: Optional[Dict] = None, ): @@ -393,6 +384,7 @@ def __init__( self.deployment_name = deployment_name self.content_formatter = content_formatter self.model_kwargs = model_kwargs + self.model_family = model_family @property def _identifying_params(self) -> Mapping[str, Any]: @@ -410,7 +402,10 @@ def _llm_type(self) -> str: def _call_endpoint(self, body: bytes) -> bytes: """call.""" - headers = {"Content-Type": "application/json", "Authorization": ("Bearer " + self.endpoint_api_key)} + headers = { + "Content-Type": "application/json", + "Authorization": ("Bearer " + self.endpoint_api_key), + "x-ms-user-agent": "PromptFlow/OpenSourceLLM/" + self.model_family} # If this is not set it'll use the default deployment on the endpoint. 
if self.deployment_name is not None: @@ -448,19 +443,16 @@ class OpenSourceLLM(ToolProvider): def __init__(self, connection: CustomConnection = None, - endpoint_name: str = None, - deployment_name: str = None): + endpoint_name: str = None): super().__init__() - self.deployment_name = deployment_name - if endpoint_name is not None and endpoint_name != DEFAULT_ENDPOINT_NAME: - (self.endpoint_uri, - self.endpoint_key, - self.model_family) = get_deployment_from_endpoint(endpoint_name, deployment_name) - else: + self.endpoint_key = None + self.endpoint_name = endpoint_name + + if endpoint_name is None or endpoint_name == DEFAULT_ENDPOINT_NAME: (self.endpoint_uri, self.endpoint_key, - self.model_family) = get_deployment_from_connection(connection, deployment_name) + self.model_family) = get_deployment_from_connection(connection) @tool @handle_oneline_endpoint_error() @@ -468,12 +460,20 @@ def call( self, prompt: PromptTemplate, api: API, + deployment_name: str = None, temperature: float = 1.0, max_new_tokens: int = 500, top_p: float = 1.0, model_kwargs: Optional[Dict] = {}, **kwargs ) -> str: + self.deployment_name = deployment_name + + if self.endpoint_key is None and self.endpoint_name is not None: + (self.endpoint_uri, + self.endpoint_key, + self.model_family) = get_deployment_from_endpoint(self.endpoint_name, self.deployment_name) + prompt = render_jinja_template(prompt, trim_blocks=True, keep_trailing_newline=True, **kwargs) model_kwargs["top_p"] = top_p @@ -489,6 +489,7 @@ def call( llm = AzureMLOnlineEndpoint( endpoint_url=self.endpoint_uri, endpoint_api_key=self.endpoint_key, + model_family=self.model_family, content_formatter=content_formatter, deployment_name=self.deployment_name, model_kwargs=model_kwargs diff --git a/src/promptflow-tools/promptflow/tools/openai.py b/src/promptflow-tools/promptflow/tools/openai.py index 9e5f64c087e..0edd7af1c01 100644 --- a/src/promptflow-tools/promptflow/tools/openai.py +++ b/src/promptflow-tools/promptflow/tools/openai.py @@ -107,7 +107,8 @@ def chat( frequency_penalty: float = 0, logit_bias: dict = {}, user: str = "", - function_call: str = None, + # function_call can be of type str or dict. + function_call: object = None, functions: list = None, **kwargs, ) -> [str, dict]: @@ -211,7 +212,7 @@ def chat( frequency_penalty: float = 0, logit_bias: dict = {}, user: str = "", - function_call: str = None, + function_call: object = None, functions: list = None, **kwargs, ) -> [str, dict]: diff --git a/src/promptflow-tools/promptflow/tools/yamls/open_source_llm.yaml b/src/promptflow-tools/promptflow/tools/yamls/open_source_llm.yaml index d5221244ae7..120aee91ca8 100644 --- a/src/promptflow-tools/promptflow/tools/yamls/open_source_llm.yaml +++ b/src/promptflow-tools/promptflow/tools/yamls/open_source_llm.yaml @@ -1,6 +1,7 @@ promptflow.tools.open_source_llm.OpenSourceLLM.call: name: Open Source LLM description: Use an Open Source model from the Azure Model catalog, deployed to an AzureML Online Endpoint for LLM Chat or Completion API calls. 
+ icon: data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAQAAAC1+jfqAAABYUlEQVR4nIWRsUtbcRRGz70v5BmHiLSh7pJu0oJDHUrf1kVwlCIRHMTN/yDasd0imCkgZGibFlrd5DmFNDSiksADsdAhk3TIZmj6kCa/65BERCie6Rsu93LPByPeajUxztWEKXcxzwTg/VQz18y9mwYwMQ9AxkNHLzI5nfvT9lwqa1H3c9AAwRATob7qLw8mLTz/tHEJxZnnK/4i/+Ivr8oGUNtqho1gdCz5yx+mRtAK63kAooPSAhwVTor7TwE+Zs+K4Q6UFqIDSIDG02nTb7tTa7N7xxFMPuvVrsumX9MaQwJM+wNxtNne9Fc/iJZfF2KAysAUbv81rzqxe939cfW9EFcnhi8CKIjz1BSjjyQfJR8j9DFTT8UNN6Q6XXFIRo3eRe/CyCgirtN1KQDq+VZ4GnCPRtAKa3nuiEq5w5+V9d+w82T+TXJJR6Luqf7b9pyftahXeXk8VP1gWbf8r+4bvICY0FkMumIAAAAASUVORK5CYII= type: custom_llm module: promptflow.tools.open_source_llm class_name: OpenSourceLLM diff --git a/src/promptflow-tools/tests/conftest.py b/src/promptflow-tools/tests/conftest.py index 0ec4f53865d..bae9c5b3642 100644 --- a/src/promptflow-tools/tests/conftest.py +++ b/src/promptflow-tools/tests/conftest.py @@ -1,9 +1,9 @@ import json import os +import pytest import sys -from pathlib import Path -import pytest +from pathlib import Path from pytest_mock import MockerFixture # noqa: E402 # Avoid circular dependencies: Use import 'from promptflow._internal' instead of 'from promptflow' # since the code here is in promptflow namespace as well @@ -45,6 +45,28 @@ def serp_connection(): return ConnectionManager().get("serp_connection") +def verify_oss_llm_custom_connection(connection: CustomConnection) -> bool: + '''Verify that there is a MIR endpoint up and available for the Custom Connection. + We explicitly do not pass the endpoint key to avoid the delay in generating a response. + ''' + + import urllib.request + from urllib.request import HTTPError + from urllib.error import URLError + + try: + urllib.request.urlopen( + urllib.request.Request(connection.configs['endpoint_url']), + timeout=50) + except HTTPError as e: + # verify that the connection is not authorized, anything else would mean the endpoint is failed + return e.code == 403 + except URLError: + # Endpoint does not exist - skip the test + return False + raise Exception("Task Succeeded unexpectedly.") + + @pytest.fixture def gpt2_custom_connection(): return ConnectionManager().get("gpt2_connection") @@ -55,6 +77,19 @@ def llama_chat_custom_connection(): return ConnectionManager().get("llama_chat_connection") +@pytest.fixture +def open_source_llm_ws_service_connection() -> bool: + try: + creds_custom_connection: CustomConnection = ConnectionManager().get("open_source_llm_ws_service_connection") + subs = json.loads(creds_custom_connection.secrets['service_credential']) + for key, value in subs.items(): + os.environ[key] = value + return True + except Exception as e: + print(f'Something failed setting environment variables for service credentials. 
Error: {e}') + return False + + @pytest.fixture(autouse=True) def skip_if_no_key(request, mocker): mocker.patch.dict(os.environ, {"PROMPTFLOW_CONNECTIONS": CONNECTION_FILE}) @@ -68,6 +103,10 @@ def skip_if_no_key(request, mocker): elif isinstance(connection, CustomConnection): if "endpoint_api_key" not in connection.secrets or "-api-key" in connection.secrets["endpoint_api_key"]: pytest.skip('skipped because no key') + # Verify Custom Connections, but only those used by the Open_Source_LLM Tool + if "endpoint_url" in connection.configs and "-endpoint-url" not in connection.configs["endpoint_url"]: + if not verify_oss_llm_custom_connection(connection): + pytest.skip('skipped because the connection is not valid') # example prompts diff --git a/src/promptflow-tools/tests/test_aoai.py b/src/promptflow-tools/tests/test_aoai.py index 20cc607a31f..ef83e02cd40 100644 --- a/src/promptflow-tools/tests/test_aoai.py +++ b/src/promptflow-tools/tests/test_aoai.py @@ -48,8 +48,15 @@ def test_aoai_chat_api(self, azure_open_ai_connection, example_prompt_template, ) assert "Product X".lower() in result.lower() + @pytest.mark.parametrize( + "function_call", + [ + "auto", + {"name": "get_current_weather"}, + ], + ) def test_aoai_chat_with_function( - self, azure_open_ai_connection, example_prompt_template, chat_history, functions): + self, azure_open_ai_connection, example_prompt_template, chat_history, functions, function_call): result = chat( connection=azure_open_ai_connection, prompt=example_prompt_template, @@ -59,7 +66,7 @@ def test_aoai_chat_with_function( user_input="What is the weather in Boston?", chat_history=chat_history, functions=functions, - function_call="auto" + function_call=function_call ) assert "function_call" in result assert result["function_call"]["name"] == "get_current_weather" diff --git a/src/promptflow-tools/tests/test_common.py b/src/promptflow-tools/tests/test_common.py index 758b0a5b2bb..ff167a44e90 100644 --- a/src/promptflow-tools/tests/test_common.py +++ b/src/promptflow-tools/tests/test_common.py @@ -30,12 +30,10 @@ def test_chat_api_invalid_functions(self, functions, error_message): @pytest.mark.parametrize( "function_call, error_message", [ - ({"name": "get_current_weather"}, "must be str, bytes or bytearray"), - ("{'name': 'get_current_weather'}", "is an invalid json"), - ("get_current_weather", "is an invalid json"), ("123", "function_call parameter '123' must be a dict"), - ('{"name1": "get_current_weather"}', 'function_call parameter {"name1": "get_current_weather"} must ' - 'contain "name" field'), + ({"name1": "get_current_weather"}, + 'function_call parameter {"name1": "get_current_weather"} must ' + 'contain "name" field'), ], ) def test_chat_api_invalid_function_call(self, function_call, error_message): diff --git a/src/promptflow-tools/tests/test_open_source_llm.py b/src/promptflow-tools/tests/test_open_source_llm.py index 924329df4ab..fe6d8b4d5eb 100644 --- a/src/promptflow-tools/tests/test_open_source_llm.py +++ b/src/promptflow-tools/tests/test_open_source_llm.py @@ -1,3 +1,4 @@ +import copy import os import pytest from promptflow.tools.exception import ( @@ -6,6 +7,7 @@ OpenSourceLLMKeyValidationError ) from promptflow.tools.open_source_llm import OpenSourceLLM, API, ContentFormatterBase, LlamaContentFormatter +from typing import List, Dict @pytest.fixture @@ -18,19 +20,66 @@ def llama_chat_provider(llama_chat_custom_connection) -> OpenSourceLLM: return OpenSourceLLM(llama_chat_custom_connection) +@pytest.fixture +def 
endpoints_provider(open_source_llm_ws_service_connection) -> Dict[str, List[str]]: + if not open_source_llm_ws_service_connection: + pytest.skip("Service Credential not available") + + from azure.ai.ml import MLClient + from azure.identity import DefaultAzureCredential + credential = DefaultAzureCredential(exclude_interactive_browser_credential=False) + ml_client = MLClient( + credential=credential, + subscription_id=os.getenv("AZUREML_ARM_SUBSCRIPTION"), + resource_group_name=os.getenv("AZUREML_ARM_RESOURCEGROUP"), + workspace_name=os.getenv("AZUREML_ARM_WORKSPACE_NAME")) + + endpoints = {} + for ep in ml_client.online_endpoints.list(): + endpoints[ep.name] = [d.name for d in ml_client.online_deployments.list(ep.name)] + + return endpoints + + +@pytest.fixture +def chat_endpoints_provider(endpoints_provider: Dict[str, List[str]]) -> Dict[str, List[str]]: + chat_endpoint_names = ["gpt2", "llama-chat"] + + chat_endpoints = {} + for key, value in endpoints_provider.items(): + for ep_name in chat_endpoint_names: + if ep_name in key: + chat_endpoints[key] = value + + if len(chat_endpoints) <= 0: + pytest.skip("No Chat Endpoints Found") + + return chat_endpoints + + +@pytest.fixture +def completion_endpoints_provider(endpoints_provider: Dict[str, List[str]]) -> Dict[str, List[str]]: + completion_endpoint_names = ["gpt2", "llama-comp"] + + completion_endpoints = {} + for key, value in endpoints_provider.items(): + for ep_name in completion_endpoint_names: + if ep_name in key: + completion_endpoints[key] = value + + if len(completion_endpoints) <= 0: + pytest.skip("No Completion Endpoints Found") + + return completion_endpoints + + @pytest.mark.usefixtures("use_secrets_config_file") class TestOpenSourceLLM: completion_prompt = "In the context of Azure ML, what does the ML stand for?" - - gpt2_chat_prompt = """user: + chat_prompt = """system: You are a AI which helps Customers answer questions. user: -""" + completion_prompt - - llama_chat_prompt = """user: -You are a AI which helps Customers answer questions. 
- """ + completion_prompt @pytest.mark.skip_if_no_key("gpt2_custom_connection") @@ -41,36 +90,32 @@ def test_open_source_llm_completion(self, gpt2_provider): assert len(response) > 25 @pytest.mark.skip_if_no_key("gpt2_custom_connection") - def test_open_source_llm_completion_with_deploy(self, gpt2_custom_connection): - os_tool = OpenSourceLLM( - gpt2_custom_connection, - deployment_name="gpt2-8") - response = os_tool.call( + def test_open_source_llm_completion_with_deploy(self, gpt2_provider): + response = gpt2_provider.call( self.completion_prompt, - API.COMPLETION) + API.COMPLETION, + deployment_name="gpt2-9") assert len(response) > 25 @pytest.mark.skip_if_no_key("gpt2_custom_connection") def test_open_source_llm_chat(self, gpt2_provider): response = gpt2_provider.call( - self.gpt2_chat_prompt, + self.chat_prompt, API.CHAT) assert len(response) > 25 @pytest.mark.skip_if_no_key("gpt2_custom_connection") - def test_open_source_llm_chat_with_deploy(self, gpt2_custom_connection): - os_tool = OpenSourceLLM( - gpt2_custom_connection, - deployment_name="gpt2-8") - response = os_tool.call( - self.gpt2_chat_prompt, - API.CHAT) + def test_open_source_llm_chat_with_deploy(self, gpt2_provider): + response = gpt2_provider.call( + self.chat_prompt, + API.CHAT, + deployment_name="gpt2-9") assert len(response) > 25 @pytest.mark.skip_if_no_key("gpt2_custom_connection") def test_open_source_llm_chat_with_max_length(self, gpt2_provider): response = gpt2_provider.call( - self.gpt2_chat_prompt, + self.chat_prompt, API.CHAT, max_new_tokens=2) # GPT-2 doesn't take this parameter @@ -78,9 +123,10 @@ def test_open_source_llm_chat_with_max_length(self, gpt2_provider): @pytest.mark.skip_if_no_key("gpt2_custom_connection") def test_open_source_llm_con_url_chat(self, gpt2_custom_connection): - del gpt2_custom_connection.configs['endpoint_url'] + tmp = copy.deepcopy(gpt2_custom_connection) + del tmp.configs['endpoint_url'] with pytest.raises(OpenSourceLLMKeyValidationError) as exc_info: - os = OpenSourceLLM(gpt2_custom_connection) + os = OpenSourceLLM(tmp) os.call(self.chat_prompt, API.CHAT) assert exc_info.value.message == """Required key `endpoint_url` not found in given custom connection. Required keys are: endpoint_url,model_family.""" @@ -88,9 +134,10 @@ def test_open_source_llm_con_url_chat(self, gpt2_custom_connection): @pytest.mark.skip_if_no_key("gpt2_custom_connection") def test_open_source_llm_con_key_chat(self, gpt2_custom_connection): - del gpt2_custom_connection.secrets['endpoint_api_key'] + tmp = copy.deepcopy(gpt2_custom_connection) + del tmp.secrets['endpoint_api_key'] with pytest.raises(OpenSourceLLMKeyValidationError) as exc_info: - os = OpenSourceLLM(gpt2_custom_connection) + os = OpenSourceLLM(tmp) os.call(self.chat_prompt, API.CHAT) assert exc_info.value.message == ( "Required secret key `endpoint_api_key` " @@ -100,9 +147,10 @@ def test_open_source_llm_con_key_chat(self, gpt2_custom_connection): @pytest.mark.skip_if_no_key("gpt2_custom_connection") def test_open_source_llm_con_model_chat(self, gpt2_custom_connection): - del gpt2_custom_connection.configs['model_family'] + tmp = copy.deepcopy(gpt2_custom_connection) + del tmp.configs['model_family'] with pytest.raises(OpenSourceLLMKeyValidationError) as exc_info: - os = OpenSourceLLM(gpt2_custom_connection) + os = OpenSourceLLM(tmp) os.call(self.completion_prompt, API.COMPLETION) assert exc_info.value.message == """Required key `model_family` not found in given custom connection. 
Required keys are: endpoint_url,model_family.""" @@ -114,7 +162,7 @@ def test_open_source_llm_escape_chat(self): assert out_of_danger == "The quick \\brown fox\\tjumped\\\\over \\the \\\\boy\\r\\n" def test_open_source_llm_llama_parse_chat_with_chat(self): - LlamaContentFormatter.parse_chat(self.llama_chat_prompt) + LlamaContentFormatter.parse_chat(self.chat_prompt) def test_open_source_llm_llama_parse_multi_turn(self): multi_turn_chat = """user: @@ -130,23 +178,6 @@ def test_open_source_llm_llama_parse_multi_turn(self): """ LlamaContentFormatter.parse_chat(multi_turn_chat) - def test_open_source_llm_llama_parse_system_not_accepted(self): - bad_chat_prompt = """system: -You are a AI which helps Customers answer questions. - -user: -""" + self.completion_prompt - with pytest.raises(OpenSourceLLMUserError) as exc_info: - LlamaContentFormatter.parse_chat(bad_chat_prompt) - assert exc_info.value.message == ( - "The Chat API requires a specific format for prompt definition," - + " and the prompt should include separate lines as role delimiters: 'assistant:\\n','user:\\n'." - + " Current parsed role 'system' does not meet the requirement. If you intend to use the Completion " - + "API, please select the appropriate API type and deployment name. If you do intend to use the Chat " - + "API, please refer to the guideline at https://aka.ms/pfdoc/chat-prompt or view the samples in our " - + "gallery that contain 'Chat' in the name.") - assert exc_info.value.error_codes == "UserError/OpenSourceLLMUserError".split("/") - def test_open_source_llm_llama_parse_ignore_whitespace(self): bad_chat_prompt = f"""system: You are a AI which helps Customers answer questions. @@ -158,30 +189,31 @@ def test_open_source_llm_llama_parse_ignore_whitespace(self): with pytest.raises(OpenSourceLLMUserError) as exc_info: LlamaContentFormatter.parse_chat(bad_chat_prompt) assert exc_info.value.message == ( - "The Chat API requires a specific format for prompt definition, and " - + "the prompt should include separate lines as role delimiters: 'assistant:\\n','user:\\n'. Current parsed " - + "role 'system' does not meet the requirement. If you intend to use the Completion API, please select the " - + "appropriate API type and deployment name. If you do intend to use the Chat API, please refer to the " - + "guideline at https://aka.ms/pfdoc/chat-prompt or view the samples in our gallery that contain 'Chat' " - + "in the name.") + "The Chat API requires a specific format for prompt definition, and the prompt should include separate " + + "lines as role delimiters: 'assistant:\\n','system:\\n','user:\\n'. Current parsed role 'in the context " + + "of azure ml, what does the ml stand for?' does not meet the requirement. If you intend to use the " + + "Completion API, please select the appropriate API type and deployment name. If you do intend to use " + + "the Chat API, please refer to the guideline at https://aka.ms/pfdoc/chat-prompt or view the samples in " + + "our gallery that contain 'Chat' in the name.") assert exc_info.value.error_codes == "UserError/OpenSourceLLMUserError".split("/") def test_open_source_llm_llama_parse_chat_with_comp(self): with pytest.raises(OpenSourceLLMUserError) as exc_info: LlamaContentFormatter.parse_chat(self.completion_prompt) assert exc_info.value.message == ( - "The Chat API requires a specific format for prompt definition, and " - + "the prompt should include separate lines as role delimiters: 'assistant:\\n','user:\\n'. 
Current parsed " - + "role 'in the context of azure ml, what does the ml stand for?' does not meet the requirement. If you " - + "intend to use the Completion API, please select the appropriate API type and deployment name. If you do " - + "intend to use the Chat API, please refer to the guideline at https://aka.ms/pfdoc/chat-prompt or view " - + "the samples in our gallery that contain 'Chat' in the name.") + "The Chat API requires a specific format for prompt definition, and the prompt should include separate " + + "lines as role delimiters: 'assistant:\\n','system:\\n','user:\\n'. Current parsed role 'in the context " + + "of azure ml, what does the ml stand for?' does not meet the requirement. If you intend to use the " + + "Completion API, please select the appropriate API type and deployment name. If you do intend to use the " + + "Chat API, please refer to the guideline at https://aka.ms/pfdoc/chat-prompt or view the samples in our " + + "gallery that contain 'Chat' in the name.") assert exc_info.value.error_codes == "UserError/OpenSourceLLMUserError".split("/") @pytest.mark.skip_if_no_key("gpt2_custom_connection") def test_open_source_llm_llama_endpoint_miss(self, gpt2_custom_connection): - gpt2_custom_connection.configs['endpoint_url'] += 'completely/real/endpoint' - os = OpenSourceLLM(gpt2_custom_connection) + tmp = copy.deepcopy(gpt2_custom_connection) + tmp.configs['endpoint_url'] += 'completely/real/endpoint' + os = OpenSourceLLM(tmp) with pytest.raises(OpenSourceLLMOnlineEndpointError) as exc_info: os.call( self.completion_prompt, @@ -192,30 +224,49 @@ def test_open_source_llm_llama_endpoint_miss(self, gpt2_custom_connection): assert exc_info.value.error_codes == "UserError/OpenSourceLLMOnlineEndpointError".split("/") @pytest.mark.skip_if_no_key("gpt2_custom_connection") - def test_open_source_llm_llama_deployment_miss(self, gpt2_custom_connection): - os = OpenSourceLLM( - gpt2_custom_connection, - deployment_name="completely/real/deployment-007") + def test_open_source_llm_llama_deployment_miss(self, gpt2_provider): with pytest.raises(OpenSourceLLMOnlineEndpointError) as exc_info: - os.call(self.completion_prompt, API.COMPLETION) + gpt2_provider.call(self.completion_prompt, + API.COMPLETION, + deployment_name="completely/real/deployment-007") assert exc_info.value.message == ( "Exception hit calling Oneline Endpoint: " + "HTTPError: HTTP Error 404: Not Found") assert exc_info.value.error_codes == "UserError/OpenSourceLLMOnlineEndpointError".split("/") - @pytest.mark.skip - def test_open_source_llm_endpoint_name(self): - os.environ["AZUREML_ARM_SUBSCRIPTION"] = "" - os.environ["AZUREML_ARM_RESOURCEGROUP"] = "" - os.environ["AZUREML_ARM_WORKSPACE_NAME"] = "" - - os_llm = OpenSourceLLM(endpoint_name="llama-temp-chat") - response = os_llm.call(self.llama_chat_prompt, API.CHAT) - assert len(response) > 25 + @pytest.mark.skip_if_no_key("open_source_llm_ws_service_connection") + def test_open_source_llm_chat_endpoint_name(self, chat_endpoints_provider): + for endpoint_name in chat_endpoints_provider: + os_llm = OpenSourceLLM(endpoint_name=endpoint_name) + response = os_llm.call(self.chat_prompt, API.CHAT) + assert len(response) > 25 + + @pytest.mark.skip_if_no_key("open_source_llm_ws_service_connection") + def test_open_source_llm_chat_endpoint_name_with_deployment(self, chat_endpoints_provider): + for endpoint_name in chat_endpoints_provider: + os_llm = OpenSourceLLM(endpoint_name=endpoint_name) + for deployment_name in chat_endpoints_provider[endpoint_name]: + response = 
os_llm.call(self.chat_prompt, API.CHAT, deployment_name=deployment_name) + assert len(response) > 25 + + @pytest.mark.skip_if_no_key("open_source_llm_ws_service_connection") + def test_open_source_llm_completion_endpoint_name(self, completion_endpoints_provider): + for endpoint_name in completion_endpoints_provider: + os_llm = OpenSourceLLM(endpoint_name=endpoint_name) + response = os_llm.call(self.completion_prompt, API.COMPLETION) + assert len(response) > 25 + + @pytest.mark.skip_if_no_key("open_source_llm_ws_service_connection") + def test_open_source_llm_completion_endpoint_name_with_deployment(self, completion_endpoints_provider): + for endpoint_name in completion_endpoints_provider: + os_llm = OpenSourceLLM(endpoint_name=endpoint_name) + for deployment_name in completion_endpoints_provider[endpoint_name]: + response = os_llm.call(self.completion_prompt, API.COMPLETION, deployment_name=deployment_name) + assert len(response) > 25 @pytest.mark.skip_if_no_key("llama_chat_custom_connection") def test_open_source_llm_llama_chat(self, llama_chat_provider): - response = llama_chat_provider.call(self.llama_chat_prompt, API.CHAT) + response = llama_chat_provider.call(self.chat_prompt, API.CHAT) assert len(response) > 25 @pytest.mark.skip_if_no_key("llama_chat_custom_connection") diff --git a/src/promptflow/promptflow/__init__.py b/src/promptflow/promptflow/__init__.py index 70bb7487fed..16e2c555fed 100644 --- a/src/promptflow/promptflow/__init__.py +++ b/src/promptflow/promptflow/__init__.py @@ -9,6 +9,8 @@ from promptflow._core.tool import ToolProvider, tool # control plane sdk functions +from promptflow._sdk._load_functions import load_flow + from ._sdk._pf_client import PFClient from ._version import VERSION @@ -19,6 +21,7 @@ __all__ = [ "PFClient", + "load_flow", "log_metric", "ToolProvider", "tool", diff --git a/src/promptflow/promptflow/_cli/_pf/_config.py b/src/promptflow/promptflow/_cli/_pf/_config.py index 9bf46e5ae24..972e5baad71 100644 --- a/src/promptflow/promptflow/_cli/_pf/_config.py +++ b/src/promptflow/promptflow/_cli/_pf/_config.py @@ -19,11 +19,11 @@ def add_config_set(subparsers): """ # noqa: E501 activate_action( name="set", - description="Set promptflow configs for current user.", + description="Set prompt flow configs for current user.", epilog=epilog, add_params=[add_param_set_positional] + logging_params, subparsers=subparsers, - help_message="Set promptflow configs for current user, configs will be stored at ~/.promptflow/pf.yaml.", + help_message="Set prompt flow configs for current user, configs will be stored at ~/.promptflow/pf.yaml.", action_param_name="sub_action", ) @@ -48,7 +48,7 @@ def add_config_show(subparsers): def add_config_parser(subparsers): config_parser = subparsers.add_parser( - "config", description="A CLI tool to set promptflow configs for current user.", help="pf config" + "config", description="A CLI tool to set prompt flow configs for current user.", help="pf config" ) subparsers = config_parser.add_subparsers() add_config_set(subparsers) diff --git a/src/promptflow/promptflow/_core/tools_manager.py b/src/promptflow/promptflow/_core/tools_manager.py index 2aaee089b73..405bd2d1049 100644 --- a/src/promptflow/promptflow/_core/tools_manager.py +++ b/src/promptflow/promptflow/_core/tools_manager.py @@ -10,7 +10,7 @@ import types from functools import partial from pathlib import Path -from typing import Callable, List, Mapping, Optional, Tuple, Union +from typing import Callable, List, Mapping, Optional, Tuple, Union, Dict import pkg_resources 
import yaml @@ -170,6 +170,50 @@ def gen_tool_by_source(name, source: ToolSource, tool_type: ToolType, working_di ) +def append_workspace_triple_to_func_input_params(func_sig_params, func_input_params_dict, ws_triple_dict): + '''Append workspace triple to func input params. + + :param func_sig_params: function signature parameters, full params. + :param func_input_params_dict: user input param key-values for dynamic list function. + :param ws_triple_dict: workspace triple dict, including subscription_id, resource_group_name, workspace_name. + :return: combined func input params. + ''' + # append workspace triple to func input params if any below condition are met: + # 1. func signature has kwargs param. + # 2. func signature has param named 'subscription_id','resource_group_name','workspace_name'. + has_kwargs_param = any([param.kind == inspect.Parameter.VAR_KEYWORD for _, param in func_sig_params.items()]) + if has_kwargs_param is False: + # keep only params that are in func signature. Or run into error when calling func. + avail_ws_info_dict = {k: v for k, v in ws_triple_dict.items() if k in set(func_sig_params.keys())} + else: + avail_ws_info_dict = ws_triple_dict + + # if ws triple key is in func input params, it means user has provided value for it, + # do not expect implicit override. + combined_func_input_params = dict(avail_ws_info_dict, **func_input_params_dict) + return combined_func_input_params + + +def gen_dynamic_list(func_path: str, func_input_params_dict: Dict, ws_triple_dict: Dict[str, str] = None): + import importlib + import inspect + + # TODO: validate func path. + module_name, func_name = func_path.rsplit('.', 1) + module = importlib.import_module(module_name) + func = getattr(module, func_name) + # get param names from func signature. + func_sig_params = inspect.signature(func).parameters + # TODO: validate if func input params are all in func signature params. + # TODO: add more tests to verify following appending logic. + combined_func_input_params = append_workspace_triple_to_func_input_params( + func_sig_params, func_input_params_dict, ws_triple_dict) + # TODO: error handling of func call. + result = func(**combined_func_input_params) + # TODO: validate response is of required format. Throw correct message if response is empty. 
 class BuiltinsManager:
     def __init__(self) -> None:
         pass
diff --git a/src/promptflow/promptflow/_internal/__init__.py b/src/promptflow/promptflow/_internal/__init__.py
index 450a1b05e68..7d4a0b885c3 100644
--- a/src/promptflow/promptflow/_internal/__init__.py
+++ b/src/promptflow/promptflow/_internal/__init__.py
@@ -40,6 +40,7 @@
     register_apis,
     register_builtins,
     register_connections,
+    gen_dynamic_list,
 )
 from promptflow._core.tracer import Tracer
 from promptflow._sdk._constants import LOCAL_MGMT_DB_PATH
@@ -102,3 +103,4 @@
     NotFoundException,
     SqliteClient,
 )
+from promptflow.storage._run_storage import DefaultRunStorage
diff --git a/src/promptflow/promptflow/_sdk/_serving/flow_invoker.py b/src/promptflow/promptflow/_sdk/_serving/flow_invoker.py
index e1b8c9b7936..aadc7201c02 100644
--- a/src/promptflow/promptflow/_sdk/_serving/flow_invoker.py
+++ b/src/promptflow/promptflow/_sdk/_serving/flow_invoker.py
@@ -18,6 +18,7 @@
     update_environment_variables_with_connections,
 )
 from promptflow._sdk.entities._connection import _Connection
+from promptflow._sdk.entities._flow import Flow
 from promptflow._sdk.operations._flow_operations import FlowOperations
 from promptflow._utils.multimedia_utils import convert_multimedia_data_to_base64, persist_multimedia_data
 from promptflow.executor import FlowExecutor
@@ -30,8 +31,8 @@ class FlowInvoker:
     """
     The invoker of a flow.

-    :param flow: The path of the flow.
-    :type flow: str
+    :param flow: The path of the flow, or a flow loaded by load_flow().
+    :type flow: Union[str, ~promptflow._sdk.entities._flow.Flow]
     :param connection_provider: The connection provider, defaults to None
     :type connection_provider: [str, Callable], optional
     :param streaming: The function or bool to determine enable streaming or not, defaults to lambda: False
@@ -40,13 +41,12 @@ class FlowInvoker:

     def __init__(
         self,
-        flow: str,
+        flow: Union[str, Flow],
         connection_provider: [str, Callable] = None,
         streaming: Union[Callable[[], bool], bool] = False,
         **kwargs,
     ):
-        self.flow_dir = flow
-        self.flow_entity = load_flow(self.flow_dir)
+        self.flow_entity = flow if isinstance(flow, Flow) else load_flow(source=flow)
         self.streaming = streaming if isinstance(streaming, Callable) else lambda: streaming
         # Pass dump_to path to dump flow result for extension.
         self._dump_to = kwargs.get("dump_to", None)
diff --git a/src/promptflow/promptflow/_sdk/_utils.py b/src/promptflow/promptflow/_sdk/_utils.py
index 60cd2b98fec..506947a9e4e 100644
--- a/src/promptflow/promptflow/_sdk/_utils.py
+++ b/src/promptflow/promptflow/_sdk/_utils.py
@@ -32,6 +32,7 @@
 import promptflow
 from promptflow._constants import EXTENSION_UA
 from promptflow._core.tool_meta_generator import generate_tool_meta_dict_by_file
+from promptflow._core.tools_manager import gen_dynamic_list
 from promptflow._sdk._constants import (
     DAG_FILE_NAME,
     DEFAULT_ENCODING,
@@ -624,6 +625,28 @@ def _generate_tool_meta(
     return res


+def _gen_dynamic_list(function_config: Dict) -> List:
+    """Generate a dynamic list for a tool input.
+
+    :param function_config: function config in the tool meta. Should contain 'func_path' and 'func_kwargs'.
+    :return: a list of dynamic enums for the tool input.
+    """
+    func_path = function_config.get("func_path", "")
+    func_kwargs = function_config.get("func_kwargs", {})
+    # The custom function may call the Azure control plane API to list Azure resources,
+    # which may require the Azure workspace triple.
+    # TODO: move this method to a common place.
+    from promptflow._cli._utils import get_workspace_triad_from_local
+
+    workspace_triad = get_workspace_triad_from_local()
+    if (workspace_triad.subscription_id and workspace_triad.resource_group_name
+            and workspace_triad.workspace_name):
+        return gen_dynamic_list(func_path, func_kwargs, workspace_triad._asdict())
+    # If no workspace triple is available, just skip it.
+    else:
+        return gen_dynamic_list(func_path, func_kwargs)
+
+
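As a quick illustration of the flow_invoker.py hunk above, a hedged sketch of the two accepted forms; "my-flow" is a hypothetical flow directory:

from promptflow import load_flow
from promptflow._sdk._serving.flow_invoker import FlowInvoker

# Both forms are now accepted; a str is loaded via load_flow() internally.
invoker_from_path = FlowInvoker("my-flow")                # path, as before
invoker_from_entity = FlowInvoker(load_flow("my-flow"))   # pre-loaded Flow entity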
 def _generate_package_tools(keys: Optional[List[str]] = None) -> dict:
     import imp
diff --git a/src/promptflow/promptflow/_sdk/entities/_flow.py b/src/promptflow/promptflow/_sdk/entities/_flow.py
index 7b789f2b527..7aa743c9d6c 100644
--- a/src/promptflow/promptflow/_sdk/entities/_flow.py
+++ b/src/promptflow/promptflow/_sdk/entities/_flow.py
@@ -120,6 +120,7 @@ def __init__(
         super().__init__(code=code, **kwargs)
         self._flow_dir, self._dag_file_name = self._get_flow_definition(self.code)
+        self._executable = None

     @property
     def flow_dag_path(self) -> Path:
@@ -172,3 +173,20 @@ def _dump_for_validation(self) -> Dict:
         return yaml.safe_load(self.flow_dag_path.read_text(encoding=DEFAULT_ENCODING))

     # endregion
+
+    # region MLflow model requirements
+    @property
+    def inputs(self):
+        # This is used to build the MLflow model signature.
+        if not self._executable:
+            self._executable = self._init_executable()
+        return {k: v.type.value for k, v in self._executable.inputs.items()}
+
+    @property
+    def outputs(self):
+        # This is used to build the MLflow model signature.
+        if not self._executable:
+            self._executable = self._init_executable()
+        return {k: v.type.value for k, v in self._executable.outputs.items()}
+
+    # endregion
diff --git a/src/promptflow/promptflow/_sdk/operations/_test_submitter.py b/src/promptflow/promptflow/_sdk/operations/_test_submitter.py
index 1042b8bde34..c9afb89aad1 100644
--- a/src/promptflow/promptflow/_sdk/operations/_test_submitter.py
+++ b/src/promptflow/promptflow/_sdk/operations/_test_submitter.py
@@ -202,6 +202,7 @@ def node_test(
             dependency_nodes_outputs=dependency_nodes_outputs,
             connections=connections,
             working_dir=self.flow.code,
+            output_sub_dir=".promptflow/intermediate",
         )
         return result
diff --git a/src/promptflow/promptflow/_utils/connection_utils.py b/src/promptflow/promptflow/_utils/connection_utils.py
index be16b1d64a3..5ea3cb6ff4d 100644
--- a/src/promptflow/promptflow/_utils/connection_utils.py
+++ b/src/promptflow/promptflow/_utils/connection_utils.py
@@ -37,6 +37,7 @@ def generate_custom_strong_type_connection_spec(cls, package, package_version):
 def generate_custom_strong_type_connection_template(cls, connection_spec, package, package_version):
     connection_template_str = """
+    $schema: https://azuremlschemas.azureedge.net/promptflow/latest/CustomStrongTypeConnection.schema.json
     name: "to_replace_with_connection_name"
     type: custom
     custom_type: {{ custom_type }}
diff --git a/src/promptflow/promptflow/executor/_errors.py b/src/promptflow/promptflow/executor/_errors.py
index e190bce71da..543eb4bf56e 100644
--- a/src/promptflow/promptflow/executor/_errors.py
+++ b/src/promptflow/promptflow/executor/_errors.py
@@ -193,6 +193,16 @@ def __init__(self, line_number, timeout):
         )


+class EmptyLLMApiMapping(UserErrorException):
+    """Exception raised when connection_type_to_api_mapping is empty and the LLM node provider can't be inferred."""
+
+    def __init__(self):
+        super().__init__(
+            message="LLM API mapping is empty; please ensure the 'promptflow-tools' package is installed.",
+            target=ErrorTarget.EXECUTOR,
+        )
+
+
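To see why the new Flow.inputs/outputs properties in the _flow.py hunk above matter, a hedged sketch of assembling an MLflow-style signature from them; the flow folder name is hypothetical:

from promptflow import load_flow

flow = load_flow("my-flow")  # hypothetical local flow directory
# The new properties map input/output names to their value types,
# e.g. {"question": "string"} / {"answer": "string"}, which is the shape
# an MLflow model signature needs.
signature = {"inputs": flow.inputs, "outputs": flow.outputs}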
"""Exception raised when tool load failed. diff --git a/src/promptflow/promptflow/executor/_tool_resolver.py b/src/promptflow/promptflow/executor/_tool_resolver.py index 87c70397168..ebf1937f6c3 100644 --- a/src/promptflow/promptflow/executor/_tool_resolver.py +++ b/src/promptflow/promptflow/executor/_tool_resolver.py @@ -12,14 +12,15 @@ from promptflow._core.connection_manager import ConnectionManager from promptflow._core.tools_manager import BuiltinsManager, ToolLoader, connection_type_to_api_mapping -from promptflow._utils.tool_utils import get_inputs_for_prompt_template, get_prompt_param_name_from_func from promptflow._utils.multimedia_utils import create_image, load_multimedia_data_recursively +from promptflow._utils.tool_utils import get_inputs_for_prompt_template, get_prompt_param_name_from_func from promptflow.contracts.flow import InputAssignment, InputValueType, Node, ToolSourceType from promptflow.contracts.tool import ConnectionType, Tool, ToolType, ValueType from promptflow.contracts.types import PromptTemplate from promptflow.exceptions import ErrorTarget, PromptflowException, UserErrorException from promptflow.executor._errors import ( ConnectionNotFound, + EmptyLLMApiMapping, InvalidConnectionType, InvalidCustomLLMTool, InvalidSource, @@ -212,6 +213,8 @@ def _get_node_connection(self, node: Node): def _resolve_llm_node(self, node: Node, convert_input_types=False) -> ResolvedTool: connection = self._get_node_connection(node) if not node.provider: + if not connection_type_to_api_mapping: + raise EmptyLLMApiMapping() # If provider is not specified, try to resolve it from connection type node.provider = connection_type_to_api_mapping.get(type(connection).__name__) tool: Tool = self._tool_loader.load_tool_for_llm_node(node) diff --git a/src/promptflow/promptflow/executor/flow_executor.py b/src/promptflow/promptflow/executor/flow_executor.py index 9f247e19342..3829fd8a2d3 100644 --- a/src/promptflow/promptflow/executor/flow_executor.py +++ b/src/promptflow/promptflow/executor/flow_executor.py @@ -24,7 +24,7 @@ from promptflow._core.tools_manager import ToolsManager from promptflow._utils.context_utils import _change_working_dir from promptflow._utils.logger_utils import logger -from promptflow._utils.multimedia_utils import load_multimedia_data +from promptflow._utils.multimedia_utils import load_multimedia_data, load_multimedia_data_recursively from promptflow._utils.utils import transpose from promptflow.contracts.flow import Flow, FlowInputDefinition, InputAssignment, InputValueType, Node from promptflow.contracts.run_info import FlowRunInfo, Status @@ -48,7 +48,7 @@ from promptflow.executor._tool_resolver import ToolResolver from promptflow.executor.flow_validator import FlowValidator from promptflow.storage import AbstractRunStorage -from promptflow.storage._run_storage import DefaultRunStorage, DummyRunStorage +from promptflow.storage._run_storage import DefaultRunStorage LINE_NUMBER_KEY = "line_number" # Using the same key with portal. 
 LINE_TIMEOUT_SEC = 600
@@ -242,6 +242,7 @@ def load_and_exec_node(
         flow_file: Path,
         node_name: str,
         *,
+        output_sub_dir: Optional[str] = None,
         flow_inputs: Optional[Mapping[str, Any]] = None,
         dependency_nodes_outputs: Optional[Mapping[str, Any]] = None,
         connections: Optional[dict] = None,
@@ -294,8 +295,10 @@ def load_and_exec_node(
             flow_file=flow_file,
         )

-        flow_inputs = FlowExecutor._apply_default_value_for_input(flow.inputs, flow_inputs)
-        converted_flow_inputs_for_node = FlowValidator.convert_flow_inputs_for_node(flow, node, flow_inputs)
+        inputs_with_default_value = FlowExecutor._apply_default_value_for_input(flow.inputs, flow_inputs)
+        inputs = load_multimedia_data(flow.inputs, inputs_with_default_value)
+        dependency_nodes_outputs = load_multimedia_data_recursively(dependency_nodes_outputs)
+        converted_flow_inputs_for_node = FlowValidator.convert_flow_inputs_for_node(flow, node, inputs)
         package_tool_keys = [node.source.tool] if node.source and node.source.tool else []
         tool_resolver = ToolResolver(working_dir, connections, package_tool_keys)
         resolved_node = tool_resolver.resolve_tool_by_node(node)
@@ -320,7 +323,9 @@ def load_and_exec_node(
         resolved_inputs = {k: v for k, v in resolved_inputs.items() if k not in resolved_node.init_args}

         # TODO: Simplify the logic here
-        run_tracker = RunTracker(DummyRunStorage())
+        sub_dir = "." if output_sub_dir is None else output_sub_dir
+        storage = DefaultRunStorage(base_dir=working_dir, sub_dir=Path(sub_dir))
+        run_tracker = RunTracker(storage)
         with run_tracker.node_log_manager:
             ToolInvoker.activate(DefaultToolInvoker())
diff --git a/src/promptflow/tests/executor/e2etests/test_executor_happypath.py b/src/promptflow/tests/executor/e2etests/test_executor_happypath.py
index 2f66097191b..0725fa45568 100644
--- a/src/promptflow/tests/executor/e2etests/test_executor_happypath.py
+++ b/src/promptflow/tests/executor/e2etests/test_executor_happypath.py
@@ -1,5 +1,7 @@
 import uuid
+import os
 from types import GeneratorType
+from pathlib import Path

 import pytest

@@ -11,6 +13,7 @@
 from promptflow.executor import FlowExecutor
 from promptflow.executor._errors import ConnectionNotFound, InputTypeError, ResolveToolError
 from promptflow.executor.flow_executor import BulkResult, LineResult
+from promptflow.storage._run_storage import DefaultRunStorage
 from promptflow.storage import AbstractRunStorage

 from ..utils import (
@@ -19,6 +22,7 @@
     get_flow_expected_status_summary,
     get_flow_sample_inputs,
     get_yaml_file,
+    get_yaml_working_dir,
 )

 SAMPLE_FLOW = "web_classification_no_variants"
@@ -27,6 +31,11 @@
 SAMPLE_FLOW_WITH_LANGCHAIN_TRACES = "flow_with_langchain_traces"


+def assert_contains_substrings(s, substrings):
+    for substring in substrings:
+        assert substring in s
+
+
 class MemoryRunStorage(AbstractRunStorage):
     def __init__(self):
         self._node_runs = {}
@@ -105,6 +114,7 @@ def test_executor_storage(self, dev_connections):
             "prompt_tools",
             "script_with___file__",
             "connection_as_input",
+            "sample_flow_with_functions",
         ],
     )
     def test_executor_exec_bulk(self, flow_folder, dev_connections):
@@ -221,6 +231,38 @@ def test_executor_exec_line(self, flow_folder, dev_connections):
             assert node_run_info.node == node
             assert isinstance(node_run_info.api_calls, list)  # api calls is set
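A hedged sketch of the new load_and_exec_node knob introduced in the flow_executor.py hunk above; the flow file, node name, and inputs are hypothetical:

from promptflow.executor import FlowExecutor

run_info = FlowExecutor.load_and_exec_node(
    "my-flow/flow.dag.yaml",   # hypothetical flow file
    "my_node",                 # hypothetical node name
    flow_inputs={"text": "hello"},
    connections={},
    # Intermediate multimedia outputs are persisted under this sub-directory
    # of the working dir; it defaults to "." when omitted.
    output_sub_dir=".promptflow/intermediate",
)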
+
+    @pytest.mark.parametrize(
+        "flow_folder",
+        [
+            "python_tool_with_multiple_image_nodes",
+        ],
+    )
+    def test_executor_exec_line_with_image(self, flow_folder, dev_connections):
+        self.skip_serp(flow_folder, dev_connections)
+        working_dir = get_yaml_working_dir(flow_folder)
+        os.chdir(working_dir)
+        storage = DefaultRunStorage(base_dir=working_dir, sub_dir=Path("./temp"))
+        executor = FlowExecutor.create(get_yaml_file(flow_folder), dev_connections, storage=storage)
+        flow_result = executor.exec_line({})
+        assert not executor._run_tracker._flow_runs, "Flow runs in run tracker should be empty."
+        assert not executor._run_tracker._node_runs, "Node runs in run tracker should be empty."
+        assert isinstance(flow_result.output, dict)
+        assert flow_result.run_info.status == Status.Completed
+        node_count = len(executor._flow.nodes)
+        assert isinstance(flow_result.run_info.api_calls, list) and len(flow_result.run_info.api_calls) == node_count
+        substrings = ["data:image/jpg;path", ".jpg"]
+        for i in range(node_count):
+            assert_contains_substrings(str(flow_result.run_info.api_calls[i]), substrings)
+        assert len(flow_result.node_run_infos) == node_count
+        for node, node_run_info in flow_result.node_run_infos.items():
+            assert node_run_info.status == Status.Completed
+            assert node_run_info.node == node
+            assert isinstance(node_run_info.api_calls, list)  # api calls is set
+            assert_contains_substrings(str(node_run_info.inputs), substrings)
+            assert_contains_substrings(str(node_run_info.output), substrings)
+            assert_contains_substrings(str(node_run_info.result), substrings)
+            assert_contains_substrings(str(node_run_info.api_calls[0]), substrings)
+
     @pytest.mark.parametrize(
         "flow_folder, node_name, flow_inputs, dependency_nodes_outputs",
         [
@@ -252,6 +294,41 @@ def test_executor_exec_node(self, flow_folder, node_name, flow_inputs, dependenc
         assert run_info.node == node_name
         assert run_info.system_metrics["duration"] >= 0

+    @pytest.mark.parametrize(
+        "flow_folder, node_name, flow_inputs, dependency_nodes_outputs",
+        [
+            ("python_tool_with_multiple_image_nodes", "python_node_2", {"logo_content": "Microsoft and four squares"},
+             {"python_node": {"image": {"data:image/jpg;path": "logo.jpg"}, "image_name": "Microsoft's logo",
+                              "image_list": [{"data:image/jpg;path": "logo.jpg"}]}}),
+            ("python_tool_with_multiple_image_nodes", "python_node", {
+                "image": "logo.jpg", "image_name": "Microsoft's logo"}, {}),
+        ],
+    )
+    def test_executor_exec_node_with_image(self, flow_folder, node_name, flow_inputs, dependency_nodes_outputs,
+                                           dev_connections):
+        self.skip_serp(flow_folder, dev_connections)
+        yaml_file = get_yaml_file(flow_folder)
+        working_dir = get_yaml_working_dir(flow_folder)
+        os.chdir(working_dir)
+        run_info = FlowExecutor.load_and_exec_node(
+            yaml_file,
+            node_name,
+            flow_inputs=flow_inputs,
+            dependency_nodes_outputs=dependency_nodes_outputs,
+            connections=dev_connections,
+            output_sub_dir="./temp",
+            raise_ex=True,
+        )
+        substrings = ["data:image/jpg;path", "temp", ".jpg"]
+        assert_contains_substrings(str(run_info.inputs), substrings)
+        assert_contains_substrings(str(run_info.output), substrings)
+        assert_contains_substrings(str(run_info.result), substrings)
+        assert_contains_substrings(str(run_info.api_calls[0]), substrings)
+        assert run_info.status == Status.Completed
+        assert isinstance(run_info.api_calls, list)
+        assert run_info.node == node_name
+        assert run_info.system_metrics["duration"] >= 0
+
     def test_executor_node_overrides(self, dev_connections):
         inputs = self.get_line_inputs()
         executor = FlowExecutor.create(
diff --git a/src/promptflow/tests/executor/unittests/_core/test_tools_manager.py b/src/promptflow/tests/executor/unittests/_core/test_tools_manager.py
index 36a5ebe6559..77b784ce0ef 100644
--- a/src/promptflow/tests/executor/unittests/_core/test_tools_manager.py
+++ b/src/promptflow/tests/executor/unittests/_core/test_tools_manager.py
@@ -167,6 +167,7 @@ def test_collect_package_tools_and_connections(self, install_custom_tool_pkg):
         }

         expected_template = {
+            "$schema": "https://azuremlschemas.azureedge.net/promptflow/latest/CustomStrongTypeConnection.schema.json",
             "name": "to_replace_with_connection_name",
             "type": "custom",
             "custom_type": "MyFirstConnection",
@@ -197,5 +198,16 @@ def test_collect_package_tools_and_connections(self, install_custom_tool_pkg):
         content = templates["my_tool_package.tools.my_tool_with_custom_strong_type_connection.MyCustomConnection"]
         expected_template_str = textwrap.dedent(expected_template)
-
-        assert content in expected_template_str
+        assert expected_template_str in content
+
+    # TODO: enable this test after the new my_tool_package is released
+    @pytest.mark.skip("Will enable this test after the new my_tool_package is released")
+    def test_gen_dynamic_list(self):
+        from promptflow._sdk._utils import _gen_dynamic_list
+        func_path = "my_tool_package.tools.tool_with_dynamic_list_input.my_list_func"
+        func_kwargs = {"prefix": "My"}
+        result = _gen_dynamic_list({
+            "func_path": func_path, "func_kwargs": func_kwargs})
+        assert len(result) == 10
+
+    # TODO: add a test for gen_dynamic_list with ws_triple.
diff --git a/src/promptflow/tests/executor/utils.py b/src/promptflow/tests/executor/utils.py
index 210d3830709..afecd725625 100644
--- a/src/promptflow/tests/executor/utils.py
+++ b/src/promptflow/tests/executor/utils.py
@@ -14,6 +14,11 @@ def get_yaml_file(folder_name, root: str = FLOW_ROOT, file_name: str = "flow.dag
     return yaml_file


+def get_yaml_working_dir(folder_name, root: str = FLOW_ROOT):
+    flow_folder_path = Path(root) / folder_name
+    return flow_folder_path
+
+
 def get_flow_inputs(folder_name, root: str = FLOW_ROOT):
     flow_folder_path = Path(root) / folder_name
     inputs = load_json(flow_folder_path / "inputs.json")
diff --git a/src/promptflow/tests/sdk_cli_test/unittests/test_utils.py b/src/promptflow/tests/sdk_cli_test/unittests/test_utils.py
index 99bfe26d8ec..c08492940da 100644
--- a/src/promptflow/tests/sdk_cli_test/unittests/test_utils.py
+++ b/src/promptflow/tests/sdk_cli_test/unittests/test_utils.py
@@ -173,7 +173,9 @@ def test_concurrent_execution_of_refresh_connections_dir(self, concurrent_count)
         # Create and start threads
         for _ in range(concurrent_count):
-            thread = threading.Thread(target=refresh_connections_dir, args={None, None})
+            thread = threading.Thread(
+                target=lambda: refresh_connections_dir(connection_spec_files=[], connection_template_yamls=[])
+            )
             thread.start()
             threads.append(thread)
diff --git a/src/promptflow/tests/test_configs/flows/python_tool_with_multiple_image_nodes/flow.dag.yaml b/src/promptflow/tests/test_configs/flows/python_tool_with_multiple_image_nodes/flow.dag.yaml
index 971906fa51f..806c5a0c39f 100644
--- a/src/promptflow/tests/test_configs/flows/python_tool_with_multiple_image_nodes/flow.dag.yaml
+++ b/src/promptflow/tests/test_configs/flows/python_tool_with_multiple_image_nodes/flow.dag.yaml
@@ -2,6 +2,12 @@ inputs:
   image:
     type: image
     default: logo.jpg
+  image_name:
+    type: string
+    default: Microsoft's logo
+  logo_content:
+    type: string
+    default: Microsoft and four squares
 outputs:
   output:
     type: image
@@ -14,10 +20,12 @@ nodes:
     path: python_with_image.py
   inputs:
     image: ${inputs.image}
+    image_name: ${inputs.image_name}
 - name: python_node_2
   type: python
   source:
     type: code
-    path: python_with_image.py
+    path: python_node_2.py
   inputs:
-    image: ${python_node.output}
+    image_dict: ${python_node.output}
+    logo_content: ${inputs.logo_content}
diff --git a/src/promptflow/tests/test_configs/flows/python_tool_with_multiple_image_nodes/python_node_2.py b/src/promptflow/tests/test_configs/flows/python_tool_with_multiple_image_nodes/python_node_2.py
new file mode 100644
index 00000000000..68791f1c419
--- /dev/null
+++ b/src/promptflow/tests/test_configs/flows/python_tool_with_multiple_image_nodes/python_node_2.py
@@ -0,0 +1,9 @@
+from promptflow.contracts.multimedia import Image
+from promptflow import tool
+
+
+@tool
+def python_with_image(image_dict: dict, logo_content: str) -> dict:
+    image_dict["image_list2"] = [image_dict["image"], image_dict["image"]]
+    image_dict["logo_content"] = logo_content
+    return image_dict
diff --git a/src/promptflow/tests/test_configs/flows/python_tool_with_multiple_image_nodes/python_with_image.py b/src/promptflow/tests/test_configs/flows/python_tool_with_multiple_image_nodes/python_with_image.py
index 3b0770ebfd6..bd0a3fb7072 100644
--- a/src/promptflow/tests/test_configs/flows/python_tool_with_multiple_image_nodes/python_with_image.py
+++ b/src/promptflow/tests/test_configs/flows/python_tool_with_multiple_image_nodes/python_with_image.py
@@ -3,5 +3,5 @@
 @tool
-def python_with_image(image: Image) -> Image:
-    return image
+def python_with_image(image: Image, image_name: str) -> dict:
+    return {"image": image, "image_name": image_name, "image_list": [image, image]}
diff --git a/src/promptflow/tests/test_configs/flows/sample_flow_with_functions/flow.dag.yaml b/src/promptflow/tests/test_configs/flows/sample_flow_with_functions/flow.dag.yaml
new file mode 100644
index 00000000000..44691909765
--- /dev/null
+++ b/src/promptflow/tests/test_configs/flows/sample_flow_with_functions/flow.dag.yaml
@@ -0,0 +1,104 @@
+id: use_functions_with_chat_models
+name: Use Functions with Chat Models
+inputs:
+  chat_history:
+    type: list
+    default:
+    - inputs:
+        question: What is the weather like in Boston?
+      outputs:
+        answer: '{"forecast":["sunny","windy"],"location":"Boston","temperature":"72","unit":"fahrenheit"}'
+        llm_output:
+          content: null
+          function_call:
+            name: get_current_weather
+            arguments: |-
+              {
+                "location": "Boston"
+              }
+          role: assistant
+    is_chat_input: false
+  question:
+    type: string
+    default: How about London next week?
+    is_chat_input: true
+outputs:
+  answer:
+    type: string
+    reference: ${run_function.output}
+    is_chat_output: true
+  llm_output:
+    type: object
+    reference: ${use_functions_with_chat_models.output}
+nodes:
+- name: run_function
+  type: python
+  source:
+    type: code
+    path: run_function.py
+  inputs:
+    response_message: ${use_functions_with_chat_models.output}
+  use_variants: false
+- name: use_functions_with_chat_models
+  type: llm
+  source:
+    type: code
+    path: use_functions_with_chat_models.jinja2
+  inputs:
+    deployment_name: gpt-35-turbo
+    temperature: 0.7
+    top_p: 1
+    stop: ""
+    max_tokens: 256
+    presence_penalty: 0
+    frequency_penalty: 0
+    logit_bias: ""
+    functions:
+    - name: get_current_weather
+      description: Get the current weather in a given location
+      parameters:
+        type: object
+        properties:
+          location:
+            type: string
+            description: The city and state, e.g. San Francisco, CA
+          unit:
+            type: string
+            enum:
+            - celsius
+            - fahrenheit
+        required:
+        - location
+    - name: get_n_day_weather_forecast
+      description: Get an N-day weather forecast
+      parameters:
+        type: object
+        properties:
+          location:
+            type: string
+            description: The city and state, e.g. San Francisco, CA
+          format:
+            type: string
+            enum:
+            - celsius
+            - fahrenheit
+            description: The temperature unit to use. Infer this from the user's location.
+          num_days:
+            type: integer
+            description: The number of days to forecast
+        required:
+        - location
+        - format
+        - num_days
+    function_call:
+      name: get_current_weather
+    chat_history: ${inputs.chat_history}
+    question: ${inputs.question}
+  provider: AzureOpenAI
+  connection: azure_open_ai_connection
+  api: chat
+  module: promptflow.tools.aoai
+  use_variants: false
+node_variants: {}
+environment:
+  python_requirements_txt: requirements.txt
diff --git a/src/promptflow/tests/test_configs/flows/sample_flow_with_functions/run_function.py b/src/promptflow/tests/test_configs/flows/sample_flow_with_functions/run_function.py
new file mode 100644
index 00000000000..d1a97198b28
--- /dev/null
+++ b/src/promptflow/tests/test_configs/flows/sample_flow_with_functions/run_function.py
@@ -0,0 +1,41 @@
+from promptflow import tool
+import json
+
+
+def get_current_weather(location, unit="fahrenheit"):
+    """Get the current weather in a given location."""
+    weather_info = {
+        "location": location,
+        "temperature": "72",
+        "unit": unit,
+        "forecast": ["sunny", "windy"],
+    }
+    return weather_info
+
+
+def get_n_day_weather_forecast(location, format, num_days):
+    """Get the weather for the next num_days in a given location."""
+    weather_info = {
+        "location": location,
+        "temperature": "60",
+        "format": format,
+        "forecast": ["rainy"],
+        "num_days": num_days,
+    }
+    return weather_info
+
+
+@tool
+def run_function(response_message: dict) -> str:
+    if "function_call" in response_message:
+        function_name = response_message["function_call"]["name"]
+        function_args = json.loads(response_message["function_call"]["arguments"])
+        print(function_args)
+        result = globals()[function_name](**function_args)
+    else:
+        print("No function call")
+        if isinstance(response_message, dict):
+            result = response_message["content"]
+        else:
+            result = response_message
+    return result
diff --git a/src/promptflow/tests/test_configs/flows/sample_flow_with_functions/use_functions_with_chat_models.jinja2 b/src/promptflow/tests/test_configs/flows/sample_flow_with_functions/use_functions_with_chat_models.jinja2
new file mode 100644
index 00000000000..05b4b43a2ac
--- /dev/null
+++ b/src/promptflow/tests/test_configs/flows/sample_flow_with_functions/use_functions_with_chat_models.jinja2
@@ -0,0 +1,27 @@
+system:
+Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous.
+
+{% for item in chat_history %}
+user:
+{{item.inputs.question}}
+
+{% if 'function_call' in item.outputs.llm_output %}
+assistant:
+Function generation requested, function = {{item.outputs.llm_output.function_call.name}}, args = {{item.outputs.llm_output.function_call.arguments}}
+
+function:
+name:
+{{item.outputs.llm_output.function_call.name}}
+content:
+{{item.outputs.answer}}
+
+{% else %}
+assistant:
+{{item.outputs.llm_output}}
+
+{% endif %}
+
+{% endfor %}
+
+user:
+{{question}}
\ No newline at end of file
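As a closing illustration of how the new sample's run_function dispatches a model-requested call, a hedged sketch; the response payload below is a hypothetical example mirroring the chat_history default above:

sample_response = {
    "function_call": {
        "name": "get_current_weather",
        "arguments": '{"location": "Boston"}',
    }
}
# run_function json-loads the arguments and dispatches via globals(), i.e. it
# effectively calls get_current_weather(location="Boston") and returns its dict.
result = run_function(sample_response)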