From b0cc7ce0b9218aab86dcc6af305fbbbdf5109bee Mon Sep 17 00:00:00 2001 From: zhangxingzhi Date: Sun, 8 Oct 2023 12:06:07 +0800 Subject: [PATCH 1/6] feat: remove load_as_component --- src/promptflow/promptflow/azure/_pf_client.py | 60 +------ .../azure/operations/_artifact_utilities.py | 53 +----- .../azure/operations/_flow_operations.py | 151 +----------------- src/promptflow/setup.py | 2 +- .../e2etests/test_flow_in_azure_ml.py | 91 ++--------- .../flows/saved_component_spec/command.yaml | 49 ------ .../flows/saved_component_spec/parallel.yaml | 52 ++++-- .../parallel_anonymous.yaml | 73 --------- .../flows/web_classification/flow.dag.yaml | 1 + 9 files changed, 61 insertions(+), 471 deletions(-) delete mode 100644 src/promptflow/tests/test_configs/flows/saved_component_spec/command.yaml delete mode 100644 src/promptflow/tests/test_configs/flows/saved_component_spec/parallel_anonymous.yaml diff --git a/src/promptflow/promptflow/azure/_pf_client.py b/src/promptflow/promptflow/azure/_pf_client.py index f82d8a2eac9..b2487033b10 100644 --- a/src/promptflow/promptflow/azure/_pf_client.py +++ b/src/promptflow/promptflow/azure/_pf_client.py @@ -4,7 +4,7 @@ import os from os import PathLike from pathlib import Path -from typing import IO, Any, AnyStr, Dict, List, Optional, Union +from typing import Dict, List, Optional, Union from azure.ai.ml import MLClient from azure.core.credentials import TokenCredential @@ -14,7 +14,6 @@ from promptflow._sdk._errors import RunOperationParameterError from promptflow._sdk._user_agent import USER_AGENT from promptflow._sdk.entities import Run -from promptflow.azure._load_functions import load_flow from promptflow.azure._restclient.service_caller_factory import _FlowServiceCallerFactory from promptflow.azure._utils.gerneral import is_remote_uri from promptflow.azure.operations import RunOperations @@ -300,63 +299,6 @@ def visualize(self, runs: Union[List[str], List[Run]]) -> None: """ self.runs.visualize(runs) - def load_as_component( - self, - source: Union[str, PathLike, IO[AnyStr]], - *, - component_type: str, - columns_mapping: Dict[str, Union[str, float, int, bool]] = None, - variant: str = None, - environment_variables: Dict[str, Any] = None, - is_deterministic: bool = True, - **kwargs, - ) -> "Component": - """ - Load a flow as a component. - - :param source: Source of the flow. Should be a path to a flow dag yaml file or a flow directory. - :type source: Union[str, PathLike, IO[AnyStr]] - :param component_type: Type of the loaded component, support parallel only for now. - :type component_type: str - :param variant: Node variant used for the flow. - :type variant: str - :param environment_variables: Environment variables to set for the flow. - :type environment_variables: dict - :param columns_mapping: Inputs mapping for the flow. - :type columns_mapping: dict - :param is_deterministic: Whether the loaded component is deterministic. 
- :type is_deterministic: bool - """ - name = kwargs.pop("name", None) - version = kwargs.pop("version", None) - description = kwargs.pop("description", None) - display_name = kwargs.pop("display_name", None) - tags = kwargs.pop("tags", None) - - flow = load_flow( - source=source, - relative_origin=kwargs.pop("relative_origin", None), - **kwargs, - ) - - if component_type != "parallel": - raise NotImplementedError(f"Component type {component_type} is not supported yet.") - - # TODO: confirm if we should keep flow operations - component = self._flows.load_as_component( - flow=flow, - columns_mapping=columns_mapping, - variant=variant, - environment_variables=environment_variables, - name=name, - version=version, - description=description, - is_deterministic=is_deterministic, - display_name=display_name, - tags=tags, - ) - return component - def _add_user_agent(self, kwargs) -> None: user_agent = kwargs.pop("user_agent", None) user_agent = f"{user_agent} {USER_AGENT}" if user_agent else USER_AGENT diff --git a/src/promptflow/promptflow/azure/operations/_artifact_utilities.py b/src/promptflow/promptflow/azure/operations/_artifact_utilities.py index 3edbbf48177..0a1cf2ec51a 100644 --- a/src/promptflow/promptflow/azure/operations/_artifact_utilities.py +++ b/src/promptflow/promptflow/azure/operations/_artifact_utilities.py @@ -9,7 +9,7 @@ import uuid from datetime import datetime, timedelta from pathlib import Path -from typing import Dict, Optional, Tuple, TypeVar, Union +from typing import Dict, Optional, TypeVar, Union from azure.ai.ml._artifacts._blob_storage_helper import BlobStorageClient from azure.ai.ml._artifacts._gen2_storage_helper import Gen2StorageClient @@ -34,7 +34,6 @@ get_artifact_path_from_storage_url, get_storage_client, ) -from azure.ai.ml._utils.utils import is_mlflow_uri, is_url from azure.ai.ml.constants._common import SHORT_URI_FORMAT, STORAGE_ACCOUNT_URLS from azure.ai.ml.entities import Environment from azure.ai.ml.entities._assets._artifacts.artifact import Artifact, ArtifactStorageInfo @@ -357,56 +356,6 @@ def _update_gen2_metadata(name, version, indicator_file, storage_client) -> None T = TypeVar("T", bound=Artifact) -def _check_and_upload_path( - artifact: T, - asset_operations: Union["DataOperations", "ModelOperations", "CodeOperations", "FeatureSetOperations"], - artifact_type: str, - datastore_name: Optional[str] = None, - sas_uri: Optional[str] = None, - show_progress: bool = True, -) -> Tuple[T, str]: - """Checks whether `artifact` is a path or a uri and uploads it to the datastore if necessary. 
- - param T artifact: artifact to check and upload param - Union["DataOperations", "ModelOperations", "CodeOperations"] - asset_operations: the asset operations to use for uploading - param str datastore_name: the name of the datastore to upload to - param str sas_uri: the sas uri to use for uploading - """ - - datastore_name = artifact.datastore - if ( - hasattr(artifact, "local_path") - and artifact.local_path is not None - or ( - hasattr(artifact, "path") - and artifact.path is not None - and not (is_url(artifact.path) or is_mlflow_uri(artifact.path)) - ) - ): - path = ( - Path(artifact.path) - if hasattr(artifact, "path") and artifact.path is not None - else Path(artifact.local_path) - ) - if not path.is_absolute(): - path = Path(artifact.base_path, path).resolve() - uploaded_artifact = _upload_to_datastore( - asset_operations._operation_scope, - asset_operations._datastore_operation, - path, - datastore_name=datastore_name, - asset_name=artifact.name, - asset_version=str(artifact.version), - asset_hash=artifact._upload_hash if hasattr(artifact, "_upload_hash") else None, - sas_uri=sas_uri, - artifact_type=artifact_type, - show_progress=show_progress, - ignore_file=getattr(artifact, "_ignore_file", None), - ) - return uploaded_artifact - - def _check_and_upload_env_build_context( environment: Environment, operations: "EnvironmentOperations", diff --git a/src/promptflow/promptflow/azure/operations/_flow_operations.py b/src/promptflow/promptflow/azure/operations/_flow_operations.py index cef2ef52a84..debb642fffb 100644 --- a/src/promptflow/promptflow/azure/operations/_flow_operations.py +++ b/src/promptflow/promptflow/azure/operations/_flow_operations.py @@ -5,9 +5,8 @@ import logging import os -import re from pathlib import Path -from typing import Any, Dict +from typing import Dict from azure.ai.ml._artifacts._artifact_utilities import _check_and_upload_path from azure.ai.ml._scope_dependent_operations import ( @@ -16,11 +15,7 @@ OperationScope, _ScopeDependentOperations, ) -from azure.ai.ml._utils._storage_utils import AzureMLDatastorePathUri -from azure.ai.ml._utils.utils import hash_dict -from azure.ai.ml.constants._common import SHORT_URI_FORMAT, AzureMLResourceType -from azure.ai.ml.operations import ComponentOperations -from azure.ai.ml.operations._code_operations import CodeOperations +from azure.ai.ml.constants._common import SHORT_URI_FORMAT from azure.ai.ml.operations._operation_orchestrator import OperationOrchestrator from azure.core.exceptions import HttpResponseError @@ -32,12 +27,8 @@ ) from promptflow._sdk._utils import PromptflowIgnoreFile, generate_flow_tools_json from promptflow._sdk._vendor._asset_utils import traverse_directory -from promptflow.azure._constants._flow import DEFAULT_STORAGE from promptflow.azure._entities._flow import Flow -from promptflow.azure._ml import Component -from promptflow.azure._restclient.flow.models import FlowRunMode, LoadFlowAsComponentRequest from promptflow.azure._restclient.flow_service_caller import FlowServiceCaller -from promptflow.azure._utils import is_arm_id from promptflow.exceptions import SystemErrorException @@ -63,10 +54,6 @@ def __init__( self._service_caller = service_caller self._credential = credential - @property - def _code_operations(self) -> CodeOperations: - return self._all_operations.get_operation(AzureMLResourceType.CODE, lambda x: isinstance(x, CodeOperations)) - def _create_or_update(self, flow, **kwargs): # upload to file share self._resolve_arm_id_or_upload_dependencies(flow) @@ -102,140 +89,6 @@ def 
_download(self, source, dest): # TODO: support download flow raise NotImplementedError("Not implemented yet") - @classmethod - def _clear_empty_item(cls, obj): - if not isinstance(obj, dict): - return obj - return {k: cls._clear_empty_item(v) for k, v in obj.items() if v is not None} - - @classmethod - def _get_component_hash(cls, rest_object): - """this hash should include all the burn-in information: - - code - - keys of inputs_mapping - - environment_variables, it will be burned into something like component.task.environment_variables? - some other fields will be burned into component but will impact default value of inputs: - - variant - - connections - - values of inputs_mapping - Now we use all of them as hash key. - """ - obj = rest_object.as_dict() - - return hash_dict(cls._clear_empty_item(obj)) - - @classmethod - def _get_name_and_version(cls, *, rest_object, name: str = None, version: str = None): - if name and version: - return name, version - if name or version: - raise ValueError("name and version of the component must be provided together") - # the hash will be impacted by all editable fields, including default value of inputs_mapping - # so components with different default value of columns_mapping can't be reused from each other - return "azureml_anonymous_flow", cls._get_component_hash(rest_object) - - def load_as_component( - self, - flow, - name: str = None, - version: str = None, - display_name: str = None, - description: str = None, - tags: Dict[str, str] = None, - variant: str = None, - columns_mapping: Dict[str, str] = None, - environment_variables: Dict[str, Any] = None, - connections: Dict[str, Dict[str, str]] = None, - is_deterministic: bool = True, - **kwargs, - ) -> Component: - """Load a flow as a component.""" - rest_object = LoadFlowAsComponentRequest( - node_variant=variant, - inputs_mapping=columns_mapping, - environment_variables=environment_variables, - connections=connections, - display_name=display_name, - description=description, - tags=tags, - is_deterministic=is_deterministic, - # hack: MT support this only for now, will remove after MT release new version - run_mode=FlowRunMode.BULK_TEST, - ) - - if is_arm_id(flow): - rest_object.flow_definition_resource_id = flow.id - else: - # upload to file share - self._resolve_arm_id_or_upload_dependencies(flow) - if flow.path.startswith("azureml://"): - # upload via _check_and_upload_path - # submit with params FlowDefinitionDataStoreName and FlowDefinitionBlobPath - path_uri = AzureMLDatastorePathUri(flow.path) - rest_object.flow_definition_data_store_name = path_uri.datastore - rest_object.flow_definition_blob_path = path_uri.path - else: - # upload via CodeOperations.create_or_update - # submit with param FlowDefinitionDataUri - rest_object.flow_definition_data_uri = flow.path - - rest_object.component_name, rest_object.component_version = self._get_name_and_version( - rest_object=rest_object, name=name, version=version - ) - - component_id = self._service_caller.create_component_from_flow( - subscription_id=self._operation_scope.subscription_id, - resource_group_name=self._operation_scope.resource_group_name, - workspace_name=self._operation_scope.workspace_name, - body=rest_object, - ) - name, version = re.match(r".*/components/(.*)/versions/(.*)", component_id).groups() - return self._all_operations.get_operation( - AzureMLResourceType.COMPONENT, - lambda x: isinstance(x, ComponentOperations), - ).get(name, version) - - def _resolve_arm_id_or_upload_dependencies_to_file_share(self, flow: Flow) -> None: 
- ops = OperationOrchestrator(self._all_operations, self._operation_scope, self._operation_config) - # resolve flow's code - self._try_resolve_code_for_flow_to_file_share(flow=flow, ops=ops) - - @classmethod - def _try_resolve_code_for_flow_to_file_share(cls, flow: Flow, ops: OperationOrchestrator) -> None: - from ._artifact_utilities import _check_and_upload_path - - if flow.path: - if flow.path.startswith("azureml://datastores"): - # remote path - path_uri = AzureMLDatastorePathUri(flow.path) - if path_uri.datastore != DEFAULT_STORAGE: - raise ValueError(f"Only {DEFAULT_STORAGE} is supported as remote storage for now.") - flow.path = path_uri.path - flow._code_uploaded = True - return - else: - raise ValueError("Path is required for flow.") - - with flow._build_code() as code: - if code is None: - return - if flow._code_uploaded: - return - code.datastore = DEFAULT_STORAGE - uploaded_code_asset = _check_and_upload_path( - artifact=code, - asset_operations=ops._code_assets, - artifact_type="Code", - show_progress=False, - ) - if "remote_path" in uploaded_code_asset: - path = uploaded_code_asset["remote_path"] - elif "remote path" in uploaded_code_asset: - path = uploaded_code_asset["remote path"] - flow.code = path - flow.path = (Path(path) / flow.path).as_posix() - flow._code_uploaded = True - def _resolve_arm_id_or_upload_dependencies(self, flow: Flow, ignore_tools_json=False) -> None: ops = OperationOrchestrator(self._all_operations, self._operation_scope, self._operation_config) # resolve flow's code diff --git a/src/promptflow/setup.py b/src/promptflow/setup.py index bfc0e5e45f0..349856e47aa 100644 --- a/src/promptflow/setup.py +++ b/src/promptflow/setup.py @@ -77,7 +77,7 @@ "azure-core>=1.26.4,<2.0.0", "azure-storage-blob>=12.13.0,<13.0.0", "azure-identity>=1.12.0,<2.0.0", - "azure-ai-ml>=1.9.0,<2.0.0", + "azure-ai-ml>=1.11.0,<2.0.0", "pyjwt>=2.4.0,<3.0.0", # requirement of control plane SDK ], "executable": [ diff --git a/src/promptflow/tests/sdk_cli_azure_test/e2etests/test_flow_in_azure_ml.py b/src/promptflow/tests/sdk_cli_azure_test/e2etests/test_flow_in_azure_ml.py index aea913bbefd..bebdaea63bb 100644 --- a/src/promptflow/tests/sdk_cli_azure_test/e2etests/test_flow_in_azure_ml.py +++ b/src/promptflow/tests/sdk_cli_azure_test/e2etests/test_flow_in_azure_ml.py @@ -4,9 +4,8 @@ import pydash import pytest import yaml -from azure.ai.ml import Input, dsl -from azure.ai.ml.constants import AssetTypes -from azure.ai.ml.entities import Component, PipelineJob +from azure.ai.ml import load_component +from azure.ai.ml.entities import Component from promptflow.connections import AzureOpenAIConnection @@ -61,6 +60,7 @@ def update_saved_spec(component: Component, saved_spec_path: str): current_spec_text = saved_spec_path.read_text() if current_spec_text == yaml_text: return + saved_spec_path.parent.mkdir(parents=True, exist_ok=True) saved_spec_path.write_text(yaml_text) @@ -73,42 +73,19 @@ class TestFlowInAzureML: [ pytest.param( { - "component_type": "parallel", - "columns_mapping": { - "groundtruth": "1", - "prediction": "${{batch_run.outputs.category}}", - }, - "environment_variables": { - "verbose": "true", - }, - }, - { - "type": "parallel", - }, - id="parallel_anonymous", - ), - pytest.param( - { - "name": "web_classification_0", + "name": "web_classification_4", "version": "1.0.0", - "component_type": "parallel", "description": "Create flows that use large language models to " "classify URLs into multiple categories.", - "columns_mapping": { - "groundtruth": "1", - "prediction": 
"${{batch_run.outputs.category}}", - }, "environment_variables": { "verbose": "true", }, }, { - "name": "web_classification_0", + "name": "web_classification_4", "version": "1.0.0", "description": "Create flows that use large language models to " "classify URLs into multiple categories.", - "inputs.groundtruth.default": "1", - "inputs.prediction.default": "${{batch_run.outputs.category}}", "type": "parallel", }, id="parallel", @@ -119,62 +96,26 @@ def test_flow_as_component( self, azure_open_ai_connection: AzureOpenAIConnection, temp_output_dir, - pf, + ml_client, load_params: dict, expected_spec_attrs: dict, request, ) -> None: + # keep the simplest test here, more tests are in azure-ai-ml flows_dir = "./tests/test_configs/flows" - flow_func: Component = pf.load_as_component( - f"{flows_dir}/web_classification", - **load_params, - ) - - update_saved_spec(flow_func, f"./tests/test_configs/flows/saved_component_spec/{request.node.callspec.id}.yaml") - - component_dict = flow_func._to_dict() - slimmed_created_component_attrs = {key: pydash.get(component_dict, key) for key in expected_spec_attrs.keys()} - assert slimmed_created_component_attrs == expected_spec_attrs - - def test_flow_as_component_in_dsl_pipeline( - self, azure_open_ai_connection: AzureOpenAIConnection, temp_output_dir, pf - ) -> None: - - flows_dir = "./tests/test_configs/flows" - - flow_func: Component = pf.load_as_component( - f"{flows_dir}/web_classification", - component_type="parallel", - columns_mapping={ - "groundtruth": "${data.answer}", - "url": "${data.url}", - }, - environment_variables={ - "verbose": "true", - }, + flow_func: Component = load_component( + f"{flows_dir}/web_classification/flow.dag.yaml", params_override=[load_params] ) - @dsl.pipeline - def pipeline_with_flow(input_data): - flow_node = flow_func( - data=input_data, - connections={ - "summarize_text_content": { - "deployment_name": "test_deployment_name", - } - }, - ) - flow_node.logging_level = "DEBUG" - return flow_node.outputs + # TODO: snapshot of flow component changed every time? 
+ created_component = ml_client.components.create_or_update(flow_func, is_anonymous=True) - pipeline: PipelineJob = pipeline_with_flow( - input_data=Input(path=f"{flows_dir}/web_classification_input_dir", type=AssetTypes.URI_FOLDER), + update_saved_spec( + created_component, f"./tests/test_configs/flows/saved_component_spec/{request.node.callspec.id}.yaml" ) - # compute cluster doesn't have access to azurecr for now, so the submitted job will fail in building image stage - pipeline.settings.default_compute = "cpu-cluster" - created_job: PipelineJob = pf.ml_client.jobs.create_or_update(pipeline) - assert created_job.id - assert created_job.jobs["flow_node"].logging_level == "DEBUG" + component_dict = created_component._to_dict() + slimmed_created_component_attrs = {key: pydash.get(component_dict, key) for key in expected_spec_attrs.keys()} + assert slimmed_created_component_attrs == expected_spec_attrs diff --git a/src/promptflow/tests/test_configs/flows/saved_component_spec/command.yaml b/src/promptflow/tests/test_configs/flows/saved_component_spec/command.yaml deleted file mode 100644 index fd9dbf0e52a..00000000000 --- a/src/promptflow/tests/test_configs/flows/saved_component_spec/command.yaml +++ /dev/null @@ -1,49 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -name: web_classification_updated -version: version_updated -display_name: web_classification_display_name_updated -description: Create flows that use large language models to classify URLs into multiple - categories. -type: command -inputs: - data: - type: uri_folder - variant: - type: string - optional: true - default: variant_1 - groundtruth: - type: string - optional: true - default: '1' - prediction: - type: string - optional: true - default: ${{variant.outputs.category}} - connections.summarize_text_content.connection: - type: string - optional: true - default: azure_open_ai_connection - connections.summarize_text_content.deployment_name: - type: string - optional: true - connections.classify_with_llm.connection: - type: string - optional: true - default: azure_open_ai_connection - connections.classify_with_llm.deployment_name: - type: string - optional: true -outputs: - output: - type: uri_folder -command: pf run_bulk --input ${{inputs.data}} --output ${{outputs.output}} $[[--inputs-mapping - groundtruth=${{inputs.groundtruth}},prediction=${{inputs.prediction}}]] --connections - "$[[summarize_text_content.deployment_name=${{inputs.connections.summarize_text_content.deployment_name}},]]$[[summarize_text_content.connection=${{inputs.connections.summarize_text_content.connection}},]]$[[classify_with_llm.deployment_name=${{inputs.connections.classify_with_llm.deployment_name}},]]$[[classify_with_llm.connection=${{inputs.connections.classify_with_llm.connection}},]]" - $[[--variant ${{inputs.variant}}]] -environment: - name: CliV2AnonymousEnvironment - version: e2ebc71877ca60434ac0cb69936b523f - image: promptflow.azurecr.io/cli_test:latest -code: D:/PycharmProjects/PromptFlow/src/promptflow-sdk/tests/test_configs/flows/web_classification -is_deterministic: true diff --git a/src/promptflow/tests/test_configs/flows/saved_component_spec/parallel.yaml b/src/promptflow/tests/test_configs/flows/saved_component_spec/parallel.yaml index 74a0dff470b..cadaa6ec966 100644 --- a/src/promptflow/tests/test_configs/flows/saved_component_spec/parallel.yaml +++ b/src/promptflow/tests/test_configs/flows/saved_component_spec/parallel.yaml @@ -7,7 +7,7 @@ creation_context: last_modified_by_type: xxx 
description: Create flows that use large language models to classify URLs into multiple categories. -display_name: web_classification_0 +display_name: web_classification_4 error_threshold: -1 id: azureml:/subscriptions/xxx/resourceGroups/xxx/providers/Microsoft.MachineLearningServices/workspaces/xxx/components/xxx/versions/xxx input_data: ${{inputs.data}} @@ -20,6 +20,18 @@ inputs: default: text-davinci-003 optional: true type: string + connections.classify_with_llm.model: + enum: + - text-davinci-001 + - text-davinci-002 + - text-davinci-003 + - text-curie-001 + - text-babbage-001 + - text-ada-001 + - code-cushman-001 + - code-davinci-002 + optional: true + type: string connections.summarize_text_content.connection: default: azure_open_ai_connection optional: true @@ -28,16 +40,26 @@ inputs: default: text-davinci-003 optional: true type: string + connections.summarize_text_content.model: + enum: + - text-davinci-001 + - text-davinci-002 + - text-davinci-003 + - text-curie-001 + - text-babbage-001 + - text-ada-001 + - code-cushman-001 + - code-davinci-002 + optional: true + type: string data: - description: Flow data input optional: false type: uri_folder - groundtruth: - default: '1' - optional: false - type: string - prediction: - default: ${{batch_run.outputs.category}} + run_outputs: + optional: true + type: uri_folder + url: + default: https://www.microsoft.com/en-us/d/xbox-wireless-controller-stellar-shift-special-edition/94fbjc7h0h6h optional: false type: string is_deterministic: true @@ -45,8 +67,10 @@ logging_level: INFO max_concurrency_per_instance: 1 mini_batch_error_threshold: 0 mini_batch_size: '1' -name: web_classification_0 +name: web_classification_4 outputs: + debug_info: + type: uri_folder flow_outputs: type: uri_folder retry_settings: @@ -54,13 +78,15 @@ retry_settings: timeout: 3600 task: append_row_to: ${{outputs.flow_outputs}} - code: azureml:/subscriptions/xxx/resourceGroups/xxx/providers/Microsoft.MachineLearningServices/workspaces/xxx/codes/xxx/versions/xxx + code: /subscriptions/xxx/resourceGroups/xxx/providers/Microsoft.MachineLearningServices/workspaces/xxx/codes/xxx/versions/xxx entry_script: driver/azureml_user/parallel_run/prompt_flow_entry.py environment: azureml:/subscriptions/xxx/resourceGroups/xxx/providers/Microsoft.MachineLearningServices/workspaces/xxx/environments/xxx/versions/xxx program_arguments: --amlbi_pf_enabled True --amlbi_pf_run_mode component --amlbi_mini_batch_rows - 1 --amlbi_file_format jsonl --amlbi_pf_connections "$[[classify_with_llm.connection=${{inputs.connections.classify_with_llm.connection}},]]$[[classify_with_llm.deployment_name=${{inputs.connections.classify_with_llm.deployment_name}},]]$[[summarize_text_content.connection=${{inputs.connections.summarize_text_content.connection}},]]$[[summarize_text_content.deployment_name=${{inputs.connections.summarize_text_content.deployment_name}},]]" - --amlbi_pf_input_groundtruth ${{inputs.groundtruth}} --amlbi_pf_input_prediction - ${{inputs.prediction}} + 1 --amlbi_file_format jsonl $[[--amlbi_pf_run_outputs ${{inputs.run_outputs}}]] + --amlbi_pf_debug_info ${{outputs.debug_info}} --amlbi_pf_connections "$[[classify_with_llm.connection=${{inputs.connections.classify_with_llm.connection}},]]$[[summarize_text_content.connection=${{inputs.connections.summarize_text_content.connection}},]]" + --amlbi_pf_deployment_names 
"$[[classify_with_llm.deployment_name=${{inputs.connections.classify_with_llm.deployment_name}},]]$[[summarize_text_content.deployment_name=${{inputs.connections.summarize_text_content.deployment_name}},]]" + --amlbi_pf_model_names "$[[classify_with_llm.model=${{inputs.connections.classify_with_llm.model}},]]$[[summarize_text_content.model=${{inputs.connections.summarize_text_content.model}},]]" + --amlbi_pf_input_url ${{inputs.url}} type: run_function type: parallel version: 1.0.0 diff --git a/src/promptflow/tests/test_configs/flows/saved_component_spec/parallel_anonymous.yaml b/src/promptflow/tests/test_configs/flows/saved_component_spec/parallel_anonymous.yaml deleted file mode 100644 index f47c9c4d941..00000000000 --- a/src/promptflow/tests/test_configs/flows/saved_component_spec/parallel_anonymous.yaml +++ /dev/null @@ -1,73 +0,0 @@ -creation_context: - created_at: xxx - created_by: xxx - created_by_type: xxx - last_modified_at: xxx - last_modified_by: xxx - last_modified_by_type: xxx -display_name: azureml_anonymous_flow -error_threshold: -1 -id: azureml:/subscriptions/xxx/resourceGroups/xxx/providers/Microsoft.MachineLearningServices/workspaces/xxx/components/xxx/versions/xxx -input_data: ${{inputs.data}} -inputs: - connections.classify_with_llm.connection: - default: azure_open_ai_connection - optional: true - type: string - connections.classify_with_llm.deployment_name: - default: text-davinci-003 - optional: true - type: string - connections.summarize_text_content.connection: - default: azure_open_ai_connection - optional: true - type: string - connections.summarize_text_content.deployment_name: - default: text-davinci-003 - optional: true - type: string - data: - optional: false - type: uri_folder - groundtruth: - default: '1' - optional: false - type: string - prediction: - default: ${{batch_run.outputs.category}} - optional: false - type: string - run_outputs: - optional: true - type: uri_folder - url: - default: https://www.microsoft.com/en-us/d/xbox-wireless-controller-stellar-shift-special-edition/94fbjc7h0h6h - optional: false - type: string -is_deterministic: true -logging_level: INFO -max_concurrency_per_instance: 1 -mini_batch_error_threshold: 0 -mini_batch_size: '1' -name: azureml_anonymous_flow -outputs: - debug_info: - type: uri_folder - flow_outputs: - type: uri_folder -retry_settings: - max_retries: 2 - timeout: 3600 -task: - append_row_to: ${{outputs.flow_outputs}} - code: azureml:/subscriptions/xxx/resourceGroups/xxx/providers/Microsoft.MachineLearningServices/workspaces/xxx/codes/xxx/versions/xxx - entry_script: driver/azureml_user/parallel_run/prompt_flow_entry.py - environment: azureml:/subscriptions/xxx/resourceGroups/xxx/providers/Microsoft.MachineLearningServices/workspaces/xxx/environments/xxx/versions/xxx - program_arguments: --amlbi_pf_enabled True --amlbi_pf_run_mode component --amlbi_mini_batch_rows - 1 --amlbi_file_format jsonl $[[--amlbi_pf_run_outputs ${{inputs.run_outputs}}]] - --amlbi_pf_debug_info ${{outputs.debug_info}} --amlbi_pf_connections "$[[classify_with_llm.connection=${{inputs.connections.classify_with_llm.connection}},]]$[[classify_with_llm.deployment_name=${{inputs.connections.classify_with_llm.deployment_name}},]]$[[summarize_text_content.connection=${{inputs.connections.summarize_text_content.connection}},]]$[[summarize_text_content.deployment_name=${{inputs.connections.summarize_text_content.deployment_name}},]]" - --amlbi_pf_input_url ${{inputs.url}} --amlbi_pf_input_groundtruth ${{inputs.groundtruth}} - --amlbi_pf_input_prediction 
${{inputs.prediction}} - type: run_function -type: parallel -version: c8160529-52a3-0626-39da-3040f0a43655 diff --git a/src/promptflow/tests/test_configs/flows/web_classification/flow.dag.yaml b/src/promptflow/tests/test_configs/flows/web_classification/flow.dag.yaml index ebcb2836255..e5c71f474be 100644 --- a/src/promptflow/tests/test_configs/flows/web_classification/flow.dag.yaml +++ b/src/promptflow/tests/test_configs/flows/web_classification/flow.dag.yaml @@ -1,3 +1,4 @@ +$schema: https://azuremlschemas.azureedge.net/promptflow/latest/Flow.schema.json inputs: url: type: string From b3fe6677a2b985422282ad6f844e6e0bf1a5976c Mon Sep 17 00:00:00 2001 From: zhangxingzhi Date: Sun, 8 Oct 2023 14:31:39 +0800 Subject: [PATCH 2/6] doc: example and doc for flow in pipeline --- docs/cloud/index.md | 1 + docs/how-to-guides/init-and-test-a-flow.md | 4 +- docs/how-to-guides/use-flow-in-pipeline.md | 79 +++++++ .../standard/web_classification/README.md | 128 ++++++++++ .../classify_with_llm.jinja2 | 21 ++ .../web_classification/convert_to_dict.py | 12 + .../standard/web_classification/data.jsonl | 3 + .../fetch_text_content_from_url.py | 30 +++ .../standard/web_classification/flow.dag.yaml | 90 +++++++ .../web_classification/prepare_examples.py | 44 ++++ .../web_classification/requirements.txt | 3 + .../flows/standard/web_classification/run.yml | 4 + .../web_classification/run_evaluation.yml | 7 + .../summarize_text_content.jinja2 | 7 + .../summarize_text_content__variant_1.jinja2 | 7 + .../tutorials/flow-in-pipeline/pipeline.ipynb | 220 ++++++++++++++++++ 16 files changed, 659 insertions(+), 1 deletion(-) create mode 100644 docs/how-to-guides/use-flow-in-pipeline.md create mode 100644 examples/flows/standard/web_classification/README.md create mode 100644 examples/flows/standard/web_classification/classify_with_llm.jinja2 create mode 100644 examples/flows/standard/web_classification/convert_to_dict.py create mode 100644 examples/flows/standard/web_classification/data.jsonl create mode 100644 examples/flows/standard/web_classification/fetch_text_content_from_url.py create mode 100644 examples/flows/standard/web_classification/flow.dag.yaml create mode 100644 examples/flows/standard/web_classification/prepare_examples.py create mode 100644 examples/flows/standard/web_classification/requirements.txt create mode 100644 examples/flows/standard/web_classification/run.yml create mode 100644 examples/flows/standard/web_classification/run_evaluation.yml create mode 100644 examples/flows/standard/web_classification/summarize_text_content.jinja2 create mode 100644 examples/flows/standard/web_classification/summarize_text_content__variant_1.jinja2 create mode 100644 examples/tutorials/flow-in-pipeline/pipeline.ipynb diff --git a/docs/cloud/index.md b/docs/cloud/index.md index f01219b0428..da3340301bd 100644 --- a/docs/cloud/index.md +++ b/docs/cloud/index.md @@ -7,5 +7,6 @@ You can develope your flow locally and seamlessly move the experience to azure c :maxdepth: 1 azureai/quick-start azureai/deploy-to-azure-appservice +azureai/flow-in-pipeline azureai/faq ``` \ No newline at end of file diff --git a/docs/how-to-guides/init-and-test-a-flow.md b/docs/how-to-guides/init-and-test-a-flow.md index 4afaeff38d5..0402529a90e 100644 --- a/docs/how-to-guides/init-and-test-a-flow.md +++ b/docs/how-to-guides/init-and-test-a-flow.md @@ -298,4 +298,6 @@ Break points and debugging functionalities for the Python steps in your flow. 
Ju
 
 ## Next steps
 
-- [Add conditional control to a flow](./add-conditional-control-to-a-flow.md)
\ No newline at end of file
+- [Add conditional control to a flow](./add-conditional-control-to-a-flow.md)
+- [Run and evaluate a flow](./run-and-evaluate-a-flow.md)
+- [Use Flow in Pipeline Job](./use-flow-in-pipeline.md)
diff --git a/docs/how-to-guides/use-flow-in-pipeline.md b/docs/how-to-guides/use-flow-in-pipeline.md
new file mode 100644
index 00000000000..51acda454d8
--- /dev/null
+++ b/docs/how-to-guides/use-flow-in-pipeline.md
@@ -0,0 +1,79 @@
+# Use Flow in Pipeline Job
+
+:::{admonition} Experimental feature
+This is an experimental feature, and may change at any time. Learn [more](faq.md#stable-vs-experimental).
+:::
+
+After you have developed and tested the flow in [init and test a flow](./init-and-test-a-flow.md), this guide will help you learn how to use a flow as a parallel component in a pipeline job on AzureML, so that you can integrate the created flow with existing pipelines and process large amounts of data.
+
+:::{admonition} Pre-requirements
+- Customers need to install the extension `ml>=2.21.0` to enable this feature in the CLI, and the package `azure-ai-ml>=1.11.0` to enable it in the SDK;
+- Customers need to put `$schema` in the target `flow.dag.yaml` to enable this feature;
+- Customers need to generate `flow.tools.json` for the target flow before the usage below. Usually the generation can be done by `pf flow validate`.
+:::
+
+For more information about AzureML and components:
+- [Install and set up the CLI(v2)](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-configure-cli?view=azureml-api-2&tabs=public)
+- [Install and set up the SDK(v2)](https://learn.microsoft.com/en-us/python/api/overview/azure/ai-ml-readme?view=azure-python)
+- [What is a pipeline](https://learn.microsoft.com/en-us/azure/machine-learning/concept-ml-pipelines?view=azureml-api-2)
+- [What is a component](https://learn.microsoft.com/en-us/azure/machine-learning/concept-component?view=azureml-api-2)
+
+## Register a flow as a component
+
+Customer can register a flow as a component with either CLI or SDK. 
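+
+Note the pre-requirements above: loading a flow as a component only works when the target `flow.dag.yaml` carries the promptflow `$schema` line. A minimal header sketch is shown before the registration examples below; the `url` input here is just the one from the web classification sample, so adapt it to your own flow:
+
+```yaml
+# flow.dag.yaml (header only; nodes and outputs omitted)
+$schema: https://azuremlschemas.azureedge.net/promptflow/latest/Flow.schema.json
+inputs:
+  url:
+    type: string
+```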
+
+::::{tab-set}
+:::{tab-item} CLI
+:sync: CLI
+
+```bash
+# Validate flow as a component
+az ml component validate --file standard/web_classification/flow.dag.yaml
+
+# Register flow as a component
+# Default component name will be the name of flow folder, which is web_classification here; default version will be "1"
+az ml component create --file standard/web_classification/flow.dag.yaml
+
+# Register flow as a component with parameters override
+az ml component create --file standard/web_classification/flow.dag.yaml --version 2 --set name=web_classification_updated
+```
+
+:::
+
+:::{tab-item} SDK
+:sync: SDK
+
+```python
+from azure.ai.ml import MLClient, load_component
+
+ml_client = MLClient()
+
+# Validate flow as a component
+ml_client.components.validate("standard/web_classification/flow.dag.yaml")
+
+# Register flow as a component
+# Default component name will be the name of flow folder, which is web_classification here; default version will be "1"
+flow_component = load_component("standard/web_classification/flow.dag.yaml")
+ml_client.components.create_or_update(flow_component)
+
+# Register flow as a component with parameters override
+ml_client.components.create_or_update(
+    "standard/web_classification/flow.dag.yaml",
+    version="2",
+    params_override=[
+        {"name": "web_classification_updated"}
+    ]
+)
+```
+
+:::
+
+::::
+
+After a flow is registered as a component, it can be referenced in a pipeline job like [regular registered components](https://github.com/Azure/azureml-examples/tree/main/cli/jobs/pipelines-with-components/basics/1b_e2e_registered_components).
+
+## Directly use a flow in a pipeline job
+
+Besides explicitly registering a flow as a component, customers can also use a flow directly in a pipeline job:
+- [CLI sample](https://github.com/Azure/azureml-examples/tree/zhangxingzhi/flow-in-pipeline/cli/jobs/pipelines-with-components/flow_in_pipeline/1a_flow_in_pipeline)
+- [SDK sample](https://github.com/Azure/azureml-examples/blob/zhangxingzhi/flow-in-pipeline/sdk/python/jobs/pipelines/1l_flow_in_pipeline/flow_in_pipeline.ipynb)
diff --git a/examples/flows/standard/web_classification/README.md b/examples/flows/standard/web_classification/README.md
new file mode 100644
index 00000000000..a19fed957c6
--- /dev/null
+++ b/examples/flows/standard/web_classification/README.md
@@ -0,0 +1,128 @@
+# Web Classification
+
+This is a flow demonstrating multi-class classification with an LLM. Given a URL, it will classify the URL into one web category with just a few shots, simple summarization, and classification prompts.
+
+## Tools used in this flow
+- LLM Tool
+- Python Tool
+
+## What you will learn
+
+In this flow, you will learn
+- how to compose a classification flow with an LLM.
+- how to feed few-shot examples to the LLM classifier.
+
+## Prerequisites
+
+Install the promptflow SDK and other dependencies:
+```bash
+pip install -r requirements.txt
+```
+
+## Getting Started
+
+### 1. Setup connection
+
+If you are using Azure OpenAI, prepare your resource following this [instruction](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal) and get your `api_key` if you don't have one.
+
+```bash
+# Override keys with --set to avoid yaml file changes
+pf connection create --file ../../../connections/azure_openai.yml --set api_key=<your_api_key> api_base=<your_api_base> --name open_ai_connection
+```
+
+If you are using OpenAI, sign up for an account on the [OpenAI website](https://openai.com/), log in, and [find your personal API key](https://platform.openai.com/account/api-keys). 
+
+```shell
+pf connection create --file ../../../connections/openai.yml --set api_key=<your_api_key>
+```
+
+### 2. Configure the flow with your connection
+`flow.dag.yaml` is already configured with a connection named `open_ai_connection`.
+
+### 3. Test flow with single line data
+
+```bash
+# test with default input value in flow.dag.yaml
+pf flow test --flow .
+# test with user specified inputs
+pf flow test --flow . --inputs url='https://www.youtube.com/watch?v=kYqRtjDBci8'
+```
+
+### 4. Run with multi-line data
+
+```bash
+# create run using command line args
+pf run create --flow . --data ./data.jsonl --stream
+
+# (Optional) create a random run name
+run_name="web_classification_"$(openssl rand -hex 12)
+# create run using yaml file, run_name will be used in following contents, --name is optional
+pf run create --file run.yml --stream --name $run_name
+```
+
+```bash
+# list run
+pf run list
+# show run
+pf run show --name $run_name
+# show run outputs
+pf run show-details --name $run_name
+```
+
+### 5. Run with classification evaluation flow
+
+Create an `evaluation` run:
+```bash
+# (Optional) save previous run name into variable, and create a new random run name for further use
+prev_run_name=$run_name
+run_name="classification_accuracy_"$(openssl rand -hex 12)
+# create run using command line args
+pf run create --flow ../../evaluation/eval-classification-accuracy --data ./data.jsonl --column-mapping groundtruth='${data.answer}' prediction='${run.outputs.category}' --run $prev_run_name --stream
+# create run using yaml file, --name is optional
+pf run create --file run_evaluation.yml --run $prev_run_name --stream --name $run_name
+```
+
+```bash
+pf run show-details --name $run_name
+pf run show-metrics --name $run_name
+pf run visualize --name $run_name
+```
+
+
+### 6. Submit run to cloud
+```bash
+# set default workspace
+az account set -s <your_subscription_id>
+az configure --defaults group=<your_resource_group> workspace=<your_workspace_name>
+
+# create run
+pfazure run create --flow . --data ./data.jsonl --stream --runtime demo-mir
+# pfazure run create --flow . 
--data ./data.jsonl --stream # automatic runtime + +# (Optional) create a new random run name for further use +run_name="web_classification_"$(openssl rand -hex 12) + +# create run using yaml file, --name is optional +pfazure run create --file run.yml --runtime demo-mir --name $run_name +# pfazure run create --file run.yml --stream --name $run_name # automatic runtime + + +pfazure run stream --name $run_name +pfazure run show-details --name $run_name +pfazure run show-metrics --name $run_name + + +# (Optional) save previous run name into variable, and create a new random run name for further use +prev_run_name=$run_name +run_name="classification_accuracy_"$(openssl rand -hex 12) + +# create evaluation run, --name is optional +pfazure run create --flow ../../evaluation/eval-classification-accuracy --data ./data.jsonl --column-mapping groundtruth='${data.answer}' prediction='${run.outputs.category}' --run $prev_run_name --runtime demo-mir +pfazure run create --file run_evaluation.yml --run $prev_run_name --stream --name $run_name --runtime demo-mir + +pfazure run stream --name $run_name +pfazure run show --name $run_name +pfazure run show-details --name $run_name +pfazure run show-metrics --name $run_name +pfazure run visualize --name $run_name +``` \ No newline at end of file diff --git a/examples/flows/standard/web_classification/classify_with_llm.jinja2 b/examples/flows/standard/web_classification/classify_with_llm.jinja2 new file mode 100644 index 00000000000..6d7c3da4005 --- /dev/null +++ b/examples/flows/standard/web_classification/classify_with_llm.jinja2 @@ -0,0 +1,21 @@ +system: +Your task is to classify a given url into one of the following categories: +Movie, App, Academic, Channel, Profile, PDF or None based on the text content information. +The classification will be based on the url, the webpage text content summary, or both. + +user: +The selection range of the value of "category" must be within "Movie", "App", "Academic", "Channel", "Profile", "PDF" and "None". +The selection range of the value of "evidence" must be within "Url", "Text content", and "Both". +Here are a few examples: +{% for ex in examples %} +URL: {{ex.url}} +Text content: {{ex.text_content}} +OUTPUT: +{"category": "{{ex.category}}", "evidence": "{{ex.evidence}}"} + +{% endfor %} + +For a given URL and text content, classify the url to complete the category and indicate evidence: +URL: {{url}} +Text content: {{text_content}}. 
+OUTPUT: \ No newline at end of file diff --git a/examples/flows/standard/web_classification/convert_to_dict.py b/examples/flows/standard/web_classification/convert_to_dict.py new file mode 100644 index 00000000000..8e9490b801a --- /dev/null +++ b/examples/flows/standard/web_classification/convert_to_dict.py @@ -0,0 +1,12 @@ +import json + +from promptflow import tool + + +@tool +def convert_to_dict(input_str: str): + try: + return json.loads(input_str) + except Exception as e: + print("The input is not valid, error: {}".format(e)) + return {"category": "None", "evidence": "None"} diff --git a/examples/flows/standard/web_classification/data.jsonl b/examples/flows/standard/web_classification/data.jsonl new file mode 100644 index 00000000000..248b61c6e95 --- /dev/null +++ b/examples/flows/standard/web_classification/data.jsonl @@ -0,0 +1,3 @@ +{"url": "https://www.youtube.com/watch?v=kYqRtjDBci8", "answer": "Channel", "evidence": "Both"} +{"url": "https://arxiv.org/abs/2307.04767", "answer": "Academic", "evidence": "Both"} +{"url": "https://play.google.com/store/apps/details?id=com.twitter.android", "answer": "App", "evidence": "Both"} diff --git a/examples/flows/standard/web_classification/fetch_text_content_from_url.py b/examples/flows/standard/web_classification/fetch_text_content_from_url.py new file mode 100644 index 00000000000..1ff7f792909 --- /dev/null +++ b/examples/flows/standard/web_classification/fetch_text_content_from_url.py @@ -0,0 +1,30 @@ +import bs4 +import requests + +from promptflow import tool + + +@tool +def fetch_text_content_from_url(url: str): + # Send a request to the URL + try: + headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.35" + } + response = requests.get(url, headers=headers) + if response.status_code == 200: + # Parse the HTML content using BeautifulSoup + soup = bs4.BeautifulSoup(response.text, "html.parser") + soup.prettify() + return soup.get_text()[:2000] + else: + msg = ( + f"Get url failed with status code {response.status_code}.\nURL: {url}\nResponse: " + f"{response.text[:100]}" + ) + print(msg) + return "No available content" + except Exception as e: + print("Get url failed with error: {}".format(e)) + return "No available content" diff --git a/examples/flows/standard/web_classification/flow.dag.yaml b/examples/flows/standard/web_classification/flow.dag.yaml new file mode 100644 index 00000000000..06f14400ede --- /dev/null +++ b/examples/flows/standard/web_classification/flow.dag.yaml @@ -0,0 +1,90 @@ +$schema: https://azuremlschemas.azureedge.net/promptflow/latest/Flow.schema.json +inputs: + url: + type: string + default: https://play.google.com/store/apps/details?id=com.twitter.android +outputs: + category: + type: string + reference: ${convert_to_dict.output.category} + evidence: + type: string + reference: ${convert_to_dict.output.evidence} +nodes: +- name: fetch_text_content_from_url + type: python + source: + type: code + path: fetch_text_content_from_url.py + inputs: + url: ${inputs.url} +- name: summarize_text_content + use_variants: true +- name: prepare_examples + type: python + source: + type: code + path: prepare_examples.py + inputs: {} +- name: classify_with_llm + type: llm + source: + type: code + path: classify_with_llm.jinja2 + inputs: + # This is to easily switch between openai and azure openai. + # deployment_name is required by azure openai, model is required by openai. 
+ deployment_name: gpt-35-turbo + model: gpt-3.5-turbo + max_tokens: 128 + temperature: 0.2 + url: ${inputs.url} + text_content: ${summarize_text_content.output} + examples: ${prepare_examples.output} + connection: open_ai_connection + api: chat +- name: convert_to_dict + type: python + source: + type: code + path: convert_to_dict.py + inputs: + input_str: ${classify_with_llm.output} +node_variants: + summarize_text_content: + variants: + variant_0: + node: + type: llm + source: + type: code + path: summarize_text_content.jinja2 + inputs: + # This is to easily switch between openai and azure openai. + # deployment_name is required by azure openai, model is required by openai. + deployment_name: gpt-35-turbo + model: gpt-3.5-turbo + max_tokens: 128 + temperature: 0.2 + text: ${fetch_text_content_from_url.output} + connection: open_ai_connection + api: chat + variant_1: + node: + type: llm + source: + type: code + path: summarize_text_content__variant_1.jinja2 + inputs: + # This is to easily switch between openai and azure openai. + # deployment_name is required by azure openai, model is required by openai. + deployment_name: gpt-35-turbo + model: gpt-3.5-turbo + max_tokens: 256 + temperature: 0.3 + text: ${fetch_text_content_from_url.output} + connection: open_ai_connection + api: chat + default_variant_id: variant_0 +environment: + python_requirements_txt: requirements.txt diff --git a/examples/flows/standard/web_classification/prepare_examples.py b/examples/flows/standard/web_classification/prepare_examples.py new file mode 100644 index 00000000000..c4ccb76d732 --- /dev/null +++ b/examples/flows/standard/web_classification/prepare_examples.py @@ -0,0 +1,44 @@ +from promptflow import tool + + +@tool +def prepare_examples(): + return [ + { + "url": "https://play.google.com/store/apps/details?id=com.spotify.music", + "text_content": "Spotify is a free music and podcast streaming app with millions of songs, albums, and " + "original podcasts. It also offers audiobooks, so users can enjoy thousands of stories. " + "It has a variety of features such as creating and sharing music playlists, discovering " + "new music, and listening to popular and exclusive podcasts. It also has a Premium " + "subscription option which allows users to download and listen offline, and access " + "ad-free music. It is available on all devices and has a variety of genres and artists " + "to choose from.", + "category": "App", + "evidence": "Both", + }, + { + "url": "https://www.youtube.com/channel/UC_x5XG1OV2P6uZZ5FSM9Ttw", + "text_content": "NFL Sunday Ticket is a service offered by Google LLC that allows users to watch NFL " + "games on YouTube. It is available in 2023 and is subject to the terms and privacy policy " + "of Google LLC. It is also subject to YouTube's terms of use and any applicable laws.", + "category": "Channel", + "evidence": "URL", + }, + { + "url": "https://arxiv.org/abs/2303.04671", + "text_content": "Visual ChatGPT is a system that enables users to interact with ChatGPT by sending and " + "receiving not only languages but also images, providing complex visual questions or " + "visual editing instructions, and providing feedback and asking for corrected results. " + "It incorporates different Visual Foundation Models and is publicly available. 
Experiments " + "show that Visual ChatGPT opens the door to investigating the visual roles of ChatGPT with " + "the help of Visual Foundation Models.", + "category": "Academic", + "evidence": "Text content", + }, + { + "url": "https://ab.politiaromana.ro/", + "text_content": "There is no content available for this text.", + "category": "None", + "evidence": "None", + }, + ] diff --git a/examples/flows/standard/web_classification/requirements.txt b/examples/flows/standard/web_classification/requirements.txt new file mode 100644 index 00000000000..ccef8cfd3cc --- /dev/null +++ b/examples/flows/standard/web_classification/requirements.txt @@ -0,0 +1,3 @@ +promptflow[azure] +promptflow-tools +bs4 \ No newline at end of file diff --git a/examples/flows/standard/web_classification/run.yml b/examples/flows/standard/web_classification/run.yml new file mode 100644 index 00000000000..9522372f0e0 --- /dev/null +++ b/examples/flows/standard/web_classification/run.yml @@ -0,0 +1,4 @@ +$schema: https://azuremlschemas.azureedge.net/promptflow/latest/Run.schema.json +flow: . +data: data.jsonl +variant: ${summarize_text_content.variant_1} \ No newline at end of file diff --git a/examples/flows/standard/web_classification/run_evaluation.yml b/examples/flows/standard/web_classification/run_evaluation.yml new file mode 100644 index 00000000000..50090e7d2e3 --- /dev/null +++ b/examples/flows/standard/web_classification/run_evaluation.yml @@ -0,0 +1,7 @@ +$schema: https://azuremlschemas.azureedge.net/promptflow/latest/Run.schema.json +flow: ../../evaluation/eval-classification-accuracy +data: data.jsonl +run: web_classification_variant_1_20230724_173442_973403 # replace with your run name +column_mapping: + groundtruth: ${data.answer} + prediction: ${run.outputs.category} \ No newline at end of file diff --git a/examples/flows/standard/web_classification/summarize_text_content.jinja2 b/examples/flows/standard/web_classification/summarize_text_content.jinja2 new file mode 100644 index 00000000000..81078019db8 --- /dev/null +++ b/examples/flows/standard/web_classification/summarize_text_content.jinja2 @@ -0,0 +1,7 @@ +system: +Please summarize the following text in one paragraph. 100 words. +Do not add any information that is not in the text. + +user: +Text: {{text}} +Summary: \ No newline at end of file diff --git a/examples/flows/standard/web_classification/summarize_text_content__variant_1.jinja2 b/examples/flows/standard/web_classification/summarize_text_content__variant_1.jinja2 new file mode 100644 index 00000000000..5fb816079d5 --- /dev/null +++ b/examples/flows/standard/web_classification/summarize_text_content__variant_1.jinja2 @@ -0,0 +1,7 @@ +system: +Please summarize some keywords of this paragraph and have some details of each keywords. +Do not add any information that is not in the text. 
+
+user:
+Text: {{text}}
+Summary: 
\ No newline at end of file
diff --git a/examples/tutorials/flow-in-pipeline/pipeline.ipynb b/examples/tutorials/flow-in-pipeline/pipeline.ipynb
new file mode 100644
index 00000000000..bc7489998e9
--- /dev/null
+++ b/examples/tutorials/flow-in-pipeline/pipeline.ipynb
@@ -0,0 +1,220 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Use Flow as Component in Pipeline\n",
+    "\n",
+    "**Requirements** - In order to benefit from this tutorial, you will need:\n",
+    "- A basic understanding of Machine Learning\n",
+    "- An Azure account with an active subscription - [Create an account for free](https://azure.microsoft.com/free/?WT.mc_id=A261C142F)\n",
+    "- An Azure ML workspace with a compute cluster - [Configure workspace](../../configuration.ipynb)\n",
+    "- A Python environment\n",
+    "- Installed Azure Machine Learning Python SDK v2 - [install instructions](../../../README.md) - check the getting started section\n",
+    "- **Permission to access connections has been assigned to the used compute cluster**\n",
+    "\n",
+    "**Learning Objectives** - By the end of this tutorial, you should be able to:\n",
+    "- Connect to your AML workspace from the Python SDK\n",
+    "- Create a `Pipeline` that loads a flow as a component from YAML\n",
+    "\n",
+    "**Motivations** - This notebook explains how to load a flow as a component and run it in a pipeline job."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 1. Connect to Azure Machine Learning Workspace\n",
+    "\n",
+    "The [workspace](https://docs.microsoft.com/en-us/azure/machine-learning/concept-workspace) is the top-level resource for Azure Machine Learning, providing a centralized place to work with all the artifacts you create when you use Azure Machine Learning. In this section we will connect to the workspace in which the job will be run.\n",
+    "\n",
+    "## 1.1 Import the required libraries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# import required libraries\n",
+    "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n",
+    "\n",
+    "from azure.ai.ml import MLClient, load_component, Input\n",
+    "from azure.ai.ml.constants import AssetTypes\n",
+    "from azure.ai.ml.dsl import pipeline\n",
+    "from azure.ai.ml.entities import ResourceConfiguration"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1.2 Configure credential\n",
+    "\n",
+    "We are using `DefaultAzureCredential` to get access to the workspace. \n",
+    "`DefaultAzureCredential` should be capable of handling most Azure SDK authentication scenarios. \n",
+    "\n",
+    "If it does not work for you, see these references for more available credentials: [configure credential example](../../configuration.ipynb), [azure-identity reference doc](https://docs.microsoft.com/en-us/python/api/azure-identity/azure.identity?view=azure-python)."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " credential = DefaultAzureCredential()\n", + " # Check if given credential can get token successfully.\n", + " credential.get_token(\"https://management.azure.com/.default\")\n", + "except Exception as ex:\n", + " # Fall back to InteractiveBrowserCredential in case DefaultAzureCredential not work\n", + " credential = InteractiveBrowserCredential()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1.3 Get a handle to the workspace\n", + "\n", + "We use config file to connect to a workspace. The Azure ML workspace should be configured with computer cluster. [Check this notebook for configure a workspace](../../configuration.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get a handle to workspace\n", + "ml_client = MLClient.from_config(credential=credential)\n", + "\n", + "# Retrieve an already attached Azure Machine Learning Compute.\n", + "cluster_name = \"gpu-cluster\"\n", + "print(ml_client.compute.get(cluster_name))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 2. Load flow as component\n", + "\n", + "We suppose that there has already been a flow authored with Promptflow SDK/CLI/portal. Then we can load its flow dag yaml as a component like regular component specs." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "flow_component = load_component(\"./web_classification/flow.dag.yaml\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 3. Pipeline job\n", + "## 3.1 Build pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data_input = Input(path=\"./data/data.jsonl\", type=AssetTypes.URI_FILE)\n", + "\n", + "@pipeline()\n", + "def pipeline_func_with_flow(data):\n", + " flow_node = flow_component(\n", + " data=data,\n", + " url=\"${data.url}\",\n", + " connections={\n", + " \"summarize_text_content\": {\n", + " \"connection\": \"azure_open_ai_connection\",\n", + " \"deployment_name\": \"text-davinci-003\",\n", + " },\n", + " \"classify_with_llm\": {\n", + " \"connection\": \"azure_open_ai_connection\",\n", + " \"deployment_name\": \"text-davinci-003\",\n", + " },\n", + " },\n", + " )\n", + " flow_node.compute = \"cpu-cluster\"\n", + "\n", + "\n", + "# create pipeline instance\n", + "pipeline_job = pipeline_func_with_flow(data=data_input)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3.2 Submit pipeline job" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# submit job to workspace\n", + "pipeline_job = ml_client.jobs.create_or_update(\n", + " pipeline_job, experiment_name=\"pipeline_samples\"\n", + ")\n", + "pipeline_job" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Wait until the job completes\n", + "ml_client.jobs.stream(pipeline_job.name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Next Steps\n", + "You can see further examples of running a pipeline job [here](../)" + ] + } + ], + "metadata": { + "description": { + "description": "Create pipeline using components to run a distributed job with tensorflow" + }, + "kernelspec": { + "display_name": "Python 3", + 
"language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From c4f95c5ceabec0c1decacb19429d57143392be5d Mon Sep 17 00:00:00 2001 From: zhangxingzhi Date: Wed, 11 Oct 2023 10:43:45 +0800 Subject: [PATCH 3/6] fix: resolve comments --- docs/how-to-guides/init-and-test-a-flow.md | 2 +- docs/how-to-guides/use-flow-in-pipeline.md | 22 ++- .../standard/web_classification/README.md | 128 ------------------ .../classify_with_llm.jinja2 | 21 --- .../web_classification/convert_to_dict.py | 12 -- .../standard/web_classification/data.jsonl | 3 - .../fetch_text_content_from_url.py | 30 ---- .../standard/web_classification/flow.dag.yaml | 90 ------------ .../web_classification/prepare_examples.py | 44 ------ .../web_classification/requirements.txt | 3 - .../flows/standard/web_classification/run.yml | 4 - .../web_classification/run_evaluation.yml | 7 - .../summarize_text_content.jinja2 | 7 - .../summarize_text_content__variant_1.jinja2 | 7 - 14 files changed, 9 insertions(+), 371 deletions(-) delete mode 100644 examples/flows/standard/web_classification/README.md delete mode 100644 examples/flows/standard/web_classification/classify_with_llm.jinja2 delete mode 100644 examples/flows/standard/web_classification/convert_to_dict.py delete mode 100644 examples/flows/standard/web_classification/data.jsonl delete mode 100644 examples/flows/standard/web_classification/fetch_text_content_from_url.py delete mode 100644 examples/flows/standard/web_classification/flow.dag.yaml delete mode 100644 examples/flows/standard/web_classification/prepare_examples.py delete mode 100644 examples/flows/standard/web_classification/requirements.txt delete mode 100644 examples/flows/standard/web_classification/run.yml delete mode 100644 examples/flows/standard/web_classification/run_evaluation.yml delete mode 100644 examples/flows/standard/web_classification/summarize_text_content.jinja2 delete mode 100644 examples/flows/standard/web_classification/summarize_text_content__variant_1.jinja2 diff --git a/docs/how-to-guides/init-and-test-a-flow.md b/docs/how-to-guides/init-and-test-a-flow.md index 0402529a90e..9fab857b0c5 100644 --- a/docs/how-to-guides/init-and-test-a-flow.md +++ b/docs/how-to-guides/init-and-test-a-flow.md @@ -300,4 +300,4 @@ Break points and debugging functionalities for the Python steps in your flow. Ju - [Add conditional control to a flow](./add-conditional-control-to-a-flow.md) - [Run and evaluate a flow](./run-and-evaluate-a-flow.md) -- [Use Flow in Pipeline Job](./use-flow-in-pipeline.md) +- [Use flow in pipeline job](./use-flow-in-pipeline.md) diff --git a/docs/how-to-guides/use-flow-in-pipeline.md b/docs/how-to-guides/use-flow-in-pipeline.md index 51acda454d8..301263ff8f0 100644 --- a/docs/how-to-guides/use-flow-in-pipeline.md +++ b/docs/how-to-guides/use-flow-in-pipeline.md @@ -1,4 +1,4 @@ -# Use Flow in Pipeline Job +# Use flow in pipeline job :::{admonition} Experimental feature This is an experimental feature, and may change at any time. Learn [more](faq.md#stable-vs-experimental). @@ -27,15 +27,12 @@ Customer can register a flow as a component with either CLI or SDK. 
:sync: CLI ```bash -# Validate flow as a component -az ml component validate --file standard/web_classification/flow.dag.yaml - # Register flow as a component -# Default component name will be the name of flow folder, which is web_classification here; default version will be "1" -az ml component create --file standard/web_classification/flow.dag.yaml +# Default component name will be the name of flow folder, which is not a valid component name, so we override it here; default version will be "1" +az ml component create --file standard/web-classification/flow.dag.yaml --set name=web_classification # Register flow as a component with parameters override -az ml component create --file standard/web_classification/flow.dag.yaml --version 2 --set name=web_classification_updated +az ml component create --file standard/web-classification/flow.dag.yaml --version 2 --set name=web_classification_updated ``` ::: @@ -48,20 +45,17 @@ from azure.ai.ml import MLClient, load_component ml_client = MLClient() -# Validate flow as a component -ml_client.components.validate("standard/web_classification/flow.dag.yaml") - # Register flow as a component -# Default component name will be the name of flow folder, which is web_classification here; default version will be "1" -flow_component = load_component("standard/web_classification/flow.dag.yaml") +# Default component name will be the name of flow folder, which is web-classification here; default version will be "1" +flow_component = load_component("standard/web-classification/flow.dag.yaml") ml_client.components.create_or_update(flow_component) # Register flow as a component with parameters override ml_client.components.create_or_update( - "standard/web_classification/flow.dag.yaml", + "standard/web-classification/flow.dag.yaml", version="2", params_override=[ - {"name": "web_classification_updated"} + {"name": "web-classification_updated"} ] ) ``` diff --git a/examples/flows/standard/web_classification/README.md b/examples/flows/standard/web_classification/README.md deleted file mode 100644 index a19fed957c6..00000000000 --- a/examples/flows/standard/web_classification/README.md +++ /dev/null @@ -1,128 +0,0 @@ -# Web Classification - -This is a flow demonstrating multi-class classification with LLM. Given an url, it will classify the url into one web category with just a few shots, simple summarization and classification prompts. - -## Tools used in this flow -- LLM Tool -- Python Tool - -## What you will learn - -In this flow, you will learn -- how to compose a classification flow with LLM. -- how to feed few shots to LLM classifier. - -## Prerequisites - -Install promptflow sdk and other dependencies: -```bash -pip install -r requirements.txt -``` - -## Getting Started - -### 1. Setup connection - -If you are using Azure Open AI, prepare your resource follow this [instruction](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal) and get your `api_key` if you don't have one. - -```bash -# Override keys with --set to avoid yaml file changes -pf connection create --file ../../../connections/azure_openai.yml --set api_key= api_base= --name open_ai_connection -``` - -If you using OpenAI, sign up account [OpenAI website](https://openai.com/), login and [find personal API key](https://platform.openai.com/account/api-keys). - -```shell -pf connection create --file ../../../connections/openai.yml --set api_key= -``` - -### 2. 
Configure the flow with your connection -`flow.dag.yaml` is already configured with connection named `open_ai_connection`. - -### 3. Test flow with single line data - -```bash -# test with default input value in flow.dag.yaml -pf flow test --flow . -# test with user specified inputs -pf flow test --flow . --inputs url='https://www.youtube.com/watch?v=kYqRtjDBci8' -``` - -### 4. Run with multi-line data - -```bash -# create run using command line args -pf run create --flow . --data ./data.jsonl --stream - -# (Optional) create a random run name -run_name="web_classification_"$(openssl rand -hex 12) -# create run using yaml file, run_name will be used in following contents, --name is optional -pf run create --file run.yml --stream --name $run_name -``` - -```bash -# list run -pf run list -# show run -pf run show --name $run_name -# show run outputs -pf run show-details --name $run_name -``` - -### 5. Run with classification evaluation flow - -create `evaluation` run: -```bash -# (Optional) save previous run name into variable, and create a new random run name for further use -prev_run_name=$run_name -run_name="classification_accuracy_"$(openssl rand -hex 12) -# create run using command line args -pf run create --flow ../../evaluation/eval-classification-accuracy --data ./data.jsonl --column-mapping groundtruth='${data.answer}' prediction='${run.outputs.category}' --run $prev_run_name --stream -# create run using yaml file, --name is optional -pf run create --file run_evaluation.yml --run $prev_run_name --stream --name $run_name -``` - -```bash -pf run show-details --name $run_name -pf run show-metrics --name $run_name -pf run visualize --name $run_name -``` - - -### 6. Submit run to cloud -```bash -# set default workspace -az account set -s -az configure --defaults group= workspace= - -# create run -pfazure run create --flow . --data ./data.jsonl --stream --runtime demo-mir -# pfazure run create --flow . 
--data ./data.jsonl --stream # automatic runtime - -# (Optional) create a new random run name for further use -run_name="web_classification_"$(openssl rand -hex 12) - -# create run using yaml file, --name is optional -pfazure run create --file run.yml --runtime demo-mir --name $run_name -# pfazure run create --file run.yml --stream --name $run_name # automatic runtime - - -pfazure run stream --name $run_name -pfazure run show-details --name $run_name -pfazure run show-metrics --name $run_name - - -# (Optional) save previous run name into variable, and create a new random run name for further use -prev_run_name=$run_name -run_name="classification_accuracy_"$(openssl rand -hex 12) - -# create evaluation run, --name is optional -pfazure run create --flow ../../evaluation/eval-classification-accuracy --data ./data.jsonl --column-mapping groundtruth='${data.answer}' prediction='${run.outputs.category}' --run $prev_run_name --runtime demo-mir -pfazure run create --file run_evaluation.yml --run $prev_run_name --stream --name $run_name --runtime demo-mir - -pfazure run stream --name $run_name -pfazure run show --name $run_name -pfazure run show-details --name $run_name -pfazure run show-metrics --name $run_name -pfazure run visualize --name $run_name -``` \ No newline at end of file diff --git a/examples/flows/standard/web_classification/classify_with_llm.jinja2 b/examples/flows/standard/web_classification/classify_with_llm.jinja2 deleted file mode 100644 index 6d7c3da4005..00000000000 --- a/examples/flows/standard/web_classification/classify_with_llm.jinja2 +++ /dev/null @@ -1,21 +0,0 @@ -system: -Your task is to classify a given url into one of the following categories: -Movie, App, Academic, Channel, Profile, PDF or None based on the text content information. -The classification will be based on the url, the webpage text content summary, or both. - -user: -The selection range of the value of "category" must be within "Movie", "App", "Academic", "Channel", "Profile", "PDF" and "None". -The selection range of the value of "evidence" must be within "Url", "Text content", and "Both". -Here are a few examples: -{% for ex in examples %} -URL: {{ex.url}} -Text content: {{ex.text_content}} -OUTPUT: -{"category": "{{ex.category}}", "evidence": "{{ex.evidence}}"} - -{% endfor %} - -For a given URL and text content, classify the url to complete the category and indicate evidence: -URL: {{url}} -Text content: {{text_content}}. 
-OUTPUT: \ No newline at end of file diff --git a/examples/flows/standard/web_classification/convert_to_dict.py b/examples/flows/standard/web_classification/convert_to_dict.py deleted file mode 100644 index 8e9490b801a..00000000000 --- a/examples/flows/standard/web_classification/convert_to_dict.py +++ /dev/null @@ -1,12 +0,0 @@ -import json - -from promptflow import tool - - -@tool -def convert_to_dict(input_str: str): - try: - return json.loads(input_str) - except Exception as e: - print("The input is not valid, error: {}".format(e)) - return {"category": "None", "evidence": "None"} diff --git a/examples/flows/standard/web_classification/data.jsonl b/examples/flows/standard/web_classification/data.jsonl deleted file mode 100644 index 248b61c6e95..00000000000 --- a/examples/flows/standard/web_classification/data.jsonl +++ /dev/null @@ -1,3 +0,0 @@ -{"url": "https://www.youtube.com/watch?v=kYqRtjDBci8", "answer": "Channel", "evidence": "Both"} -{"url": "https://arxiv.org/abs/2307.04767", "answer": "Academic", "evidence": "Both"} -{"url": "https://play.google.com/store/apps/details?id=com.twitter.android", "answer": "App", "evidence": "Both"} diff --git a/examples/flows/standard/web_classification/fetch_text_content_from_url.py b/examples/flows/standard/web_classification/fetch_text_content_from_url.py deleted file mode 100644 index 1ff7f792909..00000000000 --- a/examples/flows/standard/web_classification/fetch_text_content_from_url.py +++ /dev/null @@ -1,30 +0,0 @@ -import bs4 -import requests - -from promptflow import tool - - -@tool -def fetch_text_content_from_url(url: str): - # Send a request to the URL - try: - headers = { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) " - "Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.35" - } - response = requests.get(url, headers=headers) - if response.status_code == 200: - # Parse the HTML content using BeautifulSoup - soup = bs4.BeautifulSoup(response.text, "html.parser") - soup.prettify() - return soup.get_text()[:2000] - else: - msg = ( - f"Get url failed with status code {response.status_code}.\nURL: {url}\nResponse: " - f"{response.text[:100]}" - ) - print(msg) - return "No available content" - except Exception as e: - print("Get url failed with error: {}".format(e)) - return "No available content" diff --git a/examples/flows/standard/web_classification/flow.dag.yaml b/examples/flows/standard/web_classification/flow.dag.yaml deleted file mode 100644 index 06f14400ede..00000000000 --- a/examples/flows/standard/web_classification/flow.dag.yaml +++ /dev/null @@ -1,90 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/promptflow/latest/Flow.schema.json -inputs: - url: - type: string - default: https://play.google.com/store/apps/details?id=com.twitter.android -outputs: - category: - type: string - reference: ${convert_to_dict.output.category} - evidence: - type: string - reference: ${convert_to_dict.output.evidence} -nodes: -- name: fetch_text_content_from_url - type: python - source: - type: code - path: fetch_text_content_from_url.py - inputs: - url: ${inputs.url} -- name: summarize_text_content - use_variants: true -- name: prepare_examples - type: python - source: - type: code - path: prepare_examples.py - inputs: {} -- name: classify_with_llm - type: llm - source: - type: code - path: classify_with_llm.jinja2 - inputs: - # This is to easily switch between openai and azure openai. - # deployment_name is required by azure openai, model is required by openai. 
- deployment_name: gpt-35-turbo - model: gpt-3.5-turbo - max_tokens: 128 - temperature: 0.2 - url: ${inputs.url} - text_content: ${summarize_text_content.output} - examples: ${prepare_examples.output} - connection: open_ai_connection - api: chat -- name: convert_to_dict - type: python - source: - type: code - path: convert_to_dict.py - inputs: - input_str: ${classify_with_llm.output} -node_variants: - summarize_text_content: - variants: - variant_0: - node: - type: llm - source: - type: code - path: summarize_text_content.jinja2 - inputs: - # This is to easily switch between openai and azure openai. - # deployment_name is required by azure openai, model is required by openai. - deployment_name: gpt-35-turbo - model: gpt-3.5-turbo - max_tokens: 128 - temperature: 0.2 - text: ${fetch_text_content_from_url.output} - connection: open_ai_connection - api: chat - variant_1: - node: - type: llm - source: - type: code - path: summarize_text_content__variant_1.jinja2 - inputs: - # This is to easily switch between openai and azure openai. - # deployment_name is required by azure openai, model is required by openai. - deployment_name: gpt-35-turbo - model: gpt-3.5-turbo - max_tokens: 256 - temperature: 0.3 - text: ${fetch_text_content_from_url.output} - connection: open_ai_connection - api: chat - default_variant_id: variant_0 -environment: - python_requirements_txt: requirements.txt diff --git a/examples/flows/standard/web_classification/prepare_examples.py b/examples/flows/standard/web_classification/prepare_examples.py deleted file mode 100644 index c4ccb76d732..00000000000 --- a/examples/flows/standard/web_classification/prepare_examples.py +++ /dev/null @@ -1,44 +0,0 @@ -from promptflow import tool - - -@tool -def prepare_examples(): - return [ - { - "url": "https://play.google.com/store/apps/details?id=com.spotify.music", - "text_content": "Spotify is a free music and podcast streaming app with millions of songs, albums, and " - "original podcasts. It also offers audiobooks, so users can enjoy thousands of stories. " - "It has a variety of features such as creating and sharing music playlists, discovering " - "new music, and listening to popular and exclusive podcasts. It also has a Premium " - "subscription option which allows users to download and listen offline, and access " - "ad-free music. It is available on all devices and has a variety of genres and artists " - "to choose from.", - "category": "App", - "evidence": "Both", - }, - { - "url": "https://www.youtube.com/channel/UC_x5XG1OV2P6uZZ5FSM9Ttw", - "text_content": "NFL Sunday Ticket is a service offered by Google LLC that allows users to watch NFL " - "games on YouTube. It is available in 2023 and is subject to the terms and privacy policy " - "of Google LLC. It is also subject to YouTube's terms of use and any applicable laws.", - "category": "Channel", - "evidence": "URL", - }, - { - "url": "https://arxiv.org/abs/2303.04671", - "text_content": "Visual ChatGPT is a system that enables users to interact with ChatGPT by sending and " - "receiving not only languages but also images, providing complex visual questions or " - "visual editing instructions, and providing feedback and asking for corrected results. " - "It incorporates different Visual Foundation Models and is publicly available. 
Experiments " - "show that Visual ChatGPT opens the door to investigating the visual roles of ChatGPT with " - "the help of Visual Foundation Models.", - "category": "Academic", - "evidence": "Text content", - }, - { - "url": "https://ab.politiaromana.ro/", - "text_content": "There is no content available for this text.", - "category": "None", - "evidence": "None", - }, - ] diff --git a/examples/flows/standard/web_classification/requirements.txt b/examples/flows/standard/web_classification/requirements.txt deleted file mode 100644 index ccef8cfd3cc..00000000000 --- a/examples/flows/standard/web_classification/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -promptflow[azure] -promptflow-tools -bs4 \ No newline at end of file diff --git a/examples/flows/standard/web_classification/run.yml b/examples/flows/standard/web_classification/run.yml deleted file mode 100644 index 9522372f0e0..00000000000 --- a/examples/flows/standard/web_classification/run.yml +++ /dev/null @@ -1,4 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/promptflow/latest/Run.schema.json -flow: . -data: data.jsonl -variant: ${summarize_text_content.variant_1} \ No newline at end of file diff --git a/examples/flows/standard/web_classification/run_evaluation.yml b/examples/flows/standard/web_classification/run_evaluation.yml deleted file mode 100644 index 50090e7d2e3..00000000000 --- a/examples/flows/standard/web_classification/run_evaluation.yml +++ /dev/null @@ -1,7 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/promptflow/latest/Run.schema.json -flow: ../../evaluation/eval-classification-accuracy -data: data.jsonl -run: web_classification_variant_1_20230724_173442_973403 # replace with your run name -column_mapping: - groundtruth: ${data.answer} - prediction: ${run.outputs.category} \ No newline at end of file diff --git a/examples/flows/standard/web_classification/summarize_text_content.jinja2 b/examples/flows/standard/web_classification/summarize_text_content.jinja2 deleted file mode 100644 index 81078019db8..00000000000 --- a/examples/flows/standard/web_classification/summarize_text_content.jinja2 +++ /dev/null @@ -1,7 +0,0 @@ -system: -Please summarize the following text in one paragraph. 100 words. -Do not add any information that is not in the text. - -user: -Text: {{text}} -Summary: \ No newline at end of file diff --git a/examples/flows/standard/web_classification/summarize_text_content__variant_1.jinja2 b/examples/flows/standard/web_classification/summarize_text_content__variant_1.jinja2 deleted file mode 100644 index 5fb816079d5..00000000000 --- a/examples/flows/standard/web_classification/summarize_text_content__variant_1.jinja2 +++ /dev/null @@ -1,7 +0,0 @@ -system: -Please summarize some keywords of this paragraph and have some details of each keywords. -Do not add any information that is not in the text. 
- -user: -Text: {{text}} -Summary: \ No newline at end of file From 910fa5b74f01bd2b49ec05bdce04e397eb69938d Mon Sep 17 00:00:00 2001 From: Xingzhi Zhang <37076709+elliotzh@users.noreply.github.com> Date: Fri, 20 Oct 2023 11:05:55 +0800 Subject: [PATCH 4/6] doc: pf validate --- docs/how-to-guides/use-flow-in-pipeline.md | 16 +++++++++---- docs/reference/pf-command-reference.md | 27 +++++++++++++++++++++- 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/docs/how-to-guides/use-flow-in-pipeline.md b/docs/how-to-guides/use-flow-in-pipeline.md index 301263ff8f0..f1973b09d65 100644 --- a/docs/how-to-guides/use-flow-in-pipeline.md +++ b/docs/how-to-guides/use-flow-in-pipeline.md @@ -9,7 +9,9 @@ After you have developed and tested the flow in [init and test a flow](./init-an :::{admonition} Pre-requirements - Customer need to install the extension `ml>=2.21.0` to enable this feature in CLI and package `azure-ai-ml>=1.11.0` to enable this feature in SDK; - Customer need to put `$schema` in the target `flow.dag.yaml` to enable this feature; -- Customer need to generate `flow.tools.json` for the target flow before below usage. Usually the generation can be done by `pf flow validate`. + - `flow.dag.yaml`: `$schema`: `https://azuremlschemas.azureedge.net/promptflow/latest/Flow.schema.json` + - `run.yaml`: `$schema`: `https://azuremlschemas.azureedge.net/promptflow/latest/Run.schema.json` +- Customer need to generate `flow.tools.json` for the target flow before below usage. The generation can be done by `pf flow validate`. ::: For more information about AzureML and component: @@ -46,8 +48,9 @@ from azure.ai.ml import MLClient, load_component ml_client = MLClient() # Register flow as a component -# Default component name will be the name of flow folder, which is web-classification here; default version will be "1" flow_component = load_component("standard/web-classification/flow.dag.yaml") +# Default component name will be the name of flow folder, which is not a valid component name, so we override it here; default version will be "1" +flow_component.name = "web_classification" ml_client.components.create_or_update(flow_component) # Register flow as a component with parameters override @@ -55,7 +58,7 @@ ml_client.components.create_or_update( "standard/web-classification/flow.dag.yaml", version="2", params_override=[ - {"name": "web-classification_updated"} + {"name": "web_classification_updated"} ] ) ``` @@ -69,5 +72,8 @@ After registered a flow as a component, they can be referred in a pipeline job l ## Directly use a flow in a pipeline job Besides explicitly registering a flow as a component, customer can also directly use flow in a pipeline job: -- [CLI sample](https://github.com/Azure/azureml-examples/tree/zhangxingzhi/flow-in-pipeline/cli/jobs/pipelines-with-components/flow_in_pipeline/1a_flow_in_pipeline) -- [SDK sample](https://github.com/Azure/azureml-examples/blob/zhangxingzhi/flow-in-pipeline/sdk/python/jobs/pipelines/1l_flow_in_pipeline/flow_in_pipeline.ipynb) +- [CLI example](https://github.com/Azure/azureml-examples/tree/main/cli/jobs/pipelines-with-components/pipeline_job_with_flow_as_component) +- [SDK example](https://github.com/Azure/azureml-examples/tree/main/sdk/python/jobs/pipelines/1l_flow_in_pipeline) + +All connections and flow inputs will be exposed as input parameters of the component. 
Default values can be provided in the flow/run definition; they can also be set or overwritten on job submission:
+- [CLI/SDK example](../../examples/tutorials/flow-in-pipeline/pipeline.ipynb)
diff --git a/docs/reference/pf-command-reference.md b/docs/reference/pf-command-reference.md
index 779adb18bb2..828a79c2747 100644
--- a/docs/reference/pf-command-reference.md
+++ b/docs/reference/pf-command-reference.md
@@ -20,8 +20,9 @@ Manage promptflow flow flows.
 | --- | --- |
 | [pf flow init](#pf-flow-init) | Initialize a prompt flow directory. |
 | [pf flow test](#pf-flow-test) | Test the prompt flow or flow node. |
+| [pf flow validate](#pf-flow-validate) | Validate a flow and generate `flow.tools.json` for it. |
 | [pf flow build](#pf-flow-build) | Build a flow for further sharing or deployment. |
-| [pf flow serve](#pf-flow-serve) | Serving a flow as an endpoint. |
+| [pf flow serve](#pf-flow-serve) | Serve a flow as an endpoint. |
 
 ### pf flow init
 
@@ -167,6 +168,30 @@ Start a interactive chat session for chat flow.
 
 Displays the output for each step in the chat flow.
 
+### pf flow validate
+
+Validate the prompt flow and generate a `flow.tools.json` under `.promptflow`. This file is required when using a flow as a component in an Azure ML pipeline.
+
+```bash
+pf flow validate --source <path-to-flow>
+                 [--debug]
+                 [--verbose]
+```
+
+#### Examples
+
+Validate the flow.
+
+```bash
+pf flow validate --source <path-to-flow>
+```
+
+#### Required Parameter
+
+`--source`
+
+The flow source to validate.
+
 ### pf flow build
 
 Build a flow for further sharing or deployment.
From a38a92b01efe16794f14e82eb124a87d2a042519 Mon Sep 17 00:00:00 2001
From: Xingzhi Zhang <37076709+elliotzh@users.noreply.github.com>
Date: Fri, 20 Oct 2023 15:10:47 +0800
Subject: [PATCH 5/6] feat: enable ci for the new example

---
 .../samples_flowinpipeline_pipeline.yml       | 47 ++++++++++++++
 .../azureai/use-flow-in-azure-ml-pipeline.md} | 64 +++++++++++++++++--
 docs/cloud/index.md                           |  2 +-
 docs/how-to-guides/init-and-test-a-flow.md    |  3 +-
 examples/README.md                            |  2 +
 examples/configuration.ipynb                  | 34 ++++++++++
 .../tutorials/flow-in-pipeline/pipeline.ipynb | 12 ++--
 7 files changed, 147 insertions(+), 17 deletions(-)
 create mode 100644 .github/workflows/samples_flowinpipeline_pipeline.yml
 rename docs/{how-to-guides/use-flow-in-pipeline.md => cloud/azureai/use-flow-in-azure-ml-pipeline.md} (69%)

diff --git a/.github/workflows/samples_flowinpipeline_pipeline.yml b/.github/workflows/samples_flowinpipeline_pipeline.yml
new file mode 100644
index 00000000000..4699878019b
--- /dev/null
+++ b/.github/workflows/samples_flowinpipeline_pipeline.yml
@@ -0,0 +1,47 @@
+# This code is autogenerated.
+# Code is generated by running custom script: python3 readme.py
+# Any manual changes to this file may cause incorrect behavior.
+# Any manual changes will be overwritten if the code is regenerated.
+
+name: samples_flowinpipeline_pipeline
+on:
+  schedule:
+    - cron: "28 19 * * *"  # Every day starting at 3:28 BJT
+  pull_request:
+    branches: [ main ]
+    paths: [ examples/**, .github/workflows/samples_flowinpipeline_pipeline.yml ]
+  workflow_dispatch:
+
+jobs:
+  samples_notebook_ci:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+      - name: Generate config.json
+        run: echo '${{ secrets.TEST_WORKSPACE_CONFIG_JSON_CANARY }}' > ${{ github.workspace }}/examples/config.json
+      - name: Azure Login
+        uses: azure/login@v1
+        with:
+          creds: ${{ secrets.AZURE_CREDENTIALS }}
+      - name: Setup Python 3.9 environment
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.9"
+      - name: Prepare requirements
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r ${{ github.workspace }}/examples/requirements.txt
+          pip install -r ${{ github.workspace }}/examples/dev_requirements.txt
+      - name: Create Aoai Connection
+        run: pf connection create -f ${{ github.workspace }}/examples/connections/azure_openai.yml --set api_key="${{ secrets.AOAI_API_KEY_TEST }}" api_base="${{ secrets.AOAI_API_ENDPOINT_TEST }}"
+      - name: Test Notebook
+        working-directory: examples/tutorials/flow-in-pipeline
+        run: |
+          papermill -k python pipeline.ipynb pipeline.output.ipynb
+      - name: Upload artifact
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v3
+        with:
+          name: artifact
+          path: examples/tutorials/flow-in-pipeline
diff --git a/docs/how-to-guides/use-flow-in-pipeline.md b/docs/cloud/azureai/use-flow-in-azure-ml-pipeline.md
similarity index 69%
rename from docs/how-to-guides/use-flow-in-pipeline.md
rename to docs/cloud/azureai/use-flow-in-azure-ml-pipeline.md
index f1973b09d65..d4871b7110b 100644
--- a/docs/how-to-guides/use-flow-in-pipeline.md
+++ b/docs/cloud/azureai/use-flow-in-azure-ml-pipeline.md
@@ -1,10 +1,6 @@
-# Use flow in pipeline job
+# Use flow in Azure ML pipeline job
 
-:::{admonition} Experimental feature
-This is an experimental feature, and may change at any time. Learn [more](faq.md#stable-vs-experimental).
-:::
-
-After you have developed and tested the flow in [init and test a flow](./init-and-test-a-flow.md), this guide will help you learn how to use a flow as a parallel component in a pipeline job on AzureML, so that you can integrate the created flow with existing pipelines and process a large amount of data.
+After you have developed and tested the flow in [init and test a flow](../../how-to-guides/init-and-test-a-flow.md), this guide will help you learn how to use a flow as a parallel component in a pipeline job on AzureML, so that you can integrate the created flow with existing pipelines and process a large amount of data.
 
 :::{admonition} Pre-requirements
 - Customer need to install the extension `ml>=2.21.0` to enable this feature in CLI and package `azure-ai-ml>=1.11.0` to enable this feature in SDK;
@@ -76,4 +72,58 @@ Besides explicitly registering a flow as a component, customer can also directly
 - [SDK example](https://github.com/Azure/azureml-examples/tree/main/sdk/python/jobs/pipelines/1l_flow_in_pipeline)
 
 All connections and flow inputs will be exposed as input parameters of the component. Default values can be provided in the flow/run definition; they can also be set or overwritten on job submission:
-- [CLI/SDK example](../../examples/tutorials/flow-in-pipeline/pipeline.ipynb)
+
+::::{tab-set}
+:::{tab-item} CLI
+:sync: CLI
+
+```yaml
+...
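+# Note (illustrative): the parallel step below assumes the enclosing pipeline
+# job defines an input named `web_classification_input`, e.g.:
+#   inputs:
+#     web_classification_input:
+#       type: uri_file
+#       path: <path-to-data.jsonl>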
+jobs:
+  flow_node:
+    type: parallel
+    component: standard/web-classification/flow.dag.yaml
+    inputs:
+      data: ${{parent.inputs.web_classification_input}}
+      url: "${data.url}"
+      connections.summarize_text_content.connection: azure_open_ai_connection
+      connections.summarize_text_content.deployment_name: text-davinci-003
+...
+```
+
+:::
+
+:::{tab-item} SDK
+:sync: SDK
+
+```python
+from azure.ai.ml import MLClient, Input, dsl, load_component
+from azure.ai.ml.constants import AssetTypes
+
+ml_client = MLClient()
+
+# Load the flow as a component
+flow_component = load_component("standard/web-classification/flow.dag.yaml")
+data_input = Input(path="standard/web-classification/data.jsonl", type=AssetTypes.URI_FILE)
+
+@dsl.pipeline
+def pipeline_func_with_flow(data):
+    flow_node = flow_component(
+        data=data,
+        url="${data.url}",
+        connections={
+            "summarize_text_content": {
+                "connection": "azure_open_ai_connection",
+                "deployment_name": "text-davinci-003",
+            },
+        },
+    )
+    flow_node.compute = "cpu-cluster"
+
+pipeline_with_flow = pipeline_func_with_flow(data=data_input)
+
+ml_client.jobs.create_or_update(pipeline_with_flow)
+```
+
+:::
+
+::::
diff --git a/docs/cloud/index.md b/docs/cloud/index.md
index da3340301bd..2c8aa2e2584 100644
--- a/docs/cloud/index.md
+++ b/docs/cloud/index.md
@@ -7,6 +7,6 @@ You can develope your flow locally and seamlessly move the experience to azure c
 :maxdepth: 1
 azureai/quick-start
 azureai/deploy-to-azure-appservice
-azureai/flow-in-pipeline
+azureai/use-flow-in-azure-ml-pipeline.md
 azureai/faq
 ```
\ No newline at end of file
diff --git a/docs/how-to-guides/init-and-test-a-flow.md b/docs/how-to-guides/init-and-test-a-flow.md
index 9fab857b0c5..0b9c2f161ff 100644
--- a/docs/how-to-guides/init-and-test-a-flow.md
+++ b/docs/how-to-guides/init-and-test-a-flow.md
@@ -299,5 +299,4 @@ Break points and debugging functionalities for the Python steps in your flow. 
Ju
 ## Next steps
 
 - [Add conditional control to a flow](./add-conditional-control-to-a-flow.md)
-- [Run and evaluate a flow](./run-and-evaluate-a-flow.md)
-- [Use flow in pipeline job](./use-flow-in-pipeline.md)
+- [Run and evaluate a flow](./run-and-evaluate-a-flow/index.md)
diff --git a/examples/README.md b/examples/README.md
index 107c3094725..41617e99a4e 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -28,6 +28,7 @@
 ------|--------|-------------
 | [chat-with-pdf](tutorials/e2e-development/chat-with-pdf.md) | [![samples_tutorials_e2e_development_chat_with_pdf](https://github.com/microsoft/promptflow/actions/workflows/samples_tutorials_e2e_development_chat_with_pdf.yml/badge.svg?branch=main)](https://github.com/microsoft/promptflow/actions/workflows/samples_tutorials_e2e_development_chat_with_pdf.yml) | Retrieval Augmented Generation (or RAG) has become a prevalent pattern to build intelligent application with Large Language Models (or LLMs) since it can infuse external knowledge into the model, which is not trained with those up-to-date or proprietary information |
 | [azure-app-service](tutorials/flow-deploy/azure-app-service/README.md) | [![samples_tutorials_flow_deploy_azure_app_service](https://github.com/microsoft/promptflow/actions/workflows/samples_tutorials_flow_deploy_azure_app_service.yml/badge.svg?branch=main)](https://github.com/microsoft/promptflow/actions/workflows/samples_tutorials_flow_deploy_azure_app_service.yml) | This example demos how to deploy a flow using Azure App Service |
+| [distribute-flow-as-executable-app](tutorials/flow-deploy/distribute-flow-as-executable-app/README.md) | [![samples_tutorials_flow_deploy_distribute_flow_as_executable_app](https://github.com/microsoft/promptflow/actions/workflows/samples_tutorials_flow_deploy_distribute_flow_as_executable_app.yml/badge.svg?branch=main)](https://github.com/microsoft/promptflow/actions/workflows/samples_tutorials_flow_deploy_distribute_flow_as_executable_app.yml) | This example demos how to package flow as an executable app |
 | [docker](tutorials/flow-deploy/docker/README.md) | [![samples_tutorials_flow_deploy_docker](https://github.com/microsoft/promptflow/actions/workflows/samples_tutorials_flow_deploy_docker.yml/badge.svg?branch=main)](https://github.com/microsoft/promptflow/actions/workflows/samples_tutorials_flow_deploy_docker.yml) | This example demos how to deploy flow as a docker app |
 | [kubernetes](tutorials/flow-deploy/kubernetes/README.md) | [![samples_tutorials_flow_deploy_kubernetes](https://github.com/microsoft/promptflow/actions/workflows/samples_tutorials_flow_deploy_kubernetes.yml/badge.svg?branch=main)](https://github.com/microsoft/promptflow/actions/workflows/samples_tutorials_flow_deploy_kubernetes.yml) | This example demos how to deploy flow as a Kubernetes app |
@@ -87,6 +88,7 @@
 ------|--------|-------------
 | [quickstart.ipynb](tutorials/get-started/quickstart.ipynb) | [![samples_getstarted_quickstart](https://github.com/microsoft/promptflow/actions/workflows/samples_getstarted_quickstart.yml/badge.svg?branch=main)](https://github.com/microsoft/promptflow/actions/workflows/samples_getstarted_quickstart.yml) | A quickstart tutorial to run a flow and evaluate it. |
 | [quickstart-azure.ipynb](tutorials/get-started/quickstart-azure.ipynb) | [![samples_getstarted_quickstartazure](https://github.com/microsoft/promptflow/actions/workflows/samples_getstarted_quickstartazure.yml/badge.svg?branch=main)](https://github.com/microsoft/promptflow/actions/workflows/samples_getstarted_quickstartazure.yml) | A quickstart tutorial to run a flow in Azure AI and evaluate it. |
+| [pipeline.ipynb](tutorials/flow-in-pipeline/pipeline.ipynb) | [![samples_flowinpipeline_pipeline](https://github.com/microsoft/promptflow/actions/workflows/samples_flowinpipeline_pipeline.yml/badge.svg?branch=main)](https://github.com/microsoft/promptflow/actions/workflows/samples_flowinpipeline_pipeline.yml) | Create a pipeline with a flow loaded as a component |
 | [cloud-run-management.ipynb](tutorials/run-management/cloud-run-management.ipynb) | [![samples_runmanagement_cloudrunmanagement](https://github.com/microsoft/promptflow/actions/workflows/samples_runmanagement_cloudrunmanagement.yml/badge.svg?branch=main)](https://github.com/microsoft/promptflow/actions/workflows/samples_runmanagement_cloudrunmanagement.yml) | Flow run management in Azure AI |
 | [connection.ipynb](connections/connection.ipynb) | [![samples_connections_connection](https://github.com/microsoft/promptflow/actions/workflows/samples_connections_connection.yml/badge.svg?branch=main)](https://github.com/microsoft/promptflow/actions/workflows/samples_connections_connection.yml) | Manage various types of connections using sdk |
 | [chat-with-pdf-azure.ipynb](flows/chat/chat-with-pdf/chat-with-pdf-azure.ipynb) | [![samples_flows_chat_chatwithpdf_chatwithpdfazure](https://github.com/microsoft/promptflow/actions/workflows/samples_flows_chat_chatwithpdf_chatwithpdfazure.yml/badge.svg?branch=main)](https://github.com/microsoft/promptflow/actions/workflows/samples_flows_chat_chatwithpdf_chatwithpdfazure.yml) | A tutorial of chat-with-pdf flow that executes in Azure AI |
diff --git a/examples/configuration.ipynb b/examples/configuration.ipynb
index c993738e4aa..158f571b06d 100644
--- a/examples/configuration.ipynb
+++ b/examples/configuration.ipynb
@@ -121,6 +121,40 @@
    "print(pf)"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 4. Retrieve or create an Azure Machine Learning compute target\n",
+    "\n",
+    "To create an Azure Machine Learning job, you need a compute cluster as a prerequisite. The code below ensures a compute named `cpu-cluster` exists in your workspace."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azure.ai.ml import MLClient\n",
+    "from azure.ai.ml.entities import AmlCompute\n",
+    "\n",
+    "# MLClient uses the same configuration as PFClient\n",
+    "ml_client = MLClient.from_config(credential=credential)\n",
+    "\n",
+    "# specify aml compute name.\n",
+    "cpu_compute_target = \"cpu-cluster\"\n",
+    "\n",
+    "try:\n",
+    "    ml_client.compute.get(cpu_compute_target)\n",
+    "except Exception:\n",
+    "    print(\"Creating a new cpu compute target...\")\n",
+    "    compute = AmlCompute(\n",
+    "        name=cpu_compute_target, size=\"STANDARD_D2_V2\", min_instances=0, max_instances=4\n",
+    "    )\n",
+    "    ml_client.compute.begin_create_or_update(compute).result()"
+   ]
+  },
 {
  "cell_type": "code",
  "execution_count": null,
diff --git a/examples/tutorials/flow-in-pipeline/pipeline.ipynb b/examples/tutorials/flow-in-pipeline/pipeline.ipynb
index bc7489998e9..7d83b593d55 100644
--- a/examples/tutorials/flow-in-pipeline/pipeline.ipynb
+++ b/examples/tutorials/flow-in-pipeline/pipeline.ipynb
@@ -12,11 +12,10 @@
     "- An Azure ML workspace with compute cluster - [Configure workspace](../../configuration.ipynb)\n",
     "- A Python environment\n",
     "- Installed Azure Machine Learning Python SDK v2 - [install instructions](../../../README.md) - check the getting started section\n",
-    "- **Permission to access connections has been assigned to the used compute cluster**\n",
     "\n",
     "**Learning Objectives** - By the end of this tutorial, you should be able to:\n",
     "- Connect to your AML workspace from the Python SDK\n",
-    "- Create `Pipeline` load flow as components from YAML\n",
+    "- Create `Pipeline` with a component loaded from `flow.dag.yaml`\n",
     "\n",
     "**Motivations** - This notebook explains how to run a pipeline with a flow loaded as a component."
] @@ -43,8 +42,7 @@ "\n", "from azure.ai.ml import MLClient, load_component, Input\n", "from azure.ai.ml.constants import AssetTypes\n", - "from azure.ai.ml.dsl import pipeline\n", - "from azure.ai.ml.entities import ResourceConfiguration" + "from azure.ai.ml.dsl import pipeline" ] }, { @@ -93,7 +91,7 @@ "ml_client = MLClient.from_config(credential=credential)\n", "\n", "# Retrieve an already attached Azure Machine Learning Compute.\n", - "cluster_name = \"gpu-cluster\"\n", + "cluster_name = \"cpu-cluster\"\n", "print(ml_client.compute.get(cluster_name))" ] }, @@ -112,7 +110,7 @@ "metadata": {}, "outputs": [], "source": [ - "flow_component = load_component(\"./web_classification/flow.dag.yaml\")" + "flow_component = load_component(\"../../flows/standard/web-classification/flow.dag.yaml\")" ] }, { @@ -129,7 +127,7 @@ "metadata": {}, "outputs": [], "source": [ - "data_input = Input(path=\"./data/data.jsonl\", type=AssetTypes.URI_FILE)\n", + "data_input = Input(path=\"../../flows/standard/web-classification/data.jsonl\", type=AssetTypes.URI_FILE)\n", "\n", "@pipeline()\n", "def pipeline_func_with_flow(data):\n", From 78ee54afad3f2d2497725ad996d24829d7fce847 Mon Sep 17 00:00:00 2001 From: Xingzhi Zhang <37076709+elliotzh@users.noreply.github.com> Date: Fri, 20 Oct 2023 17:34:38 +0800 Subject: [PATCH 6/6] fix: use valid deployment name --- docs/how-to-guides/init-and-test-a-flow.md | 3 +-- examples/tutorials/flow-in-pipeline/pipeline.ipynb | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/docs/how-to-guides/init-and-test-a-flow.md b/docs/how-to-guides/init-and-test-a-flow.md index 0b9c2f161ff..4afaeff38d5 100644 --- a/docs/how-to-guides/init-and-test-a-flow.md +++ b/docs/how-to-guides/init-and-test-a-flow.md @@ -298,5 +298,4 @@ Break points and debugging functionalities for the Python steps in your flow. Ju ## Next steps -- [Add conditional control to a flow](./add-conditional-control-to-a-flow.md) -- [Run and evaluate a flow](./run-and-evaluate-a-flow/index.md) +- [Add conditional control to a flow](./add-conditional-control-to-a-flow.md) \ No newline at end of file diff --git a/examples/tutorials/flow-in-pipeline/pipeline.ipynb b/examples/tutorials/flow-in-pipeline/pipeline.ipynb index 7d83b593d55..604bd58cd82 100644 --- a/examples/tutorials/flow-in-pipeline/pipeline.ipynb +++ b/examples/tutorials/flow-in-pipeline/pipeline.ipynb @@ -137,11 +137,11 @@ " connections={\n", " \"summarize_text_content\": {\n", " \"connection\": \"azure_open_ai_connection\",\n", - " \"deployment_name\": \"text-davinci-003\",\n", + " \"deployment_name\": \"gpt-35-turbo\",\n", " },\n", " \"classify_with_llm\": {\n", " \"connection\": \"azure_open_ai_connection\",\n", - " \"deployment_name\": \"text-davinci-003\",\n", + " \"deployment_name\": \"gpt-35-turbo\",\n", " },\n", " },\n", " )\n",
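
For reference, a minimal sketch of overriding the exposed connection inputs at job submission time, assuming the `web_classification` component registered in the patches above and an existing `azure_open_ai_connection` in the target workspace (the component version, data path, and deployment name are illustrative):

```python
from azure.identity import DefaultAzureCredential
from azure.ai.ml import Input, MLClient
from azure.ai.ml.constants import AssetTypes
from azure.ai.ml.dsl import pipeline

ml_client = MLClient.from_config(credential=DefaultAzureCredential())

# Fetch the component registered earlier instead of loading it from YAML.
flow_component = ml_client.components.get("web_classification", version="1")

@pipeline()
def pipeline_func_with_flow(data):
    flow_node = flow_component(
        data=data,
        url="${data.url}",
        # Connections are exposed as regular component inputs, so the
        # deployment can be swapped at submission time without editing
        # flow.dag.yaml.
        connections={
            "summarize_text_content": {
                "connection": "azure_open_ai_connection",
                "deployment_name": "gpt-35-turbo",
            },
            "classify_with_llm": {
                "connection": "azure_open_ai_connection",
                "deployment_name": "gpt-35-turbo",
            },
        },
    )
    flow_node.compute = "cpu-cluster"

pipeline_job = pipeline_func_with_flow(
    data=Input(path="./data.jsonl", type=AssetTypes.URI_FILE)
)
ml_client.jobs.create_or_update(pipeline_job, experiment_name="pipeline_samples")
```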