From 664abdb0528cf4753eaa6464fb7e08c509e52613 Mon Sep 17 00:00:00 2001
From: Honglin <0mza987@gmail.com>
Date: Tue, 26 Sep 2023 18:05:24 +0800
Subject: [PATCH] [SDK/CLI] Fix the portal url parse logic for asset id, add
 validation for cloud PFClient parameters (#614)

# Description

- Fix the portal url parse logic for asset id
- Add validation for cloud PFClient parameters

# All Promptflow Contribution checklist:
- [x] **The pull request does not introduce [breaking changes].**
- [x] **CHANGELOG is updated for new features, bug fixes or other
significant changes.**
- [x] **I have read the [contribution guidelines](../CONTRIBUTING.md).**
- [ ] **Create an issue and link to the pull request to get dedicated
review from promptflow team. Learn more: [suggested
workflow](../CONTRIBUTING.md#suggested-workflow).**

## General Guidelines and Best Practices
- [x] Title of the pull request is clear and informative.
- [x] There are a small number of commits, each of which have an
informative message. This means that previously merged commits do not
appear in the history of the PR. For more information on cleaning up the
commits in your PR, [see this
page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md).

### Testing Guidelines
- [ ] Pull request includes test coverage for the included changes.
---
 src/promptflow/CHANGELOG.md                   |  2 +
 src/promptflow/promptflow/azure/_pf_client.py | 19 +++++++
 .../azure/operations/_run_operations.py       | 52 +++++++++++++------
 .../e2etests/test_run_operations.py           | 44 +++++++++++++++-
 4 files changed, 99 insertions(+), 18 deletions(-)

diff --git a/src/promptflow/CHANGELOG.md b/src/promptflow/CHANGELOG.md
index e02307c223d..30cede90d26 100644
--- a/src/promptflow/CHANGELOG.md
+++ b/src/promptflow/CHANGELOG.md
@@ -15,12 +15,14 @@
 - Read/write log file with encoding specified.
 - Avoid inconsistent error message when executor exits abnormally.
 - Align inputs & outputs row number in case partial completed run will break `pfazure run show-details`.
+- Fix bug that failed to parse portal url for run data when the form is an asset id.
 
 ### Improvements
 - [Executor][Internal] Improve error message with more details and actionable information.
 - [SDK/CLI] `pf/pfazure run show-details`:
   - Add `--max-results` option to control the number of results to display.
   - Add `--all-results` option to display all results.
+- Add validation for azure `PFClient` constructor in case wrong parameter is passed.
 
 ## 0.1.0b6 (2023.09.15)
 
diff --git a/src/promptflow/promptflow/azure/_pf_client.py b/src/promptflow/promptflow/azure/_pf_client.py
index 70c8ca9c805..f82d8a2eac9 100644
--- a/src/promptflow/promptflow/azure/_pf_client.py
+++ b/src/promptflow/promptflow/azure/_pf_client.py
@@ -11,6 +11,7 @@
 from pandas import DataFrame
 
 from promptflow._sdk._constants import MAX_SHOW_DETAILS_RESULTS
+from promptflow._sdk._errors import RunOperationParameterError
 from promptflow._sdk._user_agent import USER_AGENT
 from promptflow._sdk.entities import Run
 from promptflow.azure._load_functions import load_flow
@@ -48,6 +49,7 @@ def __init__(
         workspace_name: Optional[str] = None,
         **kwargs,
     ):
+        self._validate_config_information(subscription_id, resource_group_name, workspace_name, kwargs)
         self._add_user_agent(kwargs)
         self._ml_client = kwargs.pop("ml_client", None) or MLClient(
             credential=credential,
@@ -94,6 +96,23 @@ def __init__(
             **kwargs,
         )
 
+    @staticmethod
+    def _validate_config_information(subscription_id, resource_group_name, workspace_name, kwargs):
+        """Validate the config information in case wrong parameter name is passed into the constructor."""
+        sub_name, wrong_sub_name = "subscription_id", "subscription"
+        rg_name, wrong_rg_name = "resource_group_name", "resource_group"
+        ws_name, wrong_ws_name = "workspace_name", "workspace"
+
+        error_message = (
+            "You have passed in the wrong parameter name to initialize the PFClient, please use {0!r} instead of {1!r}."
+        )
+        if not subscription_id and kwargs.get(wrong_sub_name, None) is not None:
+            raise RunOperationParameterError(error_message.format(sub_name, wrong_sub_name))
+        if not resource_group_name and kwargs.get(wrong_rg_name, None) is not None:
+            raise RunOperationParameterError(error_message.format(rg_name, wrong_rg_name))
+        if not workspace_name and kwargs.get(wrong_ws_name, None) is not None:
+            raise RunOperationParameterError(error_message.format(ws_name, wrong_ws_name))
+
     @property
     def ml_client(self):
         """Return a client to interact with Azure ML services."""
diff --git a/src/promptflow/promptflow/azure/operations/_run_operations.py b/src/promptflow/promptflow/azure/operations/_run_operations.py
index f16fab3b376..a3f678751b5 100644
--- a/src/promptflow/promptflow/azure/operations/_run_operations.py
+++ b/src/promptflow/promptflow/azure/operations/_run_operations.py
@@ -149,18 +149,17 @@ def _get_input_portal_url_from_input_uri(self, input_uri):
         if not input_uri:
             return None
         if input_uri.startswith("azureml://"):
-            # input uri is a datastore path
-            match = self._DATASTORE_PATH_PATTERN.match(input_uri)
-            if not match or len(match.groups()) != 2:
+            res = self._get_portal_url_from_asset_id(input_uri)
+            if res is None:
+                res = self._get_portal_url_from_datastore_path(input_uri)
+            if res is None:
+                error_msg = (
+                    f"Failed to get portal url: {input_uri!r} is not a valid azureml asset id or datastore path."
+                )
                 logger.warning(error_msg)
-                return None
-            datastore, path = match.groups()
-            return (
-                f"https://ml.azure.com/data/datastore/{datastore}/edit?wsid={self._common_azure_url_pattern}"
-                f"&activeFilePath={path}#browseTab"
-            )
+            return res
         elif input_uri.startswith("azureml:/"):
-            # when input uri is an asset id, leverage the asset id pattern to get the portal url
+            # some asset id could start with "azureml:/"
             return self._get_portal_url_from_asset_id(input_uri)
         elif input_uri.startswith("azureml:"):
             # named asset id
@@ -170,14 +169,33 @@ def _get_input_portal_url_from_input_uri(self, input_uri):
             logger.warning(error_msg)
             return None
 
-    def _get_portal_url_from_asset_id(self, output_uri):
-        """Get the portal url for the data output."""
-        error_msg = f"Failed to get portal url: {output_uri!r} is not a valid azureml asset id."
-        if not output_uri:
+    def _get_portal_url_from_datastore_path(self, datastore_path, log_warning=False):
+        """Get the portal url from the datastore path."""
+        error_msg = (
+            f"Failed to get portal url: Datastore path {datastore_path!r} is not a valid azureml datastore path."
+        )
+        if not datastore_path:
             return None
-        match = self._ASSET_ID_PATTERN.match(output_uri)
+        match = self._DATASTORE_PATH_PATTERN.match(datastore_path)
         if not match or len(match.groups()) != 2:
-            logger.warning(error_msg)
+            if log_warning:
+                logger.warning(error_msg)
+            return None
+        datastore, path = match.groups()
+        return (
+            f"https://ml.azure.com/data/datastore/{datastore}/edit?wsid={self._common_azure_url_pattern}"
+            f"&activeFilePath={path}#browseTab"
+        )
+
+    def _get_portal_url_from_asset_id(self, asset_id, log_warning=False):
+        """Get the portal url from asset id."""
+        error_msg = f"Failed to get portal url: {asset_id!r} is not a valid azureml asset id."
+        if not asset_id:
+            return None
+        match = self._ASSET_ID_PATTERN.match(asset_id)
+        if not match or len(match.groups()) != 2:
+            if log_warning:
+                logger.warning(error_msg)
             return None
         name, version = match.groups()
         return f"https://ml.azure.com/data/{name}/{version}/details?wsid={self._common_azure_url_pattern}"
@@ -498,7 +516,7 @@ def _refine_run_data_from_run_history(self, run_data: dict) -> dict:
         # get portal urls
         run_data[RunDataKeys.DATA_PORTAL_URL] = self._get_input_portal_url_from_input_uri(input_data)
         run_data[RunDataKeys.INPUT_RUN_PORTAL_URL] = self._get_run_portal_url(run_id=input_run_id)
-        run_data[RunDataKeys.OUTPUT_PORTAL_URL] = self._get_portal_url_from_asset_id(output_data)
+        run_data[RunDataKeys.OUTPUT_PORTAL_URL] = self._get_portal_url_from_asset_id(output_data, log_warning=True)
         return run_data
 
     def _get_run_from_index_service(self, flow_run_id, **kwargs):
diff --git a/src/promptflow/tests/sdk_cli_azure_test/e2etests/test_run_operations.py b/src/promptflow/tests/sdk_cli_azure_test/e2etests/test_run_operations.py
index 944336549c4..971da06f499 100644
--- a/src/promptflow/tests/sdk_cli_azure_test/e2etests/test_run_operations.py
+++ b/src/promptflow/tests/sdk_cli_azure_test/e2etests/test_run_operations.py
@@ -11,7 +11,7 @@
 import pytest
 
 from promptflow._sdk._constants import RunStatus
-from promptflow._sdk._errors import InvalidRunError, RunNotFoundError
+from promptflow._sdk._errors import InvalidRunError, RunNotFoundError, RunOperationParameterError
 from promptflow._sdk._load_functions import load_run
 from promptflow._sdk.entities import Run
 from promptflow._utils.flow_utils import get_flow_lineage_id
@@ -609,6 +609,48 @@ def fake_submit(*args, **kwargs):
             # request id should be included in FlowRequestException
             assert f"request id: {remote_client.runs._service_caller._request_id}" in str(e.value)
 
+    def test_input_output_portal_url_parser(self, remote_client):
+        runs_op = remote_client.runs
+
+        # test input with datastore path
+        input_datastore_path = (
+            "azureml://datastores/workspaceblobstore/paths/LocalUpload/312cca2af474e5f895013392b6b38f45/data.jsonl"
+        )
+        expected_input_portal_url = (
+            f"https://ml.azure.com/data/datastore/workspaceblobstore/edit?wsid={runs_op._common_azure_url_pattern}"
+            f"&activeFilePath=LocalUpload/312cca2af474e5f895013392b6b38f45/data.jsonl#browseTab"
+        )
+        assert runs_op._get_input_portal_url_from_input_uri(input_datastore_path) == expected_input_portal_url
+
+        # test input with asset id
+        input_asset_id = (
+            "azureml://locations/eastus/workspaces/f40fcfba-ed15-4c0c-a522-6798d8d89094/data/hod-qa-sample/versions/1"
+        )
+        expected_input_portal_url = (
+            f"https://ml.azure.com/data/hod-qa-sample/1/details?wsid={runs_op._common_azure_url_pattern}"
+        )
+        assert runs_op._get_input_portal_url_from_input_uri(input_asset_id) == expected_input_portal_url
+
+        # test output with asset id
+        output_asset_id = (
+            "azureml://locations/eastus/workspaces/f40fcfba-ed15-4c0c-a522-6798d8d89094/data"
+            "/azureml_d360affb-c01f-460f-beca-db9a8b88b625_output_data_flow_outputs/versions/1"
+        )
+        expected_output_portal_url = (
+            "https://ml.azure.com/data/azureml_d360affb-c01f-460f-beca-db9a8b88b625_output_data_flow_outputs/1/details"
+            f"?wsid={runs_op._common_azure_url_pattern}"
+        )
+        assert runs_op._get_portal_url_from_asset_id(output_asset_id) == expected_output_portal_url
+
+    def test_wrong_client_parameters(self):
+        # test wrong client parameters
+        with pytest.raises(RunOperationParameterError, match="You have passed in the wrong parameter name"):
+            PFClient(
+                subscription_id="fake_subscription_id",
+                resource_group="fake_resource_group",
+                workspace_name="fake_workspace_name",
+            )
+
     def test_get_detail_against_partial_fail_run(self, remote_client, pf, runtime) -> None:
         run = pf.run(
             flow=f"{FLOWS_DIR}/partial_fail",