microsoft · crazygao · Sep 28, 2023 · Oct 11, 2023 · Oct 13, 2023 · Oct 13, 2023
@@ -1,4 +1,4 @@
-name: step_sdk_setup_win
+name: step_sdk_setup
 inputs:
   scriptPath:
     required: false
@@ -30,7 +30,7 @@ runs:
     if: inputs.setupType == 'promptflow_with_extra'
     shell: pwsh
     run: |
-      Set-PSDebug -Trace 2
+      Set-PSDebug -Trace 1
       pip install -r ./dev_requirements.txt
       echo "########### pip list (Before) ###########"
       pip list
@@ -44,7 +44,7 @@ runs:
     if: inputs.setupType == 'promptflow_dev'
     shell: pwsh
     run: |
-      Set-PSDebug -Trace 2
+      Set-PSDebug -Trace 1
       pip install -r ./dev_requirements.txt
       python ./setup.py bdist_wheel
       $package = Get-ChildItem ./dist | ? { $_.Name.Contains('.whl')}

@@ -0,0 +1,105 @@
+# Builds the promptflow wheels and runs the SDK/CLI tests with PF_RECORDING_MODE set to "replay".
+name: promptflow-replay-test
+on:
+  # Triggered by pull requests that touch the promptflow sources or the scripts folder.
+  pull_request:
+    paths:
+      - src/promptflow/**
+      - scripts/**
+  # Manual runs are allowed as well.
+  workflow_dispatch:
+env:
+  # Directory passed to the SDK setup action and used as working directory of the "Run Test" step.
+  testWorkingDirectory: ${{ github.workspace }}/src/promptflow
+  # Note: the "Run Test" step resets PYTHONPATH to an empty string before invoking pytest.
+  PYTHONPATH: ${{ github.workspace }}/src/promptflow
+  IS_IN_CI_PIPELINE: "true"
+  # "replay" is the value the promptflow code compares PF_RECORDING_MODE against
+  # to take its replay-specific branches.
+  PF_RECORDING_MODE: "replay"
+jobs:
+  # Builds the wheels and publishes them as the "wheel" artifact for the test job.
+  build:
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest]
+    runs-on: ${{ matrix.os }}
+    steps:
+    - name: checkout
+      uses: actions/checkout@v3
+    # Print the environment to the log for debugging, but publish only pyVersion
+    # as a step output. Appending the whole `env` dump to $GITHUB_OUTPUT turns
+    # every variable into an output and fails on values that span several lines.
+    - name: Display and Set Environment Variables
+      run: |
+        env | sort
+        echo "pyVersion=3.9" >> "$GITHUB_OUTPUT"
+      id: display_env
+      shell: bash -el {0}
+    - name: Python Setup - ${{ matrix.os }} - Python Version ${{ steps.display_env.outputs.pyVersion }}
+      uses: "./.github/actions/step_create_python_environment"
+      with:
+        pythonVersion: ${{ steps.display_env.outputs.pyVersion }}
+    - name: Build wheel
+      uses: "./.github/actions/step_sdk_setup"
+      with:
+        setupType: promptflow_with_extra
+        scriptPath: ${{ env.testWorkingDirectory }}
+    # always() keeps the upload running even when an earlier step failed.
+    - name: Upload Wheel
+      if: always()
+      uses: actions/upload-artifact@v3
+      with:
+        name: wheel
+        path: |
+          ${{ github.workspace }}/src/promptflow/dist/*.whl
+          ${{ github.workspace }}/src/promptflow-tools/dist/*.whl
+  # Installs the wheels produced by the build job and runs the SDK/CLI test suite.
+  sdk_cli_tests:
+    needs: build
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest]
+    runs-on: ${{ matrix.os }}
+    steps:
+    - name: checkout
+      uses: actions/checkout@v3
+    # Print the environment to the log for debugging, but publish only pyVersion
+    # as a step output. Appending the whole `env` dump to $GITHUB_OUTPUT turns
+    # every variable into an output and fails on values that span several lines.
+    - name: Display and Set Environment Variables
+      run: |
+        env | sort
+        echo "pyVersion=3.9" >> "$GITHUB_OUTPUT"
+      id: display_env
+      shell: bash -el {0}
+    - name: Python Setup - ${{ matrix.os }} - Python Version ${{ steps.display_env.outputs.pyVersion }}
+      uses: "./.github/actions/step_create_python_environment"
+      with:
+        pythonVersion: ${{ steps.display_env.outputs.pyVersion }}
+    - name: Download Artifacts
+      uses: actions/download-artifact@v3
+      with:
+        name: wheel
+        path: artifacts
+    # Test tooling is installed first, then every wheel found under the
+    # downloaded artifact folders (promptflow with the "azure" extra).
+    - name: Install wheel
+      shell: pwsh
+      working-directory: artifacts
+      run: |
+        Set-PSDebug -Trace 1
+        python -m pip install --upgrade pip
+        python -m pip install --upgrade setuptools
+        python -m pip install --upgrade nox
+        python -m pip install --upgrade mock
+        python -m pip install --upgrade pytest-cov
+        python -m pip install --upgrade coverage==6.0.1
+        python -m pip install --upgrade pytest-forked
+        python -m pip install --upgrade pytest-xdist
+        python -m pip install --upgrade pytest-timeout
+        python -m pip install --upgrade pytest-mock
+        python -m pip install --upgrade pytest-nunit
+        python -m pip install --upgrade pytest
+        python -m pip install --upgrade pydash
+        python -m pip install --upgrade wheel
+        python -m pip install --upgrade keyrings.alt
+        python -m pip install --upgrade beautifulsoup4==4.12.2
+        gci ./promptflow -Recurse | % {if ($_.Name.Contains('.whl')) {python -m pip install "$($_.FullName)[azure]"}}
+        gci ./promptflow-tools -Recurse | % {if ($_.Name.Contains('.whl')) {python -m pip install $_.FullName}}
+        pip freeze
+    # The worker count is chosen by pytest-xdist ("-n auto"), so no separate
+    # CPU-core detection step is needed. PYTHONPATH is cleared and an empty
+    # connections.json is written before pytest starts.
+    - name: Run Test
+      shell: pwsh
+      working-directory: ${{ env.testWorkingDirectory }}
+      run: |
+        gci env:* | sort-object name
+        $env:PYTHONPATH=""
+        echo '{}' > ${{ github.workspace }}/src/promptflow/connections.json
+        pytest ./tests/sdk_cli_test/ -n auto --dist loadfile
@@ -16,7 +16,7 @@
         help="Pytest marker to identify the tests to run",
         default="all",
     )
-    parser.add_argument("-n", help="Pytest number of process to run the tests", default="15")
+    parser.add_argument("-n", help="Pytest number of process to run the tests", default="auto")
     parser.add_argument(
         "--model-name",
         help="The model file name to run the tests",
@@ -71,8 +71,6 @@
     pytest_command += [
         "-n",
         args.n,
-        "--dist",
-        "loadgroup",
         "--log-level=info",
         "--log-format=%(asctime)s %(levelname)s %(message)s",
         "--log-date-format=[%Y-%m-%d %H:%M:%S]",

diff --git a/src/promptflow/promptflow/_core/run_tracker.py b/src/promptflow/promptflow/_core/run_tracker.py
@@ -3,6 +3,7 @@
 # ---------------------------------------------------------
 
 import json
+import os
 from contextvars import ContextVar
 from datetime import datetime
 from types import GeneratorType
@@ -175,6 +176,9 @@ def _update_flow_run_info_with_node_runs(self, run_info):
         child_run_infos = self.collect_child_node_runs(run_id)
         run_info.system_metrics = run_info.system_metrics or {}
         run_info.system_metrics.update(self.collect_metrics(child_run_infos, self.OPENAI_AGGREGATE_METRICS))
+        if os.environ.get("PF_RECORDING_MODE", None) == "replay":
+            # some tests require this metric to be set.
+            run_info.system_metrics["total_tokens"] = 0
 
     def _node_run_postprocess(self, run_info: RunInfo, output, ex: Optional[Exception]):
         run_id = run_info.run_id

diff --git a/src/promptflow/promptflow/_core/tool_record.py b/src/promptflow/promptflow/_core/tool_record.py
@@ -0,0 +1,36 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+import collections
+
+from promptflow._internal import RecordStorage, ToolProvider, tool
+
+
+class ToolRecord(ToolProvider):
+    """
+    Replay-mode stand-in for an LLM tool: instead of producing a new result it
+    returns the one previously stored in ``RecordStorage`` for the same prompt
+    template and template input values.
+    """
+
+    @tool
+    def completion(self, toolType: str, *args, **kwargs) -> str:
+        """Look up the recorded result for a prompt.
+
+        The method previously had no ``self`` parameter, so the instance was
+        bound to ``toolType`` and the real arguments were shifted by one.
+        Calls made through an instance keep working unchanged.
+
+        :param toolType: Type indicator such as "AzureOpenAI"; not used for the lookup.
+        :param args: Positional values, in order: the prompt template, the
+            names of the template inputs, and the working folder handed to
+            ``RecordStorage``.
+        :param kwargs: Candidate values for the template inputs; only names
+            listed in the template inputs take part in the lookup key.
+        :return: Whatever ``RecordStorage.get_record`` returns for the key.
+        :raises IndexError: If fewer than three positional values are supplied.
+        """
+        prompt_tmpl = args[0]
+        prompt_tpl_inputs = args[1]
+        working_folder = args[2]
+
+        hash_dict = {keyword: kwargs[keyword] for keyword in prompt_tpl_inputs if keyword in kwargs}
+        # "prompt" is assigned last so it overrides a template input of the same name.
+        hash_dict["prompt"] = prompt_tmpl
+        # Sort by key so the lookup key does not depend on insertion order.
+        hash_dict = collections.OrderedDict(sorted(hash_dict.items()))
+
+        return RecordStorage.get_record(working_folder, hash_dict)
+
+
+@tool
+def just_return(toolType: str, *args, **kwargs) -> str:
+    """Function-style entry point: forward all arguments to ``ToolRecord.completion`` on a fresh instance."""
+    recorder = ToolRecord()
+    return recorder.completion(toolType, *args, **kwargs)
@@ -85,6 +85,7 @@
 )
 from promptflow._utils.utils import (
     AttrDict,
+    RecordStorage,
     camel_to_snake,
     count_and_log_progress,
     load_json,

@@ -51,6 +51,7 @@
     USE_VARIANTS,
     VARIANTS,
     CommonYamlFields,
+    ConnectionType,
 )
 from promptflow._sdk._errors import (
     ConnectionNotFoundError,
@@ -62,9 +63,30 @@
 from promptflow._sdk._vendor import IgnoreFile, get_ignore_file, get_upload_files_from_folder
 from promptflow._utils.context_utils import _change_working_dir, inject_sys_path
 from promptflow._utils.dataclass_serializer import serialize
+from promptflow._utils.tool_utils import get_inputs_for_prompt_template
+from promptflow._utils.utils import RecordStorage
+from promptflow.contracts.run_info import RunInfo as NodeRunInfo
 from promptflow.contracts.tool import ToolType
 
 
+def record_node_run(run_info: NodeRunInfo, flow_folder: Path) -> None:
+    """Store the output of a run in ``RecordStorage`` when recording is enabled.
+
+    Does nothing unless the ``PF_RECORDING_MODE`` environment variable equals
+    "record". For every API call whose name starts with "AzureOpenAI", the
+    prompt template plus the values of its template inputs form the key and the
+    serialized run output (as a string) is the stored value.
+
+    :param run_info: Run whose ``api_calls`` and output are recorded.
+    :param flow_folder: Location handed to ``RecordStorage.set_record``.
+    """
+    if os.environ.get("PF_RECORDING_MODE") != "record":
+        return
+
+    recorded_output = None
+    # api_calls may be unset (presumably when the run failed before any call
+    # was traced); treat that as "nothing to record" instead of raising.
+    for api_call in run_info.api_calls or []:
+        if not (api_call.get("name") or "").startswith("AzureOpenAI"):
+            continue
+        inputs = api_call.get("inputs") or {}
+        if "prompt" not in inputs:
+            # Without a prompt there is no key to store the record under.
+            continue
+
+        prompt_tpl = inputs["prompt"]
+        hash_dict = {
+            keyword: inputs[keyword]
+            for keyword in get_inputs_for_prompt_template(prompt_tpl)
+            if keyword in inputs
+        }
+        # "prompt" is assigned last so it overrides a template input of the same name.
+        hash_dict["prompt"] = prompt_tpl
+        # Sort by key so the stored key does not depend on insertion order.
+        hash_dict = collections.OrderedDict(sorted(hash_dict.items()))
+
+        # The run output is the same for every call, so serialize it only once.
+        if recorded_output is None:
+            recorded_output = str(serialize(run_info)["output"])
+        RecordStorage.set_record(flow_folder, hash_dict, recorded_output)
+
+
 def snake_to_camel(name):
     return re.sub(r"(?:^|_)([a-z])", lambda x: x.group(1).upper(), name)
 
@@ -639,8 +661,7 @@ def _gen_dynamic_list(function_config: Dict) -> List:
     from promptflow._cli._utils import get_workspace_triad_from_local
 
     workspace_triad = get_workspace_triad_from_local()
-    if (workspace_triad.subscription_id and workspace_triad.resource_group_name
-            and workspace_triad.workspace_name):
+    if workspace_triad.subscription_id and workspace_triad.resource_group_name and workspace_triad.workspace_name:
         return gen_dynamic_list(func_path, func_kwargs, workspace_triad._asdict())
     # if no workspace triple available, just skip.
     else:
@@ -813,8 +834,13 @@ def get_local_connections_from_executable(executable, client):
     for n in connection_names:
         try:
             conn = client.connections.get(name=n, with_secrets=True)
+            if conn is not None and conn.TYPE == ConnectionType.AZURE_OPEN_AI and conn.api_base == "dummy_base":
+                if os.environ.get("PF_RECORDING_MODE", None) == "replay":
+                    return {}
             result[n] = conn._to_execution_connection_dict()
         except ConnectionNotFoundError:
+            if os.environ.get("PF_RECORDING_MODE", None) == "replay":
+                return result
             # ignore when connection not found since it can be configured with env var.
             raise Exception(f"Connection {n!r} required for flow {executable.name!r} is not found.")
     return result

diff --git a/src/promptflow/promptflow/_sdk/operations/_local_storage_operations.py b/src/promptflow/promptflow/_sdk/operations/_local_storage_operations.py
@@ -26,7 +26,7 @@
     get_run_output_path,
 )
 from promptflow._sdk._errors import BulkRunException
-from promptflow._sdk._utils import generate_flow_tools_json
+from promptflow._sdk._utils import generate_flow_tools_json, record_node_run
 from promptflow._sdk.entities import Run
 from promptflow._sdk.entities._flow import Flow
 from promptflow._utils.dataclass_serializer import serialize
@@ -204,6 +204,8 @@ def __init__(self, run: Run, stream=False, run_mode=RunMode.Test):
         self._meta_path = self.path / LocalStorageFilenames.META
         self._exception_path = self.path / LocalStorageFilenames.EXCEPTION
 
+        self._flow_path: Path = run.flow
+
         self._dump_meta_file()
 
     def _dump_meta_file(self) -> None:
@@ -366,6 +368,7 @@ def persist_node_run(self, run_info: NodeRunInfo) -> None:
         line_number = 0 if node_run_record.line_number is None else node_run_record.line_number
         filename = f"{str(line_number).zfill(self.LINE_NUMBER_WIDTH)}.jsonl"
         node_run_record.dump(node_folder / filename, run_name=self._run.name)
+        record_node_run(run_info, self._flow_path)
 
     def persist_flow_run(self, run_info: FlowRunInfo) -> None:
         """Persist line run record to local storage."""

diff --git a/src/promptflow/promptflow/_sdk/operations/_test_submitter.py b/src/promptflow/promptflow/_sdk/operations/_test_submitter.py
@@ -11,7 +11,7 @@
 from typing import Any, Mapping
 
 from promptflow._sdk._constants import LOGGER_NAME, PROMPT_FLOW_DIR_NAME
-from promptflow._sdk._utils import dump_flow_result, parse_variant
+from promptflow._sdk._utils import dump_flow_result, parse_variant, record_node_run
 from promptflow._sdk.entities._flow import Flow
 from promptflow._sdk.operations._local_storage_operations import LoggerOperations
 from promptflow._sdk.operations._run_submitter import SubmitterHelper, variant_overwrite_context
@@ -177,6 +177,7 @@ def flow_test(
                 generator_outputs = self._get_generator_outputs(line_result.output)
                 if generator_outputs:
                     logger.info(f"Some streaming outputs in the result, {generator_outputs.keys()}")
+            record_node_run(line_result.run_info, self._origin_flow.code)
             return line_result
 
     def node_test(
@@ -204,6 +205,7 @@ def node_test(
                 working_dir=self.flow.code,
                 output_sub_dir=".promptflow/intermediate",
             )
+            record_node_run(result, self._origin_flow.code)
             return result
 
     def _chat_flow(self, inputs, chat_history_name, environment_variables: dict = None, show_step_output=False):