Skip to content

Commit ecf9d07

Browse files
authored
[SDK]persist input from run results to avoid input missing in run results (#855)
# Description

Please add an informative description that covers the changes made by the pull request and link all relevant issues.

Before:
![image](https://github.com/microsoft/promptflow/assets/7776147/1d9aa40f-3bc9-487c-9249-34558aba39c2)

After:
![image](https://github.com/microsoft/promptflow/assets/7776147/4628d473-0912-41dd-a442-09c39742a2b4)

# All Promptflow Contribution checklist:
- [ ] **The pull request does not introduce [breaking changes].**
- [ ] **CHANGELOG is updated for new features, bug fixes or other significant changes.**
- [ ] **I have read the [contribution guidelines](../CONTRIBUTING.md).**
- [ ] **Create an issue and link to the pull request to get dedicated review from promptflow team. Learn more: [suggested workflow](../CONTRIBUTING.md#suggested-workflow).**

## General Guidelines and Best Practices
- [ ] Title of the pull request is clear and informative.
- [ ] There are a small number of commits, each of which has an informative message. This means that previously merged commits do not appear in the history of the PR. For more information on cleaning up the commits in your PR, [see this page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md).

### Testing Guidelines
- [ ] Pull request includes test coverage for the included changes.
1 parent b599796 commit ecf9d07

File tree

4 files changed

+42
-3
lines changed

4 files changed

+42
-3
lines changed

src/promptflow/promptflow/_sdk/operations/_local_storage_operations.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
from promptflow.contracts.run_info import RunInfo as NodeRunInfo
3737
from promptflow.contracts.run_info import Status
3838
from promptflow.contracts.run_mode import RunMode
39+
from promptflow.executor._result import LineResult
3940
from promptflow.executor.flow_executor import BulkResult
4041
from promptflow.storage import AbstractRunStorage
4142

@@ -239,7 +240,14 @@ def load_io_spec(self) -> Tuple[Dict[str, Dict[str, str]], Dict[str, Dict[str, s
239240
flow_dag = yaml.safe_load(f)
240241
return flow_dag["inputs"], flow_dag["outputs"]
241242

242-
def dump_inputs(self, inputs: RunInputs) -> None:
243+
def dump_inputs(self, line_results: List[LineResult]) -> None:
244+
inputs = []
245+
for line_result in line_results:
246+
try:
247+
inputs.append(line_result.run_info.inputs)
248+
except Exception:
249+
# ignore when single line doesn't have inputs
250+
pass
243251
df = pd.DataFrame(inputs)
244252
with open(self._inputs_path, mode="w", encoding=DEFAULT_ENCODING) as f:
245253
# policy: http://policheck.azurewebsites.net/Pages/TermInfo.aspx?LCID=9&TermID=203588
@@ -389,6 +397,7 @@ def persist_result(self, result: Optional[BulkResult]) -> None:
389397
return
390398
self.dump_outputs(result.outputs)
391399
self.dump_metrics(result.metrics)
400+
self.dump_inputs(result.line_results)
392401

393402
@staticmethod
394403
def _prepare_folder(path: Union[str, Path]) -> Path:

src/promptflow/promptflow/_sdk/operations/_run_submitter.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -330,8 +330,7 @@ def _submit_bulk_run(self, flow: Flow, run: Run, local_storage: LocalStorageOper
330330
# persist snapshot and result
331331
# snapshot: flow directory and (mapped) inputs
332332
local_storage.dump_snapshot(flow)
333-
local_storage.dump_inputs(mapped_inputs)
334-
# result: outputs and metrics
333+
# persist inputs, outputs and metrics
335334
local_storage.persist_result(bulk_result)
336335
# exceptions
337336
local_storage.dump_exception(exception=exception, bulk_results=bulk_result)

src/promptflow/tests/sdk_cli_test/e2etests/test_flow_run.py

+30
Original file line numberDiff line numberDiff line change
@@ -745,3 +745,33 @@ def test_system_metrics_in_properties(self, pf) -> None:
745745
assert FlowRunProperties.SYSTEM_METRICS in run.properties
746746
assert isinstance(run.properties[FlowRunProperties.SYSTEM_METRICS], dict)
747747
assert "total_tokens" in run.properties[FlowRunProperties.SYSTEM_METRICS]
748+
749+
def test_run_get_inputs(self, pf):
750+
# inputs should be persisted when defaults are used
751+
run = pf.run(
752+
flow=f"{FLOWS_DIR}/default_input",
753+
data=f"{DATAS_DIR}/webClassification1.jsonl",
754+
)
755+
inputs = pf.runs._get_inputs(run=run)
756+
assert inputs == {"line_number": [0], "question": ["input value from default"]}
757+
758+
# inputs should be persisted when data value are used
759+
run = pf.run(
760+
flow=f"{FLOWS_DIR}/flow_with_dict_input",
761+
data=f"{DATAS_DIR}/dictInput1.jsonl",
762+
)
763+
inputs = pf.runs._get_inputs(run=run)
764+
assert inputs == {"key": [{"key": "value in data"}], "line_number": [0]}
765+
766+
# inputs should be persisted when column-mapping are used
767+
run = pf.run(
768+
flow=f"{FLOWS_DIR}/flow_with_dict_input",
769+
data=f"{DATAS_DIR}/webClassification1.jsonl",
770+
column_mapping={"key": {"value": "value in column-mapping"}, "url": "${data.url}"},
771+
)
772+
inputs = pf.runs._get_inputs(run=run)
773+
assert inputs == {
774+
"key": [{"value": "value in column-mapping"}],
775+
"line_number": [0],
776+
"url": ["https://www.youtube.com/watch?v=o5ZQyXaAv1g"],
777+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"key": {"key": "value in data"}}

0 commit comments

Comments
 (0)