From 7e7d57851240e02ff99b19fe26c9d63ab7d4970e Mon Sep 17 00:00:00 2001
From: Diondra <16376603+diondrapeck@users.noreply.github.com>
Date: Wed, 11 Sep 2024 11:07:23 -0400
Subject: [PATCH] Include xpia in handled_metrics for evaluation aggregation
 (#3727)

# Description

Adds an `INDIRECT_ATTACK` ("indirect_attack") entry to `EvaluationMetrics` and includes it in the `handled_metrics` list of `_aggregate_label_defect_metrics`, so that indirect attack (XPIA) evaluation labels are aggregated into defect rates alongside `PROTECTED_MATERIAL` and ECI results.

# All Promptflow Contribution checklist:
- [ ] **The pull request does not introduce [breaking changes].**
- [ ] **CHANGELOG is updated for new features, bug fixes or other significant changes.**
- [ ] **I have read the [contribution guidelines](https://github.com/microsoft/promptflow/blob/main/CONTRIBUTING.md).**
- [ ] **I confirm that all new dependencies are compatible with the MIT license.**
- [ ] **Create an issue and link to the pull request to get dedicated review from the promptflow team. Learn more: [suggested workflow](../CONTRIBUTING.md#suggested-workflow).**

## General Guidelines and Best Practices
- [ ] Title of the pull request is clear and informative.
- [ ] There are a small number of commits, each of which has an informative message. This means that previously merged commits do not appear in the history of the PR. For more information on cleaning up the commits in your PR, [see this page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md).

### Testing Guidelines
- [ ] Pull request includes test coverage for the included changes.
---
 src/promptflow-evals/promptflow/evals/_constants.py         | 1 +
 src/promptflow-evals/promptflow/evals/evaluate/_evaluate.py | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/promptflow-evals/promptflow/evals/_constants.py b/src/promptflow-evals/promptflow/evals/_constants.py
index 0c7bb8ad5e4..38d19028c0f 100644
--- a/src/promptflow-evals/promptflow/evals/_constants.py
+++ b/src/promptflow-evals/promptflow/evals/_constants.py
@@ -19,6 +19,7 @@ class EvaluationMetrics:
     SELF_HARM = "self_harm"
     SEXUAL = "sexual"
     PROTECTED_MATERIAL = "protected_material"
+    INDIRECT_ATTACK = "indirect_attack"
 
 
 class _InternalEvaluationMetrics:
diff --git a/src/promptflow-evals/promptflow/evals/evaluate/_evaluate.py b/src/promptflow-evals/promptflow/evals/evaluate/_evaluate.py
index 560baa7e9c9..4f015c912b4 100644
--- a/src/promptflow-evals/promptflow/evals/evaluate/_evaluate.py
+++ b/src/promptflow-evals/promptflow/evals/evaluate/_evaluate.py
@@ -92,6 +92,7 @@ def _aggregate_label_defect_metrics(df: pd.DataFrame) -> Tuple[List[str], Dict[s
     handled_metrics = [
         EvaluationMetrics.PROTECTED_MATERIAL,
         _InternalEvaluationMetrics.ECI,
+        EvaluationMetrics.INDIRECT_ATTACK,
     ]
     label_cols = []
     for col in df.columns:
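
For reviewers, a rough sketch of what this aggregation does with the new metric. This is a simplified, self-contained approximation of `_aggregate_label_defect_metrics`, not the library code: it assumes boolean `*_label` result columns and a `_defect_rate` output suffix, and the column name `outputs.indirect_attack.xpia_label` is an illustrative assumption.

```python
# Simplified approximation of label -> defect-rate aggregation.
# NOT the promptflow-evals implementation; names are illustrative.
from typing import Dict, List, Tuple

import pandas as pd

# Mirrors the handled_metrics list after this change.
HANDLED_METRICS = ["protected_material", "eci", "indirect_attack"]


def aggregate_label_defect_metrics(df: pd.DataFrame) -> Tuple[List[str], Dict[str, float]]:
    """Turn boolean *_label columns for handled metrics into defect rates."""
    # Collect boolean label columns belonging to a handled metric.
    label_cols = [
        col
        for col in df.columns
        if any(metric in col for metric in HANDLED_METRICS) and col.endswith("_label")
    ]
    # Defect rate = fraction of rows where the label is True.
    defect_rates = {
        col.replace("_label", "_defect_rate"): round(float(df[col].mean()), 2)
        for col in label_cols
    }
    return label_cols, defect_rates


# Example: two of four rows flagged as indirect attacks -> defect rate 0.5.
rows = pd.DataFrame({"outputs.indirect_attack.xpia_label": [True, False, True, False]})
print(aggregate_label_defect_metrics(rows))
```

Without this change, `indirect_attack` columns would fall through to the default numeric aggregation instead of being reported as a defect rate.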