-
Notifications
You must be signed in to change notification settings - Fork 97
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
42 changed files
with
2,198 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
name: Test and Evaulate Prompts with Promptflow | ||
|
||
on: | ||
workflow_dispatch: | ||
push: | ||
branches: [ main ] | ||
|
||
env: | ||
GROUP: ${{secrets.GROUP}} | ||
WORKSPACE: ${{secrets.WORKSPACE}} | ||
SUBSCRIPTION: ${{secrets.SUBSCRIPTION}} | ||
RUN_NAME: web_classification_variant_1_20230816_215600_605116 | ||
EVAL_RUN_NAME: classification_accuracy_eval_default_20230821_111809_077086 | ||
|
||
jobs: | ||
login-and-run-and-evalpf: | ||
runs-on: ubuntu-latest | ||
steps: | ||
- name: check out repo | ||
uses: actions/checkout@v2 | ||
- name: install az ml extension | ||
run: az extension add -n ml -y | ||
- name: azure login | ||
uses: azure/login@v1 | ||
with: | ||
creds: ${{secrets.AZURE_CREDENTIALS}} | ||
- name: Set up Python | ||
uses: actions/setup-python@v2 | ||
with: | ||
python-version: '3.11.4' | ||
- name: list current directory | ||
run: ls | ||
- name: install promptflow | ||
run: pip install -r promptflow/web-classification/requirements.txt | ||
- name: run promptflow | ||
run: | | ||
pfazure run create -f promptflow/web-classification/run.yml --subscription ${{env.SUBSCRIPTION}} -g ${{env.GROUP}} -w ${{env.WORKSPACE}} --stream > promptflow/llmops-helper/run_info.txt | ||
cat promptflow/llmops-helper/run_info.txt | ||
- name: set run name | ||
run: | | ||
echo "RUN_NAME=$(python promptflow/llmops-helper/parse_run_output.py run_info.txt)" >> "$GITHUB_ENV" | ||
- name: show the current run name | ||
run: echo "Run name is:" ${{env.RUN_NAME}} | ||
- name: show promptflow results | ||
run: pfazure run show-details --name ${{env.RUN_NAME}} --subscription ${{env.SUBSCRIPTION}} -g ${{env.GROUP}} -w ${{env.WORKSPACE}} | ||
- name: run promptflow evaluations | ||
run: pfazure run create -f promptflow/web-classification/run_evaluation.yml --run ${{env.RUN_NAME}} --subscription ${{env.SUBSCRIPTION}} -g ${{env.GROUP}} -w ${{env.WORKSPACE}} --stream > promptflow/llmops-helper/eval_info.txt | ||
# `export` only affects the shell of this single step, so later steps that read
# ${{env.EVAL_RUN_NAME}} would still see the workflow-level default. Writing the
# value to $GITHUB_ENV (as the "set run name" step does for RUN_NAME) makes it
# visible to the following steps of this job.
- name: get eval run name
  run: |
    echo "EVAL_RUN_NAME=$(python promptflow/llmops-helper/parse_run_output.py eval_info.txt)" >> "$GITHUB_ENV"
- name: show promptflow details | ||
run: pfazure run show-details --name ${{env.EVAL_RUN_NAME}} --subscription ${{env.SUBSCRIPTION}} -g ${{env.GROUP}} -w ${{env.WORKSPACE}} | ||
- name: show promptflow metrics | ||
run: pfazure run show-metrics --name ${{env.EVAL_RUN_NAME}} --subscription ${{env.SUBSCRIPTION}} -g ${{env.GROUP}} -w ${{env.WORKSPACE}} > promptflow/llmops-helper/eval_result.json | ||
|
||
assert-and-register-model: | ||
needs: login-and-run-and-evalpf | ||
runs-on: ubuntu-latest | ||
steps: | ||
- name: check out repo | ||
uses: actions/checkout@v2 | ||
- name: install az ml extension | ||
run: az extension add -n ml -y | ||
- name: azure login | ||
uses: azure/login@v1 | ||
with: | ||
creds: ${{secrets.AZURE_CREDENTIALS}} | ||
- name: Set up Python | ||
uses: actions/setup-python@v2 | ||
with: | ||
python-version: '3.11.4' | ||
- name: set default subscription | ||
run: | | ||
az account set -s ${{env.SUBSCRIPTION}} | ||
- name: list current directory | ||
run: ls | ||
- name: install promptflow | ||
run: pip install -r promptflow/web-classification/requirements.txt | ||
# Runs assert.py against the evaluation metrics and publishes the verdict as the
# step output `result` ('true' / 'false'), which gates the model registration step.
- name: get assert eval results
  id: jobMetricAssert
  run: |
    # NOTE <file>.json is the file name and decimal is the threshold for the assertion
    export ASSERT=$(python promptflow/llmops-helper/assert.py result.json 0.6)
    echo "::debug::Assert has returned the following value: $ASSERT"
    # assert.py will return True or False; lowercase it and compare it as a string.
    # Executing the value as a command (`if ${ASSERT,,}`) would treat empty output
    # as success and would run whatever text the script happened to print.
    if [ "${ASSERT,,}" = "true" ]; then
      echo "::debug::Prompt flow run met the quality bar and can be deployed."
      # `::set-output` is deprecated; step outputs are written to $GITHUB_OUTPUT.
      echo "result=true" >> "$GITHUB_OUTPUT"
    else
      echo "::warning::Prompt flow run didn't meet quality bar."
      echo "result=false" >> "$GITHUB_OUTPUT"
    fi
- name: register promptflow model | ||
if: ${{ steps.jobMetricAssert.outputs.result == 'true' }} | ||
run: az ml model create --file promptflow/deployment/model.yaml -g ${{env.GROUP}} -w ${{env.WORKSPACE}} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json | ||
name: blue | ||
endpoint_name: web-classification-7272ff | ||
model: azureml:web-classification-model:1 | ||
# You can also specify model files path inline | ||
# path: examples/flows/chat/basic-chat | ||
environment: | ||
image: mcr.microsoft.com/azureml/promptflow/promptflow-runtime:20230808.v1 | ||
# inference config is used to build a serving container for online deployments | ||
inference_config: | ||
liveness_route: | ||
path: /health | ||
port: 8080 | ||
readiness_route: | ||
path: /health | ||
port: 8080 | ||
scoring_route: | ||
path: /score | ||
port: 8080 | ||
instance_type: Standard_E16s_v3 | ||
instance_count: 1 | ||
environment_variables: | ||
|
||
# "compute" mode is the default mode, if you want to deploy to serving mode, you need to set this env variable to "serving" | ||
PROMPTFLOW_RUN_MODE: serving | ||
|
||
# for pulling connections from workspace | ||
PRT_CONFIG_OVERRIDE: deployment.subscription_id=<sub-id>,deployment.resource_group=o<resource-group>,deployment.workspace_name=<workspace-name>,deployment.endpoint_name=<endpoint-name>,deployment.deployment_name=<deployment-name> | ||
|
||
# (Optional) When there are multiple fields in the response, using this env variable will filter the fields to expose in the response. | ||
# For example, if there are 2 flow outputs: "answer", "context", and I only want to have "answer" in the endpoint response, I can set this env variable to '["answer"]'. | ||
# If you don't set this environment, by default all flow outputs will be included in the endpoint response. | ||
# PROMPTFLOW_RESPONSE_INCLUDED_FIELDS: '["category", "evidence"]' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineEndpoint.schema.json | ||
name: web-classification-endpoint | ||
auth_mode: key |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
$schema: https://azuremlschemas.azureedge.net/latest/model.schema.json | ||
name: web-classification-model | ||
path: ../web-classification | ||
stage: Production | ||
description: register web-classification flow folder as a custom model | ||
properties: | ||
# In AzureML studio UI, endpoint detail UI Test tab needs this property to know it's from prompt flow | ||
azureml.promptflow.source_flow_id: web-classification | ||
|
||
# Following are properties only for classification flow | ||
# endpoint detail UI Test tab needs this property to know it's a classification flow | ||
azureml.promptflow.mode: classification |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
{ | ||
"url": "https://www.microsoft.com/en-us/store/collections/xboxseriessconsoles?icid=CNav_Xbox_Series_S" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
# Basic Eval | ||
This example shows how to create a basic evaluation flow. | ||
|
||
Tools used in this flow: | ||
- `python` tool | ||
|
||
## Prerequisites | ||
|
||
Install promptflow sdk and other dependencies in this folder: | ||
```bash | ||
pip install -r requirements.txt | ||
``` | ||
|
||
## What you will learn | ||
|
||
In this flow, you will learn | ||
- how to compose a point based evaluation flow, where you can calculate point-wise metrics. | ||
- the way to log metrics. use `from promptflow import log_metric` | ||
- see file [aggregate](aggregate.py). TODO. | ||
|
||
### 1. Test flow with single line data | ||
|
||
Testing flow/node: | ||
```bash | ||
# test with default input value in flow.dag.yaml | ||
pf flow test --flow . | ||
|
||
# test with flow inputs | ||
pf flow test --flow . --inputs groundtruth=ABC prediction=ABC | ||
|
||
# test node with inputs | ||
pf flow test --flow . --node line_process --inputs groundtruth=ABC prediction=ABC | ||
``` | ||
|
||
### 2. create flow run with multi line data | ||
There are two ways to evaluate a classification flow. | ||
|
||
```bash | ||
pf run create --flow . --data ./data.jsonl --stream | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
from typing import List | ||
|
||
from promptflow import tool | ||
|
||
|
||
@tool
def aggregate(processed_results: List[str]):
    """
    Roll the per-line results up to the variant level and log a metric for it.

    The number of processed results and the results themselves are printed,
    the count is logged under the metric key "results_num", and the count is
    returned.

    :param processed_results: List of the output of line_process node.
    :return: The number of entries in processed_results.
    """

    # Placeholder aggregation: the only metric computed is the result count.
    total = len(processed_results)
    for item in (total, processed_results):
        print(item)

    # Log metric for each variant (imported at call time, as before).
    from promptflow import log_metric
    log_metric(key="results_num", value=total)

    return total
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"groundtruth": "Tomorrow's weather will be sunny.","prediction": "The weather will be sunny tomorrow."} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
inputs: | ||
groundtruth: | ||
type: string | ||
default: groundtruth | ||
prediction: | ||
type: string | ||
default: prediction | ||
outputs: | ||
results: | ||
type: string | ||
reference: ${line_process.output} | ||
nodes: | ||
- name: line_process | ||
type: python | ||
source: | ||
type: code | ||
path: line_process.py | ||
inputs: | ||
groundtruth: ${inputs.groundtruth} | ||
prediction: ${inputs.prediction} | ||
- name: aggregate | ||
type: python | ||
source: | ||
type: code | ||
path: aggregate.py | ||
inputs: | ||
processed_results: ${line_process.output} | ||
aggregation: true |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
from promptflow import tool | ||
|
||
|
||
@tool
def line_process(groundtruth: str, prediction: str):
    """
    Grade the prediction of a single line against its groundtruth.

    The comparison ignores case (both values are lower-cased first).

    :param groundtruth: the groundtruth of a single line.
    :param prediction: the prediction of a single line.
    :return: "Correct" when the two values match, otherwise "Incorrect".
    """

    # Guard clause: any case-insensitive mismatch is graded as incorrect.
    if groundtruth.lower() != prediction.lower():
        return "Incorrect"
    return "Correct"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
--extra-index-url https://azuremlsdktestpypi.azureedge.net/promptflow/ | ||
promptflow | ||
promptflow-tools | ||
langchain | ||
jinja2 |
36 changes: 36 additions & 0 deletions
36
promptflow/evaluation/classification-accuracy-eval/.promptflow/flow.tools.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
{ | ||
"package": {}, | ||
"code": { | ||
"grade.py": { | ||
"name": "grade.py", | ||
"type": "python", | ||
"inputs": { | ||
"groundtruth": { | ||
"type": [ | ||
"string" | ||
] | ||
}, | ||
"prediction": { | ||
"type": [ | ||
"string" | ||
] | ||
} | ||
}, | ||
"source": "grade.py", | ||
"function": "grade" | ||
}, | ||
"calculate_accuracy.py": { | ||
"name": "calculate_accuracy.py", | ||
"type": "python", | ||
"inputs": { | ||
"grades": { | ||
"type": [ | ||
"object" | ||
] | ||
} | ||
}, | ||
"source": "calculate_accuracy.py", | ||
"function": "calculate_accuracy" | ||
} | ||
} | ||
} |
38 changes: 38 additions & 0 deletions
38
promptflow/evaluation/classification-accuracy-eval/README.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
# Classification Accuracy Evaluation | ||
|
||
This is a flow illustrating how to evaluate the performance of a classification system. It involves comparing each prediction to the groundtruth, assigning a "Correct" or "Incorrect" grade, and aggregating the results to produce metrics such as accuracy, which reflects how good the system is at classifying the data. | ||
|
||
Tools used in this flow: | ||
- `python` tool | ||
|
||
## What you will learn | ||
|
||
In this flow, you will learn | ||
- how to compose a point based evaluation flow, where you can calculate point-wise metrics. | ||
- the way to log metrics. use `from promptflow import log_metric` | ||
- see file [calculate_accuracy.py](calculate_accuracy.py) | ||
|
||
### 1. Test flow/node | ||
|
||
```bash | ||
# test with default input value in flow.dag.yaml | ||
pf flow test --flow . | ||
|
||
# test with flow inputs | ||
pf flow test --flow . --inputs groundtruth=APP prediction=APP | ||
|
||
# test node with inputs | ||
pf flow test --flow . --node grade --inputs groundtruth=groundtruth prediction=prediction | ||
``` | ||
|
||
### 2. create flow run with multi line data | ||
There are two ways to evaluate a classification flow. | ||
|
||
```bash | ||
pf run create --flow . --data ./data.jsonl --stream | ||
``` | ||
|
||
### 3. create run against other flow run | ||
|
||
Learn more in [web-classification](../../standard/web-classification/README.md) | ||
|
Binary file added
BIN
+1.04 KB
...ow/evaluation/classification-accuracy-eval/__pycache__/calculate_accuracy.cpython-311.pyc
Binary file not shown.
Binary file added
BIN
+667 Bytes
promptflow/evaluation/classification-accuracy-eval/__pycache__/grade.cpython-311.pyc
Binary file not shown.
17 changes: 17 additions & 0 deletions
17
promptflow/evaluation/classification-accuracy-eval/calculate_accuracy.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
from typing import List | ||
|
||
from promptflow import log_metric, tool | ||
|
||
|
||
@tool
def calculate_accuracy(grades: List[str]):
    """
    Compute the share of "Correct" grades and log it as the "accuracy" metric.

    :param grades: Per-line grades; entries equal to "Correct" count as hits.
    :return: A new list containing the same grades, in the same order.
    """
    result = list(grades)

    # calculate accuracy for each variant
    # An empty batch has no correct answers; log 0.0 instead of raising
    # ZeroDivisionError so the aggregation node still completes.
    if result:
        accuracy = round(result.count("Correct") / len(result), 2)
    else:
        accuracy = 0.0
    log_metric("accuracy", accuracy)

    return result
3 changes: 3 additions & 0 deletions
3
promptflow/evaluation/classification-accuracy-eval/data.jsonl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
{"groundtruth": "App","prediction": "App"} | ||
{"groundtruth": "Channel","prediction": "Channel"} | ||
{"groundtruth": "Academic","prediction": "Academic"} |
Oops, something went wrong.