
Commit e3de7b1

Merge branch 'main' of https://github.com/microsoft/promptflow into orchestrator
2 parents: fe0b720 + cb419ce

97 files changed: +58120 -48880 lines changed


.github/actions/step_sdk_setup/action.yml

+1 -1

@@ -26,7 +26,7 @@ runs:
       pip list
       python ./setup.py bdist_wheel
       $package = Get-ChildItem ./dist | ? { $_.Name.Contains('.whl')}
-      pip install $($package.FullName + "[azure,executable,azureml-serving]")
+      pip install $($package.FullName + "[azure,executable,azureml-serving,executor-service]")
       echo "########### pip freeze (After) ###########"
       pip freeze
     working-directory: ${{ inputs.scriptPath }}

.github/workflows/promptflow-executor-e2e-test.yml

+1 -1

@@ -100,7 +100,7 @@ jobs:
       run: |
         Set-PSDebug -Trace 1
         pip install -r ${{ github.workspace }}/src/promptflow/dev_requirements.txt
-        gci ./promptflow -Recurse | % {if ($_.Name.Contains('.whl')) {python -m pip install "$($_.FullName)"}}
+        gci ./promptflow -Recurse | % {if ($_.Name.Contains('.whl')) {python -m pip install "$($_.FullName)[executor-service]"}}
         gci ./promptflow-tools -Recurse | % {if ($_.Name.Contains('.whl')) {python -m pip install $_.FullName}}
         pip freeze
     - name: Azure Login

.github/workflows/promptflow-executor-unit-test.yml

+1 -1

@@ -105,7 +105,7 @@ jobs:
       run: |
         Set-PSDebug -Trace 1
         pip install -r ${{ github.workspace }}/src/promptflow/dev_requirements.txt
-        gci ./promptflow -Recurse | % {if ($_.Name.Contains('.whl')) {python -m pip install "$($_.FullName)"}}
+        gci ./promptflow -Recurse | % {if ($_.Name.Contains('.whl')) {python -m pip install "$($_.FullName)[executor-service]"}}
         gci ./promptflow-tools -Recurse | % {if ($_.Name.Contains('.whl')) {python -m pip install $_.FullName}}
         pip freeze
     - name: Azure Login

src/promptflow/CHANGELOG.md

+1

@@ -5,6 +5,7 @@

 ### Features Added

+- [SDK/CLI][azure] Support specify compute instance as session compute in run.yaml

 ### Bugs Fixed
src/promptflow/dev_requirements.txt

+3

@@ -29,3 +29,6 @@ httpx
 # test dummy flow run in notebook, give a minimal version for vulnerability issue
 ipykernel>=6.27.1
 papermill>=2.5.0
+
+# test executor server in local
+uvicorn>=0.27.0
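
The uvicorn pin above exists so the executor service can be exercised locally during tests. Below is a minimal local-run sketch; the ASGI app path "promptflow.executor._service.app:app" is a hypothetical placeholder for illustration and is not defined by this commit.

# Local-run sketch only; the module path below is an illustrative assumption.
import uvicorn

if __name__ == "__main__":
    # Serve the (hypothetical) executor-service ASGI app on localhost for manual testing.
    uvicorn.run("promptflow.executor._service.app:app", host="127.0.0.1", port=8080)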

src/promptflow/promptflow/_cli/_pf/_experiment.py

+4 -3

@@ -13,9 +13,8 @@
 )
 from promptflow._cli._utils import activate_action, exception_handler
 from promptflow._sdk._constants import get_list_view_type
-from promptflow._sdk._load_functions import load_common
 from promptflow._sdk._pf_client import PFClient
-from promptflow._sdk.entities._experiment import Experiment, ExperimentTemplate
+from promptflow._sdk.entities._experiment import Experiment
 from promptflow._utils.logger_utils import get_cli_sdk_logger

 logger = get_cli_sdk_logger()

@@ -176,9 +175,11 @@ def dispatch_experiment_commands(args: argparse.Namespace):

 @exception_handler("Create experiment")
 def create_experiment(args: argparse.Namespace):
+    from promptflow._sdk._load_functions import _load_experiment_template
+
     template_path = args.template
     logger.debug("Loading experiment template from %s", template_path)
-    template = load_common(ExperimentTemplate, source=template_path)
+    template = _load_experiment_template(source=template_path)
     logger.debug("Creating experiment from template %s", template.dir_name)
     experiment = Experiment.from_template(template, name=args.name)
     logger.debug("Creating experiment %s", experiment.name)

src/promptflow/promptflow/_constants.py

+7 -4

@@ -50,10 +50,8 @@ class AvailableIDE:

 _ENV_PF_INSTALLER = "PF_INSTALLER"

-# trace related
-TRACE_SESSION_ID_OP_CTX_NAME = "pf_trace_session_id"
-

+# trace related
 class SpanFieldName:
     NAME = "name"
     CONTEXT = "context"

@@ -75,7 +73,7 @@ class SpanContextFieldName:


 class SpanStatusFieldName:
-    CODE = "code"
+    STATUS_CODE = "status_code"


 class SpanAttributeFieldName:

@@ -89,6 +87,11 @@ class SpanAttributeFieldName:
     FLOW_ID = "flow_id"
     RUN = "run"
     EXPERIMENT = "experiment"
+    LINE_RUN_ID = "line_run_id"
+    REFERENCED_LINE_RUN_ID = "referenced.line_run_id"
+    COMPLETION_TOKEN_COUNT = "__computed__.cumulative_token_count.completion"
+    PROMPT_TOKEN_COUNT = "__computed__.cumulative_token_count.prompt"
+    TOTAL_TOKEN_COUNT = "__computed__.cumulative_token_count.total"


 class SpanResourceAttributesFieldName:
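
The new SpanAttributeFieldName members give downstream consumers stable keys for reading cumulative token counts off span attributes. A short sketch of how they might be read; the shape of the attributes dict is an assumption for illustration.

# Sketch: reading cumulative token counts from a span attributes dict
# using the constants added above; the dict shape is assumed.
from promptflow._constants import SpanAttributeFieldName

attributes = {
    SpanAttributeFieldName.PROMPT_TOKEN_COUNT: 12,
    SpanAttributeFieldName.COMPLETION_TOKEN_COUNT: 30,
    SpanAttributeFieldName.TOTAL_TOKEN_COUNT: 42,
}

# Missing counts default to 0 so partially instrumented spans still aggregate.
total = attributes.get(SpanAttributeFieldName.TOTAL_TOKEN_COUNT, 0)
assert total == 42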

src/promptflow/promptflow/_core/operation_context.py

+8 -1

@@ -2,7 +2,6 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
 import copy
-
 from contextvars import ContextVar
 from typing import Dict, Mapping

@@ -30,6 +29,14 @@ def _add_otel_attributes(self, key, value):
         attributes[key] = value
         self[OperationContext._OTEL_ATTRIBUTES] = attributes

+    def _remove_otel_attributes(self, keys: list):
+        if isinstance(keys, str):
+            keys = [keys]
+        attributes = self.get(OperationContext._OTEL_ATTRIBUTES, {})
+        for key in keys:
+            attributes.pop(key, None)
+        self[OperationContext._OTEL_ATTRIBUTES] = attributes
+
     def _get_otel_attributes(self):
         return self.get(OperationContext._OTEL_ATTRIBUTES, {})
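
The new _remove_otel_attributes helper mirrors the existing add/get helpers and accepts either a single key or a list of keys, ignoring keys that were never set. A small usage sketch based only on the methods visible in this diff; the attribute names are examples.

# Illustrative round-trip of the OTel attribute helpers on OperationContext.
from promptflow._core.operation_context import OperationContext

ctx = OperationContext.get_instance()
ctx._add_otel_attributes("session_id", "my-session")          # example key/value
ctx._add_otel_attributes("referenced.line_run_id", "line-1")  # example key/value
print(ctx._get_otel_attributes())  # both keys present

# The new helper tolerates a single key, a list of keys, and keys that were never set.
ctx._remove_otel_attributes("referenced.line_run_id")
ctx._remove_otel_attributes(["session_id", "never_set"])
print(ctx._get_otel_attributes())  # {}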

src/promptflow/promptflow/_core/tracer.py

+66 -6

@@ -10,9 +10,10 @@
 from collections.abc import Iterator
 from contextvars import ContextVar
 from datetime import datetime
+from threading import Lock
 from typing import Callable, Dict, List, Optional

-from opentelemetry import trace
+import opentelemetry.trace as otel_trace
 from opentelemetry.trace.status import StatusCode

 from promptflow._core.generator_proxy import GeneratorProxy, generate_from_proxy

@@ -24,7 +25,8 @@

 from .thread_local_singleton import ThreadLocalSingleton

-open_telemetry_tracer = trace.get_tracer("promptflow")
+
+open_telemetry_tracer = otel_trace.get_tracer("promptflow")


 class Tracer(ThreadLocalSingleton):

@@ -153,6 +155,46 @@ def _format_error(error: Exception) -> dict:
     }


+class TokenCollector():
+    _lock = Lock()
+
+    def __init__(self):
+        self._span_id_to_tokens = {}
+
+    def collect_openai_tokens(self, span, output):
+        span_id = span.get_span_context().span_id
+        if not inspect.isgenerator(output) and hasattr(output, "usage") and output.usage is not None:
+            tokens = {
+                f"__computed__.cumulative_token_count.{k.split('_')[0]}": v for k, v in output.usage.dict().items()
+            }
+            if tokens:
+                with self._lock:
+                    self._span_id_to_tokens[span_id] = tokens
+
+    def collect_openai_tokens_for_parent_span(self, span):
+        tokens = self.try_get_openai_tokens(span.get_span_context().span_id)
+        if tokens:
+            if not hasattr(span, "parent") or span.parent is None:
+                return
+            parent_span_id = span.parent.span_id
+            with self._lock:
+                if parent_span_id in self._span_id_to_tokens:
+                    merged_tokens = {
+                        key: self._span_id_to_tokens[parent_span_id].get(key, 0) + tokens.get(key, 0)
+                        for key in set(self._span_id_to_tokens[parent_span_id]) | set(tokens)
+                    }
+                    self._span_id_to_tokens[parent_span_id] = merged_tokens
+                else:
+                    self._span_id_to_tokens[parent_span_id] = tokens
+
+    def try_get_openai_tokens(self, span_id):
+        with self._lock:
+            return self._span_id_to_tokens.get(span_id, None)
+
+
+token_collector = TokenCollector()
+
+
 def _create_trace_from_function_call(
     f, *, args=None, kwargs=None, args_to_ignore: Optional[List[str]] = None, trace_type=TraceType.FUNCTION
 ):

@@ -205,6 +247,14 @@ def get_node_name_from_context():
     return None


+def enrich_span_with_context(span):
+    try:
+        attrs_from_context = OperationContext.get_instance()._get_otel_attributes()
+        span.set_attributes(attrs_from_context)
+    except Exception as e:
+        logging.warning(f"Failed to enrich span with context: {e}")
+
+
 def enrich_span_with_trace(span, trace):
     try:
         span.set_attributes(

@@ -215,8 +265,7 @@ def enrich_span_with_trace(span, trace):
                 "node_name": get_node_name_from_context(),
             }
         )
-        attrs_from_context = OperationContext.get_instance()._get_otel_attributes()
-        span.set_attributes(attrs_from_context)
+        enrich_span_with_context(span)
     except Exception as e:
         logging.warning(f"Failed to enrich span with trace: {e}")

@@ -235,6 +284,9 @@ def enrich_span_with_output(span, output):
     try:
         serialized_output = serialize_attribute(output)
         span.set_attribute("output", serialized_output)
+        tokens = token_collector.try_get_openai_tokens(span.get_span_context().span_id)
+        if tokens:
+            span.set_attributes(tokens)
     except Exception as e:
         logging.warning(f"Failed to enrich span with output: {e}")

@@ -306,12 +358,16 @@ async def wrapped(*args, **kwargs):
            Tracer.push(trace)
            enrich_span_with_input(span, trace.inputs)
            output = await func(*args, **kwargs)
+            if trace_type == TraceType.LLM:
+                token_collector.collect_openai_tokens(span, output)
            enrich_span_with_output(span, output)
            span.set_status(StatusCode.OK)
-            return Tracer.pop(output)
+            output = Tracer.pop(output)
        except Exception as e:
            Tracer.pop(None, e)
            raise
+        token_collector.collect_openai_tokens_for_parent_span(span)
+        return output

    wrapped.__original_function = func

@@ -351,12 +407,16 @@ def wrapped(*args, **kwargs):
            Tracer.push(trace)
            enrich_span_with_input(span, trace.inputs)
            output = func(*args, **kwargs)
+            if trace_type == TraceType.LLM:
+                token_collector.collect_openai_tokens(span, output)
            enrich_span_with_output(span, output)
            span.set_status(StatusCode.OK)
-            return Tracer.pop(output)
+            output = Tracer.pop(output)
        except Exception as e:
            Tracer.pop(None, e)
            raise
+        token_collector.collect_openai_tokens_for_parent_span(span)
+        return output

    wrapped.__original_function = func
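
To make the token roll-up above concrete: when an LLM span finishes, its counts are merged into the parent span's entry by summing per key. The standalone sketch below re-implements just that merge rule for illustration; it is not part of the promptflow API.

# Standalone illustration of the parent-span merge rule used by TokenCollector:
# counts sharing a key are summed, keys present on only one side are kept.
def merge_token_counts(parent: dict, child: dict) -> dict:
    return {key: parent.get(key, 0) + child.get(key, 0) for key in set(parent) | set(child)}

parent = {"__computed__.cumulative_token_count.total": 100,
          "__computed__.cumulative_token_count.prompt": 80}
child = {"__computed__.cumulative_token_count.total": 42,
         "__computed__.cumulative_token_count.completion": 30}

print(merge_token_counts(parent, child))
# {'__computed__.cumulative_token_count.total': 142,
#  '__computed__.cumulative_token_count.prompt': 80,
#  '__computed__.cumulative_token_count.completion': 30}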

src/promptflow/promptflow/_sdk/_constants.py

+9

@@ -70,6 +70,7 @@ def _prepare_home_dir() -> Path:
 SERVICE_CONFIG_FILE = "pf.yaml"
 PF_SERVICE_PORT_FILE = "pfs.port"
 PF_SERVICE_LOG_FILE = "pfs.log"
+PF_TRACE_CONTEXT = "PF_TRACE_CONTEXT"

 LOCAL_MGMT_DB_PATH = (HOME_PROMPT_FLOW_DIR / "pf.sqlite").resolve()
 LOCAL_MGMT_DB_SESSION_ACQUIRE_LOCK_PATH = (HOME_PROMPT_FLOW_DIR / "pf.sqlite.lock").resolve()

@@ -135,6 +136,7 @@ def _prepare_home_dir() -> Path:
 TRACE_MGMT_DB_PATH = (HOME_PROMPT_FLOW_DIR / "trace.sqlite").resolve()
 TRACE_MGMT_DB_SESSION_ACQUIRE_LOCK_PATH = (HOME_PROMPT_FLOW_DIR / "trace.sqlite.lock").resolve()
 SPAN_TABLENAME = "span"
+PFS_MODEL_DATETIME_FORMAT = "iso8601"


 class CustomStrongTypeConnectionConfigs:

@@ -425,3 +427,10 @@ class ExperimentNodeRunStatus(object):
     COMPLETED = "Completed"
     FAILED = "Failed"
     CANCELED = "Canceled"
+
+
+class ExperimentContextKey:
+    EXPERIMENT = "experiment"
+    # Note: referenced id not used for lineage, only for evaluation
+    REFERENCED_LINE_RUN_ID = "referenced.line_run_id"
+    REFERENCED_RUN_ID = "referenced.run_id"

src/promptflow/promptflow/_sdk/_load_functions.py

+31

@@ -11,6 +11,7 @@
 from .._utils.yaml_utils import load_yaml
 from .entities import Run
 from .entities._connection import CustomConnection, _Connection
+from .entities._experiment import ExperimentTemplate
 from .entities._flow import Flow

 logger = get_cli_sdk_logger()

@@ -105,6 +106,15 @@ def load_connection(
     source: Union[str, PathLike, IO[AnyStr]],
     **kwargs,
 ):
+    """Load connection from YAML file or .env file.
+
+    :param source: The local yaml source of a connection or .env file. Must be a path to a local file.
+        If the source is a path, it will be open and read.
+        An exception is raised if the file does not exist.
+    :type source: Union[PathLike, str]
+    :return: A Connection object
+    :rtype: Connection
+    """
     if Path(source).name.endswith(".env"):
         return _load_env_to_connection(source, **kwargs)
     return load_common(_Connection, source, **kwargs)

@@ -132,3 +142,24 @@ def _load_env_to_connection(
         return CustomConnection(name=name, secrets=data)
     except Exception as e:
         raise Exception(f"Load entity error: {e}") from e
+
+
+def _load_experiment_template(
+    source: Union[str, PathLike, IO[AnyStr]],
+    **kwargs,
+):
+    """Load experiment template from YAML file.
+
+    :param source: The local yaml source of an experiment template. Must be a path to a local file.
+        If the source is a path, it will be open and read.
+        An exception is raised if the file does not exist.
+    :type source: Union[PathLike, str]
+    :return: An ExperimentTemplate object
+    :rtype: ExperimentTemplate
+    """
+    source_path = Path(source)
+    if source_path.is_dir():
+        source = source_path / "flow.exp.yaml"
+    if not source_path.exists():
+        raise FileNotFoundError(f"Experiment template file {source.resolve().absolute().as_posix()} not found.")
+    return load_common(ExperimentTemplate, source=source)
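
A brief usage sketch for the two loaders documented above; the paths are examples, and per the docstrings an exception is raised when the target file does not exist.

# Usage sketch for the documented loaders; paths below are examples.
from promptflow._sdk._load_functions import _load_experiment_template, load_connection

# A .env source becomes a CustomConnection; a YAML source goes through load_common.
connection = load_connection(source="./my_connection.yaml")

# A directory source is resolved to <dir>/flow.exp.yaml by the loader.
template = _load_experiment_template(source="./basic-experiment")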

src/promptflow/promptflow/_sdk/_orm/trace.py

+39 -1

@@ -2,9 +2,10 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------

+import copy
 import typing

-from sqlalchemy import TEXT, Column, Index
+from sqlalchemy import TEXT, Column, Index, text
 from sqlalchemy.exc import IntegrityError
 from sqlalchemy.orm import declarative_base

@@ -65,3 +66,40 @@ def list(
         if parent_span_id is not None:
             stmt = stmt.filter(Span.parent_span_id == parent_span_id)
         return [span for span in stmt.all()]
+
+
+class LineRun:
+    """Line run is an abstraction of spans, which is not persisted in the database."""
+
+    @staticmethod
+    def list(
+        session_id: typing.Optional[str] = None,
+    ) -> typing.List[typing.List[Span]]:
+        with trace_mgmt_db_session() as session:
+            stmt = session.query(Span)
+            if session_id is not None:
+                stmt = stmt.filter(Span.session_id == session_id)
+                # other filters, e.g., experiment, run, path, etc.
+                stmt = stmt.filter(
+                    text("json_extract(json_extract(span.content, '$.attributes'), '$.framework') = 'promptflow'")
+                )
+            else:
+                # TODO: fully support query
+                raise NotImplementedError
+            stmt = stmt.order_by(Span.trace_id)
+            line_runs = []
+            current_spans: typing.List[Span] = []
+            span: Span
+            for span in stmt.all():
+                if len(current_spans) == 0:
+                    current_spans.append(span)
+                    continue
+                current_trace_id = current_spans[0].trace_id
+                if span.trace_id == current_trace_id:
+                    current_spans.append(span)
+                    continue
+                line_runs.append(copy.deepcopy(current_spans))
+                current_spans = [span]
+            if len(current_spans) > 0:
+                line_runs.append(copy.deepcopy(current_spans))
+            return line_runs
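
Because LineRun.list returns spans grouped by trace_id rather than persisted rows, callers post-process each group themselves. An illustrative consumer is shown below; the session id is an example value.

# Illustrative consumer of LineRun.list: one inner list of spans per line run (trace).
from promptflow._sdk._orm.trace import LineRun

for spans in LineRun.list(session_id="my-session-id"):
    trace_id = spans[0].trace_id  # all spans in a group share the same trace_id
    print(f"line run {trace_id}: {len(spans)} span(s)")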
