Skip to content

Commit

Permalink
Align Cloud Trace JSON Format with OpenTelemetry Standards (#2946)
Browse files Browse the repository at this point in the history
# Description

This pull request updates the Cloud Trace JSON serialization to align
with the OpenTelemetry (OTel) standards. By modifying the dictionary
structure used in the `ReadableSpan.to_json` method, we ensure
consistency with OTel's implementation, enhancing interoperability and
the developer experience. The changes are in accordance with the format
specified in the OTel documentation.

The appended 'Z' in the timestamp represents the UTC timezone, aligning
with the `ReadableSpan.to_json` implementation that reflects timezone
information in Cloud Trace JSON.

## Sample JSON Change
Remove irrelevant fields and align the timestamp format:
```diff
{
    "name": "python_tool_with_simple_image",
    "context": {
      "trace_id": "d86d8adc5f7a30e87a621e6a806da4cf",
      "span_id": "260f4226117b9403",
      "trace_state": ""
    },
    "kind": 1,
-   "start_time": "2024-04-11T09:22:46.841063",
-   "end_time": "2024-04-11T09:22:46.870394",
+   "start_time": "2024-04-11T09:22:46.841063Z",
+   "end_time": "2024-04-11T09:22:46.870394Z",
    "status": {
      "status_code": "Ok",
      "description": ""
    },
    "attributes": {
      "framework": "promptflow",
      "span_type": "Flow",
      "line_run_id": "dbcca243-9984-4ccd-a008-32d298728e2e",
      "inputs": "{\n  \"image\": \"logo.jpg\"\n}",
      "output": "..."
    },
    "events": [
      {
        "name": "promptflow.function.inputs",
-       "timestamp": "2024-04-11T09:22:46.841063",
+       "timestamp": "2024-04-11T09:22:46.841063Z",
        "attributes": {
          "payload": "{\n  \"image\": \"logo.jpg\"\n}"
        }
      },
      {
        "name": "promptflow.function.output",
-       "timestamp": "2024-04-11T09:22:46.870394",
+       "timestamp": "2024-04-11T09:22:46.870394Z",
        "attributes": {
          "payload": "..."
        }
      }
    ],
    "resource": {
      "attributes": {
        "service.name": "promptflow",
        "subscription.id": [REDACT],
        "resource_group.name": "promptflow",
        "workspace.name": "promptflow-canary-dev",
        "collection": "default"
      },
      "schema_url": ""
    },
-   "partition_key": "default_3a284a00-1f34-4159-889c-c5eabfa2a896",
-   "collection_id": "default_3a284a00-1f34-4159-889c-c5eabfa2a896",
-   "id": "260f4226117b9403",
-   "created_by": {
-     "object_id": [REDACT],
-     "tenant_id": [REDACT],
-     "name": "Cheng Liu"
-   }
  }
```

# All Promptflow Contribution checklist:
- [X] **The pull request does not introduce [breaking changes].**
- [ ] **CHANGELOG is updated for new features, bug fixes or other
significant changes.**
- [X] **I have read the [contribution guidelines](../CONTRIBUTING.md).**
- [ ] **Create an issue and link to the pull request to get dedicated
review from promptflow team. Learn more: [suggested
workflow](../CONTRIBUTING.md#suggested-workflow).**

## General Guidelines and Best Practices
- [X] Title of the pull request is clear and informative.
- [X] There are a small number of commits, each of which have an
informative message. This means that previously merged commits do not
appear in the history of the PR. For more information on cleaning up the
commits in your PR, [see this
page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md).

### Testing Guidelines
- [X] Pull request includes test coverage for the included changes.
  • Loading branch information
liucheng-ms authored Apr 24, 2024
1 parent 3b961d3 commit da2af28
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 7 deletions.
34 changes: 31 additions & 3 deletions src/promptflow-azure/promptflow/azure/_storage/cosmosdb/span.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import json
from copy import deepcopy
from datetime import datetime
from typing import Any, Dict

from azure.cosmos.container import ContainerProxy
Expand All @@ -12,6 +13,8 @@
from promptflow._constants import SpanContextFieldName, SpanEventFieldName, SpanFieldName
from promptflow._sdk.entities._trace import Span as SpanEntity

DATE_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ" # timestamp format e.g. 2021-08-25T00:00:00.000000Z


class Span:
name: str = None
Expand All @@ -36,8 +39,9 @@ def __init__(self, span: SpanEntity, collection_id: str, created_by: dict) -> No
self.context = span.context
self.kind = span.kind
self.parent_id = span.parent_id
self.start_time = span.start_time.isoformat()
self.end_time = span.end_time.isoformat()
# span entity start_time and end_time are datetime objects using utc timezone
self.start_time = span.start_time.strftime(DATE_FORMAT)
self.end_time = span.end_time.strftime(DATE_FORMAT)
self.status = span.status
self.attributes = span.attributes
# We will remove attributes from events for cosmosdb 2MB size limit.
Expand All @@ -52,6 +56,12 @@ def __init__(self, span: SpanEntity, collection_id: str, created_by: dict) -> No
self.external_event_data_uris = []
self.span_json_uri = None

        # convert event timestamps to OTel format
for event in self.events:
event[SpanEventFieldName.TIMESTAMP] = datetime.fromisoformat(event[SpanEventFieldName.TIMESTAMP]).strftime(
DATE_FORMAT
)

def persist(self, cosmos_client: ContainerProxy, blob_container_client: ContainerClient, blob_base_uri: str):
if self.id is None or self.partition_key is None or self.resource is None:
return
Expand Down Expand Up @@ -93,13 +103,31 @@ def to_cosmosdb_item(self, attr_value_truncation_length: int = 8 * 1024):
def _persist_span_json(self, blob_container_client: ContainerClient, blob_base_uri: str):
"""
Persist the span data as a JSON string in a blob.
        Persisted span should conform to the format of ReadableSpan.to_json().
https://opentelemetry-python.readthedocs.io/en/latest/_modules/opentelemetry/sdk/trace.html#ReadableSpan.to_json
"""
# check if span_json_uri is already set
if self.span_json_uri is not None:
return

# persist the span as a json string in a blob
span_data = json.dumps(self.to_dict())
# align with ReadableSpan.to_json() format
f_span = {
"name": self.name,
"context": self.context,
"kind": self.kind,
"parent_id": self.parent_id,
"start_time": self.start_time,
"end_time": self.end_time,
"status": self.status,
"attributes": self.attributes,
"events": self.events,
"links": self.links,
"resource": self.resource,
}

span_data = json.dumps(f_span)
blob_path = self._generate_blob_path(file_name="span.json")
blob_client = blob_container_client.get_blob_client(blob_path)
blob_client.upload_blob(span_data)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ def test_to_dict(self):
"name": "test",
"kind": "test",
"parent_id": "test",
"start_time": "2022-01-01T00:00:00",
"end_time": "2022-01-01T00:01:00",
"start_time": "2022-01-01T00:00:00.000000Z",
"end_time": "2022-01-01T00:01:00.000000Z",
"context": {
"trace_id": self.FAKE_TRACE_ID,
"span_id": self.FAKE_SPAN_ID,
Expand Down Expand Up @@ -79,8 +79,8 @@ def test_to_dict(self):
"name": "test",
"kind": "test",
"parent_id": "test",
"start_time": "2022-01-01T00:00:00",
"end_time": "2022-01-01T00:01:00",
"start_time": "2022-01-01T00:00:00.000000Z",
"end_time": "2022-01-01T00:01:00.000000Z",
"attributes": {"line_run_id": "test_line_run_id"},
"partition_key": "fake_collection_id",
"context": {
Expand Down

0 comments on commit da2af28

Please sign in to comment.