Skip to content

Commit

Permalink
Align Cloud Trace JSON Format with OpenTelemetry Standards (#2946)
Browse files Browse the repository at this point in the history
# Description

This pull request updates the Cloud Trace JSON serialization to align
with the OpenTelemetry (OTel) standards. By modifying the dictionary
structure used in the `ReadableSpan.to_json` method, we ensure
consistency with OTel's implementation, enhancing interoperability and
the developer experience. The changes are in accordance with the format
specified in the OTel documentation.

The appended 'Z' in the timestamp represents the UTC timezone, aligning
with the `ReadableSpan.to_json` implementation that reflects timezone
information in Cloud Trace JSON.

## Sample JSON Change
Remove irrelevant fields and align the timestamp format:
```diff
{
    "name": "python_tool_with_simple_image",
    "context": {
      "trace_id": "d86d8adc5f7a30e87a621e6a806da4cf",
      "span_id": "260f4226117b9403",
      "trace_state": ""
    },
    "kind": 1,
-   "start_time": "2024-04-11T09:22:46.841063",
-   "end_time": "2024-04-11T09:22:46.870394",
+   "start_time": "2024-04-11T09:22:46.841063Z",
+   "end_time": "2024-04-11T09:22:46.870394Z",
    "status": {
      "status_code": "Ok",
      "description": ""
    },
    "attributes": {
      "framework": "promptflow",
      "span_type": "Flow",
      "line_run_id": "dbcca243-9984-4ccd-a008-32d298728e2e",
      "inputs": "{\n  \"image\": \"logo.jpg\"\n}",
      "output": "..."
    },
    "events": [
      {
        "name": "promptflow.function.inputs",
-       "timestamp": "2024-04-11T09:22:46.841063",
+       "timestamp": "2024-04-11T09:22:46.841063Z",
        "attributes": {
          "payload": "{\n  \"image\": \"logo.jpg\"\n}"
        }
      },
      {
        "name": "promptflow.function.output",
-       "timestamp": "2024-04-11T09:22:46.870394",
+       "timestamp": "2024-04-11T09:22:46.870394Z",
        "attributes": {
          "payload": "..."
        }
      }
    ],
    "resource": {
      "attributes": {
        "service.name": "promptflow",
        "subscription.id": [REDACT],
        "resource_group.name": "promptflow",
        "workspace.name": "promptflow-canary-dev",
        "collection": "default"
      },
      "schema_url": ""
    },
-   "partition_key": "default_3a284a00-1f34-4159-889c-c5eabfa2a896",
-   "collection_id": "default_3a284a00-1f34-4159-889c-c5eabfa2a896",
-   "id": "260f4226117b9403",
-   "created_by": {
-     "object_id": [REDACT],
-     "tenant_id": [REDACT],
-     "name": "Cheng Liu"
-   }
  }
```

# All Promptflow Contribution checklist:
- [X] **The pull request does not introduce [breaking changes].**
- [ ] **CHANGELOG is updated for new features, bug fixes or other
significant changes.**
- [X] **I have read the [contribution guidelines](../CONTRIBUTING.md).**
- [ ] **Create an issue and link to the pull request to get dedicated
review from promptflow team. Learn more: [suggested
workflow](../CONTRIBUTING.md#suggested-workflow).**

## General Guidelines and Best Practices
- [X] Title of the pull request is clear and informative.
- [X] There are a small number of commits, each of which have an
informative message. This means that previously merged commits do not
appear in the history of the PR. For more information on cleaning up the
commits in your PR, [see this
page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md).

### Testing Guidelines
- [X] Pull request includes test coverage for the included changes.
  • Loading branch information
liucheng-ms authored Apr 24, 2024
1 parent 3b961d3 commit da2af28
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 7 deletions.
34 changes: 31 additions & 3 deletions src/promptflow-azure/promptflow/azure/_storage/cosmosdb/span.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import json
from copy import deepcopy
from datetime import datetime
from typing import Any, Dict

from azure.cosmos.container import ContainerProxy
Expand All @@ -12,6 +13,8 @@
from promptflow._constants import SpanContextFieldName, SpanEventFieldName, SpanFieldName
from promptflow._sdk.entities._trace import Span as SpanEntity

DATE_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ" # timestamp format e.g. 2021-08-25T00:00:00.000000Z


class Span:
name: str = None
Expand All @@ -36,8 +39,9 @@ def __init__(self, span: SpanEntity, collection_id: str, created_by: dict) -> No
self.context = span.context
self.kind = span.kind
self.parent_id = span.parent_id
self.start_time = span.start_time.isoformat()
self.end_time = span.end_time.isoformat()
# span entity start_time and end_time are datetime objects using utc timezone
self.start_time = span.start_time.strftime(DATE_FORMAT)
self.end_time = span.end_time.strftime(DATE_FORMAT)
self.status = span.status
self.attributes = span.attributes
# We will remove attributes from events for cosmosdb 2MB size limit.
Expand All @@ -52,6 +56,12 @@ def __init__(self, span: SpanEntity, collection_id: str, created_by: dict) -> No
self.external_event_data_uris = []
self.span_json_uri = None

        # convert event timestamps to OTel format
for event in self.events:
event[SpanEventFieldName.TIMESTAMP] = datetime.fromisoformat(event[SpanEventFieldName.TIMESTAMP]).strftime(
DATE_FORMAT
)

def persist(self, cosmos_client: ContainerProxy, blob_container_client: ContainerClient, blob_base_uri: str):
if self.id is None or self.partition_key is None or self.resource is None:
return
Expand Down Expand Up @@ -93,13 +103,31 @@ def to_cosmosdb_item(self, attr_value_truncation_length: int = 8 * 1024):
def _persist_span_json(self, blob_container_client: ContainerClient, blob_base_uri: str):
"""
Persist the span data as a JSON string in a blob.
        Persisted span should conform to the format of ReadableSpan.to_json().
https://opentelemetry-python.readthedocs.io/en/latest/_modules/opentelemetry/sdk/trace.html#ReadableSpan.to_json
"""
# check if span_json_uri is already set
if self.span_json_uri is not None:
return

# persist the span as a json string in a blob
span_data = json.dumps(self.to_dict())
# align with ReadableSpan.to_json() format
f_span = {
"name": self.name,
"context": self.context,
"kind": self.kind,
"parent_id": self.parent_id,
"start_time": self.start_time,
"end_time": self.end_time,
"status": self.status,
"attributes": self.attributes,
"events": self.events,
"links": self.links,
"resource": self.resource,
}

span_data = json.dumps(f_span)
blob_path = self._generate_blob_path(file_name="span.json")
blob_client = blob_container_client.get_blob_client(blob_path)
blob_client.upload_blob(span_data)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ def test_to_dict(self):
"name": "test",
"kind": "test",
"parent_id": "test",
"start_time": "2022-01-01T00:00:00",
"end_time": "2022-01-01T00:01:00",
"start_time": "2022-01-01T00:00:00.000000Z",
"end_time": "2022-01-01T00:01:00.000000Z",
"context": {
"trace_id": self.FAKE_TRACE_ID,
"span_id": self.FAKE_SPAN_ID,
Expand Down Expand Up @@ -79,8 +79,8 @@ def test_to_dict(self):
"name": "test",
"kind": "test",
"parent_id": "test",
"start_time": "2022-01-01T00:00:00",
"end_time": "2022-01-01T00:01:00",
"start_time": "2022-01-01T00:00:00.000000Z",
"end_time": "2022-01-01T00:01:00.000000Z",
"attributes": {"line_run_id": "test_line_run_id"},
"partition_key": "fake_collection_id",
"context": {
Expand Down

0 comments on commit da2af28

Please sign in to comment.