diff --git a/rust/otap-dataflow/docs/images/otap-metrics-encoding1-flat.svg b/rust/otap-dataflow/docs/images/otap-metrics-encoding1-flat.svg new file mode 100644 index 0000000000..7b1da25241 --- /dev/null +++ b/rust/otap-dataflow/docs/images/otap-metrics-encoding1-flat.svg @@ -0,0 +1,271 @@ + + + + + + +OTAP Metrics: Scope Attributes, Flat Metrics +Encoding 1 — 1 dimension (outcome), 3 timeseries, M=1 metric +Total rows: K + 6M = 2 + 6 = 8 (K=2 scope attrs, M=1 metric × 3 flat variants) + + +ScopeAttrs +SCOPE_ATTRS + +parent_id + +key + +type + +str + +int + +double + +bool + +bytes + +ser + +uint16 + +string/dict + +uint8 + +string/dict + +int64 + +float64 + +bool + +binary + +binary + +0 + +node_id + +Str + +node-7 + + + + + + + + + + + +0 + +pipeline + +Str + +ingest + + + + + + + + + + + +unused + + + +UnivariateMetrics +UNIVARIATE_METRICS + +id + +resource.id + +scope.id + +scope.name + +metric_type + +name + +unit + +agg_temp + +is_monotonic + +uint16 + +uint16 + +uint16 + +str/dict + +uint8 + +str/dict + +str/dict + +int32 + +bool + +0 + +0 + +0 + +otap + +Sum + +consumed_success + +{item} + +Cum + +True + +1 + +0 + +0 + +otap + +Sum + +consumed_failed + +{item} + +Cum + +True + +2 + +0 + +0 + +otap + +Sum + +consumed_refused + +{item} + +Cum + +True + + + +NumberDataPoint +NUMBER_DATA_POINTS + +id + +parent_id + +start_time + +time + +int_value + +flags + +double_value + +uint32 + +uint16 + +ts_ns + +ts_ns + +int64 + +uint32 + +float64 + +0 + +0 + +10:00:00 + +10:00:10 + +142 + +0 + + + +1 + +1 + +10:00:00 + +10:00:10 + +3 + +0 + + + +2 + +2 + +10:00:00 + +10:00:10 + +0 + +0 + + + +unused + + + + + +parent_id → scope.id + + + + +parent_id → id + \ No newline at end of file diff --git a/rust/otap-dataflow/docs/images/otap-metrics-encoding2-scope-dims.svg b/rust/otap-dataflow/docs/images/otap-metrics-encoding2-scope-dims.svg new file mode 100644 index 0000000000..6f0087fcb5 --- /dev/null +++ 
b/rust/otap-dataflow/docs/images/otap-metrics-encoding2-scope-dims.svg @@ -0,0 +1,397 @@ + + + + + + +OTAP Metrics: Scope Attributes as Dimensions +Encoding 2 — 1 dimension (outcome), 3 timeseries, M=1 metric +Total rows: 3(K+1) + 6M = 3(2+1) + 6 = 15 (outcome promoted into scope attrs) + + +ScopeAttrs +SCOPE_ATTRS + +parent_id + +key + +type + +str + +int + +double + +bool + +bytes + +ser + +uint16 + +string/dict + +uint8 + +string/dict + +int64 + +float64 + +bool + +binary + +binary + +0 + +node_id + +Str + +node-7 + + + + + + + + + + + +0 + +pipeline + +Str + +ingest + + + + + + + + + + + +0 + +outcome + +Str + +success + + + + + + + + + + + +1 + +node_id + +Str + +node-7 + + + + + + + + + + + +1 + +pipeline + +Str + +ingest + + + + + + + + + + + +1 + +outcome + +Str + +failed + + + + + + + + + + + +2 + +node_id + +Str + +node-7 + + + + + + + + + + + +2 + +pipeline + +Str + +ingest + + + + + + + + + + + +2 + +outcome + +Str + +refused + + + + + + + + + + + +unused + + + +UnivariateMetrics +UNIVARIATE_METRICS + +id + +resource.id + +scope.id + +scope.name + +metric_type + +name + +unit + +agg_temp + +is_monotonic + +uint16 + +uint16 + +uint16 + +str/dict + +uint8 + +str/dict + +str/dict + +int32 + +bool + +0 + +0 + +0 + +otap + +Sum + +consumer.items + +{item} + +Cum + +True + +1 + +0 + +1 + +otap + +Sum + +consumer.items + +{item} + +Cum + +True + +2 + +0 + +2 + +otap + +Sum + +consumer.items + +{item} + +Cum + +True + + + +NumberDataPoint +NUMBER_DATA_POINTS + +id + +parent_id + +start_time + +time + +int_value + +flags + +double_value + +uint32 + +uint16 + +ts_ns + +ts_ns + +int64 + +uint32 + +float64 + +0 + +0 + +10:00:00 + +10:00:10 + +142 + +0 + + + +1 + +1 + +10:00:00 + +10:00:10 + +3 + +0 + + + +2 + +2 + +10:00:00 + +10:00:10 + +0 + +0 + + + +unused + + + + + +parent_id → scope.id + + + + +parent_id → id + \ No newline at end of file diff --git a/rust/otap-dataflow/docs/images/otap-metrics-encoding3-dp-attrs.svg 
b/rust/otap-dataflow/docs/images/otap-metrics-encoding3-dp-attrs.svg new file mode 100644 index 0000000000..b6f40dab1b --- /dev/null +++ b/rust/otap-dataflow/docs/images/otap-metrics-encoding3-dp-attrs.svg @@ -0,0 +1,338 @@ + + + + + + + +OTAP Metrics: Data Point Attributes +Encoding 3 — 1 dimension (outcome), 3 timeseries, M=1 metric +Total rows: K + 7M = 2 + 7 = 9 (outcome as data-point-level attribute) + + +ScopeAttrs +SCOPE_ATTRS + +parent_id + +key + +type + +str + +int + +double + +bool + +bytes + +ser + +uint16 + +string/dict + +uint8 + +string/dict + +int64 + +float64 + +bool + +binary + +binary + +0 + +node_id + +Str + +node-7 + + + + + + + + + + + +0 + +pipeline + +Str + +ingest + + + + + + + + + + + +unused + + + +UnivariateMetrics +UNIVARIATE_METRICS + +id + +resource.id + +scope.id + +scope.name + +metric_type + +name + +unit + +agg_temp + +is_monotonic + +uint16 + +uint16 + +uint16 + +str/dict + +uint8 + +str/dict + +str/dict + +int32 + +bool + +0 + +0 + +0 + +otap + +Sum + +consumer.items + +{item} + +Cum + +True + + + +NumberDataPoint +NUMBER_DATA_POINTS + +id + +parent_id + +start_time + +time + +int_value + +flags + +double_value + +uint32 + +uint16 + +ts_ns + +ts_ns + +int64 + +uint32 + +float64 + +0 + +0 + +10:00:00 + +10:00:10 + +142 + +0 + + + +1 + +0 + +10:00:00 + +10:00:10 + +3 + +0 + + + +2 + +0 + +10:00:00 + +10:00:10 + +0 + +0 + + + +unused + + + +NumberDPAttrs +NUMBER_DP_ATTRS + +parent_id + +key + +type + +str + +int + +double + +bool + +bytes + +ser + +uint32 + +string/dict + +uint8 + +string/dict + +int64 + +float64 + +bool + +binary + +binary + +0 + +outcome + +Str + +success + + + + + + + + + + + +1 + +outcome + +Str + +failed + + + + + + + + + + + +2 + +outcome + +Str + +refused + + + + + + + + + + + +unused + + + + + +parent_id → scope.id + + + + +parent_id → id + + + + +parent_id → id + \ No newline at end of file diff --git a/rust/otap-dataflow/docs/self_tracing_architecture.md b/rust/otap-dataflow/docs/internal-logs-sdk.md 
similarity index 100% rename from rust/otap-dataflow/docs/self_tracing_architecture.md rename to rust/otap-dataflow/docs/internal-logs-sdk.md diff --git a/rust/otap-dataflow/docs/internal-metrics-sdk.md b/rust/otap-dataflow/docs/internal-metrics-sdk.md new file mode 100644 index 0000000000..413eb4ad9f --- /dev/null +++ b/rust/otap-dataflow/docs/internal-metrics-sdk.md @@ -0,0 +1,503 @@ +# Internal Metrics SDK + +## Overview + +The OTAP-direct Metrics SDK described here will replace the +OpenTelemetry-Rust Metrics SDK, using pieces of the OTAP dataflow +engine as its own internal telemetry pipeline. This +document explains how the internal metrics SDK will be redesigned +through a four-phase implementation plan. See the corresponding document +describing the [internal logs SDK](./internal-logs-sdk.md), which +followed a similar course; however, note that the logs SDK forms OTLP +bytes payloads directly, whereas the metrics SDK forms OTAP records +directly. + +This is a functionally complete Metrics SDK, tailored for efficiency in +the OTAP dataflow engine environment. It may be considered as a +general purpose OpenTelemetry Metrics SDK for Rust, with the following +design constraints: + +- Multivariate design for future + [OTAP-multivariate](../../../docs/multivariate-design.md) uses +- A thread-per-core architecture with push-oriented collection supports + synchronous and asynchronous instrument patterns +- Instruments are fully "bound"; there are no dynamic attributes +- OTel-Weaver semantic convention registry with code generation tools + supports type-safe interfaces and schema migration +- View configuration defines three levels at compile time: Basic, Normal, + and Detailed, which are selected at runtime; there are no dynamic views +- Schema-driven versioning includes a migration path from initial + `#[metric_set]` to generated code using a schema registry with + runtime-configurable output schemas. + +The Metrics SDK is specialized for producing OTAP batches. 
It +includes built-in support for console and Prometheus reporting through +its `MetricsTap` component, meaning it works with or without a full +internal metrics pipeline. + +### Project phases + +There are four phases in this design plan: + +1. **Drop-in replacement**. The existing `#[metric_set]` macro and the + reporting paths for Counter, UpDownCounter, Gauge, and Mmsc instruments + are unchanged. The current dispatcher is extended with the option + to select the OpenTelemetry SDK, a no-op SDK, the bare-bones Admin + behavior, or the new ITS mode. As this phase progresses, we will + reach a point where the ITS mode provides a sufficient level of + basic functionality. +2. **Schema-driven code generation**. New code generation tooling will + be developed to support our own migration between metric + schemas. One by one, each `#[metric_set]` struct will be replaced + by generated code. An initial `v0` schema will be written to match + pre-existing instrumentation. In some cases, a new "streamlined" + `v1` schema using metric attributes (as opposed to a flat + namespace) will follow. As this phase progresses, users will be + able to control which metric schemas are compiled and which are + selected at runtime (e.g., old, new, or both). Using Rust `enum` + variants corresponding to each metric level, users can configure + and choose between Basic, Normal, and Detailed metric reporting. +3. **Remove the OpenTelemetry SDK.** There will be at this point two + choices when configuring the Metrics SDK: the built-in basic + behavior and the ITS internal pipeline with its variety of nodes. +4. **Introduce exponential histograms.** In most cases, existing `Mmsc` + instruments are configured at Normal level. At the Detailed metric + level, we introduce a choice of exponential histogram. This data + structure supports reporting both coarse and fine-level detail + about metric distributions in the OTAP dataflow engine. 
+ +### Transition from `#[metric_set]` to schema-driven metrics + +Prior to this design, metric instrumentation was fully expanded into +individual counters. For example, we might see +three counters corresponding to three outcome variants: + +```rust +/// Consumed items by outcome after +#[metric_set(name = "otap.consumer")] +#[derive(Debug, Default, Clone)] +pub struct ConsumerMetrics { + #[metric(unit = "{item}")] + consumed_success: Counter, + + #[metric(unit = "{item}")] + consumed_failed: Counter, + + #[metric(unit = "{item}")] + consumed_refused: Counter, + + // ... and more (e.g., duration) +} +``` + +This is logically a single set of metrics using a flat namespace +instead of metric-level attributes. As we use this example, we will +consider how to add signal information (i.e., counting logs, traces, and +metrics requests separately). If we continued using a flat namespace, +we would have 9 Counters; however, a more idiomatic representation +would use metric attributes. For this example, we are considering a +metric with 1 or 2 dimensions having three variants each. We have +three outcomes and three signals, making 3 or 9 timeseries. The +metric is disabled at Basic level because a zero-dimension counter +(i.e., total items consumed regardless of outcome) is better served +by a different metric, such as a channel receive count. + +After the transition, we will have a schema definition listing the +metric groups, their dimensions, and the default configuration by +metric-level: + +```yaml +groups: + - id: metric.consumer.items + type: metric + metric_name: consumer.items + instrument: counter + unit: "{item}" + brief: "Items consumed by a node." + attributes: + - ref: outcome + requirement_level: recommended + - ref: signal + requirement_level: optional + x-otap-levels: + basic: + disabled: true + normal: + dimensions: [outcome] + detailed: + dimensions: [outcome, signal] + + # ... 
and more +``` + +In separate files, a registry will include definitions for the +`outcome` and `signal` attributes. In the generated code, we might +see: + +```rust +/// [Generated Code] + +#[derive(Debug, Default, Clone)] +pub struct ConsumerMetrics { + consumed_items: ConsumedItemsByOutcomeAndSignal, + + // ... and more (e.g., duration) +} + +#[derive(Debug, Default, Clone)] +pub enum ConsumedItemsByOutcomeAndSignal { + #[default] + Basic, // disabled + Normal(Box<[Counter; 3]>), // 1 dimension: outcome + Detailed(Box<[Counter; 9]>), // 2 dimensions: outcome x signal +} + +impl ConsumedItemsByOutcomeAndSignal { + /// Add a number of items by outcome and signal. + pub fn add(&mut self, items: u64, outcome: Outcomes, signal: SignalType) { + match self { + Self::Basic => {}, // disabled: no-op + Self::Normal(cnts) => cnts[outcome.ordinal()].increment(items), + Self::Detailed(cnts) => cnts[outcome.ordinal() * 3 + signal.ordinal()].increment(items), + } + } +} +``` + +Note the use of `Box<_>` in the enum variant ensures that metric level +actually controls how much memory is used by instrumentation. + +In another file, we will define the translation from the `v1` to `v0` +schema, which we will eventually deprecate. Separately, we will +define how to generate either the `v1` or the `v0` schema from the +current instrumentation. + +## Phase 1: Drop-in replacement + +In this phase, we will insert alternatives for configuring internal +metrics, similar to how internal logging uses `logs::ProviderMode`, +with values including `None`, `Builtin`, `OpenTelemetry`, and `ITS`. +The OpenTelemetry mode will be supported through Phase 3, at which +point we will have only two real options. + +We will not change the manner of reporting multivariate metric structs +currently in use. 
We will continue to use the structs currently +annotated with `#[metric_set]`, and will continue to use periodic +reporting on short intervals, through channels, to a location +determined by the engine controller. + +The provider modes listed above are implemented at the collection +point, where a central component (`MetricsDispatcher`) receives a +stream of events and is responsible for dispatching. The `None` +setting means no metrics are ever sent; `Builtin` serves a +Prometheus-compatible endpoint without using a full telemetry +pipeline; `OpenTelemetry` preserves existing functionality; and +`ITS` enables our new standard functionality. + +```mermaid +flowchart TB + subgraph Sources["Metric Sources"] + direction LR + MS1["#[metric_set]"] ~~~ AS1["#[attribute_set]"] + end + + MD["MetricsDispatcher"] + + Sources --> |"MPSC channel of
{EntityKey,Metrics,Timestamp}"| MD + + MD --> |"Builtin"| BUILTIN["Admin"] + MD --> |"OpenTelemetry"| OTEL["OpenTelemetry SDK"] + MD --> |"ITS"| ITS["Internal Telemetry Receiver"] + + subgraph ITS_Detail["ITS"] + ITS --> |"OTAP"| BATCH["Batch Processor"] + ITS --> |"Original Data"| ITSPROM["/metrics"] + BATCH --> |"OTAP"| ENGINEEXPORT["OTLP Exporter"] + BATCH --> |"OTAP"| ENGINECONSOLE["Console"] + end + + subgraph BUILTIN_Detail["Builtin behavior"] + BUILTIN --> PROMEXPORT["/metrics"] + end + + subgraph OTEL_Detail["OpenTelemetry SDK"] + OTEL --> |"Collect()"|OTLP["Periodic Reader"] + OTLP --> |"Export()"|SDKEXPORT["OTLP Exporter"] + end +``` + +### Internal telemetry system + +In the internal telemetry system mode, Metrics SDK events are sent to +a global, NUMA-regional, or CPU-local copy of the dataflow engine +serving internal telemetry. This is the same model of configuration +used in the Logging SDK in its corresponding ITS mode, with pipeline +nodes configured separately in the `observability` settings. + +The internal telemetry system has an in-transit representation using +`EntityKey` to describe the scope, a snapshot of the measurements, +and a timestamp. When the built-in provider mode is configured, the internal +representation is used directly, making a short and relatively +low-risk code path for exporting to the console or Prometheus. The +same Prometheus export path is also supported in the ITS mode. + +Internal events are received representing a partial OTAP payload. 
For +the current OTAP specification, we may generate any of the following +tables: + +- UnivariateMetrics: the top-level table, listing metric type, name, + temporality, description, monotonicity, resource, and scope +- NumberDataPoint: contains one row per scalar value, timestamps, and + flags +- HistogramDataPoint: contains min/max/sum/count, optional buckets, + timestamps, and flags (used by `Mmsc`) +- ScopeAttrs: the attributes associated with each `EntityKey` +- NumberDPAttrs: the attributes associated with each number data point. + +Note that the timestamp column corresponding to the data point +tables will be identical in all cases, as metric sets are observed as +individual events. In Phase 4, as we introduce the OpenTelemetry +exponential histogram data point, we will begin using one new table. +Exemplars are not supported. + +In this phase, minimal support for OpenTelemetry Metric View +configuration is preserved for use at runtime, limited to scoped +renaming of instruments and descriptions. This is meant to assist with +the initial stages of this transition; the limited Views functionality +will become deprecated during Phase 2, once schema-driven code +generation provides an alternative, and it will be fully eliminated +in Phase 3 when the OpenTelemetry SDK is removed and users are able +to achieve the same result by modifying schema definitions. + +## Phase 2: Schema-driven code generation + +The migration from `#[metric_set]` to generated code will use +OpenTelemetry-Weaver tooling and the semantic conventions registry, +proceeding on a set-by-set basis. For each metric set, we aim for a +transition that does not change user-visible behavior, by producing +the `v0` semantic convention while streamlining the instrumentation. + +In the example above, where `ConsumerMetrics` initially consists of +three Counters (e.g., `consumed_success`), the instrumentation will +transition to a single Counter field. 
The generated `add` method +for this instrument requires passing the `Outcome` and +`SignalType` types at every call site. + +At the end of this phase, each metric set will have at least a `v0` +schema. For many (not all), we will also define a `v1` schema with +streamlined instrumentation and a runtime choice between versions. +The two schemas may be configured as (using a scalar value for a +single schema selection): + +```yaml +engine: + telemetry: + scopes: + metrics: + metrics.otap.consumer: + schema_url: otap-dataflow/consumer@v1 +``` + +### Code generation + +A `cargo xtask generate-metrics` command will read the set of schema +definitions and the schema registry and generate: + +- Three-level instrument enums +- Metric set structs +- OTAP encoder logic for the associated tables +- Crate-level metric documentation. + +```mermaid +flowchart LR + SCHEMA["Metric Schema Definitions"] --> XTASK["cargo xtask
generate-metrics"] + REGISTRY["Semantic Conventions Registry"] --> XTASK + + XTASK --> ENUMS["Instrument enums"] + XTASK --> STRUCTS["MetricSet structs"] + XTASK --> ENCODER["OTAP encoding impls"] + XTASK --> DOCS["Documentation"] +``` + +### OTAP encoding details + +As designed, each metric set that we generate can produce multiple +output schemas. This is meant to provide a migration path between the +major versions of metric instrumentation. We imagine one or more +compiled-in schema versions per metric set, each with control over +dimensionality and cardinality. + +There are a number of forms an OTAP metrics encoding can take, +depending on the number of dimensions. We have a choice between the +use of scope attributes vs metric attributes, and in a future version +of OTAP we may consider entity definitions in the resource. + +Note that in all cases the ScopeAttrs, UnivariateMetrics, and +NumberDPAttrs tables can be precomputed; only the NumberDataPoint +table in this example is computed on the fly. All columns can be +dictionary encoded. All of the choices here are reasonable and valid +uses of OTAP. + +#### Scope attributes, flat metrics + +In this encoding, a set of metrics corresponds with a scope defined by +its attributes, with repetition of metric names but without metric-level +attributes. This yields the following tables, where the scope has K +attributes defined by an `#[attribute_set]` (a struct that defines scope-level +attributes for a metric source) and there are M metrics in the +`#[metric_set]`, for one and two dimensions: + +| Table | 1 dimension | 2 dimensions | +|-------------------|-------------|--------------| +| ScopeAttrs | K | K | +| UnivariateMetrics | 3M | 9M | +| NumberDataPoint | 3M | 9M | +| Total rows | K+6M | K+18M | + +Note that for 1 dimension there are 3 timeseries defined and for 2 +dimensions there are 9 timeseries defined. 
+ +![Encoding 1: Scope attributes, flat metrics](images/otap-metrics-encoding1-flat.svg) + +#### Scope attributes as dimensional metrics + +In this representation, we duplicate each scope and add the intended +metric-level attributes as scope attributes, adding one row per +dimension and repeating the scope attributes for each combination of +metric attributes. + +| Table | 1 dimension | 2 dimensions | +|-------------------|-------------|--------------| +| ScopeAttrs | 3(K+1) | 9(K+2) | +| UnivariateMetrics | 3M | 9M | +| NumberDataPoint | 3M | 9M | +| Total rows | 3K+6M+3 | 9K+18M+18 | + +![Encoding 2: Scope attributes as dimensional metrics](images/otap-metrics-encoding2-scope-dims.svg) + +#### Data point attributes + +The best representation choice may depend on user preferences, and any +of these choices could perform best depending on the nature of the +data. This is the most direct representation of the OTLP protocol +using the OTAP `NUMBER_DP_ATTRS` table; however, it repeats the +attributes for every data point of every metric, so this works best +when the number of metrics per set is small. + +| Table | 1 dimension (3 timeseries) | 2 dimensions (9 timeseries) | +|-------------------|----------------------------|-----------------------------| +| ScopeAttrs | K | K | +| UnivariateMetrics | M | M | +| NumberDataPoint | 3M | 9M | +| NumberDPAttrs | 3M | 18M | +| Total rows | K+7M | K+28M | + +![Encoding 3: Data point attributes](images/otap-metrics-encoding3-dp-attrs.svg) + +Note, as well, that these figures will be extended with more +possibilities when we introduce OpenTelemetry entities defined at the +resource level, which reduces the impact of K, thus making the +scope-attributes-oriented representation more attractive than the +data-point-attributes approach. 
+ +Moreover, the preferred long-term direction is the introduction of +first-class [multivariate metrics](../../../docs/multivariate-design.md) +in OTAP, which would allow multiple related measurements to share a +single set of attributes and timestamp, significantly improving +compression and row efficiency. This remains future work in the OTAP +specification. + +## Phase 3: Remove OpenTelemetry SDK + +With all `#[metric_set]` definitions replaced by generated code, we +remove the OpenTelemetry SDK from the OTAP Dataflow dependencies. +Users configure either the Builtin Prometheus support or an ITS +internal telemetry pipeline (e.g., batch processor, OTLP exporter). + +Support for OpenTelemetry Metrics Views is eliminated at this point. +For the same functionality, users will either: (1) build the engine +with custom telemetry schema definitions, or (2) use the transform +processor for custom metrics behavior. + +## Phase 4: Introduce exponential histograms + +We will introduce an OpenTelemetry exponential histogram to our +codebase based on +[jmacd/rust-expohisto](https://github.com/jmacd/rust-expohisto), which +pairs a table-lookup implementation of the mapping function (following +the DynaTrace and NewRelic algorithms) with an auto-scaling +ring-buffer of buckets. + +As an alternative to `Mmsc`, we will introduce `HistogramNN` +(non-negative) for non-negative measurements and `HistogramPN` +(positive-and-negative) for arbitrary measurements. The implementation +uses a variable-width representation, with SIMD-within-a-register +("SWAR") techniques for widening and merging buckets in place. There +are seven widths: B1, B2, B4 (bit-packed at 1, 2, and 4 bits per +bucket), and U8, U16, U32, U64 (unsigned integers at the +corresponding byte widths). 
+ +The histogram parameters are: + +- ``: the number of 64-bit words of space available +- `min_width`: the initial width of buckets +- `max_scale`: the initial maximum scale + +The implementation has a compiled-in lookup table of size +`2^table_scale`. The default table scale is 8, configurable through a +cargo feature. + +The histogram is allocation-free and `#[no_std]`. As an example, the +non-negative histogram uses six words of space for the independent +min, max, sum, and count fields, and the bucket scale, width, size, +and offset fields. Therefore `HistogramNN<10>` is 128 bytes and +`HistogramNN<26>` is 256 bytes, costing as much as 16 or 32 ordinary +counters. + +After introducing the new data structure, we will add support in the +code generation tooling to introduce histogram options, usually at +detailed level. + +## Appendix: Migration workflow + +This section describes the migration path for the example +`ConsumerMetrics` `#[metric_set]`, from the perspective of a user with +existing monitoring and dashboards. + +In Phase 2, the `v0` schema will remain the default for six months +after the `v1` schema is introduced; it will remain compiled-in for +an additional six months before removal. + +Within the first six months, users will deploy an update of the +dataflow engine and dual metric reporting (using a list to select +multiple schemas simultaneously), for example: + +```yaml +engine: + telemetry: + scopes: + metrics: + metrics.otap.consumer: + - schema_url: otap-dataflow/consumer@v0 + - schema_url: otap-dataflow/consumer@v1 +``` + +After deploying this new configuration, the user is free to redeploy +dashboards and begin monitoring metrics in the new schema version. 
They +will update their configuration: + +```yaml +engine: + telemetry: + scopes: + metrics: + metrics.otap.consumer: + - schema_url: otap-dataflow/consumer@v1 +``` + +After twelve months, the OTel-Arrow project will deprecate the `v0` +schema; we expect that users will have migrated to `v1` by then. At that +point, we are free to remove the `v0` schema definition. + +After removing the `v0` schema definition, re-running +`cargo xtask generate-metrics` will produce only `v1` metrics. diff --git a/tools/otap-diagram/generate_otap_svg.py b/tools/otap-diagram/generate_otap_svg.py new file mode 100644 index 0000000000..01dd2da818 --- /dev/null +++ b/tools/otap-diagram/generate_otap_svg.py @@ -0,0 +1,827 @@ +#!/usr/bin/env python3 +"""Generate SVG diagrams showing OTAP metrics tables in columnar format. + +Produces four diagrams: + 0. Overview of all OTAP metrics table schemas (columns + types only) + 1. Scope attributes, flat metrics (worked example) + 2. Scope attributes as dimensional metrics (worked example) + 3. Data point attributes (worked example) + +The worked examples use the "consumer.items" counter metric from the +internal-metrics-sdk design, with 1 dimension (outcome: success / +failed / refused) producing 3 timeseries. 
+ +Usage: + python3 tools/otap-diagram/generate_otap_svg.py + +Output goes to rust/otap-dataflow/docs/images/*.svg +""" + +import os +from dataclasses import dataclass, field + +# ── Styling constants ────────────────────────────────────────────── + +FONT = "Consolas, 'Courier New', monospace" +FONT_SIZE = 12 +HEADER_FONT_SIZE = 11 +TITLE_FONT_SIZE = 16 +SUBTITLE_FONT_SIZE = 13 + +CELL_H = 22 +CELL_PAD = 8 +HEADER_H = 26 +TABLE_GAP = 40 +TABLE_HGAP = 30 +TABLE_TITLE_H = 28 +MARGIN = 30 +ARROW_MARKER_SIZE = 8 + +COL_BG = "#f8f9fa" +COL_TABLE_TITLE = "#2c3e50" +COL_TABLE_TITLE_TEXT = "#ffffff" +COL_HEADER_BG = "#34495e" +COL_HEADER_TEXT = "#ecf0f1" +COL_ROW_EVEN = "#ffffff" +COL_ROW_ODD = "#f0f4f8" +COL_BORDER = "#bdc3c7" +COL_TEXT = "#2c3e50" +COL_TYPE_TEXT = "#7f8c8d" +COL_ID_HIGHLIGHT = "#e8f4fd" +COL_FK_LINE = "#3498db" +COL_UNUSED_HEADER = "#7f8c8d" # muted header for unused columns +COL_UNUSED_DATA = "#ececec" # grey-ish for unused data cells +COL_SEPARATOR = "#95a5a6" # vertical separator between groups + +SEP_WIDTH = 3 # width of the visual separator + + +def _col_is_unused(col) -> bool: + """A column is unused when it has data rows but every value is empty.""" + if not col.values: + return False + return all(str(v).strip() == "" for v in col.values) + + +@dataclass +class Column: + name: str + arrow_type: str + values: list = field(default_factory=list) + is_id: bool = False + is_fk: bool = False + nullable: bool = False + width: int = 0 + + +@dataclass +class Table: + name: str + payload_type: str + columns: list + x: int = 0 + y: int = 0 + width: int = 0 + height: int = 0 + + +def measure_text(text: str) -> int: + return len(str(text)) * 7 + CELL_PAD * 2 + + +def compute_col_widths(table: Table): + for col in table.columns: + header_w = measure_text(col.name) + type_w = measure_text(col.arrow_type) - CELL_PAD + val_w = max((measure_text(str(v)) for v in col.values), default=0) + col.width = max(header_w, type_w, val_w, 60) + + +def 
compute_table_size(table: Table): + # Sort columns: used first, unused last (stable within each group) + used = [c for c in table.columns if not _col_is_unused(c)] + unused = [c for c in table.columns if _col_is_unused(c)] + table.columns = used + unused + + compute_col_widths(table) + table.width = sum(c.width for c in table.columns) + if _has_separator(table): + table.width += SEP_WIDTH + n_rows = max(len(c.values) for c in table.columns) if table.columns else 0 + table.height = TABLE_TITLE_H + HEADER_H + CELL_H + n_rows * CELL_H + + +def _has_separator(table: Table) -> bool: + """True if the table has both used and unused columns.""" + has_used = any(not _col_is_unused(c) for c in table.columns) + has_unused = any(_col_is_unused(c) for c in table.columns) + return has_used and has_unused + + +def xml_escape(s): + return str(s).replace("&", "&").replace("<", "<").replace(">", ">") + + +# ── SVG rendering ────────────────────────────────────────────────── + +def render_table(table: Table) -> str: + parts = [] + x0, y0 = table.x, table.y + n_rows = max(len(c.values) for c in table.columns) if table.columns else 0 + has_sep = _has_separator(table) + + # Title banner + parts.append( + f'' + ) + parts.append( + f'' + ) + parts.append( + f'' + f'{xml_escape(table.name)}' + ) + parts.append( + f'' + f'{xml_escape(table.payload_type)}' + ) + + cy = y0 + TABLE_TITLE_H + + # ── helper: detect the boundary between used and unused columns ── + def is_sep_boundary(idx): + """True if the separator should appear before column at idx.""" + if not has_sep or idx == 0: + return False + prev_unused = _col_is_unused(table.columns[idx - 1]) + curr_unused = _col_is_unused(table.columns[idx]) + return not prev_unused and curr_unused + + # ── Column headers ── + cx = x0 + for i, col in enumerate(table.columns): + if is_sep_boundary(i): + cx += SEP_WIDTH + unused = _col_is_unused(col) + if col.is_id or col.is_fk: + bg, tc = COL_ID_HIGHLIGHT, COL_TEXT + elif unused: + bg, tc = 
COL_UNUSED_HEADER, COL_HEADER_TEXT + else: + bg, tc = COL_HEADER_BG, COL_HEADER_TEXT + + parts.append( + f'' + ) + parts.append( + f'' + f'{xml_escape(col.name)}' + ) + cx += col.width + cy += HEADER_H + + # ── Type row ── + cx = x0 + for i, col in enumerate(table.columns): + if is_sep_boundary(i): + cx += SEP_WIDTH + unused = _col_is_unused(col) + bg = COL_UNUSED_DATA if unused else COL_ROW_ODD + parts.append( + f'' + ) + parts.append( + f'' + f'{xml_escape(col.arrow_type)}' + ) + cx += col.width + cy += CELL_H + + # ── Data rows ── + for row_idx in range(n_rows): + cx = x0 + base_bg = COL_ROW_EVEN if row_idx % 2 == 0 else COL_ROW_ODD + for i, col in enumerate(table.columns): + if is_sep_boundary(i): + cx += SEP_WIDTH + unused = _col_is_unused(col) + if col.is_id or col.is_fk: + cell_bg = COL_ID_HIGHLIGHT + elif unused: + cell_bg = COL_UNUSED_DATA + else: + cell_bg = base_bg + val = col.values[row_idx] if row_idx < len(col.values) else "" + parts.append( + f'' + ) + parts.append( + f'' + f'{xml_escape(val)}' + ) + cx += col.width + cy += CELL_H + + # ── Vertical separator between used and unused columns ── + if has_sep: + sx = x0 + for i, col in enumerate(table.columns): + if is_sep_boundary(i): + break + sx += col.width + sep_top = y0 + TABLE_TITLE_H + sep_bot = y0 + table.height + parts.append( + f'' + ) + parts.append( + f'' + f'unused' + ) + + # Outline + parts.append( + f'' + ) + return "\n".join(parts) + + +def col_center_x(table, col_name): + """X position for arrow anchors on a named column — offset to the + right of center so arrows don't overlap the centered text.""" + cx = table.x + has_sep = _has_separator(table) + for i, c in enumerate(table.columns): + if has_sep and i > 0: + prev_unused = _col_is_unused(table.columns[i - 1]) + curr_unused = _col_is_unused(c) + if not prev_unused and curr_unused: + cx += SEP_WIDTH + if c.name == col_name: + # Right-of-center: 70% across the column width + return cx + int(c.width * 0.7) + cx += c.width + return 
table.x + table.width // 2 + + +def col_header_bottom(table): + """Y of the bottom edge of the column header row.""" + return table.y + TABLE_TITLE_H + HEADER_H + + +DOT_R = 5 # radius of the connection dot +STUB_LEN = 20 # vertical stub outside the table before the curve + + +def render_fk_arrow(src_table, src_col_name, dst_table, dst_col_name, + label="", color=COL_FK_LINE, offset=0, **kw) -> str: + """Draw an FK arrow originating from a colored dot on the source + column header, routing out through the nearest table edge, curving + to the destination table, and terminating with an arrowhead on a + colored dot on the destination column header. + + The arrow always exits the source table downward (from the bottom + edge) and enters the destination table from the top edge, which + matches the parent→child reading direction of the tables. + """ + sx = col_center_x(src_table, src_col_name) + dx = col_center_x(dst_table, dst_col_name) + + # Dot positions: center of column header cell + src_dot_y = table_col_header_center_y(src_table) + dst_dot_y = table_col_header_center_y(dst_table) + + # The visible path exits the source table at its bottom edge and + # enters the destination table at its top edge. + src_exit_y = src_table.y + src_table.height + dst_enter_y = dst_table.y + + # Determine whether source is above or below destination and + # adjust so arrows always route through open space. 
+ if src_exit_y < dst_enter_y: + # Normal case: source above destination + stub_s = src_exit_y + STUB_LEN + stub_d = dst_enter_y - STUB_LEN + else: + # Source is below destination: exit from source top instead + src_exit_y = src_table.y + dst_enter_y = dst_table.y + dst_table.height + stub_s = src_exit_y - STUB_LEN + stub_d = dst_enter_y + STUB_LEN + + mid_y = (stub_s + stub_d) / 2 + offset + + cid = color.replace("#", "") + + # Path: vertical line inside table (dot → edge), stub, curve, + # stub, vertical line inside table (edge → dot) + path = ( + f'M {sx},{src_dot_y} ' + f'L {sx},{src_exit_y} ' + f'L {sx},{stub_s} ' + f'C {sx},{mid_y} {dx},{mid_y} {dx},{stub_d} ' + f'L {dx},{dst_enter_y} ' + f'L {dx},{dst_dot_y} ' + ) + + parts = [ + # Colored dot on source column header (open circle = origin) + f'', + # Colored dot on destination column header (filled = target) + f'', + # The arrow path + f'', + ] + if label: + lx = (sx + dx) / 2 + ly = mid_y - 8 + parts.append( + f'' + ) + parts.append( + f'' + f'{xml_escape(label)}' + ) + return "\n".join(parts) + + +def table_col_header_center_y(table): + """Y center of the column header row.""" + return table.y + TABLE_TITLE_H + HEADER_H // 2 + + +def arrow_marker(color): + cid = color.replace("#", "") + s = ARROW_MARKER_SIZE + return ( + f'' + f'' + ) + + +def make_svg(tables, fk_arrows, title, subtitle="", note=""): + for t in tables: + compute_table_size(t) + max_x = max(t.x + t.width for t in tables) + max_y = max(t.y + t.height for t in tables) + svg_w = max_x + MARGIN * 2 + svg_h = max_y + MARGIN * 2 + 60 + + colors = {fk.get("color", COL_FK_LINE) for fk in fk_arrows} + parts = [ + f'', + '', + ] + for c in colors: + parts.append(arrow_marker(c)) + parts.append('') + parts.append(f'') + parts.append( + f'' + f'{xml_escape(title)}' + ) + if subtitle: + parts.append( + f'{xml_escape(subtitle)}' + ) + if note: + parts.append( + f'{xml_escape(note)}' + ) + for t in tables: + parts.append(render_table(t)) + for fk in 
fk_arrows: + parts.append(render_fk_arrow(**fk)) + parts.append('') + return "\n".join(parts) + + +# ── Diagram builders ────────────────────────────────────────────── + +def make_otlp_tree(): + """Render the same consumer.items example as a nested OTLP protobuf + tree, showing the row-major structure for contrast with OTAP columns.""" + + # ── Nested-box tree renderer (independent of the table renderer) ── + + INDENT = 20 # pixels per nesting level + LINE_H = 20 # height per text line + BOX_PAD_X = 12 # horizontal padding inside a box + BOX_PAD_Y = 6 # vertical padding top/bottom inside a box + BOX_GAP = 6 # vertical gap between sibling boxes + # Two alternating greys: lighter for even depth, slightly darker for odd + GREY_BG = ("#f4f4f4", "#e8e8e8") + GREY_BORDER = ("#999999", "#777777") + GREY_TITLE = ("#555555", "#444444") + + class Node: + """A protobuf message or repeated element in the tree.""" + def __init__(self, msg_type, fields=None, children=None, repeat_label=None): + self.msg_type = msg_type # e.g. "NumberDataPoint" + self.fields = fields or [] # list of (name, value) pairs + self.children = children or [] # list of Node + self.repeat_label = repeat_label # e.g. 
"[0]" + # Computed during layout + self.x = 0 + self.y = 0 + self.w = 0 + self.h = 0 + + def layout(node, x, y, avail_w, depth=0): + """Recursively compute positions and sizes, returns total height.""" + node.x = x + node.y = y + node.w = avail_w + node.depth = depth + + # Header line + field lines + content_h = BOX_PAD_Y + LINE_H # header + content_h += len(node.fields) * LINE_H + content_h += BOX_PAD_Y # bottom padding before children + + child_y = y + content_h + child_w = avail_w - INDENT * 2 + + for child in node.children: + ch = layout(child, x + INDENT, child_y, child_w, depth + 1) + child_y += ch + BOX_GAP + content_h += ch + BOX_GAP + + if node.children: + content_h += BOX_PAD_Y # extra bottom pad after children + + node.h = content_h + return content_h + + def render_node(node): + """Render a single node as nested SVG rectangles.""" + parts = [] + idx = node.depth % 2 + bg = GREY_BG[idx] + border = GREY_BORDER[idx] + title_color = GREY_TITLE[idx] + + # Box + parts.append( + f'' + ) + + ty = node.y + BOX_PAD_Y + 13 + + # Header: message type (and optional repeat label) + label = node.msg_type + if node.repeat_label: + label = f'{node.repeat_label} {node.msg_type}' + parts.append( + f'' + f'{xml_escape(label)}' + ) + ty += LINE_H + + # Fields + for fname, fval in node.fields: + parts.append( + f'' + f'{xml_escape(fname)}: ' + f'{xml_escape(fval)}' + ) + ty += LINE_H + + # Children + for child in node.children: + parts.append(render_node(child)) + + return "\n".join(parts) + + # ── Build the tree for the consumer.items example ── + + def make_ndp(idx, outcome, value): + return Node("NumberDataPoint", [ + ("start_time_unix_nano", "10:00:00"), + ("time_unix_nano", "10:00:10"), + ("as_int", str(value)), + ("flags", "0"), + ("attributes", f'[{{outcome: "{outcome}"}}]'), + ], repeat_label=f'[{idx}]') + + tree = Node("ExportMetricsServiceRequest", children=[ + Node("ResourceMetrics", repeat_label="[0]", children=[ + Node("Resource", [ + 
("dropped_attributes_count", "0"), + ]), + Node("ScopeMetrics", repeat_label="[0]", children=[ + Node("InstrumentationScope", [ + ("name", '"otap"'), + ("version", '"0.1"'), + ("attributes", '[{node_id: "node-7"}, {pipeline: "ingest"}]'), + ]), + Node("Metric", repeat_label="[0]", fields=[ + ("name", '"consumer.items"'), + ("unit", '"{item}"'), + ("description", '""'), + ], children=[ + Node("Sum", [ + ("aggregation_temporality", "CUMULATIVE"), + ("is_monotonic", "true"), + ], children=[ + make_ndp(0, "success", 142), + make_ndp(1, "failed", 3), + make_ndp(2, "refused", 0), + ]), + ]), + ]), + ]), + ]) + + # ── Layout and render ── + + tree_w = 720 + layout(tree, MARGIN, MARGIN + 40, tree_w) + + svg_w = tree_w + MARGIN * 2 + svg_h = tree.h + MARGIN * 2 + 60 + + parts = [ + f'', + f'', + f'' + f'OTLP Protobuf: Row-Major Nested Encoding', + f'' + f'Same consumer.items data \u2014 1 dimension (outcome), 3 data points', + render_node(tree), + f'' + f'Each data point is a self-contained message carrying all context; ' + f'attributes and metadata are repeated per point', + '', + ] + return "\n".join(parts) + + +def _scope_attrs_cols(values_per_row): + """Build the standard 9-column attr schema with given row data.""" + pids, keys, types, strs = [], [], [], [] + ints, doubles, bools, bytess, sers = [], [], [], [], [] + for pid, key, typ, strv in values_per_row: + pids.append(pid) + keys.append(key) + types.append(typ) + strs.append(strv) + ints.append("") + doubles.append("") + bools.append("") + bytess.append("") + sers.append("") + return [ + Column("parent_id", "uint16", pids, is_fk=True), + Column("key", "string/dict", keys), + Column("type", "uint8", types), + Column("str", "string/dict", strs, nullable=True), + Column("int", "int64", ints, nullable=True), + Column("double", "float64", doubles, nullable=True), + Column("bool", "bool", bools, nullable=True), + Column("bytes", "binary", bytess, nullable=True), + Column("ser", "binary", sers, nullable=True), + ] + + 
def make_encoding1():
    """Encoding 1: Scope attributes, flat metrics."""
    # Scope attrs: one row per (scope, key); here one scope with two attrs.
    scope_attrs = Table("ScopeAttrs", "SCOPE_ATTRS", _scope_attrs_cols([
        (0, "node_id", "Str", "node-7"),
        (0, "pipeline", "Str", "ingest"),
    ]))
    # The outcome dimension is flattened into three separate metric names.
    metrics = Table("UnivariateMetrics", "UNIVARIATE_METRICS", [
        Column("id", "uint16", [0, 1, 2], is_id=True),
        Column("resource.id", "uint16", [0, 0, 0], nullable=True),
        Column("scope.id", "uint16", [0, 0, 0], nullable=True),
        Column("scope.name", "str/dict", ["otap", "otap", "otap"], nullable=True),
        Column("metric_type", "uint8", ["Sum", "Sum", "Sum"]),
        Column("name", "str/dict", [
            "consumed_success", "consumed_failed", "consumed_refused"]),
        Column("unit", "str/dict", ["{item}", "{item}", "{item}"], nullable=True),
        Column("agg_temp", "int32", ["Cum", "Cum", "Cum"], nullable=True),
        Column("is_monotonic", "bool", [True, True, True], nullable=True),
    ])
    # One data point per flat metric; parent_id links each point to its metric.
    ndp = Table("NumberDataPoint", "NUMBER_DATA_POINTS", [
        Column("id", "uint32", [0, 1, 2], is_id=True, nullable=True),
        Column("parent_id", "uint16", [0, 1, 2], is_fk=True),
        Column("start_time", "ts_ns", ["10:00:00", "10:00:00", "10:00:00"], nullable=True),
        Column("time", "ts_ns", ["10:00:10", "10:00:10", "10:00:10"]),
        Column("int_value", "int64", [142, 3, 0], nullable=True),
        Column("double_value", "float64", ["", "", ""], nullable=True),
        Column("flags", "uint32", [0, 0, 0], nullable=True),
    ])

    # Stack the three tables vertically with TABLE_GAP spacing.
    scope_attrs.x = MARGIN; scope_attrs.y = MARGIN + 40
    compute_table_size(scope_attrs)
    metrics.x = MARGIN; metrics.y = scope_attrs.y + scope_attrs.height + TABLE_GAP + 10
    compute_table_size(metrics)
    ndp.x = MARGIN; ndp.y = metrics.y + metrics.height + TABLE_GAP + 10
    compute_table_size(ndp)

    return make_svg(
        [scope_attrs, metrics, ndp],
        [
            # scope_attrs.parent_id → metrics.scope.id
            dict(src_table=scope_attrs, src_col_name="parent_id",
                 dst_table=metrics, dst_col_name="scope.id",
                 label="parent_id \u2192 scope.id", color=COL_FK_LINE),
            # ndp.parent_id → metrics.id
            dict(src_table=ndp, src_col_name="parent_id",
                 dst_table=metrics, dst_col_name="id",
                 label="parent_id \u2192 id", color="#e74c3c"),
        ],
        "OTAP Metrics: Scope Attributes, Flat Metrics",
        "Encoding 1 \u2014 1 dimension (outcome), 3 timeseries, M=1 metric",
        "Total rows: K + 6M = 2 + 6 = 8 (K=2 scope attrs, M=1 metric \u00d7 3 flat variants)",
    )


def make_encoding2():
    """Encoding 2: Scope attributes as dimensional metrics."""
    # Three scopes (parent_id 0..2), each repeating the shared attrs and
    # carrying one value of the promoted "outcome" dimension.
    scope_attrs = Table("ScopeAttrs", "SCOPE_ATTRS", _scope_attrs_cols([
        (0, "node_id", "Str", "node-7"),
        (0, "pipeline", "Str", "ingest"),
        (0, "outcome", "Str", "success"),
        (1, "node_id", "Str", "node-7"),
        (1, "pipeline", "Str", "ingest"),
        (1, "outcome", "Str", "failed"),
        (2, "node_id", "Str", "node-7"),
        (2, "pipeline", "Str", "ingest"),
        (2, "outcome", "Str", "refused"),
    ]))
    # Single metric name; the timeseries are distinguished by scope.id.
    metrics = Table("UnivariateMetrics", "UNIVARIATE_METRICS", [
        Column("id", "uint16", [0, 1, 2], is_id=True),
        Column("resource.id", "uint16", [0, 0, 0], nullable=True),
        Column("scope.id", "uint16", [0, 1, 2], nullable=True),
        Column("scope.name", "str/dict", ["otap", "otap", "otap"], nullable=True),
        Column("metric_type", "uint8", ["Sum", "Sum", "Sum"]),
        Column("name", "str/dict", [
            "consumer.items", "consumer.items", "consumer.items"]),
        Column("unit", "str/dict", ["{item}", "{item}", "{item}"], nullable=True),
        Column("agg_temp", "int32", ["Cum", "Cum", "Cum"], nullable=True),
        Column("is_monotonic", "bool", [True, True, True], nullable=True),
    ])
    ndp = Table("NumberDataPoint", "NUMBER_DATA_POINTS", [
        Column("id", "uint32", [0, 1, 2], is_id=True, nullable=True),
        Column("parent_id", "uint16", [0, 1, 2], is_fk=True),
        Column("start_time", "ts_ns", ["10:00:00", "10:00:00", "10:00:00"], nullable=True),
        Column("time", "ts_ns", ["10:00:10", "10:00:10", "10:00:10"]),
        Column("int_value", "int64", [142, 3, 0], nullable=True),
        Column("double_value", "float64", ["", "", ""], nullable=True),
        Column("flags", "uint32", [0, 0, 0], nullable=True),
    ])

    # Stack the three tables vertically with TABLE_GAP spacing.
    scope_attrs.x = MARGIN; scope_attrs.y = MARGIN + 40
    compute_table_size(scope_attrs)
    metrics.x = MARGIN; metrics.y = scope_attrs.y + scope_attrs.height + TABLE_GAP + 10
    compute_table_size(metrics)
    ndp.x = MARGIN; ndp.y = metrics.y + metrics.height + TABLE_GAP + 10
    compute_table_size(ndp)

    return make_svg(
        [scope_attrs, metrics, ndp],
        [
            dict(src_table=scope_attrs, src_col_name="parent_id",
                 dst_table=metrics, dst_col_name="scope.id",
                 label="parent_id \u2192 scope.id", color=COL_FK_LINE),
            dict(src_table=ndp, src_col_name="parent_id",
                 dst_table=metrics, dst_col_name="id",
                 label="parent_id \u2192 id", color="#e74c3c"),
        ],
        "OTAP Metrics: Scope Attributes as Dimensions",
        "Encoding 2 \u2014 1 dimension (outcome), 3 timeseries, M=1 metric",
        "Total rows: 3(K+1) + 6M = 3(2+1) + 6 = 15 (outcome promoted into scope attrs)",
    )


def make_encoding3():
    """Encoding 3: Data point attributes."""
    scope_attrs = Table("ScopeAttrs", "SCOPE_ATTRS", _scope_attrs_cols([
        (0, "node_id", "Str", "node-7"),
        (0, "pipeline", "Str", "ingest"),
    ]))
    # Single metric row; the outcome dimension lives on the data points.
    metrics = Table("UnivariateMetrics", "UNIVARIATE_METRICS", [
        Column("id", "uint16", [0], is_id=True),
        Column("resource.id", "uint16", [0], nullable=True),
        Column("scope.id", "uint16", [0], nullable=True),
        Column("scope.name", "str/dict", ["otap"], nullable=True),
        Column("metric_type", "uint8", ["Sum"]),
        Column("name", "str/dict", ["consumer.items"]),
        Column("unit", "str/dict", ["{item}"], nullable=True),
        Column("agg_temp", "int32", ["Cum"], nullable=True),
        Column("is_monotonic", "bool", [True], nullable=True),
    ])
    # All three points share parent_id 0 (the single metric).
    ndp = Table("NumberDataPoint", "NUMBER_DATA_POINTS", [
        Column("id", "uint32", [0, 1, 2], is_id=True, nullable=True),
        Column("parent_id", "uint16", [0, 0, 0], is_fk=True),
        Column("start_time", "ts_ns", ["10:00:00", "10:00:00", "10:00:00"], nullable=True),
        Column("time", "ts_ns", ["10:00:10", "10:00:10", "10:00:10"]),
        Column("int_value", "int64", [142, 3, 0], nullable=True),
        Column("double_value", "float64", ["", "", ""], nullable=True),
        Column("flags", "uint32", [0, 0, 0], nullable=True),
    ])
    # Per-point attrs reuse the standard attr schema keyed by data-point id.
    dp_attrs = Table("NumberDPAttrs", "NUMBER_DP_ATTRS", _scope_attrs_cols([
        (0, "outcome", "Str", "success"),
        (1, "outcome", "Str", "failed"),
        (2, "outcome", "Str", "refused"),
    ]))
    # Override parent_id type to uint32 for dp attrs
    dp_attrs.columns[0] = Column("parent_id", "uint32",
                                 [0, 1, 2], is_fk=True)

    # Stack the four tables vertically with TABLE_GAP spacing.
    scope_attrs.x = MARGIN; scope_attrs.y = MARGIN + 40
    compute_table_size(scope_attrs)
    metrics.x = MARGIN; metrics.y = scope_attrs.y + scope_attrs.height + TABLE_GAP + 10
    compute_table_size(metrics)
    ndp.x = MARGIN; ndp.y = metrics.y + metrics.height + TABLE_GAP + 10
    compute_table_size(ndp)
    dp_attrs.x = MARGIN; dp_attrs.y = ndp.y + ndp.height + TABLE_GAP + 10
    compute_table_size(dp_attrs)

    return make_svg(
        [scope_attrs, metrics, ndp, dp_attrs],
        [
            dict(src_table=scope_attrs, src_col_name="parent_id",
                 dst_table=metrics, dst_col_name="scope.id",
                 label="parent_id \u2192 scope.id", color=COL_FK_LINE),
            dict(src_table=ndp, src_col_name="parent_id",
                 dst_table=metrics, dst_col_name="id",
                 label="parent_id \u2192 id", color="#e74c3c"),
            # dp_attrs.parent_id → ndp.id (the extra link unique to encoding 3)
            dict(src_table=dp_attrs, src_col_name="parent_id",
                 dst_table=ndp, dst_col_name="id",
                 label="parent_id \u2192 id", color="#27ae60"),
        ],
        "OTAP Metrics: Data Point Attributes",
        "Encoding 3 \u2014 1 dimension (outcome), 3 timeseries, M=1 metric",
        "Total rows: K + 7M = 2 + 7 = 9 (outcome as data-point-level attribute)",
    )


def main():
    # Output directory is resolved relative to this script's location:
    # <repo_root>/rust/otap-dataflow/docs/images
    script_dir = os.path.dirname(os.path.abspath(__file__))
    repo_root = os.path.dirname(os.path.dirname(script_dir))
    out_dir = os.path.join(repo_root, "rust", "otap-dataflow", "docs", "images")
    os.makedirs(out_dir, exist_ok=True)

    # (filename, builder) pairs — one SVG per encoding.
    diagrams = [
        ("otap-metrics-encoding1-flat.svg", make_encoding1),
        ("otap-metrics-encoding2-scope-dims.svg", make_encoding2),
        ("otap-metrics-encoding3-dp-attrs.svg", make_encoding3),
    ]
    for
name, fn in diagrams: + path = os.path.join(out_dir, name) + with open(path, "w") as f: + f.write(fn()) + print(f" \u2713 {path}") + print(f"\nGenerated {len(diagrams)} SVG diagrams in {out_dir}/") + + +if __name__ == "__main__": + main() diff --git a/tools/otap-diagram/output/otap-metrics-encoding1-flat.svg b/tools/otap-diagram/output/otap-metrics-encoding1-flat.svg new file mode 100644 index 0000000000..7b1da25241 --- /dev/null +++ b/tools/otap-diagram/output/otap-metrics-encoding1-flat.svg @@ -0,0 +1,271 @@ + + + + + + +OTAP Metrics: Scope Attributes, Flat Metrics +Encoding 1 — 1 dimension (outcome), 3 timeseries, M=1 metric +Total rows: K + 6M = 2 + 6 = 8 (K=2 scope attrs, M=1 metric × 3 flat variants) + + +ScopeAttrs +SCOPE_ATTRS + +parent_id + +key + +type + +str + +int + +double + +bool + +bytes + +ser + +uint16 + +string/dict + +uint8 + +string/dict + +int64 + +float64 + +bool + +binary + +binary + +0 + +node_id + +Str + +node-7 + + + + + + + + + + + +0 + +pipeline + +Str + +ingest + + + + + + + + + + + +unused + + + +UnivariateMetrics +UNIVARIATE_METRICS + +id + +resource.id + +scope.id + +scope.name + +metric_type + +name + +unit + +agg_temp + +is_monotonic + +uint16 + +uint16 + +uint16 + +str/dict + +uint8 + +str/dict + +str/dict + +int32 + +bool + +0 + +0 + +0 + +otap + +Sum + +consumed_success + +{item} + +Cum + +True + +1 + +0 + +0 + +otap + +Sum + +consumed_failed + +{item} + +Cum + +True + +2 + +0 + +0 + +otap + +Sum + +consumed_refused + +{item} + +Cum + +True + + + +NumberDataPoint +NUMBER_DATA_POINTS + +id + +parent_id + +start_time + +time + +int_value + +flags + +double_value + +uint32 + +uint16 + +ts_ns + +ts_ns + +int64 + +uint32 + +float64 + +0 + +0 + +10:00:00 + +10:00:10 + +142 + +0 + + + +1 + +1 + +10:00:00 + +10:00:10 + +3 + +0 + + + +2 + +2 + +10:00:00 + +10:00:10 + +0 + +0 + + + +unused + + + + + +parent_id → scope.id + + + + +parent_id → id + \ No newline at end of file diff --git 
a/tools/otap-diagram/output/otap-metrics-encoding2-scope-dims.svg b/tools/otap-diagram/output/otap-metrics-encoding2-scope-dims.svg new file mode 100644 index 0000000000..6f0087fcb5 --- /dev/null +++ b/tools/otap-diagram/output/otap-metrics-encoding2-scope-dims.svg @@ -0,0 +1,397 @@ + + + + + + +OTAP Metrics: Scope Attributes as Dimensions +Encoding 2 — 1 dimension (outcome), 3 timeseries, M=1 metric +Total rows: 3(K+1) + 6M = 3(2+1) + 6 = 15 (outcome promoted into scope attrs) + + +ScopeAttrs +SCOPE_ATTRS + +parent_id + +key + +type + +str + +int + +double + +bool + +bytes + +ser + +uint16 + +string/dict + +uint8 + +string/dict + +int64 + +float64 + +bool + +binary + +binary + +0 + +node_id + +Str + +node-7 + + + + + + + + + + + +0 + +pipeline + +Str + +ingest + + + + + + + + + + + +0 + +outcome + +Str + +success + + + + + + + + + + + +1 + +node_id + +Str + +node-7 + + + + + + + + + + + +1 + +pipeline + +Str + +ingest + + + + + + + + + + + +1 + +outcome + +Str + +failed + + + + + + + + + + + +2 + +node_id + +Str + +node-7 + + + + + + + + + + + +2 + +pipeline + +Str + +ingest + + + + + + + + + + + +2 + +outcome + +Str + +refused + + + + + + + + + + + +unused + + + +UnivariateMetrics +UNIVARIATE_METRICS + +id + +resource.id + +scope.id + +scope.name + +metric_type + +name + +unit + +agg_temp + +is_monotonic + +uint16 + +uint16 + +uint16 + +str/dict + +uint8 + +str/dict + +str/dict + +int32 + +bool + +0 + +0 + +0 + +otap + +Sum + +consumer.items + +{item} + +Cum + +True + +1 + +0 + +1 + +otap + +Sum + +consumer.items + +{item} + +Cum + +True + +2 + +0 + +2 + +otap + +Sum + +consumer.items + +{item} + +Cum + +True + + + +NumberDataPoint +NUMBER_DATA_POINTS + +id + +parent_id + +start_time + +time + +int_value + +flags + +double_value + +uint32 + +uint16 + +ts_ns + +ts_ns + +int64 + +uint32 + +float64 + +0 + +0 + +10:00:00 + +10:00:10 + +142 + +0 + + + +1 + +1 + +10:00:00 + +10:00:10 + +3 + +0 + + + +2 + +2 + +10:00:00 + +10:00:10 + +0 + +0 + + + +unused + + + + + 
+parent_id → scope.id + + + + +parent_id → id + \ No newline at end of file diff --git a/tools/otap-diagram/output/otap-metrics-encoding3-dp-attrs.svg b/tools/otap-diagram/output/otap-metrics-encoding3-dp-attrs.svg new file mode 100644 index 0000000000..b5e924af66 --- /dev/null +++ b/tools/otap-diagram/output/otap-metrics-encoding3-dp-attrs.svg @@ -0,0 +1,338 @@ + + + + + + + +OTAP Metrics: Data Point Attributes +Encoding 3 — 1 dimension (outcome), 3 timeseries, M=1 metric +Total rows: K + 7M = 2 + 7 = 9 (outcome as data-point-level attribute) + + +ScopeAttrs +SCOPE_ATTRS + +parent_id + +key + +type + +str + +int + +double + +bool + +bytes + +ser + +uint16 + +string/dict + +uint8 + +string/dict + +int64 + +float64 + +bool + +binary + +binary + +0 + +node_id + +Str + +node-7 + + + + + + + + + + + +0 + +pipeline + +Str + +ingest + + + + + + + + + + + +unused + + + +UnivariateMetrics +UNIVARIATE_METRICS + +id + +resource.id + +scope.id + +scope.name + +metric_type + +name + +unit + +agg_temp + +is_monotonic + +uint16 + +uint16 + +uint16 + +str/dict + +uint8 + +str/dict + +str/dict + +int32 + +bool + +0 + +0 + +0 + +otap + +Sum + +consumer.items + +{item} + +Cum + +True + + + +NumberDataPoint +NUMBER_DATA_POINTS + +id + +parent_id + +start_time + +time + +int_value + +flags + +double_value + +uint32 + +uint16 + +ts_ns + +ts_ns + +int64 + +uint32 + +float64 + +0 + +0 + +10:00:00 + +10:00:10 + +142 + +0 + + + +1 + +0 + +10:00:00 + +10:00:10 + +3 + +0 + + + +2 + +0 + +10:00:00 + +10:00:10 + +0 + +0 + + + +unused + + + +NumberDPAttrs +NUMBER_DP_ATTRS + +parent_id + +key + +type + +str + +int + +double + +bool + +bytes + +ser + +uint32 + +string/dict + +uint8 + +string/dict + +int64 + +float64 + +bool + +binary + +binary + +0 + +outcome + +Str + +success + + + + + + + + + + + +1 + +outcome + +Str + +failed + + + + + + + + + + + +2 + +outcome + +Str + +refused + + + + + + + + + + + +unused + + + + + +parent_id → scope.id + + + + +parent_id → id + + + + +parent_id → id + \ No 
newline at end of file