From 30beeb10d3c1e13847858f7bd3e2b9570bc12cfd Mon Sep 17 00:00:00 2001 From: bouillipx Date: Wed, 8 Apr 2026 13:14:21 +0800 Subject: [PATCH 01/14] fix(security): redact CLI args from Windows debug log to prevent prompt leakage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Windows shim debug log at cli-spawn.ts:470 was printing the full `shimSpawn.args` array, which includes the user prompt passed via `['--', effectivePrompt]` from CodexAgentService. In debug mode this would write prompt content to log files in plaintext. Replace `args: shimSpawn.args` with `argCount: shimSpawn.args.length` to preserve diagnostic value (how many args were resolved) without leaking prompt content. Part of the D1 Telemetry Redaction initiative (observability feature). [宪宪/Opus-46🐾] Co-Authored-By: Claude Opus 4.6 --- packages/api/src/utils/cli-spawn.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/api/src/utils/cli-spawn.ts b/packages/api/src/utils/cli-spawn.ts index 148bda5d5..700f4df64 100644 --- a/packages/api/src/utils/cli-spawn.ts +++ b/packages/api/src/utils/cli-spawn.ts @@ -467,7 +467,10 @@ function defaultSpawn( if (IS_WINDOWS) { const shimSpawn = resolveWindowsShimSpawn(command, args); if (shimSpawn) { - log.debug({ original: command, resolved: shimSpawn.command, args: shimSpawn.args }, 'Windows shim resolved'); + log.debug( + { original: command, resolved: shimSpawn.command, argCount: shimSpawn.args.length }, + 'Windows shim resolved', + ); return nodeSpawn(shimSpawn.command, shimSpawn.args, { cwd: options.cwd, env: options.env, From dc622b50f00b714c1c4568137892704e8791c00c Mon Sep 17 00:00:00 2001 From: bouillipx Date: Thu, 9 Apr 2026 12:12:26 +0800 Subject: [PATCH 02/14] =?UTF-8?q?feat(F152):=20Observability=20Phase=201?= =?UTF-8?q?=20=E2=80=94=20OTel=20SDK,=20telemetry=20redaction,=20/ready=20?= =?UTF-8?q?endpoint?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Implements the complete F152 observability foundation: - D1 TelemetryRedactor: 4-class field classification (Class A credentials → [REDACTED], Class B business content → hash+length, Class C system IDs → HMAC-SHA256 pseudonymization, Class D safe values → passthrough) - RedactingSpanProcessor and RedactingLogProcessor wrapping OTel export pipeline - D2 MetricAttributeAllowlist: ViewOptions with createAllowListAttributesProcessor enforcing bounded cardinality on all cat_cafe.* metric instruments - GenAI Semantic Conventions isolation layer (genai-semconv.ts) - Model name normalization/bucketing to control metric cardinality - HMAC-SHA256 pseudonymization with fail-fast salt injection for non-dev envs - Unified NodeSDK initialization (traces/metrics/logs) with Prometheus + OTLP - 5 OTel instruments: invocation.duration, llm.call.duration, agent.liveness, invocation.active, token.usage - /ready endpoint (Redis ping probe, returns ready/degraded) - OTel graceful shutdown in server close handler - Regression test: cli-spawn Windows shim debug log argCount verification - Unit tests: redactor classification, model normalizer, metric allowlist Closes #388 Co-Authored-By: Claude Opus 4.6 --- docs/features/index.json | 2 +- packages/api/package.json | 11 + packages/api/src/index.ts | 31 + .../infrastructure/telemetry/genai-semconv.ts | 20 + .../api/src/infrastructure/telemetry/hmac.ts | 47 ++ .../api/src/infrastructure/telemetry/init.ts | 116 ++++ .../infrastructure/telemetry/instruments.ts | 55 ++ .../telemetry/metric-allowlist.ts | 48 ++ .../telemetry/model-normalizer.ts | 33 ++ .../src/infrastructure/telemetry/redactor.ts | 133 +++++ .../telemetry/cli-spawn-redaction.test.js | 108 ++++ pnpm-lock.yaml | 543 +++++++++++++++++- 12 files changed, 1131 insertions(+), 16 deletions(-) create mode 100644 packages/api/src/infrastructure/telemetry/genai-semconv.ts create mode 100644 packages/api/src/infrastructure/telemetry/hmac.ts create mode 100644 
packages/api/src/infrastructure/telemetry/init.ts create mode 100644 packages/api/src/infrastructure/telemetry/instruments.ts create mode 100644 packages/api/src/infrastructure/telemetry/metric-allowlist.ts create mode 100644 packages/api/src/infrastructure/telemetry/model-normalizer.ts create mode 100644 packages/api/src/infrastructure/telemetry/redactor.ts create mode 100644 packages/api/test/telemetry/cli-spawn-redaction.test.js diff --git a/docs/features/index.json b/docs/features/index.json index edcef5569..40959bc7e 100644 --- a/docs/features/index.json +++ b/docs/features/index.json @@ -933,7 +933,7 @@ { "id": "F153", "name": "Observability Infrastructure — 运行时可观测基础设施", - "status": "spec | **Owner**: Community (PR author) + Ragdoll | **Priority**: P2", + "status": "in-progress | **Owner**: Community (PR author) + Ragdoll | **Priority**: P2", "file": "F153-observability-infra.md" }, { diff --git a/packages/api/package.json b/packages/api/package.json index 0d46f7a78..755f5fea3 100644 --- a/packages/api/package.json +++ b/packages/api/package.json @@ -30,6 +30,17 @@ "@huggingface/transformers": "^3", "@larksuiteoapi/node-sdk": "^1.59.0", "@modelcontextprotocol/sdk": "^1.0.0", + "@opentelemetry/api": "^1.9.1", + "@opentelemetry/exporter-logs-otlp-http": "^0.214.0", + "@opentelemetry/exporter-metrics-otlp-http": "^0.214.0", + "@opentelemetry/exporter-prometheus": "^0.214.0", + "@opentelemetry/exporter-trace-otlp-http": "^0.214.0", + "@opentelemetry/resources": "^2.6.1", + "@opentelemetry/sdk-logs": "^0.214.0", + "@opentelemetry/sdk-metrics": "^2.6.1", + "@opentelemetry/sdk-node": "^0.214.0", + "@opentelemetry/sdk-trace-node": "^2.6.1", + "@opentelemetry/semantic-conventions": "^1.40.0", "@wecom/aibot-node-sdk": "1.0.4", "better-sqlite3": "^12.6.2", "cheerio": "^1.1.2", diff --git a/packages/api/src/index.ts b/packages/api/src/index.ts index a357e1467..1b1687d03 100644 --- a/packages/api/src/index.ts +++ b/packages/api/src/index.ts @@ -204,6 +204,11 @@ const 
PROCESS_START_AT = Date.now(); async function main(): Promise { const { logger: customLogger, isDebugMode, LOG_DIR_PATH } = await import('./infrastructure/logger.js'); + + // F152: Initialize OpenTelemetry SDK (must be early, before routes) + const { initTelemetry } = await import('./infrastructure/telemetry/init.js'); + const shutdownTelemetry = initTelemetry(); + const app = Fastify({ logger: customLogger as unknown as import('fastify').FastifyBaseLogger }); if (isDebugMode) { @@ -233,6 +238,25 @@ async function main(): Promise { // Health check app.get('/health', async () => ({ status: 'ok', timestamp: Date.now() })); + // F152: Readiness check — verifies dependencies are reachable + app.get('/ready', async () => { + const checks: Record = {}; + // Redis probe + if (redisClient) { + const t0 = Date.now(); + try { + await redisClient.ping(); + checks.redis = { ok: true, ms: Date.now() - t0 }; + } catch (err) { + checks.redis = { ok: false, ms: Date.now() - t0, error: String(err) }; + } + } else { + checks.redis = { ok: true, ms: 0 }; // memory mode, always ready + } + const allOk = Object.values(checks).every((c) => c.ok); + return { status: allOk ? 
'ready' : 'degraded', timestamp: Date.now(), checks }; + }); + // Create invocation tracker for cancellation support const invocationTracker = new InvocationTracker(); @@ -2101,6 +2125,13 @@ async function main(): Promise { app.log.error(`[api] SocketManager close failed: ${String(err)}`); } + // F152: Flush and shutdown OTel SDK before closing server + try { + await shutdownTelemetry(); + } catch (err) { + app.log.error(`[api] OTel shutdown failed: ${String(err)}`); + } + // Close Fastify server await app.close(); diff --git a/packages/api/src/infrastructure/telemetry/genai-semconv.ts b/packages/api/src/infrastructure/telemetry/genai-semconv.ts new file mode 100644 index 000000000..7a303b5cc --- /dev/null +++ b/packages/api/src/infrastructure/telemetry/genai-semconv.ts @@ -0,0 +1,20 @@ +/** + * F152: GenAI Semantic Convention isolation layer. + * + * OTel GenAI Semantic Conventions are still Development-stage. + * All internal code references these constants; upstream renames + * only affect this file. + */ + +// --- Stable attributes --- +export const GENAI_SYSTEM = 'gen_ai.system'; +export const GENAI_MODEL = 'gen_ai.request.model'; + +// --- Development-stage attributes (may rename) --- +export const GENAI_TOKENS_INPUT = 'gen_ai.usage.input_tokens'; +export const GENAI_TOKENS_OUTPUT = 'gen_ai.usage.output_tokens'; + +// --- Custom Cat Cafe attributes --- +export const AGENT_ID = 'agent.id'; +export const OPERATION_NAME = 'operation.name'; +export const STATUS = 'status'; diff --git a/packages/api/src/infrastructure/telemetry/hmac.ts b/packages/api/src/infrastructure/telemetry/hmac.ts new file mode 100644 index 000000000..4bf31905c --- /dev/null +++ b/packages/api/src/infrastructure/telemetry/hmac.ts @@ -0,0 +1,47 @@ +/** + * F152: HMAC-based ID pseudonymization for external telemetry. + * + * System identifiers (threadId, invocationId, etc.) are HMAC'd + * before leaving the machine. 
Same input → same hash within an + * instance, enabling cross-signal correlation in external tools + * (e.g. Sentry) without exposing raw IDs. + * + * Salt MUST be injected via TELEMETRY_HMAC_SALT env var. + * Non-dev environments fail fast if missing. + */ + +import { createHmac } from 'node:crypto'; + +const TENANT_SALT = process.env.TELEMETRY_HMAC_SALT; + +function getSalt(): string { + if (TENANT_SALT) return TENANT_SALT; + const env = process.env.NODE_ENV; + if (env === 'development' || env === 'test') { + return 'dev-only-insecure-salt'; + } + throw new Error( + 'TELEMETRY_HMAC_SALT is required in non-dev environments. ' + 'Set it in .env or your secret manager.', + ); +} + +/** + * HMAC-SHA256 pseudonymize an identifier. + * Returns first 32 hex chars (128-bit, collision-safe for correlation). + */ +export function hmacId(id: string): string { + return createHmac('sha256', getSalt()).update(id).digest('hex').slice(0, 32); +} + +/** Env-gated escape hatch: export raw IDs (for self-hosted controlled envs). */ +export function shouldExportRawIds(): boolean { + return process.env.TELEMETRY_EXPORT_RAW_SYSTEM_IDS === '1'; +} + +/** + * Pseudonymize a system identifier for external telemetry. + * Returns raw ID if escape hatch is enabled, HMAC otherwise. + */ +export function pseudonymizeId(id: string): string { + return shouldExportRawIds() ? id : hmacId(id); +} diff --git a/packages/api/src/infrastructure/telemetry/init.ts b/packages/api/src/infrastructure/telemetry/init.ts new file mode 100644 index 000000000..6d61bd957 --- /dev/null +++ b/packages/api/src/infrastructure/telemetry/init.ts @@ -0,0 +1,116 @@ +/** + * F152: OpenTelemetry SDK initialization — unified entry point. + * + * Three signals (traces, metrics, logs) share one NodeSDK instance. + * Disabled via OTEL_SDK_DISABLED=true for zero overhead. 
+ * + * Usage: import { initTelemetry } from './infrastructure/telemetry/init.js'; + * const shutdown = initTelemetry(); // call at startup + * // on graceful shutdown: await shutdown(); + */ + +import { OTLPLogExporter } from '@opentelemetry/exporter-logs-otlp-http'; +import { OTLPMetricExporter } from '@opentelemetry/exporter-metrics-otlp-http'; +import { PrometheusExporter } from '@opentelemetry/exporter-prometheus'; +import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-http'; +import { resourceFromAttributes } from '@opentelemetry/resources'; +import { BatchLogRecordProcessor } from '@opentelemetry/sdk-logs'; +import { PeriodicExportingMetricReader } from '@opentelemetry/sdk-metrics'; +import { NodeSDK } from '@opentelemetry/sdk-node'; +import { BatchSpanProcessor } from '@opentelemetry/sdk-trace-node'; +import { ATTR_SERVICE_NAME, ATTR_SERVICE_VERSION } from '@opentelemetry/semantic-conventions'; +import { createModuleLogger } from '../logger.js'; +import { createMetricAllowlistViews } from './metric-allowlist.js'; +import { RedactingLogProcessor, RedactingSpanProcessor } from './redactor.js'; + +const log = createModuleLogger('telemetry'); + +export interface TelemetryConfig { + serviceName?: string; + serviceVersion?: string; + /** Port for Prometheus /metrics scrape endpoint. Default: 9464 */ + prometheusPort?: number; + /** Set true to also export via OTLP (requires OTEL_EXPORTER_OTLP_ENDPOINT). */ + otlpEnabled?: boolean; +} + +const DEFAULT_CONFIG: Required = { + serviceName: 'cat-cafe-api', + serviceVersion: '0.1.0', + prometheusPort: 9464, + otlpEnabled: !!process.env.OTEL_EXPORTER_OTLP_ENDPOINT, +}; + +let sdk: NodeSDK | null = null; + +/** + * Initialize OTel SDK. Returns an async shutdown function. + * No-op if OTEL_SDK_DISABLED=true. 
+ */ +export function initTelemetry(config?: TelemetryConfig): () => Promise { + if (process.env.OTEL_SDK_DISABLED === 'true') { + log.info('OTel SDK disabled (OTEL_SDK_DISABLED=true)'); + return async () => {}; + } + + const cfg = { ...DEFAULT_CONFIG, ...config }; + + const resource = resourceFromAttributes({ + [ATTR_SERVICE_NAME]: cfg.serviceName, + [ATTR_SERVICE_VERSION]: cfg.serviceVersion, + }); + + // --- Traces: Redacting processor wraps OTLP exporter --- + const spanProcessor = cfg.otlpEnabled + ? new RedactingSpanProcessor(new BatchSpanProcessor(new OTLPTraceExporter())) + : undefined; + + // --- Metrics: Prometheus scrape + optional OTLP push --- + const prometheusExporter = new PrometheusExporter({ + port: cfg.prometheusPort, + preventServerStart: false, + }); + + const metricReaders: import('@opentelemetry/sdk-metrics').IMetricReader[] = [prometheusExporter]; + if (cfg.otlpEnabled) { + metricReaders.push( + new PeriodicExportingMetricReader({ + exporter: new OTLPMetricExporter(), + exportIntervalMillis: 60_000, + }), + ); + } + + // --- Logs: Redacting processor wraps OTLP exporter --- + const logProcessor = cfg.otlpEnabled + ? new RedactingLogProcessor(new BatchLogRecordProcessor(new OTLPLogExporter())) + : undefined; + + // --- Views: enforce metric attribute allowlist --- + const views = createMetricAllowlistViews(); + + sdk = new NodeSDK({ + resource, + spanProcessors: spanProcessor ? [spanProcessor] : [], + metricReaders, + logRecordProcessors: logProcessor ? 
[logProcessor] : [], + views, + }); + + sdk.start(); + log.info( + { + prometheus: cfg.prometheusPort, + otlp: cfg.otlpEnabled, + }, + 'OTel SDK initialized', + ); + + return async () => { + if (sdk) { + await sdk.shutdown(); + sdk = null; + log.info('OTel SDK shut down'); + } + }; +} diff --git a/packages/api/src/infrastructure/telemetry/instruments.ts b/packages/api/src/infrastructure/telemetry/instruments.ts new file mode 100644 index 000000000..79763332f --- /dev/null +++ b/packages/api/src/infrastructure/telemetry/instruments.ts @@ -0,0 +1,55 @@ +/** + * F152: First batch of OTel instruments for Cat Cafe observability. + * + * All instruments use the `cat_cafe.` prefix and are bound by the + * MetricAttributeAllowlist Views (D2 enforcement). + */ + +import { metrics } from '@opentelemetry/api'; + +const meter = metrics.getMeter('cat-cafe-api', '0.1.0'); + +/** Histogram: invocation duration (seconds). */ +export const invocationDuration = meter.createHistogram('cat_cafe.invocation.duration', { + description: 'Duration of a single cat invocation', + unit: 's', +}); + +/** Histogram: individual LLM API call duration (seconds). */ +export const llmCallDuration = meter.createHistogram('cat_cafe.llm.call.duration', { + description: 'Duration of a single LLM API call', + unit: 's', +}); + +/** + * Gauge: agent liveness state. + * 0=dead, 1=idle-silent, 2=busy-silent, 3=active. + */ +export const agentLiveness = meter.createObservableGauge('cat_cafe.agent.liveness', { + description: 'Agent process liveness state (0=dead, 1=idle-silent, 2=busy-silent, 3=active)', +}); + +/** UpDownCounter: currently active invocations. */ +export const activeInvocations = meter.createUpDownCounter('cat_cafe.invocation.active', { + description: 'Number of currently active invocations', +}); + +/** Counter: token usage (split by input/output via attributes). 
*/ +export const tokenUsage = meter.createCounter('cat_cafe.token.usage', { + description: 'Cumulative token consumption', + unit: 'tokens', +}); + +/** Map liveness state string to numeric gauge value. */ +export function livenessStateToNumber(state: 'dead' | 'idle-silent' | 'busy-silent' | 'active'): number { + switch (state) { + case 'dead': + return 0; + case 'idle-silent': + return 1; + case 'busy-silent': + return 2; + case 'active': + return 3; + } +} diff --git a/packages/api/src/infrastructure/telemetry/metric-allowlist.ts b/packages/api/src/infrastructure/telemetry/metric-allowlist.ts new file mode 100644 index 000000000..864d7413c --- /dev/null +++ b/packages/api/src/infrastructure/telemetry/metric-allowlist.ts @@ -0,0 +1,48 @@ +/** + * F152: Metric Attribute Allowlist — D2 code-level enforcement. + * + * Every OTel instrument is registered with a View that restricts + * its attributes to the allowlist. Non-allowed attributes are + * silently dropped by the SDK (not aggregated, not exported). + * + * This prevents anyone from accidentally adding high-cardinality + * attributes (threadId, invocationId, etc.) to metrics. + */ + +import { createAllowListAttributesProcessor, type ViewOptions } from '@opentelemetry/sdk-metrics'; +import { AGENT_ID, GENAI_MODEL, GENAI_SYSTEM, OPERATION_NAME, STATUS } from './genai-semconv.js'; + +/** The ONLY attributes allowed on metric instruments. */ +export const ALLOWED_METRIC_ATTRIBUTES: ReadonlySet = new Set([ + AGENT_ID, + GENAI_SYSTEM, + GENAI_MODEL, + OPERATION_NAME, + STATUS, +]); + +const allowedKeys = [...ALLOWED_METRIC_ATTRIBUTES]; + +/** + * Create OTel Views that enforce the attribute allowlist for our instruments. + * Pass these to the MeterProvider configuration. 
+ */ +export function createMetricAllowlistViews(): ViewOptions[] { + return [ + { + instrumentName: 'cat_cafe.*', + attributesProcessors: [createAllowListAttributesProcessor(allowedKeys)], + }, + ]; +} + +/** + * Create a ViewOptions for a specific instrument name. + * Use this when you need fine-grained per-instrument control. + */ +export function createInstrumentView(instrumentName: string): ViewOptions { + return { + instrumentName, + attributesProcessors: [createAllowListAttributesProcessor(allowedKeys)], + }; +} diff --git a/packages/api/src/infrastructure/telemetry/model-normalizer.ts b/packages/api/src/infrastructure/telemetry/model-normalizer.ts new file mode 100644 index 000000000..f122a926f --- /dev/null +++ b/packages/api/src/infrastructure/telemetry/model-normalizer.ts @@ -0,0 +1,33 @@ +/** + * F152: Model name normalization for bounded metric cardinality. + * + * `defaultModel` in runtime-cat-catalog is a free string — reporting + * raw values as metric attributes would cause cardinality explosion. + * This module buckets model names into provider+family groups. + */ + +const MODEL_BUCKETS: ReadonlyArray = [ + ['claude-opus', 'claude-opus'], + ['claude-sonnet', 'claude-sonnet'], + ['claude-haiku', 'claude-haiku'], + ['gpt-4o', 'gpt-4o'], + ['gpt-4', 'gpt-4'], + ['gpt-5', 'gpt-5'], + ['o3', 'o3'], + ['o4', 'o4'], + ['gemini-2.5', 'gemini-2.5'], + ['gemini-2.0', 'gemini-2.0'], + ['qwen', 'qwen'], +]; + +/** + * Normalize a raw model string into a bounded bucket. + * Unknown models map to `'other'`. 
+ */ +export function normalizeModel(raw: string): string { + const lowered = raw.toLowerCase(); + for (const [prefix, bucket] of MODEL_BUCKETS) { + if (lowered.includes(prefix)) return bucket; + } + return 'other'; +} diff --git a/packages/api/src/infrastructure/telemetry/redactor.ts b/packages/api/src/infrastructure/telemetry/redactor.ts new file mode 100644 index 000000000..61788c897 --- /dev/null +++ b/packages/api/src/infrastructure/telemetry/redactor.ts @@ -0,0 +1,133 @@ +/** + * F152: TelemetryRedactor — OTel SpanProcessor & LogRecordProcessor + * that enforces D1 field classification (Class A/B/C/D) on external telemetry. + * + * Internal (local logs/archive) remains untouched. + * External (OTel exporters) gets filtered through this module. + */ + +import { createHash } from 'node:crypto'; +import type { Context } from '@opentelemetry/api'; +import type { LogRecordProcessor, SdkLogRecord } from '@opentelemetry/sdk-logs'; +import type { ReadableSpan, SpanProcessor } from '@opentelemetry/sdk-trace-node'; +import { pseudonymizeId } from './hmac.js'; + +// --- Class A: credentials — always redacted --- +const CLASS_A_KEYS = new Set([ + 'authorization', + 'cookie', + 'set-cookie', + 'x-api-key', + 'token', + 'apikey', + 'api_key', + 'secret', + 'password', + 'credential', + 'credentials', + 'callbacktoken', +]); + +function isClassA(key: string): boolean { + const lower = key.toLowerCase(); + return CLASS_A_KEYS.has(lower) || lower.endsWith('_token') || lower.endsWith('_api_key'); +} + +// --- Class B: business content — hash+length only --- +const CLASS_B_KEYS = new Set([ + 'prompt', + 'message.content', + 'thinking', + 'toolinput', + 'tool_result', + 'command', + 'aggregated_output', + 'mcp.arguments', + 'rich_block.image', +]); + +function isClassB(key: string): boolean { + return CLASS_B_KEYS.has(key.toLowerCase()); +} + +// --- Class C: system identifiers — HMAC pseudonymized --- +const CLASS_C_KEYS = new Set(['userid', 'threadid', 'invocationid', 
'sessionid', 'messageid', 'rawarchivepath']); + +function isClassC(key: string): boolean { + return CLASS_C_KEYS.has(key.toLowerCase()); +} + +function redactValue(key: string, value: unknown): unknown { + if (isClassA(key)) return '[REDACTED]'; + if (isClassB(key) && typeof value === 'string') { + const hash = createHash('sha256').update(value).digest('hex').slice(0, 16); + return `[hash:${hash} len:${value.length}]`; + } + if (isClassC(key) && typeof value === 'string') { + return pseudonymizeId(value); + } + return value; // Class D: pass through +} + +function redactAttributes(attrs: Record): Record { + const result: Record = {}; + for (const [key, value] of Object.entries(attrs)) { + result[key] = redactValue(key, value); + } + return result; +} + +/** + * OTel SpanProcessor that redacts span attributes before export. + * Wraps an inner processor (typically a BatchSpanProcessor). + */ +export class RedactingSpanProcessor implements SpanProcessor { + constructor(private readonly inner: SpanProcessor) {} + + onStart(span: import('@opentelemetry/sdk-trace-node').Span, ctx: Context): void { + this.inner.onStart(span, ctx); + } + + onEnd(span: ReadableSpan): void { + const redacted = redactAttributes(span.attributes as Record); + // ReadableSpan.attributes is readonly; we mutate before export via + // a proxy object that the inner processor serializes. + Object.assign((span as unknown as Record).attributes ?? {}, redacted); + this.inner.onEnd(span); + } + + async shutdown(): Promise { + return this.inner.shutdown(); + } + + async forceFlush(): Promise { + return this.inner.forceFlush(); + } +} + +/** + * OTel LogRecordProcessor that redacts log record attributes before export. 
+ */ +export class RedactingLogProcessor implements LogRecordProcessor { + constructor(private readonly inner: LogRecordProcessor) {} + + onEmit(record: SdkLogRecord, ctx?: Context): void { + const attrs = (record as unknown as Record).attributes; + if (attrs && typeof attrs === 'object') { + const redacted = redactAttributes(attrs as Record); + Object.assign(attrs, redacted); + } + this.inner.onEmit(record, ctx); + } + + async shutdown(): Promise { + return this.inner.shutdown(); + } + + async forceFlush(): Promise { + return this.inner.forceFlush(); + } +} + +// Export classification helpers for testing +export { isClassA, isClassB, isClassC, redactValue, redactAttributes }; diff --git a/packages/api/test/telemetry/cli-spawn-redaction.test.js b/packages/api/test/telemetry/cli-spawn-redaction.test.js new file mode 100644 index 000000000..ad60ab838 --- /dev/null +++ b/packages/api/test/telemetry/cli-spawn-redaction.test.js @@ -0,0 +1,108 @@ +/** + * F152: cli-spawn debug log redaction regression test. + * + * Ensures that Windows shim debug logging never leaks CLI args + * (which may contain user prompts). Prevents future regression + * of the fix applied in commit 4c8f7873. + */ + +import assert from 'node:assert/strict'; +import { readFileSync } from 'node:fs'; +import { dirname, resolve } from 'node:path'; +import { test } from 'node:test'; +import { fileURLToPath } from 'node:url'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const CLI_SPAWN_SRC = resolve(__dirname, '../../src/utils/cli-spawn.ts'); + +test('F152: cli-spawn Windows shim debug log must not contain args field', async (t) => { + // Read the source file and find the Windows shim debug log line + const source = readFileSync(CLI_SPAWN_SRC, 'utf8'); + + await t.test('Windows shim resolved log uses argCount, not args', () => { + // Find the log.debug call block that contains 'Windows shim resolved'. + // The Pino call spans multiple lines, so we match the full block. 
+ const shimBlockRe = /log\.debug\(\s*\{[^}]*\}\s*,\s*'Windows shim resolved'/gs; + const blocks = source.match(shimBlockRe); + + assert.ok(blocks && blocks.length > 0, 'Should find the Windows shim resolved log.debug block'); + + for (const block of blocks) { + assert.ok( + !block.includes('args: shimSpawn.args') && !block.includes('args: args'), + `Debug log must not contain raw args field. Found: ${block.trim()}`, + ); + assert.ok(block.includes('argCount'), `Debug log should use argCount instead of args. Found: ${block.trim()}`); + } + }); + + await t.test('No log.debug call in cli-spawn prints raw args array', () => { + // Check that no debug log in the Windows spawn path prints full args + const debugLogLines = source.split('\n').filter((line) => line.includes('log.debug(') && line.includes('args')); + + for (const line of debugLogLines) { + // argCount is fine; args: .args is not + const hasRawArgs = /args:\s*(?:shimSpawn\.args|args\b)/.test(line) && !line.includes('argCount'); + assert.ok(!hasRawArgs, `Found debug log that may leak raw args: ${line.trim()}`); + } + }); +}); + +test('F152: TelemetryRedactor classification', async () => { + const { isClassA, isClassB, isClassC, redactValue } = await import('../../dist/infrastructure/telemetry/redactor.js'); + + // Class A: credentials + assert.ok(isClassA('authorization')); + assert.ok(isClassA('callbackToken')); + assert.ok(isClassA('CAT_CAFE_CALLBACK_TOKEN')); + assert.equal(redactValue('authorization', 'Bearer xxx'), '[REDACTED]'); + + // Class B: business content + assert.ok(isClassB('prompt')); + assert.ok(isClassB('message.content')); + assert.ok(isClassB('toolInput')); + const redacted = redactValue('prompt', 'Hello world'); + assert.ok(typeof redacted === 'string'); + assert.ok(redacted.startsWith('[hash:')); + assert.ok(redacted.includes('len:11')); + assert.ok(!redacted.includes('Hello world')); + + // Class C: system identifiers + assert.ok(isClassC('threadId')); + 
assert.ok(isClassC('invocationId')); + assert.ok(isClassC('userId')); + const hmaced = redactValue('threadId', 'thread_abc123'); + assert.ok(typeof hmaced === 'string'); + assert.ok(!String(hmaced).includes('thread_abc123')); + + // Class D: safe values — pass through + assert.equal(redactValue('durationMs', 1234), 1234); + assert.equal(redactValue('status', 'success'), 'success'); +}); + +test('F152: model normalizer', async () => { + const { normalizeModel } = await import('../../dist/infrastructure/telemetry/model-normalizer.js'); + + assert.equal(normalizeModel('claude-opus-4-6'), 'claude-opus'); + assert.equal(normalizeModel('claude-sonnet-4-6'), 'claude-sonnet'); + assert.equal(normalizeModel('gpt-4o-2025-01-01'), 'gpt-4o'); + assert.equal(normalizeModel('gemini-2.5-pro'), 'gemini-2.5'); + assert.equal(normalizeModel('some-unknown-model'), 'other'); +}); + +test('F152: metric attribute allowlist', async () => { + const { ALLOWED_METRIC_ATTRIBUTES } = await import('../../dist/infrastructure/telemetry/metric-allowlist.js'); + + // Allowed attributes + assert.ok(ALLOWED_METRIC_ATTRIBUTES.has('agent.id')); + assert.ok(ALLOWED_METRIC_ATTRIBUTES.has('gen_ai.system')); + assert.ok(ALLOWED_METRIC_ATTRIBUTES.has('status')); + + // Forbidden attributes must NOT be in the allowlist + assert.ok(!ALLOWED_METRIC_ATTRIBUTES.has('threadId')); + assert.ok(!ALLOWED_METRIC_ATTRIBUTES.has('invocationId')); + assert.ok(!ALLOWED_METRIC_ATTRIBUTES.has('sessionId')); + assert.ok(!ALLOWED_METRIC_ATTRIBUTES.has('userId')); + assert.ok(!ALLOWED_METRIC_ATTRIBUTES.has('path')); + assert.ok(!ALLOWED_METRIC_ATTRIBUTES.has('command')); +}); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index d5daebfc6..92d562c12 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -50,6 +50,39 @@ importers: '@modelcontextprotocol/sdk': specifier: ^1.0.0 version: 1.26.0(zod@3.25.76) + '@opentelemetry/api': + specifier: ^1.9.1 + version: 1.9.1 + '@opentelemetry/exporter-logs-otlp-http': + specifier: ^0.214.0 + 
version: 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/exporter-metrics-otlp-http': + specifier: ^0.214.0 + version: 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/exporter-prometheus': + specifier: ^0.214.0 + version: 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/exporter-trace-otlp-http': + specifier: ^0.214.0 + version: 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/resources': + specifier: ^2.6.1 + version: 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/sdk-logs': + specifier: ^0.214.0 + version: 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/sdk-metrics': + specifier: ^2.6.1 + version: 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/sdk-node': + specifier: ^0.214.0 + version: 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/sdk-trace-node': + specifier: ^2.6.1 + version: 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/semantic-conventions': + specifier: ^1.40.0 + version: 1.40.0 '@wecom/aibot-node-sdk': specifier: 1.0.4 version: 1.0.4 @@ -275,7 +308,7 @@ importers: version: 0.27.3 next: specifier: ^14.1.0 - version: 14.2.35(@babel/core@7.29.0)(react-dom@18.3.1(react@18.3.1))(react@18.3.1) + version: 14.2.35(@babel/core@7.29.0)(@opentelemetry/api@1.9.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1) next-themes: specifier: ^0.4.6 version: 0.4.6(react-dom@18.3.1(react@18.3.1))(react@18.3.1) @@ -306,7 +339,7 @@ importers: devDependencies: '@ducanh2912/next-pwa': specifier: ^10.2.9 - version: 10.2.9(next@14.2.35(@babel/core@7.29.0)(react-dom@18.3.1(react@18.3.1))(react@18.3.1))(webpack@5.105.2) + version: 10.2.9(next@14.2.35(@babel/core@7.29.0)(@opentelemetry/api@1.9.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1))(webpack@5.105.2) '@types/dagre': specifier: ^0.7.54 version: 0.7.54 @@ -345,7 +378,7 @@ importers: version: 5.9.3 vitest: specifier: ^4.0.18 - version: 4.0.18(@types/node@20.19.31)(jiti@1.21.7)(jsdom@28.0.0)(terser@5.46.0)(tsx@4.21.0)(yaml@2.8.2) + version: 
4.0.18(@opentelemetry/api@1.9.1)(@types/node@20.19.31)(jiti@1.21.7)(jsdom@28.0.0)(terser@5.46.0)(tsx@4.21.0)(yaml@2.8.2) packages: @@ -1236,6 +1269,15 @@ packages: '@grammyjs/types@3.25.0': resolution: {integrity: sha512-iN9i5p+8ZOu9OMxWNcguojQfz4K/PDyMPOnL7PPCON+SoA/F8OKMH3uR7CVUkYfdNe0GCz8QOzAWrnqusQYFOg==} + '@grpc/grpc-js@1.14.3': + resolution: {integrity: sha512-Iq8QQQ/7X3Sac15oB6p0FmUg/klxQvXLeileoqrTRGJYLV+/9tubbr9ipz0GKHjmXVsgFPo/+W+2cA8eNcR+XA==} + engines: {node: '>=12.10.0'} + + '@grpc/proto-loader@0.8.0': + resolution: {integrity: sha512-rc1hOQtjIWGxcxpb9aHAfLpIctjEnsDehj0DAiVfBlmT84uvR0uUtN2hEi/ecvWVjXUGf5qPF4qEgiLOx1YIMQ==} + engines: {node: '>=6'} + hasBin: true + '@hono/node-server@1.19.9': resolution: {integrity: sha512-vHL6w3ecZsky+8P5MD+eFfaGTyCeOHUIFYMGpQGbrBTSmNNoxv0if69rEZ5giu36weC5saFuznL411gRX7bJDw==} engines: {node: '>=18.14.1'} @@ -1429,6 +1471,9 @@ packages: '@jridgewell/trace-mapping@0.3.31': resolution: {integrity: sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw==} + '@js-sdsl/ordered-map@4.4.2': + resolution: {integrity: sha512-iUKgm52T8HOE/makSxjqoWhe95ZJA1/G1sYsGev2JDKUSS14KAgg1LHb+Ba+IPow0xflbnSkOsZcO08C7w1gYw==} + '@larksuiteoapi/node-sdk@1.59.0': resolution: {integrity: sha512-sBpkruTvZDOxnVtoTbepWKRX0j1Y1ZElQYu0x7+v088sI9pcpbVp6ZzCGn62dhrKPatzNyCJyzYCPXPYQWccrA==} @@ -1552,6 +1597,174 @@ packages: resolution: {integrity: sha512-nn5ozdjYQpUCZlWGuxcJY/KpxkWQs4DcbMCmKojjyrYDEAGy4Ce19NN4v5MduafTwJlbKc99UA8YhSVqq9yPZA==} engines: {node: '>=12.4.0'} + '@opentelemetry/api-logs@0.214.0': + resolution: {integrity: sha512-40lSJeqYO8Uz2Yj7u94/SJWE/wONa7rmMKjI1ZcIjgf3MHNHv1OZUCrCETGuaRF62d5pQD1wKIW+L4lmSMTzZA==} + engines: {node: '>=8.0.0'} + + '@opentelemetry/api@1.9.1': + resolution: {integrity: sha512-gLyJlPHPZYdAk1JENA9LeHejZe1Ti77/pTeFm/nMXmQH/HFZlcS/O2XJB+L8fkbrNSqhdtlvjBVjxwUYanNH5Q==} + engines: {node: '>=8.0.0'} + + '@opentelemetry/configuration@0.214.0': + resolution: {integrity: 
sha512-Q+awuEwxhETwIAXuxHvIY5ZMEP0ZqvxLTi9kclrkyVJppEUXYL3Bhiw3jYrxdHYMh0Y0tVInQH9FEZ1aMinvLA==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': ^1.9.0 + + '@opentelemetry/context-async-hooks@2.6.1': + resolution: {integrity: sha512-XHzhwRNkBpeP8Fs/qjGrAf9r9PRv67wkJQ/7ZPaBQQ68DYlTBBx5MF9LvPx7mhuXcDessKK2b+DcxqwpgkcivQ==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': '>=1.0.0 <1.10.0' + + '@opentelemetry/core@2.6.1': + resolution: {integrity: sha512-8xHSGWpJP9wBxgBpnqGL0R3PbdWQndL1Qp50qrg71+B28zK5OQmUgcDKLJgzyAAV38t4tOyLMGDD60LneR5W8g==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': '>=1.0.0 <1.10.0' + + '@opentelemetry/exporter-logs-otlp-grpc@0.214.0': + resolution: {integrity: sha512-SwmFRwO8mi6nndzbsjPgSFg7qy1WeNHRFD+s6uCsdiUDUt3+yzI2qiHE3/ub2f37+/CbeGcG+Ugc8Gwr6nu2Aw==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + + '@opentelemetry/exporter-logs-otlp-http@0.214.0': + resolution: {integrity: sha512-9qv2Tl/Hq6qc5pJCbzFJnzA0uvlb9DgM70yGJPYf3bA5LlLkRCpcn81i4JbcIH4grlQIWY6A+W7YG0LLvS1BAw==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + + '@opentelemetry/exporter-logs-otlp-proto@0.214.0': + resolution: {integrity: sha512-IWAVvCO1TlpotRjFmhQFz9RSfQy5BsLtDRBtptSrXZRwfyRPpuql/RMe5zdmu0Gxl3ERDFwOzOqkf3bwy7Jzcw==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + + '@opentelemetry/exporter-metrics-otlp-grpc@0.214.0': + resolution: {integrity: sha512-0NGxWHVYHgbp51SEzmsP+Hdups81eRs229STcSWHo3WO0aqY6RpJ9csxfyEtFgaNrBDv6UfOh0je4ss/ROS6XA==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + + '@opentelemetry/exporter-metrics-otlp-http@0.214.0': + resolution: {integrity: sha512-Tx/59RmjBgkXJ3qnsD04rpDrVWL53LU/czpgLJh+Ab98nAroe91I7vZ3uGN9mxwPS0jsZEnmqmHygVwB2vRMlA==} + engines: {node: 
^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + + '@opentelemetry/exporter-metrics-otlp-proto@0.214.0': + resolution: {integrity: sha512-pJIcghFGhx3VSCgP5U+yZx+OMNj0t+ttnhC8IjL5Wza7vWIczctF6t3AGcVQffi2dEqX+ZHANoBwoPR8y6RMKA==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + + '@opentelemetry/exporter-prometheus@0.214.0': + resolution: {integrity: sha512-4TGYoZKebUWVuYkV6r5wS2dUF4zH7EbWFw/Uqz1ZM1tGHQeFT9wzHGXq3iSIXMUrwu5jRdxjfMaXrYejPu2kpQ==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + + '@opentelemetry/exporter-trace-otlp-grpc@0.214.0': + resolution: {integrity: sha512-FWRZ7AWoTryYhthralHkfXUuyO3l7cRsnr49WcDio1orl2a7KxT8aDZdwQtV1adzoUvZ9Gfo+IstElghCS4zfw==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + + '@opentelemetry/exporter-trace-otlp-http@0.214.0': + resolution: {integrity: sha512-kIN8nTBMgV2hXzV/a20BCFilPZdAIMYYJGSgfMMRm/Xa+07y5hRDS2Vm12A/z8Cdu3Sq++ZvJfElokX2rkgGgw==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + + '@opentelemetry/exporter-trace-otlp-proto@0.214.0': + resolution: {integrity: sha512-ON0spYWb2yAdQ9b+ItNyK0c6qdtcs+0eVR4YFJkhJL7agfT8sHFg0e5EesauSRiTHPZHiDobI92k77q0lwAmqg==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + + '@opentelemetry/exporter-zipkin@2.6.1': + resolution: {integrity: sha512-km2/hD3inLTqtLnUAHDGz7ZP/VOyZNslrC/iN66x4jkmpckwlONW54LRPNI6fm09/musDtZga9EWsxgwnjGUlw==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': ^1.0.0 + + '@opentelemetry/instrumentation@0.214.0': + resolution: {integrity: sha512-MHqEX5Dk59cqVah5LiARMACku7jXSVk9iVDWOea4x3cr7VfdByeDCURK6o1lntT1JS/Tsovw01UJrBhN3/uC5w==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + + '@opentelemetry/otlp-exporter-base@0.214.0': + resolution: 
{integrity: sha512-u1Gdv0/E9wP+apqWf7Wv2npXmgJtxsW2XL0TEv9FZloTZRuMBKmu8cYVXwS4Hm3q/f/3FuCnPTgiwYvIqRSpRg==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + + '@opentelemetry/otlp-grpc-exporter-base@0.214.0': + resolution: {integrity: sha512-IDP6zcyA24RhNZ289MP6eToIZcinlmirHjX8v3zKCQ2ZhPpt5cGwkN91tCth337lqHIgWcTy90uKRiX/SzALDw==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + + '@opentelemetry/otlp-transformer@0.214.0': + resolution: {integrity: sha512-DSaYcuBRh6uozfsWN3R8HsN0yDhCuWP7tOFdkUOVaWD1KVJg8m4qiLUsg/tNhTLS9HUYUcwNpwL2eroLtsZZ/w==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': ^1.3.0 + + '@opentelemetry/propagator-b3@2.6.1': + resolution: {integrity: sha512-Dvz9TA6cPqIbxolSzQ5x9br6iQlqdGhVYrm+oYc7pfJ7LaVXz8F0XIqhWbnKB5YvfZ6SUmabBUUxnvHs/9uhxA==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': '>=1.0.0 <1.10.0' + + '@opentelemetry/propagator-jaeger@2.6.1': + resolution: {integrity: sha512-kKFMxBcjBZAC1vBch5mtZ/dJQvcAEKWga+c+q5iGgRLPIE6Mc649zEwMaCIQCzalziMJQiyUadFYMHmELB7AFw==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': '>=1.0.0 <1.10.0' + + '@opentelemetry/resources@2.6.1': + resolution: {integrity: sha512-lID/vxSuKWXM55XhAKNoYXu9Cutoq5hFdkbTdI/zDKQktXzcWBVhNsOkiZFTMU9UtEWuGRNe0HUgmsFldIdxVA==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': '>=1.3.0 <1.10.0' + + '@opentelemetry/sdk-logs@0.214.0': + resolution: {integrity: sha512-zf6acnScjhsaBUU22zXZ/sLWim1dfhUAbGXdMmHmNG3LfBnQ3DKsOCITb2IZwoUsNNMTogqFKBnlIPPftUgGwA==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': '>=1.4.0 <1.10.0' + + '@opentelemetry/sdk-metrics@2.6.1': + resolution: {integrity: sha512-9t9hJHX15meBy2NmTJxL+NJfXmnausR2xUDvE19XQce0Qi/GBtDGamU8nS1RMbdgDmhgpm3VaOu2+fiS/SfTpQ==} + engines: {node: ^18.19.0 || >=20.6.0} + 
peerDependencies: + '@opentelemetry/api': '>=1.9.0 <1.10.0' + + '@opentelemetry/sdk-node@0.214.0': + resolution: {integrity: sha512-gl2XvQBJuPjhGcw9SsnQO5qxChAPMuGRPFaD8lqtF+Cey91NgGUQ0sio2vlDFOSm3JOLzc44vL+OAfx1dXuZjg==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': '>=1.3.0 <1.10.0' + + '@opentelemetry/sdk-trace-base@2.6.1': + resolution: {integrity: sha512-r86ut4T1e8vNwB35CqCcKd45yzqH6/6Wzvpk2/cZB8PsPLlZFTvrh8yfOS3CYZYcUmAx4hHTZJ8AO8Dj8nrdhw==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': '>=1.3.0 <1.10.0' + + '@opentelemetry/sdk-trace-node@2.6.1': + resolution: {integrity: sha512-Hh2i4FwHWRFhnO2Q/p6svMxy8MPsNCG0uuzUY3glqm0rwM0nQvbTO1dXSp9OqQoTKXcQzaz9q1f65fsurmOhNw==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': '>=1.0.0 <1.10.0' + + '@opentelemetry/semantic-conventions@1.40.0': + resolution: {integrity: sha512-cifvXDhcqMwwTlTK04GBNeIe7yyo28Mfby85QXFe1Yk8nmi36Ab/5UQwptOx84SsoGNRg+EVSjwzfSZMy6pmlw==} + engines: {node: '>=14'} + '@pinojs/redact@0.4.0': resolution: {integrity: sha512-k2ENnmBugE/rzQfEcdWHcCY+/FM3VLzH9cYEsbdsoqrvzAKRhUZeRNhAZvB8OitQJ1TBed3yqWtdjzS6wJKBwg==} @@ -2192,6 +2405,11 @@ packages: resolution: {integrity: sha512-5cvg6CtKwfgdmVqY1WIiXKc3Q1bkRqGLi+2W/6ao+6Y7gu/RCwRuAhGEzh5B4KlszSuTLgZYuqFqo5bImjNKng==} engines: {node: '>= 0.6'} + acorn-import-attributes@1.9.5: + resolution: {integrity: sha512-n02Vykv5uA3eHGM/Z2dQrcD56kL8TyDb2p1+0P83PClMnC/nc+anbQRhIOWnSq4Ke/KvDPrY3C9hDtC/A3eHnQ==} + peerDependencies: + acorn: ^8 + acorn-import-phases@1.0.4: resolution: {integrity: sha512-wKmbr/DDiIXzEOiWrTTUcDm24kQ2vGfZQvM2fwg2vXqR5uW6aapr7ObPtj1th32b9u90/Pf4AItvdTh42fBmVQ==} engines: {node: '>=10.13.0'} @@ -2597,6 +2815,9 @@ packages: peerDependencies: devtools-protocol: '*' + cjs-module-lexer@2.2.0: + resolution: {integrity: sha512-4bHTS2YuzUvtoLjdy+98ykbNB5jS0+07EvFNXerqZQJ89F7DI6ET7OQo/HJuW6K0aVsKA9hj9/RVb2kQVOrPDQ==} + classcat@5.0.5: 
resolution: {integrity: sha512-JhZUT7JFcQy/EzW605k/ktHtncoo9vnyW/2GspNYwFlN1C/WmjuV/xtS04e9SOkL2sTdw0VAZ2UGCcQ9lR6p6w==} @@ -3639,6 +3860,10 @@ packages: resolution: {integrity: sha512-TR3KfrTZTYLPB6jUjfx6MF9WcWrHL9su5TObK4ZkYgBdWKPOFoSoQIdEuTuR82pmtxH2spWG9h6etwfr1pLBqQ==} engines: {node: '>=6'} + import-in-the-middle@3.0.1: + resolution: {integrity: sha512-pYkiyXVL2Mf3pozdlDGV6NAObxQx13Ae8knZk1UJRJ6uRW/ZRmTGHlQYtrsSl7ubuE5F8CD1z+s1n4RHNuTtuA==} + engines: {node: '>=18'} + imurmurhash@0.1.4: resolution: {integrity: sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA==} engines: {node: '>=0.8.19'} @@ -4012,6 +4237,9 @@ packages: resolution: {integrity: sha512-iPZK6eYjbxRu3uB4/WZ3EsEIMJFMqAoopl3R+zuq0UjcAm/MO6KCweDgPfP3elTztoKP3KtnVHxTn2NHBSDVUw==} engines: {node: '>=10'} + lodash.camelcase@4.3.0: + resolution: {integrity: sha512-TwuEnCnxbc3rAvhf/LbG7tJUDzhqXyFnv3dtzLOPgCG/hODL7WFnsbwktkD7yUV0RrreP/l1PALq/YSg6VvjlA==} + lodash.debounce@4.0.8: resolution: {integrity: sha512-FT1yDzDYEoYWhnSGnpE/4Kj1fLZkDFyqRb7fNt6FdYOSxlUWAtp42Eh6Wb0rGIv/m9Bgo7x4GhQbm5Ys4SG5ow==} @@ -4294,6 +4522,9 @@ packages: mnemonist@0.39.6: resolution: {integrity: sha512-A/0v5Z59y63US00cRSLiloEIw3t5G+MiKz4BhX21FI+YBJXBOGW0ohFxTxO08dsOYlzxo87T7vGfZKYp2bcAWA==} + module-details-from-path@1.0.4: + resolution: {integrity: sha512-EGWKgxALGMgzvxYF1UyGTy0HXX/2vHLkw6+NvDKW2jypWbHpjQuj4UMcqQWXHERJhVGKikolT06G3bcKe4fi7w==} + ms@2.1.3: resolution: {integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==} @@ -4892,6 +5123,10 @@ packages: resolution: {integrity: sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==} engines: {node: '>=0.10.0'} + require-in-the-middle@8.0.1: + resolution: {integrity: sha512-QT7FVMXfWOYFbeRBF6nu+I6tr2Tf3u0q8RIEjNob/heKY/nh7drD/k7eeMFmSQgnTtCzLDcCu/XEnpW2wk4xCQ==} + engines: {node: '>=9.3.0 || >=8.10.0 <9.0.0'} + require-main-filename@2.0.0: 
resolution: {integrity: sha512-NKN5kMDylKuldxYLSUfrbo5Tuzh4hd+2E8NPPX02mZtn1VuREQToYe/ZdlJy+J3uCpfaiGF05e7B8W0iXbQHmg==} @@ -6867,10 +7102,10 @@ snapshots: '@dagrejs/graphlib@3.0.4': {} - '@ducanh2912/next-pwa@10.2.9(next@14.2.35(@babel/core@7.29.0)(react-dom@18.3.1(react@18.3.1))(react@18.3.1))(webpack@5.105.2)': + '@ducanh2912/next-pwa@10.2.9(next@14.2.35(@babel/core@7.29.0)(@opentelemetry/api@1.9.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1))(webpack@5.105.2)': dependencies: fast-glob: 3.3.2 - next: 14.2.35(@babel/core@7.29.0)(react-dom@18.3.1(react@18.3.1))(react@18.3.1) + next: 14.2.35(@babel/core@7.29.0)(@opentelemetry/api@1.9.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1) semver: 7.6.3 webpack: 5.105.2 workbox-build: 7.1.1 @@ -7066,6 +7301,18 @@ snapshots: '@grammyjs/types@3.25.0': {} + '@grpc/grpc-js@1.14.3': + dependencies: + '@grpc/proto-loader': 0.8.0 + '@js-sdsl/ordered-map': 4.4.2 + + '@grpc/proto-loader@0.8.0': + dependencies: + lodash.camelcase: 4.3.0 + long: 5.3.2 + protobufjs: 7.5.4 + yargs: 17.7.2 + '@hono/node-server@1.19.9(hono@4.11.7)': dependencies: hono: 4.11.7 @@ -7226,6 +7473,8 @@ snapshots: '@jridgewell/resolve-uri': 3.1.2 '@jridgewell/sourcemap-codec': 1.5.5 + '@js-sdsl/ordered-map@4.4.2': {} + '@larksuiteoapi/node-sdk@1.59.0': dependencies: axios: 1.13.6(debug@4.4.3) @@ -7359,6 +7608,244 @@ snapshots: '@nolyfill/is-core-module@1.0.39': {} + '@opentelemetry/api-logs@0.214.0': + dependencies: + '@opentelemetry/api': 1.9.1 + + '@opentelemetry/api@1.9.1': {} + + '@opentelemetry/configuration@0.214.0(@opentelemetry/api@1.9.1)': + dependencies: + '@opentelemetry/api': 1.9.1 + '@opentelemetry/core': 2.6.1(@opentelemetry/api@1.9.1) + yaml: 2.8.2 + + '@opentelemetry/context-async-hooks@2.6.1(@opentelemetry/api@1.9.1)': + dependencies: + '@opentelemetry/api': 1.9.1 + + '@opentelemetry/core@2.6.1(@opentelemetry/api@1.9.1)': + dependencies: + '@opentelemetry/api': 1.9.1 + '@opentelemetry/semantic-conventions': 1.40.0 + + 
'@opentelemetry/exporter-logs-otlp-grpc@0.214.0(@opentelemetry/api@1.9.1)': + dependencies: + '@grpc/grpc-js': 1.14.3 + '@opentelemetry/api': 1.9.1 + '@opentelemetry/core': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/otlp-exporter-base': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/otlp-grpc-exporter-base': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/otlp-transformer': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/sdk-logs': 0.214.0(@opentelemetry/api@1.9.1) + + '@opentelemetry/exporter-logs-otlp-http@0.214.0(@opentelemetry/api@1.9.1)': + dependencies: + '@opentelemetry/api': 1.9.1 + '@opentelemetry/api-logs': 0.214.0 + '@opentelemetry/core': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/otlp-exporter-base': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/otlp-transformer': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/sdk-logs': 0.214.0(@opentelemetry/api@1.9.1) + + '@opentelemetry/exporter-logs-otlp-proto@0.214.0(@opentelemetry/api@1.9.1)': + dependencies: + '@opentelemetry/api': 1.9.1 + '@opentelemetry/api-logs': 0.214.0 + '@opentelemetry/core': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/otlp-exporter-base': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/otlp-transformer': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/resources': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/sdk-logs': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/sdk-trace-base': 2.6.1(@opentelemetry/api@1.9.1) + + '@opentelemetry/exporter-metrics-otlp-grpc@0.214.0(@opentelemetry/api@1.9.1)': + dependencies: + '@grpc/grpc-js': 1.14.3 + '@opentelemetry/api': 1.9.1 + '@opentelemetry/core': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/exporter-metrics-otlp-http': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/otlp-exporter-base': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/otlp-grpc-exporter-base': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/otlp-transformer': 0.214.0(@opentelemetry/api@1.9.1) + 
'@opentelemetry/resources': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/sdk-metrics': 2.6.1(@opentelemetry/api@1.9.1) + + '@opentelemetry/exporter-metrics-otlp-http@0.214.0(@opentelemetry/api@1.9.1)': + dependencies: + '@opentelemetry/api': 1.9.1 + '@opentelemetry/core': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/otlp-exporter-base': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/otlp-transformer': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/resources': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/sdk-metrics': 2.6.1(@opentelemetry/api@1.9.1) + + '@opentelemetry/exporter-metrics-otlp-proto@0.214.0(@opentelemetry/api@1.9.1)': + dependencies: + '@opentelemetry/api': 1.9.1 + '@opentelemetry/core': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/exporter-metrics-otlp-http': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/otlp-exporter-base': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/otlp-transformer': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/resources': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/sdk-metrics': 2.6.1(@opentelemetry/api@1.9.1) + + '@opentelemetry/exporter-prometheus@0.214.0(@opentelemetry/api@1.9.1)': + dependencies: + '@opentelemetry/api': 1.9.1 + '@opentelemetry/core': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/resources': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/sdk-metrics': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/semantic-conventions': 1.40.0 + + '@opentelemetry/exporter-trace-otlp-grpc@0.214.0(@opentelemetry/api@1.9.1)': + dependencies: + '@grpc/grpc-js': 1.14.3 + '@opentelemetry/api': 1.9.1 + '@opentelemetry/core': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/otlp-exporter-base': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/otlp-grpc-exporter-base': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/otlp-transformer': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/resources': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/sdk-trace-base': 
2.6.1(@opentelemetry/api@1.9.1) + + '@opentelemetry/exporter-trace-otlp-http@0.214.0(@opentelemetry/api@1.9.1)': + dependencies: + '@opentelemetry/api': 1.9.1 + '@opentelemetry/core': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/otlp-exporter-base': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/otlp-transformer': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/resources': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/sdk-trace-base': 2.6.1(@opentelemetry/api@1.9.1) + + '@opentelemetry/exporter-trace-otlp-proto@0.214.0(@opentelemetry/api@1.9.1)': + dependencies: + '@opentelemetry/api': 1.9.1 + '@opentelemetry/core': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/otlp-exporter-base': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/otlp-transformer': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/resources': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/sdk-trace-base': 2.6.1(@opentelemetry/api@1.9.1) + + '@opentelemetry/exporter-zipkin@2.6.1(@opentelemetry/api@1.9.1)': + dependencies: + '@opentelemetry/api': 1.9.1 + '@opentelemetry/core': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/resources': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/sdk-trace-base': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/semantic-conventions': 1.40.0 + + '@opentelemetry/instrumentation@0.214.0(@opentelemetry/api@1.9.1)': + dependencies: + '@opentelemetry/api': 1.9.1 + '@opentelemetry/api-logs': 0.214.0 + import-in-the-middle: 3.0.1 + require-in-the-middle: 8.0.1 + transitivePeerDependencies: + - supports-color + + '@opentelemetry/otlp-exporter-base@0.214.0(@opentelemetry/api@1.9.1)': + dependencies: + '@opentelemetry/api': 1.9.1 + '@opentelemetry/core': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/otlp-transformer': 0.214.0(@opentelemetry/api@1.9.1) + + '@opentelemetry/otlp-grpc-exporter-base@0.214.0(@opentelemetry/api@1.9.1)': + dependencies: + '@grpc/grpc-js': 1.14.3 + '@opentelemetry/api': 1.9.1 + '@opentelemetry/core': 
2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/otlp-exporter-base': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/otlp-transformer': 0.214.0(@opentelemetry/api@1.9.1) + + '@opentelemetry/otlp-transformer@0.214.0(@opentelemetry/api@1.9.1)': + dependencies: + '@opentelemetry/api': 1.9.1 + '@opentelemetry/api-logs': 0.214.0 + '@opentelemetry/core': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/resources': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/sdk-logs': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/sdk-metrics': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/sdk-trace-base': 2.6.1(@opentelemetry/api@1.9.1) + protobufjs: 7.5.4 + + '@opentelemetry/propagator-b3@2.6.1(@opentelemetry/api@1.9.1)': + dependencies: + '@opentelemetry/api': 1.9.1 + '@opentelemetry/core': 2.6.1(@opentelemetry/api@1.9.1) + + '@opentelemetry/propagator-jaeger@2.6.1(@opentelemetry/api@1.9.1)': + dependencies: + '@opentelemetry/api': 1.9.1 + '@opentelemetry/core': 2.6.1(@opentelemetry/api@1.9.1) + + '@opentelemetry/resources@2.6.1(@opentelemetry/api@1.9.1)': + dependencies: + '@opentelemetry/api': 1.9.1 + '@opentelemetry/core': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/semantic-conventions': 1.40.0 + + '@opentelemetry/sdk-logs@0.214.0(@opentelemetry/api@1.9.1)': + dependencies: + '@opentelemetry/api': 1.9.1 + '@opentelemetry/api-logs': 0.214.0 + '@opentelemetry/core': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/resources': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/semantic-conventions': 1.40.0 + + '@opentelemetry/sdk-metrics@2.6.1(@opentelemetry/api@1.9.1)': + dependencies: + '@opentelemetry/api': 1.9.1 + '@opentelemetry/core': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/resources': 2.6.1(@opentelemetry/api@1.9.1) + + '@opentelemetry/sdk-node@0.214.0(@opentelemetry/api@1.9.1)': + dependencies: + '@opentelemetry/api': 1.9.1 + '@opentelemetry/api-logs': 0.214.0 + '@opentelemetry/configuration': 0.214.0(@opentelemetry/api@1.9.1) + 
'@opentelemetry/context-async-hooks': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/core': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/exporter-logs-otlp-grpc': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/exporter-logs-otlp-http': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/exporter-logs-otlp-proto': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/exporter-metrics-otlp-grpc': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/exporter-metrics-otlp-http': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/exporter-metrics-otlp-proto': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/exporter-prometheus': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/exporter-trace-otlp-grpc': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/exporter-trace-otlp-http': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/exporter-trace-otlp-proto': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/exporter-zipkin': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/instrumentation': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/otlp-exporter-base': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/propagator-b3': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/propagator-jaeger': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/resources': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/sdk-logs': 0.214.0(@opentelemetry/api@1.9.1) + '@opentelemetry/sdk-metrics': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/sdk-trace-base': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/sdk-trace-node': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/semantic-conventions': 1.40.0 + transitivePeerDependencies: + - supports-color + + '@opentelemetry/sdk-trace-base@2.6.1(@opentelemetry/api@1.9.1)': + dependencies: + '@opentelemetry/api': 1.9.1 + '@opentelemetry/core': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/resources': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/semantic-conventions': 1.40.0 + + 
'@opentelemetry/sdk-trace-node@2.6.1(@opentelemetry/api@1.9.1)': + dependencies: + '@opentelemetry/api': 1.9.1 + '@opentelemetry/context-async-hooks': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/core': 2.6.1(@opentelemetry/api@1.9.1) + '@opentelemetry/sdk-trace-base': 2.6.1(@opentelemetry/api@1.9.1) + + '@opentelemetry/semantic-conventions@1.40.0': {} + '@pinojs/redact@0.4.0': {} '@pkgjs/parseargs@0.11.0': @@ -8028,6 +8515,10 @@ snapshots: mime-types: 3.0.2 negotiator: 1.0.0 + acorn-import-attributes@1.9.5(acorn@8.15.0): + dependencies: + acorn: 8.15.0 + acorn-import-phases@1.0.4(acorn@8.15.0): dependencies: acorn: 8.15.0 @@ -8461,6 +8952,8 @@ snapshots: mitt: 3.0.1 zod: 3.25.76 + cjs-module-lexer@2.2.0: {} + classcat@5.0.5: {} client-only@0.0.1: {} @@ -9032,8 +9525,8 @@ snapshots: '@typescript-eslint/parser': 8.54.0(eslint@8.57.1)(typescript@5.9.3) eslint: 8.57.1 eslint-import-resolver-node: 0.3.9 - eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1) - eslint-plugin-import: 2.32.0(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1) + eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0)(eslint@8.57.1) + eslint-plugin-import: 2.32.0(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1)(eslint@8.57.1) eslint-plugin-jsx-a11y: 6.10.2(eslint@8.57.1) eslint-plugin-react: 7.37.5(eslint@8.57.1) eslint-plugin-react-hooks: 5.0.0-canary-7118f5dd7-20230705(eslint@8.57.1) @@ -9052,7 +9545,7 @@ snapshots: transitivePeerDependencies: - supports-color - 
eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1): + eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0)(eslint@8.57.1): dependencies: '@nolyfill/is-core-module': 1.0.39 debug: 4.4.3 @@ -9063,24 +9556,24 @@ snapshots: tinyglobby: 0.2.15 unrs-resolver: 1.11.1 optionalDependencies: - eslint-plugin-import: 2.32.0(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1) + eslint-plugin-import: 2.32.0(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1)(eslint@8.57.1) transitivePeerDependencies: - supports-color - eslint-module-utils@2.12.1(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1): + eslint-module-utils@2.12.1(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1)(eslint@8.57.1): dependencies: debug: 3.2.7 optionalDependencies: '@typescript-eslint/parser': 8.54.0(eslint@8.57.1)(typescript@5.9.3) eslint: 8.57.1 eslint-import-resolver-node: 0.3.9 - eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1) + eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0)(eslint@8.57.1) transitivePeerDependencies: - supports-color eslint-plugin-cafe@file:packages/web/eslint-plugins: {} - 
eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1): + eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1)(eslint@8.57.1): dependencies: '@rtsao/scc': 1.1.0 array-includes: 3.1.9 @@ -9091,7 +9584,7 @@ snapshots: doctrine: 2.1.0 eslint: 8.57.1 eslint-import-resolver-node: 0.3.9 - eslint-module-utils: 2.12.1(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1) + eslint-module-utils: 2.12.1(@typescript-eslint/parser@8.54.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1)(eslint@8.57.1) hasown: 2.0.2 is-core-module: 2.16.1 is-glob: 4.0.3 @@ -9781,6 +10274,13 @@ snapshots: parent-module: 1.0.1 resolve-from: 4.0.0 + import-in-the-middle@3.0.1: + dependencies: + acorn: 8.15.0 + acorn-import-attributes: 1.9.5(acorn@8.15.0) + cjs-module-lexer: 2.2.0 + module-details-from-path: 1.0.4 + imurmurhash@0.1.4: {} inflight@1.0.6: @@ -10164,6 +10664,8 @@ snapshots: dependencies: p-locate: 5.0.0 + lodash.camelcase@4.3.0: {} + lodash.debounce@4.0.8: {} lodash.defaults@4.2.0: {} @@ -10626,6 +11128,8 @@ snapshots: dependencies: obliterator: 2.0.5 + module-details-from-path@1.0.4: {} + ms@2.1.3: {} mz@2.7.0: @@ -10655,7 +11159,7 @@ snapshots: react: 18.3.1 react-dom: 18.3.1(react@18.3.1) - next@14.2.35(@babel/core@7.29.0)(react-dom@18.3.1(react@18.3.1))(react@18.3.1): + next@14.2.35(@babel/core@7.29.0)(@opentelemetry/api@1.9.1)(react-dom@18.3.1(react@18.3.1))(react@18.3.1): dependencies: 
'@next/env': 14.2.35 '@swc/helpers': 0.5.5 @@ -10676,6 +11180,7 @@ snapshots: '@next/swc-win32-arm64-msvc': 14.2.33 '@next/swc-win32-ia32-msvc': 14.2.33 '@next/swc-win32-x64-msvc': 14.2.33 + '@opentelemetry/api': 1.9.1 transitivePeerDependencies: - '@babel/core' - babel-plugin-macros @@ -11346,6 +11851,13 @@ snapshots: require-from-string@2.0.2: {} + require-in-the-middle@8.0.1: + dependencies: + debug: 4.4.3 + module-details-from-path: 1.0.4 + transitivePeerDependencies: + - supports-color + require-main-filename@2.0.0: {} requires-port@1.0.0: {} @@ -12323,7 +12835,7 @@ snapshots: tsx: 4.21.0 yaml: 2.8.2 - vitest@4.0.18(@types/node@20.19.31)(jiti@1.21.7)(jsdom@28.0.0)(terser@5.46.0)(tsx@4.21.0)(yaml@2.8.2): + vitest@4.0.18(@opentelemetry/api@1.9.1)(@types/node@20.19.31)(jiti@1.21.7)(jsdom@28.0.0)(terser@5.46.0)(tsx@4.21.0)(yaml@2.8.2): dependencies: '@vitest/expect': 4.0.18 '@vitest/mocker': 4.0.18(vite@7.3.1(@types/node@20.19.31)(jiti@1.21.7)(terser@5.46.0)(tsx@4.21.0)(yaml@2.8.2)) @@ -12346,6 +12858,7 @@ snapshots: vite: 7.3.1(@types/node@20.19.31)(jiti@1.21.7)(terser@5.46.0)(tsx@4.21.0)(yaml@2.8.2) why-is-node-running: 2.3.0 optionalDependencies: + '@opentelemetry/api': 1.9.1 '@types/node': 20.19.31 jsdom: 28.0.0 transitivePeerDependencies: From c8cd22a29d80b8aac0011300676ca501cb3be54d Mon Sep 17 00:00:00 2001 From: bouillipx Date: Thu, 9 Apr 2026 17:01:29 +0800 Subject: [PATCH 03/14] feat(F152): wire OTel instruments into business code Connect all 5 OTel instruments to their data sources: - invocationDuration: recorded in invoke-single-cat finally block (seconds) - activeInvocations: incremented on create, decremented in finally - tokenUsage: recorded from provider metadata.usage (input/output split) - llmCallDuration: recorded from metadata.usage.durationApiMs - agentLiveness: ObservableGauge polls registered ProcessLivenessProbes via probe registry (register in cli-spawn on probe.start, unregister in finally on probe.stop) All attributes use D2 
allowlist-safe keys (agent.id, gen_ai.system, gen_ai.request.model, operation.name, status). Co-Authored-By: Claude Opus 4.6 --- .../agents/invocation/invoke-single-cat.ts | 43 +++++++++++++++++++ .../infrastructure/telemetry/instruments.ts | 33 +++++++++++++- packages/api/src/utils/cli-spawn.ts | 8 ++++ 3 files changed, 83 insertions(+), 1 deletion(-) diff --git a/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts b/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts index 68efd6d20..f9ec1809c 100644 --- a/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts +++ b/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts @@ -24,6 +24,20 @@ import { isSessionChainEnabled } from '../../../../../config/cat-config-loader.j import { getContextWindowFallback } from '../../../../../config/context-window-sizes.js'; import { getSessionStrategy, shouldTakeAction } from '../../../../../config/session-strategy.js'; import { createModuleLogger } from '../../../../../infrastructure/logger.js'; +import { + AGENT_ID, + GENAI_MODEL, + GENAI_SYSTEM, + OPERATION_NAME, + STATUS, +} from '../../../../../infrastructure/telemetry/genai-semconv.js'; +import { + activeInvocations, + invocationDuration, + llmCallDuration, + tokenUsage, +} from '../../../../../infrastructure/telemetry/instruments.js'; +import { normalizeModel } from '../../../../../infrastructure/telemetry/model-normalizer.js'; import { resolveActiveProjectRoot } from '../../../../../utils/active-project-root.js'; import { resolveCliCommand } from '../../../../../utils/cli-resolve.js'; import { DEFAULT_CLI_TIMEOUT_MS, resolveCliTimeoutMs } from '../../../../../utils/cli-timeout.js'; @@ -281,6 +295,9 @@ export async function* invokeSingleCat(deps: InvocationDeps, params: InvocationP log.info({ invocationId, catId, threadId, userId }, 'Created invocation'); + // F152: Track active invocations + activeInvocations.add(1, { 
[AGENT_ID]: catId, [OPERATION_NAME]: 'invoke' }); + // F22 R2 P1-1: Expose invocationId to caller (route-serial/parallel) so they can // use it for RichBlockBuffer.consume() instead of getLatestId() which is wrong // under preemption — old invocation A would steal new invocation B's blocks. @@ -1203,6 +1220,25 @@ export async function* invokeSingleCat(deps: InvocationDeps, params: InvocationP // F8: Push token usage for frontend cost/token display if (msg.metadata?.usage) { + // F152: Record OTel token usage + LLM call duration + const modelBucket = normalizeModel(msg.metadata.model ?? ''); + const providerSystem = provider ?? 'unknown'; + const tokenAttrs = { + [AGENT_ID]: catId, + [GENAI_SYSTEM]: providerSystem, + [GENAI_MODEL]: modelBucket, + [OPERATION_NAME]: 'invoke', + }; + if (msg.metadata.usage.inputTokens) { + tokenUsage.add(msg.metadata.usage.inputTokens, { ...tokenAttrs, [STATUS]: 'input' }); + } + if (msg.metadata.usage.outputTokens) { + tokenUsage.add(msg.metadata.usage.outputTokens, { ...tokenAttrs, [STATUS]: 'output' }); + } + if (msg.metadata.usage.durationApiMs) { + llmCallDuration.record(msg.metadata.usage.durationApiMs / 1000, tokenAttrs); + } + outputs.push({ type: 'system_info' as const, catId, @@ -1762,6 +1798,13 @@ export async function* invokeSingleCat(deps: InvocationDeps, params: InvocationP await finalizeTaskProgress(); + // F152: Record invocation duration and decrement active count + const finalDurationMs = Date.now() - startTime; + const otelStatus = hadError ? 
'error' : 'ok'; + const otelAttrs = { [AGENT_ID]: catId, [OPERATION_NAME]: 'invoke', [STATUS]: otelStatus }; + invocationDuration.record(finalDurationMs / 1000, otelAttrs); + activeInvocations.add(-1, { [AGENT_ID]: catId, [OPERATION_NAME]: 'invoke' }); + // F089: Mark agent pane status when invocation completes if (deps.agentPaneRegistry?.getByInvocation(invocationId)) { if (hadError) { diff --git a/packages/api/src/infrastructure/telemetry/instruments.ts b/packages/api/src/infrastructure/telemetry/instruments.ts index 79763332f..531474372 100644 --- a/packages/api/src/infrastructure/telemetry/instruments.ts +++ b/packages/api/src/infrastructure/telemetry/instruments.ts @@ -40,8 +40,11 @@ export const tokenUsage = meter.createCounter('cat_cafe.token.usage', { unit: 'tokens', }); +/** Liveness state type. */ +export type LivenessState = 'dead' | 'idle-silent' | 'busy-silent' | 'active'; + /** Map liveness state string to numeric gauge value. */ -export function livenessStateToNumber(state: 'dead' | 'idle-silent' | 'busy-silent' | 'active'): number { +export function livenessStateToNumber(state: LivenessState): number { switch (state) { case 'dead': return 0; @@ -53,3 +56,31 @@ export function livenessStateToNumber(state: 'dead' | 'idle-silent' | 'busy-sile return 3; } } + +// --- Liveness probe registry for ObservableGauge --- + +interface LivenessProbeRef { + catId: string; + getState: () => LivenessState; +} + +const activeProbes = new Map(); + +/** Register a liveness probe for ObservableGauge polling. */ +export function registerLivenessProbe(invocationId: string, catId: string, getState: () => LivenessState): void { + activeProbes.set(invocationId, { catId, getState }); +} + +/** Unregister a liveness probe when invocation ends. 
*/ +export function unregisterLivenessProbe(invocationId: string): void { + activeProbes.delete(invocationId); +} + +// Register the ObservableGauge callback — polls all active probes +agentLiveness.addCallback((result) => { + for (const [, probe] of activeProbes) { + result.observe(livenessStateToNumber(probe.getState()), { + 'agent.id': probe.catId, + }); + } +}); diff --git a/packages/api/src/utils/cli-spawn.ts b/packages/api/src/utils/cli-spawn.ts index 700f4df64..386edabed 100644 --- a/packages/api/src/utils/cli-spawn.ts +++ b/packages/api/src/utils/cli-spawn.ts @@ -5,6 +5,7 @@ import { spawn as nodeSpawn } from 'node:child_process'; import { createModuleLogger } from '../infrastructure/logger.js'; +import { registerLivenessProbe, unregisterLivenessProbe } from '../infrastructure/telemetry/instruments.js'; import { escapeBashArg, escapeCmdArg, findGitBashPath, resolveWindowsShimSpawn } from './cli-spawn-win.js'; import { resolveCliTimeoutMs } from './cli-timeout.js'; import type { ChildProcessLike, CliSpawnOptions, SpawnFn } from './cli-types.js'; @@ -199,6 +200,11 @@ export async function* spawnCli( if (options.livenessProbe && child.pid !== undefined) { probe = new ProcessLivenessProbe(child.pid, options.livenessProbe); probe.start(); + // F152: Register probe for OTel agentLiveness gauge + if (options.invocationId) { + const catId = options.env?.CAT_CAFE_CAT_ID ?? 
'unknown'; + registerLivenessProbe(options.invocationId, catId, () => probe!.getState()); + } } try { @@ -385,6 +391,8 @@ export async function* spawnCli( } process.off('exit', exitHandler); probe?.stop(); + // F152: Unregister probe from OTel gauge + if (options.invocationId) unregisterLivenessProbe(options.invocationId); killChild(); } } From 9222b1d1b622e8e56f3c2cf4efe5c7ae3964a4a7 Mon Sep 17 00:00:00 2001 From: bouillipx Date: Thu, 9 Apr 2026 20:12:59 +0800 Subject: [PATCH 04/14] =?UTF-8?q?fix(F152):=20address=20review=20findings?= =?UTF-8?q?=20=E2=80=94=202=20P1=20+=201=20P2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit P1-1: Move activeInvocations.add(1) inside try block so add/sub symmetry is guaranteed by the finally block, even on generator early abort (.return() or reference drop). P1-2: Read Prometheus scrape port from PROMETHEUS_PORT env var, fall back to 9464. Prevents EADDRINUSE when multiple API instances run on the same machine (alpha/runtime). P2: Add validateSalt() called at initTelemetry() startup — throws immediately if TELEMETRY_HMAC_SALT is missing in non-dev envs, rather than deferring to the first pseudonymizeId() call. 
Co-Authored-By: Claude Opus 4.6 --- .../cats/services/agents/invocation/invoke-single-cat.ts | 7 ++++--- packages/api/src/infrastructure/telemetry/hmac.ts | 8 ++++++++ packages/api/src/infrastructure/telemetry/init.ts | 7 ++++++- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts b/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts index f9ec1809c..635796c42 100644 --- a/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts +++ b/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts @@ -295,9 +295,6 @@ export async function* invokeSingleCat(deps: InvocationDeps, params: InvocationP log.info({ invocationId, catId, threadId, userId }, 'Created invocation'); - // F152: Track active invocations - activeInvocations.add(1, { [AGENT_ID]: catId, [OPERATION_NAME]: 'invoke' }); - // F22 R2 P1-1: Expose invocationId to caller (route-serial/parallel) so they can // use it for RichBlockBuffer.consume() instead of getLatestId() which is wrong // under preemption — old invocation A would steal new invocation B's blocks. @@ -436,6 +433,10 @@ export async function* invokeSingleCat(deps: InvocationDeps, params: InvocationP let sessionMutexRelease: (() => void) | undefined; try { + // F152: Track active invocations — must be inside try so add/sub symmetry + // is guaranteed by the finally block, even on generator early abort. 
+ activeInvocations.add(1, { [AGENT_ID]: catId, [OPERATION_NAME]: 'invoke' }); + let sessionId: string | undefined; try { sessionId = await preflightRace(sessionManager.get(userId, catId, threadId), 'sessionManager.get', signal); diff --git a/packages/api/src/infrastructure/telemetry/hmac.ts b/packages/api/src/infrastructure/telemetry/hmac.ts index 4bf31905c..879d7e570 100644 --- a/packages/api/src/infrastructure/telemetry/hmac.ts +++ b/packages/api/src/infrastructure/telemetry/hmac.ts @@ -25,6 +25,14 @@ function getSalt(): string { ); } +/** + * Validate salt is available. Call at startup (in initTelemetry) to fail fast + * instead of deferring to the first pseudonymizeId() call. + */ +export function validateSalt(): void { + getSalt(); +} + /** * HMAC-SHA256 pseudonymize an identifier. * Returns first 32 hex chars (128-bit, collision-safe for correlation). diff --git a/packages/api/src/infrastructure/telemetry/init.ts b/packages/api/src/infrastructure/telemetry/init.ts index 6d61bd957..50345f1e6 100644 --- a/packages/api/src/infrastructure/telemetry/init.ts +++ b/packages/api/src/infrastructure/telemetry/init.ts @@ -20,6 +20,7 @@ import { NodeSDK } from '@opentelemetry/sdk-node'; import { BatchSpanProcessor } from '@opentelemetry/sdk-trace-node'; import { ATTR_SERVICE_NAME, ATTR_SERVICE_VERSION } from '@opentelemetry/semantic-conventions'; import { createModuleLogger } from '../logger.js'; +import { validateSalt } from './hmac.js'; import { createMetricAllowlistViews } from './metric-allowlist.js'; import { RedactingLogProcessor, RedactingSpanProcessor } from './redactor.js'; @@ -37,7 +38,7 @@ export interface TelemetryConfig { const DEFAULT_CONFIG: Required = { serviceName: 'cat-cafe-api', serviceVersion: '0.1.0', - prometheusPort: 9464, + prometheusPort: process.env.PROMETHEUS_PORT ? 
Number(process.env.PROMETHEUS_PORT) : 9464, otlpEnabled: !!process.env.OTEL_EXPORTER_OTLP_ENDPOINT, }; @@ -55,6 +56,10 @@ export function initTelemetry(config?: TelemetryConfig): () => Promise { const cfg = { ...DEFAULT_CONFIG, ...config }; + // P2 fix: validate HMAC salt at startup, not on first redaction call. + // Throws immediately if salt is missing in non-dev environments. + validateSalt(); + const resource = resourceFromAttributes({ [ATTR_SERVICE_NAME]: cfg.serviceName, [ATTR_SERVICE_VERSION]: cfg.serviceVersion, From d62635dbccc998723954c35e4ccea0b033645901 Mon Sep 17 00:00:00 2001 From: bouillipx Date: Thu, 9 Apr 2026 22:50:10 +0800 Subject: [PATCH 05/14] fix(F152): wire trace spans + OTel log bridge + /ready SQLite probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses 砚砚 R2 review findings (2 P1 + 1 P2): P1 Trace signal: Create invocation span via @opentelemetry/api tracer in invoke-single-cat — span covers full lifecycle (try/catch/finally), records SpanStatusCode.ERROR on failure, SpanStatusCode.OK on success. RedactingSpanProcessor processes these before export. P1 Log signal: Add otel-logger.ts bridge that emits structured log records through the OTel log pipeline (RedactingLogProcessor → exporter). Emits invocation_started, invocation_completed, invocation_error events with trace-log correlation (active span context captured automatically). Does NOT replace Pino for local logs — parallel emission path. P2 /ready endpoint: Add SQLite health probe (evidenceStore.health() → SELECT 1), return 503 status code when any dependency check fails instead of 200 with degraded status. 
Co-Authored-By: Claude Opus 4.6 --- .../agents/invocation/invoke-single-cat.ts | 22 ++++++++++ packages/api/src/index.ts | 19 +++++++- .../infrastructure/telemetry/otel-logger.ts | 43 +++++++++++++++++++ 3 files changed, 82 insertions(+), 2 deletions(-) create mode 100644 packages/api/src/infrastructure/telemetry/otel-logger.ts diff --git a/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts b/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts index 635796c42..bea90809e 100644 --- a/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts +++ b/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts @@ -14,6 +14,7 @@ import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs'; import { rm } from 'node:fs/promises'; import { dirname, resolve } from 'node:path'; import { type CatId, type ContextHealth, catRegistry, type MessageContent } from '@cat-cafe/shared'; +import { SpanStatusCode, trace } from '@opentelemetry/api'; import { resolveBuiltinClientForProvider, resolveForClient, @@ -38,6 +39,7 @@ import { tokenUsage, } from '../../../../../infrastructure/telemetry/instruments.js'; import { normalizeModel } from '../../../../../infrastructure/telemetry/model-normalizer.js'; +import { emitOtelLog } from '../../../../../infrastructure/telemetry/otel-logger.js'; import { resolveActiveProjectRoot } from '../../../../../utils/active-project-root.js'; import { resolveCliCommand } from '../../../../../utils/cli-resolve.js'; import { DEFAULT_CLI_TIMEOUT_MS, resolveCliTimeoutMs } from '../../../../../utils/cli-timeout.js'; @@ -59,6 +61,7 @@ import { } from '../providers/opencode-config-template.js'; const log = createModuleLogger('invoke'); +const tracer = trace.getTracer('cat-cafe-api', '0.1.0'); let _openCodeKnownModels: Set | null = null; export function getOpenCodeKnownModels(): Set { @@ -432,11 +435,19 @@ export async function* invokeSingleCat(deps: 
InvocationDeps, params: InvocationP // F118: Declared before try so it's accessible in finally let sessionMutexRelease: (() => void) | undefined; + // F152: Create invocation span for distributed tracing + const invocationSpan = tracer.startSpan('cat_cafe.invocation', { + attributes: { [AGENT_ID]: catId, [OPERATION_NAME]: 'invoke' }, + }); + try { // F152: Track active invocations — must be inside try so add/sub symmetry // is guaranteed by the finally block, even on generator early abort. activeInvocations.add(1, { [AGENT_ID]: catId, [OPERATION_NAME]: 'invoke' }); + // F152: Emit invocation start through OTel log pipeline + emitOtelLog('INFO', 'invocation_started', { [AGENT_ID]: catId, [OPERATION_NAME]: 'invoke' }); + let sessionId: string | undefined; try { sessionId = await preflightRace(sessionManager.get(userId, catId, threadId), 'sessionManager.get', signal); @@ -1733,6 +1744,10 @@ export async function* invokeSingleCat(deps: InvocationDeps, params: InvocationP } didComplete = true; // F118 AC-C5: Normal completion reached } catch (err) { + // F152: Record error on invocation span + OTel log + invocationSpan.setStatus({ code: SpanStatusCode.ERROR, message: err instanceof Error ? 
err.message : String(err) }); + emitOtelLog('ERROR', 'invocation_error', { [AGENT_ID]: catId, [STATUS]: 'error' }); + // === CAT_ERROR 审计 (fire-and-forget, 缅因猫 review P2-3) === const durationMs = Date.now() - startTime; auditLog @@ -1814,5 +1829,12 @@ export async function* invokeSingleCat(deps: InvocationDeps, params: InvocationP deps.agentPaneRegistry.markDone(invocationId, 0); } } + + // F152: End invocation span + emit completion log through OTel + if (didComplete && !hadError) { + invocationSpan.setStatus({ code: SpanStatusCode.OK }); + emitOtelLog('INFO', 'invocation_completed', { [AGENT_ID]: catId, [STATUS]: 'ok' }); + } + invocationSpan.end(); } } diff --git a/packages/api/src/index.ts b/packages/api/src/index.ts index 1b1687d03..11a7eec55 100644 --- a/packages/api/src/index.ts +++ b/packages/api/src/index.ts @@ -238,8 +238,10 @@ async function main(): Promise { // Health check app.get('/health', async () => ({ status: 'ok', timestamp: Date.now() })); - // F152: Readiness check — verifies dependencies are reachable - app.get('/ready', async () => { + // F152: Readiness check — verifies dependencies are reachable. + // evidenceStoreRef is set after memoryServices init; handler runs at request time. + let evidenceStoreRef: { health(): Promise } | null = null; + app.get('/ready', async (_request, reply) => { const checks: Record = {}; // Redis probe if (redisClient) { @@ -253,7 +255,18 @@ async function main(): Promise { } else { checks.redis = { ok: true, ms: 0 }; // memory mode, always ready } + // SQLite probe + if (evidenceStoreRef) { + const t0 = Date.now(); + try { + const ok = await evidenceStoreRef.health(); + checks.sqlite = { ok, ms: Date.now() - t0, ...(ok ? {} : { error: 'SELECT 1 failed' }) }; + } catch (err) { + checks.sqlite = { ok: false, ms: Date.now() - t0, error: String(err) }; + } + } const allOk = Object.values(checks).every((c) => c.ok); + if (!allOk) reply.code(503); return { status: allOk ? 
'ready' : 'degraded', timestamp: Date.now(), checks }; }); @@ -465,6 +478,8 @@ async function main(): Promise { return excluded; }, }); + // F152: Wire evidence store into /ready probe + evidenceStoreRef = memoryServices.evidenceStore; app.log.info('[api] F102: SQLite memory services initialized'); // F102 D-2: Auto-rebuild evidence index on startup (AC-D4) diff --git a/packages/api/src/infrastructure/telemetry/otel-logger.ts b/packages/api/src/infrastructure/telemetry/otel-logger.ts new file mode 100644 index 000000000..6d47b20fe --- /dev/null +++ b/packages/api/src/infrastructure/telemetry/otel-logger.ts @@ -0,0 +1,43 @@ +/** + * F152: OTel Logger bridge — emits structured log records through the + * OTel log pipeline (RedactingLogProcessor → exporter). + * + * This does NOT replace Pino for local logs. It provides a parallel + * emission path so that key events flow through OTel's log signal, + * enabling correlation with traces and metrics in external backends. + */ + +import { trace } from '@opentelemetry/api'; +import { logs, SeverityNumber } from '@opentelemetry/api-logs'; + +const logger = logs.getLogger('cat-cafe-api', '0.1.0'); + +/** + * Emit a structured log record through the OTel log pipeline. + * Automatically captures active span context for trace-log correlation. + */ +export function emitOtelLog( + severity: 'INFO' | 'WARN' | 'ERROR', + body: string, + attributes?: Record, +): void { + const severityMap: Record = { + INFO: SeverityNumber.INFO, + WARN: SeverityNumber.WARN, + ERROR: SeverityNumber.ERROR, + }; + + // Capture active span context for trace-log correlation + const activeSpan = trace.getActiveSpan(); + const spanContext = activeSpan?.spanContext(); + + logger.emit({ + severityNumber: severityMap[severity], + severityText: severity, + body, + attributes: { + ...attributes, + ...(spanContext ? 
{ traceId: spanContext.traceId, spanId: spanContext.spanId } : {}), + }, + }); +} From 3dff4b34de94d84626827255444fddc70ecea2c8 Mon Sep 17 00:00:00 2001 From: bouillipx Date: Fri, 10 Apr 2026 01:32:24 +0800 Subject: [PATCH 06/14] fix(F152): proper trace-log correlation + api-logs dependency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses 砚砚 R3 review findings (1 P1 + 1 P2 + 1 P3): P1: Fix trace-log correlation — emitOtelLog() now accepts an explicit Span parameter. Derives Context via trace.setSpan(context.active(), span) and passes it as LogRecord.context, which is the OTel-standard way to link log records to spans. Removed manual traceId/spanId from attributes. All 3 call sites in invoke-single-cat pass invocationSpan. P2: Add @opentelemetry/api-logs as direct dependency in package.json. Previously relied on transitive hoist from sdk-logs. P3: Add regression test verifying otel-logger uses trace.setSpan() + LogRecord.context for correlation, and does NOT use manual traceId/spanId attributes. 
Co-Authored-By: Claude Opus 4.6 --- packages/api/package.json | 1 + .../agents/invocation/invoke-single-cat.ts | 6 ++--- .../infrastructure/telemetry/otel-logger.ts | 25 +++++++++++-------- .../telemetry/cli-spawn-redaction.test.js | 23 +++++++++++++++++ pnpm-lock.yaml | 3 +++ 5 files changed, 44 insertions(+), 14 deletions(-) diff --git a/packages/api/package.json b/packages/api/package.json index 755f5fea3..da993ba8d 100644 --- a/packages/api/package.json +++ b/packages/api/package.json @@ -31,6 +31,7 @@ "@larksuiteoapi/node-sdk": "^1.59.0", "@modelcontextprotocol/sdk": "^1.0.0", "@opentelemetry/api": "^1.9.1", + "@opentelemetry/api-logs": "^0.214.0", "@opentelemetry/exporter-logs-otlp-http": "^0.214.0", "@opentelemetry/exporter-metrics-otlp-http": "^0.214.0", "@opentelemetry/exporter-prometheus": "^0.214.0", diff --git a/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts b/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts index bea90809e..332821e9f 100644 --- a/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts +++ b/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts @@ -446,7 +446,7 @@ export async function* invokeSingleCat(deps: InvocationDeps, params: InvocationP activeInvocations.add(1, { [AGENT_ID]: catId, [OPERATION_NAME]: 'invoke' }); // F152: Emit invocation start through OTel log pipeline - emitOtelLog('INFO', 'invocation_started', { [AGENT_ID]: catId, [OPERATION_NAME]: 'invoke' }); + emitOtelLog('INFO', 'invocation_started', { [AGENT_ID]: catId, [OPERATION_NAME]: 'invoke' }, invocationSpan); let sessionId: string | undefined; try { @@ -1746,7 +1746,7 @@ export async function* invokeSingleCat(deps: InvocationDeps, params: InvocationP } catch (err) { // F152: Record error on invocation span + OTel log invocationSpan.setStatus({ code: SpanStatusCode.ERROR, message: err instanceof Error ? 
err.message : String(err) }); - emitOtelLog('ERROR', 'invocation_error', { [AGENT_ID]: catId, [STATUS]: 'error' }); + emitOtelLog('ERROR', 'invocation_error', { [AGENT_ID]: catId, [STATUS]: 'error' }, invocationSpan); // === CAT_ERROR 审计 (fire-and-forget, 缅因猫 review P2-3) === const durationMs = Date.now() - startTime; @@ -1833,7 +1833,7 @@ export async function* invokeSingleCat(deps: InvocationDeps, params: InvocationP // F152: End invocation span + emit completion log through OTel if (didComplete && !hadError) { invocationSpan.setStatus({ code: SpanStatusCode.OK }); - emitOtelLog('INFO', 'invocation_completed', { [AGENT_ID]: catId, [STATUS]: 'ok' }); + emitOtelLog('INFO', 'invocation_completed', { [AGENT_ID]: catId, [STATUS]: 'ok' }, invocationSpan); } invocationSpan.end(); } diff --git a/packages/api/src/infrastructure/telemetry/otel-logger.ts b/packages/api/src/infrastructure/telemetry/otel-logger.ts index 6d47b20fe..1ef4a1ac5 100644 --- a/packages/api/src/infrastructure/telemetry/otel-logger.ts +++ b/packages/api/src/infrastructure/telemetry/otel-logger.ts @@ -5,21 +5,27 @@ * This does NOT replace Pino for local logs. It provides a parallel * emission path so that key events flow through OTel's log signal, * enabling correlation with traces and metrics in external backends. + * + * Trace-log correlation: caller passes the active Span; we derive a + * Context via trace.setSpan() and pass it as LogRecord.context, which + * is the OTel-standard way to link log records to spans. */ -import { trace } from '@opentelemetry/api'; -import { logs, SeverityNumber } from '@opentelemetry/api-logs'; +import { context, type Span, trace } from '@opentelemetry/api'; +import { type LogAttributes, logs, SeverityNumber } from '@opentelemetry/api-logs'; const logger = logs.getLogger('cat-cafe-api', '0.1.0'); /** * Emit a structured log record through the OTel log pipeline. - * Automatically captures active span context for trace-log correlation. 
+ * Pass the active span to get proper trace-log correlation via + * LogRecord.context (not manual traceId/spanId attributes). */ export function emitOtelLog( severity: 'INFO' | 'WARN' | 'ERROR', body: string, - attributes?: Record, + attributes?: LogAttributes, + span?: Span, ): void { const severityMap: Record = { INFO: SeverityNumber.INFO, @@ -27,17 +33,14 @@ export function emitOtelLog( ERROR: SeverityNumber.ERROR, }; - // Capture active span context for trace-log correlation - const activeSpan = trace.getActiveSpan(); - const spanContext = activeSpan?.spanContext(); + // Build context from span for OTel trace-log correlation + const logContext = span ? trace.setSpan(context.active(), span) : undefined; logger.emit({ severityNumber: severityMap[severity], severityText: severity, body, - attributes: { - ...attributes, - ...(spanContext ? { traceId: spanContext.traceId, spanId: spanContext.spanId } : {}), - }, + attributes, + context: logContext, }); } diff --git a/packages/api/test/telemetry/cli-spawn-redaction.test.js b/packages/api/test/telemetry/cli-spawn-redaction.test.js index ad60ab838..1a01a223d 100644 --- a/packages/api/test/telemetry/cli-spawn-redaction.test.js +++ b/packages/api/test/telemetry/cli-spawn-redaction.test.js @@ -90,6 +90,29 @@ test('F152: model normalizer', async () => { assert.equal(normalizeModel('some-unknown-model'), 'other'); }); +test('F152: emitOtelLog accepts span for trace-log correlation', async () => { + // Verify that emitOtelLog signature accepts a Span parameter + // and that LogRecord.context is used (not manual traceId/spanId attributes). 
+ const { emitOtelLog } = await import('../../dist/infrastructure/telemetry/otel-logger.js'); + + // emitOtelLog must accept 4 params: severity, body, attributes, span + assert.ok(emitOtelLog.length >= 2, 'emitOtelLog should accept at least severity + body params'); + + // Source code check: ensure LogRecord uses context field, not manual traceId + const { readFileSync } = await import('node:fs'); + const { resolve, dirname } = await import('node:path'); + const { fileURLToPath } = await import('node:url'); + const __dir = dirname(fileURLToPath(import.meta.url)); + const src = readFileSync(resolve(__dir, '../../src/infrastructure/telemetry/otel-logger.ts'), 'utf8'); + + // Must use trace.setSpan + context field on LogRecord + assert.ok(src.includes('trace.setSpan('), 'Should derive context from span via trace.setSpan()'); + assert.ok(src.includes('context: logContext'), 'Should pass context to logger.emit() via LogRecord.context'); + // Must NOT have manual traceId/spanId in attributes + assert.ok(!src.includes('traceId: spanContext'), 'Should not manually inject traceId into attributes'); + assert.ok(!src.includes('spanId: spanContext'), 'Should not manually inject spanId into attributes'); +}); + test('F152: metric attribute allowlist', async () => { const { ALLOWED_METRIC_ATTRIBUTES } = await import('../../dist/infrastructure/telemetry/metric-allowlist.js'); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 92d562c12..8a5bf1e60 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -53,6 +53,9 @@ importers: '@opentelemetry/api': specifier: ^1.9.1 version: 1.9.1 + '@opentelemetry/api-logs': + specifier: ^0.214.0 + version: 0.214.0 '@opentelemetry/exporter-logs-otlp-http': specifier: ^0.214.0 version: 0.214.0(@opentelemetry/api@1.9.1) From c57de3c3eb80170d7625795ba6712b14b158aba9 Mon Sep 17 00:00:00 2001 From: bouillipx Date: Fri, 10 Apr 2026 14:54:34 +0800 Subject: [PATCH 07/14] =?UTF-8?q?fix(F153):=20CI=20failures=20=E2=80=94=20?= 
=?UTF-8?q?ROADMAP=20entry,=20HMAC=20salt=20resilience,=20test=20env?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Mark F153 as in-progress in docs/ROADMAP.md (lint check-feature-truth gate) - Make initTelemetry() gracefully degrade when HMAC salt is missing instead of crashing the server (telemetry should not be a crash source) - Set NODE_ENV=test fallback in test file for CI environments [宪宪/Opus-46🐾] Co-Authored-By: Claude Opus 4.6 --- docs/ROADMAP.md | 3 +-- packages/api/src/infrastructure/telemetry/init.ts | 10 ++++++++-- .../api/test/telemetry/cli-spawn-redaction.test.js | 3 +++ 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/docs/ROADMAP.md b/docs/ROADMAP.md index d82874a1d..730ff90d2 100644 --- a/docs/ROADMAP.md +++ b/docs/ROADMAP.md @@ -56,9 +56,8 @@ created: 2026-02-26 | F147 | i18n — Hub 界面中英文切换 | idea | 待定 | internal | — | | F149 | ACP Runtime Operations — 项目级进程池 + Session Lease | spec | Maine Coon | internal | [F149](features/F149-acp-runtime-operations.md) | | F152 | Expedition Memory — 外部项目记忆冷启动 + 经验回流 | spec | Ragdoll | internal | [F152](features/F152-expedition-memory.md) | -| F153 | Observability Infrastructure — 运行时可观测基础设施 | spec | Community + Ragdoll | community [#388](https://github.com/zts212653/clowder-ai/issues/388) | [F153](features/F153-observability-infra.md) | +| F153 | Observability Infrastructure — 运行时可观测基础设施 | in-progress | Community + Ragdoll | community [#388](https://github.com/zts212653/clowder-ai/issues/388) | [F153](features/F153-observability-infra.md) | | F154 | Cat Routing Personalization — 全局默认猫 + 首选猫入口 + 单次定向 | spec | Ragdoll | community [#385](https://github.com/zts212653/clowder-ai/issues/385) [#391](https://github.com/zts212653/clowder-ai/pull/391) | [F154](features/F154-cat-routing-personalization.md) | | F155 | Scene-Based Guidance Engine — 场景式交互引导 | needs-discussion | 待定 | internal | [F155](features/F155-scene-guidance-engine.md) | | F156 | WebSocket Security Hardening — 
实时通道安全加固 | in-progress | Ragdoll | internal | [F156](features/F156-websocket-security-hardening.md) | - diff --git a/packages/api/src/infrastructure/telemetry/init.ts b/packages/api/src/infrastructure/telemetry/init.ts index 50345f1e6..f71e15278 100644 --- a/packages/api/src/infrastructure/telemetry/init.ts +++ b/packages/api/src/infrastructure/telemetry/init.ts @@ -57,8 +57,14 @@ export function initTelemetry(config?: TelemetryConfig): () => Promise { const cfg = { ...DEFAULT_CONFIG, ...config }; // P2 fix: validate HMAC salt at startup, not on first redaction call. - // Throws immediately if salt is missing in non-dev environments. - validateSalt(); + // If salt is missing in non-dev environments, disable OTel gracefully + // rather than crashing the server — telemetry should never be a crash source. + try { + validateSalt(); + } catch (err) { + log.error({ err }, 'OTel SDK disabled: HMAC salt validation failed'); + return async () => {}; + } const resource = resourceFromAttributes({ [ATTR_SERVICE_NAME]: cfg.serviceName, diff --git a/packages/api/test/telemetry/cli-spawn-redaction.test.js b/packages/api/test/telemetry/cli-spawn-redaction.test.js index 1a01a223d..1059d49ba 100644 --- a/packages/api/test/telemetry/cli-spawn-redaction.test.js +++ b/packages/api/test/telemetry/cli-spawn-redaction.test.js @@ -6,6 +6,9 @@ * of the fix applied in commit 4c8f7873. 
*/ +// Ensure NODE_ENV=test so HMAC salt fallback works in CI +process.env.NODE_ENV = process.env.NODE_ENV || 'test'; + import assert from 'node:assert/strict'; import { readFileSync } from 'node:fs'; import { dirname, resolve } from 'node:path'; From 2cdc47ed4c63d97403eb3771addaa0a21ee74af3 Mon Sep 17 00:00:00 2001 From: bouillipx Date: Fri, 10 Apr 2026 20:25:16 +0800 Subject: [PATCH 08/14] fix(F153): yielded-error span status + salt semantics comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit P2-1: finally block now sets span status ERROR + emits OTel error log when hadError is true (yielded-error path). Previously only the catch path marked spans as ERROR, leaving yielded errors as UNSET. P2-2: Updated hmac.ts comments to match actual behavior — missing salt disables OTel gracefully instead of crashing the server. [宪宪/Opus-46🐾] Co-Authored-By: Claude Opus 4.6 --- .../cats/services/agents/invocation/invoke-single-cat.ts | 7 +++++-- packages/api/src/infrastructure/telemetry/hmac.ts | 8 +++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts b/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts index 332821e9f..74aeb5f6f 100644 --- a/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts +++ b/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts @@ -1830,8 +1830,11 @@ export async function* invokeSingleCat(deps: InvocationDeps, params: InvocationP } } - // F152: End invocation span + emit completion log through OTel - if (didComplete && !hadError) { + // F152: End invocation span + emit completion/error log through OTel + if (hadError) { + invocationSpan.setStatus({ code: SpanStatusCode.ERROR, message: 'invocation completed with error' }); + emitOtelLog('ERROR', 'invocation_error', { [AGENT_ID]: catId, [STATUS]: 'error' }, invocationSpan); + } else if 
(didComplete) { invocationSpan.setStatus({ code: SpanStatusCode.OK }); emitOtelLog('INFO', 'invocation_completed', { [AGENT_ID]: catId, [STATUS]: 'ok' }, invocationSpan); } diff --git a/packages/api/src/infrastructure/telemetry/hmac.ts b/packages/api/src/infrastructure/telemetry/hmac.ts index 879d7e570..8903a345d 100644 --- a/packages/api/src/infrastructure/telemetry/hmac.ts +++ b/packages/api/src/infrastructure/telemetry/hmac.ts @@ -7,7 +7,8 @@ * (e.g. Sentry) without exposing raw IDs. * * Salt MUST be injected via TELEMETRY_HMAC_SALT env var. - * Non-dev environments fail fast if missing. + * Missing salt in non-dev environments disables OTel (server continues + * without telemetry). Dev/test environments use a fallback salt. */ import { createHmac } from 'node:crypto'; @@ -26,8 +27,9 @@ function getSalt(): string { } /** - * Validate salt is available. Call at startup (in initTelemetry) to fail fast - * instead of deferring to the first pseudonymizeId() call. + * Validate salt is available. Called at startup by initTelemetry(). + * Throws if salt is missing in non-dev environments — caller catches + * and disables OTel gracefully (server continues without telemetry). */ export function validateSalt(): void { getSalt(); From d7be6ffe6c3bec309f0d4e988d8337b338b14d09 Mon Sep 17 00:00:00 2001 From: bouillipx Date: Fri, 10 Apr 2026 20:49:10 +0800 Subject: [PATCH 09/14] =?UTF-8?q?fix(F153):=20deduplicate=20error=20emit?= =?UTF-8?q?=20=E2=80=94=20finally=20only=20covers=20yielded-error=20path?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The catch path (L1731-1732) already emits OTel error log + sets span status ERROR. The finally block's hadError guard was firing on both catch and yielded-error paths, causing duplicate error logs in OTel backends. Now guarded with `hadError && !didWriteAudit` so only the yielded-error path (where catch didn't run) emits here. 
[宪宪/Opus-46🐾] Co-Authored-By: Claude Opus 4.6 --- .../cats/services/agents/invocation/invoke-single-cat.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts b/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts index 74aeb5f6f..45602d137 100644 --- a/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts +++ b/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts @@ -1831,7 +1831,9 @@ export async function* invokeSingleCat(deps: InvocationDeps, params: InvocationP } // F152: End invocation span + emit completion/error log through OTel - if (hadError) { + // Only emit error here for yielded-error path (hadError && !didWriteAudit). + // The catch path already emits error log + sets span status at L1731-1732. + if (hadError && !didWriteAudit) { invocationSpan.setStatus({ code: SpanStatusCode.ERROR, message: 'invocation completed with error' }); emitOtelLog('ERROR', 'invocation_error', { [AGENT_ID]: catId, [STATUS]: 'error' }, invocationSpan); } else if (didComplete) { From 4904914a325189ed602759b62263a469b92f6772 Mon Sep 17 00:00:00 2001 From: bouillipx Date: Fri, 10 Apr 2026 22:37:29 +0800 Subject: [PATCH 10/14] fix(F153): align abort-path OTel signal with audit log MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When generator is .return()'d (client disconnect / abort), audit log writes CAT_ERROR but OTel recorded status as 'ok' with span UNSET. Now the abort path (!didWriteAudit && !hadError && !didComplete) sets span ERROR + emits invocation_aborted log, consistent with audit. Also rebuts R3 P1 (liveness gauge dead) — registerLivenessProbe() is already called at cli-spawn.ts:206, unregister at :395, both in PR diff. 
[宪宪/Opus-46🐾] Co-Authored-By: Claude Opus 4.6 --- .../services/agents/invocation/invoke-single-cat.ts | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts b/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts index 45602d137..297b5e541 100644 --- a/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts +++ b/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts @@ -1816,7 +1816,8 @@ export async function* invokeSingleCat(deps: InvocationDeps, params: InvocationP // F152: Record invocation duration and decrement active count const finalDurationMs = Date.now() - startTime; - const otelStatus = hadError ? 'error' : 'ok'; + const wasAbortedWithoutError = !didWriteAudit && !hadError && !didComplete; + const otelStatus = hadError || wasAbortedWithoutError ? 'error' : 'ok'; const otelAttrs = { [AGENT_ID]: catId, [OPERATION_NAME]: 'invoke', [STATUS]: otelStatus }; invocationDuration.record(finalDurationMs / 1000, otelAttrs); activeInvocations.add(-1, { [AGENT_ID]: catId, [OPERATION_NAME]: 'invoke' }); @@ -1831,11 +1832,15 @@ export async function* invokeSingleCat(deps: InvocationDeps, params: InvocationP } // F152: End invocation span + emit completion/error log through OTel - // Only emit error here for yielded-error path (hadError && !didWriteAudit). - // The catch path already emits error log + sets span status at L1731-1732. 
+ // Four paths: (1) catch already handled, (2) yielded-error, (3) abort, (4) ok if (hadError && !didWriteAudit) { + // Yielded-error path — catch didn't fire, so emit error here invocationSpan.setStatus({ code: SpanStatusCode.ERROR, message: 'invocation completed with error' }); emitOtelLog('ERROR', 'invocation_error', { [AGENT_ID]: catId, [STATUS]: 'error' }, invocationSpan); + } else if (wasAbortedWithoutError) { + // Abort path — generator .return()'d without completion, consistent with audit CAT_ERROR + invocationSpan.setStatus({ code: SpanStatusCode.ERROR, message: 'generator_returned_without_completion' }); + emitOtelLog('ERROR', 'invocation_aborted', { [AGENT_ID]: catId, [STATUS]: 'error' }, invocationSpan); } else if (didComplete) { invocationSpan.setStatus({ code: SpanStatusCode.OK }); emitOtelLog('INFO', 'invocation_completed', { [AGENT_ID]: catId, [STATUS]: 'ok' }, invocationSpan); From 1d035bb34bd5a1a5ed41612231d2e9f1d9e50138 Mon Sep 17 00:00:00 2001 From: bouillipx Date: Sat, 11 Apr 2026 12:44:58 +0800 Subject: [PATCH 11/14] fix(F153): regenerate feature index to fix CI lint gate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [宪宪/Opus-46🐾] Co-Authored-By: Claude Opus 4.6 --- docs/features/index.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/features/index.json b/docs/features/index.json index 40959bc7e..057011d07 100644 --- a/docs/features/index.json +++ b/docs/features/index.json @@ -933,7 +933,7 @@ { "id": "F153", "name": "Observability Infrastructure — 运行时可观测基础设施", - "status": "in-progress | **Owner**: Community (PR author) + Ragdoll | **Priority**: P2", + "status": "spec | **Owner**: Community (PR author) + Ragdoll | **Priority**: P2", "file": "F153-observability-infra.md" }, { @@ -961,5 +961,5 @@ "file": "F157-feishu-receipt-ack.md" } ], - "generated_at": "2026-04-10T11:52:59.639Z" + "generated_at": "2026-04-11T04:39:18.175Z" } From 
8e7242df03cf663201f55a0b94291eeae5a656f7 Mon Sep 17 00:00:00 2001 From: bouillipx Date: Sat, 11 Apr 2026 14:15:34 +0800 Subject: [PATCH 12/14] fix(F153): register telemetry env vars in env-registry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add TELEMETRY_HMAC_SALT, TELEMETRY_EXPORT_RAW_SYSTEM_IDS, PROMETHEUS_PORT, OTEL_EXPORTER_OTLP_ENDPOINT, OTEL_SDK_DISABLED to env-registry.ts with new 'telemetry' category. Fixes CI check:env-registry lint gate. [宪宪/Opus-46🐾] Co-Authored-By: Claude Opus 4.6 --- packages/api/src/config/env-registry.ts | 35 ++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/packages/api/src/config/env-registry.ts b/packages/api/src/config/env-registry.ts index eb6891bbf..af0fff0ef 100644 --- a/packages/api/src/config/env-registry.ts +++ b/packages/api/src/config/env-registry.ts @@ -32,7 +32,8 @@ export type EnvCategory = | 'signal' | 'github_review' | 'evidence' - | 'quota'; + | 'quota' + | 'telemetry'; export interface EnvDefinition { /** The env var name, e.g. 
'REDIS_URL' */ @@ -74,6 +75,7 @@ export const ENV_CATEGORIES: Record = { github_review: 'GitHub Review 监控', evidence: 'F102 记忆系统', quota: '额度监控', + telemetry: '可观测性 (OTel)', }; export const ENV_VARS: EnvDefinition[] = [ @@ -1240,6 +1242,37 @@ export const ENV_VARS: EnvDefinition[] = [ sensitive: false, hubVisible: false, }, + + // --- telemetry (F153) --- + { + name: 'TELEMETRY_HMAC_SALT', + defaultValue: '(dev/test 自动 fallback)', + description: 'HMAC salt — 遥测系统 ID 伪名化用。生产环境必设,缺失则禁用 OTel', + category: 'telemetry', + sensitive: true, + }, + { + name: 'TELEMETRY_EXPORT_RAW_SYSTEM_IDS', + defaultValue: '(未设置 → HMAC 伪名化)', + description: '设为 1 跳过 HMAC,导出原始系统 ID(仅限自托管受控环境)', + category: 'telemetry', + sensitive: false, + }, + { + name: 'PROMETHEUS_PORT', + defaultValue: '9464', + description: 'Prometheus /metrics 抓取端口', + category: 'telemetry', + sensitive: false, + }, + { + name: 'OTEL_EXPORTER_OTLP_ENDPOINT', + defaultValue: '(未设置 → 仅 Prometheus)', + description: 'OTLP 导出端点(设置后同时推送 traces/metrics/logs 到该端点)', + category: 'telemetry', + sensitive: false, + }, + { name: 'OTEL_SDK_DISABLED', defaultValue: '(未设置 → 启用)', description: '设为 true 完全禁用 OTel SDK', category: 'telemetry', sensitive: false }, ]; /** Mask credentials in a URL while preserving host/port/db for debugging. 
*/ From 53b1227c5ecf318f2ed290c28358d448bec26ca5 Mon Sep 17 00:00:00 2001 From: bouillipx Date: Sat, 11 Apr 2026 14:27:15 +0800 Subject: [PATCH 13/14] style(F153): expand OTEL_SDK_DISABLED entry for biome formatter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [宪宪/Opus-46🐾] Co-Authored-By: Claude Opus 4.6 --- packages/api/src/config/env-registry.ts | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/packages/api/src/config/env-registry.ts b/packages/api/src/config/env-registry.ts index af0fff0ef..c72be67a6 100644 --- a/packages/api/src/config/env-registry.ts +++ b/packages/api/src/config/env-registry.ts @@ -1272,7 +1272,13 @@ export const ENV_VARS: EnvDefinition[] = [ category: 'telemetry', sensitive: false, }, - { name: 'OTEL_SDK_DISABLED', defaultValue: '(未设置 → 启用)', description: '设为 true 完全禁用 OTel SDK', category: 'telemetry', sensitive: false }, + { + name: 'OTEL_SDK_DISABLED', + defaultValue: '(未设置 → 启用)', + description: '设为 true 完全禁用 OTel SDK', + category: 'telemetry', + sensitive: false, + }, ]; /** Mask credentials in a URL while preserving host/port/db for debugging. */ From a0f8e7aeda6372f4ee35a605af6188d3e7988362 Mon Sep 17 00:00:00 2001 From: Lysander Su <773678591@qq.com> Date: Sat, 11 Apr 2026 05:07:09 -0700 Subject: [PATCH 14/14] fix(F153): abort path marks pane crashed + observability coverage tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit P2 fix: wasAbortedWithoutError now triggers markCrashed() instead of markDone() on the agent pane registry, aligning all three observation systems (audit log, OTel trace, pane status) on abort events. 
Tests added (11 new): - Liveness probe register/unregister lifecycle + state mapping - cli-spawn liveness wiring verification (source check) - AgentPaneRegistry unit tests (register→running, markCrashed, markDone) - Abort path signal consistency (source checks for all 3 systems) [宪宪/Opus-46🐾] Co-Authored-By: Claude Opus 4.6 --- .../agents/invocation/invoke-single-cat.ts | 2 +- .../telemetry/observability-coverage.test.js | 150 ++++++++++++++++++ 2 files changed, 151 insertions(+), 1 deletion(-) create mode 100644 packages/api/test/telemetry/observability-coverage.test.js diff --git a/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts b/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts index 297b5e541..1fb85e571 100644 --- a/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts +++ b/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts @@ -1824,7 +1824,7 @@ export async function* invokeSingleCat(deps: InvocationDeps, params: InvocationP // F089: Mark agent pane status when invocation completes if (deps.agentPaneRegistry?.getByInvocation(invocationId)) { - if (hadError) { + if (hadError || wasAbortedWithoutError) { deps.agentPaneRegistry.markCrashed(invocationId, null); } else { deps.agentPaneRegistry.markDone(invocationId, 0); diff --git a/packages/api/test/telemetry/observability-coverage.test.js b/packages/api/test/telemetry/observability-coverage.test.js new file mode 100644 index 000000000..fdcf9dc2e --- /dev/null +++ b/packages/api/test/telemetry/observability-coverage.test.js @@ -0,0 +1,150 @@ +/** + * F153: Observability coverage tests — liveness probe wiring, + * abort-path pane registry behavior, and signal consistency. 
+ */ + +process.env.NODE_ENV = process.env.NODE_ENV || 'test'; + +import assert from 'node:assert/strict'; +import { readFileSync } from 'node:fs'; +import { dirname, resolve } from 'node:path'; +import { test } from 'node:test'; +import { fileURLToPath } from 'node:url'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); + +// --- Liveness probe registration tests --- + +test('F153: liveness probe register/unregister lifecycle', async (t) => { + const { + registerLivenessProbe, + unregisterLivenessProbe, + livenessStateToNumber, + } = await import('../../dist/infrastructure/telemetry/instruments.js'); + + await t.test('livenessStateToNumber maps correctly', () => { + assert.equal(livenessStateToNumber('dead'), 0); + assert.equal(livenessStateToNumber('idle-silent'), 1); + assert.equal(livenessStateToNumber('busy-silent'), 2); + assert.equal(livenessStateToNumber('active'), 3); + }); + + await t.test('register and unregister do not throw', () => { + const testId = `test-inv-${Date.now()}`; + assert.doesNotThrow(() => registerLivenessProbe(testId, 'opus', () => 'active')); + assert.doesNotThrow(() => unregisterLivenessProbe(testId)); + }); + + await t.test('unregister unknown id is a no-op', () => { + assert.doesNotThrow(() => unregisterLivenessProbe('nonexistent-id')); + }); +}); + +test('F153: cli-spawn wires liveness probes', () => { + const source = readFileSync( + resolve(__dirname, '../../src/utils/cli-spawn.ts'), + 'utf8', + ); + + // Must import both register and unregister + assert.ok( + source.includes('registerLivenessProbe'), + 'cli-spawn must import registerLivenessProbe', + ); + assert.ok( + source.includes('unregisterLivenessProbe'), + 'cli-spawn must import unregisterLivenessProbe', + ); + + // registerLivenessProbe must be called with invocationId + assert.ok( + source.includes('registerLivenessProbe(options.invocationId'), + 'cli-spawn must call registerLivenessProbe with invocationId', + ); + + // unregisterLivenessProbe must be called 
in cleanup + assert.ok( + source.includes('unregisterLivenessProbe(options.invocationId)'), + 'cli-spawn must call unregisterLivenessProbe in cleanup', + ); +}); + +// --- AgentPaneRegistry unit tests --- + +test('F153: AgentPaneRegistry marks aborted invocations as crashed', async (t) => { + const { AgentPaneRegistry } = await import( + '../../dist/domains/terminal/agent-pane-registry.js' + ); + + const registry = new AgentPaneRegistry(); + const invId = 'inv-abort-test'; + + registry.register(invId, 'wt-1', 'pane-1', 'user-1'); + + await t.test('newly registered pane is running', () => { + const pane = registry.getByInvocation(invId); + assert.ok(pane); + assert.equal(pane.status, 'running'); + }); + + await t.test('markCrashed sets status to crashed', () => { + registry.markCrashed(invId, null); + const pane = registry.getByInvocation(invId); + assert.ok(pane); + assert.equal(pane.status, 'crashed'); + assert.ok(pane.finishedAt, 'finishedAt should be set'); + }); + + await t.test('markDone sets status to done', () => { + const invId2 = 'inv-done-test'; + registry.register(invId2, 'wt-1', 'pane-2', 'user-1'); + registry.markDone(invId2, 0); + const pane = registry.getByInvocation(invId2); + assert.ok(pane); + assert.equal(pane.status, 'done'); + assert.equal(pane.exitCode, 0); + }); +}); + +// --- Source-level signal consistency verification --- + +test('F153: abort path marks pane as crashed (not done)', () => { + const source = readFileSync( + resolve( + __dirname, + '../../src/domains/cats/services/agents/invocation/invoke-single-cat.ts', + ), + 'utf8', + ); + + // The pane registry block must check wasAbortedWithoutError + // to ensure abort path doesn't fall through to markDone + assert.ok( + source.includes('hadError || wasAbortedWithoutError'), + 'Pane registry condition must include wasAbortedWithoutError to prevent abort→done inconsistency', + ); +}); + +test('F153: all three observation systems align on abort', () => { + const source = readFileSync( + 
resolve( + __dirname, + '../../src/domains/cats/services/agents/invocation/invoke-single-cat.ts', + ), + 'utf8', + ); + + // Audit log: must emit CAT_ERROR for abort + assert.ok( + source.includes('generator_returned_without_completion'), + 'Audit must log generator_returned_without_completion for abort path', + ); + + // OTel: must set span ERROR for abort + assert.ok( + source.includes("'invocation_aborted'"), + 'OTel must emit invocation_aborted log for abort path', + ); + + // Pane: abort must not silently markDone (checked by previous test) +});