From 2732a05261bb975e264fcb417032680dbcabefe3 Mon Sep 17 00:00:00 2001
From: SurbhiAgarwal1 <agarwalsurbhi1807@gmail.com>
Date: Thu, 7 May 2026 08:02:51 +0530
Subject: [PATCH 1/2] feat(javaagent): add library README support to
 explorer-db-builder (#242)

- Extend InventoryManager to discover and load library READMEs from registry
- Augment instrumentation metadata with markdown_hash and enable backfilling
- Implement markdown publishing to public data directory in DatabaseWriter
- Add frontend types and API support for README lazy loading
---
 .../src/collector_watcher/__init__.py         |  5 +-
 .../src/explorer_db_builder/__init__.py       |  5 +-
 .../explorer_db_builder/database_writer.py    | 27 ++++++++
 .../src/explorer_db_builder/main.py           | 26 +++++++-
 .../metadata_backfiller.py                    |  2 +-
 .../java_instrumentation_watcher/__init__.py  |  5 +-
 .../inventory_manager.py                      | 61 +++++++++++++++++++
 .../src/lib/api/javaagent-data.ts             |  8 +++
 ecosystem-explorer/src/types/javaagent.ts     |  2 +
 9 files changed, 136 insertions(+), 5 deletions(-)

diff --git a/ecosystem-automation/collector-watcher/src/collector_watcher/__init__.py b/ecosystem-automation/collector-watcher/src/collector_watcher/__init__.py
index d248de14..023cc46b 100644
--- a/ecosystem-automation/collector-watcher/src/collector_watcher/__init__.py
+++ b/ecosystem-automation/collector-watcher/src/collector_watcher/__init__.py
@@ -16,4 +16,7 @@
 
 import importlib.metadata
 
-__version__ = importlib.metadata.version("collector-watcher")
+try:
+    __version__ = importlib.metadata.version("collector-watcher")
+except importlib.metadata.PackageNotFoundError:
+    __version__ = "0.0.0-dev"  # placeholder used when no installed distribution metadata is found
diff --git a/ecosystem-automation/explorer-db-builder/src/explorer_db_builder/__init__.py b/ecosystem-automation/explorer-db-builder/src/explorer_db_builder/__init__.py
index ecc22fab..052d3b02 100644
--- a/ecosystem-automation/explorer-db-builder/src/explorer_db_builder/__init__.py
+++ b/ecosystem-automation/explorer-db-builder/src/explorer_db_builder/__init__.py
@@ -16,4 +16,7 @@
 
 import importlib.metadata
 
-__version__ = importlib.metadata.version("explorer-db-builder")
+try:
+    __version__ = importlib.metadata.version("explorer-db-builder")
+except importlib.metadata.PackageNotFoundError:
+    __version__ = "0.0.0-dev"  # placeholder used when no installed distribution metadata is found
diff --git a/ecosystem-automation/explorer-db-builder/src/explorer_db_builder/database_writer.py b/ecosystem-automation/explorer-db-builder/src/explorer_db_builder/database_writer.py
index ed58cfe6..5a8f825b 100644
--- a/ecosystem-automation/explorer-db-builder/src/explorer_db_builder/database_writer.py
+++ b/ecosystem-automation/explorer-db-builder/src/explorer_db_builder/database_writer.py
@@ -203,6 +203,33 @@ def write_version_list(self, versions: list[Version]) -> None:
             logger.error(f"Failed to write version list: {e}")
             raise
 
+    def write_markdown(self, library_name: str, markdown_hash: str, content: str) -> None:
+        """Publish one library README as ``markdown/{library_name}-{markdown_hash}.md``.
+
+        Best effort: any ``OSError`` (including a failure to create the directory) is
+        logged and swallowed, because README publishing must never fail DB generation.
+
+        Args:
+            library_name: Name of the library
+            markdown_hash: Hash of the markdown content
+            content: Markdown content string
+        """
+        file_path = self.database_dir / "markdown" / f"{library_name}-{markdown_hash}.md"
+        if file_path.exists():
+            logger.debug(f"Markdown for '{library_name}' with hash {markdown_hash} already exists, skipping write")
+            return
+
+        try:
+            # mkdir lives inside the try so that a directory-creation failure is non-fatal too.
+            file_path.parent.mkdir(parents=True, exist_ok=True)
+            file_path.write_text(content, encoding="utf-8")
+            file_size = len(content.encode("utf-8"))
+            self.files_written += 1
+            self.total_bytes += file_size
+            logger.debug(f"Wrote markdown for '{library_name}' with hash {markdown_hash}")
+        except OSError as e:
+            logger.error(f"Failed to write markdown for '{library_name}': {e}")
+
     def get_stats(self) -> dict[str, Any]:
         """Get statistics about files written during this session.
 
diff --git a/ecosystem-automation/explorer-db-builder/src/explorer_db_builder/main.py b/ecosystem-automation/explorer-db-builder/src/explorer_db_builder/main.py
index c092b314..bb88b696 100644
--- a/ecosystem-automation/explorer-db-builder/src/explorer_db_builder/main.py
+++ b/ecosystem-automation/explorer-db-builder/src/explorer_db_builder/main.py
@@ -139,9 +139,33 @@ def run_javaagent_builder(
         versions = get_release_versions(inventory_manager)
         logger.info(f"Processing {len(versions)} release versions")
 
+        # Pre-load README maps for all versions to enable augmentation and backfilling
+        readme_maps = {v: inventory_manager.load_library_readme_map(v) for v in versions}
+
+        # Publish all READMEs to the database
+        for version, readme_map in readme_maps.items():
+            for library_name, markdown_hash in readme_map.items():
+                content = inventory_manager.load_library_readme_content(version, library_name, markdown_hash)
+                if content:
+                    db_writer.write_markdown(library_name, markdown_hash, content)
+
+        def load_and_augment_inventory(version: Version) -> dict:
+            """Load a version's inventory and tag every item that has a README with its markdown_hash."""
+            inventory = inventory_manager.load_versioned_inventory(version)
+            readme_map = readme_maps.get(version, {})
+
+            for key in ("libraries", "custom"):
+                # `or ()` tolerates a section that is missing or present but empty/None.
+                for item in inventory.get(key) or ():
+                    name = item.get("name")
+                    if name and name in readme_map:
+                        item["markdown_hash"] = readme_map[name]
+
+            return inventory
+
         backfilled_libraries = backfill_metadata(
             versions,
-            inventory_manager.load_versioned_inventory,
+            load_and_augment_inventory,
             item_key="libraries",
         )
         backfilled_inventories = backfill_metadata(
diff --git a/ecosystem-automation/explorer-db-builder/src/explorer_db_builder/metadata_backfiller.py b/ecosystem-automation/explorer-db-builder/src/explorer_db_builder/metadata_backfiller.py
index 5dba637f..51027b1f 100644
--- a/ecosystem-automation/explorer-db-builder/src/explorer_db_builder/metadata_backfiller.py
+++ b/ecosystem-automation/explorer-db-builder/src/explorer_db_builder/metadata_backfiller.py
@@ -22,7 +22,7 @@
 
 logger = logging.getLogger(__name__)
 
-BACKFILLABLE_FIELDS = ["display_name", "description", "library_link", "has_javaagent"]
+BACKFILLABLE_FIELDS = ["display_name", "description", "library_link", "has_javaagent", "markdown_hash"]
 NESTED_BACKFILLABLE_FIELDS: dict[str, list[str]] = {
     "configurations": ["declarative_name", "examples"],
 }
diff --git a/ecosystem-automation/java-instrumentation-watcher/src/java_instrumentation_watcher/__init__.py b/ecosystem-automation/java-instrumentation-watcher/src/java_instrumentation_watcher/__init__.py
index 07daaff6..381a903a 100644
--- a/ecosystem-automation/java-instrumentation-watcher/src/java_instrumentation_watcher/__init__.py
+++ b/ecosystem-automation/java-instrumentation-watcher/src/java_instrumentation_watcher/__init__.py
@@ -16,4 +16,7 @@
 
 import importlib.metadata
 
-__version__ = importlib.metadata.version("java-instrumentation-watcher")
+try:
+    __version__ = importlib.metadata.version("java-instrumentation-watcher")
+except importlib.metadata.PackageNotFoundError:
+    __version__ = "0.0.0-dev"  # placeholder used when no installed distribution metadata is found
diff --git a/ecosystem-automation/java-instrumentation-watcher/src/java_instrumentation_watcher/inventory_manager.py b/ecosystem-automation/java-instrumentation-watcher/src/java_instrumentation_watcher/inventory_manager.py
index 06d110f0..27993d5e 100644
--- a/ecosystem-automation/java-instrumentation-watcher/src/java_instrumentation_watcher/inventory_manager.py
+++ b/ecosystem-automation/java-instrumentation-watcher/src/java_instrumentation_watcher/inventory_manager.py
@@ -17,6 +17,9 @@
 from collections.abc import Iterable
 from typing import Any
 
+import logging
+import re
+
 import yaml
 from semantic_version import Version
 from watcher_common.content_hashing import compute_content_hash
@@ -113,3 +116,61 @@ def save_library_readmes(
             file_path.write_text(content, encoding="utf-8")
             written += 1
         return written
+
+    def load_library_readme_map(self, version: Version) -> dict[str, str]:
+        """
+        Scan the version's README directory and build a map of library_name -> markdown_hash.
+
+        Args:
+            version: Version to scan
+
+        Returns:
+            Dictionary mapping library names to their markdown content hashes
+        """
+        readme_dir = self.get_version_dir(version) / self.README_DIR
+        if not readme_dir.is_dir():
+            return {}
+
+        readme_map: dict[str, str] = {}
+        # Sorted for determinism: with several READMEs per library, the last filename wins.
+        for item in sorted(readme_dir.iterdir()):
+            if not (item.is_file() and item.suffix == ".md"):
+                continue
+            parsed = self._parse_readme_filename(item.name)
+            if parsed:
+                readme_map[parsed[0]] = parsed[1]
+            else:
+                logging.getLogger(__name__).warning(f"Malformed README filename in {version}: {item.name}")
+        return readme_map
+
+    def load_library_readme_content(self, version: Version, library_name: str, markdown_hash: str) -> str | None:
+        """
+        Load the content of a specific library README.
+
+        Args:
+            version: Version to load from
+            library_name: Name of the library
+            markdown_hash: Content hash of the markdown
+
+        Returns:
+            The markdown content, or None if it doesn't exist or cannot be read
+            (I/O error or content that is not valid UTF-8)
+        """
+        file_path = self.get_version_dir(version) / self.README_DIR / f"{library_name}-{markdown_hash}.md"
+        try:
+            return file_path.read_text(encoding="utf-8")
+        except FileNotFoundError:
+            return None
+        except (OSError, UnicodeDecodeError):
+            logging.getLogger(__name__).error(f"Failed to read README file: {file_path}")
+            return None
+
+    def _parse_readme_filename(self, filename: str) -> tuple[str, str] | None:
+        """
+        Parse a README filename into (library_name, markdown_hash), or None if malformed.
+
+        Format: {library-name}-{hash}.md, split at the last "-" (the hash is lowercase hex).
+        """
+        # ".+" (not ".*") so a bare "-{hash}.md" is rejected rather than yielding an empty name.
+        match = re.fullmatch(r"(.+)-([a-f0-9]+)\.md", filename)
+        return (match.group(1), match.group(2)) if match else None
diff --git a/ecosystem-explorer/src/lib/api/javaagent-data.ts b/ecosystem-explorer/src/lib/api/javaagent-data.ts
index 290db22d..98960588 100644
--- a/ecosystem-explorer/src/lib/api/javaagent-data.ts
+++ b/ecosystem-explorer/src/lib/api/javaagent-data.ts
@@ -79,3 +79,13 @@ export async function loadAllInstrumentations(version: string): Promise<Instrume
     })
   );
 }
+
+/** Fetches the published README markdown for a library; rejects when the response is not OK. */
+export async function loadLibraryReadme(libraryName: string, markdownHash: string): Promise<string> {
+  const fileName = encodeURIComponent(`${libraryName}-${markdownHash}.md`);
+  const response = await fetch(`${BASE_PATH}/markdown/${fileName}`);
+  if (!response.ok) {
+    throw new Error(`Failed to load README for ${libraryName}`);
+  }
+  return response.text();
+}
diff --git a/ecosystem-explorer/src/types/javaagent.ts b/ecosystem-explorer/src/types/javaagent.ts
index b115b5d2..6f6a92f8 100644
--- a/ecosystem-explorer/src/types/javaagent.ts
+++ b/ecosystem-explorer/src/types/javaagent.ts
@@ -64,6 +64,8 @@ export interface InstrumentationData {
   configurations?: Configuration[];
   /** Telemetry emitted by this instrumentation under specific conditions. */
   telemetry?: Telemetry[];
+  /** Content hash of the library README markdown file. */
+  markdown_hash?: string;
   /** Whether this is a custom (non-upstream) instrumentation. */
   _is_custom?: boolean;
 }

From 36e72b09b2c4fff4dee2626be3c5b69d99c0f931 Mon Sep 17 00:00:00 2001
From: SurbhiAgarwal1 <agarwalsurbhi1807@gmail.com>
Date: Thu, 7 May 2026 08:59:22 +0530
Subject: [PATCH 2/2] feat(db-builder): integrate Weaver for semconv compliance
 checking (#97)

- Implement SemconvEnricher to validate telemetry via OTel Weaver
- Insert enrichment stage into the javaagent builder pipeline
- Add semconv_compliance field to Metric and Span models
- Support dynamic versioning based on instrumentation schema_url
---
 SEMCONV_INTEGRATION_DETAIL.md                 |  76 +++++++
 .../src/explorer_db_builder/main.py           |   8 +
 .../explorer_db_builder/semconv_enricher.py   | 206 ++++++++++++++++++
 .../tests/test_semconv_enricher.py            | 132 +++++++++++
 ecosystem-explorer/src/types/javaagent.ts     |   4 +
 5 files changed, 426 insertions(+)
 create mode 100644 SEMCONV_INTEGRATION_DETAIL.md
 create mode 100644 ecosystem-automation/explorer-db-builder/src/explorer_db_builder/semconv_enricher.py
 create mode 100644 ecosystem-automation/explorer-db-builder/tests/test_semconv_enricher.py

diff --git a/SEMCONV_INTEGRATION_DETAIL.md b/SEMCONV_INTEGRATION_DETAIL.md
new file mode 100644
index 00000000..e542be4e
--- /dev/null
+++ b/SEMCONV_INTEGRATION_DETAIL.md
@@ -0,0 +1,76 @@
+# Technical Detail: Semantic Convention Integration (Issue #97)
+
+This document provides a technical deep-dive into the implementation of the Semantic Convention compliance pipeline in the `explorer-db-builder`.
+
+## 1. Architectural Overview
+The integration follows a "sidecar" enrichment pattern. Instead of changing how the core inventory is loaded or transformed, we introduce a separate `SemconvEnricher` stage that evaluates the already-transformed telemetry metadata against the official OTel semantic convention registry using the **OpenTelemetry Weaver** engine and annotates that metadata in place.
+
+### Data Flow
+1. **Extraction**: Retrieve metrics and spans from the normalized `InstrumentationData`.
+2. **Translation**: Map OTel signals to a Weaver-compatible "Application Registry".
+3. **Evaluation**: Execute `weaver registry check` against a specific semconv version.
+4. **Annotation**: Persist compliance status back to the telemetry metadata.
+
+## 2. Component: `SemconvEnricher`
+**Location**: `explorer_db_builder/semconv_enricher.py`
+
+This is the primary orchestrator for compliance checking.
+
+### Transformation Logic
+The enricher generates a temporary directory containing:
+- **`manifest.yaml`**: Defines the instrumentation name and the dependency on the official OTel semantic convention registry (e.g., `https://github.com/open-telemetry/semantic-conventions@v1.37.0`).
+- **`telemetry.yaml`**: Translates internal metadata into Weaver's definition format.
+  - **Metrics**: Defined with `type: metric` and attributes using the `ref` keyword to ensure Weaver validates them against the registry's definitions.
+  - **Spans**: Defined with `type: span`, using synthetic IDs based on the instrumentation name and span kind (e.g., `activej-http.SERVER`).
+
+### Weaver Invocation
+The enricher calls the `weaver` CLI via a subprocess.
+- **Success Condition**: If `weaver registry check` exits with code 0, all signals defined in the registry are considered compliant.
+- **Error Handling**: If Weaver exits with a non-zero code, the enricher scans the `stderr` output for the generated signal IDs and marks every signal whose ID appears there as non-compliant. This is a simple POC heuristic, not a full parser of Weaver's diagnostics.
+
+## 3. Pipeline Integration
+**Location**: `explorer_db_builder/main.py`
+
+The enrichment stage is integrated into `process_version` immediately after the `transform_instrumentation_format` call.
+
+```python
+transformed_inventory = transform_instrumentation_format(inventory)
+
+# Enrich with semantic convention compliance
+try:
+    enricher = SemconvEnricher()
+    enricher.enrich_inventory(transformed_inventory)
+except Exception as e:
+    logger.warning(f"Semantic convention enrichment failed for version {version}: {e}")
+```
+
+This placement ensures that:
+- Enrichment works on normalized, clean data.
+- The pipeline remains resilient (a Weaver failure does not crash the build).
+
+## 4. Frontend & Metadata Schema
+**Location**: `ecosystem-explorer/src/types/javaagent.ts`
+
+The compliance status is persisted as a `semconv_compliance` array on individual telemetry signals:
+
+```json
+{
+  "name": "http.server.request.duration",
+  "unit": "s",
+  "semconv_compliance": ["1.37.0"]
+}
+```
+
+This structure is extensible, allowing an individual metric or span to be marked as compliant with multiple semantic convention versions over time.
+
+## 5. Verification & Testing
+**Location**: `tests/test_semconv_enricher.py`
+
+A dedicated test suite validates the following:
+- **YAML Generation**: Ensures the generated `manifest.yaml` and `telemetry.yaml` are valid and follow Weaver's specification.
+- **Version Extraction**: Tests the regex-based extraction of versions from OTel schema URLs.
+- **Mocked CLI Interactions**: Simulates Weaver output scenarios (total success and partial failure) with a mocked subprocess to verify that compliance results are derived and applied to the metadata correctly.
+
+---
+**Branch**: `feat/97-semconv-integration`  
+**PR Title**: `feat(db-builder): integrate Weaver for semconv compliance checking (#97)`
diff --git a/ecosystem-automation/explorer-db-builder/src/explorer_db_builder/main.py b/ecosystem-automation/explorer-db-builder/src/explorer_db_builder/main.py
index bb88b696..c542103c 100644
--- a/ecosystem-automation/explorer-db-builder/src/explorer_db_builder/main.py
+++ b/ecosystem-automation/explorer-db-builder/src/explorer_db_builder/main.py
@@ -27,6 +27,7 @@
 from explorer_db_builder.database_writer import DatabaseWriter
 from explorer_db_builder.instrumentation_transformer import transform_instrumentation_format
 from explorer_db_builder.metadata_backfiller import backfill_metadata
+from explorer_db_builder.semconv_enricher import SemconvEnricher
 
 logger = logging.getLogger(__name__)
 
@@ -97,6 +98,13 @@ def process_version(
 
     transformed_inventory = transform_instrumentation_format(inventory)
 
+    # Enrich with semantic convention compliance
+    try:
+        enricher = SemconvEnricher()
+        enricher.enrich_inventory(transformed_inventory)
+    except Exception as e:
+        logger.warning(f"Semantic convention enrichment failed for version {version}: {e}")
+
     if "libraries" not in transformed_inventory and "custom" not in transformed_inventory:
         raise KeyError(f"Inventory for version {version} missing 'libraries' and 'custom' keys")
 
diff --git a/ecosystem-automation/explorer-db-builder/src/explorer_db_builder/semconv_enricher.py b/ecosystem-automation/explorer-db-builder/src/explorer_db_builder/semconv_enricher.py
new file mode 100644
index 00000000..2be1b2cf
--- /dev/null
+++ b/ecosystem-automation/explorer-db-builder/src/explorer_db_builder/semconv_enricher.py
@@ -0,0 +1,206 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Enriches instrumentation metadata with Semantic Convention compliance information."""
+
+import logging
+import os
+import re
+import subprocess
+import tempfile
+from typing import Any, Dict, Optional
+
+import yaml
+
+logger = logging.getLogger(__name__)
+
+
+class SemconvEnricher:
+    """Enriches instrumentation metadata with Semantic Convention compliance information using Weaver."""
+
+    def __init__(self, weaver_path: str = "weaver"):
+        """
+        Args:
+            weaver_path: Weaver executable to invoke; a bare name such as the default is resolved via PATH.
+        """
+        self.weaver_path = weaver_path
+
+    def enrich_inventory(self, inventory_data: Dict[str, Any]) -> None:
+        """Runs the compliance check on every instrumentation in the inventory.
+
+        Args:
+            inventory_data: Transformed inventory; its ``libraries`` and ``custom``
+                sections are enriched in place, and missing or empty ones are skipped.
+        """
+        for section in ("libraries", "custom"):
+            for instrumentation in inventory_data.get(section) or ():
+                self.enrich_instrumentation(instrumentation)
+
+    def enrich_instrumentation(self, instrumentation: Dict[str, Any]) -> None:
+        """Enriches a single instrumentation with semconv compliance metadata.
+
+        Best effort: a failure while building the registry or running Weaver is logged and
+        leaves this instrumentation unannotated, so the remaining ones are still processed.
+
+        Args:
+            instrumentation: Instrumentation data dictionary.
+        """
+        if not instrumentation.get("telemetry"):
+            return
+
+        # POC: For now, we only support a single semconv version per instrumentation based on its
+        # schema_url, falling back to 1.37.0 when the URL is missing or has no version in it.
+        schema_url = (instrumentation.get("scope") or {}).get("schema_url", "")
+        version = self._extract_version(schema_url) or "1.37.0"
+
+        try:
+            with tempfile.TemporaryDirectory() as temp_dir:
+                self._prepare_weaver_registry(temp_dir, instrumentation, version)
+                compliance_results = self._run_weaver_check(temp_dir)
+                self._apply_compliance_metadata(instrumentation, compliance_results, version)
+        except Exception as e:
+            logger.warning(f"Failed to run semconv compliance check for {instrumentation.get('name')}: {e}")
+
+    def _extract_version(self, schema_url: str) -> Optional[str]:
+        """Returns the ``X.Y.Z`` version embedded in an OpenTelemetry schema URL, or None if absent."""
+        # Expected shape: https://opentelemetry.io/schemas/1.37.0
+        found = re.search(r"/schemas/(\d+\.\d+\.\d+)", schema_url) if schema_url else None
+        if found is None:
+            return None
+        return found.group(1)
+
+    def _prepare_weaver_registry(self, registry_dir: str, instrumentation: Dict[str, Any], version: str) -> None:
+        """Writes ``manifest.yaml`` and, when signals exist, ``telemetry.yaml`` into ``registry_dir``.
+
+        Signals sharing a group id (the same metric name or span kind repeated across
+        telemetry entries) are merged into one group so that every id is defined only once.
+        Metrics and attributes without a name are skipped instead of emitting null ids/refs.
+
+        Args:
+            registry_dir: Temporary directory to create the registry in.
+            instrumentation: Instrumentation data.
+            version: Semantic Convention version to check against.
+        """
+        scope = instrumentation.get("scope") or {}
+        registry_path = f"https://github.com/open-telemetry/semantic-conventions@v{version}"
+        manifest = {
+            "name": instrumentation.get("name", "check"),
+            "schema_url": scope.get("schema_url", f"https://opentelemetry.io/schemas/{version}"),
+            "dependencies": [{"name": "otel", "registry_path": registry_path}],
+        }
+        with open(os.path.join(registry_dir, "manifest.yaml"), "w", encoding="utf-8") as f:
+            yaml.dump(manifest, f)
+
+        groups: Dict[str, Dict[str, Any]] = {}  # group id -> group, in first-seen order
+
+        def add_group(group: Dict[str, Any], signal: Dict[str, Any]) -> None:
+            """Registers ``group`` under its id (first definition wins) and merges attribute refs."""
+            merged = groups.setdefault(group["id"], group)["attributes"]
+            for attr in signal.get("attributes") or []:
+                ref = {"ref": attr.get("name")}
+                if ref["ref"] and ref not in merged:
+                    merged.append(ref)
+
+        for entry in instrumentation.get("telemetry") or []:
+            for metric in entry.get("metrics") or []:
+                metric_name = metric.get("name")
+                if not metric_name:
+                    continue
+                definition = {
+                    "name": metric_name,
+                    "brief": metric.get("description", "POC metric"),
+                    "instrument": metric.get("instrument", "histogram"),
+                    "unit": metric.get("unit", "s"),
+                }
+                group = {"id": metric_name, "type": "metric", "attributes": [], "metrics": [definition]}
+                add_group(group, metric)
+
+            for span in entry.get("spans") or []:
+                # Synthetic id (spans carry no name); _apply_compliance_metadata rebuilds the same id.
+                group = {
+                    "id": f"{instrumentation.get('name')}.{span.get('span_kind', 'unknown')}",
+                    "type": "span",
+                    "brief": "POC span",
+                    "span_kind": span.get("span_kind", "SERVER").lower(),
+                    "attributes": [],
+                }
+                add_group(group, span)
+
+        if groups:
+            telemetry_data = {"file_format": "definition/2", "groups": list(groups.values())}
+            with open(os.path.join(registry_dir, "telemetry.yaml"), "w", encoding="utf-8") as f:
+                yaml.dump(telemetry_data, f)
+
+    def _run_weaver_check(self, registry_dir: str) -> Dict[str, bool]:
+        """Runs ``weaver registry check`` and returns a map of signal ID to compliance status.
+
+        Args:
+            registry_dir: Path to the Weaver registry.
+
+        Returns:
+            Dict mapping signal IDs to a boolean (True if compliant). Empty when the registry
+            has no ``telemetry.yaml``, i.e. there were no signals to check.
+
+        Raises:
+            OSError: If the weaver executable cannot be started (e.g. it is not installed).
+            RuntimeError: If weaver fails without mentioning any of the defined signals.
+        """
+        telemetry_path = os.path.join(registry_dir, "telemetry.yaml")
+        if not os.path.exists(telemetry_path):
+            # _prepare_weaver_registry only writes this file when at least one group exists.
+            return {}
+
+        # Read the group ids back from the generated registry: these are the signals being judged.
+        with open(telemetry_path, encoding="utf-8") as f:
+            telemetry_data = yaml.safe_load(f) or {}
+        signal_ids = [group["id"] for group in telemetry_data.get("groups", [])]
+
+        # List form (no shell), so registry_dir needs no quoting.
+        cmd = [self.weaver_path, "registry", "check", "-r", registry_dir]
+        result = subprocess.run(cmd, capture_output=True, text=True)
+        if result.returncode == 0:
+            return dict.fromkeys(signal_ids, True)
+
+        logger.debug(f"Weaver reported errors:\n{result.stderr}")
+
+        # Simple POC heuristic: a signal is non-compliant if its ID appears in the error output.
+        compliance_map = {signal_id: signal_id not in result.stderr for signal_id in signal_ids}
+        if all(compliance_map.values()):
+            # Weaver failed for a reason not tied to any signal (unreachable registry, invalid
+            # manifest, ...). Returning "all compliant" here would be a false positive, so the
+            # failure is surfaced to the caller instead.
+            raise RuntimeError(f"weaver exited with code {result.returncode}: {result.stderr.strip()}")
+
+        return compliance_map
+
+    def _apply_compliance_metadata(
+        self, instrumentation: Dict[str, Any], results: Dict[str, bool], version: str
+    ) -> None:
+        """Records ``version`` in ``semconv_compliance`` on every signal that passed the check.
+
+        Idempotent: a version already listed on a signal is not appended again.
+
+        Args:
+            instrumentation: The instrumentation dict to modify.
+            results: Map of signal ID to compliance status.
+            version: The semconv version checked.
+        """
+        for entry in instrumentation.get("telemetry") or []:
+            # Metrics are keyed by name; spans by the synthetic "<instrumentation>.<span_kind>" id.
+            signals = [(metric.get("name"), metric) for metric in entry.get("metrics") or []]
+            for span in entry.get("spans") or []:
+                signals.append((f"{instrumentation.get('name')}.{span.get('span_kind', 'unknown')}", span))
+            for signal_id, signal in signals:
+                if results.get(signal_id, False) and version not in signal.setdefault("semconv_compliance", []):
+                    signal["semconv_compliance"].append(version)
diff --git a/ecosystem-automation/explorer-db-builder/tests/test_semconv_enricher.py b/ecosystem-automation/explorer-db-builder/tests/test_semconv_enricher.py
new file mode 100644
index 00000000..072fea6d
--- /dev/null
+++ b/ecosystem-automation/explorer-db-builder/tests/test_semconv_enricher.py
@@ -0,0 +1,132 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import os
+import unittest
+from unittest.mock import MagicMock, patch
+
+import yaml
+
+from explorer_db_builder.semconv_enricher import SemconvEnricher
+
+
+class TestSemconvEnricher(unittest.TestCase):
+    def setUp(self):
+        self.enricher = SemconvEnricher()
+        self.sample_instrumentation = {  # fixture: one metric and one SERVER span on schema 1.37.0
+            "name": "test-lib",
+            "scope": {"schema_url": "https://opentelemetry.io/schemas/1.37.0"},
+            "telemetry": [
+                {
+                    "metrics": [
+                        {
+                            "name": "http.server.request.duration",
+                            "attributes": [{"name": "http.request.method"}],
+                        }
+                    ],
+                    "spans": [
+                        {
+                            "span_kind": "SERVER",
+                            "attributes": [{"name": "http.request.method"}],
+                        }
+                    ],
+                }
+            ],
+        }
+
+    def test_extract_version(self):
+        """A version is pulled from a schema URL; unparseable or empty input yields None."""
+        cases = {"https://opentelemetry.io/schemas/1.37.0": "1.37.0", "invalid-url": None, "": None}
+        for schema_url, expected in cases.items():
+            with self.subTest(schema_url=schema_url):
+                self.assertEqual(self.enricher._extract_version(schema_url), expected)
+
+    def test_prepare_weaver_registry(self):
+        """The generated manifest.yaml and telemetry.yaml describe the sample instrumentation."""
+        import tempfile
+
+        with tempfile.TemporaryDirectory() as temp_dir:
+            self.enricher._prepare_weaver_registry(temp_dir, self.sample_instrumentation, "1.37.0")
+            # Check manifest.yaml
+            with open(os.path.join(temp_dir, "manifest.yaml")) as f:
+                manifest = yaml.safe_load(f)
+                self.assertEqual(manifest["name"], "test-lib")
+                self.assertEqual(manifest["dependencies"][0]["name"], "otel")
+                self.assertIn("v1.37.0", manifest["dependencies"][0]["registry_path"])
+
+            # Check telemetry.yaml
+            with open(os.path.join(temp_dir, "telemetry.yaml")) as f:
+                telemetry = yaml.safe_load(f)
+                self.assertEqual(telemetry["file_format"], "definition/2")
+                groups = telemetry["groups"]
+                self.assertEqual(len(groups), 2)
+
+                # Metric group
+                metric_group = next(g for g in groups if g["type"] == "metric")
+                self.assertEqual(metric_group["id"], "http.server.request.duration")
+                self.assertEqual(metric_group["metrics"][0]["name"], "http.server.request.duration")
+                self.assertEqual(metric_group["attributes"][0]["ref"], "http.request.method")
+
+                # Span group
+                span_group = next(g for g in groups if g["type"] == "span")
+                self.assertEqual(span_group["id"], "test-lib.SERVER")
+                self.assertEqual(span_group["span_kind"], "server")
+                self.assertEqual(span_group["attributes"][0]["ref"], "http.request.method")
+
+    @patch("subprocess.run")
+    def test_run_weaver_check_success(self, mock_run):
+        """Exit code 0 from weaver marks every generated signal as compliant."""
+        import tempfile
+
+        mock_run.return_value = MagicMock(returncode=0, stderr="")
+        with tempfile.TemporaryDirectory() as temp_dir:
+            self.enricher._prepare_weaver_registry(temp_dir, self.sample_instrumentation, "1.37.0")
+            results = self.enricher._run_weaver_check(temp_dir)
+        self.assertTrue(results["http.server.request.duration"])
+        self.assertTrue(results["test-lib.SERVER"])
+
+    @patch("subprocess.run")
+    def test_run_weaver_check_failure(self, mock_run):
+        """A failing weaver run flags only the signals whose id appears in stderr."""
+        import tempfile
+
+        # The error mentions the metric but not the span (simplified parser check).
+        mock_run.return_value = MagicMock(
+            returncode=1, stderr="[Error] signals/telemetry.yaml: http.server.request.duration attribute 'foo' not found"
+        )
+        with tempfile.TemporaryDirectory() as temp_dir:
+            self.enricher._prepare_weaver_registry(temp_dir, self.sample_instrumentation, "1.37.0")
+            results = self.enricher._run_weaver_check(temp_dir)
+        self.assertFalse(results["http.server.request.duration"])
+        self.assertTrue(results["test-lib.SERVER"])
+
+    @patch.object(SemconvEnricher, "_run_weaver_check")
+    def test_enrich_instrumentation(self, mock_check):
+        """Only signals reported as compliant receive the ``semconv_compliance`` annotation."""
+        mock_check.return_value = {
+            "http.server.request.duration": True,
+            "test-lib.SERVER": False,
+        }
+
+        self.enricher.enrich_instrumentation(self.sample_instrumentation)
+
+        metric = self.sample_instrumentation["telemetry"][0]["metrics"][0]
+        self.assertEqual(metric["semconv_compliance"], ["1.37.0"])
+        span = self.sample_instrumentation["telemetry"][0]["spans"][0]
+        self.assertNotIn("semconv_compliance", span)
+
+    def test_enrich_instrumentation_no_telemetry(self):
+        instrumentation = {"name": "empty"}
+        self.enricher.enrich_instrumentation(instrumentation)  # no "telemetry" key, so nothing to check
+        self.assertEqual(instrumentation, {"name": "empty"})  # the dict must come back unmodified
diff --git a/ecosystem-explorer/src/types/javaagent.ts b/ecosystem-explorer/src/types/javaagent.ts
index 6f6a92f8..0d6cb637 100644
--- a/ecosystem-explorer/src/types/javaagent.ts
+++ b/ecosystem-explorer/src/types/javaagent.ts
@@ -133,6 +133,8 @@ export interface Metric {
   unit: string;
   /** Attributes associated with the metric. */
   attributes?: Attribute[];
+  /** Semantic convention versions this metric is compliant with. */
+  semconv_compliance?: string[];
 }
 
 /**
@@ -143,6 +145,8 @@ export interface Span {
   span_kind: "CLIENT" | "SERVER" | "PRODUCER" | "CONSUMER" | "INTERNAL";
   /** Attributes associated with the span. */
   attributes?: Attribute[];
+  /** Semantic convention versions this span is compliant with. */
+  semconv_compliance?: string[];
 }
 
 /**