Skip to content
Open
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
2732a05
feat(javaagent): add library README support to explorer-db-builder (#…
SurbhiAgarwal1 May 7, 2026
cd43b9f
Merge branch 'main' into feat/242-library-readme-support
SurbhiAgarwal1 May 8, 2026
6689ea7
Merge branch 'main' into feat/242-library-readme-support
SurbhiAgarwal1 May 9, 2026
0e712c9
feat: address Copilot feedback for library README support
SurbhiAgarwal1 May 9, 2026
762d41f
Merge branch 'main' into feat/242-library-readme-support
SurbhiAgarwal1 May 11, 2026
4863395
Merge branch 'main' into feat/242-library-readme-support and fix fail…
SurbhiAgarwal1 May 11, 2026
c9bb873
fix: resolve merge conflict in fetch-with-cache.ts
SurbhiAgarwal1 May 11, 2026
186b011
fix: resolve merge conflict and apply formatting
SurbhiAgarwal1 May 11, 2026
6afc5cb
chore: remove untracked generated and temp files from commit
SurbhiAgarwal1 May 11, 2026
5b1a322
fix: restore public/data from main (accidentally removed)
SurbhiAgarwal1 May 11, 2026
556b3b0
fix: restore public/data from main and add prettierignore for data files
SurbhiAgarwal1 May 11, 2026
74e232a
fix: increase App test timeout for lazy-loaded Suspense and exclude p…
SurbhiAgarwal1 May 11, 2026
752e936
feat: implement library readme support and list pages redesign (#242)
SurbhiAgarwal1 May 12, 2026
f7b8f83
style: fix formatting and import sorting for CI
SurbhiAgarwal1 May 12, 2026
5e2d7bf
remove generated files
jaydeluca May 12, 2026
309a487
Merge branch 'main' into feat/242-library-readme-support
jaydeluca May 12, 2026
9ae5cba
chore: remove unrelated generated files and project artifacts
SurbhiAgarwal1 May 12, 2026
1fde25b
chore: ensure all unrelated files match main branch
SurbhiAgarwal1 May 12, 2026
da12718
fix: merge upstream/main and resolve bun.lock conflict
SurbhiAgarwal1 May 13, 2026
feb8cfb
fix: remove temp files and fix formatting after upstream merge
SurbhiAgarwal1 May 13, 2026
0f03ea4
fix: apply prettier formatting to root-level files
SurbhiAgarwal1 May 13, 2026
9f1c1ad
fix: remove local description files from repo
SurbhiAgarwal1 May 13, 2026
e404018
ci: trigger re-run after format fix
SurbhiAgarwal1 May 13, 2026
213518d
Merge upstream/main and resolve conflicts
SurbhiAgarwal1 May 14, 2026
856e29b
chore: clean up unrelated changes and generated files to resolve PR c…
SurbhiAgarwal1 May 14, 2026
72d8fdf
chore: revert unintended registry changes
SurbhiAgarwal1 May 14, 2026
9aad922
chore: remove unrelated files and SNAPSHOT registry data
SurbhiAgarwal1 May 14, 2026
4108acc
chore: final cleanup of unrelated files
SurbhiAgarwal1 May 14, 2026
e1f654c
chore: properly align unrelated files with upstream/main
SurbhiAgarwal1 May 14, 2026
110ecb4
chore: revert __init__.py versioning fixes to keep PR focused
SurbhiAgarwal1 May 14, 2026
fa24eaf
chore: restore README files and registry hashes
SurbhiAgarwal1 May 14, 2026
6ae092c
Merge upstream/main and resolve conflicts
SurbhiAgarwal1 May 15, 2026
224ae67
Add test for loadGlobalConfigurations
SurbhiAgarwal1 May 15, 2026
1b5a75a
chore: include remaining local changes after merge
SurbhiAgarwal1 May 15, 2026
4505b8e
test: update error message expectations to match new format
SurbhiAgarwal1 May 15, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .markdownlint-cli2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ ignores:
- "**/AGENTS.md"
- "ecosystem-explorer/DESIGN.md"
- "ecosystem-registry/**"
- "ecosystem-explorer/public/data/**"
- "**/tmp_repos/**"
- "**/node_modules/**"
- ".claude/**"
Expand Down
1 change: 1 addition & 0 deletions .prettierignore
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ coverage

# Data (avoid formatting large generated files)
ecosystem-explorer/public/data
public/data

# Registry (written by automation watchers - do not reformat)
ecosystem-registry
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,7 @@

import importlib.metadata

__version__ = importlib.metadata.version("collector-watcher")
try:
__version__ = importlib.metadata.version("collector-watcher")
except importlib.metadata.PackageNotFoundError:
__version__ = "0.0.0-dev"
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,7 @@

import importlib.metadata

__version__ = importlib.metadata.version("configuration-watcher")
try:
__version__ = importlib.metadata.version("configuration-watcher")
except importlib.metadata.PackageNotFoundError:
__version__ = "0.0.0-dev"
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,7 @@

import importlib.metadata

__version__ = importlib.metadata.version("explorer-db-builder")
try:
__version__ = importlib.metadata.version("explorer-db-builder")
except importlib.metadata.PackageNotFoundError:
__version__ = "0.0.0-dev"
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import json
import logging
import re
import shutil
from pathlib import Path
from typing import Any
Expand Down Expand Up @@ -46,6 +47,10 @@ def __init__(self, database_dir: str = "ecosystem-explorer/public/data/javaagent
self.files_written = 0
self.total_bytes = 0

def _sanitize_name(self, name: str) -> str:
"""Sanitizes a name for use as a filename to prevent path traversal."""
return re.sub(r"[^a-zA-Z0-9._\-]", "_", name)

def _get_file_path(self, library_name: str, library_hash: str) -> Path:
"""Get the file path for a library with the given name and hash.

Expand All @@ -58,9 +63,10 @@ def _get_file_path(self, library_name: str, library_hash: str) -> Path:
Returns:
Path to the library JSON file
"""
instrumentations_dir = self.database_dir / "instrumentations" / library_name
safe_name = self._sanitize_name(library_name)
instrumentations_dir = self.database_dir / "instrumentations" / safe_name
instrumentations_dir.mkdir(parents=True, exist_ok=True)
return instrumentations_dir / f"{library_name}-{library_hash}.json"
return instrumentations_dir / f"{safe_name}-{library_hash}.json"

def write_libraries(self, libraries: list[dict[str, Any]]) -> dict[str, str]:
"""Write library data to content-addressed files.
Expand Down Expand Up @@ -203,6 +209,35 @@ def write_version_list(self, versions: list[Version]) -> None:
logger.error(f"Failed to write version list: {e}")
raise

def write_markdown(self, library_name: str, markdown_hash: str, content: str) -> None:
    """Write a markdown file to the database.

    The file is stored as ``<database_dir>/markdown/<sanitized name>-<hash>.md``.
    If that file already exists the write is skipped. Publishing is
    best-effort: an ``OSError`` raised while creating the directory or
    writing the file is logged and swallowed, and the write statistics are
    left untouched.

    Args:
        library_name: Name of the library; sanitized before it is used in the file name
        markdown_hash: Hash of the markdown content
        content: Markdown content string, written as UTF-8
    """
    safe_name = self._sanitize_name(library_name)
    markdown_dir = self.database_dir / "markdown"
    file_path = markdown_dir / f"{safe_name}-{markdown_hash}.md"

    if file_path.exists():
        logger.debug(f"Markdown for '{safe_name}' with hash {markdown_hash} already exists, skipping write")
        return

    try:
        # Directory creation is inside the try so that a failure here is
        # handled the same way as a failed write instead of propagating.
        markdown_dir.mkdir(parents=True, exist_ok=True)
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(content)
    except OSError as e:
        # README publishing failures must never fail DB generation as per requirements
        logger.error(f"Failed to write markdown for '{safe_name}': {e}")
        return

    # Only count the file once it has been written successfully.
    self.files_written += 1
    self.total_bytes += len(content.encode("utf-8"))
    logger.debug(f"Wrote markdown for '{safe_name}' with hash {markdown_hash}")

Comment thread
SurbhiAgarwal1 marked this conversation as resolved.
Comment thread
jaydeluca marked this conversation as resolved.
def get_stats(self) -> dict[str, Any]:
"""Get statistics about files written during this session.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,33 @@ def run_javaagent_builder(
versions = get_release_versions(inventory_manager)
logger.info(f"Processing {len(versions)} release versions")

# Pre-load README maps for all versions to enable augmentation and backfilling
readme_maps = {v: inventory_manager.load_library_readme_map(v) for v in versions}

# Publish all READMEs to the database
for version, readme_map in readme_maps.items():
for library_name, markdown_hash in readme_map.items():
content = inventory_manager.load_library_readme_content(version, library_name, markdown_hash)
if content is not None:
db_writer.write_markdown(library_name, markdown_hash, content)

def load_and_augment_inventory(version: Version) -> dict:
    """Load the inventory for ``version`` and attach README hashes to its items.

    Items under the ``libraries`` and ``custom`` keys whose ``name`` appears in
    the pre-loaded README map for this version receive a ``markdown_hash``
    entry. The loaded inventory is modified in place and returned.
    """
    inventory = inventory_manager.load_versioned_inventory(version)
    hashes_by_name = readme_maps.get(version, {})

    # Augment libraries and custom instrumentations with markdown_hash
    for section in ("libraries", "custom"):
        if section not in inventory:
            continue
        for entry in inventory[section]:
            entry_name = entry.get("name")
            if not entry_name or entry_name not in hashes_by_name:
                continue
            entry["markdown_hash"] = hashes_by_name[entry_name]

    return inventory

Comment thread
SurbhiAgarwal1 marked this conversation as resolved.
Comment thread
jaydeluca marked this conversation as resolved.
backfilled_libraries = backfill_metadata(
versions,
inventory_manager.load_versioned_inventory,
load_and_augment_inventory,
item_key="libraries",
)
backfilled_inventories = backfill_metadata(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

logger = logging.getLogger(__name__)

BACKFILLABLE_FIELDS = ["display_name", "description", "library_link", "has_javaagent"]
BACKFILLABLE_FIELDS = ["display_name", "description", "library_link", "has_javaagent", "markdown_hash"]
NESTED_BACKFILLABLE_FIELDS: dict[str, list[str]] = {
"configurations": ["declarative_name", "examples"],
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -440,3 +440,55 @@ def test_multiple_versions_workflow(self, db_writer, temp_db_dir):
# Verify structure
assert (temp_db_dir / "versions" / "1.0.0-index.json").exists()
assert (temp_db_dir / "versions" / "2.0.0-index.json").exists()


class TestWriteMarkdown:
    """Tests for markdown file writing via ``write_markdown``."""

    def test_write_markdown_success(self, db_writer, temp_db_dir):
        """A new markdown file is created with the given content and counted in the stats."""
        name, digest, body = "test-lib", "abc123def456", "# Test README"

        db_writer.write_markdown(name, digest, body)

        # The file lands under markdown/ and round-trips the content
        written = temp_db_dir / "markdown" / f"{name}-{digest}.md"
        assert written.exists()
        assert written.read_text(encoding="utf-8") == body

        # One file counted, sized by its UTF-8 encoding
        assert db_writer.files_written == 1
        assert db_writer.total_bytes == len(body.encode("utf-8"))

    def test_write_markdown_deduplication(self, db_writer, temp_db_dir, caplog):
        """Writing the same name/hash twice skips the second write."""
        import logging

        caplog.set_level(logging.DEBUG)

        call_args = ("test-lib", "abc123def456", "# Test README")

        # Initial write is counted
        db_writer.write_markdown(*call_args)
        assert db_writer.files_written == 1

        # Repeating the identical write must not bump the counter
        db_writer.write_markdown(*call_args)
        assert db_writer.files_written == 1
        assert "already exists, skipping write" in caplog.text

    def test_write_markdown_error_handling(self, db_writer):
        """An OSError from open() is logged as an error rather than raised."""
        from unittest.mock import patch

        failing_open = patch("builtins.open", side_effect=OSError("Disk full"))
        patched_logger = patch("explorer_db_builder.database_writer.logger")
        with failing_open, patched_logger as mock_logger:
            db_writer.write_markdown("error-lib", "hash", "content")

        # The failure must surface through logger.error
        mock_logger.error.assert_called()
        logged_message = mock_logger.error.call_args[0][0]
        assert "Failed to write markdown" in logged_message
43 changes: 39 additions & 4 deletions ecosystem-automation/explorer-db-builder/tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,41 @@ def test_run_builder_processes_all_versions(self, mock_inventory_manager, mock_d
assert mock_db_writer.write_libraries.call_count == 3
assert mock_db_writer.write_version_index.call_count == 3

def test_run_builder_processes_readmes(self, mock_inventory_manager, mock_db_writer):
    """Verifies READMEs are discovered, published, and hashes injected."""
    lib_hash = "abc123def456"
    custom_hash = "fed4321cba98"
    readme_content = "# README content"

    mock_inventory_manager.list_versions.return_value = [Version("1.0.0")]
    mock_inventory_manager.load_versioned_inventory.return_value = {
        "file_format": 0.2,
        "libraries": [{"name": "lib1"}],
        "custom": [{"name": "custom1"}],
    }
    mock_inventory_manager.load_library_readme_map.return_value = {
        "lib1": lib_hash,
        "custom1": custom_hash,
    }
    mock_inventory_manager.load_library_readme_content.return_value = readme_content
    mock_db_writer.write_libraries.return_value = {"lib1": "hash1"}

    assert run_javaagent_builder(mock_inventory_manager, mock_db_writer) == 0

    # One README map per version, one content load and one write per README
    assert mock_inventory_manager.load_library_readme_map.call_count == 1
    assert mock_inventory_manager.load_library_readme_content.call_count == 2
    assert mock_db_writer.write_markdown.call_count == 2
    mock_db_writer.write_markdown.assert_any_call("lib1", lib_hash, readme_content)
    mock_db_writer.write_markdown.assert_any_call("custom1", custom_hash, readme_content)

    # Hashes must already be present on the items handed to write_libraries:
    # first call carries the libraries, second call the custom instrumentations
    recorded_calls = mock_db_writer.write_libraries.call_args_list
    written_libraries = recorded_calls[0][0][0]
    assert written_libraries[0]["name"] == "lib1"
    assert written_libraries[0]["markdown_hash"] == lib_hash

    written_custom = recorded_calls[1][0][0]
    assert written_custom[0]["name"] == "custom1"
    assert written_custom[0]["markdown_hash"] == custom_hash

def test_run_builder_uses_backfilled_inventories(self, mock_inventory_manager, mock_db_writer):
versions = [Version("1.0.0"), Version("2.0.0")]
inventory_1_0 = {
Expand All @@ -235,17 +270,17 @@ def test_run_builder_uses_backfilled_inventories(self, mock_inventory_manager, m

# Verify backfilled data is written: version 1.0.0 should have display_name backfilled
write_calls = mock_db_writer.write_libraries.call_args_list
# We expect 2 calls: one for version 1.0.0 libraries, one for version 2.0.0 libraries
# (Custom instrumentations are empty, so they aren't called)
assert len(write_calls) == 2

# First call is for version 1.0.0 - should have backfilled display_name
# First call is for version 1.0.0 libraries - should have backfilled display_name
libraries_v1 = write_calls[0][0][0]
assert len(libraries_v1) == 1
assert libraries_v1[0]["name"] == "lib1"
assert libraries_v1[0]["display_name"] == "Library 1"

# Second call is for version 2.0.0 - should have original display_name
# Second call is for version 2.0.0 libraries - should have original display_name
libraries_v2 = write_calls[1][0][0]
assert len(libraries_v2) == 1
assert libraries_v2[0]["name"] == "lib1"
assert libraries_v2[0]["display_name"] == "Library 1"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,7 @@

import importlib.metadata

__version__ = importlib.metadata.version("java-instrumentation-watcher")
try:
__version__ = importlib.metadata.version("java-instrumentation-watcher")
except importlib.metadata.PackageNotFoundError:
__version__ = "0.0.0-dev"
Loading