2 changes: 1 addition & 1 deletion .github/workflows/ci-main.yml
@@ -39,7 +39,7 @@ jobs:
run: hatch fmt --check

- name: Run unit tests with coverage
run: hatch run cov
run: hatch run cov -- -m "not manual"

- name: Install oteltest
run: pip install oteltest
171 changes: 171 additions & 0 deletions metadata/aggregate_yamls.py
@@ -0,0 +1,171 @@
import ast
import glob
import os
import sys
from pathlib import Path

import yaml

sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../src")))
from splunk_otel.env import DEFAULTS


def find_all_yaml_files(yamls_dir):
yamls_dir = Path(yamls_dir)
return sorted([f for f in yamls_dir.iterdir() if f.is_file() and f.suffix == ".yaml"], key=lambda p: p.name)


def load_yaml_file(path):
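    """Return the parsed YAML document, or None if the file cannot be read or parsed."""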
try:
with open(path) as f:
return yaml.safe_load(f)
except (OSError, yaml.YAMLError):
return None


def extract_instrumentation_fields(data):
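    """Normalize one instrumentation YAML mapping into the aggregated metadata shape."""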
instr = {}
if "keys" in data:
instr["keys"] = data["keys"]
elif "instrumentation_name" in data:
instr["keys"] = [data["instrumentation_name"]]
elif "name" in data:
instr["keys"] = [data["name"].lower()]
else:
instr["keys"] = []

if "instrumented_components" in data:
instr["instrumented_components"] = data["instrumented_components"]
elif "name" in data:
instr["instrumented_components"] = [
{"name": data["name"], "supported_versions": data.get("supported_versions", "varies")}
]
else:
instr["instrumented_components"] = []

instr["stability"] = data.get("stability", "stable")
instr["support"] = data.get("support", "official")

def norm(attrs):
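        # Collapse attribute entries of the form {"name": "x"} to the bare string "x".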
return [a["name"] if isinstance(a, dict) and set(a) == {"name"} else a for a in (attrs or [])]

signals = []
for key in ("spans",):
if key in data:
spans = [
dict(span, attributes=norm(span.get("attributes"))) if "attributes" in span else dict(span)
for span in data[key]
]
signals.append({"spans": spans})
if not signals and "signals" in data:
for s in data["signals"]:
if "spans" in s:
spans = [
dict(span, attributes=norm(span.get("attributes"))) if "attributes" in span else dict(span)
for span in s["spans"]
]
signals.append({"spans": spans})
instr["signals"] = signals

return instr


def main():
yamls_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "generator", "yamls")
output_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "splunk-otel-python-metadata.yaml")

component = "Splunk Distribution of OpenTelemetry Python"
version = "1.0.0"
dependencies = []
instrumentations = []

for yaml_file in find_all_yaml_files(yamls_dir):
data = load_yaml_file(yaml_file)
if data:
instr = extract_instrumentation_fields(data)
instrumentations.append(instr)

# Extract all OTEL_ and SPLUNK_ env var constants from src/splunk_otel
settings = []
env_vars = set()
for pyfile in glob.glob(os.path.join(os.path.dirname(__file__), "../src/splunk_otel/**/*.py"), recursive=True):
with open(pyfile) as f:
tree = ast.parse(f.read(), filename=pyfile)
for node in ast.walk(tree):
if isinstance(node, ast.Assign):
for target in node.targets:
if (
isinstance(target, ast.Name)
and isinstance(node.value, ast.Constant)
and isinstance(node.value.value, str)
and (node.value.value.startswith("OTEL_") or node.value.value.startswith("SPLUNK_"))
):
env_vars.add(node.value.value)
    # Also include the env vars from DEFAULTS so their default values are picked up
env_vars.update(DEFAULTS.keys())
seen = set()
for env in sorted(env_vars):
if env in seen:
continue
seen.add(env)
        # Derive the property name: lowercase the env var, turn the otel_/splunk_ prefix into "otel."/"splunk.", and replace remaining underscores with dots
prop = env.lower()
if prop.startswith("otel_"):
prop = prop.replace("otel_", "otel.", 1)
elif prop.startswith("splunk_"):
prop = prop.replace("splunk_", "splunk.", 1)
prop = prop.replace("_", ".")
# Auto type
if "enabled" in env.lower():
typ = "boolean"
elif any(x in env.lower() for x in ["interval", "limit", "count"]):
typ = "int"
else:
typ = "string"
# Auto category
if "exporter" in env.lower():
cat = "exporter"
elif "instrumentation" in env.lower():
cat = "instrumentation"
elif "splunk" in env.lower():
cat = "splunk"
else:
cat = "general"
# Auto description
if typ == "boolean":
desc = f"Enable or disable {prop.replace('.', ' ')}."
elif typ == "int":
desc = f"Integer value for {prop.replace('.', ' ')}."
else:
desc = f"Value for {prop.replace('.', ' ')}."
settings.append(
{
"property": prop,
"env": env,
"description": desc,
"default": DEFAULTS.get(env, ""),
"type": typ,
"category": cat,
}
)
# Sort settings by property for more readable YAML
settings = sorted(settings, key=lambda s: s["property"])

final_metadata = {
"component": component,
"version": version,
"dependencies": dependencies,
"settings": settings,
"instrumentations": instrumentations,
}

class IndentDumper(yaml.SafeDumper):
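        """SafeDumper that indents block-sequence items under their parent key."""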
def increase_indent(self, *, flow=False, indentless=False):
return super().increase_indent(flow=flow, indentless=False)

with open(output_path, "w") as f:
yaml.dump(final_metadata, f, default_flow_style=False, sort_keys=False, Dumper=IndentDumper, indent=2)


if __name__ == "__main__":
main()
5 changes: 5 additions & 0 deletions pyproject.toml
@@ -54,6 +54,11 @@ dependencies = [
"pytest",
"ruff",
"oteltest",
"pyyaml",
"langchain",
"langchain_community",
"langchain_openai",
"langchain_core",
]

[tool.hatch.envs.default.scripts]
74 changes: 74 additions & 0 deletions tests/test_aggregate_yamls.py
@@ -0,0 +1,74 @@
import yaml

from metadata import aggregate_yamls


def test_extract_instrumentation_fields_minimal():
data = {"name": "TestInstr"}
instr = aggregate_yamls.extract_instrumentation_fields(data)
assert instr["keys"] == ["testinstr"]
assert instr["instrumented_components"][0]["name"] == "TestInstr"
assert instr["stability"] == "stable"
assert instr["support"] == "official"
assert instr["signals"] == []


def test_extract_instrumentation_fields_full():
data = {
"keys": ["foo"],
"instrumented_components": [{"name": "Bar", "supported_versions": "2.0"}],
"stability": "beta",
"support": "community",
"spans": [
{"name": "span1", "attributes": [{"name": "attr1"}, {"name": "attr2"}]},
{"name": "span2"},
],
}
instr = aggregate_yamls.extract_instrumentation_fields(data)
assert instr["keys"] == ["foo"]
assert instr["instrumented_components"][0]["name"] == "Bar"
assert instr["stability"] == "beta"
assert instr["support"] == "community"
assert instr["signals"][0]["spans"][0]["attributes"] == ["attr1", "attr2"]


def test_main_creates_yaml(tmp_path, monkeypatch):
yamls_dir = tmp_path / "yamls"
yamls_dir.mkdir()
# Create a sample yaml file
sample_yaml = yamls_dir / "test.yaml"
sample_yaml.write_text("""
name: testinstr
instrumented_components:
- name: testcomp
supported_versions: 1.0
stability: beta
support: community
spans:
- name: span1
attributes:
- name: attr1
- name: attr2
- name: span2
""")
    # Redirect main() to the sample yaml file and write its output under tmp_path
out_yaml = tmp_path / "splunk-otel-python-metadata.yaml"
monkeypatch.setattr(aggregate_yamls, "find_all_yaml_files", lambda _: [sample_yaml])

def load_yaml_file_with_ctx(path):
with open(path) as f:
return yaml.safe_load(f)

monkeypatch.setattr(aggregate_yamls, "load_yaml_file", load_yaml_file_with_ctx)
monkeypatch.setattr(aggregate_yamls.os.path, "dirname", lambda _: str(tmp_path))
monkeypatch.setattr(aggregate_yamls.os.path, "abspath", lambda x: str(x))
# Run main
aggregate_yamls.main()
# Check output file
assert out_yaml.exists()
with open(out_yaml) as f:
meta = yaml.safe_load(f)
assert meta["component"] == "Splunk Distribution of OpenTelemetry Python"
assert meta["instrumentations"][0]["keys"] == ["testinstr"]
assert "settings" in meta
assert isinstance(meta["settings"], list)
6 changes: 3 additions & 3 deletions tests/test_ai_metadata_cloud.py
@@ -33,8 +33,8 @@ def test_repeatability_flask():
yaml1 = ai_metadata_generator.generate_instrumentation_metadata(instr_dir)
yaml2 = ai_metadata_generator.generate_instrumentation_metadata(instr_dir)

assert isinstance(yaml1, str)
assert isinstance(yaml2, str)
assert isinstance(yaml1, str)
assert isinstance(yaml2, str)

if yaml1 != yaml2:
# Log all lines that differ
@@ -52,7 +52,7 @@ def test_repeatability_flask():
logging.info("Lines only in second YAML:")
for line in only_in_2:
logging.info(line)
assert overlap > MIN_OVERLAP
assert overlap > MIN_OVERLAP
else:
logging.info("No differences detected.")

18 changes: 9 additions & 9 deletions tests/test_ai_metadata_local.py
@@ -17,19 +17,19 @@ def test_env_vars_found():
# OTEL_BAZ in a comment
"""
vars_found = ai_metadata_generator.extract_env_vars_from_code(code)
assert set(vars_found) == {"OTEL_FOO", "SPLUNK_BAR", "OTEL_BAZ"}
assert set(vars_found) == {"OTEL_FOO", "SPLUNK_BAR", "OTEL_BAZ"}


def test_env_vars_empty():
code = "print('no envs here')"
vars_found = ai_metadata_generator.extract_env_vars_from_code(code)
assert vars_found == []
assert vars_found == []


def test_tokens():
assert ai_metadata_generator.estimate_tokens("abcd" * EXPECTED_TOKEN_10) == EXPECTED_TOKEN_10
assert ai_metadata_generator.estimate_tokens("a" * 100) == EXPECTED_TOKEN_25
assert ai_metadata_generator.estimate_tokens("") == 0
assert ai_metadata_generator.estimate_tokens("abcd" * EXPECTED_TOKEN_10) == EXPECTED_TOKEN_10
assert ai_metadata_generator.estimate_tokens("a" * 100) == EXPECTED_TOKEN_25
assert ai_metadata_generator.estimate_tokens("") == 0


def test_code_prioritizes_init(tmp_path):
Expand All @@ -39,9 +39,9 @@ def test_code_prioritizes_init(tmp_path):
(instr_dir / "foo.py").write_text("print('foo')\n")

code = ai_metadata_generator.get_instrumentation_code(str(instr_dir))
assert "# Source: __init__.py" in code
assert "hello" in code
assert "foo" in code
assert "# Source: __init__.py" in code
assert "hello" in code
assert "foo" in code


def test_code_empty_dir(tmp_path):
@@ -73,4 +73,4 @@ def test_yaml_write(tmp_path):

out_file = yamls_dir / "instr.yaml"
content = out_file.read_text()
assert "OTEL_TEST_ENABLED" in content
assert "OTEL_TEST_ENABLED" in content