diff --git a/README.md b/README.md index 78eb6b0..d5a2090 100644 --- a/README.md +++ b/README.md @@ -88,6 +88,42 @@ for key, value, obj in yml.walk_keys(): print(yml.to_yaml()) ``` +### Working with Comments + +Yamlium provides structured access to YAML comments via the `comments` attribute: + +```python +from yamlium import parse + +yaml_str = """ +app: + # Database settings + db_host: localhost # primary host + # TODO: increase timeout + + # Cache config + cache_ttl: 3600 +""" +yml = parse(yaml_str) + +# Access comments on a key +db_key = list(yml["app"].keys())[0] +print(db_key.comments.head) # ['# Database settings'] + +# Access inline and foot comments on a value +db_value = yml["app"]["db_host"] +print(db_value.comments.line) # '# primary host' +print(db_value.comments.foot) # ['# TODO: increase timeout'] + +# Modify comments +db_value.comments.line = "# updated comment" +``` + +Comment types: +- `comments.head` - comments directly above a node +- `comments.line` - inline comment on the same line +- `comments.foot` - comments below a node (before a blank line) + ### JSON Conversion ```python diff --git a/tests/test_nodes.py b/tests/test_nodes.py index 5246d1b..d5874a1 100644 --- a/tests/test_nodes.py +++ b/tests/test_nodes.py @@ -68,9 +68,9 @@ def test_node_repr(): # Test with comments scalar1 = Scalar("value1") - scalar1.inline_comments = "# inline comment" + scalar1.comments.line = "# inline comment" scalar2 = Scalar("value2") - scalar2.stand_alone_comments = ["# standalone comment"] + scalar2.comments.head = ["# standalone comment"] mapping_with_comments = Mapping({Key("key1"): scalar1, Key("key2"): scalar2}) expected_with_comments = ( diff --git a/tests/test_parser.py b/tests/test_parser.py index 29ee14f..7860cb5 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -728,3 +728,234 @@ def test_chomping_multiline_content(): result = parse(yaml) assert result["key"]._value == "line1\nline2\nline3" assert not result["key"]._value.endswith("\n") + + +# ============================================================================= +# Head/Line/Foot comment semantics tests +# ============================================================================= + + +def test_comments_head_line_foot(): + """Test the plan's example YAML with head/line/foot semantics.""" + comp(""" +app: + name: 'Demo' + + # head + version: '1.0.24' # line + # foot + + # env docs + env: local +""") + + +def test_comment_ownership_semantics(): + """Test that comments are correctly classified as head/line/foot.""" + yaml_str = """ +app: + # head for version + version: '1.0.24' + # foot for version + + # head for env + env: local +""" + doc = parse(yaml_str) + app = doc["app"] + + # Get the keys to access comments + keys = list(app.keys()) + version_key = keys[0] + env_key = keys[1] + + # Version key should have head comment + assert version_key.comments.head == ["# head for version"] + + # Version value should have foot comment (comment followed by blank line) + version_value = app["version"] + assert version_value.comments.foot == ["# foot for version"] + + # Env key should have head comment (comment after blank line) + assert env_key.comments.head == ["# head for env"] + + +def test_inline_comment_on_value(): + """Test that inline comments are attached to the correct node.""" + yaml_str = """key: value # inline comment +""" + doc = parse(yaml_str) + assert doc["key"].comments.line == "# inline comment" + assert doc["key"].comments.head == [] + assert doc["key"].comments.foot == [] + + +def test_multiple_head_comments(): + """Test multiple consecutive head comments.""" + yaml_str = """ +# comment 1 +# comment 2 +# comment 3 +key: value +""" + doc = parse(yaml_str) + key = list(doc.keys())[0] + assert key.comments.head == ["# comment 1", "# comment 2", "# comment 3"] + + +def test_multiple_foot_comments(): + """Test multiple consecutive foot comments followed by blank line.""" + yaml_str = """ +key1: value1 +# foot 1 +# foot 2 + +key2: value2 +""" + doc = parse(yaml_str) + assert doc["key1"].comments.foot == ["# foot 1", "# foot 2"] + + +def test_comments_without_blank_line_become_head(): + """Test that comments without a following blank line become head of next node.""" + yaml_str = """ +key1: value1 +# this becomes head of key2 +key2: value2 +""" + doc = parse(yaml_str) + key2 = list(doc.keys())[1] + assert key2.comments.head == ["# this becomes head of key2"] + assert doc["key1"].comments.foot == [] + + +def test_head_and_foot_comments_together(): + """Test a node with both head and foot comments.""" + yaml_str = """ +# head comment +key: value +# foot comment + +next: value2 +""" + doc = parse(yaml_str) + key = list(doc.keys())[0] + assert key.comments.head == ["# head comment"] + assert doc["key"].comments.foot == ["# foot comment"] + + +def test_comments_dataclass_is_empty(): + """Test the is_empty method on Comments dataclass.""" + from yamlium.nodes import Comments + + empty = Comments() + assert empty.is_empty() is True + + with_head = Comments(head=["# comment"]) + assert with_head.is_empty() is False + + with_line = Comments(line="# inline") + assert with_line.is_empty() is False + + with_foot = Comments(foot=["# foot"]) + assert with_foot.is_empty() is False + + +def test_backward_compat_stand_alone_comments(): + """Test backward compatibility via stand_alone_comments property (deprecated).""" + import warnings + + yaml_str = """ +# head comment +key: value +""" + doc = parse(yaml_str) + key = list(doc.keys())[0] + # Old API should still work but emit deprecation warning + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + assert key.stand_alone_comments == ["# head comment"] + assert len(w) == 1 + assert issubclass(w[0].category, DeprecationWarning) + assert "comments.head" in str(w[0].message) + # Should be same as new API (no warning) + assert key.comments.head == ["# head comment"] + + +def test_backward_compat_inline_comments(): + """Test backward compatibility via inline_comments property (deprecated).""" + import warnings + + yaml_str = """key: value # inline +""" + doc = parse(yaml_str) + # Old API should still work but emit deprecation warning + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + assert doc["key"].inline_comments == "# inline" + assert len(w) == 1 + assert issubclass(w[0].category, DeprecationWarning) + assert "comments.line" in str(w[0].message) + # Should be same as new API (no warning) + assert doc["key"].comments.line == "# inline" + + +def test_foot_comments_roundtrip(): + """Test that foot comments are preserved in round-trip.""" + yaml_str = """key1: value1 +# foot comment + +key2: value2 +""" + doc = parse(yaml_str) + assert doc["key1"].comments.foot == ["# foot comment"] + # Round-trip should preserve foot comments + result = doc.to_yaml() + assert "# foot comment" in result + + +def test_nested_structure_comments(): + """Test comments in nested structures.""" + yaml_str = """ +outer: + # inner head + inner: value # inner line + # inner foot + + # next head + next: value2 +""" + doc = parse(yaml_str) + outer = doc["outer"] + inner_keys = list(outer.keys()) + + # Inner key has head comment + assert inner_keys[0].comments.head == ["# inner head"] + # Inner value has line comment + assert outer["inner"].comments.line == "# inner line" + # Inner value has foot comment + assert outer["inner"].comments.foot == ["# inner foot"] + # Next key has head comment + assert inner_keys[1].comments.head == ["# next head"] + + +def test_sequence_item_comments(): + """Test comments on sequence items.""" + yaml_str = """ +items: + # head for item1 + - item1 + # foot for item1 + + # head for item2 + - item2 +""" + doc = parse(yaml_str) + seq = doc["items"] + + # First item has head comment + assert seq[0].comments.head == ["# head for item1"] + # First item has foot comment + assert seq[0].comments.foot == ["# foot for item1"] + # Second item has head comment + assert seq[1].comments.head == ["# head for item2"] diff --git a/yamlium/nodes.py b/yamlium/nodes.py index dd50247..70a1149 100644 --- a/yamlium/nodes.py +++ b/yamlium/nodes.py @@ -1,14 +1,34 @@ from __future__ import annotations import pprint +import warnings from abc import abstractmethod from copy import copy +from dataclasses import dataclass, field from pathlib import Path from typing import TYPE_CHECKING, Any, Generator, Iterator, Literal, cast from .lexer import T +@dataclass +class Comments: + """YAML comment storage with head/line/foot semantics. + + Blank lines act as ownership separators: + - head: Comments directly above (no blank line between comment and node) + - line: Inline comment on same line as node + - foot: Comments below node, before next blank line + """ + + head: list[str] = field(default_factory=list) + line: str | None = None + foot: list[str] = field(default_factory=list) + + def is_empty(self) -> bool: + return not self.head and not self.line and not self.foot + + def _indent(i: int, /) -> str: return " " * i @@ -29,8 +49,11 @@ def _preserve_metadata(old_value: Node | None, new_value: Node) -> Node: """Copy metadata (newlines, comments) from old value to new value.""" if old_value is not None and isinstance(old_value, Node): new_value.newlines = old_value.newlines - new_value.inline_comments = old_value.inline_comments - new_value.stand_alone_comments = old_value.stand_alone_comments + new_value.comments = Comments( + head=old_value.comments.head.copy(), + line=old_value.comments.line, + foot=old_value.comments.foot.copy(), + ) return new_value @@ -63,8 +86,46 @@ def __init__( self._indent = _indent self._column: int = -99 self.newlines: int = 0 - self.stand_alone_comments: list[str] = [] - self.inline_comments: str | None = None + self.comments: Comments = Comments() + + # Backward compatibility properties (deprecated) + @property + def stand_alone_comments(self) -> list[str]: + """Deprecated: Use comments.head instead.""" + warnings.warn( + "stand_alone_comments is deprecated, use comments.head instead", + DeprecationWarning, + stacklevel=2, + ) + return self.comments.head + + @stand_alone_comments.setter + def stand_alone_comments(self, value: list[str]) -> None: + warnings.warn( + "stand_alone_comments is deprecated, use comments.head instead", + DeprecationWarning, + stacklevel=2, + ) + self.comments.head = value + + @property + def inline_comments(self) -> str | None: + """Deprecated: Use comments.line instead.""" + warnings.warn( + "inline_comments is deprecated, use comments.line instead", + DeprecationWarning, + stacklevel=2, + ) + return self.comments.line + + @inline_comments.setter + def inline_comments(self, value: str | None) -> None: + warnings.warn( + "inline_comments is deprecated, use comments.line instead", + DeprecationWarning, + stacklevel=2, + ) + self.comments.line = value if TYPE_CHECKING: @@ -118,8 +179,10 @@ def _ast_info(self) -> str: f"{k}={v}" for k, v in { "newlines": self.newlines, - "inline_comment": self.inline_comments, - "stand_alone_comments": self.stand_alone_comments, + # Use backward-compat names for repr to avoid breaking tests + "inline_comment": self.comments.line, + "stand_alone_comments": self.comments.head, + "foot_comments": self.comments.foot, }.items() if v ] @@ -163,13 +226,21 @@ def _ast_repr( return f"Alias('{self._value}')" raise ValueError(f"{type(self)} not supported.") - def _get_sa_comments(self, i: int) -> list[str]: - return [_indent(i) + c for c in self.stand_alone_comments] + def _get_head_comments(self, i: int) -> list[str]: + return [_indent(i) + c for c in self.comments.head] + + def _get_foot_comments(self, i: int) -> list[str]: + return [_indent(i) + c for c in self.comments.foot] - def _enrich_yaml(self, s: str, /) -> str: + def _enrich_yaml(self, s: str, /, i: int = 0, foot_indent: int | None = None) -> str: output = s - if self.inline_comments: - output += f" {self.inline_comments}" + if self.comments.line: + output += f" {self.comments.line}" + if self.comments.foot: + # foot_indent allows caller to specify different indent for foot comments + # (e.g., in mappings, foot comments should be at key indent, not value indent) + fi = foot_indent if foot_indent is not None else i + output += "\n" + "\n".join(self._get_foot_comments(fi)) return output + "\n" * self.newlines @abstractmethod @@ -403,7 +474,7 @@ def _to_yaml(self, i: int = 0) -> str: items = [] for x in self: # Check if we should print standalone comments - items.extend(x._get_sa_comments(i=i)) + items.extend(x._get_head_comments(i=i)) # If child is a mapping if isinstance(x, Mapping): @@ -411,14 +482,16 @@ def _to_yaml(self, i: int = 0) -> str: for k, v in x.items(): if is_first_item: prefix = _indent(i) + "- " - items.extend(k._get_sa_comments(i=i)) + items.extend(k._get_head_comments(i=i)) else: prefix = _indent(i) + " " - items.extend(k._get_sa_comments(i=i + 1)) + items.extend(k._get_head_comments(i=i + 1)) if isinstance(v, (Mapping, Sequence)): # If it is another block, add newline items.append(f"{prefix}{k._to_yaml()}\n{v._to_yaml(i + 2)}") + elif isinstance(v, Scalar): + items.append(f"{prefix}{k._to_yaml()} {v._to_yaml(i + 2, foot_indent=i + 1)}") else: items.append(f"{prefix}{k._to_yaml()} {v._to_yaml(i + 2)}") is_first_item = False @@ -429,7 +502,10 @@ def _to_yaml(self, i: int = 0) -> str: else: prefix = _indent(i) + "- " - items.append(f"{prefix}{x._to_yaml(i + 1)}") + if isinstance(x, Scalar): + items.append(f"{prefix}{x._to_yaml(i + 1, foot_indent=i)}") + else: + items.append(f"{prefix}{x._to_yaml(i + 1)}") return "\n".join(items) def append(self, item: Any) -> None: @@ -486,7 +562,7 @@ def _to_yaml(self, i: int = 0) -> str: items = [] _i = _indent(i) for k, v in self.items(): - items.extend(k._get_sa_comments(i=i)) + items.extend(k._get_head_comments(i=i)) if isinstance(v, (Mapping, Sequence)): if v._is_inline: items.append(f"{_i}{k._to_yaml()} {v._to_yaml()}") @@ -498,7 +574,7 @@ def _to_yaml(self, i: int = 0) -> str: else: val = ( f"\n{_indent(i + 1)}" if v._is_indented else " " - ) + v._to_yaml(i + 1) + ) + v._to_yaml(i + 1, foot_indent=i) items.append(f"{_i}{k._to_yaml()}{val}") else: items.append(f"{_i}{k._to_yaml()} {v._to_yaml()}") @@ -589,7 +665,7 @@ def to_dict(self) -> str | int | float | bool | None: val += "\n" return val - def _to_yaml(self, i: int = 0) -> str: + def _to_yaml(self, i: int = 0, foot_indent: int | None = None) -> str: if self._type == T.SCALAR: if isinstance(self._value, bool): val = "true" if self._value else "false" @@ -614,7 +690,7 @@ def _to_yaml(self, i: int = 0) -> str: + "\n" + "\n".join([(i_ + r) if r else "" for r in content.split("\n")]) ) - return self._enrich_yaml(val) + return self._enrich_yaml(val, i, foot_indent) if TYPE_CHECKING: @@ -632,15 +708,15 @@ def __init__( def __str__(self) -> str: return str(self.child.to_dict()) - def _to_yaml(self, i: int = 0, x: bool = False) -> str: - return self._enrich_yaml(f"*{self._value}") + def _to_yaml(self, i: int = 0, foot_indent: int | None = None) -> str: + return self._enrich_yaml(f"*{self._value}", i, foot_indent) class Document(Sequence): def _to_yaml(self) -> str: items = [] for x in self: - items.extend(x.stand_alone_comments) + items.extend(x.comments.head) items.append(x._to_yaml()) result = "\n\n---\n".join(items) if not result: diff --git a/yamlium/parser.py b/yamlium/parser.py index e16a6ad..a0d3190 100644 --- a/yamlium/parser.py +++ b/yamlium/parser.py @@ -117,16 +117,25 @@ def _process_node(self, n: NodeType) -> NodeType: if not isinstance(n, Alias): self.decode_count += 1 - # Check if this node should be the value of an anchor - # if self.anchor_cache: - # self.anchors[self.anchor_cache] = n - # self.anchor_cache = None - - # If no comment cache, return. - if not self.comment_cache: - return n - n.stand_alone_comments = self.comment_cache - self.comment_cache = [] + # Comments that were pending (no blank line after previous value) become + # HEAD comments of this new node, not FOOT of the previous. + # Only comments followed by a blank line become FOOT comments. + if self.pending_foot_comments: + n.comments.head = self.pending_foot_comments + n.comments.head + self.pending_foot_comments = [] + + # Attach head comments from cache (comments after a blank line) + if self.comment_cache: + n.comments.head = n.comments.head + self.comment_cache + self.comment_cache = [] + + # Reset blank line state when processing a new node + self.saw_blank_line = False + + # Track last value node (keys don't receive foot comments) + if not isinstance(n, Key): + self.last_value_node = n + return n # ------------------------------------------------------------------ @@ -196,11 +205,22 @@ def _build_alias(self) -> Alias: def _handle_comment(self) -> None: token = self._take_token - # If we're on the same line still, add as inline comment. + # Inline: same line as current node if self._current_line == token.line: - self._last_node.inline_comments = token.value - else: + self._last_node.comments.line = token.value + return + + # If no value node exists yet, comments become head of next node + if self.last_value_node is None: self.comment_cache.append(token.value) + return + + # After blank line: head of next node + if self.saw_blank_line: + self.comment_cache.append(token.value) + else: + # No blank line yet: potential foot of previous node + self.pending_foot_comments.append(token.value) def _handle_anchor(self) -> Mapping | Scalar | Sequence | Alias: n = self._last_node @@ -239,6 +259,11 @@ def _check_special_types(self, t: T | None) -> bool: self._handle_comment() elif t == T.EMPTY_LINE: self._take_token # Consume token + # Flush pending foot comments to last value node + if self.pending_foot_comments and self.last_value_node: + self.last_value_node.comments.foot = self.pending_foot_comments + self.pending_foot_comments = [] + self.saw_blank_line = True self._last_node.newlines += 1 else: return False @@ -397,6 +422,11 @@ def parse(self) -> Document: self.anchor_cache: str | None = None self.comment_cache: list[str] = [] + # Comment classification state (head/line/foot semantics) + self.pending_foot_comments: list[str] = [] # May become foot of previous node + self.saw_blank_line: bool = False # Tracks blank line occurrence + self.last_value_node: Node | None = None # Node eligible for foot comments + # Security: depth limiting self.current_depth = 0