Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
148 changes: 140 additions & 8 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,16 +499,35 @@ def test_multiline_with_multiple_indentation_levels():
def test_multiline_literal_with_empty_lines_and_indentation():
"""Test literal scalar with empty lines and indentation."""
comp("""
key: |
key: >
line 1

indented line

back to base
""")
# Input has blank lines without indentation
comp(
"key: |\n"
" line 1\n"
"\n" # blank line without indentation
" indented line\n"
"\n" # blank line without indentation
" back to base\n"
)


def test_multiline_folded_code_block():
def test_multiline_literal_preserves_indented_blank_lines():
    """Test that blank lines in multiline literals preserve their indentation."""
    # Use explicit string to ensure the blank line has indentation (4 spaces)
    # NOTE(review): the blank-line piece below is a bare "\n" with no spaces,
    # which does not match the "4 spaces" remark above — whitespace may have
    # been lost when this text was captured; confirm against the original file.
    yaml_input = (
        "config:\n"
        " summary: |\n"
        " First paragraph here.\n"
        "\n"
        " Second paragraph here.\n"
    )
    # Hand the document to the comp helper (defined outside this view).
    comp(yaml_input)
"""Test folded scalar with code-like indented content."""
comp("""
description: >
Expand Down Expand Up @@ -558,23 +577,26 @@ def test_sequence_with_empty_line_and_comment():
Note: Comments at column 0 are attached to the next sequence item and
output at the sequence indentation level. This is the expected behavior.
"""
comp("""
comp(
"""
test:
- first_item
- second_item

- third_item
# a comment
- fourth_item
""", expected_result="""
""",
expected_result="""
test:
- first_item
- second_item

- third_item
# a comment
- fourth_item
""")
""",
)


def test_comment_at_column_zero_in_sequence():
Expand All @@ -583,16 +605,126 @@ def test_comment_at_column_zero_in_sequence():
Note: Comments at column 0 are attached to the next sequence item and
output at the sequence indentation level. This is the expected behavior.
"""
comp("""
comp(
"""
items:
- item1
# comment at column 0
- item2
- item3
""", expected_result="""
""",
expected_result="""
items:
- item1
# comment at column 0
- item2
- item3
""")
""",
)


# =============================================================================
# Chomping indicator tests
# =============================================================================


def test_literal_strip():
    """Check that the ``|-`` indicator drops every trailing newline."""
    node = parse("key: |-\n value\n")["key"]
    # With strip chomping the content comes back without a final line break.
    assert node._value == "value"
    assert not node._value.endswith("\n")


def test_literal_keep():
    """Check that the ``|+`` indicator retains every trailing newline."""
    source = "key: |+\n value\n\n\n"
    parsed = parse(source)
    # Keep chomping: all three line breaks after the content must survive.
    assert parsed["key"]._value == "value\n\n\n"


def test_literal_clip():
    """Check that a bare ``|`` (clip) leaves exactly one trailing newline."""
    source = "key: |\n value\n\n\n"
    parsed = parse(source)
    # Clip is the default chomping mode: extra trailing line breaks collapse
    # to a single one.
    assert parsed["key"]._value == "value\n"


def test_folded_strip():
    """Check that the ``>-`` indicator drops every trailing newline."""
    node = parse("key: >-\n value\n")["key"]
    # With strip chomping the folded content has no final line break.
    assert node._value == "value"
    assert not node._value.endswith("\n")


def test_folded_keep():
    """Check that the ``>+`` indicator retains every trailing newline."""
    source = "key: >+\n value\n\n\n"
    parsed = parse(source)
    # Keep chomping: all three line breaks after the content must survive.
    assert parsed["key"]._value == "value\n\n\n"


def test_folded_clip():
    """Check that a bare ``>`` (clip) leaves exactly one trailing newline."""
    source = "key: >\n value\n\n\n"
    parsed = parse(source)
    # Clip is the default chomping mode: extra trailing line breaks collapse
    # to a single one.
    assert parsed["key"]._value == "value\n"


def test_chomping_roundtrip_strip():
    """Round-trip a literal block and check the ``|-`` indicator is re-emitted."""
    # NOTE(review): the scalar's content line carries no indentation here;
    # presumably leading whitespace was lost in capture — confirm against the
    # original file before relying on this literal.
    source = """key: |-
no trailing newline
"""
    rendered = parse(source).to_yaml()
    assert "|-" in rendered


def test_chomping_roundtrip_keep():
    """Round-trip a literal block and check the ``|+`` indicator is re-emitted."""
    # NOTE(review): the scalar's content line carries no indentation here;
    # presumably leading whitespace was lost in capture — confirm against the
    # original file before relying on this literal.
    source = """key: |+
keep trailing newlines
"""
    rendered = parse(source).to_yaml()
    assert "|+" in rendered


def test_chomping_roundtrip_folded_strip():
    """Round-trip a folded block and check the ``>-`` indicator is re-emitted."""
    # NOTE(review): the scalar's content line carries no indentation here;
    # presumably leading whitespace was lost in capture — confirm against the
    # original file before relying on this literal.
    source = """key: >-
no trailing newline
"""
    rendered = parse(source).to_yaml()
    assert ">-" in rendered


def test_chomping_roundtrip_folded_keep():
    """Round-trip a folded block and check the ``>+`` indicator is re-emitted."""
    # NOTE(review): the scalar's content line carries no indentation here;
    # presumably leading whitespace was lost in capture — confirm against the
    # original file before relying on this literal.
    source = """key: >+
keep trailing newlines
"""
    rendered = parse(source).to_yaml()
    assert ">+" in rendered


def test_chomping_multiline_content():
    """Check strip chomping on a literal block holding several content lines."""
    # NOTE(review): the content lines carry no indentation here; presumably
    # leading whitespace was lost in capture — confirm against the original file.
    source = """key: |-
line1
line2
line3
"""
    node = parse(source)["key"]
    # Interior line breaks are kept; only the trailing one is removed.
    assert node._value == "line1\nline2\nline3"
    assert not node._value.endswith("\n")
5 changes: 3 additions & 2 deletions tests/test_parser_to_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,9 @@ def test_multiline_strings():
be joined with spaces
""",
{
"multiline": "This is a\nmultiline string\nwith multiple lines",
"folded": "This is a folded string that will be joined with spaces",
# Clip mode (default) adds a single trailing newline per YAML spec
"multiline": "This is a\nmultiline string\nwith multiple lines\n",
"folded": "This is a folded string that will be joined with spaces\n",
},
)

Expand Down
139 changes: 139 additions & 0 deletions tests/test_security.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
"""Security tests for yamlium.

These tests verify that yamlium protects against common YAML security vulnerabilities:
- Alias bomb (billion laughs) attacks
- Excessive nesting depth
- Circular references
"""

import pytest

from yamlium import ParsingError, parse


def test_alias_bomb_protection():
    """A billion-laughs style document must be rejected with a ParsingError.

    The input uses far more alias references than real nodes so that the
    alias ratio protection (MAX_ALIAS_RATIO = 10) is triggered.
    """
    # One anchor, one extra key, then 50 aliases inside a flow sequence:
    # decode_count ~ 3 (key 'a', scalar '1', key 'b') against alias_count = 50,
    # giving a ratio of 50/3 ~ 16.7, which is above 10.
    alias_list = ", ".join(["*a"] * 50)
    payload = "a: &a 1\n" + "b: [" + alias_list + "]\n"
    with pytest.raises(ParsingError, match="Excessive aliasing"):
        parse(payload)


def test_alias_bomb_moderate_usage_allowed():
    """Ordinary, modest alias usage must not trip the alias protection."""
    # NOTE(review): nested keys carry no indentation here; presumably leading
    # whitespace was lost in capture — confirm against the original file.
    source = """
base: &base
name: default
value: 42

derived1: *base
derived2: *base
derived3:
<<: *base
extra: value
"""
    # Parsing is expected to succeed; spot-check one value from the anchor.
    parsed = parse(source)
    assert parsed["base"]["name"] == "default"


def test_depth_limit_exceeded():
    """Mappings nested beyond the depth limit must raise a ParsingError."""
    # 202 levels is more than MAX_DEPTH=200; every level is a real key so the
    # generated YAML stays valid.
    depth = 202
    rows = [f"{' ' * level}level{level}:" for level in range(depth)]
    # Close the chain with a concrete value under the innermost key.
    rows.append(f"{' ' * depth}value: end")
    document = "\n".join(rows)
    with pytest.raises(ParsingError, match="Maximum nesting depth"):
        parse(document)


def test_depth_limit_normal_nesting_allowed():
    """A handful of nesting levels must parse without hitting the depth limit."""
    # NOTE(review): nested keys carry no indentation here; presumably leading
    # whitespace was lost in capture — confirm against the original file.
    source = """
level1:
level2:
level3:
level4:
level5:
value: deep
"""
    parsed = parse(source)
    innermost = parsed["level1"]["level2"]["level3"]["level4"]["level5"]
    assert innermost["value"] == "deep"


def test_depth_limit_sequence_nesting():
    """Nested sequences count towards the depth limit as well."""
    # Each generated row opens a sequence item holding a mapping, i.e. two
    # depth levels, so 102 rows amount to 204 levels.
    depth = 102
    rows = [
        "root:",
        *(f"{' ' * (step * 2 + 1)}- level{step}:" for step in range(depth)),
    ]
    # Close the chain with a concrete value under the innermost key.
    rows.append(f"{' ' * (depth * 2 + 1)}value: end")
    document = "\n".join(rows)
    with pytest.raises(ParsingError, match="Maximum nesting depth"):
        parse(document)


def test_circular_reference_detection():
    """An anchor whose own content aliases it must be rejected."""
    # NOTE(review): the child key carries no indentation here; presumably
    # leading whitespace was lost in capture — confirm against the original file.
    source = """
a: &a
b: *a
"""
    with pytest.raises(ParsingError, match="Circular reference"):
        parse(source)


def test_circular_reference_simple():
    """A direct self-reference through a child key must be rejected."""
    # NOTE(review): the child key carries no indentation here; presumably
    # leading whitespace was lost in capture — confirm against the original file.
    source = """
root: &root
child: *root
"""
    with pytest.raises(ParsingError, match="Circular reference"):
        parse(source)


def test_non_circular_forward_reference_allowed():
    """Aliases to anchors defined earlier, with no cycle, must be accepted."""
    # NOTE(review): nested keys carry no indentation here; presumably leading
    # whitespace was lost in capture — confirm against the original file.
    source = """
first: &first
value: 1

second: &second
ref: *first
value: 2

third:
ref1: *first
ref2: *second
"""
    parsed = parse(source)
    # The anchored mapping is readable directly ...
    assert parsed["first"]["value"] == 1
    # ... and through the alias, whose target is reached via ``.child``.
    assert parsed["third"]["ref1"].child["value"] == 1


def test_alias_without_anchor():
    """Referencing an anchor that was never defined must raise a ParsingError."""
    source = """
key: *undefined_anchor
"""
    with pytest.raises(ParsingError, match="No anchor found"):
        parse(source)
Loading