diff --git a/pkgs/html/CHANGELOG.md b/pkgs/html/CHANGELOG.md index 17cbb8466..c4c336a33 100644 --- a/pkgs/html/CHANGELOG.md +++ b/pkgs/html/CHANGELOG.md @@ -1,6 +1,7 @@ ## 0.15.7-wip - Require Dart `3.6` +- Fix a bug in DOM parsing where `<br>` tags do not create a new line when HTML is converted to text. ## 0.15.6 diff --git a/pkgs/html/lib/dom.dart b/pkgs/html/lib/dom.dart index 0c6b38e58..c3212db8b 100644 --- a/pkgs/html/lib/dom.dart +++ b/pkgs/html/lib/dom.dart @@ -214,6 +214,9 @@ abstract class Node { } // Implemented per: http://dom.spec.whatwg.org/#dom-node-textcontent + String? textContent({bool convertBRsToNewlines = false}) => + _getTextContent(this, convertBRsToNewlines: convertBRsToNewlines); + String? get text => null; set text(String? value) {} @@ -1099,8 +1102,36 @@ class FilteredElementList extends IterableBase } // http://dom.spec.whatwg.org/#dom-node-textcontent -// For Element and DocumentFragment -String _getText(Node node) => (_ConcatTextVisitor()..visit(node)).toString(); +String? _getTextContent(Node node, {bool convertBRsToNewlines = false}) { + // DocumentFragment or Element: return descendant text content + if (node is DocumentFragment || node is Element) { + return _getText(node, convertBRsToNewlines: convertBRsToNewlines); + } + // CharacterData (Text, Comment): return data + if (node is Text) { + return node.data; + } + if (node is Comment) { + return node.data; + } + // Otherwise: return null + return null; +} + +/// Returns true if the element is an HTML <br>
element. +/// Checks both the local name and namespace to ensure it's a proper HTML br element. +/// Note: null namespace is treated as HTML namespace for elements created by the HTML parser. +bool isElementBr(Element element) { + if (element.localName != 'br') return false; + final ns = element.namespaceUri; + return ns == null || ns == Namespaces.html; +} + +// For Element and DocumentFragment (legacy helper) +String _getText(Node node, {bool convertBRsToNewlines = false}) => + (_ConcatTextVisitor(convertBRsToNewlines: convertBRsToNewlines) + ..visit(node)) + .toString(); void _setText(Node node, String? value) { node.nodes.clear(); @@ -1109,6 +1140,9 @@ void _setText(Node node, String? value) { class _ConcatTextVisitor extends TreeVisitor { final _str = StringBuffer(); + final bool convertBRsToNewlines; + + _ConcatTextVisitor({this.convertBRsToNewlines = false}); @override String toString() => _str.toString(); @@ -1117,4 +1151,13 @@ class _ConcatTextVisitor extends TreeVisitor { void visitText(Text node) { _str.write(node.data); } + + @override + void visitElement(Element node) { + if (convertBRsToNewlines && isElementBr(node)) { + _str.write('\n'); + return; + } + super.visitElement(node); + } } diff --git a/pkgs/html/test/parser_feature_test.dart b/pkgs/html/test/parser_feature_test.dart index 1faaeea98..7886e8bd4 100644 --- a/pkgs/html/test/parser_feature_test.dart +++ b/pkgs/html/test/parser_feature_test.dart @@ -268,11 +268,13 @@ On line 4, column 3 of ParseError: Unexpected DOCTYPE. Ignored. }); test('Element.text', () { - final doc = parseFragment('
foo
bar
baz
'); + final doc = parseFragment('
foo

bar
baz
'); final e = doc.firstChild!; final text = e.firstChild!; expect((text as Text).data, 'foo'); expect(e.text, 'foobarbaz'); + expect(e.textContent(convertBRsToNewlines: true), 'foo\nbarbaz\n'); + expect(e.textContent(convertBRsToNewlines: false), 'foobarbaz'); e.text = 'FOO'; expect(e.nodes.length, 1); @@ -282,7 +284,7 @@ On line 4, column 3 of ParseError: Unexpected DOCTYPE. Ignored. }); test('Text.text', () { - final doc = parseFragment('
foo
bar
baz
'); + final doc = parseFragment('
foo
bar

baz
'); final e = doc.firstChild!; final text = e.firstChild as Text; expect(text.data, 'foo'); @@ -291,6 +293,8 @@ On line 4, column 3 of ParseError: Unexpected DOCTYPE. Ignored. text.text = 'FOO'; expect(text.data, 'FOO'); expect(e.text, 'FOObarbaz'); + expect(e.textContent(convertBRsToNewlines: true), 'FOObar\nbaz'); + expect(e.textContent(convertBRsToNewlines: false), 'FOObarbaz'); expect(text.text, 'FOO'); });