Skip to content

Commit 4f1b01b

Browse files
authored
Add :skip_whitespace_nodes to to_tree/2 (#10)
1 parent a0f63dc commit 4f1b01b

File tree

4 files changed

+43
-15
lines changed

4 files changed

+43
-15
lines changed

c_src/lazy_html.cpp

Lines changed: 19 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -350,7 +350,7 @@ ERL_NIF_TERM attributes_to_term(ErlNifEnv *env, lxb_dom_element_t *element,
350350

351351
void node_to_tree(ErlNifEnv *env, fine::ResourcePtr<LazyHTML> &resource,
352352
lxb_dom_node_t *node, std::vector<ERL_NIF_TERM> &tree,
353-
bool sort_attributes) {
353+
bool sort_attributes, bool skip_whitespace_nodes) {
354354
if (node->type == LXB_DOM_NODE_TYPE_ELEMENT) {
355355
auto element = lxb_dom_interface_element(node);
356356

@@ -366,7 +366,8 @@ void node_to_tree(ErlNifEnv *env, fine::ResourcePtr<LazyHTML> &resource,
366366
auto children = std::vector<ERL_NIF_TERM>();
367367
for (auto child = template_aware_first_child(node); child != NULL;
368368
child = lxb_dom_node_next(child)) {
369-
node_to_tree(env, resource, child, children, sort_attributes);
369+
node_to_tree(env, resource, child, children, sort_attributes,
370+
skip_whitespace_nodes);
370371
}
371372

372373
auto children_term = enif_make_list_from_array(
@@ -375,10 +376,19 @@ void node_to_tree(ErlNifEnv *env, fine::ResourcePtr<LazyHTML> &resource,
375376
tree.push_back(enif_make_tuple3(env, name_term, attrs_term, children_term));
376377
} else if (node->type == LXB_DOM_NODE_TYPE_TEXT) {
377378
auto character_data = lxb_dom_interface_character_data(node);
378-
auto term = fine::make_resource_binary(
379-
env, resource, reinterpret_cast<char *>(character_data->data.data),
380-
character_data->data.length);
381-
tree.push_back(term);
379+
380+
auto whitespace_size = leading_whitespace_size(character_data->data.data,
381+
character_data->data.length);
382+
383+
if (whitespace_size == character_data->data.length &&
384+
skip_whitespace_nodes) {
385+
// Append nothing
386+
} else {
387+
auto term = fine::make_resource_binary(
388+
env, resource, reinterpret_cast<char *>(character_data->data.data),
389+
character_data->data.length);
390+
tree.push_back(term);
391+
}
382392
} else if (node->type == LXB_DOM_NODE_TYPE_COMMENT) {
383393
auto character_data = lxb_dom_interface_character_data(node);
384394
auto term = fine::make_resource_binary(
@@ -390,11 +400,12 @@ void node_to_tree(ErlNifEnv *env, fine::ResourcePtr<LazyHTML> &resource,
390400
}
391401

392402
fine::Term to_tree(ErlNifEnv *env, ExLazyHTML ex_lazy_html,
393-
bool sort_attributes) {
403+
bool sort_attributes, bool skip_whitespace_nodes) {
394404
auto tree = std::vector<ERL_NIF_TERM>();
395405

396406
for (auto node : ex_lazy_html.resource->nodes) {
397-
node_to_tree(env, ex_lazy_html.resource, node, tree, sort_attributes);
407+
node_to_tree(env, ex_lazy_html.resource, node, tree, sort_attributes,
408+
skip_whitespace_nodes);
398409
}
399410

400411
return enif_make_list_from_array(env, tree.data(),

lib/lazy_html.ex

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -122,6 +122,10 @@ defmodule LazyHTML do
122122
* `:sort_attributes` - when `true`, attributes lists are sorted
123123
alphabetically by name. Defaults to `false`.
124124
125+
* `:skip_whitespace_nodes` - when `true`, ignores text nodes that
126+
consist entirely of whitespace, usually whitespace between tags.
127+
Defaults to `false`.
128+
125129
## Examples
126130
127131
iex> lazy_html = LazyHTML.from_document(~S|<html><head><title>Page</title></head><body>Hello world</body></html>|)
@@ -143,9 +147,9 @@ defmodule LazyHTML do
143147
"""
144148
@spec to_tree(t(), keyword()) :: LazyHTML.Tree.t()
145149
def to_tree(%LazyHTML{} = lazy_html, opts \\ []) when is_list(opts) do
146-
opts = Keyword.validate!(opts, sort_attributes: false)
150+
opts = Keyword.validate!(opts, sort_attributes: false, skip_whitespace_nodes: false)
147151

148-
LazyHTML.NIF.to_tree(lazy_html, opts[:sort_attributes])
152+
LazyHTML.NIF.to_tree(lazy_html, opts[:sort_attributes], opts[:skip_whitespace_nodes])
149153
end
150154

151155
@doc """

lib/lazy_html/nif.ex

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ defmodule LazyHTML.NIF do
1515
def from_document(_html), do: err!()
1616
def from_fragment(_html), do: err!()
1717
def to_html(_lazy_html, _skip_whitespace_nodes), do: err!()
18-
def to_tree(_lazy_html, _sort_attributes), do: err!()
18+
def to_tree(_lazy_html, _sort_attributes, _skip_whitespace_nodes), do: err!()
1919
def from_tree(_tree), do: err!()
2020
def query(_lazy_html, _css_selector), do: err!()
2121
def filter(_lazy_html, _css_selector), do: err!()

test/lazy_html_test.exs

Lines changed: 17 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -145,14 +145,14 @@ defmodule LazyHTMLTest do
145145
test "with :skip_whitespace_nodes" do
146146
lazy_html =
147147
LazyHTML.from_fragment("""
148-
<p>
149-
<span> Hello </span>
150-
<span> world </span>
148+
<p>
149+
<span> Hello </span>
150+
<span> world </span>
151151
</p>
152152
""")
153153

154154
assert LazyHTML.to_html(lazy_html, skip_whitespace_nodes: true) ==
155-
"<p><span> Hello </span><span> world </span></p>"
155+
"<p><span> Hello </span><span> world </span></p>"
156156
end
157157

158158
test "includes template children" do
@@ -191,6 +191,19 @@ defmodule LazyHTMLTest do
191191
{"template", [], [{"div", [], ["First"]}, {"div", [], ["Second"]}]}
192192
]
193193
end
194+
195+
test "skip_whitespace_nodes: true" do
196+
lazy_html =
197+
LazyHTML.from_fragment("""
198+
<p>
199+
<span> Hello </span>
200+
<span> world </span>
201+
</p>
202+
""")
203+
204+
assert LazyHTML.to_tree(lazy_html, skip_whitespace_nodes: true) ==
205+
[{"p", [], [{"span", [], [" Hello "]}, {"span", [], [" world "]}]}]
206+
end
194207
end
195208

196209
describe "from_tree/2" do

0 commit comments

Comments
 (0)