Skip to content

Commit

Permalink
Push unclosed opening tags correctly (#7)
Browse files Browse the repository at this point in the history
* Push unclosed opening tags correctly

* Skip the `debug_trace` feature on tests
  • Loading branch information
aumetra authored Nov 19, 2023
1 parent 1f5da20 commit f744fd0
Show file tree
Hide file tree
Showing 8 changed files with 68 additions and 18 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,4 @@ jobs:
toolchain: "stable"
- uses: taiki-e/install-action@cargo-hack
- uses: Swatinem/rust-cache@v2
- run: cargo hack test --feature-powerset
- run: cargo hack test --feature-powerset --skip debug_trace
6 changes: 5 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
name = "bubble-bath"
version = "0.1.0"
edition = "2021"
description = "Speedy HTML sanitizer"
description = "Small and quick HTML sanitizer"
license = "MIT OR Apache-2.0"
repository = "https://github.com/aumetra/bubble-bath"
publish = false

[[bench]]
Expand All @@ -29,6 +30,9 @@ thiserror = "1.0.50"

[features]
default = ["simd"]
# Enables the `lol_html` `debug_trace` feature. Do not use in production!
debug_trace = ["lol_html/debug_trace"]
# Enables SIMD acceleration for some operations we have to perform
simd = [
"bytecount/runtime-dispatch-simd",
"dep:simdutf8",
Expand Down
15 changes: 15 additions & 0 deletions fuzz/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ members = ["."]
debug = 1

[[bin]]
name = "fuzz_target_1"
path = "fuzz_targets/fuzz_target_1.rs"
name = "basic"
path = "fuzz_targets/basic.rs"
test = false
doc = false
44 changes: 34 additions & 10 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -266,12 +266,39 @@ impl BubbleBath<'_> {
}

#[inline]
fn comment_handler(comment: &mut Comment<'_>) {
/// Adjusts `counter` by the angle brackets found in `input`.
///
/// Counts the `<` and `>` bytes in `input`, adds the number of `<` bytes to
/// `counter` and then subtracts the number of `>` bytes. Both steps use
/// saturating arithmetic, so the counter neither overflows nor drops below
/// zero.
///
/// This is a plain byte count: no HTML parsing takes place here, so every
/// `<` and `>` is counted regardless of where it appears in the input.
fn count_unclosed_opening_tags<B>(counter: &mut usize, input: B)
where
B: AsRef<[u8]>,
{
let bytes = input.as_ref();

let opening_tags = bytecount::count(bytes, b'<');
let closing_tags = bytecount::count(bytes, b'>');

// Add first, then subtract: the subtraction clamps at zero, so surplus
// `>` bytes can never make the counter negative.
*counter = counter.saturating_add(opening_tags);
*counter = counter.saturating_sub(closing_tags);
}

#[inline]
/// Removes the unclosed `<` bytes of `input` from `counter`.
///
/// Runs `count_unclosed_opening_tags` on `input` against a fresh counter
/// starting at zero, then subtracts that result from `counter` with
/// saturating arithmetic (clamping at zero).
fn subtract_opening_tags<B>(counter: &mut usize, input: B)
where
B: AsRef<[u8]>,
{
// Net number of `<` over `>` within `input` alone (never negative).
let mut tmp_counter = 0;
Self::count_unclosed_opening_tags(&mut tmp_counter, input);

*counter = counter.saturating_sub(tmp_counter);
}

#[inline]
/// Handles a comment: discounts its angle brackets from `opening_tags`, then
/// removes the comment.
///
/// `opening_tags` is the shared counter of unclosed `<` bytes; it is borrowed
/// mutably only for the duration of the subtraction.
fn comment_handler(comment: &mut Comment<'_>, opening_tags: &RefCell<usize>) {
// NOTE(review): presumably a `<` inside a comment is not an unclosed tag
// and so must not count towards the closing `>` bytes appended later; confirm.
Self::subtract_opening_tags(&mut opening_tags.borrow_mut(), comment.text());
comment.remove();
}

#[inline]
fn text_handler(chunk: &mut TextChunk<'_>) {
/// Handles a text chunk: discounts its angle brackets from `opening_tags`,
/// then replaces the chunk's content with the output of `clean_text`.
///
/// `opening_tags` is the shared counter of unclosed `<` bytes; it is borrowed
/// mutably only for the duration of the subtraction.
fn text_handler(chunk: &mut TextChunk<'_>, opening_tags: &RefCell<usize>) {
// Adjust the counter from the original text, before the chunk is rewritten.
// NOTE(review): presumably this keeps a literal `<` in text (e.g. "1 < 2")
// from being treated as an unclosed tag; confirm.
Self::subtract_opening_tags(&mut opening_tags.borrow_mut(), chunk.as_str());
*chunk.as_mut_str() = clean_text(chunk.as_str());
}

Expand All @@ -289,9 +316,10 @@ impl BubbleBath<'_> {
S: FnMut(&[u8]),
{
let unclosed_tags = Rc::new(RefCell::new(Slab::new()));
let opening_tags = RefCell::new(0);

let comment_handler = |comment: &mut Comment<'_>| {
Self::comment_handler(comment);
Self::comment_handler(comment, &opening_tags);
Ok(())
};
let document_end_handler = |document_end: &mut DocumentEnd<'_>| {
Expand All @@ -304,7 +332,7 @@ impl BubbleBath<'_> {
Ok(())
};
let text_handler = |chunk: &mut TextChunk<'_>| {
Self::text_handler(chunk);
Self::text_handler(chunk, &opening_tags);
Ok(())
};

Expand All @@ -331,19 +359,15 @@ impl BubbleBath<'_> {
..Settings::default()
};

let mut opening_tags: usize = 0;
let mut rewriter = HtmlRewriter::new(settings, sink);

for chunk in input {
let tmp_opening_tags = bytecount::count(chunk, b'<');
let tmp_closing_tags = bytecount::count(chunk, b'>');

opening_tags = opening_tags.saturating_add(tmp_opening_tags);
opening_tags = opening_tags.saturating_sub(tmp_closing_tags);
Self::count_unclosed_opening_tags(&mut opening_tags.borrow_mut(), chunk);

rewriter.write(chunk)?;
}

let opening_tags = *opening_tags.borrow();
for _ in 0..opening_tags {
rewriter.write(&[b'>'])?;
}
Expand Down
7 changes: 7 additions & 0 deletions tests/ammonia_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,13 @@ fn deeply_nested_alternating() {
clean("<b-b>".repeat(35_000)).unwrap();
}

#[test]
fn included_angles() {
let fragment = "1 < 2";
let result = clean(fragment).unwrap();
assert_eq!(result, "1 &lt; 2");
}

#[test]
fn remove_script() {
let fragment = "an <script>evil()</script> example";
Expand Down
4 changes: 2 additions & 2 deletions tests/snapshots/torture__torture@extra_open_bracket.snap
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
source: tests/torture.rs
expression: "bubble_bath::clean(&input).unwrap()"
expression: "bubble_bath::clean(input).unwrap()"
input_file: tests/inputs/extra_open_bracket
---
&lt;&gt;&gt;
&lt;
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
source: tests/torture.rs
expression: bubble_bath.clean(&input).unwrap()
expression: bubble_bath.clean(input).unwrap()
input_file: tests/inputs/extra_open_bracket
---
&lt;&gt;&gt;
&lt;

0 comments on commit f744fd0

Please sign in to comment.