Skip to content

Commit 6da76f4

Browse files
author
H.L.J.Laloge
committed
fix(html): add acceptance for whitespace at beginning
1 parent 852a3ba commit 6da76f4

File tree

3 files changed

+20
-10
lines changed

3 files changed

+20
-10
lines changed

polydet/plugins/html.py

+11 -10
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
import mmap
2+
import io
23
import yara
34

45
from polydet.polyglot_level import PolyglotLevel
@@ -8,8 +9,8 @@
89
RULES = """
910
rule IsHTML {
1011
strings:
11-
$doctype = /<!DOCTYPE html>/
12-
$opening_tag = /<(html|body|script)/
12+
$doctype = /<!DOCTYPE html/ nocase
13+
$opening_tag = /<(html|body|script)/ nocase
1314
1415
condition:
1516
$doctype or $opening_tag
@@ -25,9 +26,7 @@ def check(filename):
2526
return check_with_matches(filename, {m.rule: m for m in matches})
2627

2728

28-
# TODO Add acceptance for whitespace at beginning
29-
# TODO Check lowercase doctypes
30-
# TODO Check unclosed tags as '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"' or '<html onload="">'.
29+
# TODO Support uppercase and random case doctype and tags
3130
# Can be done with ungreedy matching in yara
3231
# TODO Improve to use results of yara matching
3332
def check_with_matches(filename: str, matches):
@@ -56,8 +55,12 @@ def check_with_matches(filename: str, matches):
5655
break
5756
if doc_start != -1:
5857
level = PolyglotLevel()
59-
if doc_start != 0:
58+
59+
buf.seek(0, io.SEEK_SET)
60+
begin_content = buf.read(doc_start) # Read until doc start
61+
if not __is_whitespace(begin_content):
6062
level.add_chunk(0, doc_start)
63+
6164
buf.seek(doc_end)
6265
contents = buf.read()
6366
if not __is_whitespace(contents):
@@ -68,7 +71,5 @@ def check_with_matches(filename: str, matches):
6871

6972

7073
def __is_whitespace(contents: bytes):
71-
for elem in contents:
72-
if elem != ord(' ') and elem != ord('\t') and elem != ord('\n'):
73-
return False
74-
return True
74+
whitespaces = b' \t\r\n'
75+
return all(b in whitespaces for b in contents)
tests/samples/html/whitespace_beginning.html

+5
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
2+
3+
<html>
4+
</html>
5+

tests/test_html.py

+4
Original file line number | Diff line number | Diff line change
@@ -61,3 +61,7 @@ def test_garbage_end_4(self):
6161
result = html.check('tests/samples/html/garbage_end_4.html')
6262
self.assertEqual(PolyglotLevel(suspicious_chunks=[(0x6, 0x12)]),
6363
result)
64+
65+
def test_whitespace_beginning(self):
66+
result = html.check('tests/samples/html/whitespace_beginning.html')
67+
self.assertEqual(PolyglotLevel(), result)

0 commit comments

Comments
 (0)