Skip to content

Commit

Permalink
Rules on whitespace chars are stricter when parsing XHTML/XML than when parsing HTML
Browse files Browse the repository at this point in the history
  • Loading branch information
kevinhendricks committed Dec 1, 2023
1 parent 8be6790 commit feecb06
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 10 deletions.
10 changes: 6 additions & 4 deletions src/Resource_Files/plugin_launchers/python/opf_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab

# Copyright (c) 2014-2020 Kevin B. Hendricks and Doug Massay
# Copyright (c) 2014-2023 Kevin B. Hendricks and Doug Massay
# Copyright (c) 2014 John Schember
# All rights reserved.
#
Expand Down Expand Up @@ -33,6 +33,8 @@
from hrefutils import mime_group_map
from collections import OrderedDict

WHITESPACE_CHARS = (' ', '\n', '\r', '\t')

SPECIAL_HANDLING_TAGS = OrderedDict([
('?xml', ('xmlheader', -1)),
('!--', ('comment', -3)),
Expand Down Expand Up @@ -352,13 +354,13 @@ def _parsetag(self, s):
if ttype is None:
# parse any attributes of begin or single tags
while s.find('=', p) != -1 :
while p < n and s[p:p + 1] == ' ' : p += 1
while p < n and s[p:p + 1] in WHITESPACE_CHARS : p += 1
b = p
while p < n and s[p:p + 1] != '=' : p += 1
aname = s[b:p].lower()
aname = aname.rstrip(' ')
aname = aname.rstrip(' \n\r\t')
p += 1
while p < n and s[p:p + 1] == ' ' : p += 1
while p < n and s[p:p + 1] in WHITESPACE_CHARS: p += 1
if s[p:p + 1] in ('"', "'") :
qt = s[p:p + 1]
p = p + 1
Expand Down
4 changes: 2 additions & 2 deletions src/Resource_Files/plugin_launchers/python/quickparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@

SPECIAL_HANDLING_TYPES = ['xmlheader', 'doctype', 'comment', 'cdata', 'pi']

WHITESPACE_CHARS = (' ', '\n', '\r', '\f', '\t', '\v')
WHITESPACE_CHARS = (' ', '\n', '\r', '\t')

class QuickXHTMLParser(object):

Expand Down Expand Up @@ -116,7 +116,7 @@ def parsetag(self, s):
while p < n and s[p:p + 1] != '=' : p += 1
# attribute names can be mixed case and are in SVG
aname = s[b:p]
aname = aname.rstrip(' ')
aname = aname.rstrip(' \n\r\t')
p += 1
while p < n and s[p:p + 1] in WHITESPACE_CHARS : p += 1
if s[p:p + 1] in ('"', "'") :
Expand Down
10 changes: 6 additions & 4 deletions src/Resource_Files/python3lib/opf_newparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab

# Copyright (c) 2014 Kevin B. Hendricks, John Schember, and Doug Massay
# Copyright (c) 2014-2023 Kevin B. Hendricks, John Schember, and Doug Massay
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
Expand Down Expand Up @@ -51,6 +51,8 @@ def xmldecode(data):
newdata = newdata.replace('&amp;', '&')
return newdata

WHITESPACE_CHARS = (' ', '\n', '\r', '\t')

SPECIAL_HANDLING_TAGS = OrderedDict([
('?xml', ('xmlheader', -1)),
('!--', ('comment', -3)),
Expand Down Expand Up @@ -237,13 +239,13 @@ def _parsetag(self, s):
if ttype is None:
# parse any attributes of begin or single tags
while s.find('=',p) != -1 :
while p < n and s[p:p+1] == ' ' : p += 1
while p < n and s[p:p+1] in WHITESPACE_CHARS : p += 1
b = p
while p < n and s[p:p+1] != '=' : p += 1
aname = s[b:p].lower()
aname = aname.rstrip(' ')
aname = aname.rstrip(' \n\r\t')
p += 1
while p < n and s[p:p+1] == ' ' : p += 1
while p < n and s[p:p+1] in WHITESPACE_CHARS: p += 1
if s[p:p+1] in ('"', "'") :
qt = s[p:p+1]
p = p + 1
Expand Down

0 comments on commit feecb06

Please sign in to comment.