From feecb0604f31ff759e4ebec315dee575249c424a Mon Sep 17 00:00:00 2001 From: Kevin Hendricks Date: Fri, 1 Dec 2023 14:29:51 -0500 Subject: [PATCH] rules on whitespace chars when parsing xhtml/xml are stricter than html --- .../plugin_launchers/python/opf_parser.py | 10 ++++++---- .../plugin_launchers/python/quickparser.py | 4 ++-- src/Resource_Files/python3lib/opf_newparser.py | 10 ++++++---- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/Resource_Files/plugin_launchers/python/opf_parser.py b/src/Resource_Files/plugin_launchers/python/opf_parser.py index b5610b486b..7dfcd03715 100644 --- a/src/Resource_Files/plugin_launchers/python/opf_parser.py +++ b/src/Resource_Files/plugin_launchers/python/opf_parser.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab -# Copyright (c) 2014-2020 Kevin B. Hendricks and Doug Massay +# Copyright (c) 2014-2023 Kevin B. Hendricks and Doug Massay # Copyright (c) 2014 John Schember # All rights reserved. 
# @@ -33,6 +33,8 @@ from hrefutils import mime_group_map from collections import OrderedDict +WHITESPACE_CHARS = (' ', '\n', '\r', '\t') + SPECIAL_HANDLING_TAGS = OrderedDict([ ('?xml', ('xmlheader', -1)), ('!--', ('comment', -3)), @@ -352,13 +354,13 @@ def _parsetag(self, s): if ttype is None: # parse any attributes of begin or single tags while s.find('=', p) != -1 : - while p < n and s[p:p + 1] == ' ' : p += 1 + while p < n and s[p:p + 1] in WHITESPACE_CHARS : p += 1 b = p while p < n and s[p:p + 1] != '=' : p += 1 aname = s[b:p].lower() - aname = aname.rstrip(' ') + aname = aname.rstrip(' \n\r\t') p += 1 - while p < n and s[p:p + 1] == ' ' : p += 1 + while p < n and s[p:p + 1] in WHITESPACE_CHARS: p += 1 if s[p:p + 1] in ('"', "'") : qt = s[p:p + 1] p = p + 1 diff --git a/src/Resource_Files/plugin_launchers/python/quickparser.py b/src/Resource_Files/plugin_launchers/python/quickparser.py index 36d8faca41..fc3f57e82e 100644 --- a/src/Resource_Files/plugin_launchers/python/quickparser.py +++ b/src/Resource_Files/plugin_launchers/python/quickparser.py @@ -39,7 +39,7 @@ SPECIAL_HANDLING_TYPES = ['xmlheader', 'doctype', 'comment', 'cdata', 'pi'] -WHITESPACE_CHARS = (' ', '\n', '\r', '\f', '\t', '\v') +WHITESPACE_CHARS = (' ', '\n', '\r', '\t') class QuickXHTMLParser(object): @@ -116,7 +116,7 @@ def parsetag(self, s): while p < n and s[p:p + 1] != '=' : p += 1 # attribute names can be mixed case and are in SVG aname = s[b:p] - aname = aname.rstrip(' ') + aname = aname.rstrip(' \n\r\t') p += 1 while p < n and s[p:p + 1] in WHITESPACE_CHARS : p += 1 if s[p:p + 1] in ('"', "'") : diff --git a/src/Resource_Files/python3lib/opf_newparser.py b/src/Resource_Files/python3lib/opf_newparser.py index af217a4fa9..b4508fe31d 100644 --- a/src/Resource_Files/python3lib/opf_newparser.py +++ b/src/Resource_Files/python3lib/opf_newparser.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab -# Copyright (c) 2014 Kevin B. 
Hendricks, John Schember, and Doug Massay +# Copyright (c) 2014-2023 Kevin B. Hendricks, John Schember, and Doug Massay # All rights reserved. # # Redistribution and use in source and binary forms, with or without modification, @@ -51,6 +51,8 @@ def xmldecode(data): newdata = newdata.replace('&amp;', '&') return newdata +WHITESPACE_CHARS = (' ', '\n', '\r', '\t') + SPECIAL_HANDLING_TAGS = OrderedDict([ ('?xml', ('xmlheader', -1)), ('!--', ('comment', -3)), @@ -237,13 +239,13 @@ def _parsetag(self, s): if ttype is None: # parse any attributes of begin or single tags while s.find('=',p) != -1 : - while p < n and s[p:p+1] == ' ' : p += 1 + while p < n and s[p:p+1] in WHITESPACE_CHARS : p += 1 b = p while p < n and s[p:p+1] != '=' : p += 1 aname = s[b:p].lower() - aname = aname.rstrip(' ') + aname = aname.rstrip(' \n\r\t') p += 1 - while p < n and s[p:p+1] == ' ' : p += 1 + while p < n and s[p:p+1] in WHITESPACE_CHARS: p += 1 if s[p:p+1] in ('"', "'") : qt = s[p:p+1] p = p + 1