From feecb0604f31ff759e4ebec315dee575249c424a Mon Sep 17 00:00:00 2001
From: Kevin Hendricks <kevinhendricks@users.noreply.github.com>
Date: Fri, 1 Dec 2023 14:29:51 -0500
Subject: [PATCH] Whitespace rules when parsing xhtml/xml are stricter than
 html: treat only space, tab, CR and LF as whitespace around attributes

---
 .../plugin_launchers/python/opf_parser.py              | 10 ++++++----
 .../plugin_launchers/python/quickparser.py             |  4 ++--
 src/Resource_Files/python3lib/opf_newparser.py         | 10 ++++++----
 3 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/src/Resource_Files/plugin_launchers/python/opf_parser.py b/src/Resource_Files/plugin_launchers/python/opf_parser.py
index b5610b486b..7dfcd03715 100644
--- a/src/Resource_Files/plugin_launchers/python/opf_parser.py
+++ b/src/Resource_Files/plugin_launchers/python/opf_parser.py
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
 
-# Copyright (c) 2014-2020 Kevin B. Hendricks and Doug Massay
+# Copyright (c) 2014-2023 Kevin B. Hendricks and Doug Massay
 # Copyright (c) 2014      John Schember
 # All rights reserved.
 #
@@ -33,6 +33,8 @@
 from hrefutils import mime_group_map
 from collections import OrderedDict
 
+WHITESPACE_CHARS = (' ', '\n', '\r', '\t')
+
 SPECIAL_HANDLING_TAGS = OrderedDict([
     ('?xml', ('xmlheader', -1)),
     ('!--', ('comment', -3)),
@@ -352,13 +354,13 @@ def _parsetag(self, s):
         if ttype is None:
             # parse any attributes of begin or single tags
             while s.find('=', p) != -1 :
-                while p < n and s[p:p + 1] == ' ' : p += 1
+                while p < n and s[p:p + 1] in WHITESPACE_CHARS : p += 1
                 b = p
                 while p < n and s[p:p + 1] != '=' : p += 1
                 aname = s[b:p].lower()
-                aname = aname.rstrip(' ')
+                aname = aname.rstrip(' \n\r\t')
                 p += 1
-                while p < n and s[p:p + 1] == ' ' : p += 1
+                while p < n and s[p:p + 1] in WHITESPACE_CHARS: p += 1
                 if s[p:p + 1] in ('"', "'") :
                     qt = s[p:p + 1]
                     p = p + 1
diff --git a/src/Resource_Files/plugin_launchers/python/quickparser.py b/src/Resource_Files/plugin_launchers/python/quickparser.py
index 36d8faca41..fc3f57e82e 100644
--- a/src/Resource_Files/plugin_launchers/python/quickparser.py
+++ b/src/Resource_Files/plugin_launchers/python/quickparser.py
@@ -39,7 +39,7 @@
 
 SPECIAL_HANDLING_TYPES = ['xmlheader', 'doctype', 'comment', 'cdata', 'pi']
 
-WHITESPACE_CHARS = (' ', '\n', '\r', '\f', '\t', '\v')
+WHITESPACE_CHARS = (' ', '\n', '\r', '\t')
 
 class QuickXHTMLParser(object):
 
@@ -116,7 +116,7 @@ def parsetag(self, s):
                 while p < n and s[p:p + 1] != '=' : p += 1
                 # attribute names can be mixed case and are in SVG
                 aname = s[b:p]
-                aname = aname.rstrip(' ')
+                aname = aname.rstrip(' \n\r\t')
                 p += 1
                 while p < n and s[p:p + 1] in WHITESPACE_CHARS : p += 1
                 if s[p:p + 1] in ('"', "'") :
diff --git a/src/Resource_Files/python3lib/opf_newparser.py b/src/Resource_Files/python3lib/opf_newparser.py
index af217a4fa9..b4508fe31d 100644
--- a/src/Resource_Files/python3lib/opf_newparser.py
+++ b/src/Resource_Files/python3lib/opf_newparser.py
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
 
-# Copyright (c) 2014 Kevin B. Hendricks, John Schember, and Doug Massay
+# Copyright (c) 2014-2023 Kevin B. Hendricks, John Schember, and Doug Massay
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without modification,
@@ -51,6 +51,8 @@ def xmldecode(data):
     newdata = newdata.replace('&amp;', '&')
     return newdata
 
+WHITESPACE_CHARS = (' ', '\n', '\r', '\t')
+
 SPECIAL_HANDLING_TAGS = OrderedDict([
     ('?xml', ('xmlheader', -1)),
     ('!--',  ('comment', -3)),
@@ -237,13 +239,13 @@ def _parsetag(self, s):
         if ttype is None:
             # parse any attributes of begin or single tags
             while s.find('=',p) != -1 :
-                while p < n and s[p:p+1] == ' ' : p += 1
+                while p < n and s[p:p+1] in WHITESPACE_CHARS : p += 1
                 b = p
                 while p < n and s[p:p+1] != '=' : p += 1
                 aname = s[b:p].lower()
-                aname = aname.rstrip(' ')
+                aname = aname.rstrip(' \n\r\t')
                 p += 1
-                while p < n and s[p:p+1] == ' ' : p += 1
+                while p < n and s[p:p+1] in WHITESPACE_CHARS: p += 1
                 if s[p:p+1] in ('"', "'") :
                     qt = s[p:p+1]
                     p = p + 1