Skip to content

Commit

Permalink
Merge pull request #39 from Ousret/develop
Browse files Browse the repository at this point in the history
Release 2.2.3
  • Loading branch information
Ousret authored May 28, 2020
2 parents fa3a1c8 + f3fdeaa commit 8377579
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 14 deletions.
8 changes: 6 additions & 2 deletions kiss_headers/api.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from email.message import Message
from email.parser import HeaderParser
from io import RawIOBase
from io import BufferedReader, RawIOBase
from typing import Any, Iterable, List, Mapping, Optional, Tuple, Type, TypeVar, Union

from kiss_headers.models import Header, Headers
Expand Down Expand Up @@ -31,7 +31,11 @@ def parse_it(raw_headers: Any) -> Headers:

if isinstance(raw_headers, str):
headers = HeaderParser().parsestr(raw_headers, headersonly=True).items()
elif isinstance(raw_headers, bytes) or isinstance(raw_headers, RawIOBase):
elif (
isinstance(raw_headers, bytes)
or isinstance(raw_headers, RawIOBase)
or isinstance(raw_headers, BufferedReader)
):
decoded, not_decoded = extract_encoded_headers(
raw_headers if isinstance(raw_headers, bytes) else raw_headers.read() or b""
)
Expand Down
8 changes: 6 additions & 2 deletions kiss_headers/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@

from kiss_headers.structures import AttributeBag, CaseInsensitiveDict
from kiss_headers.utils import (
escape_double_quote,
extract_comments,
header_content_split,
header_name_to_class,
is_legal_header_name,
normalize_list,
normalize_str,
prettify_header_name,
unescape_double_quote,
unfold,
unpack_protected_keyword,
unquote,
Expand Down Expand Up @@ -1203,7 +1205,7 @@ def __init__(self, members: List[str]):
self.insert(unquote(member), None)
continue

self.insert(key, unquote(value))
self.insert(key, unescape_double_quote(unquote(value)))
continue

self.insert(unquote(member), None)
Expand All @@ -1220,7 +1222,9 @@ def __str__(self) -> str:

if value is not None:
content += '{semi_colon_r}{key}="{value}"'.format(
key=key, value=value, semi_colon_r="; " if content != "" else "",
key=key,
value=escape_double_quote(value),
semi_colon_r="; " if content != "" else "",
)
else:
content += "; " + key if content != "" else key
Expand Down
38 changes: 29 additions & 9 deletions kiss_headers/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from email.header import decode_header
from re import findall, search
from re import findall, search, sub
from typing import Any, Iterable, List, Optional, Set, Tuple, Type

RESERVED_KEYWORD: Set[str] = {
Expand Down Expand Up @@ -89,6 +89,8 @@ def header_content_split(string: str, delimiter: str) -> List[str]:
['Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:50.0) Gecko/20100101 Firefox/50.0']
>>> header_content_split("text/html; charset=UTF-8", ";")
['text/html', 'charset=UTF-8']
>>> header_content_split('text/html; charset="UTF-\\\"8"', ";")
['text/html', 'charset="UTF-"8"']
"""
if len(delimiter) != 1 or delimiter not in {";", ",", " "}:
raise ValueError("Delimiter should be either semi-colon, a coma or a space.")
Expand Down Expand Up @@ -394,16 +396,12 @@ def extract_comments(content: str) -> List[str]:


def unfold(content: str) -> str:
    r"""Some header content may have folded content (CRLF + n spaces) in it, making your job at reading them a little more difficult.
    This function undoes the folding in the given content.

    Every occurrence of CRLF followed by one or more spaces is replaced by a single space.
    Anything else, a bare LF included, is returned untouched.

    >>> unfold("___utmvbtouVBFmB=gZg\r\n XbNOjalT: Lte; path=/; Max-Age=900")
    '___utmvbtouVBFmB=gZg XbNOjalT: Lte; path=/; Max-Age=900'
    >>> unfold("nothing to unfold")
    'nothing to unfold'
    """
    # The docstring is raw so that the doctest input keeps its "\r\n" escape sequence
    # until doctest evaluates it, instead of embedding a real CRLF in the example.
    return sub(r"\r\n[ ]+", " ", content)


def extract_encoded_headers(payload: bytes) -> Tuple[str, bytes]:
Expand All @@ -427,3 +425,25 @@ def extract_encoded_headers(payload: bytes) -> Tuple[str, bytes]:
break

return result, b"\r\n".join(lines[index + 1 :])


def unescape_double_quote(content: str) -> str:
    r"""
    Replace escaped double quote in content by removing the backslash.

    Only the two-character sequence backslash + double quote is rewritten.
    A double quote that is not preceded by a backslash is left as it is.

    >>> unescape_double_quote(r'UTF\"-8')
    'UTF"-8'
    >>> unescape_double_quote(r'UTF"-8')
    'UTF"-8'
    """
    # The docstring must stay raw: otherwise the backslash in the first example is
    # consumed when the docstring is compiled and both examples test the same input.
    return content.replace(r"\"", '"')


def escape_double_quote(content: str) -> str:
    r"""
    Put a backslash in front of every double quote found in content.
    Existing backslash + double quote sequences are unescaped first, then every double quote is escaped.
    >>> escape_double_quote(r'UTF\"-8')
    'UTF\\"-8'
    >>> escape_double_quote(r'UTF"-8')
    'UTF\\"-8'
    """
    # Normalize first so that an already escaped quote does not receive a second backslash.
    without_escapes = unescape_double_quote(content)
    return without_escapes.replace('"', r"\"")
2 changes: 1 addition & 1 deletion kiss_headers/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
Expose version
"""

__version__ = "2.2.2"
__version__ = "2.2.3"
VERSION = __version__.split(".")
14 changes: 14 additions & 0 deletions tests/test_attributes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import unittest

from kiss_headers import Attributes
from kiss_headers.utils import header_content_split


class AttributesTestCase(unittest.TestCase):
Expand All @@ -22,6 +23,19 @@ def test_eq(self):

self.assertNotEqual(attr_a, attr_e)

def test_esc_double_quote(self):
    """Check that an escaped double quote is unescaped on read and escaped again by str()."""
    raw_content = r'text/html; charset="UTF-\"8"'

    with self.subTest(
        "Ensure that the double quote character is handled correctly."
    ):
        members = header_content_split(raw_content, ";")
        attributes = Attributes(members)

        # The stored value no longer carries the backslash.
        self.assertEqual(attributes["charset"], 'UTF-"8')

        # Serializing restores the escaped form.
        self.assertEqual(str(attributes), r'text/html; charset="UTF-\"8"')


if __name__ == "__main__":
unittest.main()

0 comments on commit 8377579

Please sign in to comment.