From b0c61ba66a4ab0df768a6f6b1464c02cb87f9689 Mon Sep 17 00:00:00 2001 From: Q_back Date: Fri, 24 Apr 2020 16:04:01 +0200 Subject: [PATCH] fix UnicodeDecodeError occurring in diff.py::split_by_sep() --- w3af/core/controllers/misc/diff.py | 9 ++++++--- w3af/core/controllers/misc/tests/test_diff.py | 5 +++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/w3af/core/controllers/misc/diff.py b/w3af/core/controllers/misc/diff.py index 22b68e3f74..aa5b27494d 100644 --- a/w3af/core/controllers/misc/diff.py +++ b/w3af/core/controllers/misc/diff.py @@ -143,7 +143,7 @@ def chunked_diff(a, b): return ''.join(a_chunks), ''.join(b_chunks) -def split_by_sep(seq): +def split_by_sep(sequence): """ This method will split the HTTP response body by various separators, such as new lines, tabs, <, double and single quotes. @@ -178,7 +178,7 @@ def split_by_sep(seq): chunks without much meaning and reduce the performance improvement we have achieved. - :param seq: A string + :param sequence: A string which we will split :return: A list of strings (chunks) for the input string """ # @@ -195,5 +195,8 @@ def split_by_sep(seq): # # [0] https://github.com/andresriancho/w3af/blob/2ded693c959c91dc3e4daca276460d6c64ada479/w3af/core/controllers/misc/diff.py#L173 # - translated_seq = string.translate(seq, TRANSLATION_TABLE) + try: + translated_seq = string.translate(sequence, TRANSLATION_TABLE) + except UnicodeDecodeError: + translated_seq = string.translate(sequence.encode('utf-8'), TRANSLATION_TABLE) return translated_seq.split('\0') diff --git a/w3af/core/controllers/misc/tests/test_diff.py b/w3af/core/controllers/misc/tests/test_diff.py index 76539fc09f..f3e2283518 100644 --- a/w3af/core/controllers/misc/tests/test_diff.py +++ b/w3af/core/controllers/misc/tests/test_diff.py @@ -135,6 +135,11 @@ def test_split_by_sep_2(self): result = split_by_sep('hello world