From b1b96edcadbd48f7f9c51de50759f4c93166da8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nikola=20Forr=C3=B3?= Date: Fri, 1 Dec 2023 16:59:26 +0100 Subject: [PATCH] Fix handling of quotes during option tokenization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit b5a1ce79 and constructs the input list in such a way that nodes other than string literals are treated as regular characters, making the original algorithm work even with quotes contained within macro expansions and other expressions. Signed-off-by: Nikola Forró --- specfile/options.py | 128 +++++++++++++++++-------------------- tests/unit/test_options.py | 30 +++++++++ 2 files changed, 88 insertions(+), 70 deletions(-) diff --git a/specfile/options.py b/specfile/options.py index c91798a..7d09620 100644 --- a/specfile/options.py +++ b/specfile/options.py @@ -9,7 +9,7 @@ from specfile.exceptions import OptionsException from specfile.formatter import formatted -from specfile.value_parser import Node, StringLiteral, ValueParser +from specfile.value_parser import StringLiteral, ValueParser class TokenType(Enum): @@ -476,79 +476,67 @@ def tokenize(option_string: str) -> List[Token]: OptionsException if the option string is untokenizable. """ result: List[Token] = [] - - def append_default(s): - if result and result[-1].type == TokenType.DEFAULT: - result[-1].value += s - else: - result.append(Token(TokenType.DEFAULT, s)) - - token_nodes: List[Node] = [] + token = "" + quote = None + inp = [] for node in ValueParser.parse(option_string): if isinstance(node, StringLiteral): - if token_nodes: - append_default("".join(str(n) for n in token_nodes)) - token_nodes = [] - token = "" + inp.extend(list(str(node))) + continue + inp.append(str(node)) + while inp: + c = inp.pop(0) + if c == quote: + if token: + result.append( + Token( + TokenType.QUOTED + if quote == "'" + else TokenType.DOUBLE_QUOTED, + token, + ) + ) + token = "" quote = None - inp = list(str(node)) + continue + if quote: + if c == "\\": + if not inp: + raise OptionsException("No escaped character") + c = inp.pop(0) + if c != quote: + token += "\\" + token += c + continue + if c.isspace(): + if token: + result.append(Token(TokenType.DEFAULT, token)) + token = "" + whitespace = c while inp: c = inp.pop(0) - if c == quote: - if token: - result.append( - Token( - TokenType.QUOTED - if quote == "'" - else TokenType.DOUBLE_QUOTED, - token, - ) - ) - token = "" - quote = None - continue - if quote: - if c == "\\": - if not inp: - raise OptionsException("No escaped character") - c = inp.pop(0) - if c != quote: - token += "\\" - token += c - continue - if c.isspace(): - if token: - append_default(token) - token = "" - whitespace = c - while inp: - c = inp.pop(0) - if not c.isspace(): - break - whitespace += c - else: - result.append(Token(TokenType.WHITESPACE, whitespace)) - break - inp.insert(0, c) - result.append(Token(TokenType.WHITESPACE, whitespace)) - continue - if c in ('"', "'"): - if token: - append_default(token) - token = "" - quote = c - continue - if c == "\\": - if not inp: - raise OptionsException("No escaped character") - c = inp.pop(0) - token += c - if quote: - raise OptionsException("No closing quotation") + if not c.isspace(): + break + whitespace += c + else: + result.append(Token(TokenType.WHITESPACE, whitespace)) + break + inp.insert(0, c) + result.append(Token(TokenType.WHITESPACE, whitespace)) + continue + if c in ('"', "'"): if token: - append_default(token) - else: - token_nodes.append(node) - if token_nodes: - append_default("".join(str(n) for n in token_nodes)) + result.append(Token(TokenType.DEFAULT, token)) + token = "" + quote = c + continue + if c == "\\": + if not inp: + raise OptionsException("No escaped character") + c = inp.pop(0) + token += c + if quote: + raise OptionsException("No closing quotation") + if token: + result.append(Token(TokenType.DEFAULT, token)) return result diff --git a/tests/unit/test_options.py b/tests/unit/test_options.py index 3c31986..96f3f34 100644 --- a/tests/unit/test_options.py +++ b/tests/unit/test_options.py @@ -258,6 +258,36 @@ def test_options_find_option(optstring, tokens, option, result): Token(TokenType.DEFAULT, '%{name}-%{version}%[%{rc}?"-rc":""]'), ], ), + ( + "-q -n '%{name}-%{version}'", + [ + Token(TokenType.DEFAULT, "-q"), + Token(TokenType.WHITESPACE, " "), + Token(TokenType.DEFAULT, "-n"), + Token(TokenType.WHITESPACE, " "), + Token(TokenType.QUOTED, "%{name}-%{version}"), + ], + ), + ( + '-q -n "%{name}-%{version}"', + [ + Token(TokenType.DEFAULT, "-q"), + Token(TokenType.WHITESPACE, " "), + Token(TokenType.DEFAULT, "-n"), + Token(TokenType.WHITESPACE, " "), + Token(TokenType.DOUBLE_QUOTED, "%{name}-%{version}"), + ], + ), + ( + '-q -n \'%{name}-%{version}%[%{rc}?"-rc":""]\'', + [ + Token(TokenType.DEFAULT, "-q"), + Token(TokenType.WHITESPACE, " "), + Token(TokenType.DEFAULT, "-n"), + Token(TokenType.WHITESPACE, " "), + Token(TokenType.QUOTED, '%{name}-%{version}%[%{rc}?"-rc":""]'), + ], + ), ], ) def test_options_tokenize(option_string, result):