From b1b96edcadbd48f7f9c51de50759f4c93166da8e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nikola=20Forr=C3=B3?= <nforro@redhat.com>
Date: Fri, 1 Dec 2023 16:59:26 +0100
Subject: [PATCH] Fix handling of quotes during option tokenization
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit b5a1ce79 and constructs the input list in such a way
that string literals are split into individual characters, while every
other node is added as a single element and is therefore treated as a
regular character, making the original algorithm work even with quotes
contained within macro expansions and other expressions.

Signed-off-by: Nikola Forró <nforro@redhat.com>
---
 specfile/options.py        | 128 +++++++++++++++++--------------------
 tests/unit/test_options.py |  30 +++++++++
 2 files changed, 88 insertions(+), 70 deletions(-)

diff --git a/specfile/options.py b/specfile/options.py
index c91798a..7d09620 100644
--- a/specfile/options.py
+++ b/specfile/options.py
@@ -9,7 +9,7 @@
 
 from specfile.exceptions import OptionsException
 from specfile.formatter import formatted
-from specfile.value_parser import Node, StringLiteral, ValueParser
+from specfile.value_parser import StringLiteral, ValueParser
 
 
 class TokenType(Enum):
@@ -476,79 +476,67 @@ def tokenize(option_string: str) -> List[Token]:
             OptionsException if the option string is untokenizable.
         """
         result: List[Token] = []
-
-        def append_default(s):
-            if result and result[-1].type == TokenType.DEFAULT:
-                result[-1].value += s
-            else:
-                result.append(Token(TokenType.DEFAULT, s))
-
-        token_nodes: List[Node] = []
+        token = ""
+        quote = None
+        inp = []
         for node in ValueParser.parse(option_string):
             if isinstance(node, StringLiteral):
-                if token_nodes:
-                    append_default("".join(str(n) for n in token_nodes))
-                    token_nodes = []
-                token = ""
+                inp.extend(list(str(node)))
+                continue
+            inp.append(str(node))
+        while inp:
+            c = inp.pop(0)
+            if c == quote:
+                if token:
+                    result.append(
+                        Token(
+                            TokenType.QUOTED
+                            if quote == "'"
+                            else TokenType.DOUBLE_QUOTED,
+                            token,
+                        )
+                    )
+                    token = ""
                 quote = None
-                inp = list(str(node))
+                continue
+            if quote:
+                if c == "\\":
+                    if not inp:
+                        raise OptionsException("No escaped character")
+                    c = inp.pop(0)
+                    if c != quote:
+                        token += "\\"
+                token += c
+                continue
+            if c.isspace():
+                if token:
+                    result.append(Token(TokenType.DEFAULT, token))
+                    token = ""
+                whitespace = c
                 while inp:
                     c = inp.pop(0)
-                    if c == quote:
-                        if token:
-                            result.append(
-                                Token(
-                                    TokenType.QUOTED
-                                    if quote == "'"
-                                    else TokenType.DOUBLE_QUOTED,
-                                    token,
-                                )
-                            )
-                            token = ""
-                        quote = None
-                        continue
-                    if quote:
-                        if c == "\\":
-                            if not inp:
-                                raise OptionsException("No escaped character")
-                            c = inp.pop(0)
-                            if c != quote:
-                                token += "\\"
-                        token += c
-                        continue
-                    if c.isspace():
-                        if token:
-                            append_default(token)
-                            token = ""
-                        whitespace = c
-                        while inp:
-                            c = inp.pop(0)
-                            if not c.isspace():
-                                break
-                            whitespace += c
-                        else:
-                            result.append(Token(TokenType.WHITESPACE, whitespace))
-                            break
-                        inp.insert(0, c)
-                        result.append(Token(TokenType.WHITESPACE, whitespace))
-                        continue
-                    if c in ('"', "'"):
-                        if token:
-                            append_default(token)
-                            token = ""
-                        quote = c
-                        continue
-                    if c == "\\":
-                        if not inp:
-                            raise OptionsException("No escaped character")
-                        c = inp.pop(0)
-                    token += c
-                if quote:
-                    raise OptionsException("No closing quotation")
+                    if not c.isspace():
+                        break
+                    whitespace += c
+                else:
+                    result.append(Token(TokenType.WHITESPACE, whitespace))
+                    break
+                inp.insert(0, c)
+                result.append(Token(TokenType.WHITESPACE, whitespace))
+                continue
+            if c in ('"', "'"):
                 if token:
-                    append_default(token)
-            else:
-                token_nodes.append(node)
-        if token_nodes:
-            append_default("".join(str(n) for n in token_nodes))
+                    result.append(Token(TokenType.DEFAULT, token))
+                    token = ""
+                quote = c
+                continue
+            if c == "\\":
+                if not inp:
+                    raise OptionsException("No escaped character")
+                c = inp.pop(0)
+            token += c
+        if quote:
+            raise OptionsException("No closing quotation")
+        if token:
+            result.append(Token(TokenType.DEFAULT, token))
         return result
diff --git a/tests/unit/test_options.py b/tests/unit/test_options.py
index 3c31986..96f3f34 100644
--- a/tests/unit/test_options.py
+++ b/tests/unit/test_options.py
@@ -258,6 +258,36 @@ def test_options_find_option(optstring, tokens, option, result):
                 Token(TokenType.DEFAULT, '%{name}-%{version}%[%{rc}?"-rc":""]'),
             ],
         ),
+        (
+            "-q -n '%{name}-%{version}'",
+            [
+                Token(TokenType.DEFAULT, "-q"),
+                Token(TokenType.WHITESPACE, " "),
+                Token(TokenType.DEFAULT, "-n"),
+                Token(TokenType.WHITESPACE, " "),
+                Token(TokenType.QUOTED, "%{name}-%{version}"),
+            ],
+        ),
+        (
+            '-q -n "%{name}-%{version}"',
+            [
+                Token(TokenType.DEFAULT, "-q"),
+                Token(TokenType.WHITESPACE, " "),
+                Token(TokenType.DEFAULT, "-n"),
+                Token(TokenType.WHITESPACE, " "),
+                Token(TokenType.DOUBLE_QUOTED, "%{name}-%{version}"),
+            ],
+        ),
+        (
+            '-q -n \'%{name}-%{version}%[%{rc}?"-rc":""]\'',
+            [
+                Token(TokenType.DEFAULT, "-q"),
+                Token(TokenType.WHITESPACE, " "),
+                Token(TokenType.DEFAULT, "-n"),
+                Token(TokenType.WHITESPACE, " "),
+                Token(TokenType.QUOTED, '%{name}-%{version}%[%{rc}?"-rc":""]'),
+            ],
+        ),
     ],
 )
 def test_options_tokenize(option_string, result):