Skip to content

Commit 9db1546

Browse files
fix[parser]: fix bad tokenization of hex strings (#4406)
This commit fixes parsing of hex strings. There were several issues with the hex string pre-parser, including: (1) modification of the string locations, and (2) failure to exit the state machine when a non-string token is encountered. This commit fixes the state machine and changes the pre-parser to leave the locations of hex strings unmodified, so as to minimize the differences in locations between the reformatted code and the source code. To see the effect, print out the reformatted code of the test cases included in this PR before and after this commit. This commit additionally adds several sanity checks to the pre-parser so that the chance of future tokenization bugs is minimized.
1 parent 66272e6 commit 9db1546

File tree

5 files changed

+74
-21
lines changed

5 files changed

+74
-21
lines changed

.github/workflows/pull-request.yaml

+2
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ jobs:
3434
# lang: language changes
3535
# stdlib: changes to the stdlib
3636
# ux: language changes (UX)
37+
# parser: parser changes
3738
# tool: integration
3839
# ir: (old) IR/codegen changes
3940
# codegen: lowering from vyper AST to codegen
@@ -46,6 +47,7 @@ jobs:
4647
lang
4748
stdlib
4849
ux
50+
parser
4951
tool
5052
ir
5153
codegen

tests/functional/codegen/types/test_bytes.py

+15
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import pytest
22

3+
from vyper.compiler import compile_code
34
from vyper.exceptions import TypeMismatch
45

56

@@ -281,6 +282,20 @@ def test2(l: Bytes[{m}] = x"{val}") -> bool:
281282
assert c.test2(vyper_literal) is True
282283

283284

285+
def test_hex_literal_parser_edge_case():
286+
# see GH issue 4405 example 2
287+
code = """
288+
interface FooBar:
289+
def test(a: Bytes[2], b: String[4]): payable
290+
291+
@deploy
292+
def __init__(ext: FooBar):
293+
extcall ext.test(x'6161', x'6161') #ext.test(b'\x61\61', '6161') gets called
294+
"""
295+
with pytest.raises(TypeMismatch):
296+
compile_code(code)
297+
298+
284299
def test_zero_padding_with_private(get_contract):
285300
code = """
286301
counter: uint256

tests/functional/syntax/test_bytes.py

+29-1
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,17 @@ def test() -> Bytes[1]:
8181
"""
8282
@external
8383
def test() -> Bytes[2]:
84-
a: Bytes[2] = x"abc"
84+
a: Bytes[2] = x"abc" # non-hex nibbles
85+
return a
86+
""",
87+
SyntaxException,
88+
),
89+
(
90+
"""
91+
@external
92+
def test() -> Bytes[10]:
93+
# GH issue 4405 example 1
94+
a: Bytes[10] = x x x x x x"61" # messed up hex prefix
8595
return a
8696
""",
8797
SyntaxException,
@@ -107,6 +117,24 @@ def test_bytes_fail(bad_code):
107117
compiler.compile_code(bad_code)
108118

109119

120+
@pytest.mark.xfail
121+
def test_hexbytes_offset():
122+
good_code = """
123+
event X:
124+
a: Bytes[2]
125+
126+
@deploy
127+
def __init__():
128+
# GH issue 4405, example 1
129+
#
130+
# log changes offset of HexString, and the hex_string_locations tracked
131+
# location is incorrect when visiting ast
132+
log X(a = x"6161")
133+
"""
134+
# move this to valid list once it passes.
135+
assert compiler.compile_code(good_code) is not None
136+
137+
110138
valid_list = [
111139
"""
112140
@external

vyper/ast/parse.py

+5
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,10 @@ def _parse_to_ast_with_settings(
117117
# postcondition: consumed all the for loop annotations
118118
assert len(pre_parser.for_loop_annotations) == 0
119119

120+
# postcondition: we have used all the hex strings found by the
121+
# pre-parser
122+
assert len(pre_parser.hex_string_locations) == 0
123+
120124
# Convert to Vyper AST.
121125
module = vy_ast.get_node(py_ast)
122126
assert isinstance(module, vy_ast.Module) # mypy hint
@@ -440,6 +444,7 @@ def visit_Constant(self, node):
440444
node.col_offset,
441445
)
442446
node.ast_type = "HexBytes"
447+
self._pre_parser.hex_string_locations.remove(key)
443448
else:
444449
node.ast_type = "Str"
445450
elif isinstance(node.value, bytes):

vyper/ast/pre_parser.py

+23-20
Original file line numberDiff line numberDiff line change
@@ -109,37 +109,40 @@ def consume(self, token):
109109
class HexStringParser:
110110
def __init__(self):
111111
self.locations = []
112-
self._current_x = None
112+
self._tokens = []
113113
self._state = ParserState.NOT_RUNNING
114114

115115
def consume(self, token, result):
116116
# prepare to check if the next token is a STRING
117-
if token.type == NAME and token.string == "x":
118-
self._state = ParserState.RUNNING
119-
self._current_x = token
120-
return True
121-
122117
if self._state == ParserState.NOT_RUNNING:
118+
if token.type == NAME and token.string == "x":
119+
self._tokens.append(token)
120+
self._state = ParserState.RUNNING
121+
return True
122+
123123
return False
124124

125-
if self._state == ParserState.RUNNING:
126-
current_x = self._current_x
127-
self._current_x = None
128-
self._state = ParserState.NOT_RUNNING
125+
assert self._state == ParserState.RUNNING, "unreachable"
129126

130-
toks = [current_x]
127+
self._state = ParserState.NOT_RUNNING
131128

132-
# drop the leading x token if the next token is a STRING to avoid a python
133-
# parser error
134-
if token.type == STRING:
135-
self.locations.append(current_x.start)
136-
toks = [TokenInfo(STRING, token.string, current_x.start, token.end, token.line)]
137-
result.extend(toks)
138-
return True
129+
if token.type != STRING:
130+
# flush the tokens we have accumulated and move on
131+
result.extend(self._tokens)
132+
self._tokens = []
133+
return False
139134

140-
result.extend(toks)
135+
# mark hex string in locations for later processing
136+
self.locations.append(token.start)
141137

142-
return False
138+
# discard the `x` token and apply sanity checks -
139+
# we should only be discarding one token.
140+
assert len(self._tokens) == 1
141+
assert (x_tok := self._tokens[0]).type == NAME and x_tok.string == "x"
142+
self._tokens = [] # discard tokens
143+
144+
result.append(token)
145+
return True
143146

144147

145148
# compound statements that are replaced with `class`

0 commit comments

Comments
 (0)