From 24cfe0bcc10ec9418054c8def5cf4eeaa3ed0164 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 7 Jun 2024 14:49:51 -0700 Subject: [PATCH 01/20] fix[tool]: star option in `outputSelection` (#4094) vyper will accept either `"*"` or `["*"]` for `outputSelection`, but some verifiers expect it to always be a list. make `solc_json` output choose the common formatting. --- vyper/compiler/output_bundle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vyper/compiler/output_bundle.py b/vyper/compiler/output_bundle.py index b93ecbd015..92494e3a70 100644 --- a/vyper/compiler/output_bundle.py +++ b/vyper/compiler/output_bundle.py @@ -194,7 +194,7 @@ def write_integrity(self, integrity_sum: str): def write_compilation_target(self, targets: list[str]): for target in targets: - self._output["settings"]["outputSelection"][target] = "*" + self._output["settings"]["outputSelection"][target] = ["*"] def write_version(self, version): self._output["compiler_version"] = version From 4d1bacd5b7ef3e2986393fd7f23b0ac24bbe1227 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 10 Jun 2024 11:16:53 -0700 Subject: [PATCH 02/20] fix[venom]: move loop invariant assertion to entry block (#4098) loop invariant bound check was in the body of the loop, not the entry block. move it up to the entry so we don't re-check the same assertion every loop iteration. 
--- tests/functional/syntax/test_for_range.py | 4 ++-- vyper/venom/ir_node_to_venom.py | 20 ++++++++------------ 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/tests/functional/syntax/test_for_range.py b/tests/functional/syntax/test_for_range.py index 1de32108c5..97e77f32f7 100644 --- a/tests/functional/syntax/test_for_range.py +++ b/tests/functional/syntax/test_for_range.py @@ -368,14 +368,14 @@ def foo(): """ @external def foo(): - x: int128 = 5 + x: int128 = 4 for i: int128 in range(x, bound=4): pass """, """ @external def foo(): - x: int128 = 5 + x: int128 = 4 for i: int128 in range(0, x, bound=4): pass """, diff --git a/vyper/venom/ir_node_to_venom.py b/vyper/venom/ir_node_to_venom.py index 61b3c081ff..2c99cf5668 100644 --- a/vyper/venom/ir_node_to_venom.py +++ b/vyper/venom/ir_node_to_venom.py @@ -468,14 +468,7 @@ def emit_body_blocks(): start, end, _ = _convert_ir_bb_list(fn, ir.args[1:4], symbols) assert ir.args[3].is_literal, "repeat bound expected to be literal" - bound = ir.args[3].value - if ( - isinstance(end, IRLiteral) - and isinstance(start, IRLiteral) - and end.value + start.value <= bound - ): - bound = None body = ir.args[4] @@ -491,9 +484,15 @@ def emit_body_blocks(): counter_var = entry_block.append_instruction("store", start) symbols[sym.value] = counter_var + + if bound is not None: + # assert le end bound + invalid_end = entry_block.append_instruction("gt", bound, end) + valid_end = entry_block.append_instruction("iszero", invalid_end) + entry_block.append_instruction("assert", valid_end) + end = entry_block.append_instruction("add", start, end) - if bound: - bound = entry_block.append_instruction("add", start, bound) + entry_block.append_instruction("jmp", cond_block.label) xor_ret = cond_block.append_instruction("xor", counter_var, end) @@ -501,9 +500,6 @@ def emit_body_blocks(): fn.append_basic_block(cond_block) fn.append_basic_block(body_block) - if bound: - xor_ret = body_block.append_instruction("xor", counter_var, 
bound) - body_block.append_instruction("assert", xor_ret) emit_body_blocks() body_end = fn.get_basic_block() From 7c8862ae9da3267c3c65ce06f2f0b1c59d082704 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 11 Jun 2024 06:23:26 -0700 Subject: [PATCH 03/20] feat[lang]: rename `_abi_encode` and `_abi_decode` (#4097) rename to `abi_encode` and `abi_decode` respectively leave the old ones in, but with deprecation warnings --- docs/built-in-functions.rst | 18 ++++-- .../builtins/codegen/test_abi_decode.py | 12 ++-- .../builtins/codegen/test_abi_encode.py | 62 +++++++++---------- vyper/ast/grammar.lark | 2 +- vyper/builtins/functions.py | 32 ++++++++-- 5 files changed, 78 insertions(+), 48 deletions(-) diff --git a/docs/built-in-functions.rst b/docs/built-in-functions.rst index 2e2f38ab74..367a08d80d 100644 --- a/docs/built-in-functions.rst +++ b/docs/built-in-functions.rst @@ -264,7 +264,7 @@ Vyper has three built-ins for contract creation; all three contract creation bui x: uint256 = 123 success, response = raw_call( _target, - _abi_encode(x, method_id=method_id("someMethodName(uint256)")), + abi_encode(x, method_id=method_id("someMethodName(uint256)")), max_outsize=32, value=msg.value, revert_on_failure=False @@ -1023,7 +1023,7 @@ Utilities >>> ExampleContract.foo() 0xa9059cbb -.. py:function:: _abi_encode(*args, ensure_tuple: bool = True) -> Bytes[] +.. py:function:: abi_encode(*args, ensure_tuple: bool = True) -> Bytes[] Takes a variable number of args as input, and returns the ABIv2-encoded bytestring. Used for packing arguments to raw_call, EIP712 and other cases where a consistent and efficient serialization method is needed. Once this function has seen more use we provisionally plan to put it into the ``ethereum.abi`` namespace. @@ -1041,7 +1041,7 @@ Utilities def foo() -> Bytes[132]: x: uint256 = 1 y: Bytes[32] = b"234" - return _abi_encode(x, y, method_id=method_id("foo()")) + return abi_encode(x, y, method_id=method_id("foo()")) .. 
code-block:: vyper @@ -1052,15 +1052,18 @@ Utilities "0000000000000000000000000000000000000000000000000000000000000003" "3233340000000000000000000000000000000000000000000000000000000000" + .. note:: + Prior to v0.4.0, this function was named ``_abi_encode``. + -.. py:function:: _abi_decode(b: Bytes, output_type: type_, unwrap_tuple: bool = True) -> Any +.. py:function:: abi_decode(b: Bytes, output_type: type_, unwrap_tuple: bool = True) -> Any Takes a byte array as input, and returns the decoded values according to the specified output types. Used for unpacking ABIv2-encoded values. Once this function has seen more use we provisionally plan to put it into the ``ethereum.abi`` namespace. * ``b``: A byte array of a length that is between the minimum and maximum ABIv2 size bounds of the ``output type``. * ``output_type``: Name of the output type, or tuple of output types, to be decoded. - * ``unwrap_tuple``: If set to True, the input is decoded as a tuple even if only one output type is specified. In other words, ``_abi_decode(b, Bytes[32])`` gets decoded as ``(Bytes[32],)``. This is the convention for ABIv2-encoded values generated by Vyper and Solidity functions. Except for very specific use cases, this should be set to True. Must be a literal. + * ``unwrap_tuple``: If set to True, the input is decoded as a tuple even if only one output type is specified. In other words, ``abi_decode(b, Bytes[32])`` gets decoded as ``(Bytes[32],)``. This is the convention for ABIv2-encoded values generated by Vyper and Solidity functions. Except for very specific use cases, this should be set to True. Must be a literal. Returns the decoded value(s), with type as specified by `output_type`. @@ -1071,9 +1074,12 @@ Utilities def foo(someInput: Bytes[128]) -> (uint256, Bytes[32]): x: uint256 = empty(uint256) y: Bytes[32] = empty(Bytes[32]) - x, y = _abi_decode(someInput, (uint256, Bytes[32])) + x, y = abi_decode(someInput, (uint256, Bytes[32])) return x, y + .. 
note:: + Prior to v0.4.0, this function was named ``_abi_decode``. + .. py:function:: print(*args, hardhat_compat=False) -> None diff --git a/tests/functional/builtins/codegen/test_abi_decode.py b/tests/functional/builtins/codegen/test_abi_decode.py index 36b87137b9..d77bb1b5ae 100644 --- a/tests/functional/builtins/codegen/test_abi_decode.py +++ b/tests/functional/builtins/codegen/test_abi_decode.py @@ -31,7 +31,7 @@ def abi_decode(x: Bytes[160]) -> (address, int128, bool, decimal, bytes32): c: bool = False d: decimal = 0.0 e: bytes32 = 0x0000000000000000000000000000000000000000000000000000000000000000 - a, b, c, d, e = _abi_decode(x, (address, int128, bool, decimal, bytes32)) + a, b, c, d, e = abi_decode(x, (address, int128, bool, decimal, bytes32)) return a, b, c, d, e @external @@ -48,7 +48,7 @@ def abi_decode_struct(x: Bytes[544]) -> Human: metadata=0x0000000000000000000000000000000000000000000000000000000000000000 ) ) - human = _abi_decode(x, Human) + human = abi_decode(x, Human) return human """ @@ -97,7 +97,7 @@ def test_abi_decode_single(get_contract, expected, input_len, output_typ, abi_ty contract = f""" @external def foo(x: Bytes[{input_len}]) -> {output_typ}: - a: {output_typ} = _abi_decode(x, {output_typ}, unwrap_tuple={unwrap_tuple}) + a: {output_typ} = abi_decode(x, {output_typ}, unwrap_tuple={unwrap_tuple}) return a """ c = get_contract(contract) @@ -135,7 +135,7 @@ def test_abi_decode_double( def foo(x: Bytes[{input_len}]) -> ({output_typ1}, {output_typ2}): a: {output_typ1} = empty({output_typ1}) b: {output_typ2} = empty({output_typ2}) - a, b = _abi_decode(x, ({output_typ1}, {output_typ2}), unwrap_tuple={unwrap_tuple}) + a, b = abi_decode(x, ({output_typ1}, {output_typ2}), unwrap_tuple={unwrap_tuple}) return a, b """ @@ -173,7 +173,7 @@ def test_abi_decode_nested_dynarray(get_contract, args, unwrap_tuple): @external def abi_decode(x: Bytes[{len}]) -> DynArray[DynArray[uint256, 3], 3]: a: DynArray[DynArray[uint256, 3], 3] = [] - a = _abi_decode(x, 
DynArray[DynArray[uint256, 3], 3], unwrap_tuple={unwrap_tuple}) + a = abi_decode(x, DynArray[DynArray[uint256, 3], 3], unwrap_tuple={unwrap_tuple}) return a """ @@ -213,7 +213,7 @@ def test_abi_decode_nested_dynarray2(get_contract, args, unwrap_tuple): @external def abi_decode(x: Bytes[{len}]) -> DynArray[DynArray[DynArray[uint256, 3], 3], 3]: a: DynArray[DynArray[DynArray[uint256, 3], 3], 3] = [] - a = _abi_decode( + a = abi_decode( x, DynArray[DynArray[DynArray[uint256, 3], 3], 3], unwrap_tuple={unwrap_tuple} diff --git a/tests/functional/builtins/codegen/test_abi_encode.py b/tests/functional/builtins/codegen/test_abi_encode.py index 7acf00e0b6..123a3898bb 100644 --- a/tests/functional/builtins/codegen/test_abi_encode.py +++ b/tests/functional/builtins/codegen/test_abi_encode.py @@ -48,38 +48,38 @@ def abi_encode( ) if ensure_tuple: if not include_method_id: - return _abi_encode(human) # default ensure_tuple=True - return _abi_encode(human, method_id=0xdeadbeef) + return abi_encode(human) # default ensure_tuple=True + return abi_encode(human, method_id=0xdeadbeef) else: if not include_method_id: - return _abi_encode(human, ensure_tuple=False) - return _abi_encode(human, ensure_tuple=False, method_id=0xdeadbeef) + return abi_encode(human, ensure_tuple=False) + return abi_encode(human, ensure_tuple=False, method_id=0xdeadbeef) @external def abi_encode2(name: String[32], ensure_tuple: bool, include_method_id: bool) -> Bytes[100]: if ensure_tuple: if not include_method_id: - return _abi_encode(name) # default ensure_tuple=True - return _abi_encode(name, method_id=0xdeadbeef) + return abi_encode(name) # default ensure_tuple=True + return abi_encode(name, method_id=0xdeadbeef) else: if not include_method_id: - return _abi_encode(name, ensure_tuple=False) - return _abi_encode(name, ensure_tuple=False, method_id=0xdeadbeef) + return abi_encode(name, ensure_tuple=False) + return abi_encode(name, ensure_tuple=False, method_id=0xdeadbeef) @external def abi_encode3(x: 
uint256, ensure_tuple: bool, include_method_id: bool) -> Bytes[36]: if ensure_tuple: if not include_method_id: - return _abi_encode(x) # default ensure_tuple=True + return abi_encode(x) # default ensure_tuple=True - return _abi_encode(x, method_id=0xdeadbeef) + return abi_encode(x, method_id=0xdeadbeef) else: if not include_method_id: - return _abi_encode(x, ensure_tuple=False) + return abi_encode(x, ensure_tuple=False) - return _abi_encode(x, ensure_tuple=False, method_id=0xdeadbeef) + return abi_encode(x, ensure_tuple=False, method_id=0xdeadbeef) """ c = get_contract(code) @@ -129,7 +129,7 @@ def test_abi_encode_length_failing(get_contract, assert_compile_failed, type, va @internal def foo(): x: WrappedBytes = WrappedBytes(bs={value}) - y: {type}[96] = _abi_encode(x, ensure_tuple=True) # should be Bytes[128] + y: {type}[96] = abi_encode(x, ensure_tuple=True) # should be Bytes[128] """ assert_compile_failed(lambda: get_contract(code)) @@ -141,12 +141,12 @@ def test_abi_encode_dynarray(get_contract): def abi_encode(d: DynArray[uint256, 3], ensure_tuple: bool, include_method_id: bool) -> Bytes[164]: if ensure_tuple: if not include_method_id: - return _abi_encode(d) # default ensure_tuple=True - return _abi_encode(d, method_id=0xdeadbeef) + return abi_encode(d) # default ensure_tuple=True + return abi_encode(d, method_id=0xdeadbeef) else: if not include_method_id: - return _abi_encode(d, ensure_tuple=False) - return _abi_encode(d, ensure_tuple=False, method_id=0xdeadbeef) + return abi_encode(d, ensure_tuple=False) + return abi_encode(d, ensure_tuple=False, method_id=0xdeadbeef) """ c = get_contract(code) @@ -185,12 +185,12 @@ def abi_encode( ) -> Bytes[548]: if ensure_tuple: if not include_method_id: - return _abi_encode(d) # default ensure_tuple=True - return _abi_encode(d, method_id=0xdeadbeef) + return abi_encode(d) # default ensure_tuple=True + return abi_encode(d, method_id=0xdeadbeef) else: if not include_method_id: - return _abi_encode(d, ensure_tuple=False) - 
return _abi_encode(d, ensure_tuple=False, method_id=0xdeadbeef) + return abi_encode(d, ensure_tuple=False) + return abi_encode(d, ensure_tuple=False, method_id=0xdeadbeef) """ c = get_contract(code) @@ -236,12 +236,12 @@ def abi_encode( ) -> Bytes[1700]: if ensure_tuple: if not include_method_id: - return _abi_encode(d) # default ensure_tuple=True - return _abi_encode(d, method_id=0xdeadbeef) + return abi_encode(d) # default ensure_tuple=True + return abi_encode(d, method_id=0xdeadbeef) else: if not include_method_id: - return _abi_encode(d, ensure_tuple=False) - return _abi_encode(d, ensure_tuple=False, method_id=0xdeadbeef) + return abi_encode(d, ensure_tuple=False) + return abi_encode(d, ensure_tuple=False, method_id=0xdeadbeef) """ c = get_contract(code) @@ -281,7 +281,7 @@ def get_counter() -> (uint256, String[6]): nonpayable @external def foo(addr: address) -> Bytes[164]: - return _abi_encode(extcall Foo(addr).get_counter(), method_id=0xdeadbeef) + return abi_encode(extcall Foo(addr).get_counter(), method_id=0xdeadbeef) """ c2 = get_contract(contract_2) @@ -300,7 +300,7 @@ def test_abi_encode_private(get_contract): bytez: Bytes[96] @internal def _foo(bs: Bytes[32]): - self.bytez = _abi_encode(bs) + self.bytez = abi_encode(bs) @external def foo(bs: Bytes[32]) -> (uint256, Bytes[96]): @@ -318,7 +318,7 @@ def test_abi_encode_private_dynarray(get_contract): bytez: Bytes[160] @internal def _foo(bs: DynArray[uint256, 3]): - self.bytez = _abi_encode(bs) + self.bytez = abi_encode(bs) @external def foo(bs: DynArray[uint256, 3]) -> (uint256, Bytes[160]): dont_clobber_me: uint256 = max_value(uint256) @@ -335,7 +335,7 @@ def test_abi_encode_private_nested_dynarray(get_contract): bytez: Bytes[1696] @internal def _foo(bs: DynArray[DynArray[DynArray[uint256, 3], 3], 3]): - self.bytez = _abi_encode(bs) + self.bytez = abi_encode(bs) @external def foo(bs: DynArray[DynArray[DynArray[uint256, 3], 3], 3]) -> (uint256, Bytes[1696]): @@ -358,9 +358,9 @@ def 
test_abi_encode_empty_string(get_contract, empty_literal): @external def foo(ensure_tuple: bool) -> Bytes[96]: if ensure_tuple: - return _abi_encode({empty_literal}) # default ensure_tuple=True + return abi_encode({empty_literal}) # default ensure_tuple=True else: - return _abi_encode({empty_literal}, ensure_tuple=False) + return abi_encode({empty_literal}, ensure_tuple=False) """ c = get_contract(code) diff --git a/vyper/ast/grammar.lark b/vyper/ast/grammar.lark index 3feb4df92f..97f9f70e24 100644 --- a/vyper/ast/grammar.lark +++ b/vyper/ast/grammar.lark @@ -297,7 +297,7 @@ call: atom_expr "(" [arguments] ")" empty: "empty" "(" type ")" // special rule to handle types as "arguments" (for `_abi_decode` builtin) -abi_decode: "_abi_decode" "(" arg "," type ( "," kwarg )* ")" +abi_decode: ("_abi_decode" | "abi_decode") "(" arg "," type ( "," kwarg )* ")" special_builtins: empty | abi_decode diff --git a/vyper/builtins/functions.py b/vyper/builtins/functions.py index d4c83b2bda..1944d32125 100644 --- a/vyper/builtins/functions.py +++ b/vyper/builtins/functions.py @@ -2331,7 +2331,7 @@ def build_IR(self, expr, args, kwargs, context): class ABIEncode(BuiltinFunctionT): - _id = "_abi_encode" # TODO prettier to rename this to abi.encode + _id = "abi_encode" # signature: *, ensure_tuple= -> Bytes[] # explanation of ensure_tuple: # default is to force even a single value into a tuple, @@ -2452,7 +2452,7 @@ def build_IR(self, expr, args, kwargs, context): class ABIDecode(BuiltinFunctionT): - _id = "_abi_decode" + _id = "abi_decode" _inputs = [("data", BytesT.any()), ("output_type", TYPE_T.any())] _kwargs = {"unwrap_tuple": KwargSettings(BoolT(), True, require_literal=True)} @@ -2541,6 +2541,28 @@ def build_IR(self, expr, args, kwargs, context): return b1.resolve(ret) +class OldABIEncode(ABIEncode): + _warned = False + _id = "_abi_encode" + + def _try_fold(self, node): + if not self.__class__._warned: + vyper_warn(f"`{self._id}()` is deprecated! 
Please use `{super()._id}()` instead.", node) + self.__class__._warned = True + super()._try_fold(node) + + +class OldABIDecode(ABIDecode): + _warned = False + _id = "_abi_decode" + + def _try_fold(self, node): + if not self.__class__._warned: + vyper_warn(f"`{self._id}()` is deprecated! Please use `{super()._id}()` instead.", node) + self.__class__._warned = True + super()._try_fold(node) + + class _MinMaxValue(TypenameFoldedFunctionT): def _try_fold(self, node): self._validate_arg_types(node) @@ -2593,8 +2615,10 @@ def _try_fold(self, node): DISPATCH_TABLE = { - "_abi_encode": ABIEncode(), - "_abi_decode": ABIDecode(), + "abi_encode": ABIEncode(), + "abi_decode": ABIDecode(), + "_abi_encode": OldABIEncode(), + "_abi_decode": OldABIDecode(), "floor": Floor(), "ceil": Ceil(), "convert": Convert(), From 21f7172274e551c721e9e35ab3c9d8322a2455d0 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 11 Jun 2024 14:04:33 -0700 Subject: [PATCH 04/20] fix[codegen]: recursive dynarray oob check (#4091) this commit fixes more edge cases in `abi_decode` dynarray validation. these are bugs which were missed (or regressions) in 1f6b9433fbd524, which itself was a continuation of eb011367cc769. there are multiple fixes contained in this commit. - similar conceptual error as in 1f6b9433fbd524. when the length word is out-of-bounds and its runtime value is zero, `make_setter` does not enter recursion and therefore there is no oob check. an example payload which demonstrates this is in `test_nested_invalid_dynarray_head()`. the fix is to check the size of the static section ("embedded static size") before entering the recursion, rather than child_type.static_size (which could be zero). essentially, this checks that the end of the static section is in bounds, rather than the beginning. - the fallback case in `complex_make_setter` could be referring to a tuple of dynamic types, which makes the tuple itself dynamic, so there needs to be an oob check there as well. 
- `static_size()` is more appropriate than `min_size()` for abi payload validation, because you can have "valid" ABI payloads where the runtime length of the dynamic section is zero, because the heads in the static section all point back into the static section. this commit replaces the `min_size()` check with a `static_size()` check, everywhere. - remove `returndatasize` check in external calls, because it gets checked anyways during `make_setter` oob checks. - add a comment clarifying that payloads larger than `size_bound()` get rejected by `abi_decode` but not calldata decoding. tests for each case, contributed by @trocher --------- Co-authored-by: trocher --- .../builtins/codegen/test_abi_decode.py | 98 +++++++++++++++++++ .../test_external_contract_calls.py | 8 +- vyper/builtins/functions.py | 6 +- vyper/codegen/core.py | 15 ++- vyper/codegen/external_call.py | 12 +-- .../function_definitions/external_function.py | 2 +- 6 files changed, 119 insertions(+), 22 deletions(-) diff --git a/tests/functional/builtins/codegen/test_abi_decode.py b/tests/functional/builtins/codegen/test_abi_decode.py index d77bb1b5ae..5773636add 100644 --- a/tests/functional/builtins/codegen/test_abi_decode.py +++ b/tests/functional/builtins/codegen/test_abi_decode.py @@ -1323,3 +1323,101 @@ def run(x: Bytes[2 * 32 + 3 * 32 + 3 * 32 * 4]): with tx_failed(): c.run(data) + + +def test_nested_invalid_dynarray_head(get_contract, tx_failed): + code = """ +@nonpayable +@external +def foo(x:Bytes[320]): + if True: + a: Bytes[320-32] = b'' + + # make the word following the buffer x_mem dirty to make a potential + # OOB revert + fake_head: uint256 = 32 + x_mem: Bytes[320] = x + + y: DynArray[DynArray[uint256, 2], 2] = _abi_decode(x_mem,DynArray[DynArray[uint256, 2], 2]) + +@nonpayable +@external +def bar(x:Bytes[320]): + x_mem: Bytes[320] = x + + y:DynArray[DynArray[uint256, 2], 2] = _abi_decode(x_mem,DynArray[DynArray[uint256, 2], 2]) + """ + c = get_contract(code) + + encoded = (0x20, 0x02) # 
head of the dynarray # len of outer + inner = ( + 0x0, # head1 + # 0x0, # head2 + ) + + encoded = _abi_payload_from_tuple(encoded + inner) + with tx_failed(): + c.foo(encoded) # revert + with tx_failed(): + c.bar(encoded) # return [[],[]] + + +def test_static_outer_type_invalid_heads(get_contract, tx_failed): + code = """ +@nonpayable +@external +def foo(x:Bytes[320]): + x_mem: Bytes[320] = x + y:DynArray[uint256, 2][2] = _abi_decode(x_mem,DynArray[uint256, 2][2]) + +@nonpayable +@external +def bar(x:Bytes[320]): + if True: + a: Bytes[160] = b'' + # write stuff here to make the call revert in case decode do + # an out of bound access: + fake_head: uint256 = 32 + x_mem: Bytes[320] = x + y:DynArray[uint256, 2][2] = _abi_decode(x_mem,DynArray[uint256, 2][2]) + """ + c = get_contract(code) + + encoded = (0x20,) # head of the static array + inner = ( + 0x00, # head of the first dynarray + # 0x00, # head of the second dynarray + ) + + encoded = _abi_payload_from_tuple(encoded + inner) + + with tx_failed(): + c.foo(encoded) + with tx_failed(): + c.bar(encoded) + + +def test_abi_decode_max_size(get_contract, tx_failed): + # test case where the payload is "too large" than the max size + # of abi encoding the type. 
this can happen when the payload is + # "sparse" and has garbage bytes in between the static and dynamic + # sections + code = """ +@external +def foo(a:Bytes[1000]): + v: DynArray[uint256, 1] = _abi_decode(a,DynArray[uint256, 1]) + """ + c = get_contract(code) + + payload = ( + 0xA0, # head + 0x00, # garbage + 0x00, # garbage + 0x00, # garbage + 0x00, # garbage + 0x01, # len + 0x12, # elem1 + ) + + with tx_failed(): + c.foo(_abi_payload_from_tuple(payload)) diff --git a/tests/functional/codegen/calling_convention/test_external_contract_calls.py b/tests/functional/codegen/calling_convention/test_external_contract_calls.py index 2192e6b6ab..e6616a1481 100644 --- a/tests/functional/codegen/calling_convention/test_external_contract_calls.py +++ b/tests/functional/codegen/calling_convention/test_external_contract_calls.py @@ -2519,13 +2519,13 @@ def foo(a: DynArray[{typ}, 3], b: String[5]): encoded = abi.encode(f"({typ}[],string)", val).hex() data = f"0x{sig}{encoded}" - # Dynamic size is short by 1 byte - malformed = data[:264] + # Static size is short by 1 byte + malformed = data[:136] with tx_failed(): env.message_call(c1.address, data=malformed) - # Dynamic size is at least minimum (132 bytes * 2 + 2 (for 0x) = 266) - valid = data[:266] + # Static size is at least minimum ((4 + 64) bytes * 2 + 2 (for 0x) = 138) + valid = data[:138] env.message_call(c1.address, data=valid) diff --git a/vyper/builtins/functions.py b/vyper/builtins/functions.py index 1944d32125..2564329b65 100644 --- a/vyper/builtins/functions.py +++ b/vyper/builtins/functions.py @@ -2482,7 +2482,7 @@ def build_IR(self, expr, args, kwargs, context): wrapped_typ = calculate_type_for_external_return(output_typ) abi_size_bound = wrapped_typ.abi_type.size_bound() - abi_min_size = wrapped_typ.abi_type.min_size() + abi_min_size = wrapped_typ.abi_type.static_size() # Get the size of data input_max_len = data.typ.maxlen @@ -2506,6 +2506,10 @@ def build_IR(self, expr, args, kwargs, context): ret = ["seq"] + # 
NOTE: we could replace these 4 lines with + # `[assert [le, abi_min_size, data_len]]`. it depends on + # what we consider a "valid" payload. + # cf. test_abi_decode_max_size() if abi_min_size == abi_size_bound: ret.append(["assert", ["eq", abi_min_size, data_len]]) else: diff --git a/vyper/codegen/core.py b/vyper/codegen/core.py index 5d4621518f..ff0f801d74 100644 --- a/vyper/codegen/core.py +++ b/vyper/codegen/core.py @@ -895,10 +895,7 @@ def _abi_payload_size(ir_node): # the amount of size each value occupies in static section # (the amount of size it occupies in the dynamic section is handled in # make_setter recursion) - item_size = ir_node.typ.value_type.abi_type.static_size() - if item_size == 0: - # manual optimization; the mload cannot currently be optimized out - return ["add", OFFSET, 0] + item_size = ir_node.typ.value_type.abi_type.embedded_static_size() return ["add", OFFSET, ["mul", get_dyn_array_count(ir_node), item_size]] if isinstance(ir_node.typ, _BytestringT): @@ -982,7 +979,15 @@ def make_setter(left, right, hi=None): # Complex Types assert isinstance(left.typ, (SArrayT, TupleT, StructT)) - return _complex_make_setter(left, right, hi=hi) + with right.cache_when_complex("c_right") as (b1, right): + ret = ["seq"] + if hi is not None: + item_end = add_ofst(right, right.typ.abi_type.static_size()) + len_check = ["assert", ["le", item_end, hi]] + ret.append(len_check) + + ret.append(_complex_make_setter(left, right, hi=hi)) + return b1.resolve(IRnode.from_list(ret)) # locations with no dedicated copy opcode diff --git a/vyper/codegen/external_call.py b/vyper/codegen/external_call.py index f3cd4e7a44..b6ac180722 100644 --- a/vyper/codegen/external_call.py +++ b/vyper/codegen/external_call.py @@ -86,9 +86,8 @@ def _unpack_returndata(buf, fn_type, call_kwargs, contract_address, context, exp abi_return_t = wrapped_return_t.abi_type - min_return_size = abi_return_t.min_size() max_return_size = abi_return_t.size_bound() - assert 0 < min_return_size <= 
max_return_size + assert 0 <= max_return_size ret_ofst = buf ret_len = max_return_size @@ -103,15 +102,6 @@ def _unpack_returndata(buf, fn_type, call_kwargs, contract_address, context, exp unpacker = ["seq"] - # revert when returndatasize is not in bounds - # (except when return_override is provided.) - if not call_kwargs.skip_contract_check: - assertion = IRnode.from_list( - ["assert", ["ge", "returndatasize", min_return_size]], - error_msg="returndatasize too small", - ) - unpacker.append(assertion) - assert isinstance(wrapped_return_t, TupleT) # unpack strictly diff --git a/vyper/codegen/function_definitions/external_function.py b/vyper/codegen/function_definitions/external_function.py index a9b4a93025..4c733ee851 100644 --- a/vyper/codegen/function_definitions/external_function.py +++ b/vyper/codegen/function_definitions/external_function.py @@ -84,7 +84,7 @@ def handler_for(calldata_kwargs, default_kwargs): # ensure calldata is at least of minimum length args_abi_t = calldata_args_t.abi_type - calldata_min_size = args_abi_t.min_size() + 4 + calldata_min_size = args_abi_t.static_size() + 4 # TODO optimize make_setter by using # TupleT(list(arg.typ for arg in calldata_kwargs + default_kwargs)) From 7770967ff8346430d699325fed3c02744e0100a9 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 12 Jun 2024 05:35:55 -0700 Subject: [PATCH 05/20] fix[codegen]: `make_setter` overlap in the presence of `staticcall` (#4128) this commit fixes another overlap bug in `make_setter`. this is a variant of the fixes in ad9c10b0b98e2d and 1c8349e867b2b3, specifically fixing an oversight in ad9c10b0b98e2d - when there is a `staticcall` contained inside of `make_setter`, there can still be src/dst overlap, due to read-only re-entrancy(!). this commit adds `staticcall` to the list of "risky call" opcodes, and adds a poc test case (contributed by @trocher). 
--------- Co-authored-by: trocher --- .../test_external_contract_calls.py | 35 +++++++++++++++++++ vyper/codegen/ir_node.py | 2 +- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/tests/functional/codegen/calling_convention/test_external_contract_calls.py b/tests/functional/codegen/calling_convention/test_external_contract_calls.py index e6616a1481..f9252f0a99 100644 --- a/tests/functional/codegen/calling_convention/test_external_contract_calls.py +++ b/tests/functional/codegen/calling_convention/test_external_contract_calls.py @@ -2582,3 +2582,38 @@ def boo(): c = get_contract(code) assert c.foo() == [1, 2, 3, 4] + + +def test_make_setter_staticcall(get_contract): + # variant of GH #3503 + code = """ +interface A: + def boo() -> uint256 : view +interface B: + def boo() -> uint256 : nonpayable + +a: DynArray[uint256, 10] + +@external +def foo() -> DynArray[uint256, 10]: + self.a = [3, 0, 0] + self.a = [1, 2, staticcall A(self).boo(), 4] + return self.a # bug returns [1, 2, 1, 4] + +@external +def bar() -> DynArray[uint256, 10]: + self.a = [3, 0, 0] + self.a = [1, 2, extcall B(self).boo(), 4] + return self.a # returns [1, 2, 3, 4] + + +@external +@view +# @nonpayable +def boo() -> uint256: + return self.a[0] + """ + c = get_contract(code) + + assert c.foo() == [1, 2, 3, 4] + assert c.bar() == [1, 2, 3, 4] diff --git a/vyper/codegen/ir_node.py b/vyper/codegen/ir_node.py index 3a9540f2cc..9d39ebd033 100644 --- a/vyper/codegen/ir_node.py +++ b/vyper/codegen/ir_node.py @@ -468,7 +468,7 @@ def referenced_variables(self): @cached_property def contains_risky_call(self): - ret = self.value in ("call", "delegatecall", "create", "create2") + ret = self.value in ("call", "delegatecall", "staticcall", "create", "create2") for arg in self.args: ret |= arg.contains_risky_call From 153262bfb690cdb3c367d5961c0b70a670a646a8 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 12 Jun 2024 23:44:42 +0300 Subject: [PATCH 06/20] fix[venom]: clear `out_vars` during 
calculation (#4129) Clear `out_vars` set at start of calculations When computing `out_vars` for each basic block, we perform set unions of variables expected by all successor basic blocks of the current block. This commit clears the `out_vars` set at the beginning of the calculations. --------- Co-authored-by: Charles Cooper --- vyper/venom/analysis/liveness.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vyper/venom/analysis/liveness.py b/vyper/venom/analysis/liveness.py index ac06ff4dae..5d1ac488f1 100644 --- a/vyper/venom/analysis/liveness.py +++ b/vyper/venom/analysis/liveness.py @@ -54,7 +54,8 @@ def _calculate_out_vars(self, bb: IRBasicBlock) -> bool: Compute out_vars of basic block. Returns True if out_vars changed """ - out_vars = bb.out_vars.copy() + out_vars = bb.out_vars + bb.out_vars = OrderedSet() for out_bb in bb.cfg_out: target_vars = self.input_vars_from(bb, out_bb) bb.out_vars = bb.out_vars.union(target_vars) From e9e9d78a2bb8eeb693709b56c8152df5f8af2efa Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 13 Jun 2024 08:29:10 -0700 Subject: [PATCH 07/20] fix[ux]: improve initializer hint for unimported modules (#4145) improve initializer hint in the case that the needed module is not already imported --- tests/functional/syntax/modules/test_initializers.py | 5 +++-- vyper/semantics/analysis/module.py | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/functional/syntax/modules/test_initializers.py b/tests/functional/syntax/modules/test_initializers.py index 624904e45f..ead0fbcf6b 100644 --- a/tests/functional/syntax/modules/test_initializers.py +++ b/tests/functional/syntax/modules/test_initializers.py @@ -1273,7 +1273,7 @@ def foo(): assert e.value._hint == "add `lib3 := lib3` to its initializer list" -def test_hint_for_missing_initializer_when_no_import(make_input_bundle): +def test_hint_for_missing_initializer_when_no_import(make_input_bundle, chdir_tmp_path): lib1 = """ counter: uint256 """ @@ 
-1297,7 +1297,8 @@ def foo(): with pytest.raises(InitializerException) as e: compile_code(main, input_bundle=input_bundle) assert e.value._message == "`lib2` uses `lib1`, but it is not initialized with `lib1`" - assert e.value._hint == "try importing lib1 first" + hint = "try importing `lib1` first (located at `lib1.vy`)" + assert e.value._hint == hint @pytest.fixture diff --git a/vyper/semantics/analysis/module.py b/vyper/semantics/analysis/module.py index d0b019db7a..d6bbea1b48 100644 --- a/vyper/semantics/analysis/module.py +++ b/vyper/semantics/analysis/module.py @@ -514,7 +514,8 @@ def visit_InitializesDecl(self, node): break if rhs is None: - hint = f"try importing {item.alias} first" + hint = f"try importing `{item.alias}` first " + hint += f"(located at `{item.module_t._module.path}`)" elif not isinstance(annotation, vy_ast.Subscript): # it's `initializes: foo` instead of `initializes: foo[...]` hint = f"did you mean {module_ref.id}[{lhs} := {rhs}]?" From 44bb281ccaac89dc3bd66030702473c386bceae6 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 13 Jun 2024 09:22:58 -0700 Subject: [PATCH 08/20] fix[codegen]: add back in `returndatasize` check (#4144) add back in `returndatasize` check for external calls in the case that `make_setter()` is not called (i.e. when `needs_clamp()` is `True`). the check was removed (i.e. 
there was a regression) in 21f7172274e test case and poc contributed by @cyberthirst --------- Co-authored-by: cyberthirst --- .../builtins/codegen/test_abi_decode.py | 20 +++++++++++ vyper/abi_types.py | 25 -------------- vyper/codegen/external_call.py | 33 ++++++++++++++++--- 3 files changed, 48 insertions(+), 30 deletions(-) diff --git a/tests/functional/builtins/codegen/test_abi_decode.py b/tests/functional/builtins/codegen/test_abi_decode.py index 5773636add..9ae869c9cc 100644 --- a/tests/functional/builtins/codegen/test_abi_decode.py +++ b/tests/functional/builtins/codegen/test_abi_decode.py @@ -1421,3 +1421,23 @@ def foo(a:Bytes[1000]): with tx_failed(): c.foo(_abi_payload_from_tuple(payload)) + + +# returndatasize check for uint256 +def test_returndatasize_check(get_contract, tx_failed): + code = """ +@external +def bar(): + pass + +interface A: + def bar() -> uint256: nonpayable + +@external +def run() -> uint256: + return extcall A(self).bar() + """ + c = get_contract(code) + + with tx_failed(): + c.run() diff --git a/vyper/abi_types.py b/vyper/abi_types.py index 24d6fe866a..a95930b16d 100644 --- a/vyper/abi_types.py +++ b/vyper/abi_types.py @@ -24,11 +24,6 @@ def embedded_dynamic_size_bound(self): return 0 return self.size_bound() - def embedded_min_dynamic_size(self): - if not self.is_dynamic(): - return 0 - return self.min_size() - # size (in bytes) of the static section def static_size(self): raise NotImplementedError("ABIType.static_size") @@ -42,14 +37,6 @@ def dynamic_size_bound(self): def size_bound(self): return self.static_size() + self.dynamic_size_bound() - def min_size(self): - return self.static_size() + self.min_dynamic_size() - - def min_dynamic_size(self): - if not self.is_dynamic(): - return 0 - raise NotImplementedError("ABIType.min_dynamic_size") - # The canonical name of the type for calculating the function selector def selector_name(self): raise NotImplementedError("ABIType.selector_name") @@ -158,9 +145,6 @@ def static_size(self): 
def dynamic_size_bound(self): return self.m_elems * self.subtyp.embedded_dynamic_size_bound() - def min_dynamic_size(self): - return self.m_elems * self.subtyp.embedded_min_dynamic_size() - def selector_name(self): return f"{self.subtyp.selector_name()}[{self.m_elems}]" @@ -187,9 +171,6 @@ def dynamic_size_bound(self): # length word + data return 32 + ceil32(self.bytes_bound) - def min_dynamic_size(self): - return 32 - def selector_name(self): return "bytes" @@ -222,9 +203,6 @@ def dynamic_size_bound(self): # length + size of embedded children return 32 + subtyp_size * self.elems_bound - def min_dynamic_size(self): - return 32 - def selector_name(self): return f"{self.subtyp.selector_name()}[]" @@ -245,9 +223,6 @@ def static_size(self): def dynamic_size_bound(self): return sum([t.embedded_dynamic_size_bound() for t in self.subtyps]) - def min_dynamic_size(self): - return sum([t.embedded_min_dynamic_size() for t in self.subtyps]) - def is_complex_type(self): return True diff --git a/vyper/codegen/external_call.py b/vyper/codegen/external_call.py index b6ac180722..72fff5378f 100644 --- a/vyper/codegen/external_call.py +++ b/vyper/codegen/external_call.py @@ -86,8 +86,9 @@ def _unpack_returndata(buf, fn_type, call_kwargs, contract_address, context, exp abi_return_t = wrapped_return_t.abi_type + min_return_size = abi_return_t.static_size() max_return_size = abi_return_t.size_bound() - assert 0 <= max_return_size + assert 0 < min_return_size <= max_return_size ret_ofst = buf ret_len = max_return_size @@ -105,11 +106,35 @@ def _unpack_returndata(buf, fn_type, call_kwargs, contract_address, context, exp assert isinstance(wrapped_return_t, TupleT) # unpack strictly - if needs_clamp(wrapped_return_t, encoding): + if not needs_clamp(wrapped_return_t, encoding): + # revert when returndatasize is not in bounds, except when + # skip_contract_check is enabled. 
+ # NOTE: there is an optimization here: when needs_clamp is True, + # make_setter (implicitly) checks returndatasize during abi + # decoding. + # since make_setter is not called in this branch, we need to check + # returndatasize here, but we avoid a redundant check by only doing + # the returndatasize check inside of this branch (and not in the + # `needs_clamp==True` branch). + # in the future, this check could be moved outside of the branch, and + # instead rely on the optimizer to optimize out the redundant check, + # it would need the optimizer to do algebraic reductions (along the + # lines of `a>b and b>c and a>c` reduced to `a>b and b>c`). + # another thing we could do instead once we have the machinery is to + # simply always use make_setter instead of having this assertion, and + # rely on memory analyser to optimize out the memory movement. + if not call_kwargs.skip_contract_check: + assertion = IRnode.from_list( + ["assert", ["ge", "returndatasize", min_return_size]], + error_msg="returndatasize too small", + ) + unpacker.append(assertion) + return_buf = buf + + else: return_buf = context.new_internal_variable(wrapped_return_t) # note: make_setter does ABI decoding and clamps - payload_bound = IRnode.from_list( ["select", ["lt", ret_len, "returndatasize"], ret_len, "returndatasize"] ) @@ -117,8 +142,6 @@ def _unpack_returndata(buf, fn_type, call_kwargs, contract_address, context, exp unpacker.append( b1.resolve(make_setter(return_buf, buf, hi=add_ofst(buf, payload_bound))) ) - else: - return_buf = buf if call_kwargs.default_return_value is not None: # if returndatasize == 0: From 69e5c0541a9b23ad1b085e0a89b545124716b516 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 14 Jun 2024 13:31:45 -0700 Subject: [PATCH 09/20] feat[test]: implement `abi_decode` spec test (#4095) this commit implements a spec-based differential fuzzer for `abi_decode`. 
it introduces several components: - a "spec" implementation of `abi_decode`, which is how vyper's abi_decode should behave on a given payload, implemented in python - a hypothesis strategy to draw vyper types - hypothesis strategy to create valid data for a given vyper type - a hypothesis strategy to _mutate_ a given payload which is designed to introduce faults in the decoder. testing indicated splicing pointers into the payload - either valid pointers or "nearly" valid pointers - had the highest success rate for finding bugs in the decoder. the intuition here is that the most difficult part of the decoder is validating out-of-bound pointers in the payload, so pointers represent "semantically high-value" data to the fuzzer. - some hypothesis tuning to ensure a good distribution of types over several days of testing+tuning, this fuzzer independently found the bugs fixed in 44bb281ccaa and 21f7172274e (which were originally found by manual review). --- tests/conftest.py | 2 +- tests/evm_backends/base_env.py | 12 +- tests/evm_backends/revm_env.py | 2 + .../functional/builtins/codegen/abi_decode.py | 148 +++++++ .../builtins/codegen/test_abi_decode_fuzz.py | 416 ++++++++++++++++++ vyper/codegen/core.py | 12 +- vyper/semantics/types/subscriptable.py | 5 +- 7 files changed, 592 insertions(+), 5 deletions(-) create mode 100644 tests/functional/builtins/codegen/abi_decode.py create mode 100644 tests/functional/builtins/codegen/test_abi_decode_fuzz.py diff --git a/tests/conftest.py b/tests/conftest.py index 4b3d90f65a..31c72246bd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -196,7 +196,7 @@ def env(gas_limit, evm_version, evm_backend, tracing, account_keys) -> BaseEnv: ) -@pytest.fixture +@pytest.fixture(scope="module") def get_contract_from_ir(env, optimize): def ir_compiler(ir, *args, **kwargs): ir = IRnode.from_list(ir) diff --git a/tests/evm_backends/base_env.py b/tests/evm_backends/base_env.py index a8ab4d2367..1ea3dba328 100644 --- 
a/tests/evm_backends/base_env.py +++ b/tests/evm_backends/base_env.py @@ -30,7 +30,7 @@ class ExecutionResult: gas_used: int -class EvmError(RuntimeError): +class EvmError(Exception): """Exception raised when a call fails.""" @@ -205,6 +205,16 @@ def out_of_gas_error(self) -> str: """Expected error message when user runs out of gas""" raise NotImplementedError # must be implemented by subclasses + @property + def contract_size_limit_error(self) -> str: + """Expected error message when contract is over codesize limit""" + raise NotImplementedError # must be implemented by subclasses + + @property + def initcode_size_limit_error(self) -> str: + """Expected error message when contract is over codesize limit""" + raise NotImplementedError # must be implemented by subclasses + def _compile( source_code: str, diff --git a/tests/evm_backends/revm_env.py b/tests/evm_backends/revm_env.py index 5c8b8aba08..d5a7570f96 100644 --- a/tests/evm_backends/revm_env.py +++ b/tests/evm_backends/revm_env.py @@ -11,6 +11,8 @@ class RevmEnv(BaseEnv): invalid_opcode_error = "InvalidFEOpcode" out_of_gas_error = "OutOfGas" + contract_size_limit_error = "CreateContractSizeLimit" + initcode_size_limit_error = "CreateInitCodeSizeLimit" def __init__( self, diff --git a/tests/functional/builtins/codegen/abi_decode.py b/tests/functional/builtins/codegen/abi_decode.py new file mode 100644 index 0000000000..9e10b862d5 --- /dev/null +++ b/tests/functional/builtins/codegen/abi_decode.py @@ -0,0 +1,148 @@ +from typing import TYPE_CHECKING, Iterable + +from eth_utils import to_checksum_address + +from vyper.abi_types import ( + ABI_Address, + ABI_Bool, + ABI_Bytes, + ABI_BytesM, + ABI_DynamicArray, + ABI_GIntM, + ABI_StaticArray, + ABI_String, + ABI_Tuple, + ABIType, +) +from vyper.utils import int_bounds, unsigned_to_signed + +if TYPE_CHECKING: + from vyper.semantics.types import VyperType + + +class DecodeError(Exception): + pass + + +def _strict_slice(payload, start, length): + if start < 0: + raise 
DecodeError(f"OOB {start}") + + end = start + length + if end > len(payload): + raise DecodeError(f"OOB {start} + {length} (=={end}) > {len(payload)}") + return payload[start:end] + + +def _read_int(payload, ofst): + return int.from_bytes(_strict_slice(payload, ofst, 32)) + + +# vyper abi_decode spec implementation +def spec_decode(typ: "VyperType", payload: bytes): + abi_t = typ.abi_type + + lo, hi = abi_t.static_size(), abi_t.size_bound() + if not (lo <= len(payload) <= hi): + raise DecodeError(f"bad payload size {lo}, {len(payload)}, {hi}") + + return _decode_r(abi_t, 0, payload) + + +def _decode_r(abi_t: ABIType, current_offset: int, payload: bytes): + if isinstance(abi_t, ABI_Tuple): + return tuple(_decode_multi_r(abi_t.subtyps, current_offset, payload)) + + if isinstance(abi_t, ABI_StaticArray): + n = abi_t.m_elems + subtypes = [abi_t.subtyp] * n + return _decode_multi_r(subtypes, current_offset, payload) + + if isinstance(abi_t, ABI_DynamicArray): + bound = abi_t.elems_bound + + n = _read_int(payload, current_offset) + if n > bound: + raise DecodeError("Dynarray too large") + + # offsets in dynarray start from after the length word + current_offset += 32 + subtypes = [abi_t.subtyp] * n + return _decode_multi_r(subtypes, current_offset, payload) + + # sanity check + assert not abi_t.is_complex_type() + + if isinstance(abi_t, ABI_Bytes): + bound = abi_t.bytes_bound + length = _read_int(payload, current_offset) + if length > bound: + raise DecodeError("bytes too large") + + current_offset += 32 # size of length word + ret = _strict_slice(payload, current_offset, length) + + # abi string doesn't actually define string decoder, so we + # just bytecast the output + if isinstance(abi_t, ABI_String): + # match eth-stdlib, since that's what we check against + ret = ret.decode(errors="surrogateescape") + + return ret + + # sanity check + assert not abi_t.is_dynamic() + + if isinstance(abi_t, ABI_GIntM): + ret = _read_int(payload, current_offset) + + # handle 
signedness + if abi_t.signed: + ret = unsigned_to_signed(ret, 256, strict=True) + + # bounds check + lo, hi = int_bounds(signed=abi_t.signed, bits=abi_t.m_bits) + if not (lo <= ret <= hi): + u = "" if abi_t.signed else "u" + raise DecodeError(f"invalid {u}int{abi_t.m_bits}") + + if isinstance(abi_t, ABI_Address): + return to_checksum_address(ret.to_bytes(20, "big")) + + if isinstance(abi_t, ABI_Bool): + if ret not in (0, 1): + raise DecodeError("invalid bool") + return ret + + return ret + + if isinstance(abi_t, ABI_BytesM): + ret = _strict_slice(payload, current_offset, 32) + m = abi_t.m_bytes + assert 1 <= m <= 32 # internal sanity check + # BytesM is right-padded with zeroes + if ret[m:] != b"\x00" * (32 - m): + raise DecodeError(f"invalid bytes{m}") + return ret[:m] + + raise RuntimeError("unreachable") + + +def _decode_multi_r(types: Iterable[ABIType], outer_offset: int, payload: bytes) -> list: + ret = [] + static_ofst = outer_offset + + for sub_t in types: + if sub_t.is_dynamic(): + # "head" terminology from abi spec + head = _read_int(payload, static_ofst) + ofst = outer_offset + head + else: + ofst = static_ofst + + item = _decode_r(sub_t, ofst, payload) + + ret.append(item) + static_ofst += sub_t.embedded_static_size() + + return ret diff --git a/tests/functional/builtins/codegen/test_abi_decode_fuzz.py b/tests/functional/builtins/codegen/test_abi_decode_fuzz.py new file mode 100644 index 0000000000..d12b2cde7e --- /dev/null +++ b/tests/functional/builtins/codegen/test_abi_decode_fuzz.py @@ -0,0 +1,416 @@ +from dataclasses import dataclass + +import hypothesis as hp +import hypothesis.strategies as st +import pytest +from eth.codecs import abi + +from tests.evm_backends.base_env import EvmError +from vyper.codegen.core import calculate_type_for_external_return, needs_external_call_wrap +from vyper.semantics.types import ( + AddressT, + BoolT, + BytesM_T, + BytesT, + DArrayT, + DecimalT, + HashMapT, + IntegerT, + SArrayT, + StringT, + TupleT, + VyperType, 
+ _get_primitive_types, + _get_sequence_types, +) +from vyper.semantics.types.shortcuts import UINT256_T + +from .abi_decode import DecodeError, spec_decode + +pytestmark = pytest.mark.fuzzing + +type_ctors = [] +for t in _get_primitive_types().values(): + if t == HashMapT or t == DecimalT(): + continue + if isinstance(t, VyperType): + t = t.__class__ + if t in type_ctors: + continue + type_ctors.append(t) + +complex_static_ctors = [SArrayT, TupleT] +complex_dynamic_ctors = [DArrayT] +leaf_ctors = [t for t in type_ctors if t not in _get_sequence_types().values()] +static_leaf_ctors = [t for t in leaf_ctors if t._is_prim_word] +dynamic_leaf_ctors = [BytesT, StringT] + +MAX_MUTATIONS = 33 + + +@st.composite +# max type nesting +def vyper_type(draw, nesting=3, skip=None): + assert nesting >= 0 + + skip = skip or [] + + st_leaves = st.one_of(st.sampled_from(dynamic_leaf_ctors), st.sampled_from(static_leaf_ctors)) + st_complex = st.one_of( + st.sampled_from(complex_dynamic_ctors), st.sampled_from(complex_static_ctors) + ) + + if nesting == 0: + st_type = st_leaves + else: + st_type = st.one_of(st_complex, st_leaves) + + # filter here is a bit of a kludge, would be better to improve sampling + t = draw(st_type.filter(lambda t: t not in skip)) + + # note: maybe st.deferred is good here, we could define it with + # mutual recursion + def _go(skip=skip): + return draw(vyper_type(nesting=nesting - 1, skip=skip)) + + if t in (BytesT, StringT): + # arbitrary max_value + bound = draw(st.integers(min_value=1, max_value=1024)) + return t(bound) + + if t == SArrayT: + subtype = _go(skip=[TupleT, BytesT, StringT]) + bound = draw(st.integers(min_value=1, max_value=6)) + return t(subtype, bound) + if t == DArrayT: + subtype = _go(skip=[TupleT]) + bound = draw(st.integers(min_value=1, max_value=16)) + return t(subtype, bound) + + if t == TupleT: + # zero-length tuples are not allowed in vyper + n = draw(st.integers(min_value=1, max_value=6)) + subtypes = [_go() for _ in range(n)] + 
return TupleT(subtypes) + + if t in (BoolT, AddressT): + return t() + + if t == IntegerT: + signed = draw(st.booleans()) + bits = 8 * draw(st.integers(min_value=1, max_value=32)) + return t(signed, bits) + + if t == BytesM_T: + m = draw(st.integers(min_value=1, max_value=32)) + return t(m) + + raise RuntimeError("unreachable") + + +@st.composite +def data_for_type(draw, typ): + def _go(t): + return draw(data_for_type(t)) + + if isinstance(typ, TupleT): + return tuple(_go(item_t) for item_t in typ.member_types) + + if isinstance(typ, SArrayT): + return [_go(typ.value_type) for _ in range(typ.length)] + + if isinstance(typ, DArrayT): + n = draw(st.integers(min_value=0, max_value=typ.length)) + return [_go(typ.value_type) for _ in range(n)] + + if isinstance(typ, StringT): + # technically the ABI spec doesn't say string has to be valid utf-8, + # but eth-stdlib won't encode invalid utf-8 + return draw(st.text(max_size=typ.length)) + + if isinstance(typ, BytesT): + return draw(st.binary(max_size=typ.length)) + + if isinstance(typ, IntegerT): + lo, hi = typ.ast_bounds + return draw(st.integers(min_value=lo, max_value=hi)) + + if isinstance(typ, BytesM_T): + return draw(st.binary(min_size=typ.length, max_size=typ.length)) + + if isinstance(typ, BoolT): + return draw(st.booleans()) + + if isinstance(typ, AddressT): + ret = draw(st.binary(min_size=20, max_size=20)) + return "0x" + ret.hex() + + raise RuntimeError("unreachable") + + +def _sort2(x, y): + if x > y: + return y, x + return x, y + + +@st.composite +def _mutate(draw, payload, max_mutations=MAX_MUTATIONS): + # do point+bulk mutations, + # add/edit/delete/splice/flip up to max_mutations. + if len(payload) == 0: + return + + ret = bytearray(payload) + + # for add/edit, the new byte is any character, but we bias it towards + # bytes already in the payload. 
+ st_any_byte = st.integers(min_value=0, max_value=255) + payload_nonzeroes = list(x for x in payload if x != 0) + if len(payload_nonzeroes) > 0: + st_existing_byte = st.sampled_from(payload) + st_byte = st.one_of(st_existing_byte, st_any_byte) + else: + st_byte = st_any_byte + + # add, edit, delete, word, splice, flip + possible_actions = "adwww" + actions = draw(st.lists(st.sampled_from(possible_actions), max_size=MAX_MUTATIONS)) + + for action in actions: + if len(ret) == 0: + # bail out. could we maybe be smarter, like only add here? + break + + # for the mutation position, we can use any index in the payload, + # but we bias it towards indices of nonzero bytes. + st_any_ix = st.integers(min_value=0, max_value=len(ret) - 1) + nonzero_indexes = [i for i, s in enumerate(ret) if s != 0] + if len(nonzero_indexes) > 0: + st_nonzero_ix = st.sampled_from(nonzero_indexes) + st_ix = st.one_of(st_any_ix, st_nonzero_ix) + else: + st_ix = st_any_ix + + ix = draw(st_ix) + + if action == "a": + ret.insert(ix, draw(st_byte)) + elif action == "e": + ret[ix] = draw(st_byte) + elif action == "d": + ret.pop(ix) + elif action == "w": + # splice word + st_uint256 = st.integers(min_value=0, max_value=2**256 - 1) + + # valid pointers, but maybe *just* out of bounds + st_poison = st.integers(min_value=-2 * len(ret), max_value=2 * len(ret)).map( + lambda x: x % (2**256) + ) + word = draw(st.one_of(st_poison, st_uint256)) + ret[ix - 31 : ix + 1] = word.to_bytes(32) + elif action == "s": + ix2 = draw(st_ix) + ix, ix2 = _sort2(ix, ix2) + ix2 += 1 + # max splice is 64 bytes, due to MAX_BUFFER_SIZE limitation in st.binary + ix2 = ix + (ix2 % 64) + length = ix2 - ix + substr = draw(st.binary(min_size=length, max_size=length)) + ret[ix:ix2] = substr + elif action == "f": + ix2 = draw(st_ix) + ix, ix2 = _sort2(ix, ix2) + ix2 += 1 + for i in range(ix, ix2): + # flip the bits in the byte + ret[i] = 255 ^ ret[i] + else: + raise RuntimeError("unreachable") + + return bytes(ret) + + +@st.composite 
+def payload_from(draw, typ): + data = draw(data_for_type(typ)) + schema = typ.abi_type.selector_name() + payload = abi.encode(schema, data) + + return draw(_mutate(payload)) + + +_settings = dict( + report_multiple_bugs=False, + # verbosity=hp.Verbosity.verbose, + suppress_health_check=( + hp.HealthCheck.data_too_large, + hp.HealthCheck.too_slow, + hp.HealthCheck.large_base_example, + ), + phases=( + hp.Phase.explicit, + hp.Phase.reuse, + hp.Phase.generate, + hp.Phase.target, + # Phase.shrink, # can force long waiting for examples + # Phase.explain, # not helpful here + ), +) + + +@dataclass(frozen=True) +class _TypeStats: + nesting: int = 0 + num_dynamic_types: int = 0 # number of dynamic types in the type + breadth: int = 0 # e.g. int16[50] has higher breadth than int16[1] + width: int = 0 # size of type + + +def _type_stats(typ: VyperType) -> _TypeStats: + def _finalize(): # little trick to save re-typing the arguments + width = typ.memory_bytes_required + return _TypeStats( + nesting=nesting, num_dynamic_types=num_dynamic_types, breadth=breadth, width=width + ) + + if typ._is_prim_word: + nesting = 0 + breadth = 1 + num_dynamic_types = 0 + return _finalize() + + if isinstance(typ, (BytesT, StringT)): + nesting = 0 + breadth = 1 # idk + num_dynamic_types = 1 + return _finalize() + + if isinstance(typ, TupleT): + substats = [_type_stats(t) for t in typ.member_types] + nesting = 1 + max(s.nesting for s in substats) + breadth = max(typ.length, *[s.breadth for s in substats]) + num_dynamic_types = sum(s.num_dynamic_types for s in substats) + return _finalize() + + if isinstance(typ, DArrayT): + substat = _type_stats(typ.value_type) + nesting = 1 + substat.nesting + breadth = max(typ.count, substat.breadth) + num_dynamic_types = 1 + substat.num_dynamic_types + return _finalize() + + if isinstance(typ, SArrayT): + substat = _type_stats(typ.value_type) + nesting = 1 + substat.nesting + breadth = max(typ.count, substat.breadth) + num_dynamic_types = 
substat.num_dynamic_types + return _finalize() + + raise RuntimeError("unreachable") + + +@pytest.fixture(scope="module") +def payload_copier(get_contract_from_ir): + # some contract which will return the buffer passed to it + # note: hardcode the location of the bytestring + ir = [ + "with", + "length", + ["calldataload", 36], + ["seq", ["calldatacopy", 0, 68, "length"], ["return", 0, "length"]], + ] + return get_contract_from_ir(["deploy", 0, ir, 0]) + + +PARALLELISM = 1 # increase on fuzzer box + + +# NOTE: this is a heavy test. 100 types * 100 payloads per type can take +# 3-4minutes on a regular CPU core. +@pytest.mark.parametrize("_n", list(range(PARALLELISM))) +@hp.given(typ=vyper_type()) +@hp.settings(max_examples=100, **_settings) +@hp.example(typ=DArrayT(DArrayT(UINT256_T, 2), 2)) +def test_abi_decode_fuzz(_n, typ, get_contract, tx_failed, payload_copier): + # import time + # t0 = time.time() + # print("ENTER", typ) + + wrapped_type = calculate_type_for_external_return(typ) + + stats = _type_stats(typ) + # for k, v in asdict(stats).items(): + # event(k, v) + hp.target(stats.num_dynamic_types) + # hp.target(typ.abi_type.is_dynamic() + typ.abi_type.is_complex_type())) + + # add max_mutations bytes worth of padding so we don't just get caught + # by bytes length check at function entry + type_bound = wrapped_type.abi_type.size_bound() + buffer_bound = type_bound + MAX_MUTATIONS + type_str = repr(typ) # annotation in vyper code + # TODO: intrinsic decode from staticcall/extcall + # TODO: _abi_decode from other sources (staticcall/extcall?) 
+ # TODO: dirty the buffer + # TODO: check unwrap_tuple=False + code = f""" +@external +def run(xs: Bytes[{buffer_bound}]) -> {type_str}: + ret: {type_str} = abi_decode(xs, {type_str}) + return ret + +interface Foo: + def foo(xs: Bytes[{buffer_bound}]) -> {type_str}: view # STATICCALL + def bar(xs: Bytes[{buffer_bound}]) -> {type_str}: nonpayable # CALL + +@external +def run2(xs: Bytes[{buffer_bound}], copier: Foo) -> {type_str}: + assert len(xs) <= {type_bound} + return staticcall copier.foo(xs) + +@external +def run3(xs: Bytes[{buffer_bound}], copier: Foo) -> {type_str}: + assert len(xs) <= {type_bound} + return (extcall copier.bar(xs)) + """ + c = get_contract(code) + + @hp.given(data=payload_from(wrapped_type)) + @hp.settings(max_examples=100, **_settings) + def _fuzz(data): + hp.note(f"type: {typ}") + hp.note(f"abi_t: {wrapped_type.abi_type.selector_name()}") + hp.note(code) + hp.note(data.hex()) + + try: + expected = spec_decode(wrapped_type, data) + + # unwrap if necessary + if needs_external_call_wrap(typ): + assert isinstance(expected, tuple) + (expected,) = expected + + hp.note(f"expected {expected}") + assert expected == c.run(data) + assert expected == c.run2(data, payload_copier.address) + assert expected == c.run3(data, payload_copier.address) + + except DecodeError: + # note EvmError includes reverts *and* exceptional halts. 
+ # we can get OOG during abi decoding due to how + # `_abi_payload_size()` works + hp.note("expect failure") + with tx_failed(EvmError): + c.run(data) + with tx_failed(EvmError): + c.run2(data, payload_copier.address) + with tx_failed(EvmError): + c.run3(data, payload_copier.address) + + _fuzz() + + # t1 = time.time() + # print(f"elapsed {t1 - t0}s") diff --git a/vyper/codegen/core.py b/vyper/codegen/core.py index ff0f801d74..9a0a08097c 100644 --- a/vyper/codegen/core.py +++ b/vyper/codegen/core.py @@ -1169,8 +1169,12 @@ def clamp_bytestring(ir_node, hi=None): if hi is not None: assert t.maxlen < 2**64 # sanity check - # note: this add does not risk arithmetic overflow because + # NOTE: this add does not risk arithmetic overflow because # length is bounded by maxlen. + # however(!) _abi_payload_size can OOG, since it loads the word + # at `ir_node` to find the length of the bytearray, which could + # be out-of-bounds. + # if we didn't get OOG, we could overflow in `add`. item_end = add_ofst(ir_node, _abi_payload_size(ir_node)) len_check = ["seq", ["assert", ["le", item_end, hi]], len_check] @@ -1189,8 +1193,12 @@ def clamp_dyn_array(ir_node, hi=None): if hi is not None: assert t.count < 2**64 # sanity check - # note: this add does not risk arithmetic overflow because + # NOTE: this add does not risk arithmetic overflow because # length is bounded by count * elemsize. + # however(!) _abi_payload_size can OOG, since it loads the word + # at `ir_node` to find the length of the bytearray, which could + # be out-of-bounds. + # if we didn't get OOG, we could overflow in `add`. 
item_end = add_ofst(ir_node, _abi_payload_size(ir_node)) # if the subtype is dynamic, the length check is performed in diff --git a/vyper/semantics/types/subscriptable.py b/vyper/semantics/types/subscriptable.py index c392ff48b1..4068d815d2 100644 --- a/vyper/semantics/types/subscriptable.py +++ b/vyper/semantics/types/subscriptable.py @@ -334,7 +334,10 @@ def __init__(self, member_types: Tuple[VyperType, ...]) -> None: self.key_type = UINT256_T # API Compatibility def __repr__(self): - return "(" + ", ".join(repr(t) for t in self.member_types) + ")" + if len(self.member_types) == 1: + (t,) = self.member_types + return f"({t},)" + return "(" + ", ".join(f"{t}" for t in self.member_types) + ")" @property def length(self): From 2d82a74937edeed5e9d4c0c8cecd78a0d70530fa Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 17 Jun 2024 04:10:01 -0700 Subject: [PATCH 10/20] feat[test]: add more coverage to `abi_decode` fuzzer tests (#4153) fuzz with `unwrap_tuple=False` add fuzzing for structs follow up to 69e5c0541a9b23 --- .../builtins/codegen/test_abi_decode_fuzz.py | 124 +++++++++++++++--- vyper/semantics/types/user.py | 11 +- 2 files changed, 115 insertions(+), 20 deletions(-) diff --git a/tests/functional/builtins/codegen/test_abi_decode_fuzz.py b/tests/functional/builtins/codegen/test_abi_decode_fuzz.py index d12b2cde7e..e215002446 100644 --- a/tests/functional/builtins/codegen/test_abi_decode_fuzz.py +++ b/tests/functional/builtins/codegen/test_abi_decode_fuzz.py @@ -18,12 +18,12 @@ IntegerT, SArrayT, StringT, + StructT, TupleT, VyperType, _get_primitive_types, _get_sequence_types, ) -from vyper.semantics.types.shortcuts import UINT256_T from .abi_decode import DecodeError, spec_decode @@ -39,7 +39,7 @@ continue type_ctors.append(t) -complex_static_ctors = [SArrayT, TupleT] +complex_static_ctors = [SArrayT, TupleT, StructT] complex_dynamic_ctors = [DArrayT] leaf_ctors = [t for t in type_ctors if t not in _get_sequence_types().values()] static_leaf_ctors = [t 
for t in leaf_ctors if t._is_prim_word] @@ -50,10 +50,12 @@ @st.composite # max type nesting -def vyper_type(draw, nesting=3, skip=None): +def vyper_type(draw, nesting=3, skip=None, source_fragments=None): assert nesting >= 0 skip = skip or [] + if source_fragments is None: + source_fragments = [] st_leaves = st.one_of(st.sampled_from(dynamic_leaf_ctors), st.sampled_from(static_leaf_ctors)) st_complex = st.one_of( @@ -71,39 +73,52 @@ def vyper_type(draw, nesting=3, skip=None): # note: maybe st.deferred is good here, we could define it with # mutual recursion def _go(skip=skip): - return draw(vyper_type(nesting=nesting - 1, skip=skip)) + _, typ = draw(vyper_type(nesting=nesting - 1, skip=skip, source_fragments=source_fragments)) + return typ + + def finalize(typ): + return source_fragments, typ if t in (BytesT, StringT): # arbitrary max_value bound = draw(st.integers(min_value=1, max_value=1024)) - return t(bound) + return finalize(t(bound)) if t == SArrayT: subtype = _go(skip=[TupleT, BytesT, StringT]) bound = draw(st.integers(min_value=1, max_value=6)) - return t(subtype, bound) + return finalize(t(subtype, bound)) if t == DArrayT: subtype = _go(skip=[TupleT]) bound = draw(st.integers(min_value=1, max_value=16)) - return t(subtype, bound) + return finalize(t(subtype, bound)) if t == TupleT: # zero-length tuples are not allowed in vyper n = draw(st.integers(min_value=1, max_value=6)) subtypes = [_go() for _ in range(n)] - return TupleT(subtypes) + return finalize(TupleT(subtypes)) + + if t == StructT: + n = draw(st.integers(min_value=1, max_value=6)) + subtypes = {f"x{i}": _go() for i in range(n)} + _id = len(source_fragments) # poor man's unique id + name = f"MyStruct{_id}" + typ = StructT(name, subtypes) + source_fragments.append(typ.def_source_str()) + return finalize(StructT(name, subtypes)) if t in (BoolT, AddressT): - return t() + return finalize(t()) if t == IntegerT: signed = draw(st.booleans()) bits = 8 * draw(st.integers(min_value=1, max_value=32)) - 
return t(signed, bits) + return finalize(t(signed, bits)) if t == BytesM_T: m = draw(st.integers(min_value=1, max_value=32)) - return t(m) + return finalize(t(m)) raise RuntimeError("unreachable") @@ -116,6 +131,9 @@ def _go(t): if isinstance(typ, TupleT): return tuple(_go(item_t) for item_t in typ.member_types) + if isinstance(typ, StructT): + return tuple(_go(item_t) for item_t in typ.tuple_members()) + if isinstance(typ, SArrayT): return [_go(typ.value_type) for _ in range(typ.length)] @@ -294,6 +312,13 @@ def _finalize(): # little trick to save re-typing the arguments num_dynamic_types = sum(s.num_dynamic_types for s in substats) return _finalize() + if isinstance(typ, StructT): + substats = [_type_stats(t) for t in typ.tuple_members()] + nesting = 1 + max(s.nesting for s in substats) + breadth = max(len(typ.member_types), *[s.breadth for s in substats]) + num_dynamic_types = sum(s.num_dynamic_types for s in substats) + return _finalize() + if isinstance(typ, DArrayT): substat = _type_stats(typ.value_type) nesting = 1 + substat.nesting @@ -332,8 +357,8 @@ def payload_copier(get_contract_from_ir): @pytest.mark.parametrize("_n", list(range(PARALLELISM))) @hp.given(typ=vyper_type()) @hp.settings(max_examples=100, **_settings) -@hp.example(typ=DArrayT(DArrayT(UINT256_T, 2), 2)) -def test_abi_decode_fuzz(_n, typ, get_contract, tx_failed, payload_copier): +def test_abi_decode_fuzz(_n, typ, get_contract, tx_failed, payload_copier, env): + source_fragments, typ = typ # import time # t0 = time.time() # print("ENTER", typ) @@ -350,12 +375,13 @@ def test_abi_decode_fuzz(_n, typ, get_contract, tx_failed, payload_copier): # by bytes length check at function entry type_bound = wrapped_type.abi_type.size_bound() buffer_bound = type_bound + MAX_MUTATIONS - type_str = repr(typ) # annotation in vyper code - # TODO: intrinsic decode from staticcall/extcall - # TODO: _abi_decode from other sources (staticcall/extcall?) 
- # TODO: dirty the buffer - # TODO: check unwrap_tuple=False + + preamble = "\n\n".join(source_fragments) + type_str = str(typ) # annotation in vyper code + code = f""" +{preamble} + @external def run(xs: Bytes[{buffer_bound}]) -> {type_str}: ret: {type_str} = abi_decode(xs, {type_str}) @@ -375,6 +401,13 @@ def run3(xs: Bytes[{buffer_bound}], copier: Foo) -> {type_str}: assert len(xs) <= {type_bound} return (extcall copier.bar(xs)) """ + try: + c = get_contract(code) + except EvmError as e: + if env.contract_size_limit_error in str(e): + hp.assume(False) + # print(code) + hp.note(code) c = get_contract(code) @hp.given(data=payload_from(wrapped_type)) @@ -382,7 +415,6 @@ def run3(xs: Bytes[{buffer_bound}], copier: Foo) -> {type_str}: def _fuzz(data): hp.note(f"type: {typ}") hp.note(f"abi_t: {wrapped_type.abi_type.selector_name()}") - hp.note(code) hp.note(data.hex()) try: @@ -414,3 +446,57 @@ def _fuzz(data): # t1 = time.time() # print(f"elapsed {t1 - t0}s") + + +@pytest.mark.parametrize("_n", list(range(PARALLELISM))) +@hp.given(typ=vyper_type()) +@hp.settings(max_examples=100, **_settings) +def test_abi_decode_no_wrap_fuzz(_n, typ, get_contract, tx_failed, env): + source_fragments, typ = typ + # import time + # t0 = time.time() + # print("ENTER", typ) + + stats = _type_stats(typ) + hp.target(stats.num_dynamic_types) + + # add max_mutations bytes worth of padding so we don't just get caught + # by bytes length check at function entry + type_bound = typ.abi_type.size_bound() + buffer_bound = type_bound + MAX_MUTATIONS + + type_str = str(typ) # annotation in vyper code + preamble = "\n\n".join(source_fragments) + + code = f""" +{preamble} + +@external +def run(xs: Bytes[{buffer_bound}]) -> {type_str}: + ret: {type_str} = abi_decode(xs, {type_str}, unwrap_tuple=False) + return ret + """ + try: + c = get_contract(code) + except EvmError as e: + if env.contract_size_limit_error in str(e): + hp.assume(False) + + @hp.given(data=payload_from(typ)) + 
@hp.settings(max_examples=100, **_settings) + def _fuzz(data): + hp.note(code) + hp.note(data.hex()) + try: + expected = spec_decode(typ, data) + hp.note(f"expected {expected}") + assert expected == c.run(data) + except DecodeError: + hp.note("expect failure") + with tx_failed(EvmError): + c.run(data) + + _fuzz() + + # t1 = time.time() + # print(f"elapsed {t1 - t0}s") diff --git a/vyper/semantics/types/user.py b/vyper/semantics/types/user.py index a6ee646e62..ca8e99bc92 100644 --- a/vyper/semantics/types/user.py +++ b/vyper/semantics/types/user.py @@ -371,8 +371,11 @@ def from_StructDef(cls, base_node: vy_ast.StructDef) -> "StructT": return cls(struct_name, members, ast_def=base_node) + def __str__(self): + return f"{self._id}" + def __repr__(self): - return f"{self._id} declaration object" + return f"{self._id} {self.members}" def _try_fold(self, node): if len(node.args) != 1: @@ -384,6 +387,12 @@ def _try_fold(self, node): # it can't be reduced, but this lets upstream code know it's constant return node + def def_source_str(self): + ret = f"struct {self._id}:\n" + for k, v in self.member_types.items(): + ret += f" {k}: {v}\n" + return ret + @property def size_in_bytes(self): return sum(i.size_in_bytes for i in self.member_types.values()) From c79c0b658ba34d7b161048d0d80ebd207ff5247b Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 17 Jun 2024 08:22:50 -0700 Subject: [PATCH 11/20] fix[venom]: alloca for default arguments (#4155) this commit fixes an `ir_node_to_venom` translation bug. when there is a default argument to an external function, it can generate multiple allocas, because the entry points allocate separate symbol tables, but actually they should all correspond to the same alloca. 
for instance, `external 1 foo(uint256)12345` and `external 1 foo()67890` both feed into the same `external 1 foo()__common`, but the current translator mistakenly creates different symbol tables for the two "feeder" entry points, resulting in separate allocas for the same logical variable. this commit fixes the bug by fusing the symbol tables for multiple entry points to the same external function. --- vyper/venom/ir_node_to_venom.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/vyper/venom/ir_node_to_venom.py b/vyper/venom/ir_node_to_venom.py index 2c99cf5668..4fca95be90 100644 --- a/vyper/venom/ir_node_to_venom.py +++ b/vyper/venom/ir_node_to_venom.py @@ -107,14 +107,16 @@ NOOP_INSTRUCTIONS = frozenset(["pass", "cleanup_repeat", "var_list", "unique_symbol"]) SymbolTable = dict[str, Optional[IROperand]] -_global_symbols: SymbolTable = {} +_global_symbols: SymbolTable = None # type: ignore MAIN_ENTRY_LABEL_NAME = "__main_entry" +_external_functions: dict[int, SymbolTable] = None # type: ignore # convert IRnode directly to venom def ir_node_to_venom(ir: IRnode) -> IRContext: - global _global_symbols + global _global_symbols, _external_functions _global_symbols = {} + _external_functions = {} ctx = IRContext() fn = ctx.create_function(MAIN_ENTRY_LABEL_NAME) @@ -214,10 +216,6 @@ def _convert_ir_bb_list(fn, ir, symbols): return ret -current_func = None -var_list: list[str] = [] - - def pop_source_on_return(func): @functools.wraps(func) def pop_source(*args, **kwargs): @@ -232,7 +230,10 @@ def pop_source(*args, **kwargs): @pop_source_on_return def _convert_ir_bb(fn, ir, symbols): assert isinstance(ir, IRnode), ir - global _break_target, _continue_target, current_func, var_list, _global_symbols + # TODO: refactor these to not be globals + global _break_target, _continue_target, _global_symbols, _external_functions + + # keep a map from external functions to all possible entry points ctx = fn.ctx fn.push_source(ir) @@ -274,7 
+275,6 @@ def _convert_ir_bb(fn, ir, symbols): return ret elif is_external: - _global_symbols = {} ret = _convert_ir_bb(fn, ir.args[0], symbols) _append_return_args(fn) else: @@ -382,6 +382,13 @@ def _convert_ir_bb(fn, ir, symbols): data = _convert_ir_bb(fn, c, symbols) ctx.append_data("db", [data]) # type: ignore elif ir.value == "label": + function_id_pattern = r"external (\d+)" + function_name = ir.args[0].value + m = re.match(function_id_pattern, function_name) + if m is not None: + function_id = m.group(1) + _global_symbols = _external_functions.setdefault(function_id, {}) + label = IRLabel(ir.args[0].value, True) bb = fn.get_basic_block() if not bb.is_terminated: From a72488ce68125a65813199f9b1188ce60a987feb Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 17 Jun 2024 14:23:30 -0700 Subject: [PATCH 12/20] fix[venom]: add `unique_symbols` check to venom pipeline (#4149) when `-Onone` is specified along with `--experimental-codegen`, the unique symbols check does not get run. this calculates the `ir_node.unique_symbols` property, which implicitly runs the unique symbols check. 
also, change an assertion to a proper panic exception --- vyper/codegen/ir_node.py | 3 ++- vyper/venom/ir_node_to_venom.py | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/vyper/codegen/ir_node.py b/vyper/codegen/ir_node.py index 9d39ebd033..97d9c45fb6 100644 --- a/vyper/codegen/ir_node.py +++ b/vyper/codegen/ir_node.py @@ -405,7 +405,8 @@ def unique_symbols(self): for arg in children: s = arg.unique_symbols non_uniques = ret.intersection(s) - assert len(non_uniques) == 0, f"non-unique symbols {non_uniques}" + if len(non_uniques) != 0: # pragma: nocover + raise CompilerPanic(f"non-unique symbols {non_uniques}") ret |= s return ret diff --git a/vyper/venom/ir_node_to_venom.py b/vyper/venom/ir_node_to_venom.py index 4fca95be90..85172c70e1 100644 --- a/vyper/venom/ir_node_to_venom.py +++ b/vyper/venom/ir_node_to_venom.py @@ -114,6 +114,8 @@ # convert IRnode directly to venom def ir_node_to_venom(ir: IRnode) -> IRContext: + _ = ir.unique_symbols # run unique symbols check + global _global_symbols, _external_functions _global_symbols = {} _external_functions = {} From d92cd344add84aa17434baefed24a6c548471cc2 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 18 Jun 2024 10:23:37 -0700 Subject: [PATCH 13/20] chore[docs]: add evaluation order warning for builtins (#4158) some builtins have undefined order of evaluation of arguments; make a note in the docs --- docs/built-in-functions.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/built-in-functions.rst b/docs/built-in-functions.rst index 367a08d80d..a0e424adb4 100644 --- a/docs/built-in-functions.rst +++ b/docs/built-in-functions.rst @@ -1090,3 +1090,6 @@ Utilities .. note:: Issuing of the static call is *NOT* mode-dependent (that is, it is not removed from production code), although the compiler will issue a warning whenever ``print`` is used. + +.. warning:: + In Vyper, as of v0.4.0, the order of argument evaluation of builtins is not defined. 
That means that the compiler may choose to reorder evaluation of arguments. For example, ``extract32(x(), y())`` may yield unexpected results if ``x()`` and ``y()`` both touch the same data. For this reason, it is best to avoid calling functions with side-effects inside of builtins. For more information, see `GHSA-g2xh-c426-v8mf <https://github.com/vyperlang/vyper/security/advisories/GHSA-g2xh-c426-v8mf>`_ and `issue #4019 <https://github.com/vyperlang/vyper/issues/4019>`_. From 3d9c537142fb99b2672f21e2057f5f202cde194f Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 18 Jun 2024 12:49:36 -0700 Subject: [PATCH 14/20] fix[codegen]: panic on potential eval order issue for some builtins (#4157) `extract32()` and `slice()` have an evaluation order issue when the arguments touch the same data. specifically, the length and data evaluation are interleaved with the index/start/length evaluations. in unusual situations (such as those in the included test cases), this can result in "invalid" reads where the data and length reads appear out of order. this commit conservatively blocks compilation if the preconditions for the interleaved evaluation are detected.
--------- Co-authored-by: trocher Co-authored-by: cyberthirst --- .../builtins/codegen/test_extract32.py | 48 +++++++++++++++++ .../functional/builtins/codegen/test_slice.py | 52 ++++++++++++++++++- vyper/builtins/functions.py | 7 +++ 3 files changed, 106 insertions(+), 1 deletion(-) diff --git a/tests/functional/builtins/codegen/test_extract32.py b/tests/functional/builtins/codegen/test_extract32.py index 8a92adbb07..f8db51ee36 100644 --- a/tests/functional/builtins/codegen/test_extract32.py +++ b/tests/functional/builtins/codegen/test_extract32.py @@ -1,6 +1,7 @@ import pytest from vyper.evm.opcodes import version_check +from vyper.exceptions import CompilerPanic @pytest.mark.parametrize("location", ["storage", "transient"]) @@ -98,3 +99,50 @@ def foq(inp: Bytes[32]) -> address: with tx_failed(): c.foq(b"crow" * 8) + + +# to fix in future release +@pytest.mark.xfail(raises=CompilerPanic, reason="risky overlap") +def test_extract32_order_of_eval(get_contract): + extract32_code = """ +var:DynArray[Bytes[96], 1] + +@internal +def bar() -> uint256: + self.var[0] = b'hellohellohellohellohellohellohello' + self.var.pop() + return 3 + +@external +def foo() -> bytes32: + self.var = [b'abcdefghijklmnopqrstuvwxyz123456789'] + return extract32(self.var[0], self.bar(), output_type=bytes32) + """ + + c = get_contract(extract32_code) + assert c.foo() == b"defghijklmnopqrstuvwxyz123456789" + + +# to fix in future release +@pytest.mark.xfail(raises=CompilerPanic, reason="risky overlap") +def test_extract32_order_of_eval_extcall(get_contract): + slice_code = """ +var:DynArray[Bytes[96], 1] + +interface Bar: + def bar() -> uint256: payable + +@external +def bar() -> uint256: + self.var[0] = b'hellohellohellohellohellohellohello' + self.var.pop() + return 3 + +@external +def foo() -> bytes32: + self.var = [b'abcdefghijklmnopqrstuvwxyz123456789'] + return extract32(self.var[0], extcall Bar(self).bar(), output_type=bytes32) + """ + + c = get_contract(slice_code) + assert c.foo() == 
b"defghijklmnopqrstuvwxyz123456789" diff --git a/tests/functional/builtins/codegen/test_slice.py b/tests/functional/builtins/codegen/test_slice.py index 08800e7a8c..d5d1efca0f 100644 --- a/tests/functional/builtins/codegen/test_slice.py +++ b/tests/functional/builtins/codegen/test_slice.py @@ -5,7 +5,7 @@ from vyper.compiler import compile_code from vyper.compiler.settings import OptimizationLevel, Settings from vyper.evm.opcodes import version_check -from vyper.exceptions import ArgumentException, TypeMismatch +from vyper.exceptions import ArgumentException, CompilerPanic, TypeMismatch _fun_bytes32_bounds = [(0, 32), (3, 29), (27, 5), (0, 5), (5, 3), (30, 2)] @@ -562,3 +562,53 @@ def foo(cs: String[64]) -> uint256: c = get_contract(code) # ensure that counter was incremented only once assert c.foo(arg) == 1 + + +# to fix in future release +@pytest.mark.xfail(raises=CompilerPanic, reason="risky overlap") +def test_slice_order_of_eval(get_contract): + slice_code = """ +var:DynArray[Bytes[96], 1] + +interface Bar: + def bar() -> uint256: payable + +@external +def bar() -> uint256: + self.var[0] = b'hellohellohellohellohellohellohello' + self.var.pop() + return 32 + +@external +def foo() -> Bytes[96]: + self.var = [b'abcdefghijklmnopqrstuvwxyz123456789'] + return slice(self.var[0], 3, extcall Bar(self).bar()) + """ + + c = get_contract(slice_code) + assert c.foo() == b"defghijklmnopqrstuvwxyz123456789" + + +# to fix in future release +@pytest.mark.xfail(raises=CompilerPanic, reason="risky overlap") +def test_slice_order_of_eval2(get_contract): + slice_code = """ +var:DynArray[Bytes[96], 1] + +interface Bar: + def bar() -> uint256: payable + +@external +def bar() -> uint256: + self.var[0] = b'hellohellohellohellohellohellohello' + self.var.pop() + return 3 + +@external +def foo() -> Bytes[96]: + self.var = [b'abcdefghijklmnopqrstuvwxyz123456789'] + return slice(self.var[0], extcall Bar(self).bar(), 32) + """ + + c = get_contract(slice_code) + assert c.foo() == 
b"defghijklmnopqrstuvwxyz123456789" diff --git a/vyper/builtins/functions.py b/vyper/builtins/functions.py index 2564329b65..672d978455 100644 --- a/vyper/builtins/functions.py +++ b/vyper/builtins/functions.py @@ -29,6 +29,7 @@ get_type_for_exact_size, ir_tuple_from_args, make_setter, + potential_overlap, promote_signed_int, sar, shl, @@ -357,6 +358,9 @@ def build_IR(self, expr, args, kwargs, context): assert is_bytes32, src src = ensure_in_memory(src, context) + if potential_overlap(src, start) or potential_overlap(src, length): + raise CompilerPanic("risky overlap") + with src.cache_when_complex("src") as (b1, src), start.cache_when_complex("start") as ( b2, start, @@ -862,6 +866,9 @@ def build_IR(self, expr, args, kwargs, context): bytez, index = args ret_type = kwargs["output_type"] + if potential_overlap(bytez, index): + raise CompilerPanic("risky overlap") + def finalize(ret): annotation = "extract32" ret = IRnode.from_list(ret, typ=ret_type, annotation=annotation) From 4594f8badf13a583875f8891698cd3bbefb1c787 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 19 Jun 2024 12:38:33 -0700 Subject: [PATCH 15/20] fix[codegen]: panic on potential subscript eval order issue (#4159) subscript expressions have an evaluation order issue when evaluation of the index (i.e. `node.index`) modifies the parent (i.e. `node.value`). because the evaluation of the parent is interleaved with evaluation of the index, it can result in "invalid" reads where the length check occurs before evaluation of the index, but the data read occurs afterwards. if evaluation of the index results in modification of the container size for instance, the data read from the container can happen on a dangling reference. another variant of this issue would be accessing `self.nested_array.pop().append(...)`; however, this currently happens to be blocked by a panic in the frontend. this commit conservatively blocks compilation if the preconditions for the interleaved evaluation are detected. 
POC tests that the appropriate panics are generated are included as well. --------- Co-authored-by: trocher Co-authored-by: Hubert Ritzdorf Co-authored-by: cyberthirst --- .../codegen/types/test_array_indexing.py | 77 +++++++++++++++++++ .../codegen/types/test_dynamic_array.py | 16 ++++ vyper/ast/nodes.pyi | 1 + vyper/codegen/core.py | 20 +++++ vyper/codegen/expr.py | 7 ++ vyper/codegen/ir_node.py | 12 +++ vyper/semantics/analysis/utils.py | 15 +++- 7 files changed, 147 insertions(+), 1 deletion(-) diff --git a/tests/functional/codegen/types/test_array_indexing.py b/tests/functional/codegen/types/test_array_indexing.py index 45e777d919..7f5c0d0e21 100644 --- a/tests/functional/codegen/types/test_array_indexing.py +++ b/tests/functional/codegen/types/test_array_indexing.py @@ -1,5 +1,9 @@ # TODO: rewrite the tests in type-centric way, parametrize array and indices types +import pytest + +from vyper.exceptions import CompilerPanic + def test_negative_ix_access(get_contract, tx_failed): # Arrays can't be accessed with negative indices @@ -130,3 +134,76 @@ def foo(): c.foo() for i in range(10): assert c.arr(i) == i + + +# to fix in future release +@pytest.mark.xfail(raises=CompilerPanic, reason="risky overlap") +def test_array_index_overlap(get_contract): + code = """ +a: public(DynArray[DynArray[Bytes[96], 5], 5]) + +@external +def foo() -> Bytes[96]: + self.a.append([b'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx']) + return self.a[0][self.bar()] + + +@internal +def bar() -> uint256: + self.a[0] = [b'yyy'] + self.a.pop() + return 0 + """ + c = get_contract(code) + # tricky to get this right, for now we just panic instead of generating code + assert c.foo() == b"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + + +# to fix in future release +@pytest.mark.xfail(raises=CompilerPanic, reason="risky overlap") +def test_array_index_overlap_extcall(get_contract): + code = """ + +interface Bar: + def bar() -> uint256: payable + +a: public(DynArray[DynArray[Bytes[96], 
5], 5]) + +@external +def foo() -> Bytes[96]: + self.a.append([b'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx']) + return self.a[0][extcall Bar(self).bar()] + + +@external +def bar() -> uint256: + self.a[0] = [b'yyy'] + self.a.pop() + return 0 + """ + c = get_contract(code) + assert c.foo() == b"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + + +# to fix in future release +@pytest.mark.xfail(raises=CompilerPanic, reason="risky overlap") +def test_array_index_overlap_extcall2(get_contract): + code = """ +interface B: + def calculate_index() -> uint256: nonpayable + +a: HashMap[uint256, DynArray[uint256, 5]] + +@external +def bar() -> uint256: + self.a[0] = [2] + return self.a[0][extcall B(self).calculate_index()] + +@external +def calculate_index() -> uint256: + self.a[0] = [1] + return 0 + """ + c = get_contract(code) + + assert c.bar() == 1 diff --git a/tests/functional/codegen/types/test_dynamic_array.py b/tests/functional/codegen/types/test_dynamic_array.py index 5f26e05839..2a0f4e77e5 100644 --- a/tests/functional/codegen/types/test_dynamic_array.py +++ b/tests/functional/codegen/types/test_dynamic_array.py @@ -8,6 +8,7 @@ from vyper.exceptions import ( ArgumentException, ArrayIndexException, + CompilerPanic, ImmutableViolation, OverflowException, StackTooDeep, @@ -1887,3 +1888,18 @@ def boo() -> uint256: c = get_contract(code) assert c.foo() == [1, 2, 3, 4] + + +@pytest.mark.xfail(raises=CompilerPanic) +def test_dangling_reference(get_contract, tx_failed): + code = """ +a: DynArray[DynArray[uint256, 5], 5] + +@external +def foo(): + self.a = [[1]] + self.a.pop().append(2) + """ + c = get_contract(code) + with tx_failed(): + c.foo() diff --git a/vyper/ast/nodes.pyi b/vyper/ast/nodes.pyi index 1c7aaf55ee..58c7d0b2e4 100644 --- a/vyper/ast/nodes.pyi +++ b/vyper/ast/nodes.pyi @@ -23,6 +23,7 @@ class VyperNode: end_col_offset: int = ... _metadata: dict = ... _original_node: Optional[VyperNode] = ... + _children: list[VyperNode] = ... 
def __init__(self, parent: Optional[VyperNode] = ..., **kwargs: Any) -> None: ... def __hash__(self) -> Any: ... def __eq__(self, other: Any) -> Any: ... diff --git a/vyper/codegen/core.py b/vyper/codegen/core.py index 9a0a08097c..25a6d06fbf 100644 --- a/vyper/codegen/core.py +++ b/vyper/codegen/core.py @@ -924,6 +924,26 @@ def potential_overlap(left, right): return False +# similar to `potential_overlap()`, but compares left's _reads_ vs +# right's _writes_. +# TODO: `potential_overlap()` can probably be replaced by this function, +# but all the cases need to be checked. +def read_write_overlap(left, right): + if not isinstance(left, IRnode) or not isinstance(right, IRnode): + return False + + if left.typ._is_prim_word and right.typ._is_prim_word: + return False + + if len(left.referenced_variables & right.variable_writes) > 0: + return True + + if len(left.referenced_variables) > 0 and right.contains_risky_call: + return True + + return False + + # Create an x=y statement, where the types may be compound def make_setter(left, right, hi=None): check_assign(left, right) diff --git a/vyper/codegen/expr.py b/vyper/codegen/expr.py index 65df5a0930..f28a068be6 100644 --- a/vyper/codegen/expr.py +++ b/vyper/codegen/expr.py @@ -21,6 +21,7 @@ make_setter, pop_dyn_array, potential_overlap, + read_write_overlap, sar, shl, shr, @@ -40,6 +41,7 @@ UnimplementedException, tag_exceptions, ) +from vyper.semantics.analysis.utils import get_expr_writes from vyper.semantics.types import ( AddressT, BoolT, @@ -86,6 +88,9 @@ def __init__(self, node, context, is_stmt=False): self.ir_node = fn() assert isinstance(self.ir_node, IRnode), self.ir_node + writes = set(access.variable for access in get_expr_writes(self.expr)) + self.ir_node._writes = writes + self.ir_node.annotation = self.expr.get("node_source_code") self.ir_node.ast_source = self.expr @@ -352,6 +357,8 @@ def parse_Subscript(self): elif is_array_like(sub.typ): index = Expr.parse_value_expr(self.expr.slice, self.context) + if 
read_write_overlap(sub, index): + raise CompilerPanic("risky overlap") elif is_tuple_like(sub.typ): # should we annotate expr.slice in the frontend with the diff --git a/vyper/codegen/ir_node.py b/vyper/codegen/ir_node.py index 97d9c45fb6..6f9eb0359b 100644 --- a/vyper/codegen/ir_node.py +++ b/vyper/codegen/ir_node.py @@ -467,6 +467,18 @@ def referenced_variables(self): return ret + @cached_property + def variable_writes(self): + ret = getattr(self, "_writes", set()) + + for arg in self.args: + ret |= arg.variable_writes + + if getattr(self, "is_self_call", False): + ret |= self.invoked_function_ir.func_ir.variable_writes + + return ret + @cached_property def contains_risky_call(self): ret = self.value in ("call", "delegatecall", "staticcall", "create", "create2") diff --git a/vyper/semantics/analysis/utils.py b/vyper/semantics/analysis/utils.py index be323b1d13..d30eee79e0 100644 --- a/vyper/semantics/analysis/utils.py +++ b/vyper/semantics/analysis/utils.py @@ -24,7 +24,7 @@ from vyper.semantics.types.bytestrings import BytesT, StringT from vyper.semantics.types.primitives import AddressT, BoolT, BytesM_T, IntegerT from vyper.semantics.types.subscriptable import DArrayT, SArrayT, TupleT -from vyper.utils import checksum_encode, int_to_fourbytes +from vyper.utils import OrderedSet, checksum_encode, int_to_fourbytes def _validate_op(node, types_list, validation_fn_name): @@ -681,3 +681,16 @@ def check_modifiability(node: vy_ast.ExprNode, modifiability: Modifiability) -> info = get_expr_info(node) return info.modifiability <= modifiability + + +# TODO: move this into part of regular analysis in `local.py` +def get_expr_writes(node: vy_ast.VyperNode) -> OrderedSet[VarAccess]: + if "writes_r" in node._metadata: + return node._metadata["writes_r"] + ret: OrderedSet = OrderedSet() + if isinstance(node, vy_ast.ExprNode) and node._expr_info is not None: + ret = node._expr_info._writes + for c in node._children: + ret |= get_expr_writes(c) + node._metadata["writes_r"] = 
ret + return ret From e9db8d9f7486eae38f5b86531629019ad28f514e Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 20 Jun 2024 09:27:25 -0700 Subject: [PATCH 16/20] feat[docs]: v0.4.0 release (#4152) add release notes for v0.4.0 release slight update to wording of front matter --- docs/index.rst | 7 +- docs/release-notes.rst | 310 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 311 insertions(+), 6 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index 5baaebb339..6c36b5fd7c 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -6,14 +6,17 @@ Vyper ##### -Vyper is a contract-oriented, pythonic programming language that targets the `Ethereum Virtual Machine (EVM) `_. +Vyper is a contract-oriented, Pythonic programming language that targets the `Ethereum Virtual Machine (EVM) `_. +It prioritizes user safety, encourages clear coding practices via language design and efficient execution. In other words, Vyper code is safe, clear and efficient! Principles and Goals ==================== * **Security**: It should be possible and natural to build secure smart-contracts in Vyper. * **Language and compiler simplicity**: The language and the compiler implementation should strive to be simple. -* **Auditability**: Vyper code should be maximally human-readable. Furthermore, it should be maximally difficult to write misleading code. Simplicity for the reader is more important than simplicity for the writer, and simplicity for readers with low prior experience with Vyper (and low prior experience with programming in general) is particularly important. +* **Auditability**: Vyper code should be maximally human-readable. + Furthermore, it should be maximally difficult to write misleading code. + Simplicity for the reader is more important than simplicity for the writer, and simplicity for readers with low prior experience with Vyper (and low prior experience with programming in general) is particularly important. 
Because of this Vyper provides the following features: diff --git a/docs/release-notes.rst b/docs/release-notes.rst index c107ee5554..fa17ef4f7b 100644 --- a/docs/release-notes.rst +++ b/docs/release-notes.rst @@ -11,17 +11,319 @@ Release Notes :'<,'>s/\v(https:\/\/github.com\/vyperlang\/vyper\/pull\/)(\d+)/(`#\2 <\1\2>`_)/g ex. in: https://github.com/vyperlang/vyper/pull/3373 ex. out: (`#3373 `_) + remove authorship slugs (leave them on github release page; they have no meaning outside of github though) + :'<,'>s/by @\S\+ //c for advisory links: :'<,'>s/\v(https:\/\/github.com\/vyperlang\/vyper\/security\/advisories\/)([-A-Za-z0-9]+)/(`\2 <\1\2>`_)/g -v0.4.0b1 ("Nagini") -******************* +v0.4.0 ("Nagini") +***************** -Date released: TBD -================== +Date released: 2024-06-20 +========================= v0.4.0 represents a major overhaul to the Vyper language. Notably, it overhauls the import system and adds support for code reuse. It also adds a new, experimental backend to Vyper which lays the foundation for improved analysis, optimization and integration with third party tools. 
+Breaking Changes +---------------- +* feat[tool]!: make cancun the default evm version (`#4029 `_) +* feat[lang]: remove named reentrancy locks (`#3769 `_) +* feat[lang]!: change the signature of ``block.prevrandao`` (`#3879 `_) +* feat[lang]!: change ABI type of ``decimal`` to ``int168`` (`#3696 `_) +* feat[lang]: rename ``_abi_encode`` and ``_abi_decode`` (`#4097 `_) +* feat[lang]!: add feature flag for decimals (`#3930 `_) +* feat[lang]!: make internal decorator optional (`#4040 `_) +* feat[lang]: protect external calls with keyword (`#2938 `_) +* introduce floordiv, ban regular div for integers (`#2937 `_) +* feat[lang]: use keyword arguments for struct instantiation (`#3777 `_) +* feat: require type annotations for loop variables (`#3596 `_) +* feat: replace ``enum`` with ``flag`` keyword (`#3697 `_) +* feat: remove builtin constants (`#3350 `_) +* feat: drop istanbul and berlin support (`#3843 `_) +* feat: allow range with two arguments and bound (`#3679 `_) +* fix[codegen]: range bound check for signed integers (`#3814 `_) +* feat: default code offset = 3 (`#3454 `_) +* feat: rename ``vyper.interfaces`` to ``ethereum.ercs`` (`#3741 `_) +* chore: add prefix to ERC interfaces (`#3804 `_) +* chore[ux]: compute natspec as part of standard pipeline (`#3946 `_) +* feat: deprecate ``vyper-serve`` (`#3666 `_) + +Module system +------------- +* refactor: internal handling of imports (`#3655 `_) +* feat: implement "stateless" modules (`#3663 `_) +* feat[lang]: export interfaces (`#3919 `_) +* feat[lang]: singleton modules with ownership hierarchy (`#3729 `_) +* feat[lang]: implement function exports (`#3786 `_) +* feat[lang]: auto-export events in ABI (`#3808 `_) +* fix: allow using interface defs from imported modules (`#3725 `_) +* feat: add support for constants in imported modules (`#3726 `_) +* fix[lang]: prevent modules as storage variables (`#4088 `_) +* fix[ux]: improve initializer hint for unimported modules (`#4145 `_) +* feat: add python ``sys.path`` to 
vyper path (`#3763 `_) +* feat[ux]: improve error message for importing ERC20 (`#3816 `_) +* fix[lang]: fix importing of flag types (`#3871 `_) +* feat: search path resolution for cli (`#3694 `_) +* fix[lang]: transitive exports (`#3888 `_) +* fix[ux]: error messages relating to initializer issues (`#3831 `_) +* fix[lang]: recursion in ``uses`` analysis for nonreentrant functions (`#3971 `_) +* fix[ux]: fix ``uses`` error message (`#3926 `_) +* fix[lang]: fix ``uses`` analysis for nonreentrant functions (`#3927 `_) +* fix[lang]: fix a hint in global initializer check (`#4089 `_) +* fix[lang]: builtin type comparisons (`#3956 `_) +* fix[tool]: fix ``combined_json`` output for CLI (`#3901 `_) +* fix[tool]: compile multiple files (`#4053 `_) +* refactor: reimplement AST folding (`#3669 `_) +* refactor: constant folding (`#3719 `_) +* fix[lang]: typecheck hashmap indexes with folding (`#4007 `_) +* fix[lang]: fix array index checks when the subscript is folded (`#3924 `_) +* fix[lang]: pure access analysis (`#3895 `_) + +Venom +----- +* feat: implement new IR for vyper (venom IR) (`#3659 `_) +* feat[ir]: add ``make_ssa`` pass to venom pipeline (`#3825 `_) +* feat[venom]: implement ``mem2var`` and ``sccp`` passes (`#3941 `_) +* feat[venom]: add store elimination pass (`#4021 `_) +* feat[venom]: add ``extract_literals`` pass (`#4067 `_) +* feat[venom]: optimize branching (`#4049 `_) +* feat[venom]: avoid last ``swap`` for commutative ops (`#4048 `_) +* feat[venom]: "pickaxe" stack scheduler optimization (`#3951 `_) +* feat[venom]: add algebraic optimization pass (`#4054 `_) +* feat: Implement target constrained venom jump instruction (`#3687 `_) +* feat: remove ``deploy`` instruction from venom (`#3703 `_) +* fix[venom]: liveness analysis in some loops (`#3732 `_) +* feat: add more venom instructions (`#3733 `_) +* refactor[venom]: use venom pass instances (`#3908 `_) +* refactor[venom]: refactor venom operand classes (`#3915 `_) +* refactor[venom]: introduce 
``IRContext`` and ``IRAnalysisCache`` (`#3983 `_) +* feat: add utility functions to ``OrderedSet`` (`#3833 `_) +* feat[venom]: optimize ``get_basic_block()`` (`#4002 `_) +* fix[venom]: fix branch eliminator cases in sccp (`#4003 `_) +* fix[codegen]: same symbol jumpdest merge (`#3982 `_) +* fix[venom]: fix eval of ``exp`` in sccp (`#4009 `_) +* refactor[venom]: remove unused method in ``make_ssa.py`` (`#4012 `_) +* fix[venom]: fix return opcode handling in mem2var (`#4011 `_) +* fix[venom]: fix ``cfg`` output format (`#4010 `_) +* chore[venom]: fix output formatting of data segment in ``IRContext`` (`#4016 `_) +* feat[venom]: optimize mem2var and store/variable elimination pass sequences (`#4032 `_) +* fix[venom]: fix some sccp evaluations (`#4028 `_) +* fix[venom]: add ``unique_symbols`` check to venom pipeline (`#4149 `_) +* feat[venom]: remove redundant store elimination pass (`#4036 `_) +* fix[venom]: remove some dead code in ``venom_to_assembly`` (`#4042 `_) +* feat[venom]: improve unused variable removal pass (`#4055 `_) +* fix[venom]: remove liveness requests (`#4058 `_) +* fix[venom]: fix list of volatile instructions (`#4065 `_) +* fix[venom]: remove dominator tree invalidation for store elimination pass (`#4069 `_) +* fix[venom]: move loop invariant assertion to entry block (`#4098 `_) +* fix[venom]: clear ``out_vars`` during calculation (`#4129 `_) +* fix[venom]: alloca for default arguments (`#4155 `_) +* Refactor ctx.add_instruction() and friends (`#3685 `_) +* fix: type annotation of helper function (`#3702 `_) +* feat[ir]: emit ``djump`` in dense selector table (`#3849 `_) +* chore: move venom tests to ``tests/unit/compiler`` (`#3684 `_) + +Other new features +------------------ +* feat[lang]: add ``blobhash()`` builtin (`#3962 `_) +* feat[lang]: support ``block.blobbasefee`` (`#3945 `_) +* feat[lang]: add ``revert_on_failure`` kwarg for create builtins (`#3844 `_) +* feat[lang]: allow downcasting of bytestrings (`#3832 `_) + +Docs +---- +* 
chore[docs]: add docs for v0.4.0 features (`#3947 `_) +* chore[docs]: ``implements`` does not check event declarations (`#4052 `_) +* docs: adopt a new theme: ``shibuya`` (`#3754 `_) +* chore[docs]: add evaluation order warning for builtins (`#4158 `_) +* Update ``FUNDING.yml`` (`#3636 `_) +* docs: fix nit in v0.3.10 release notes (`#3638 `_) +* docs: add note on ``pragma`` parsing (`#3640 `_) +* docs: retire security@vyperlang.org (`#3660 `_) +* feat[docs]: add more detail to modules docs (`#4087 `_) +* docs: update resources section (`#3656 `_) +* docs: add script to help working on the compiler (`#3674 `_) +* docs: add warnings at the top of all example token contracts (`#3676 `_) +* docs: typo in ``on_chain_market_maker.vy`` (`#3677 `_) +* docs: clarify ``address.codehash`` for empty account (`#3711 `_) +* docs: indexed arguments for events are limited (`#3715 `_) +* docs: Fix typos (`#3747 `_) +* docs: Upgrade dependencies and fixes (`#3745 `_) +* docs: add missing cli flags (`#3736 `_) +* chore: fix formatting and docs for new struct instantiation syntax (`#3792 `_) +* docs: floordiv (`#3797 `_) +* docs: add missing ``annotated_ast`` flag (`#3813 `_) +* docs: update logo in readme, remove competition reference (`#3837 `_) +* docs: add rationale for floordiv rounding behavior (`#3845 `_) +* chore[docs]: amend ``revert_on_failure`` kwarg docs for create builtins (`#3921 `_) +* fix[docs]: fix clipped ``endAuction`` method in example section (`#3969 `_) +* refactor[docs]: refactor security policy (`#3981 `_) +* fix: edit link to style guide (`#3658 `_) +* Add Vyper online compiler tooling (`#3680 `_) +* chore: fix typos (`#3749 `_) + +Bugfixes +-------- +* fix[codegen]: fix ``raw_log()`` when topics are non-literals (`#3977 `_) +* fix[codegen]: fix transient codegen for ``slice`` and ``extract32`` (`#3874 `_) +* fix[codegen]: bounds check for signed index accesses (`#3817 `_) +* fix: disallow ``value=`` passing for delegate and static raw_calls (`#3755 `_) +* 
fix[codegen]: fix double evals in sqrt, slice, blueprint (`#3976 `_) +* fix[codegen]: fix double eval in dynarray append/pop (`#4030 `_) +* fix[codegen]: fix double eval of start in range expr (`#4033 `_) +* fix[codegen]: overflow check in ``slice()`` (`#3818 `_) +* fix: concat buffer bug (`#3738 `_) +* fix[codegen]: fix ``make_setter`` overlap with internal calls (`#4037 `_) +* fix[codegen]: fix ``make_setter`` overlap in ``dynarray_append`` (`#4059 `_) +* fix[codegen]: ``make_setter`` overlap in the presence of ``staticcall`` (`#4128 `_) +* fix[codegen]: fix ``_abi_decode`` buffer overflow (`#3925 `_) +* fix[codegen]: zero-length dynarray ``abi_decode`` validation (`#4060 `_) +* fix[codegen]: recursive dynarray oob check (`#4091 `_) +* fix[codegen]: add back in ``returndatasize`` check (`#4144 `_) +* fix: block memory allocation overflow (`#3639 `_) +* fix[codegen]: panic on potential eval order issue for some builtins (`#4157 `_) +* fix[codegen]: panic on potential subscript eval order issue (`#4159 `_) +* add comptime check for uint2str input (`#3671 `_) +* fix: dead code analysis inside for loops (`#3731 `_) +* fix[ir]: fix a latent bug in ``sha3_64`` codegen (`#4063 `_) +* fix: ``opcodes`` and ``opcodes_runtime`` outputs (`#3735 `_) +* fix: bad assertion in expr.py (`#3758 `_) +* fix: iterator modification analysis (`#3764 `_) +* feat: allow constant interfaces (`#3718 `_) +* fix: assembly dead code eliminator (`#3791 `_) +* fix: prevent range over decimal (`#3798 `_) +* fix: mutability check for interface implements (`#3805 `_) +* fix[codegen]: fix non-memory reason strings (`#3877 `_) +* fix[ux]: fix compiler hang for large exponentiations (`#3893 `_) +* fix[lang]: allow type expressions inside pure functions (`#3906 `_) +* fix[ux]: raise ``VersionException`` with source info (`#3920 `_) +* fix[lang]: fix ``pow`` folding when args are not literals (`#3949 `_) +* fix[codegen]: fix some hardcoded references to ``STORAGE`` location (`#4015 `_) + +Patched 
security advisories (GHSAs) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Bounds check on built-in ``slice()`` function can be overflowed (`GHSA-9x7f-gwxq-6f2c `_) +* ``concat`` built-in can corrupt memory (`GHSA-2q8v-3gqq-4f8p `_) +* ``raw_call`` ``value=`` kwargs not disabled for static and delegate calls (`GHSA-x2c2-q32w-4w6m `_) +* negative array index bounds checks (`GHSA-52xq-j7v9-v4v2 `_) +* ``range(start, start + N)`` reverts for negative numbers (`GHSA-ppx5-q359-pvwj `_) +* incorrect topic logging in ``raw_log`` (`GHSA-xchq-w5r3-4wg3 `_) +* double eval of the ``slice`` start/length args in certain cases (`GHSA-r56x-j438-vw5m `_) +* multiple eval of ``sqrt()`` built in argument (`GHSA-5jrj-52x8-m64h `_) +* double eval of raw_args in ``create_from_blueprint`` (`GHSA-3whq-64q2-qfj6 `_) +* ``sha3`` codegen bug (`GHSA-6845-xw22-ffxv `_) +* ``extract32`` can read dirty memory (`GHSA-4hwq-4cpm-8vmx `_) +* ``_abi_decode`` Memory Overflow (`GHSA-9p8r-4xp4-gw5w `_) +* External calls can overflow return data to return input buffer (`GHSA-gp3w-2v2m-p686 `_) + +Tooling +------- +* feat[tool]: archive format (`#3891 `_) +* feat[tool]: add source map for constructors (`#4008 `_) +* feat: add short options ``-v`` and ``-O`` to the CLI (`#3695 `_) +* feat: Add ``bb`` and ``bb_runtime`` output options (`#3700 `_) +* fix: remove hex-ir from format cli options list (`#3657 `_) +* fix: pickleability of ``CompilerData`` (`#3803 `_) +* feat[tool]: validate AST nodes early in the pipeline (`#3809 `_) +* feat[tool]: delay global constraint check (`#3810 `_) +* feat[tool]: export variable read/write access (`#3790 `_) +* feat[tool]: improvements to AST annotation (`#3829 `_) +* feat[tool]: add ``node_id`` map to source map (`#3811 `_) +* chore[tool]: add help text for ``hex-ir`` CLI flag (`#3942 `_) +* refactor[tool]: refactor storage layout export (`#3789 `_) +* fix[tool]: fix cross-compilation issues, add windows CI (`#4014 `_) +* fix[tool]: star option in ``outputSelection`` (`#4094 
`_) + +Performance +----------- +* perf: lazy eval of f-strings in IRnode ctor (`#3602 `_) +* perf: levenshtein optimization (`#3780 `_) +* feat: frontend optimizations (`#3781 `_) +* feat: optimize ``VyperNode.deepcopy`` (`#3784 `_) +* feat: more frontend optimizations (`#3785 `_) +* perf: reimplement ``IRnode.__deepcopy__`` (`#3761 `_) + +Testing suite improvements +-------------------------- +* refactor[test]: bypass ``eth-tester`` and interface with evm backend directly (`#3846 `_) +* feat: Refactor assert_tx_failed into a context (`#3706 `_) +* feat[test]: implement ``abi_decode`` spec test (`#4095 `_) +* feat[test]: add more coverage to ``abi_decode`` fuzzer tests (`#4153 `_) +* feat[ci]: enable cancun testing (`#3861 `_) +* fix: add missing test for memory allocation overflow (`#3650 `_) +* chore: fix test for ``slice`` (`#3633 `_) +* add abi_types unit tests (`#3662 `_) +* refactor: test directory structure (`#3664 `_) +* chore: test all output formats (`#3683 `_) +* chore: deduplicate test files (`#3773 `_) +* feat[test]: add more transient storage tests (`#3883 `_) +* chore[ci]: fix apt-get failure in era pipeline (`#3821 `_) +* chore[ci]: enable python3.12 tests (`#3860 `_) +* chore[ci]: refactor jobs to use gh actions (`#3863 `_) +* chore[ci]: use ``--dist worksteal`` from latest ``xdist`` (`#3869 `_) +* chore: run mypy as part of lint rule in Makefile (`#3771 `_) +* chore[test]: always specify the evm backend (`#4006 `_) +* chore: update lint dependencies (`#3704 `_) +* chore: add color to mypy output (`#3793 `_) +* chore: remove tox rules for lint commands (`#3826 `_) +* chore[ci]: roll back GH actions/artifacts version (`#3838 `_) +* chore: Upgrade GitHub action dependencies (`#3807 `_) +* chore[ci]: pin eth-abi for decode regression (`#3834 `_) +* fix[ci]: release artifacts (`#3839 `_) +* chore[ci]: merge mypy job into lint (`#3840 `_) +* test: parametrize CI over EVM versions (`#3842 `_) +* feat[ci]: add PR title validation (`#3887 `_) +* 
fix[test]: fix failure in grammar fuzzing (`#3892 `_) +* feat[test]: add ``xfail_strict``, clean up ``setup.cfg`` (`#3889 `_) +* fix[ci]: pin hexbytes to pre-1.0.0 (`#3903 `_) +* chore[test]: update hexbytes version and tests (`#3904 `_) +* fix[test]: fix a bad bound in decimal fuzzing (`#3909 `_) +* fix[test]: fix a boundary case in decimal fuzzing (`#3918 `_) +* feat[ci]: update pypi release pipeline to use OIDC (`#3912 `_) +* chore[ci]: reconfigure single commit validation (`#3937 `_) +* chore[ci]: downgrade codecov action to v3 (`#3940 `_) +* feat[ci]: add codecov configuration (`#4057 `_) +* feat[test]: remove memory mocker (`#4005 `_) +* refactor[test]: change fixture scope in examples (`#3995 `_) +* fix[test]: fix call graph stability fuzzer (`#4064 `_) +* chore[test]: add macos to test matrix (`#4025 `_) +* refactor[test]: change default expected exception type (`#4004 `_) + +Misc / refactor +--------------- +* feat[ir]: add ``eval_once`` sanity fences to more builtins (`#3835 `_) +* fix: reorder compilation of branches in stmt.py (`#3603 `_) +* refactor[codegen]: make settings into a global object (`#3929 `_) +* chore: improve exception handling in IR generation (`#3705 `_) +* refactor: merge ``annotation.py`` and ``local.py`` (`#3456 `_) +* chore[ux]: remove deprecated python AST classes (`#3998 `_) +* refactor[ux]: remove deprecated ``VyperNode`` properties (`#3999 `_) +* feat: remove Index AST node (`#3757 `_) +* refactor: for loop target parsing (`#3724 `_) +* chore: improve diagnostics for invalid for loop annotation (`#3721 `_) +* refactor: builtin functions inherit from ``VyperType`` (`#3559 `_) +* fix: remove .keyword from Call AST node (`#3689 `_) +* improvement: assert descriptions in Crowdfund finalize() and participate() (`#3064 `_) +* feat: improve panics in IR generation (`#3708 `_) +* feat: improve warnings, refactor ``vyper_warn()`` (`#3800 `_) +* fix[ir]: unique symbol name (`#3848 `_) +* refactor: remove duplicate terminus checking code 
(`#3541 `_) +* refactor: ``ExprVisitor`` type validation (`#3739 `_) +* chore: improve exception for type validation (`#3759 `_) +* fix: fuzz test not updated to use TypeMismatch (`#3768 `_) +* chore: fix StringEnum._generate_next_value_ signature (`#3770 `_) +* chore: improve some error messages (`#3775 `_) +* refactor: ``get_search_paths()`` for vyper cli (`#3778 `_) +* chore: replace occurrences of 'enum' by 'flag' (`#3794 `_) +* chore: add another borrowship test (`#3802 `_) +* chore[ux]: improve an exports error message (`#3822 `_) +* chore: improve codegen test coverage report (`#3824 `_) +* chore: improve syntax error messages (`#3885 `_) +* chore[tool]: remove ``vyper-serve`` from ``setup.py`` (`#3936 `_) +* fix[ux]: replace standard strings with f-strings (`#3953 `_) +* chore[ir]: sanity check types in for range codegen (`#3968 `_) + v0.3.10 ("Black Adder") *********************** From f92ef414a551de34b64ae09ba2985ee240244c4c Mon Sep 17 00:00:00 2001 From: Benny Date: Wed, 26 Jun 2024 15:47:04 +1000 Subject: [PATCH 17/20] chore[docs]: add `FUNDING.json` for drips funding (#4167) Add json file to verify Vyper on https://www.drips.network/app/projects --- FUNDING.json | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 FUNDING.json diff --git a/FUNDING.json b/FUNDING.json new file mode 100644 index 0000000000..301aa05572 --- /dev/null +++ b/FUNDING.json @@ -0,0 +1,7 @@ +{ + "drips": { + "ethereum": { + "ownedBy": "0x70CCBE10F980d80b7eBaab7D2E3A73e87D67B775" + } + } +} From 5067b86906f4a3815c4d7a2d3b64f2694ae3a520 Mon Sep 17 00:00:00 2001 From: Rim Rakhimov Date: Wed, 3 Jul 2024 20:30:07 +0400 Subject: [PATCH 18/20] chore[docs]: update `sourceMap` field descriptions (#4170) * Removed `evm.deployedBytecode.sourceMapFull`, as it does not work in v0.4.0 * Updated `evm.deployedBytecode.sourceMap` to be an object in compiler output * Added `evm.bytecode.sourceMap` into compiler input and compiler output --- docs/compiling-a-contract.rst | 32 
+++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/docs/compiling-a-contract.rst b/docs/compiling-a-contract.rst index 751af980b2..c2cd3ed22c 100644 --- a/docs/compiling-a-contract.rst +++ b/docs/compiling-a-contract.rst @@ -308,10 +308,10 @@ The following example describes the expected input format of ``vyper-json``. (Co // devdoc - Natspec developer documentation // evm.bytecode.object - Bytecode object // evm.bytecode.opcodes - Opcodes list + // evm.bytecode.sourceMap - Source mapping (useful for debugging) // evm.deployedBytecode.object - Deployed bytecode object // evm.deployedBytecode.opcodes - Deployed opcodes list - // evm.deployedBytecode.sourceMap - Solidity-style source mapping - // evm.deployedBytecode.sourceMapFull - Deployed source mapping (useful for debugging) + // evm.deployedBytecode.sourceMap - Deployed source mapping (useful for debugging) // evm.methodIdentifiers - The list of function hashes // // Using `evm`, `evm.bytecode`, etc. will select every target part of that output. @@ -388,15 +388,37 @@ The following example describes the output format of ``vyper-json``. Comments ar // The bytecode as a hex string. "object": "00fe", // Opcodes list (string) - "opcodes": "" + "opcodes": "", + // The source mapping. + "sourceMap": { + "breakpoints": [], + "error_map": {}, + "pc_ast_map": {}, + "pc_ast_map_item_keys": [], + "pc_breakpoints": [], + "pc_jump_map": {}, + "pc_pos_map": {}, + // The source mapping as a string. + "pc_pos_map_compressed": "" + } }, "deployedBytecode": { // The deployed bytecode as a hex string. "object": "00fe", // Deployed opcodes list (string) "opcodes": "", - // The deployed source mapping as a string. - "sourceMap": "" + // The deployed source mapping. 
+ "sourceMap": { + "breakpoints": [], + "error_map": {}, + "pc_ast_map": {}, + "pc_ast_map_item_keys": [], + "pc_breakpoints": [], + "pc_jump_map": {}, + "pc_pos_map": {}, + // The deployed source mapping as a string. + "pc_pos_map_compressed": "" + } }, // The list of function hashes "methodIdentifiers": { From 8931e54f8c577f6d60563ff47588c18e58a04c04 Mon Sep 17 00:00:00 2001 From: Zhipeng Xue <543984341@qq.com> Date: Fri, 26 Jul 2024 09:40:23 +0800 Subject: [PATCH 19/20] chore[test]: fix a type hint (#4173) Description Fix a type check warning reported by Pyre@Google, which was outdated after code modifications. Detail update the return type of function fix_terminal from bool to str, since it could be str after commit 176e7f7 --- tests/functional/grammar/test_grammar.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/functional/grammar/test_grammar.py b/tests/functional/grammar/test_grammar.py index de399e84b7..c1d2e1d6e6 100644 --- a/tests/functional/grammar/test_grammar.py +++ b/tests/functional/grammar/test_grammar.py @@ -37,7 +37,7 @@ def test_basic_grammar_empty(): assert len(tree.children) == 0 -def fix_terminal(terminal: str) -> bool: +def fix_terminal(terminal: str) -> str: # these throw exceptions in the grammar for bad in ("\x00", "\\ ", "\x0c"): terminal = terminal.replace(bad, " ") From fc192847932dcac83f0d1a0f8f8679867f525a1e Mon Sep 17 00:00:00 2001 From: HodanPlodky <36966616+HodanPlodky@users.noreply.github.com> Date: Fri, 26 Jul 2024 11:03:53 +0000 Subject: [PATCH 20/20] feat[venom]: offset instruction (#4180) this commit introduces an `offset` instruction that is emitted in the algebraic pass when the add instruction calculates an offset from a code label, which is used for immutables. this allows compilation directly to the magic `OFST` assembly instruction, which does additional constant folding after symbol resolution. 
--------- Co-authored-by: Charles Cooper --- .../venom/test_algebraic_optimizer.py | 51 +++++++++++++++++++ vyper/venom/passes/algebraic_optimization.py | 16 +++++- vyper/venom/passes/extract_literals.py | 2 +- vyper/venom/venom_to_assembly.py | 6 +++ 4 files changed, 73 insertions(+), 2 deletions(-) diff --git a/tests/unit/compiler/venom/test_algebraic_optimizer.py b/tests/unit/compiler/venom/test_algebraic_optimizer.py index e0368d4197..b5d55efbdc 100644 --- a/tests/unit/compiler/venom/test_algebraic_optimizer.py +++ b/tests/unit/compiler/venom/test_algebraic_optimizer.py @@ -127,3 +127,54 @@ def test_interleaved_case(interleave_point): assert bb.instructions[-1].operands[0] == op3_inv else: assert bb.instructions[-1].operands[0] == op3 + + +def test_offsets(): + ctx = IRContext() + fn = ctx.create_function("_global") + + bb = fn.get_basic_block() + + br1 = IRBasicBlock(IRLabel("then"), fn) + fn.append_basic_block(br1) + br2 = IRBasicBlock(IRLabel("else"), fn) + fn.append_basic_block(br2) + + p1 = bb.append_instruction("param") + op1 = bb.append_instruction("store", 32) + op2 = bb.append_instruction("add", 0, IRLabel("mem")) + op3 = bb.append_instruction("store", 64) + bb.append_instruction("dloadbytes", op1, op2, op3) + op5 = bb.append_instruction("mload", op3) + op6 = bb.append_instruction("iszero", op5) + bb.append_instruction("jnz", op6, br1.label, br2.label) + + op01 = br1.append_instruction("store", 32) + op02 = br1.append_instruction("add", 0, IRLabel("mem")) + op03 = br1.append_instruction("store", 64) + br1.append_instruction("dloadbytes", op01, op02, op03) + op05 = br1.append_instruction("mload", op03) + op06 = br1.append_instruction("iszero", op05) + br1.append_instruction("return", p1, op06) + + op11 = br2.append_instruction("store", 32) + op12 = br2.append_instruction("add", 0, IRLabel("mem")) + op13 = br2.append_instruction("store", 64) + br2.append_instruction("dloadbytes", op11, op12, op13) + op15 = br2.append_instruction("mload", op13) + op16 = 
br2.append_instruction("iszero", op15) + br2.append_instruction("return", p1, op16) + + ac = IRAnalysesCache(fn) + MakeSSA(ac, fn).run_pass() + AlgebraicOptimizationPass(ac, fn).run_pass() + RemoveUnusedVariablesPass(ac, fn).run_pass() + + offset_count = 0 + for bb in fn.get_basic_blocks(): + for instruction in bb.instructions: + assert instruction.opcode != "add" + if instruction.opcode == "offset": + offset_count += 1 + + assert offset_count == 3 diff --git a/vyper/venom/passes/algebraic_optimization.py b/vyper/venom/passes/algebraic_optimization.py index 4094219a6d..1d375ea988 100644 --- a/vyper/venom/passes/algebraic_optimization.py +++ b/vyper/venom/passes/algebraic_optimization.py @@ -1,6 +1,6 @@ from vyper.venom.analysis.dfg import DFGAnalysis from vyper.venom.analysis.liveness import LivenessAnalysis -from vyper.venom.basicblock import IRInstruction, IROperand +from vyper.venom.basicblock import IRInstruction, IRLabel, IRLiteral, IROperand from vyper.venom.passes.base_pass import IRPass @@ -58,10 +58,24 @@ def _get_iszero_chain(self, op: IROperand) -> list[IRInstruction]: chain.reverse() return chain + def _handle_offsets(self): + for bb in self.function.get_basic_blocks(): + for inst in bb.instructions: + # check if the instruction is of the form + # `add