From ce17b83b713cf1e3eef14ce924f5beb978326b02 Mon Sep 17 00:00:00 2001 From: Andreas Rossberg Date: Mon, 26 Jun 2023 13:20:27 +0200 Subject: [PATCH 1/3] Tweak2 --- document/core/appendix/algorithm.rst | 134 ++++++++++++++++++--------- 1 file changed, 91 insertions(+), 43 deletions(-) diff --git a/document/core/appendix/algorithm.rst b/document/core/appendix/algorithm.rst index ee55fbdb1..496f4f245 100644 --- a/document/core/appendix/algorithm.rst +++ b/document/core/appendix/algorithm.rst @@ -16,7 +16,7 @@ Consequently, it can be integrated directly into a decoder. The algorithm is expressed in typed pseudo code whose semantics is intended to be self-explanatory. -.. index:: value type, stack, label, frame, instruction +.. index:: value type, reference type, vector type, number type, packed type, field type, structure type, array type, function type, compound type, sub type, recursive type, defined type, stack, label, frame, instruction Data Structures ~~~~~~~~~~~~~~~ @@ -24,13 +24,16 @@ Data Structures Types ..... -Value types are representable as a set of enumerations. +Value types are representable as sets of enumerations: .. code-block:: pseudo type num_type = I32 | I64 | F32 | F64 type vec_type = V128 - type heap_type = Def(idx : nat) | Func | Extern | Bot + type heap_type = + Any | Eq | I31 | Struct | Array | None | + Func | Nofunc | Extern | Noextern | Bot | + Def(def : def_type) type ref_type = Ref(heap : heap_type, null : bool) type val_type = num_type | vec_type | ref_type | Bot @@ -43,38 +46,46 @@ Value types are representable as a set of enumerations. func is_ref(t : val_type) : bool = return not (is_num t || is_vec t) || t = Bot -Equivalence and subtyping checks can be defined on these types. +Similarly, :ref:`defined types ` :code:`def_type` can be represented: .. code-block:: pseudo - func eq_def(n1, n2) = - ... // check that both type definitions are equivalent (TODO) - - func matches_null(null1 : bool, null2 : bool) : bool = - return null1 = null2 || null2 - - func matches_heap(t1 : heap_type, t2 : heap_type) : bool = - switch (t1, t2) - case (Def(n1), Def(n2)) - return eq_def(n1, n2) - case (Def(_), Func) - return true - case (Bot, _) - return true - case (_, _) - return t1 = t2 - - func matches_ref(t1 : ref_type, t2 : ref_type) : bool = - return matches_heap(t1.heap, t2.heap) && matches_null(t1.null, t2.null) - - func matches(t1 : val_type, t2 : val_type) : bool = - switch (t1, t2) - case (Ref(_), Ref(_)) - return matches_ref(t1, t2) - case (Bot, _) - return true - case (_, _) - return t1 = t2 + type packed_type = I8 | I16 + type field_type = Field(val : val_type | packed_type, mut : bool) + + type struct_type = Struct(fields : list(field_type)) + type array_type = Array(fields : field_type) + type func_type = Func(params : list(val_type), results : list(val_type)) + type comp_type = struct_type | array_type | func_type + + type sub_type = Sub(super : list(def_type), body : comp_type, final : bool) + type rec_type = Rec(types : list(sub_type)) + + type def_type = Def(rec : rec_type, proj : int32) + + func unpack_field(t : field_type) : val_type = + if (it = I8 || t = I16) return I32 + return t + + func expand_def(t : def_type) : comp_type = + return t.rec.types[t.proj].body + +These representations assume that all types have been :ref:`closed ` by :ref:`substituting ` all :ref:`type indices ` (in :ref:`concrete heap types ` and in :ref:`sub types `) with their respective :ref:`defined types `. +This includes *recursive* references to enclosing :ref:`defined types `, +such that type representations form graphs and may be *cyclic* for :ref:`recursive types `. + +We assume that all types have been *canonicalized*, such that equality on two type representations holds if and only if they are in fact :ref:`equivalent `. making it a constant-time check. + +.. note:: + For the purpose of type canonicalization, recursive references from a :ref:`heap type ` to an enclosing :ref:`recursive type ` (i.e., forward edges in the graph that form a cycle) need to be distinguished from references to previously defined types. + However, this distinction does not otherwise affect validation, so is ignored here. + In the graph representation, all recursive types are effectively infinitely :ref:`unrolled `. + +We further assume that a :ref:`subtyping ` check is defined on value types: + +.. code-block:: pseudo + + func matches_val(t1 : val_type, t2 : val_type) : bool Context ....... @@ -84,6 +95,8 @@ For the purpose of presenting the algorithm, it is maintained in a set of global .. code-block:: pseudo + var return_type : list(val_type) + var types : array(def_type) var locals : array(val_type) var locals_init : array(bool) var globals : array(global_type) @@ -142,7 +155,7 @@ However, these variables are not manipulated directly by the main checking funct func pop_val(expect : val_type) : val_type = let actual = pop_val() - error_if(not matches(actual, expect)) + error_if(not matches_val(actual, expect)) return actual func pop_num() : num_type | Bot = @@ -249,7 +262,6 @@ it is an invariant of the validation algorithm that there always is at least one However, a polymorphic stack cannot underflow, but instead generates :code:`Bot` types as needed. - .. index:: opcode Validation of Opcode Sequences @@ -354,15 +366,51 @@ Other instructions are checked in a similar manner. push_vals(label_types(ctrls[n])) push_val(Ref(rt.heap, false)) - case (call_ref) - let rt = pop_ref() - if (rt.heap =/= Bot) - error_if(not is_def(rt.heap)) - let ft = funcs[rt.heap.idx] - pop_vals(ft.params) - push_vals(ft.results) + case (br_on_cast n rt1 rt2) + validate_ref_type(rt1) + validate_ref_type(rt2) + pop_val(rt1) + push_val(rt2) + pop_vals(label_types(ctrls[n])) + push_vals(label_types(ctrls[n])) + pop_val(rt2) + push_val(diff_ref_type(rt2, rt1)) + + case (return) + pop_vals(return_types) + unreachable() + + case (call_ref x) + let t = expand_def(types[x]) + error_if(not is_func(t)) + pop_vals(t.params) + pop_val(Ref(Def(types[x]))) + push_vals(t.results) + + case (return_call_ref x) + let t = expand_def(types[x]) + error_if(not is_func(t)) + pop_vals(t.params) + pop_val(Ref(Def(types[x]))) + error_if(t.results.len() =/= return_types.len()) + push_vals(t.results) + pop_vals(return_types) + unreachable() + + case (struct.new x) + let t = expand_def(types[x]) + error_if(not is_struct(t)) + for (ti in reverse(t.fields)) + pop_val(unpack_field(ti)) + push_val(Ref(Def(types[x]))) + + case (struct.set x n) + let t = expand_def(types[x]) + error_if(not is_struct(t) || n >= t.fields.len()) + pop_val(Ref(Def(types[x]))) + pop_val(unpack_field(st.fields[n])) .. note:: - It is an invariant under the current WebAssembly instruction set that an operand of :code:`Unknown` type is never duplicated on the stack. + It is an invariant under the current WebAssembly instruction set that an operand of :code:`Bot` type is never duplicated on the stack. This would change if the language were extended with stack instructions like :code:`dup`. - Under such an extension, the above algorithm would need to be refined by replacing the :code:`Unknown` type with proper *type variables* to ensure that all uses are consistent. + Under such an extension, the above algorithm would need to be refined by replacing the :code:`Bot` type with proper *type variables* to ensure that all uses are consistent. From 4961c9555b48546a725423d5f72a3d085ce5848f Mon Sep 17 00:00:00 2001 From: Andreas Rossberg Date: Wed, 19 Jul 2023 11:41:58 +0200 Subject: [PATCH 2/3] List aux functions --- document/core/appendix/algorithm.rst | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/document/core/appendix/algorithm.rst b/document/core/appendix/algorithm.rst index 496f4f245..152c8c1d2 100644 --- a/document/core/appendix/algorithm.rst +++ b/document/core/appendix/algorithm.rst @@ -81,11 +81,19 @@ We assume that all types have been *canonicalized*, such that equality on two ty However, this distinction does not otherwise affect validation, so is ignored here. In the graph representation, all recursive types are effectively infinitely :ref:`unrolled `. -We further assume that a :ref:`subtyping ` check is defined on value types: +We further assume that :ref:`validation ` and :ref:`subtyping ` checks are defined on value types, as well as a few auxiliary functions on compound types: .. code-block:: pseudo + func validate_val_type(t : val_type) + func validate_ref_type(t : ref_type) + func matches_val(t1 : val_type, t2 : val_type) : bool + func matches_ref(t1 : val_type, t2 : val_type) : bool + + func is_func(t : comp_type) : bool + func is_struct(t : comp_type) : bool + func is_array(t : comp_type) : bool Context ....... From d3ce44c8495c1e2997b8d524c6964056266e4ada Mon Sep 17 00:00:00 2001 From: Andreas Rossberg Date: Wed, 19 Jul 2023 12:16:37 +0200 Subject: [PATCH 3/3] Fix spacing --- document/core/valid/modules.rst | 2 +- document/core/valid/types.rst | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/document/core/valid/modules.rst b/document/core/valid/modules.rst index 37f6a5e10..8dee84b8c 100644 --- a/document/core/valid/modules.rst +++ b/document/core/valid/modules.rst @@ -46,7 +46,7 @@ The sequence of :ref:`types ` defined in a module is validated incr \qquad C = C' \with \CTYPES = C'.\CTYPES~\rolldt_{|C'.\CTYPES|}(\rectype) \qquad - C \vdashrectype \rectype \ok(|C'.\CTYPES|) + C \vdashrectype \rectype ~{\ok}(|C'.\CTYPES|) }{ C \vdashtypes \type^\ast~\rectype \ok } diff --git a/document/core/valid/types.rst b/document/core/valid/types.rst index 192b86dfa..fe5a42344 100644 --- a/document/core/valid/types.rst +++ b/document/core/valid/types.rst @@ -394,15 +394,15 @@ Recursive Types .. math:: \frac{ }{ - C \vdashrectype \TREC~\epsilon \ok(x) + C \vdashrectype \TREC~\epsilon ~{\ok}(x) } \qquad \frac{ - C \vdashsubtype \subtype \ok(x) + C \vdashsubtype \subtype ~{\ok}(x) \qquad - C \vdashrectype \TREC~{\subtype'}^\ast \ok(x + 1) + C \vdashrectype \TREC~{\subtype'}^\ast ~{\ok}(x + 1) }{ - C \vdashrectype \TREC~\subtype~{\subtype'}^\ast \ok(x) + C \vdashrectype \TREC~\subtype~{\subtype'}^\ast ~{\ok}(x) } :math:`\TSUB~\TFINAL^?~y^\ast~\comptype` @@ -442,7 +442,7 @@ Recursive Types (C \vdashcomptypematch \comptype \matchescomptype \comptype')^\ast \end{array} }{ - C \vdashsubtype \TSUB~\TFINAL^?~y^\ast~\comptype \ok(x) + C \vdashsubtype \TSUB~\TFINAL^?~y^\ast~\comptype ~{\ok}(x) } .. note:: @@ -472,7 +472,7 @@ Defined Types .. math:: \frac{ - C \vdashrectype \rectype \ok(x) + C \vdashrectype \rectype ~{\ok}(x) \qquad \rectype = \TREC~\subtype^n \qquad