Skip to content

Commit

Permalink
Refactored Instruction parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
virgil-serbanuta committed Jan 22, 2025
1 parent 2e01a7a commit fced21b
Show file tree
Hide file tree
Showing 70 changed files with 6,698 additions and 118 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/test-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ jobs:
run: make -C pykwasm cov-unit
- name: 'Run Rust unit-tests'
run: make rust-tests
- name: 'Generated code did not change'
run: make generate-code && git status --porcelain=v1 2>/dev/null

conformance-tests:
name: 'Conformance Tests'
Expand Down
15 changes: 10 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,25 +20,30 @@ endif
pykwasm:
$(POETRY) install

.PHONY: generate-code
generate-code: pykwasm
$(POETRY_RUN) binary-parser-gen > pykwasm/src/pykwasm/kdist/wasm-semantics/binary-parsing/instr.md

.PHONY: build build-simple build-prove build-wrc20 build-binary-parser-test
build: pykwasm
build: pykwasm generate-code
$(KDIST) -v build -j3

build-simple: pykwasm
build-simple: pykwasm generate-code
$(KDIST) -v build wasm-semantics.llvm -j3

build-prove: pykwasm
build-prove: pykwasm generate-code
$(KDIST) -v build wasm-semantics.kwasm-lemmas -j3

build-wrc20: pykwasm
build-wrc20: pykwasm generate-code
$(KDIST) -v build wasm-semantics.wrc20 -j3

build-binary-parser-test: pykwasm
build-binary-parser-test: pykwasm generate-code
$(KDIST) -v build wasm-semantics.binary-parser-test -j3

.PHONY: clean
clean: pykwasm
$(KDIST) clean
rm build -rf


# Building ULM-integrated Definition
Expand Down
1 change: 1 addition & 0 deletions pykwasm/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ wasm2kast = "pykwasm.wasm2kast:main"
kwasm = "pykwasm.scripts.kwasm:main"
kwasm-convert = "pykwasm.scripts.convert:main"
kwasm-preprocess = "pykwasm.scripts.preprocessor:main"
binary-parser-gen = "pykwasm.scripts.binary-parser-gen:main"

[tool.poetry.plugins.kdist]
wasm-semantics = "pykwasm.kdist.plugin"
Expand Down
37 changes: 36 additions & 1 deletion pykwasm/src/pykwasm/kdist/wasm-semantics/binary-parser-test.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,46 @@ module BINARY-PARSER-TEST
rule parseBinary(B:Bytes) => parseModule(B)
rule addDefnToModule
( false => true
, _D:GlobalDefn
, #module(... metadata: _ => #meta (... id: , funcIds: .Map , filename: "error: test addDefnToModule(GlobalDefn) branch called." )))
[owise]
rule addDefnToModule
( false => true
, _D:TableDefn
, #module(... metadata: _ => #meta (... id: , funcIds: .Map , filename: "error: test addDefnToModule(TableDefn) branch called." )))
[owise]
rule addDefnToModule
( false => true
, _D:MemoryDefn
, #module(... metadata: _ => #meta (... id: , funcIds: .Map , filename: "error: test addDefnToModule(MemoryDefn) branch called." )))
[owise]
rule addDefnToModule
( false => true
, _D:ElemDefn
, #module(... metadata: _ => #meta (... id: , funcIds: .Map , filename: "error: test addDefnToModule(ElemDefn) branch called." )))
[owise]
rule addDefnToModule
( false => true
, _D:DataDefn
, #module(... metadata: _ => #meta (... id: , funcIds: .Map , filename: "error: test addDefnToModule(DataDefn) branch called." )))
[owise]
rule addDefnToModule
( false => true
, _D:StartDefn
, #module(... metadata: _ => #meta (... id: , funcIds: .Map , filename: "error: test addDefnToModule(StartDefn) branch called." )))
[owise]
rule addDefnToModule
( false => true
, _D:ExportDefn
, #module(... metadata: _ => #meta (... id: , funcIds: .Map , filename: "error: test addDefnToModule(ExportDefn) branch called." )))
[owise]
rule addDefnToModule
( false => true
, _D:Defn
, #module(... metadata: _ => #meta (... id: , funcIds: .Map , filename: "error: test addDefnToModule branch called." )))
[owise]
[priority(250)]
endmodule
```
29 changes: 29 additions & 0 deletions pykwasm/src/pykwasm/kdist/wasm-semantics/binary-parser.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,28 @@ This file defines a Wasm binary parser based on this
```k
requires "binary-parsing/base.md"
requires "binary-parsing/block.md"
requires "binary-parsing/bytes.md"
requires "binary-parsing/code.md"
requires "binary-parsing/code-section.md"
requires "binary-parsing/constant.md"
requires "binary-parsing/defn.md"
requires "binary-parsing/float.md"
requires "binary-parsing/func-section.md"
requires "binary-parsing/func-section-entry.md"
requires "binary-parsing/functype.md"
requires "binary-parsing/globaltype.md"
requires "binary-parsing/helpers.md"
requires "binary-parsing/if.md"
requires "binary-parsing/import.md"
requires "binary-parsing/import-section.md"
requires "binary-parsing/instr.md"
requires "binary-parsing/instr-list.md"
requires "binary-parsing/int.md"
requires "binary-parsing/limits.md"
requires "binary-parsing/locals.md"
requires "binary-parsing/loop.md"
requires "binary-parsing/memarg.md"
requires "binary-parsing/module.md"
requires "binary-parsing/name.md"
requires "binary-parsing/resulttype.md"
Expand All @@ -22,6 +36,7 @@ requires "binary-parsing/tags.md"
requires "binary-parsing/type-section.md"
requires "binary-parsing/valtype.md"
module BINARY-PARSER-SYNTAX
imports BINARY-PARSER-BASE-SYNTAX
imports WASM
Expand All @@ -33,13 +48,27 @@ endmodule
module BINARY-PARSER [private]
imports BINARY-PARSER-MODULE-SYNTAX
imports BINARY-PARSER-BLOCK
imports BINARY-PARSER-BYTES
imports BINARY-PARSER-CODE
imports BINARY-PARSER-CODE-SECTION
imports BINARY-PARSER-CONSTANT
imports BINARY-PARSER-FLOAT
imports BINARY-PARSER-FUNC-SECTION
imports BINARY-PARSER-FUNC-SECTION-ENTRY
imports BINARY-PARSER-FUNCTYPE
imports BINARY-PARSER-GLOBALTYPE
imports BINARY-PARSER-HELPERS
imports BINARY-PARSER-IF
imports BINARY-PARSER-IMPORT
imports BINARY-PARSER-IMPORT-SECTION
imports BINARY-PARSER-INSTR-LIST
imports BINARY-PARSER-INSTR
imports BINARY-PARSER-INT
imports BINARY-PARSER-LIMITS
imports BINARY-PARSER-LOCALS
imports BINARY-PARSER-LOOP
imports BINARY-PARSER-MEMARG
imports BINARY-PARSER-MODULE
imports BINARY-PARSER-NAME
imports BINARY-PARSER-RESULTTYPE
Expand Down
74 changes: 74 additions & 0 deletions pykwasm/src/pykwasm/kdist/wasm-semantics/binary-parsing/block.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
Parsing [blocks](https://webassembly.github.io/spec/core/binary/instructions.html#control-instructions),
i.e., a blocktype + instr list.

```k
module BINARY-PARSER-BLOCK-SYNTAX
imports BINARY-PARSER-BASE-SYNTAX
imports BINARY-PARSER-INSTR-LIST-SYNTAX
imports WASM-COMMON-SYNTAX
imports WASM-DATA-COMMON
syntax BlockType ::= "epsilon" | ValType | Int
syntax Block ::= block(BlockType, BinaryInstrs)
syntax BinaryInstr ::= Block
syntax BlockResult ::= blockResult(Block, endsWithElse: Bool, BytesWithIndex) | ParseError
syntax BlockResult ::= parseBlock(BytesWithIndex) [function, total]
syntax VecTypeOrError ::= VecType | ParseError
syntax VecTypeOrError ::= blockTypeToVecType(BlockType, Defns) [function, total]
endmodule
module BINARY-PARSER-BLOCK [private]
imports BINARY-PARSER-BLOCK-SYNTAX
imports BINARY-PARSER-CONSTANT-SYNTAX
imports BINARY-PARSER-INSTR-LIST-SYNTAX
imports BINARY-PARSER-INT-SYNTAX
imports BINARY-PARSER-TAGS
imports BINARY-PARSER-VALTYPE-SYNTAX
syntax BlockResult ::= #parseBlock1(BlockTypeResult) [function, total]
| #parseBlock2(BlockType, InstrListResult) [function, total]
rule parseBlock(BWI:BytesWithIndex) => #parseBlock1(parseBlockType(BWI))
rule #parseBlock1(blockTypeResult(BT:BlockType, BWI:BytesWithIndex))
=> #parseBlock2(BT, parseInstrList(BWI))
rule #parseBlock1(E:ParseError) => E
rule #parseBlock2(BT:BlockType, instrListResult(Is:BinaryInstrs, EndsWithElse:Bool, BWI:BytesWithIndex))
=> blockResult(block(BT, Is), EndsWithElse, BWI)
rule #parseBlock2(_:BlockType, E:ParseError) => E
syntax BlockTypeResult ::= blockTypeResult(BlockType, BytesWithIndex) | ParseError
syntax BlockTypeResult ::= parseBlockType(BytesWithIndex) [function, total]
| #parseBlockType1(BytesWithIndex, BytesWithIndexOrError) [function, total]
| #parseBlockType2(BytesWithIndex, ValTypeResult) [function, total]
| #parseBlockType3(IntResult) [function, total]
rule parseBlockType(BWI:BytesWithIndex)
=> #parseBlockType1(BWI, parseConstant(BWI, TYPE_EMPTY))
rule #parseBlockType1(_:BytesWithIndex, BWI:BytesWithIndex)
=> blockTypeResult(epsilon, BWI:BytesWithIndex)
rule #parseBlockType1(BWI:BytesWithIndex, _:ParseError)
=> #parseBlockType2(BWI, parseValType(BWI))
rule #parseBlockType2(_:BytesWithIndex, valTypeResult(VT:ValType, BWI:BytesWithIndex))
=> blockTypeResult(VT, BWI)
rule #parseBlockType2(BWI:BytesWithIndex, _:ParseError)
=> #parseBlockType3(parseLeb128SInt(BWI))
rule #parseBlockType3(intResult(Value:Int, BWI:BytesWithIndex))
=> blockTypeResult(Value, BWI)
rule #parseBlockType3(E:ParseError) => E
rule blockTypeToVecType(epsilon, _:Defns) => [ .ValTypes ]
rule blockTypeToVecType(ValType, _:Defns) => [ ValType .ValTypes ]
rule blockTypeToVecType(Index:Int, Ds::Defns) => parseError("blockTypeToVecType: unimplemented", ListItem(Index) ListItem(Ds))
// rule blockTypeToVecType(Index:Int, .Defns) => parseError("blockTypeToVecType: not found", ListItem(Index))
// rule blockTypeToVecType(0, #type(FT:FuncType, _:OptionalId) Ds:Defns) => [FT]
// rule blockTypeToVecType(0, D:Defn Ds:Defns) => parseError("blockTypeToVecType", ListItem(D) ListItem(Ds))
// [owise]
// rule blockTypeToVecType(Index:Int, D:Defn Ds:Defns) => blockTypeToVecType(Index -Int 1, Ds)
// requires Index >Int 0
// rule blockTypeToVecType(Index:Int, Ds:Defns) => parseError("blockTypeToVecType", ListItem(I) ListItem(Ds))
// requires Index <Int 0
endmodule
```
34 changes: 34 additions & 0 deletions pykwasm/src/pykwasm/kdist/wasm-semantics/binary-parsing/bytes.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
Extract a number of bytes from the buffer.

```k
module BINARY-PARSER-BYTES-SYNTAX
imports BINARY-PARSER-BASE-SYNTAX
syntax BytesResult ::= bytesResult(Bytes, BytesWithIndex) | ParseError
syntax BytesResult ::= parseBytes(BytesWithIndex, Int) [function, total]
syntax BytesWithIndexOrError ::= ignoreBytes(BytesWithIndex, Int) [function, total]
endmodule
module BINARY-PARSER-BYTES [private]
imports BINARY-PARSER-BYTES-SYNTAX
rule parseBytes(bwi(Buffer:Bytes, Index:Int), Count:Int)
=> bytesResult
( substrBytes(Buffer, Index, Index +Int Count)
, bwi(Buffer, Index +Int Count)
)
requires Index +Int Count <=Int lengthBytes(Buffer)
rule parseBytes(bwi(Buffer:Bytes, Index:Int), Count:Int)
=> parseError("parseBytes", ListItem(lengthBytes(Buffer)) ListItem(Index) ListItem(Count) ListItem(Buffer))
[owise]
rule ignoreBytes(BWI:BytesWithIndex, Count:Int)
=> #ignoreBytes(parseBytes(BWI, Count))
syntax BytesWithIndexOrError ::= #ignoreBytes(BytesResult) [function, total]
rule #ignoreBytes(bytesResult(_:Bytes, BWI:BytesWithIndex)) => BWI
rule #ignoreBytes(E:ParseError) => E
endmodule
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// Parsing a [code section](https://webassembly.github.io/spec/core/binary/modules.html#code-section).

```k
module BINARY-PARSER-CODE-SECTION-SYNTAX
imports BINARY-PARSER-BASE-SYNTAX
syntax SectionResult ::= parseCodeSection(BytesWithIndex) [function, total]
endmodule
module BINARY-PARSER-CODE-SECTION [private]
imports BINARY-PARSER-CODE-SECTION-SYNTAX
imports BINARY-PARSER-CODE-SYNTAX
imports BINARY-PARSER-INT-SYNTAX
imports BINARY-PARSER-SECTION-SYNTAX
syntax SectionResult ::= #parseCodeSection(IntResult) [function, total]
rule parseCodeSection(BWI:BytesWithIndex) => #parseCodeSection(parseLeb128UInt(BWI))
rule #parseCodeSection(intResult(Count:Int, BWI:BytesWithIndex))
=> parseSectionVector(defnCode, Count, .BinaryDefns, BWI)
rule #parseCodeSection(E:ParseError) => E
syntax DefnKind ::= "defnCode"
rule parseDefn(defnCode, BWI:BytesWithIndex) => parseDefnCode(BWI)
endmodule
```
43 changes: 43 additions & 0 deletions pykwasm/src/pykwasm/kdist/wasm-semantics/binary-parsing/code.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Parsing a [code object](https://webassembly.github.io/spec/core/binary/modules.html#code-section).

```k
module BINARY-PARSER-CODE-SYNTAX
imports BINARY-PARSER-BASE-SYNTAX
imports BINARY-PARSER-INSTR-LIST-SYNTAX
imports WASM
syntax BinaryDefn ::= BinaryDefnFunctionBody
syntax BinaryDefnFunctionBody ::= binaryDefnFunctionBody(VecType, BinaryInstrs)
syntax BinaryDefnFunctionBodies ::= List{BinaryDefnFunctionBody, ""}
syntax DefnResult ::= parseDefnCode(BytesWithIndex) [function, total]
endmodule
module BINARY-PARSER-CODE [private]
imports BINARY-PARSER-CODE-SYNTAX
imports BINARY-PARSER-INSTR-LIST-SYNTAX
imports BINARY-PARSER-INT-SYNTAX
imports BINARY-PARSER-LOCALS-SYNTAX
imports BINARY-PARSER-SECTION-SYNTAX
imports BINARY-PARSER-VALTYPE-SYNTAX
imports BOOL
syntax DefnResult ::= #parseDefnCode(sizeInBytes:IntResult) [function, total]
| #parseDefnCode1(sizeInBytes:Int, LocalsVecResult) [function, total]
| #parseDefnCode2(sizeInBytes:Int, ValTypes, InstrListResult) [function, total]
rule parseDefnCode(BWI:BytesWithIndex) => #parseDefnCode(parseLeb128UInt(BWI))
rule #parseDefnCode(intResult(Size:Int, BWI:BytesWithIndex))
=> #parseDefnCode1(Size, parseLocalsVec(BWI))
rule #parseDefnCode(E:ParseError) => E
rule #parseDefnCode1(Size:Int, localsVecResult(Locals:LocalsVec, BWI:BytesWithIndex))
=> #parseDefnCode2(Size, localsVecToValTypes(Locals), parseInstrList(BWI))
rule #parseDefnCode1(_, E:ParseError) => E
rule #parseDefnCode2(_Size:Int, Locals:ValTypes, instrListResult(Is:BinaryInstrs, false, BWI:BytesWithIndex))
=> defnResult(binaryDefnFunctionBody([ Locals ], Is), BWI)
rule #parseDefnCode2(_Size:Int, Locals:ValTypes, instrListResult(Is:BinaryInstrs, true, BWI:BytesWithIndex))
=> parseError("#parseDefnCode2", ListItem(Locals) ListItem(Is) ListItem(BWI))
rule #parseDefnCode2(_, _, E:ParseError) => E
endmodule
```
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,13 @@ module BINARY-PARSER-DEFN-SYNTAX
imports BINARY-PARSER-BASE-SYNTAX
imports WASM-COMMON-SYNTAX
syntax DefnResult ::= defnResult(Defn, BytesWithIndex) | ParseError
syntax BinaryDefn ::= Defn
syntax DefnResult ::= defnResult(BinaryDefn, BytesWithIndex) | ParseError
syntax BinaryDefns ::= List{BinaryDefn, ""}
syntax DefnOrError ::= Defn | ParseError
syntax DefnsOrError ::= Defns | ParseError
endmodule
```
34 changes: 34 additions & 0 deletions pykwasm/src/pykwasm/kdist/wasm-semantics/binary-parsing/float.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Parsing a [float](https://webassembly.github.io/spec/core/binary/values.html#floating-point).

```k
module BINARY-PARSER-FLOAT-SYNTAX
imports BINARY-PARSER-BASE-SYNTAX
syntax FloatResult ::= floatResult(Float, BytesWithIndex) | ParseError
syntax FloatResult ::= parseFloat64(BytesWithIndex) [function, total]
syntax FloatResult ::= parseFloat32(BytesWithIndex) [function, total]
endmodule
module BINARY-PARSER-FLOAT [private]
imports BINARY-PARSER-BYTES-SYNTAX
imports BINARY-PARSER-FLOAT-SYNTAX
syntax FloatResult ::= #parseFloat64(BytesResult) [function, total]
rule parseFloat64(BWI:BytesWithIndex) => #parseFloat64(parseBytes(BWI, 8))
rule #parseFloat64(bytesResult(Bytes:Bytes, BWI:BytesWithIndex))
=> parseError("#parseFloat64: bytesToFloat is not implemented", ListItem(Bytes) ListItem(BWI)) // float(bytesToFloat(Bytes), BWI)
rule #parseFloat64(E:ParseError) => E
syntax FloatResult ::= #parseFloat32(BytesResult) [function, total]
rule parseFloat32(BWI:BytesWithIndex) => #parseFloat32(parseBytes(BWI, 4))
rule #parseFloat32(bytesResult(Bytes:Bytes, BWI:BytesWithIndex))
=> parseError("#parseFloat32: bytesToFloat is not implemented", ListItem(Bytes) ListItem(BWI)) // float(bytesToFloat(Bytes), BWI)
rule #parseFloat32(E:ParseError) => E
// syntax Float ::= bytesToFloat(Bytes) [function, total]
// TODO: implement.
endmodule
```
Loading

0 comments on commit fced21b

Please sign in to comment.