diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 997fda5bf..cd5cc0d41 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -211,7 +211,8 @@ jobs: - name: Run Merkle tree tests run: | - docker run --rm -t ${{ github.repository_owner }}/machine-emulator:tests test-merkle-tree-hash --log2-root-size=30 --log2-leaf-size=12 --input=/usr/bin/test-merkle-tree-hash + docker run --rm -t ${{ github.repository_owner }}/machine-emulator:tests test-merkle-tree-hash --log2-root-size=30 --log2-leaf-size=12 --input=/usr/bin/test-merkle-tree-hash --hash-function=keccak256 + docker run --rm -t ${{ github.repository_owner }}/machine-emulator:tests test-merkle-tree-hash --log2-root-size=30 --log2-leaf-size=12 --input=/usr/bin/test-merkle-tree-hash --hash-function=sha256 - name: Run C API tests run: | @@ -379,7 +380,8 @@ jobs: - name: Run Merkle tree tests run: | - docker run --platform linux/arm64 --rm -t ${{ github.repository_owner }}/machine-emulator:tests test-merkle-tree-hash --log2-root-size=30 --log2-leaf-size=12 --input=/usr/bin/test-merkle-tree-hash + docker run --platform linux/arm64 --rm -t ${{ github.repository_owner }}/machine-emulator:tests test-merkle-tree-hash --log2-root-size=30 --log2-leaf-size=12 --input=/usr/bin/test-merkle-tree-hash --hash-function=keccak256 + docker run --platform linux/arm64 --rm -t ${{ github.repository_owner }}/machine-emulator:tests test-merkle-tree-hash --log2-root-size=30 --log2-leaf-size=12 --input=/usr/bin/test-merkle-tree-hash --hash-function=sha256 - name: Run C API tests run: | @@ -633,7 +635,7 @@ jobs: - name: Run tests with sanitizer run: | - docker run --rm -t ${{ github.repository_owner }}/machine-emulator:sanitizer make sanitize=yes test-save-and-load test-machine test-hash test-lua test-jsonrpc test-c-api coverage-machine test-uarch-rv64ui test-uarch-interpreter coverage-uarch + docker run --rm -t ${{ github.repository_owner }}/machine-emulator:sanitizer make sanitize=yes 
test-save-and-load test-machine test-lua test-jsonrpc test-c-api coverage-machine test-uarch-rv64ui test-uarch-interpreter coverage-uarch publish_artifacts: name: Publish artifacts diff --git a/.typos.toml b/.typos.toml index ccb29e1e3..66118197f 100644 --- a/.typos.toml +++ b/.typos.toml @@ -9,3 +9,4 @@ stap = "stap" wronly = "wronly" optin = "optin" sxl = "sxl" +nd = "nd" diff --git a/LICENSES.md b/LICENSES.md index 0c12e43b3..89a7f8ac4 100644 --- a/LICENSES.md +++ b/LICENSES.md @@ -12,7 +12,6 @@ This project includes several submodules and dependencies, each with its own lic - `third-party/riscv-arch-test`: Source code licensed under the Apache 2.0 and BSD 3-Clause licenses. Documentation under `CC-BY-4.0`. License information is provided in README.md and other COPYING.* files like [third-party/riscv-arch-test/COPYING.APACHE](third-party/riscv-arch-test/COPYING.APACHE). - `third-party/riscv-tests`: Licensed under the BSD 3-Clause "New" or "Revised" License. See [third-party/riscv-tests/LICENSE](third-party/riscv-tests/LICENSE) for license details. - `third-party/riscv-tests/env`: Licensed under the BSD 3-Clause "New" or "Revised" License. License details are in [third-party/riscv-tests/env/LICENSE](third-party/riscv-tests/env/LICENSE). -- `third-party/tiny_sha3`: Licensed under the MIT License. The license can be found at [third-party/tiny_sha3/LICENSE](third-party/tiny_sha3/LICENSE). - `third-party/nlohmann-json`: Licensed under the MIT License. The license can be found at [third-party/nlohmann-json/LICENSE.MIT](third-party/nlohmann-json/LICENSE.MIT). 
## Debian Packages diff --git a/Makefile b/Makefile index 98a72b129..9fe55574a 100644 --- a/Makefile +++ b/Makefile @@ -77,7 +77,7 @@ EMU_TO_BIN= src/cartesi-jsonrpc-machine src/cartesi-merkle-tree-hash EMU_TO_LIB= src/$(LIBCARTESI_SO) src/$(LIBCARTESI_SO_JSONRPC) EMU_TO_LIB_A= src/libcartesi.a src/libcartesi_jsonrpc.a src/libluacartesi.a src/libluacartesi_jsonrpc.a EMU_LUA_TO_BIN= src/cartesi-machine.lua src/cartesi-machine-stored-hash.lua -EMU_TO_LUA_PATH= src/cartesi/util.lua src/cartesi/proof.lua src/cartesi/gdbstub.lua +EMU_TO_LUA_PATH= src/cartesi/util.lua src/cartesi/gdbstub.lua EMU_TO_LUA_CPATH= src/cartesi.so EMU_TO_LUA_CARTESI_CPATH= src/cartesi/jsonrpc.so EMU_TO_INC= $(addprefix src/,jsonrpc-machine-c-api.h machine-c-api.h machine-c-version.h) diff --git a/src/Makefile b/src/Makefile index db2098986..54d232b0d 100644 --- a/src/Makefile +++ b/src/Makefile @@ -164,7 +164,6 @@ CLANG_TIDY_WARNS=-Wthread-safety -Wglobal-constructors # Place our include directories before the system's INCS+= \ -I../third-party/llvm-flang-uint128 \ - -I../third-party/tiny_sha3 \ -I../third-party/nlohmann-json \ -I../third-party/downloads \ $(BOOST_INC) @@ -221,8 +220,8 @@ ifneq ($(git_commit),) DEFS+=-DGIT_COMMIT='"$(git_commit)"' endif -# The SHA3 is third party library we always want to compile with O3 -SHA3_CFLAGS=-O3 +# Hashing libraries have special optimizations flags +HASH_CFLAGS=-O3 -DNDEBUG -funroll-loops -fno-stack-protector # Optimization flags for the interpreter ifneq (,$(filter yes,$(relwithdebinfo) $(release))) @@ -370,7 +369,9 @@ LIBCARTESI_OBJS:= \ pristine-merkle-tree.o \ replay-step-state-access-interop.o \ send-cmio-response.o \ - sha3.o \ + keccak-256-hasher.o \ + sha-256-hasher.o \ + is-pristine.o \ uarch-pristine-hash.o \ uarch-pristine-ram.o \ uarch-pristine-state-hash.o \ @@ -397,7 +398,9 @@ LUACARTESI_OBJS:= \ $(CARTESI_CLUA_OBJS) LIBCARTESI_MERKLE_TREE_OBJS:= \ - sha3.o \ + keccak-256-hasher.o \ + sha-256-hasher.o \ + is-pristine.o \ 
back-merkle-tree.o \ pristine-merkle-tree.o \ complete-merkle-tree.o \ @@ -555,8 +558,6 @@ jsonrpc-discover.cpp: jsonrpc-discover.json @$(CC) $(CFLAGS) $< -MM -MT $@ -MF $@.d > /dev/null 2>&1 @touch $@ -sha3.o: ../third-party/tiny_sha3/sha3.c - $(CC) $(CFLAGS) $(SHA3_CFLAGS) -c -o $@ $< uarch-pristine-ram.o: $(UARCH_PRISTINE_RAM_C) $(CC) $(CFLAGS) -c -o $@ $< @@ -570,6 +571,15 @@ interpret-jump-table.h: ../tools/gen-interpret-jump-table.lua interpret.o: interpret.cpp machine-c-version.h interpret-jump-table.h $(CXX) $(CXXFLAGS) $(INTERPRET_CXXFLAGS) -c -o $@ $< +keccak-256-hasher.o: keccak-256-hasher.cpp + $(CXX) $(CXXFLAGS) $(HASH_CFLAGS) -c -o $@ $< + +sha-256-hasher.o: sha-256-hasher.cpp + $(CXX) $(CXXFLAGS) $(HASH_CFLAGS) -c -o $@ $< + +is-pristine.o: is-pristine.cpp + $(CXX) $(CXXFLAGS) $(HASH_CFLAGS) -c -o $@ $< + %.o: %.cpp machine-c-version.h $(CXX) $(CXXFLAGS) -c -o $@ $< diff --git a/src/address-range.h b/src/address-range.h index 0865f0617..8c307efbd 100644 --- a/src/address-range.h +++ b/src/address-range.h @@ -352,7 +352,7 @@ class address_range { } #ifndef MICROARCHITECTURE - // Defaul implemenation returns always dirty tree + // Default implementation returns always dirty tree virtual const i_dirty_page_tree &do_get_dirty_page_tree() const noexcept { const static empty_dirty_page_tree no_dirty{}; return no_dirty; @@ -363,7 +363,7 @@ class address_range { return const_cast(std::as_const(*this).do_get_dirty_page_tree()); } - // Defaul implemenation returns no hashes + // Default implementation returns no hashes virtual const i_dense_hash_tree &do_get_dense_hash_tree() const noexcept { const static empty_dense_hash_tree no_hashes{}; return no_hashes; diff --git a/src/array2d.h b/src/array2d.h new file mode 100644 index 000000000..91e67aad2 --- /dev/null +++ b/src/array2d.h @@ -0,0 +1,31 @@ +// Copyright Cartesi and individual authors (see AUTHORS) +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// This program is free software: you can redistribute 
it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) any +// later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +// PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License along +// with this program (see COPYING). If not, see . +// + +#ifndef ARRAY2D_H +#define ARRAY2D_H + +#include +#include + +namespace cartesi { + +//??(edubart): In future C++ standards we should switch to `std::mdarray` or `std::mdspan` +template +using array2d = std::array, M>; + +} // namespace cartesi + +#endif // ARRAY2D_H diff --git a/src/back-merkle-tree.cpp b/src/back-merkle-tree.cpp index 9fbcafe28..ab081aa7f 100644 --- a/src/back-merkle-tree.cpp +++ b/src/back-merkle-tree.cpp @@ -29,12 +29,14 @@ namespace cartesi { -back_merkle_tree::back_merkle_tree(int log2_root_size, int log2_leaf_size, int log2_word_size) : +back_merkle_tree::back_merkle_tree(int log2_root_size, int log2_leaf_size, int log2_word_size, + hash_function_type hash_function) : m_log2_root_size{log2_root_size}, m_log2_leaf_size{log2_leaf_size}, m_max_leaves{UINT64_C(1) << (log2_root_size - log2_leaf_size)}, m_context(std::max(1, log2_root_size - log2_leaf_size + 1)), - m_pristine_hashes{log2_root_size, log2_word_size} { + m_pristine_hashes{log2_root_size, log2_word_size, hash_function}, + m_hash_function(hash_function) { if (log2_root_size < 0) { throw std::out_of_range{"log2_root_size is negative"}; } @@ -56,7 +58,7 @@ back_merkle_tree::back_merkle_tree(int log2_root_size, int log2_leaf_size, int l } void back_merkle_tree::push_back(const machine_hash &new_leaf_hash) { - hasher_type h; + variant_hasher h{m_hash_function}; machine_hash right = 
new_leaf_hash; if (m_leaf_count >= m_max_leaves) { throw std::out_of_range{"too many leaves"}; @@ -75,7 +77,7 @@ void back_merkle_tree::push_back(const machine_hash &new_leaf_hash) { } void back_merkle_tree::pad_back(uint64_t new_leaf_count) { - hasher_type h; + variant_hasher h{m_hash_function}; if (new_leaf_count > m_max_leaves || m_leaf_count + new_leaf_count > m_max_leaves) { throw std::invalid_argument("too many leaves"); } @@ -120,7 +122,7 @@ void back_merkle_tree::pad_back(uint64_t new_leaf_count) { } machine_hash back_merkle_tree::get_root_hash() const { - hasher_type h; + variant_hasher h{m_hash_function}; assert(m_leaf_count <= m_max_leaves); const int depth = m_log2_root_size - m_log2_leaf_size; if (m_leaf_count < m_max_leaves) { @@ -144,7 +146,7 @@ back_merkle_tree::proof_type back_merkle_tree::get_next_leaf_proof() const { if (m_leaf_count >= m_max_leaves) { throw std::out_of_range{"tree is full"}; } - hasher_type h; + variant_hasher h{m_hash_function}; proof_type proof{m_log2_root_size, m_log2_leaf_size}; proof.set_target_address(m_leaf_count << m_log2_leaf_size); proof.set_target_hash(m_pristine_hashes.get_hash(m_log2_leaf_size)); diff --git a/src/back-merkle-tree.h b/src/back-merkle-tree.h index 4b887fcc6..3543bc5c2 100644 --- a/src/back-merkle-tree.h +++ b/src/back-merkle-tree.h @@ -21,9 +21,9 @@ #include #include "hash-tree-proof.h" -#include "keccak-256-hasher.h" #include "machine-hash.h" #include "pristine-merkle-tree.h" +#include "variant-hasher.h" /// \file /// \brief Back Merkle tree interface. @@ -40,12 +40,6 @@ namespace cartesi { /// The class only ever stores log(n) hashes (1 for each tree level). class back_merkle_tree { public: - /// \brief Hasher class. - using hasher_type = keccak_256_hasher; - - /// \brief Hasher class. - using hash_type = machine_hash; - /// \brief Storage for the proof of a word value. 
using proof_type = hash_tree_proof; @@ -53,7 +47,7 @@ class back_merkle_tree { /// \param log2_root_size Log2 of root node /// \param log2_leaf_size Log2 of leaf node /// \param log2_word_size Log2 of word node - back_merkle_tree(int log2_root_size, int log2_leaf_size, int log2_word_size); + back_merkle_tree(int log2_root_size, int log2_leaf_size, int log2_word_size, hash_function_type hash_function); /// \brief Appends a new hash to the tree /// \param new_leaf_hash Hash of new leaf data @@ -76,7 +70,7 @@ class back_merkle_tree { /// If the bit i is set in leaf_count, we replace context[i] = hash(context[i], right) and move up a bit. /// If the bit is not set, we simply store context[i] = right and break. /// In other words, we can update the context in log time (log2_root_size-log2_leaf_size) - void push_back(const hash_type &new_leaf_hash); + void push_back(const machine_hash &new_leaf_hash); /// \brief Appends a number of padding hashes to the tree /// \param leaf_count Number of padding hashes to append @@ -107,7 +101,7 @@ class back_merkle_tree { /// are growing is to the right of what is in the context) /// If bit i is not set, we set root = hash(root, pristine[i+log2_leaf_size]) and move up a bit /// (i.e., to grow our subtree, we need to pad it on the right with a pristine subtree of the same size) - hash_type get_root_hash() const; + machine_hash get_root_hash() const; /// \brief Returns proof for the next pristine leaf /// \returns Proof for leaf at given index, or throws exception @@ -120,8 +114,9 @@ class back_merkle_tree { int m_log2_leaf_size; ///< Log2 of leaf size uint64_t m_leaf_count{0}; ///< Number of leaves already added uint64_t m_max_leaves; ///< Maximum number of leaves - std::vector m_context; ///< Hashes of bits set in leaf_count + std::vector m_context; ///< Hashes of bits set in leaf_count pristine_merkle_tree m_pristine_hashes; ///< Hash of pristine subtrees of all sizes + hash_function_type m_hash_function; ///< Hash function }; } // 
namespace cartesi diff --git a/src/cartesi-machine.lua b/src/cartesi-machine.lua index 73e1b6d78..d89dc69ae 100755 --- a/src/cartesi-machine.lua +++ b/src/cartesi-machine.lua @@ -205,13 +205,13 @@ where options are: configures the global hash tree the the machine : is one of - hasher: + hash_function: sht_filename: phtc_filename: phtc_size: shared - hasher (default: "keccak") + hash_function (default: "keccak256") hashing algorithm used for the tree sht_filename (optional) @@ -678,7 +678,7 @@ local uarch = { } local pmas = {} local hash_tree = { - hasher = default_config.hash_tree.hasher, + hash_function = default_config.hash_tree.hash_function, } local concurrency_update_hash_tree = 0 local skip_root_hash_check = false @@ -1063,7 +1063,7 @@ local options = { "^(%-%-hash%-tree%=(.+))$", function(all, opts) local h = util.parse_options(opts, { - hasher = true, + hash_function = true, sht_filename = true, phtc_filename = true, phtc_size = true, }) if h.sht_filename == true then h.sht_filename = "" end if h.phtc_filename == true then h.phtc_filename = "" end - if h.hasher == true then h.hasher = "keccak" end + if h.hash_function == true then h.hash_function = "keccak256" end if h.shared == nil or h.shared == "false" then h.shared = false end if h.shared == "true" then h.shared = true end - h.phtc_size = assert(util.parse_number(h.phtc_size), "invalid page hash cache size in " .. all) + if h.phtc_size ~= nil then + h.phtc_size = assert(util.parse_number(h.phtc_size), "invalid page hash cache size in " .. all) + end assert(type(h.shared) == "boolean", "invalid hash tree shared value in " .. 
all) for i, v in pairs(h) do hash_tree[i] = v diff --git a/src/cartesi/proof.lua b/src/cartesi/proof.lua deleted file mode 100644 index 2e661824b..000000000 --- a/src/cartesi/proof.lua +++ /dev/null @@ -1,43 +0,0 @@ -local cartesi = require("cartesi") - -local _M = {} - -function _M.roll_hash_up_tree(proof, target_hash) - local hash = target_hash - for log2_size = proof.log2_target_size, proof.log2_root_size - 1 do - local bit = (proof.target_address & (1 << log2_size)) ~= 0 - local first, second - local i = proof.log2_root_size - log2_size - if bit then - first, second = proof.sibling_hashes[i], hash - else - first, second = hash, proof.sibling_hashes[i] - end - hash = cartesi.keccak(first, second) - end - return hash -end - -function _M.slice_assert(root_hash, proof) - assert(root_hash == proof.root_hash, "proof root_hash mismatch") - assert(_M.roll_hash_up_tree(proof, proof.target_hash) == root_hash, "node not in tree") -end - -function _M.word_slice_assert(root_hash, proof, word) - assert(proof.log2_target_size == 3, "not a word proof") - assert(root_hash == proof.root_hash, "proof root_hash mismatch") - assert(cartesi.keccak(word) == proof.target_hash, "proof target_hash mismatch") - assert(_M.roll_hash_up_tree(proof, proof.target_hash) == root_hash, "node not in tree") -end - -function _M.splice_assert(root_hash, proof, new_target_hash, new_root_hash) - _M.slice_assert(root_hash, proof) - assert(_M.roll_hash_up_tree(proof, new_target_hash) == new_root_hash, "new root hash mismatch") -end - -function _M.word_splice_assert(root_hash, proof, old_word, new_word, new_root_hash) - _M.word_slice_assert(root_hash, proof, old_word) - assert(_M.roll_hash_up_tree(proof, cartesi.keccak(new_word)) == new_root_hash, "new root hash mismatch") -end - -return _M diff --git a/src/circular-buffer.h b/src/circular-buffer.h index 6cffb5755..4c59d46b9 100644 --- a/src/circular-buffer.h +++ b/src/circular-buffer.h @@ -23,12 +23,16 @@ #include #include #include +#include #include 
#include +#include +#include #include #include +#include "concepts.h" #include "meta.h" namespace cartesi { @@ -36,7 +40,7 @@ namespace cartesi { /// \brief Circular buffer container. /// \tparam T type of entry in container. /// \tparam N maximum number of entries container can hold. -/// \details The circular buffer contianer with a fixed number of entries. +/// \details The circular buffer container with a fixed number of entries. /// The container that be statically allocated, when \p N is passed as a template argument at compile time. /// It can be dynamically allocated, with the size is chosen in the constructor at runtime. /// \p T does not need to be default-constructible. @@ -167,10 +171,12 @@ class circular_buffer { /// \brief Adds new entry to back of container, if not already there /// \tparam U Type for universal reference to value /// \param value Value to insert. L-value references are copied, r-value references are moved. + /// \details The container must not be full. template requires std::constructible_from && std::equality_comparable_with void try_push_back(U &&value) noexcept { if (empty() || back() != value) { + assert(!full() && "circular buffer container is full"); push_back(std::forward(value)); } } diff --git a/src/clua-cartesi.cpp b/src/clua-cartesi.cpp index 6ec6923a6..b8176f619 100644 --- a/src/clua-cartesi.cpp +++ b/src/clua-cartesi.cpp @@ -31,9 +31,9 @@ extern "C" { #include "clua-i-machine.h" #include "clua.h" #include "htif-constants.h" -#include "keccak-256-hasher.h" #include "machine-c-api.h" #include "machine-c-version.h" +#include "machine-hash.h" #include "riscv-constants.h" #include "uarch-constants.h" #include "uarch-pristine.h" @@ -58,46 +58,61 @@ static const auto gperf_meta = clua_make_luaL_Reg_array({ }); #endif -/// \brief This is the cartesi.keccak() function implementation. +/// \brief Generic hasher function implementation. +/// \tparam hash_function Hash function type (keccak256 or sha256). /// \param L Lua state. 
-static int cartesi_mod_keccak(lua_State *L) { - using namespace cartesi; - keccak_256_hasher h; - machine_hash hash; - if (lua_gettop(L) > 2) { +template +static int cartesi_mod_hasher(lua_State *L) { + const auto nargs = lua_gettop(L); + if (nargs > 2) { luaL_argerror(L, 3, "too many arguments"); } - if (lua_gettop(L) < 1) { + if (nargs < 1) { luaL_argerror(L, 1, "too few arguments"); } - if (lua_isinteger(L, 1) != 0) { - if (lua_gettop(L) > 1) { - luaL_argerror(L, 2, "too many arguments"); + size_t len1 = 0; + size_t len2 = 0; + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + const auto *data1 = reinterpret_cast(luaL_checklstring(L, 1, &len1)); + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + const auto *data2 = reinterpret_cast(luaL_optlstring(L, 2, "", &len2)); + + cm_hash hash{}; + + if (len2 > 0) { // Concat hash of two inputs + if (len1 != CM_HASH_SIZE || len2 != CM_HASH_SIZE) { + luaL_argerror(L, 1, "concatenate hash is only supported for inputs with size of hash"); } - uint64_t word = luaL_checkinteger(L, 1); - h.begin(); // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - h.add_data(std::span(reinterpret_cast(&word), sizeof(word))); - h.end(hash); + const auto *left = reinterpret_cast(data1); // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - lua_pushlstring(L, reinterpret_cast(hash.data()), hash.size()); - return 1; + const auto *right = reinterpret_cast(data2); + if (cm_get_concat_hash(hash_function, left, right, &hash) != 0) { + return luaL_error(L, "%s", cm_get_last_error_message()); + } + } else { // Single hash + if (cm_get_hash(hash_function, data1, len1, &hash) != 0) { + return luaL_error(L, "%s", cm_get_last_error_message()); + } } - h.begin(); - size_t len1 = 0; - const char *hash1 = luaL_checklstring(L, 1, &len1); - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - h.add_data(std::span(hash1, len1)); - size_t len2 = 0; - const char *hash2 = luaL_optlstring(L, 2, "", 
&len2); - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - h.add_data(std::span(hash2, len2)); - h.end(hash); + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - lua_pushlstring(L, reinterpret_cast(hash.data()), hash.size()); + lua_pushlstring(L, reinterpret_cast(hash), sizeof(hash)); return 1; } +/// \brief This is the cartesi.keccak256() function implementation. +/// \param L Lua state. +static int cartesi_mod_keccak256(lua_State *L) { + return cartesi_mod_hasher(L); +} + +/// \brief This is the cartesi.sha256() function implementation. +/// \param L Lua state. +static int cartesi_mod_sha256(lua_State *L) { + return cartesi_mod_hasher(L); +} + static int cartesi_mod_tobase64(lua_State *L) try { size_t size = 0; const char *data = luaL_checklstring(L, 1, &size); @@ -153,7 +168,8 @@ static int cartesi_mod_new(lua_State *L) try { /// \brief Contents of the cartesi module table. static const auto cartesi_mod = clua_make_luaL_Reg_array({ - {"keccak", cartesi_mod_keccak}, + {"keccak256", cartesi_mod_keccak256}, + {"sha256", cartesi_mod_sha256}, {"tobase64", cartesi_mod_tobase64}, {"frombase64", cartesi_mod_frombase64}, {"tojson", cartesi_mod_tojson}, diff --git a/src/compiler-defines.h b/src/compiler-defines.h index b8dda2894..7aec1a521 100644 --- a/src/compiler-defines.h +++ b/src/compiler-defines.h @@ -17,6 +17,8 @@ #ifndef COMPILER_DEFINES_H #define COMPILER_DEFINES_H +// NOLINTBEGIN(cppcoreguidelines-macro-usage) + #ifndef CODE_COVERAGE #define FORCE_INLINE __attribute__((always_inline)) inline #else @@ -30,20 +32,38 @@ #define NO_RETURN [[noreturn]] // These macros are used only in very hot code paths (such as TLB hit checks). 
-// NOLINTNEXTLINE(cppcoreguidelines-macro-usage) #define likely(x) __builtin_expect((x), 1) -// NOLINTNEXTLINE(cppcoreguidelines-macro-usage) #define unlikely(x) __builtin_expect((x), 0) -//??E Although using PGO (Profile Guided Optimizations) makes use of these macros unneeded, -// using them allows for more performance without the need to compile with PGO, -// useful when doing benchmark of code changes. #define PACKED __attribute__((packed)) -#if defined(__GNUC__) && !defined(__clang__) -#define FORCE_OPTIMIZE_O3 __attribute__((optimize("-O3"))) +// Helper macros for stringification +#define TO_STRING_HELPER(X) #X +#define TO_STRING(X) TO_STRING_HELPER(X) + +// Define loop unrolling depending on the compiler +#if defined(__clang__) +#define UNROLL_LOOP(n) _Pragma(TO_STRING(unroll(n))) +#define UNROLL_LOOP_FULL(n) _Pragma(TO_STRING(unroll)) +#elif defined(__GNUC__) && !defined(__clang__) +#define UNROLL_LOOP(n) _Pragma(TO_STRING(GCC unroll(n))) +#define UNROLL_LOOP_FULL(n) _Pragma(TO_STRING(GCC unroll(65534))) +#else +#define UNROLL_LOOP(n) +#define UNROLL_LOOP_FULL(n) +#endif + +#if defined(__GNUC__) && defined(__amd64__) && !defined(NO_MULTIVERSIONING) +#define USE_MULTIVERSINING_AMD64 +#define MULTIVERSION_GENERIC __attribute__((target("default"))) +#define MULTIVERSION_AMD64_AVX2_BMI_BMI2 __attribute__((target("avx2,bmi,bmi2"))) +#define MULTIVERSION_AMD64_AVX512_BMI_BMI2 __attribute__((target("avx512f,avx512vl,bmi,bmi2"))) +#define MULTIVERSION_AMD64_AVX2 __attribute__((target("avx2"))) +#define MULTIVERSION_AMD64_AVX512 __attribute__((target("avx512f,avx512vl"))) #else -#define FORCE_OPTIMIZE_O3 +#define MULTIVERSION_GENERIC __attribute__((noinline)) #endif +// NOLINTEND(cppcoreguidelines-macro-usage) + #endif diff --git a/src/complete-merkle-tree.cpp b/src/complete-merkle-tree.cpp index 334862846..b7ca70fcd 100644 --- a/src/complete-merkle-tree.cpp +++ b/src/complete-merkle-tree.cpp @@ -23,17 +23,20 @@ #include #include "i-hasher.h" +#include 
"variant-hasher.h" /// \file /// \brief Complete Merkle tree implementation. namespace cartesi { -complete_merkle_tree::complete_merkle_tree(int log2_root_size, int log2_leaf_size, int log2_word_size) : +complete_merkle_tree::complete_merkle_tree(int log2_root_size, int log2_leaf_size, int log2_word_size, + hash_function_type hash_function) : m_log2_root_size{log2_root_size}, m_log2_leaf_size{log2_leaf_size}, - m_pristine{log2_root_size, log2_word_size}, - m_tree(std::max(0, log2_root_size - log2_leaf_size + 1)) { + m_pristine{log2_root_size, log2_word_size, hash_function}, + m_tree(std::max(0, log2_root_size - log2_leaf_size + 1)), + m_hash_function(hash_function) { check_log2_sizes(log2_root_size, log2_leaf_size, log2_word_size); } @@ -105,7 +108,7 @@ const machine_hash &complete_merkle_tree::get_node_hash(uint64_t address, int lo } void complete_merkle_tree::bubble_up() { - hasher_type h; + variant_hasher h{m_hash_function}; // Go bottom up, updating hashes for (int log2_next_size = get_log2_leaf_size() + 1; log2_next_size <= get_log2_root_size(); ++log2_next_size) { auto log2_prev_size = log2_next_size - 1; diff --git a/src/complete-merkle-tree.h b/src/complete-merkle-tree.h index 24950d927..e2e4462c7 100644 --- a/src/complete-merkle-tree.h +++ b/src/complete-merkle-tree.h @@ -22,10 +22,10 @@ #include #include "hash-tree-proof.h" -#include "keccak-256-hasher.h" #include "machine-hash.h" #include "meta.h" #include "pristine-merkle-tree.h" +#include "variant-hasher.h" /// \file /// \brief Complete Merkle tree interface. @@ -39,9 +39,6 @@ namespace cartesi { /// The tree is optimized to store only the hashes that are not pristine. class complete_merkle_tree { public: - /// \brief Hasher class. - using hasher_type = keccak_256_hasher; - /// \brief Storage for a proof. 
using proof_type = hash_tree_proof; @@ -52,15 +49,16 @@ class complete_merkle_tree { /// \param log2_root_size Log2 of tree size /// \param log2_leaf_size Log2 of leaf node /// \param log2_word_size Log2 of word - complete_merkle_tree(int log2_root_size, int log2_leaf_size, int log2_word_size); + complete_merkle_tree(int log2_root_size, int log2_leaf_size, int log2_word_size, hash_function_type hash_function); /// \brief Constructor from non-pristine leaves (assumed flushed left) /// \param log2_root_size Log2 of tree size /// \param log2_leaf_size Log2 of leaf node /// \param log2_word_size Log2 of word template - complete_merkle_tree(int log2_root_size, int log2_leaf_size, int log2_word_size, L &&leaves) : - complete_merkle_tree{log2_root_size, log2_leaf_size, log2_word_size} { + complete_merkle_tree(int log2_root_size, int log2_leaf_size, int log2_word_size, L &&leaves, + hash_function_type hash_function) : + complete_merkle_tree{log2_root_size, log2_leaf_size, log2_word_size, hash_function} { static_assert(std::is_same_v>, "not a leaves vector"); get_level(get_log2_leaf_size()) = std::forward(leaves); bubble_up(); @@ -124,10 +122,11 @@ class complete_merkle_tree { /// hash at level level_type &get_level(int log2_size); - int m_log2_root_size; ///< Log2 of tree size - int m_log2_leaf_size; ///< Log2 of page size - pristine_merkle_tree m_pristine; ///< Pristine hashes for all levels - std::vector m_tree; ///< Merkle tree + int m_log2_root_size; ///< Log2 of tree size + int m_log2_leaf_size; ///< Log2 of page size + pristine_merkle_tree m_pristine; ///< Pristine hashes for all levels + std::vector m_tree; ///< Merkle tree + hash_function_type m_hash_function; ///< Hash function }; } // namespace cartesi diff --git a/src/full-merkle-tree.cpp b/src/full-merkle-tree.cpp index 286673a1c..64e34c1c9 100644 --- a/src/full-merkle-tree.cpp +++ b/src/full-merkle-tree.cpp @@ -29,26 +29,29 @@ namespace cartesi { -full_merkle_tree::full_merkle_tree(int log2_root_size, int 
log2_leaf_size, int log2_word_size) : +full_merkle_tree::full_merkle_tree(int log2_root_size, int log2_leaf_size, int log2_word_size, + hash_function_type hash_function) : m_log2_root_size{log2_root_size}, m_log2_leaf_size{log2_leaf_size}, - m_max_leaves{UINT64_C(1) << std::max(0, log2_root_size - log2_leaf_size)} { + m_max_leaves{UINT64_C(1) << std::max(0, log2_root_size - log2_leaf_size)}, + m_hash_function{hash_function} { check_log2_sizes(log2_root_size, log2_leaf_size, log2_word_size); m_tree.resize(2 * m_max_leaves); - init_pristine_subtree(pristine_merkle_tree{log2_root_size, log2_word_size}, 1, log2_root_size); + init_pristine_subtree(pristine_merkle_tree{log2_root_size, log2_word_size, hash_function}, 1, log2_root_size); } full_merkle_tree::full_merkle_tree(int log2_root_size, int log2_leaf_size, int log2_word_size, - const std::vector &leaves) : + const std::vector &leaves, hash_function_type hash_function) : m_log2_root_size(log2_root_size), m_log2_leaf_size(log2_leaf_size), - m_max_leaves{UINT64_C(1) << std::max(0, log2_root_size - log2_leaf_size)} { + m_max_leaves{UINT64_C(1) << std::max(0, log2_root_size - log2_leaf_size)}, + m_hash_function{hash_function} { check_log2_sizes(log2_root_size, log2_leaf_size, log2_word_size); if (leaves.size() > m_max_leaves) { throw std::out_of_range{"too many leaves"}; } m_tree.resize(2 * m_max_leaves); - init_tree(pristine_merkle_tree{log2_root_size, log2_word_size}, leaves); + init_tree(pristine_merkle_tree{log2_root_size, log2_word_size, hash_function}, leaves); } full_merkle_tree::proof_type full_merkle_tree::get_proof(uint64_t address, int log2_size) const { @@ -68,7 +71,7 @@ full_merkle_tree::proof_type full_merkle_tree::get_proof(uint64_t address, int l proof.set_sibling_hash(get_node_hash(sibling_address, log2_sibling_size), log2_sibling_size); } #ifndef NDEBUG - hasher_type h{}; + variant_hasher h{m_hash_function}; if (!proof.verify(h)) { throw std::runtime_error{"produced invalid proof"}; } @@ -105,7 +108,7 
@@ void full_merkle_tree::init_pristine_subtree(const pristine_merkle_tree &pristin } } -void full_merkle_tree::init_subtree(hasher_type &h, int index, int log2_size) { +void full_merkle_tree::init_subtree(variant_hasher &h, int index, int log2_size) { if (log2_size > get_log2_leaf_size()) { init_subtree(h, left_child_index(index), log2_size - 1); init_subtree(h, right_child_index(index), log2_size - 1); @@ -117,7 +120,7 @@ void full_merkle_tree::init_tree(const pristine_merkle_tree &pristine, const std std::copy(leaves.begin(), leaves.end(), &m_tree[m_max_leaves]); std::fill_n(&m_tree[m_max_leaves + leaves.size()], m_max_leaves - leaves.size(), pristine.get_hash(get_log2_leaf_size())); - hasher_type h; + variant_hasher h{m_hash_function}; init_subtree(h, 1, get_log2_root_size()); } diff --git a/src/full-merkle-tree.h b/src/full-merkle-tree.h index 2f063642d..9898de6c1 100644 --- a/src/full-merkle-tree.h +++ b/src/full-merkle-tree.h @@ -21,9 +21,9 @@ #include #include "hash-tree-proof.h" -#include "keccak-256-hasher.h" #include "machine-hash.h" #include "pristine-merkle-tree.h" +#include "variant-hasher.h" /// \file /// \brief Full Merkle tree interface. @@ -34,9 +34,6 @@ namespace cartesi { /// \details This class implements a full merkle tree class full_merkle_tree { public: - /// \brief Hasher class. - using hasher_type = keccak_256_hasher; - /// \brief Storage for a proof. 
using proof_type = hash_tree_proof; @@ -44,7 +41,7 @@ class full_merkle_tree { /// \param log2_root_size Log2 of root node /// \param log2_leaf_size Log2 of leaf node /// \param log2_word_size Log2 of word - full_merkle_tree(int log2_root_size, int log2_leaf_size, int log2_word_size); + full_merkle_tree(int log2_root_size, int log2_leaf_size, int log2_word_size, hash_function_type hash_function); /// \brief Constructor for list of consecutive leaf hashes /// \param log2_root_size Log2 of root node @@ -52,7 +49,7 @@ class full_merkle_tree { /// \param log2_word_size Log2 of word /// \param leaves List of leaf hashes full_merkle_tree(int log2_root_size, int log2_leaf_size, int log2_word_size, - const std::vector &leaves); + const std::vector &leaves, hash_function_type hash_function); /// \brief Returns log2 of size of tree int get_log2_root_size() const { @@ -114,7 +111,7 @@ class full_merkle_tree { /// \param log2_size Log2 size of root at index /// \details The nodes corresponding to subtrees of size log2_leaf_size /// are assumed to have already been set prior to calling this function - void init_subtree(hasher_type &h, int index, int log2_size); + void init_subtree(variant_hasher &h, int index, int log2_size); /// \brief Initialize tree from a list of consecutive page hashes /// \param leaves List of page hashes @@ -128,10 +125,11 @@ class full_merkle_tree { /// \param log2_size uint64_t get_node_index(uint64_t address, int log2_size) const; - int m_log2_root_size; ///< Log2 of tree size - int m_log2_leaf_size; ///< Log2 of leaf size - uint64_t m_max_leaves; ///< Maximum number of leaves - std::vector m_tree; ///< Binary heap with tree node hashes + int m_log2_root_size; ///< Log2 of tree size + int m_log2_leaf_size; ///< Log2 of leaf size + uint64_t m_max_leaves; ///< Maximum number of leaves + std::vector m_tree; ///< Binary heap with tree node hashes + hash_function_type m_hash_function; ///< Hash function }; } // namespace cartesi diff --git 
a/src/hash-tree-stats.h b/src/hash-tree-stats.h index c439abc5f..74be38fae 100644 --- a/src/hash-tree-stats.h +++ b/src/hash-tree-stats.h @@ -27,8 +27,8 @@ namespace cartesi { struct hash_tree_stats { page_hash_tree_cache_stats phtc; - uint64_t sparse_node_hashes; - std::array dense_node_hashes; + uint64_t sparse_node_hashes{0}; + std::array dense_node_hashes{}; }; } // namespace cartesi diff --git a/src/hash-tree.cpp b/src/hash-tree.cpp index cd75eaae8..ce26c2905 100644 --- a/src/hash-tree.cpp +++ b/src/hash-tree.cpp @@ -16,7 +16,6 @@ #include "hash-tree.h" -#include #include #include #include @@ -24,10 +23,16 @@ #include #include +#ifdef _OPENMP #include +#endif +#include "i-hasher.h" #include "machine-address-ranges.h" +#include "machine-hash.h" +#include "page-hash-tree-cache-stats.h" #include "signposts.h" +#include "simd-hasher.h" namespace cartesi { @@ -73,9 +78,8 @@ void hash_tree::get_page_proof(address_range &ar, uint64_t address, proof_type & auto opt_br = m_page_cache.borrow_entry(paddr_page, hit); assert(opt_br && "page hash-tree cache has no entries to lend"); if (!hit) { - hasher_type h; [[maybe_unused]] bool changed = false; - update_dirty_page(h, ar, opt_br->get(), changed); + update_dirty_page(ar, opt_br->get(), changed); } const auto log2_target_size = proof.get_log2_target_size(); assert(log2_target_size >= HASH_TREE_LOG2_WORD_SIZE && "log2_size is too small"); @@ -156,16 +160,9 @@ hash_tree::proof_type hash_tree::get_proof(address_ranges ars, uint64_t address, hash_tree_stats hash_tree::get_stats(bool clear) noexcept { auto s = hash_tree_stats{ .phtc = m_page_cache.get_stats(clear), - .sparse_node_hashes = m_sparse_node_hashes.load(), + .sparse_node_hashes = m_sparse_node_hashes, + .dense_node_hashes = m_dense_node_hashes, }; - std::ranges::copy(m_dense_node_hashes | std::views::transform([](auto &a) { - std::cerr << "ha " << a.load() << '\n'; - return a.load(); - }), - s.dense_node_hashes.begin()); - for (auto i : s.dense_node_hashes) { - 
std::cerr << "hi " << i << '\n'; - } if (clear) { m_sparse_node_hashes = 0; for (auto &a : m_dense_node_hashes) { @@ -175,8 +172,7 @@ hash_tree_stats hash_tree::get_stats(bool clear) noexcept { return s; } -bool hash_tree::update_dirty_page(hasher_type &h, address_range &ar, page_hash_tree_cache::entry &entry, - bool &changed) { +bool hash_tree::update_dirty_page(address_range &ar, page_hash_tree_cache::entry &entry, bool &changed) { const auto paddr_page = entry.get_paddr_page(); const auto *base = ar.get_host_memory(); if (!ar.is_memory() || base == nullptr || !ar.contains_absolute(paddr_page, HASH_TREE_PAGE_SIZE)) { @@ -184,33 +180,98 @@ bool hash_tree::update_dirty_page(hasher_type &h, address_range &ar, page_hash_t } const auto offset = paddr_page - ar.get_start(); const auto page_view = std::span{base + offset, HASH_TREE_PAGE_SIZE}; - auto ret = m_page_cache.update_entry(h, page_view, entry); + variant_hasher h{m_hash_function}; + page_hash_tree_cache::simd_page_hasher queue(h); + auto &stats = m_page_cache.get_stats_ref(); + auto ret = m_page_cache.enqueue_hash_entry(queue, page_view, entry, stats); + stats.inner_page_hashes += queue.flush(); auto node_hash_view = ar.get_dense_hash_tree().node_hash_view(offset, HASH_TREE_LOG2_PAGE_SIZE); changed = !std::ranges::equal(entry.root_hash_view(), node_hash_view); if (changed) { std::ranges::copy(entry.root_hash_view(), node_hash_view.begin()); + ++stats.page_changes; } return ret; } +bool hash_tree::enqueue_hash_dirty_page(page_hash_tree_cache::simd_page_hasher &queue, + address_range &ar, page_hash_tree_cache::entry &entry, page_hash_tree_cache_stats &stats) { + const auto paddr_page = entry.get_paddr_page(); + const auto *base = ar.get_host_memory(); + if (!ar.is_memory() || base == nullptr || !ar.contains_absolute(paddr_page, HASH_TREE_PAGE_SIZE)) { + return false; + } + const auto offset = paddr_page - ar.get_start(); + const auto page_view = std::span{base + offset, HASH_TREE_PAGE_SIZE}; + return 
m_page_cache.enqueue_hash_entry(queue, page_view, entry, stats); +} + bool hash_tree::return_updated_dirty_pages(address_ranges ars, dirty_pages &batch, changed_address_ranges &changed_ars) { if (batch.empty()) { return true; } - const int batch_size = static_cast(batch.size()); - std::atomic update_failed{0}; // NOLINT(misc-const-correctness) //??D The batch size past which we switch to parallel updates needs to be tuned empirically - hasher_type h; // NOLINT(misc-const-correctness) -#pragma omp parallel for private(h) schedule(dynamic) if (batch_size > m_concurrency * 4) - // NOLINTNEXTLINE(modernize-loop-convert) - for (int i = 0; i < batch_size; ++i) { - auto &[ar_index, br, changed] = batch[i]; - auto &ar = ars[ar_index]; - if (!update_dirty_page(h, ar, br, changed)) { - update_failed.store(1, std::memory_order_relaxed); + const int batch_size = static_cast(batch.size()); + // Set block size to maximize SIMD lane utilization by hashing multiple pages together + const int block_size = std::min(static_cast(variant_hasher{m_hash_function}.get_optimal_lane_count()), + page_hash_tree_cache::simd_page_hasher::QUEUE_MAX_PAGE_COUNT); + // It's only worth to use multi-threading if we have enough entries to process + const int threads = + (m_concurrency > 1 && batch_size > m_concurrency && batch_size > block_size) ? 
m_concurrency : 1; + uint64_t update_failures{0}; + uint64_t word_hits{0}; + uint64_t word_misses{0}; + uint64_t page_changes{0}; + uint64_t inner_page_hashes{0}; + uint64_t pristine_pages{0}; + uint64_t non_pristine_pages{0}; +#pragma omp parallel for schedule(dynamic) if (threads > 1) num_threads(threads) reduction(+ : update_failures, \ + word_hits, word_misses, page_changes, inner_page_hashes, pristine_pages, non_pristine_pages) + for (int i = 0; i < batch_size; i += block_size) { + // Queue entries to be hashed + variant_hasher h{m_hash_function}; + page_hash_tree_cache::simd_page_hasher queue(h); + page_hash_tree_cache_stats stats; + for (int j = i; j < std::min(batch_size, i + block_size); ++j) { + auto &[ar_index, br, changed] = batch[j]; + auto &ar = ars[ar_index]; + if (!enqueue_hash_dirty_page(queue, ar, br, stats)) { + ++update_failures; + } } + // Flush SIMD hasher queue + stats.inner_page_hashes += queue.flush(); + // Update changed entries + for (int j = i; j < std::min(batch_size, i + block_size); ++j) { + auto &[ar_index, br, changed] = batch[j]; + auto &ar = ars[ar_index]; + const auto offset = br.get_paddr_page() - ar.get_start(); + auto root_hash_view = br.root_hash_view(); + auto node_hash_view = ar.get_dense_hash_tree().node_hash_view(offset, HASH_TREE_LOG2_PAGE_SIZE); + changed = !std::ranges::equal(root_hash_view, node_hash_view); + if (changed) { + std::ranges::copy(root_hash_view, node_hash_view.begin()); + ++stats.page_changes; + } + } + // Increment stats + word_hits += stats.word_hits; + word_misses += stats.word_misses; + page_changes += stats.page_changes; + inner_page_hashes += stats.inner_page_hashes; + pristine_pages += stats.pristine_pages; + non_pristine_pages += stats.non_pristine_pages; } + + auto &page_stats = m_page_cache.get_stats_ref(); + page_stats.word_hits += word_hits; + page_stats.word_misses += word_misses; + page_stats.page_changes += page_changes; + page_stats.inner_page_hashes += inner_page_hashes; + 
page_stats.pristine_pages += pristine_pages; + page_stats.non_pristine_pages += non_pristine_pages; + // Return all entries and collect address ranges that were actually changed by update for (auto &[ar_index, br, changed] : batch) { auto &ar = ars[ar_index]; @@ -229,24 +290,23 @@ bool hash_tree::return_updated_dirty_pages(address_ranges ars, dirty_pages &batc ar.get_dirty_page_tree().mark_clean_page_and_up(offset); } } else { - update_failed.store(1, std::memory_order_relaxed); + ++update_failures; } } // Done with batch batch.clear(); - return static_cast(update_failed.load(std::memory_order_relaxed)); + return update_failures > 0; } hash_tree::~hash_tree() { #ifdef DUMP_HASH_TREE_STATS std::cerr << "sparse node hashes: " << std::dec << m_sparse_node_hashes << '\n'; std::cerr << "dense node hashes: \n"; - int sum = 0; - for (int i = 0; auto &a : m_dense_node_hashes) { - auto av = a.load(); - sum += av; - if (av != 0) { - std::cerr << " " << std::dec << i << ": " << av << '\n'; + uint64_t sum = 0; + for (int i = 0; auto a : m_dense_node_hashes) { + sum += a; + if (a != 0) { + std::cerr << " " << std::dec << i << ": " << a << '\n'; } ++i; } @@ -314,22 +374,26 @@ void hash_tree::update_and_clear_dense_node_entries(dense_node_entries &batch, i if (batch.empty()) { return; } - const int batch_size = static_cast(batch.size()); //??D The batch size past which we switch to parallel updates needs to be tuned empirically - int updates = 0; - hasher_type h; // NOLINT(misc-const-correctness) -#pragma omp parallel for private(h, updates) schedule(dynamic) if (batch_size > m_concurrency * 32) - // NOLINTNEXTLINE(modernize-loop-convert) - for (decltype(batch.size()) i = 0; i < batch.size(); ++i) { - auto &[dht, offset] = batch[i]; - auto child_size = UINT64_C(1) << (log2_size - 1); - auto parent = dht.node_hash_view(offset, log2_size); - auto left = dht.node_hash_view(offset, log2_size - 1); - auto right = dht.node_hash_view(offset + child_size, log2_size - 1); - 
get_concat_hash(h, left, right, parent); - ++updates; - } - m_dense_node_hashes[log2_size] += updates; + const int batch_size = static_cast(batch.size()); + const int block_size = static_cast(variant_hasher{m_hash_function}.get_optimal_lane_count()); + // It's only worth to use multi-threading if we have enough entries to process + const int threads = (m_concurrency > 1 && batch_size > m_concurrency * block_size) ? m_concurrency : 1; +#pragma omp parallel for schedule(static) if (threads > 1) num_threads(threads) + for (int block_start = 0; block_start < batch_size; block_start += block_size) { + variant_hasher h{m_hash_function}; + simd_concat_hasher queue(h); + for (int i = block_start; i < std::min(batch_size, block_start + block_size); ++i) { + auto &[dht, offset] = batch[i]; + auto child_size = UINT64_C(1) << (log2_size - 1); + auto parent = dht.node_hash_view(offset, log2_size); + auto left = dht.node_hash_view(offset, log2_size - 1); + auto right = dht.node_hash_view(offset + child_size, log2_size - 1); + queue.enqueue(left, right, parent); + } + queue.flush(); + } + m_dense_node_hashes[log2_size] += batch_size; batch.clear(); } @@ -338,8 +402,8 @@ bool hash_tree::update_dense_trees(address_ranges ars, const changed_address_ran if (changed_ars.empty()) { return true; } - const auto thread_count = 8; - const auto batch_size = thread_count << 10; + // We can batch more if we have more concurrency, however we need to limit to not run out of memory. 
+ const size_t batch_size = std::min(m_concurrency << 10, 16384); dense_node_entries batch; batch.reserve(batch_size); // Get maximum log2_size of all address ranges @@ -397,7 +461,9 @@ bool hash_tree::update_sparse_tree(address_ranges ars, const changed_address_ran changed.emplace(ar_parent_node.log2_size, ar_node.parent); } } - hasher_type h; + variant_hasher h{m_hash_function}; + simd_concat_hasher queue(h); + int last_log2_size = -1; while (!changed.empty()) { auto [log2_size, inner_index] = changed.top(); changed.pop(); @@ -408,7 +474,12 @@ bool hash_tree::update_sparse_tree(address_ranges ars, const changed_address_ran inner_node.marked = 0; auto left_hash_view = get_sparse_node_hash_view(inner_node.left, log2_size - 1); auto right_hash_view = get_sparse_node_hash_view(inner_node.right, log2_size - 1); - get_concat_hash(h, left_hash_view, right_hash_view, inner_node.hash); + // When crossing tree height boundary, we need to flush the queue + if (last_log2_size != log2_size) { + last_log2_size = log2_size; + queue.flush(); + } + queue.enqueue(left_hash_view, right_hash_view, inner_node.hash); ++m_sparse_node_hashes; if (!is_pristine(inner_node.parent)) { auto &parent_node = m_sparse_nodes[inner_node.parent]; @@ -418,6 +489,7 @@ bool hash_tree::update_sparse_tree(address_ranges ars, const changed_address_ran } } } + queue.flush(); return true; } @@ -434,9 +506,8 @@ machine_hash hash_tree::get_dense_node_hash(address_range &ar, uint64_t address, auto opt_br = m_page_cache.borrow_entry(paddr_page, hit); assert(opt_br && "page hash-tree cache has no entries to lend"); if (!hit) { - hasher_type h; [[maybe_unused]] bool changed = false; - update_dirty_page(h, ar, *opt_br, changed); + update_dirty_page(ar, *opt_br, changed); } auto node_offset = address - paddr_page; auto hash = to_hash(opt_br->get().node_hash_view(node_offset, log2_size)); @@ -474,7 +545,7 @@ machine_hash hash_tree::get_node_hash(address_ranges ars, uint64_t address, int // transition to dense tree 
if (is_ar_node(node)) { auto &ar = ars[node.right]; - const int ar_log2_size = HASH_TREE_LOG2_PAGE_SIZE + ar.get_level_count() - 1; + [[maybe_unused]] const int ar_log2_size = HASH_TREE_LOG2_PAGE_SIZE + ar.get_level_count() - 1; assert(curr_log2_size == ar_log2_size && "incorrect ar node log2_size"); return get_dense_node_hash(ar, address, log2_size); } @@ -489,7 +560,7 @@ machine_hash hash_tree::get_node_hash(address_ranges ars, uint64_t address, int } bool hash_tree::verify(address_ranges ars) const { - hasher_type h; + variant_hasher h{m_hash_function}; bool ret = true; for (auto ar_node_index = get_ar_sparse_node_index(0); const auto &ar : ars) { const std::span mem{ar.get_host_memory(), ar.get_length()}; @@ -535,9 +606,8 @@ bool hash_tree::verify(address_ranges ars) const { bool hash_tree::update(address_ranges ars) { SCOPED_SIGNPOST(m_log, m_spid_update, "hash-tree: update", ""); - auto old_concurrency = omp_get_num_threads(); - omp_set_num_threads(m_concurrency); changed_address_ranges changed_ars; + changed_ars.reserve(ars.size()); auto update_succeeded = update_dirty_pages(ars, changed_ars) && update_dense_trees(ars, changed_ars) && update_sparse_tree(ars, changed_ars); if (update_succeeded) { @@ -545,14 +615,13 @@ bool hash_tree::update(address_ranges ars) { ars[ar_index].get_dirty_page_tree().clean(); } } - omp_set_num_threads(old_concurrency); return update_succeeded; } bool hash_tree::update_page(address_ranges ars, uint64_t paddr_page) { paddr_page >>= HASH_TREE_LOG2_PAGE_SIZE; paddr_page <<= HASH_TREE_LOG2_PAGE_SIZE; - hasher_type h; + variant_hasher h{m_hash_function}; // Find address range where page might lie auto it = std::ranges::find_if(ars, [paddr_page](auto &ar) { return ar.contains_absolute(paddr_page, HASH_TREE_PAGE_SIZE); }); @@ -567,7 +636,7 @@ bool hash_tree::update_page(address_ranges ars, uint64_t paddr_page) { auto &entry = opt_br->get(); bool changed = false; // Update page with data from address range - update_dirty_page(h, ar, 
entry, changed); + update_dirty_page(ar, entry, changed); // If nothing changed, we are done if (!changed) { m_page_cache.return_entry(entry); @@ -608,8 +677,8 @@ bool hash_tree::update_page(address_ranges ars, uint64_t paddr_page) { return true; } -hash_tree::pristine_hashes hash_tree::get_pristine_hashes() { - hasher_type h; +hash_tree::pristine_hashes hash_tree::get_pristine_hashes(hash_function_type hash_function) { + variant_hasher h{hash_function}; pristine_hashes hashes{}; std::array zero{}; machine_hash hash = get_hash(h, zero); @@ -620,25 +689,31 @@ hash_tree::pristine_hashes hash_tree::get_pristine_hashes() { return hashes; }; -static int get_concurrency(int value) { +static int get_concurrency([[maybe_unused]] int value) { +#ifdef _OPENMP const int concurrency = value != 0 ? value : omp_get_max_threads(); return std::min(concurrency, omp_get_max_threads()); +#else + return 1; +#endif } -hash_tree::hash_tree(const hash_tree_config &config, uint64_t concurrency, const_address_ranges ars) : +hash_tree::hash_tree(const hash_tree_config &config, uint64_t concurrency, const_address_ranges ars, + hash_function_type hash_function) : #ifdef HAS_SIGNPOSTS m_log{os_log_create("io.cartesi.machine-emulator", "hash-tree")}, m_spid_update{os_signpost_id_generate(m_log)}, m_spid_update_page_hashes{os_signpost_id_generate(m_log)}, m_spid_update_dense_trees{os_signpost_id_generate(m_log)}, m_spid_update_sparse_tree{os_signpost_id_generate(m_log)}, - m_page_cache{m_log, hasher_type{}, config.phtc_size}, + m_page_cache{m_log, variant_hasher{hash_function}, config.phtc_size}, #else - m_page_cache{hasher_type{}, config.phtc_size}, + m_page_cache{variant_hasher{hash_function}, config.phtc_size}, #endif m_sparse_nodes{create_nodes(ars)}, - m_pristine_hashes{get_pristine_hashes()}, - m_concurrency{get_concurrency(static_cast(concurrency))} { + m_pristine_hashes{get_pristine_hashes(hash_function)}, + m_concurrency{get_concurrency(static_cast(concurrency))}, + 
m_hash_function{hash_function} { } void hash_tree::check_address_ranges(const_address_ranges ars) { diff --git a/src/hash-tree.h b/src/hash-tree.h index 6c02a4cda..0f1898168 100644 --- a/src/hash-tree.h +++ b/src/hash-tree.h @@ -24,13 +24,13 @@ #include "hash-tree-constants.h" #include "hash-tree-proof.h" #include "hash-tree-stats.h" -#include "keccak-256-hasher.h" #include "machine-address-ranges.h" #include "machine-config.h" #include "machine-hash.h" #include "page-hash-tree-cache.h" #include "signposts.h" #include "unique-c-ptr.h" +#include "variant-hasher.h" namespace cartesi { @@ -106,13 +106,13 @@ class hash_tree { using const_address_ranges = hash_tree_view; public: - using hasher_type = keccak_256_hasher; using proof_type = hash_tree_proof; using nodes_type = std::vector; using sibling_hashes_type = std::vector; - hash_tree(const hash_tree_config &config, uint64_t concurrency, const_address_ranges ars); + hash_tree(const hash_tree_config &config, uint64_t concurrency, const_address_ranges ars, + hash_function_type hash_function); hash_tree(const hash_tree &other) = delete; hash_tree(hash_tree &&other) = delete; @@ -144,10 +144,12 @@ class hash_tree { machine_hash get_dense_node_hash(address_range &ar, uint64_t address, int log2_size); - static pristine_hashes get_pristine_hashes(); + static pristine_hashes get_pristine_hashes(hash_function_type hash_function); bool update_dirty_pages(address_ranges ars, changed_address_ranges &changed_ars); - bool update_dirty_page(hasher_type &h, address_range &ar, page_hash_tree_cache::entry &entry, bool &changed); + bool update_dirty_page(address_range &ar, page_hash_tree_cache::entry &entry, bool &changed); + bool enqueue_hash_dirty_page(page_hash_tree_cache::simd_page_hasher &queue, address_range &ar, + page_hash_tree_cache::entry &entry, page_hash_tree_cache_stats &stats); bool return_updated_dirty_pages(address_ranges ars, dirty_pages &batch, changed_address_ranges &changed_ars); bool 
update_dense_trees(address_ranges ars, const changed_address_ranges &changed_ars); @@ -178,9 +180,10 @@ class hash_tree { nodes_type m_sparse_nodes; const pristine_hashes m_pristine_hashes; int m_concurrency; + hash_function_type m_hash_function; - std::atomic m_sparse_node_hashes{0}; - std::array, HASH_TREE_LOG2_ROOT_SIZE> m_dense_node_hashes{}; + uint64_t m_sparse_node_hashes{0}; + std::array m_dense_node_hashes{}; }; } // namespace cartesi diff --git a/src/i-hasher.h b/src/i-hasher.h index ad9429a6a..1b67965cb 100644 --- a/src/i-hasher.h +++ b/src/i-hasher.h @@ -27,12 +27,17 @@ #include #include +#include "array2d.h" #include "concepts.h" +#include "hash-tree-constants.h" #include "machine-hash.h" #include "meta.h" namespace cartesi { +using hash_tree_word_view = std::span; +using const_hash_tree_word_view = std::span; + /// \brief Hasher interface. /// \tparam DERIVED Derived class implementing the interface. (An example of CRTP.) template @@ -48,17 +53,69 @@ class i_hasher { // CRTP } public: - void begin() noexcept { - return derived().do_begin(); + static constexpr int MAX_LANE_COUNT = DERIVED::MAX_LANE_COUNT; ///< Number of maximum supported SIMD lanes + + template + void hash(D &&data, machine_hash_view hash) noexcept { // NOLINT(cppcoreguidelines-missing-std-forward) + const auto data_span = + std::span{// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + reinterpret_cast(std::ranges::data(data)), std::ranges::size(data)}; + if (data_span.size() == HASH_TREE_WORD_SIZE) { // Special case for hash tree word hashing + return derived().do_simd_concat_hash( + array2d, 1, 1>{{{const_hash_tree_word_view{data_span}}}}, + std::array{hash}); + } + return derived().do_simd_concat_hash(array2d, 1, 1>{{{data_span}}}, + std::array{hash}); + } + + void hash(const_hash_tree_word_view data, machine_hash_view hash) noexcept { + return derived().do_simd_concat_hash(array2d{{{data}}}, + std::array{hash}); } template - void add_data(D &&data) noexcept { - return 
derived().do_add_data(std::forward(data)); + void concat_hash(D &&data1, D &&data2, // NOLINT(cppcoreguidelines-missing-std-forward) + machine_hash_view hash) noexcept { + auto data1_span = std::span{// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + reinterpret_cast(std::ranges::data(data1)), std::ranges::size(data1)}; + auto data2_span = std::span{// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + reinterpret_cast(std::ranges::data(data2)), std::ranges::size(data2)}; + if (data1_span.size() == MACHINE_HASH_SIZE && + data2_span.size() == MACHINE_HASH_SIZE) { // Special case for hash tree hash concatenation + return derived().do_simd_concat_hash( + array2d{ + {{const_machine_hash_view{data1_span}}, {const_machine_hash_view{data2_span}}}}, + std::array{hash}); + } + return derived().do_simd_concat_hash( + array2d, 2, 1>{{{data1_span}, {data2_span}}}, + std::array{hash}); + } + + void concat_hash(const_machine_hash_view data1, const_machine_hash_view data2, machine_hash_view hash) noexcept { + return derived().do_simd_concat_hash(array2d{{{data1}, {data2}}}, + std::array{hash}); + } + + // \brief Hashes the concatenation of data using multiple SIMD lanes. + // \tparam LaneCount Number of SIMD lanes + // \tparam ConcatCount Number of concatenated data items + // \tparam Extent Extent of the data span + // \param data Data to hash, as a multi-dimensional array of spans + // \param hash Array of machine hashes to store the results + // \warning When LaneCount is greater than 1, it is assumed data spans have same size, there is no check for that. + template + void simd_concat_hash(const array2d &data, + const std::array &hash) noexcept { + return derived().do_simd_concat_hash(data, hash); } - void end(machine_hash_view hash) noexcept { - return derived().do_end(hash); + // \brief Gets the optimal number of SIMD lanes for hashing. + // \returns The optimal number of SIMD lanes for hashing. 
+ // \details This value is architecture-dependent and may vary based on the available instruction sets. + size_t get_optimal_lane_count() const noexcept { + return derived().do_get_optimal_lane_count(); } }; @@ -79,9 +136,7 @@ concept IHasher = is_an_i_hasher_v; /// \param result Receives the hash of data template inline static void get_hash(H &h, D &&data, machine_hash_view result) noexcept { - h.begin(); - h.add_data(std::forward(data)); - h.end(result); + h.hash(std::forward(data), result); } /// \brief Computes the hash of data @@ -105,10 +160,7 @@ inline static machine_hash get_hash(H &&h, D &&data) noexcept { template inline static void get_concat_hash(H &h, const_machine_hash_view left, const_machine_hash_view right, machine_hash_view result) noexcept { - h.begin(); - h.add_data(left); - h.add_data(right); - h.end(result); + h.concat_hash(left, right, result); } /// \brief Computes the hash of concatenated hashes diff --git a/src/is-pristine.cpp b/src/is-pristine.cpp new file mode 100644 index 000000000..4daa6d300 --- /dev/null +++ b/src/is-pristine.cpp @@ -0,0 +1,67 @@ +// Copyright Cartesi and individual authors (see AUTHORS) +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) any +// later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +// PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License along +// with this program (see COPYING). If not, see . 
+// + +#include "is-pristine.h" + +#include +#include +#include + +#include "address-range-constants.h" +#include "compiler-defines.h" + +namespace cartesi { + +template +FORCE_INLINE static bool is_pristine_impl(std::span data) noexcept { + unsigned char bits = 0; + // GCC and Clang are smart enough to use SIMD instructions with large words on this loop, + // however it may not unroll the loops, so we unroll it manually. + UNROLL_LOOP(64) + for (const unsigned char b : data) { + bits |= b; + } + return bits == 0; +} + +// Generic implementations + +MULTIVERSION_GENERIC bool is_pristine(std::span data) noexcept { + return is_pristine_impl(data); +} +MULTIVERSION_GENERIC bool is_pristine(std::span data) noexcept { + return is_pristine_impl(data); +} + +// x86_64 implementations + +#ifdef USE_MULTIVERSINING_AMD64 +MULTIVERSION_AMD64_AVX2 bool is_pristine(std::span data) noexcept { + return is_pristine_impl(data); +} +MULTIVERSION_AMD64_AVX2 bool is_pristine(std::span data) noexcept { + return is_pristine_impl(data); +} + +MULTIVERSION_AMD64_AVX512 bool is_pristine(std::span data) noexcept { + return is_pristine_impl(data); +} +MULTIVERSION_AMD64_AVX512 bool is_pristine(std::span data) noexcept { + return is_pristine_impl(data); +} +#endif + +} // namespace cartesi diff --git a/src/is-pristine.h b/src/is-pristine.h index f371399ab..eab0b327e 100644 --- a/src/is-pristine.h +++ b/src/is-pristine.h @@ -17,42 +17,41 @@ #ifndef IS_PRISTINE_H #define IS_PRISTINE_H +#include "compiler-defines.h" + #include #include #include +#include +#include "address-range-constants.h" +#include "concepts.h" #include "meta.h" namespace cartesi { -/// \brief This is an optimized function for checking if memory page is pristine. -/// \param data Memory pointer -/// \param length Memory length -/// \returns True if all values are 0, false otherwise +/// \brief This is an optimized function for checking if data is pristine. +/// \param data Memory data. 
+/// \returns True if all values are 0, false otherwise. /// \details It's to be used in situations where length is equal or less than a page size. -static inline bool is_pristine(const unsigned char *data, size_t length) { - // This tight for loop has no branches, and is optimized to SIMD instructions in x86_64, - // making it very fast to check if a given page is pristine. - unsigned char bits = 0; - for (size_t i = 0; i < length; ++i) { - bits |= data[i]; - } - return bits == 0; -} +MULTIVERSION_GENERIC bool is_pristine(std::span data) noexcept; /// \brief This is an optimized function for checking if memory page is pristine. -/// \param r Contiguous range of byte-like values -/// \returns True if all values are 0, false otherwise -/// \details It's to be used in situations where length is equal or less than a page size. +MULTIVERSION_GENERIC bool is_pristine(std::span data) noexcept; + template -static inline bool is_pristine(R &&r) { // NOLINT(cppcoreguidelines-missing-std-forward) - std::ranges::range_value_t bits{0}; - for (auto b : r) { - bits |= b; - } - return bits == 0; +bool is_pristine(R &&r) noexcept { // NOLINT(cppcoreguidelines-missing-std-forward) + return is_pristine(std::span{std::ranges::data(r), std::ranges::size(r)}); } +#ifdef USE_MULTIVERSINING_AMD64 +MULTIVERSION_AMD64_AVX2 bool is_pristine(std::span data) noexcept; +MULTIVERSION_AMD64_AVX2 bool is_pristine(std::span data) noexcept; + +MULTIVERSION_AMD64_AVX512 bool is_pristine(std::span data) noexcept; +MULTIVERSION_AMD64_AVX512 bool is_pristine(std::span data) noexcept; +#endif + } // namespace cartesi #endif diff --git a/src/json-util.cpp b/src/json-util.cpp index f1607b496..993d5eb66 100644 --- a/src/json-util.cpp +++ b/src/json-util.cpp @@ -552,6 +552,26 @@ static uarch_interpreter_break_reason uarch_interpreter_break_reason_from_name(c throw std::domain_error{"invalid uarch interpreter break reason"}; } +static hash_function_type hash_function_from_name(const std::string &name) { + 
const static std::unordered_map g_hash_function_name = { + {"sha256", hash_function_type::sha256}, {"keccak256", hash_function_type::keccak256}}; + auto got = g_hash_function_name.find(name); + if (got == g_hash_function_name.end()) { + throw std::domain_error{"invalid hash function type"}; + } + return got->second; +} + +static std::string hash_function_name(hash_function_type hf) { + switch (hf) { + case hash_function_type::keccak256: + return "keccak256"; + case hash_function_type::sha256: + return "sha256"; + } + throw std::domain_error{"invalid hash function type"}; +} + static std::string access_type_name(access_type at) { switch (at) { case access_type::read: @@ -909,6 +929,7 @@ void ju_get_opt_field(const nlohmann::json &j, const K &key, page_hash_tree_cach ju_get_opt_field(jstats, "page_misses"s, value.page_misses, new_path); ju_get_opt_field(jstats, "word_hits"s, value.word_hits, new_path); ju_get_opt_field(jstats, "word_misses"s, value.word_misses, new_path); + ju_get_opt_field(jstats, "page_changes"s, value.page_changes, new_path); ju_get_opt_field(jstats, "inner_page_hashes"s, value.inner_page_hashes, new_path); ju_get_opt_field(jstats, "pristine_pages"s, value.pristine_pages, new_path); ju_get_opt_field(jstats, "non_pristine_pages"s, value.non_pristine_pages, new_path); @@ -1379,6 +1400,24 @@ template void ju_get_opt_field(const nlohmann::json &j, const uint64_t template void ju_get_opt_field(const nlohmann::json &j, const std::string &key, memory_range_config &value, const std::string &path); +template +void ju_get_opt_field(const nlohmann::json &j, const K &key, hash_function_type &value, const std::string &path) { + if (!contains(j, key, path)) { + return; + } + const auto &jk = j[key]; + if (!jk.is_string()) { + throw std::invalid_argument("\""s + path + to_string(key) + "\" not a string"); + } + value = hash_function_from_name(jk.template get()); +} + +template void ju_get_opt_field(const nlohmann::json &j, const uint64_t &key, hash_function_type 
&value, + const std::string &path); + +template void ju_get_opt_field(const nlohmann::json &j, const std::string &key, hash_function_type &value, + const std::string &path); + template void ju_get_opt_field(const nlohmann::json &j, const K &key, hash_tree_config &value, const std::string &path) { if (!contains(j, key, path)) { @@ -1392,6 +1431,7 @@ void ju_get_opt_field(const nlohmann::json &j, const K &key, hash_tree_config &v ju_get_opt_field(jconfig, "sht_filename"s, value.sht_filename, new_path); ju_get_opt_field(jconfig, "phtc_filename"s, value.phtc_filename, new_path); ju_get_opt_field(jconfig, "phtc_size"s, value.phtc_size, new_path); + ju_get_opt_field(jconfig, "hash_function"s, value.hash_function, new_path); } template void ju_get_opt_field(const nlohmann::json &j, const uint64_t &key, hash_tree_config &value, @@ -1882,8 +1922,8 @@ void to_json(nlohmann::json &j, const memory_range_config &config) { void to_json(nlohmann::json &j, const hash_tree_config &config) { j = nlohmann::json{{"shared", config.shared}, {"create", config.create}, {"truncate", config.truncate}, - {"sht_filename", config.sht_filename}, {"phtc_filename", config.phtc_filename}, - {"phtc_size", config.phtc_size}}; + {"sht_filename", config.sht_filename}, {"phtc_filename", config.phtc_filename}, {"phtc_size", config.phtc_size}, + {"hash_function", hash_function_name(config.hash_function)}}; } void to_json(nlohmann::json &j, const registers_state &config) { @@ -2140,6 +2180,7 @@ void to_json(nlohmann::json &j, const page_hash_tree_cache_stats &stats) { {"page_misses", stats.page_misses}, {"word_hits", stats.word_hits}, {"word_misses", stats.word_misses}, + {"page_changes", stats.page_changes}, {"inner_page_hashes", stats.inner_page_hashes}, {"pristine_pages", stats.pristine_pages}, {"non_pristine_pages", stats.non_pristine_pages}, diff --git a/src/json-util.h b/src/json-util.h index c8aa9bb1c..aebb94c0b 100644 --- a/src/json-util.h +++ b/src/json-util.h @@ -511,6 +511,16 @@ void 
ju_get_opt_field(const nlohmann::json &j, const K &key, uarch_processor_con template void ju_get_opt_field(const nlohmann::json &j, const K &key, uarch_config &value, const std::string &path = "params/"); +/// \brief Attempts to load a hash_function_type object from a field in a JSON object +/// \tparam K Key type (explicit extern declarations for uint64_t and std::string are provided) +/// \param j JSON object to load from +/// \param key Key to load value from +/// \param value Object to store value +/// \param path Path to j +template +void ju_get_opt_field(const nlohmann::json &j, const K &key, hash_function_type &value, + const std::string &path = "params/"); + /// \brief Attempts to load an hash_tree_config object from a field in a JSON object /// \tparam K Key type (explicit extern declarations for uint64_t and std::string are provided) /// \param j JSON object to load from diff --git a/src/jsonrpc-discover.json b/src/jsonrpc-discover.json index 40c9a4a93..4532af60a 100644 --- a/src/jsonrpc-discover.json +++ b/src/jsonrpc-discover.json @@ -389,13 +389,13 @@ "description": "Whether to clear the stats after retrieving them", "required": true, "schema": { - "type": "boolean" + "type": "boolean" } } ], "result": { "name": "stats", - "description": "Statistics for the hash tre", + "description": "Statistics for the hash tree", "schema": { "$ref": "#/components/schemas/HashTreeStats" } @@ -1023,6 +1023,9 @@ "word_misses": { "$ref": "#/components/schemas/UnsignedInteger" }, + "page_changes": { + "$ref": "#/components/schemas/UnsignedInteger" + }, "inner_page_hashes": { "$ref": "#/components/schemas/UnsignedInteger" }, @@ -1724,9 +1727,6 @@ "title": "HashTreeConfig", "type": "object", "properties": { - "hasher": { - "type": "string" - }, "shared": { "type": "boolean" }, @@ -1744,6 +1744,9 @@ }, "phtc_size": { "$ref": "#/components/schemas/UnsignedInteger" + }, + "hash_function": { + "$ref": "#/components/schemas/HashFunctionType" } } }, @@ -1797,6 +1800,10 @@ }, 
"required": ["ram"] }, + "HashFunctionType": { + "title": "HashFunctionType", + "enum": ["keccak256", "sha256"] + }, "InterpreterBreakReason": { "title": "InterpreterBreakReason", "enum": [ diff --git a/src/jsonrpc-machine.cpp b/src/jsonrpc-machine.cpp index 0b97415af..ca6b977f7 100644 --- a/src/jsonrpc-machine.cpp +++ b/src/jsonrpc-machine.cpp @@ -538,7 +538,7 @@ jsonrpc_machine::jsonrpc_machine(const std::string &address, int64_t spawn_timeo // Rebind the forked server to listen on the originally requested address std::string rebind_result; - request("rebind", std::tie(forked_grand_child.address), rebind_result, timeout_at, false); + request("rebind", std::tie(address), rebind_result, timeout_at, false); m_address = rebind_result; // At this point, we've confirmed the remote server is properly initialized and running diff --git a/src/jsonrpc-remote-machine.cpp b/src/jsonrpc-remote-machine.cpp index 3cbdcfa40..8afb99d4a 100644 --- a/src/jsonrpc-remote-machine.cpp +++ b/src/jsonrpc-remote-machine.cpp @@ -461,7 +461,7 @@ static json jsonrpc_response_invalid_params(const json &j, const std::string &me static void jsonrpc_check_allowed_fields(const json &j, const std::unordered_set &keys, const std::string &base = "params/") { for (const auto &[key, val] : j.items()) { - if (keys.find(key) == keys.end()) { + if (!keys.contains(key)) { // NOLINTNEXTLINE(performance-inefficient-string-concatenation) throw std::invalid_argument("unexpected field \"/"s + base + key + "\""s); } diff --git a/src/keccak-256-hasher.cpp b/src/keccak-256-hasher.cpp new file mode 100644 index 000000000..2ec108400 --- /dev/null +++ b/src/keccak-256-hasher.cpp @@ -0,0 +1,256 @@ +// Copyright Cartesi and individual authors (see AUTHORS) +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation, either version 3 of the 
License, or (at your option) any +// later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +// PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License along +// with this program (see COPYING). If not, see . +// + +#include "keccak-256-hasher.h" +#include "compiler-defines.h" +#include "i-hasher.h" +#include "keccakf.h" +#include "machine-hash.h" +#include "simd-vector-type.h" + +#include +#include +#include +#include + +namespace cartesi { + +// This code is not portable to big-endian architectures. +// NOLINTNEXTLINE(misc-redundant-expression) +static_assert(std::endian::native == std::endian::little, "code assumes little-endian byte ordering"); + +constexpr size_t KECCAK_WORD_COUNT = 25; +constexpr size_t KECCAK_RSIZE = (KECCAK_WORD_COUNT * sizeof(uint64_t)) - (static_cast(2) * MACHINE_HASH_SIZE); + +template +struct alignas(uint64_vector_type::align) keccak_256_context final { + using word_vector_type = uint64_vector_type::type; + using word_bytes_array = uint8_t[KECCAK_WORD_COUNT][LaneCount][sizeof(uint64_t)]; + using data_span = std::span; + + static constexpr size_t word_vector_align = uint64_vector_type::align; // 64-bit m_words + + word_vector_type m_words[5][5]{}; ///< Buffer for Keccak-256 words, interleaved by lanes + + FORCE_INLINE void update(const std::array &data, size_t &pos) noexcept { + [[maybe_unused]] auto words_bytes = // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + reinterpret_cast(m_words); + // Assume all data spans have the same length + const size_t data_len = data[0].size(); + for (size_t i = 0; i < data_len;) { + // Interleave data while XORing + const size_t step = std::min(KECCAK_RSIZE - pos, data_len - i); + if constexpr (DataExtent != std::dynamic_extent && DataExtent % 
sizeof(uint64_t) == 0) { + // If data length is a multiple of word size, process a word at time + UNROLL_LOOP(128) + for (size_t j = 0; j < step; j += sizeof(uint64_t)) { + word_vector_type data_word; + UNROLL_LOOP_FULL() + for (size_t l = 0; l < LaneCount; ++l) { + uint64_t lane_word{}; + __builtin_memcpy(&lane_word, &data[l][i + j], sizeof(lane_word)); + data_word[l] = lane_word; + } + m_words[((pos + j) / sizeof(uint64_t)) / 5][((pos + j) / sizeof(uint64_t)) % 5] ^= data_word; + } + } else { // Otherwise, process a byte at time + UNROLL_LOOP(128) + for (size_t j = 0; j < step; j++) { + const size_t bi = (pos + j) / sizeof(uint64_t); + const size_t bj = (pos + j) % sizeof(uint64_t); + UNROLL_LOOP_FULL() + for (size_t l = 0; l < LaneCount; ++l) { + words_bytes[bi][l][bj] ^= data[l][i + j]; + } + } + } + i += step; + pos += step; + // Perform Keccak-256 permutation + if (pos >= KECCAK_RSIZE) [[unlikely]] { + keccakf_1600(m_words); + pos = 0; + } + } + } + + FORCE_INLINE void finish(const std::array &hashes, size_t pos) noexcept { + // Append delimiter suffix + constexpr uint64_t KECCAK_DSUFFIX = 0x01; + const size_t dsuffix_word_pos = pos / sizeof(uint64_t); + const size_t dsuffix_byte_pos = pos % sizeof(uint64_t); + m_words[dsuffix_word_pos / 5][dsuffix_word_pos % 5] ^= KECCAK_DSUFFIX << (dsuffix_byte_pos * 8); + // Append last bit + constexpr uint64_t KECCAK_LASTBIT = 0x80; + constexpr size_t lastbit_word_pos = (KECCAK_RSIZE - 1) / sizeof(uint64_t); + constexpr size_t lastbit_byte_pos = (KECCAK_RSIZE - 1) % sizeof(uint64_t); + m_words[lastbit_word_pos / 5][lastbit_word_pos % 5] ^= KECCAK_LASTBIT << (lastbit_byte_pos * 8); + // Perform last permutation + keccakf_1600(m_words); + // Deinterleave hash + UNROLL_LOOP_FULL() + for (size_t l = 0; l < LaneCount; ++l) { + UNROLL_LOOP_FULL() + for (size_t i = 0; i < MACHINE_HASH_SIZE; i += sizeof(uint64_t)) { + const uint64_t word = m_words[0][i / sizeof(uint64_t)][l]; + __builtin_memcpy(&hashes[l][i], &word, 
sizeof(uint64_t)); + } + } + } + template + FORCE_INLINE static void simd_concat_hash(array2d data, + std::array hashes) noexcept { + keccak_256_context ctx; + // Position is kept local to allow the compiler optimize it out when DataExtent is a compile time constant. + size_t pos = 0; + UNROLL_LOOP(4) + for (size_t i = 0; i < ConcatCount; ++i) { + ctx.update(data[i], pos); + } + ctx.finish(hashes, pos); + } +}; + +// Generic implementations + +MULTIVERSION_GENERIC size_t keccak_256_get_optimal_lane_count() noexcept { +#if defined(__x86_64__) + // On AMD64, SSE2 has 128-bit registers, supporting up to 2 lanes. + return 2; +#elif defined(__aarch64__) + // On ARM64, NEON has 128-bit registers, supporting up to 2 lanes. + return 2; +#elif defined(__riscv) && defined(__riscv_v) + // RISC-V with Vector extension, we assume 128-bit registers are available, supporting up to 2 lanes. + return 2; +#elif defined(__wasm_simd128__) + // WebAssembly with SIMD extension has 128-bit registers, supporting up to 2 lanes. + return 2; +#else + // For other architectures, we assume vector instructions are not available and use scalar implementation. 
+ return 1; +#endif +} + +MULTIVERSION_GENERIC void keccak_256_data_1x1(const array2d, 1, 1> &data, + const std::array &hash) noexcept { + keccak_256_context<1>::simd_concat_hash<1>(data, hash); +} +MULTIVERSION_GENERIC void keccak_256_word_1x1(const array2d &data, + const std::array &hash) noexcept { + keccak_256_context<1, const_hash_tree_word_view::extent>::simd_concat_hash(data, hash); +} +MULTIVERSION_GENERIC void keccak_256_word_1x2(const array2d &data, + const std::array &hash) noexcept { + keccak_256_context<2, const_hash_tree_word_view::extent>::simd_concat_hash(data, hash); +} +MULTIVERSION_GENERIC void keccak_256_word_1x4(const array2d &data, + const std::array &hash) noexcept { + keccak_256_context<4, const_hash_tree_word_view::extent>::simd_concat_hash(data, hash); +} +MULTIVERSION_GENERIC void keccak_256_word_1x8(const array2d &data, + const std::array &hash) noexcept { + keccak_256_context<8, const_hash_tree_word_view::extent>::simd_concat_hash(data, hash); +} +MULTIVERSION_GENERIC void keccak_256_data_2x1(const array2d, 2, 1> &data, + const std::array &hash) noexcept { + keccak_256_context<1>::simd_concat_hash(data, hash); +} +MULTIVERSION_GENERIC void keccak_256_hash_2x1(const array2d &data, + const std::array &hash) noexcept { + keccak_256_context<1, const_machine_hash_view::extent>::simd_concat_hash(data, hash); +} +MULTIVERSION_GENERIC void keccak_256_hash_2x2(const array2d &data, + const std::array &hash) noexcept { + keccak_256_context<2, const_machine_hash_view::extent>::simd_concat_hash(data, hash); +} +MULTIVERSION_GENERIC void keccak_256_hash_2x4(const array2d &data, + const std::array &hash) noexcept { + keccak_256_context<4, const_machine_hash_view::extent>::simd_concat_hash(data, hash); +} +MULTIVERSION_GENERIC void keccak_256_hash_2x8(const array2d &data, + const std::array &hash) noexcept { + keccak_256_context<8, const_machine_hash_view::extent>::simd_concat_hash(data, hash); +} + +// x86_64 implementations + +#ifdef 
USE_MULTIVERSINING_AMD64 + +// AVX2 +MULTIVERSION_AMD64_AVX2_BMI_BMI2 size_t keccak_256_get_optimal_lane_count() noexcept { + // AVX2 has 256-bit registers, supporting up to 4 lanes. + return 4; +} +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void keccak_256_data_1x1(const array2d, 1, 1> &data, + const std::array &hash) noexcept { + keccak_256_context<1>::simd_concat_hash<1>(data, hash); +} +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void keccak_256_word_1x1(const array2d &data, + const std::array &hash) noexcept { + keccak_256_context<1, const_hash_tree_word_view::extent>::simd_concat_hash(data, hash); +} +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void keccak_256_word_1x2(const array2d &data, + const std::array &hash) noexcept { + keccak_256_context<2, const_hash_tree_word_view::extent>::simd_concat_hash(data, hash); +} +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void keccak_256_word_1x4(const array2d &data, + const std::array &hash) noexcept { + keccak_256_context<4, const_hash_tree_word_view::extent>::simd_concat_hash(data, hash); +} +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void keccak_256_word_1x8(const array2d &data, + const std::array &hash) noexcept { + keccak_256_context<8, const_hash_tree_word_view::extent>::simd_concat_hash(data, hash); +} +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void keccak_256_data_2x1(const array2d, 2, 1> &data, + const std::array &hash) noexcept { + keccak_256_context<1>::simd_concat_hash(data, hash); +} +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void keccak_256_hash_2x1(const array2d &data, + const std::array &hash) noexcept { + keccak_256_context<1, const_machine_hash_view::extent>::simd_concat_hash(data, hash); +} +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void keccak_256_hash_2x2(const array2d &data, + const std::array &hash) noexcept { + keccak_256_context<2, const_machine_hash_view::extent>::simd_concat_hash(data, hash); +} +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void keccak_256_hash_2x4(const array2d &data, + const std::array &hash) noexcept { + keccak_256_context<4, 
const_machine_hash_view::extent>::simd_concat_hash(data, hash); +} +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void keccak_256_hash_2x8(const array2d &data, + const std::array &hash) noexcept { + keccak_256_context<8, const_machine_hash_view::extent>::simd_concat_hash(data, hash); +} + +// AVX-512 + +MULTIVERSION_AMD64_AVX512_BMI_BMI2 size_t keccak_256_get_optimal_lane_count() noexcept { + // AVX-512 has 512-bit registers, supporting up to 8 lanes. + return 8; +} +MULTIVERSION_AMD64_AVX512_BMI_BMI2 void keccak_256_word_1x8(const array2d &data, + const std::array &hash) noexcept { + keccak_256_context<8, const_hash_tree_word_view::extent>::simd_concat_hash(data, hash); +} +MULTIVERSION_AMD64_AVX512_BMI_BMI2 void keccak_256_hash_2x8(const array2d &data, + const std::array &hash) noexcept { + keccak_256_context<8, const_machine_hash_view::extent>::simd_concat_hash(data, hash); +} + +#endif // USE_MULTIVERSINING_AMD64 + +} // namespace cartesi diff --git a/src/keccak-256-hasher.h b/src/keccak-256-hasher.h index 89fdf3e7e..7d8540c89 100644 --- a/src/keccak-256-hasher.h +++ b/src/keccak-256-hasher.h @@ -17,64 +17,257 @@ #ifndef KECCAK_256_HASHER_H #define KECCAK_256_HASHER_H +#include #include #include +#include #include +#include "array2d.h" +#include "compiler-defines.h" #include "i-hasher.h" #include "machine-hash.h" -extern "C" { -#include "sha3.h" -} - namespace cartesi { -struct keccak_instance final { - union { - uint8_t b[200]; - uint64_t q[25]; - } st; - int pt; -}; - -class keccak_256_hasher final : public i_hasher { - sha3_ctx_t m_ctx{}; - - friend i_hasher; - - void do_begin() noexcept { - sha3_init(&m_ctx, machine_hash_size, 0x01); - } - - template - void do_add_data(R &&r) noexcept { // NOLINT(cppcoreguidelines-missing-std-forward) - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - sha3_update(&m_ctx, reinterpret_cast(std::ranges::data(r)), std::ranges::size(r)); - } +// Generic implementations +MULTIVERSION_GENERIC size_t 
keccak_256_get_optimal_lane_count() noexcept; +MULTIVERSION_GENERIC void keccak_256_data_1x1(const array2d, 1, 1> &data, + const std::array &hash) noexcept; +MULTIVERSION_GENERIC void keccak_256_word_1x1(const array2d &data, + const std::array &hash) noexcept; +MULTIVERSION_GENERIC void keccak_256_word_1x2(const array2d &data, + const std::array &hash) noexcept; +MULTIVERSION_GENERIC void keccak_256_word_1x4(const array2d &data, + const std::array &hash) noexcept; +MULTIVERSION_GENERIC void keccak_256_word_1x8(const array2d &data, + const std::array &hash) noexcept; +MULTIVERSION_GENERIC void keccak_256_data_2x1(const array2d, 2, 1> &data, + const std::array &hash) noexcept; +MULTIVERSION_GENERIC void keccak_256_hash_2x1(const array2d &data, + const std::array &hash) noexcept; +MULTIVERSION_GENERIC void keccak_256_hash_2x2(const array2d &data, + const std::array &hash) noexcept; +MULTIVERSION_GENERIC void keccak_256_hash_2x4(const array2d &data, + const std::array &hash) noexcept; +MULTIVERSION_GENERIC void keccak_256_hash_2x8(const array2d &data, + const std::array &hash) noexcept; - void do_end(machine_hash_view hash) noexcept { - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - sha3_final(reinterpret_cast(hash.data()), &m_ctx); - } +// Optimized implementation for x86_64 architecture leveraging modern CPU instruction sets: +// - BMI1/BMI2 (Bit Manipulation Instructions) provide specialized bit operations: +// * RORX performs optimized bitwise rotation without requiring separate shift operations +// * ANDN efficiently computes (~x & y) in a single instruction +// - AVX2 for x4 SIMD hashing +// - AVX-512 for x8 SIMD hashing +#ifdef USE_MULTIVERSINING_AMD64 +// AVX2 implementation for x1, x2, x4, x8 SIMD hashing +MULTIVERSION_AMD64_AVX2_BMI_BMI2 size_t keccak_256_get_optimal_lane_count() noexcept; +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void keccak_256_data_1x1(const array2d, 1, 1> &data, + const std::array &hash) noexcept; 
+MULTIVERSION_AMD64_AVX2_BMI_BMI2 void keccak_256_word_1x1(const array2d &data, + const std::array &hash) noexcept; +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void keccak_256_word_1x2(const array2d &data, + const std::array &hash) noexcept; +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void keccak_256_word_1x4(const array2d &data, + const std::array &hash) noexcept; +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void keccak_256_word_1x8(const array2d &data, + const std::array &hash) noexcept; +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void keccak_256_data_2x1(const array2d, 2, 1> &data, + const std::array &hash) noexcept; +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void keccak_256_hash_2x1(const array2d &data, + const std::array &hash) noexcept; +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void keccak_256_hash_2x2(const array2d &data, + const std::array &hash) noexcept; +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void keccak_256_hash_2x4(const array2d &data, + const std::array &hash) noexcept; +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void keccak_256_hash_2x8(const array2d &data, + const std::array &hash) noexcept; +// AVX-512 implementation for x8 SIMD hashing +MULTIVERSION_AMD64_AVX512_BMI_BMI2 size_t keccak_256_get_optimal_lane_count() noexcept; +MULTIVERSION_AMD64_AVX512_BMI_BMI2 void keccak_256_word_1x8(const array2d &data, + const std::array &hash) noexcept; +MULTIVERSION_AMD64_AVX512_BMI_BMI2 void keccak_256_hash_2x8(const array2d &data, + const std::array &hash) noexcept; +#endif +class keccak_256_hasher final : public i_hasher { public: - /// \brief Default constructor - keccak_256_hasher() = default; + static constexpr int MAX_LANE_COUNT = 8; - /// \brief Default destructor - ~keccak_256_hasher() = default; + template + static void do_simd_concat_hash(const array2d, ConcatCount, LaneCount> &data, + const std::array &hash) noexcept; - /// \brief No copy constructor - keccak_256_hasher(const keccak_256_hasher &) = delete; - /// \brief No move constructor - keccak_256_hasher(keccak_256_hasher &&) = delete; - /// \brief No copy assignment 
- keccak_256_hasher &operator=(const keccak_256_hasher &) = delete; - /// \brief No move assignment - keccak_256_hasher &operator=(keccak_256_hasher &&) = delete; + static size_t do_get_optimal_lane_count() noexcept { + return keccak_256_get_optimal_lane_count(); + } }; +template <> +inline void keccak_256_hasher::do_simd_concat_hash<1, 1, std::dynamic_extent>( + const array2d, 1, 1> &data, const std::array &hash) noexcept { + keccak_256_data_1x1(data, hash); +} +template <> +inline void keccak_256_hasher::do_simd_concat_hash<1, 1, HASH_TREE_WORD_SIZE>( + const array2d &data, const std::array &hash) noexcept { + keccak_256_word_1x1(data, hash); +} +template <> +inline void keccak_256_hasher::do_simd_concat_hash<1, 2, HASH_TREE_WORD_SIZE>( + const array2d &data, const std::array &hash) noexcept { + keccak_256_word_1x2(data, hash); +} +template <> +inline void keccak_256_hasher::do_simd_concat_hash<1, 4, HASH_TREE_WORD_SIZE>( + const array2d &data, const std::array &hash) noexcept { + keccak_256_word_1x4(data, hash); +} +template <> +inline void keccak_256_hasher::do_simd_concat_hash<1, 8, HASH_TREE_WORD_SIZE>( + const array2d &data, const std::array &hash) noexcept { + keccak_256_word_1x8(data, hash); +} +template <> +inline void keccak_256_hasher::do_simd_concat_hash<1, 16, HASH_TREE_WORD_SIZE>( + const array2d &data, const std::array &hash) noexcept { + // Keccak-256 does not support 16-way parallelism, we simulate it by splitting it into two 8-way hashes + keccak_256_word_1x8(array2d{{{ + data[0][0], + data[0][1], + data[0][2], + data[0][3], + data[0][4], + data[0][5], + data[0][6], + data[0][7], + }}}, + std::array{ + hash[0], + hash[1], + hash[2], + hash[3], + hash[4], + hash[5], + hash[6], + hash[7], + }); + keccak_256_word_1x8(array2d{{{ + data[0][8], + data[0][9], + data[0][10], + data[0][11], + data[0][12], + data[0][13], + data[0][14], + data[0][15], + }}}, + std::array{ + hash[8], + hash[9], + hash[10], + hash[11], + hash[12], + hash[13], + hash[14], + 
hash[15], + }); +} +template <> +inline void keccak_256_hasher::do_simd_concat_hash<2, 1, std::dynamic_extent>( + const array2d, 2, 1> &data, const std::array &hash) noexcept { + keccak_256_data_2x1(data, hash); +} +template <> +inline void keccak_256_hasher::do_simd_concat_hash<2, 1, MACHINE_HASH_SIZE>( + const array2d &data, const std::array &hash) noexcept { + keccak_256_hash_2x1(data, hash); +} +template <> +inline void keccak_256_hasher::do_simd_concat_hash<2, 2, MACHINE_HASH_SIZE>( + const array2d &data, const std::array &hash) noexcept { + keccak_256_hash_2x2(data, hash); +} +template <> +inline void keccak_256_hasher::do_simd_concat_hash<2, 4, MACHINE_HASH_SIZE>( + const array2d &data, const std::array &hash) noexcept { + keccak_256_hash_2x4(data, hash); +} +template <> +inline void keccak_256_hasher::do_simd_concat_hash<2, 8, MACHINE_HASH_SIZE>( + const array2d &data, const std::array &hash) noexcept { + keccak_256_hash_2x8(data, hash); +} +template <> +inline void keccak_256_hasher::do_simd_concat_hash<2, 16, MACHINE_HASH_SIZE>( + const array2d &data, const std::array &hash) noexcept { + // Keccak-256 does not support 16-way parallelism, we simulate it by splitting it into two 8-way hashes + keccak_256_hash_2x8(array2d{{ + { + data[0][0], + data[0][1], + data[0][2], + data[0][3], + data[0][4], + data[0][5], + data[0][6], + data[0][7], + }, + { + data[1][0], + data[1][1], + data[1][2], + data[1][3], + data[1][4], + data[1][5], + data[1][6], + data[1][7], + }, + }}, + std::array{ + hash[0], + hash[1], + hash[2], + hash[3], + hash[4], + hash[5], + hash[6], + hash[7], + }); + keccak_256_hash_2x8(array2d{{ + { + data[0][8], + data[0][9], + data[0][10], + data[0][11], + data[0][12], + data[0][13], + data[0][14], + data[0][15], + }, + { + data[1][8], + data[1][9], + data[1][10], + data[1][11], + data[1][12], + data[1][13], + data[1][14], + data[1][15], + }, + }}, + std::array{ + hash[8], + hash[9], + hash[10], + hash[11], + hash[12], + hash[13], + hash[14], + 
hash[15], + }); +} + } // namespace cartesi #endif diff --git a/src/keccakf.h b/src/keccakf.h new file mode 100644 index 000000000..9a4f41274 --- /dev/null +++ b/src/keccakf.h @@ -0,0 +1,2573 @@ +// Copyright Cartesi and individual authors (see AUTHORS) +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) any +// later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +// PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License along +// with this program (see COPYING). If not, see . +// + +#ifndef KECCAKF_H +#define KECCAKF_H + +#include +#include + +#include "compiler-defines.h" + +namespace cartesi { + +template +// NOLINTNEXTLINE(readability-function-size,hicpp-function-size,google-readability-function-size) +FORCE_INLINE static void keccakf_1600(word_vector_type A[5][5]) noexcept { + // This code is adapted from the XKCP generic64 Keccak implementation (public domain). + // Original source: + // https://github.com/XKCP/XKCP/blob/master/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64.c + // Selected for its 64-bit optimization, vectorization-friendly structure, branchless design, and efficient + // memory bandwidth. 
+ alignas(word_vector_align) static constexpr uint64_t KECCAKF1600_ROUND_CONSTANTS[24] = {0x0000000000000001, + 0x0000000000008082, 0x800000000000808a, 0x8000000080008000, 0x000000000000808b, 0x0000000080000001, + 0x8000000080008081, 0x8000000000008009, 0x000000000000008a, 0x0000000000000088, 0x0000000080008009, + 0x000000008000000a, 0x000000008000808b, 0x800000000000008b, 0x8000000000008089, 0x8000000000008003, + 0x8000000000008002, 0x8000000000000080, 0x000000000000800a, 0x800000008000000a, 0x8000000080008081, + 0x8000000000008080, 0x0000000080000001, 0x8000000080008008}; + word_vector_type B[5][5]; + word_vector_type C[5]; + word_vector_type D[5]; + word_vector_type E[5][5]; + C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0]; + C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1]; + C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2]; + C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3]; + C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4]; + D[0] = C[4] ^ (((C[1]) << 1) ^ ((C[1]) >> (64 - 1))); + D[1] = C[0] ^ (((C[2]) << 1) ^ ((C[2]) >> (64 - 1))); + D[2] = C[1] ^ (((C[3]) << 1) ^ ((C[3]) >> (64 - 1))); + D[3] = C[2] ^ (((C[4]) << 1) ^ ((C[4]) >> (64 - 1))); + D[4] = C[3] ^ (((C[0]) << 1) ^ ((C[0]) >> (64 - 1))); + A[0][0] ^= D[0]; + B[0][0] = A[0][0]; + A[1][1] ^= D[1]; + B[0][1] = (((A[1][1]) << 44) ^ ((A[1][1]) >> (64 - 44))); + A[2][2] ^= D[2]; + B[0][2] = (((A[2][2]) << 43) ^ ((A[2][2]) >> (64 - 43))); + A[3][3] ^= D[3]; + B[0][3] = (((A[3][3]) << 21) ^ ((A[3][3]) >> (64 - 21))); + A[4][4] ^= D[4]; + B[0][4] = (((A[4][4]) << 14) ^ ((A[4][4]) >> (64 - 14))); + E[0][0] = B[0][0] ^ ((~B[0][1]) & B[0][2]); + E[0][0] ^= KECCAKF1600_ROUND_CONSTANTS[0]; + C[0] = E[0][0]; + E[0][1] = B[0][1] ^ ((~B[0][2]) & B[0][3]); + C[1] = E[0][1]; + E[0][2] = B[0][2] ^ ((~B[0][3]) & B[0][4]); + C[2] = E[0][2]; + E[0][3] = B[0][3] ^ ((~B[0][4]) & B[0][0]); + C[3] = E[0][3]; + E[0][4] = B[0][4] ^ ((~B[0][0]) & B[0][1]); + C[4] = E[0][4]; + A[0][3] ^= D[3]; + 
B[1][0] = (((A[0][3]) << 28) ^ ((A[0][3]) >> (64 - 28))); + A[1][4] ^= D[4]; + B[1][1] = (((A[1][4]) << 20) ^ ((A[1][4]) >> (64 - 20))); + A[2][0] ^= D[0]; + B[1][2] = (((A[2][0]) << 3) ^ ((A[2][0]) >> (64 - 3))); + A[3][1] ^= D[1]; + B[1][3] = (((A[3][1]) << 45) ^ ((A[3][1]) >> (64 - 45))); + A[4][2] ^= D[2]; + B[1][4] = (((A[4][2]) << 61) ^ ((A[4][2]) >> (64 - 61))); + E[1][0] = B[1][0] ^ ((~B[1][1]) & B[1][2]); + C[0] ^= E[1][0]; + E[1][1] = B[1][1] ^ ((~B[1][2]) & B[1][3]); + C[1] ^= E[1][1]; + E[1][2] = B[1][2] ^ ((~B[1][3]) & B[1][4]); + C[2] ^= E[1][2]; + E[1][3] = B[1][3] ^ ((~B[1][4]) & B[1][0]); + C[3] ^= E[1][3]; + E[1][4] = B[1][4] ^ ((~B[1][0]) & B[1][1]); + C[4] ^= E[1][4]; + A[0][1] ^= D[1]; + B[2][0] = (((A[0][1]) << 1) ^ ((A[0][1]) >> (64 - 1))); + A[1][2] ^= D[2]; + B[2][1] = (((A[1][2]) << 6) ^ ((A[1][2]) >> (64 - 6))); + A[2][3] ^= D[3]; + B[2][2] = (((A[2][3]) << 25) ^ ((A[2][3]) >> (64 - 25))); + A[3][4] ^= D[4]; + B[2][3] = (((A[3][4]) << 8) ^ ((A[3][4]) >> (64 - 8))); + A[4][0] ^= D[0]; + B[2][4] = (((A[4][0]) << 18) ^ ((A[4][0]) >> (64 - 18))); + E[2][0] = B[2][0] ^ ((~B[2][1]) & B[2][2]); + C[0] ^= E[2][0]; + E[2][1] = B[2][1] ^ ((~B[2][2]) & B[2][3]); + C[1] ^= E[2][1]; + E[2][2] = B[2][2] ^ ((~B[2][3]) & B[2][4]); + C[2] ^= E[2][2]; + E[2][3] = B[2][3] ^ ((~B[2][4]) & B[2][0]); + C[3] ^= E[2][3]; + E[2][4] = B[2][4] ^ ((~B[2][0]) & B[2][1]); + C[4] ^= E[2][4]; + A[0][4] ^= D[4]; + B[3][0] = (((A[0][4]) << 27) ^ ((A[0][4]) >> (64 - 27))); + A[1][0] ^= D[0]; + B[3][1] = (((A[1][0]) << 36) ^ ((A[1][0]) >> (64 - 36))); + A[2][1] ^= D[1]; + B[3][2] = (((A[2][1]) << 10) ^ ((A[2][1]) >> (64 - 10))); + A[3][2] ^= D[2]; + B[3][3] = (((A[3][2]) << 15) ^ ((A[3][2]) >> (64 - 15))); + A[4][3] ^= D[3]; + B[3][4] = (((A[4][3]) << 56) ^ ((A[4][3]) >> (64 - 56))); + E[3][0] = B[3][0] ^ ((~B[3][1]) & B[3][2]); + C[0] ^= E[3][0]; + E[3][1] = B[3][1] ^ ((~B[3][2]) & B[3][3]); + C[1] ^= E[3][1]; + E[3][2] = B[3][2] ^ ((~B[3][3]) & B[3][4]); + C[2] ^= 
E[3][2]; + E[3][3] = B[3][3] ^ ((~B[3][4]) & B[3][0]); + C[3] ^= E[3][3]; + E[3][4] = B[3][4] ^ ((~B[3][0]) & B[3][1]); + C[4] ^= E[3][4]; + A[0][2] ^= D[2]; + B[4][0] = (((A[0][2]) << 62) ^ ((A[0][2]) >> (64 - 62))); + A[1][3] ^= D[3]; + B[4][1] = (((A[1][3]) << 55) ^ ((A[1][3]) >> (64 - 55))); + A[2][4] ^= D[4]; + B[4][2] = (((A[2][4]) << 39) ^ ((A[2][4]) >> (64 - 39))); + A[3][0] ^= D[0]; + B[4][3] = (((A[3][0]) << 41) ^ ((A[3][0]) >> (64 - 41))); + A[4][1] ^= D[1]; + B[4][4] = (((A[4][1]) << 2) ^ ((A[4][1]) >> (64 - 2))); + E[4][0] = B[4][0] ^ ((~B[4][1]) & B[4][2]); + C[0] ^= E[4][0]; + E[4][1] = B[4][1] ^ ((~B[4][2]) & B[4][3]); + C[1] ^= E[4][1]; + E[4][2] = B[4][2] ^ ((~B[4][3]) & B[4][4]); + C[2] ^= E[4][2]; + E[4][3] = B[4][3] ^ ((~B[4][4]) & B[4][0]); + C[3] ^= E[4][3]; + E[4][4] = B[4][4] ^ ((~B[4][0]) & B[4][1]); + C[4] ^= E[4][4]; + D[0] = C[4] ^ (((C[1]) << 1) ^ ((C[1]) >> (64 - 1))); + D[1] = C[0] ^ (((C[2]) << 1) ^ ((C[2]) >> (64 - 1))); + D[2] = C[1] ^ (((C[3]) << 1) ^ ((C[3]) >> (64 - 1))); + D[3] = C[2] ^ (((C[4]) << 1) ^ ((C[4]) >> (64 - 1))); + D[4] = C[3] ^ (((C[0]) << 1) ^ ((C[0]) >> (64 - 1))); + E[0][0] ^= D[0]; + B[0][0] = E[0][0]; + E[1][1] ^= D[1]; + B[0][1] = (((E[1][1]) << 44) ^ ((E[1][1]) >> (64 - 44))); + E[2][2] ^= D[2]; + B[0][2] = (((E[2][2]) << 43) ^ ((E[2][2]) >> (64 - 43))); + E[3][3] ^= D[3]; + B[0][3] = (((E[3][3]) << 21) ^ ((E[3][3]) >> (64 - 21))); + E[4][4] ^= D[4]; + B[0][4] = (((E[4][4]) << 14) ^ ((E[4][4]) >> (64 - 14))); + A[0][0] = B[0][0] ^ ((~B[0][1]) & B[0][2]); + A[0][0] ^= KECCAKF1600_ROUND_CONSTANTS[1]; + C[0] = A[0][0]; + A[0][1] = B[0][1] ^ ((~B[0][2]) & B[0][3]); + C[1] = A[0][1]; + A[0][2] = B[0][2] ^ ((~B[0][3]) & B[0][4]); + C[2] = A[0][2]; + A[0][3] = B[0][3] ^ ((~B[0][4]) & B[0][0]); + C[3] = A[0][3]; + A[0][4] = B[0][4] ^ ((~B[0][0]) & B[0][1]); + C[4] = A[0][4]; + E[0][3] ^= D[3]; + B[1][0] = (((E[0][3]) << 28) ^ ((E[0][3]) >> (64 - 28))); + E[1][4] ^= D[4]; + B[1][1] = (((E[1][4]) << 20) ^ ((E[1][4]) 
>> (64 - 20))); + E[2][0] ^= D[0]; + B[1][2] = (((E[2][0]) << 3) ^ ((E[2][0]) >> (64 - 3))); + E[3][1] ^= D[1]; + B[1][3] = (((E[3][1]) << 45) ^ ((E[3][1]) >> (64 - 45))); + E[4][2] ^= D[2]; + B[1][4] = (((E[4][2]) << 61) ^ ((E[4][2]) >> (64 - 61))); + A[1][0] = B[1][0] ^ ((~B[1][1]) & B[1][2]); + C[0] ^= A[1][0]; + A[1][1] = B[1][1] ^ ((~B[1][2]) & B[1][3]); + C[1] ^= A[1][1]; + A[1][2] = B[1][2] ^ ((~B[1][3]) & B[1][4]); + C[2] ^= A[1][2]; + A[1][3] = B[1][3] ^ ((~B[1][4]) & B[1][0]); + C[3] ^= A[1][3]; + A[1][4] = B[1][4] ^ ((~B[1][0]) & B[1][1]); + C[4] ^= A[1][4]; + E[0][1] ^= D[1]; + B[2][0] = (((E[0][1]) << 1) ^ ((E[0][1]) >> (64 - 1))); + E[1][2] ^= D[2]; + B[2][1] = (((E[1][2]) << 6) ^ ((E[1][2]) >> (64 - 6))); + E[2][3] ^= D[3]; + B[2][2] = (((E[2][3]) << 25) ^ ((E[2][3]) >> (64 - 25))); + E[3][4] ^= D[4]; + B[2][3] = (((E[3][4]) << 8) ^ ((E[3][4]) >> (64 - 8))); + E[4][0] ^= D[0]; + B[2][4] = (((E[4][0]) << 18) ^ ((E[4][0]) >> (64 - 18))); + A[2][0] = B[2][0] ^ ((~B[2][1]) & B[2][2]); + C[0] ^= A[2][0]; + A[2][1] = B[2][1] ^ ((~B[2][2]) & B[2][3]); + C[1] ^= A[2][1]; + A[2][2] = B[2][2] ^ ((~B[2][3]) & B[2][4]); + C[2] ^= A[2][2]; + A[2][3] = B[2][3] ^ ((~B[2][4]) & B[2][0]); + C[3] ^= A[2][3]; + A[2][4] = B[2][4] ^ ((~B[2][0]) & B[2][1]); + C[4] ^= A[2][4]; + E[0][4] ^= D[4]; + B[3][0] = (((E[0][4]) << 27) ^ ((E[0][4]) >> (64 - 27))); + E[1][0] ^= D[0]; + B[3][1] = (((E[1][0]) << 36) ^ ((E[1][0]) >> (64 - 36))); + E[2][1] ^= D[1]; + B[3][2] = (((E[2][1]) << 10) ^ ((E[2][1]) >> (64 - 10))); + E[3][2] ^= D[2]; + B[3][3] = (((E[3][2]) << 15) ^ ((E[3][2]) >> (64 - 15))); + E[4][3] ^= D[3]; + B[3][4] = (((E[4][3]) << 56) ^ ((E[4][3]) >> (64 - 56))); + A[3][0] = B[3][0] ^ ((~B[3][1]) & B[3][2]); + C[0] ^= A[3][0]; + A[3][1] = B[3][1] ^ ((~B[3][2]) & B[3][3]); + C[1] ^= A[3][1]; + A[3][2] = B[3][2] ^ ((~B[3][3]) & B[3][4]); + C[2] ^= A[3][2]; + A[3][3] = B[3][3] ^ ((~B[3][4]) & B[3][0]); + C[3] ^= A[3][3]; + A[3][4] = B[3][4] ^ ((~B[3][0]) & B[3][1]); + C[4] 
^= A[3][4]; + E[0][2] ^= D[2]; + B[4][0] = (((E[0][2]) << 62) ^ ((E[0][2]) >> (64 - 62))); + E[1][3] ^= D[3]; + B[4][1] = (((E[1][3]) << 55) ^ ((E[1][3]) >> (64 - 55))); + E[2][4] ^= D[4]; + B[4][2] = (((E[2][4]) << 39) ^ ((E[2][4]) >> (64 - 39))); + E[3][0] ^= D[0]; + B[4][3] = (((E[3][0]) << 41) ^ ((E[3][0]) >> (64 - 41))); + E[4][1] ^= D[1]; + B[4][4] = (((E[4][1]) << 2) ^ ((E[4][1]) >> (64 - 2))); + A[4][0] = B[4][0] ^ ((~B[4][1]) & B[4][2]); + C[0] ^= A[4][0]; + A[4][1] = B[4][1] ^ ((~B[4][2]) & B[4][3]); + C[1] ^= A[4][1]; + A[4][2] = B[4][2] ^ ((~B[4][3]) & B[4][4]); + C[2] ^= A[4][2]; + A[4][3] = B[4][3] ^ ((~B[4][4]) & B[4][0]); + C[3] ^= A[4][3]; + A[4][4] = B[4][4] ^ ((~B[4][0]) & B[4][1]); + C[4] ^= A[4][4]; + D[0] = C[4] ^ (((C[1]) << 1) ^ ((C[1]) >> (64 - 1))); + D[1] = C[0] ^ (((C[2]) << 1) ^ ((C[2]) >> (64 - 1))); + D[2] = C[1] ^ (((C[3]) << 1) ^ ((C[3]) >> (64 - 1))); + D[3] = C[2] ^ (((C[4]) << 1) ^ ((C[4]) >> (64 - 1))); + D[4] = C[3] ^ (((C[0]) << 1) ^ ((C[0]) >> (64 - 1))); + A[0][0] ^= D[0]; + B[0][0] = A[0][0]; + A[1][1] ^= D[1]; + B[0][1] = (((A[1][1]) << 44) ^ ((A[1][1]) >> (64 - 44))); + A[2][2] ^= D[2]; + B[0][2] = (((A[2][2]) << 43) ^ ((A[2][2]) >> (64 - 43))); + A[3][3] ^= D[3]; + B[0][3] = (((A[3][3]) << 21) ^ ((A[3][3]) >> (64 - 21))); + A[4][4] ^= D[4]; + B[0][4] = (((A[4][4]) << 14) ^ ((A[4][4]) >> (64 - 14))); + E[0][0] = B[0][0] ^ ((~B[0][1]) & B[0][2]); + E[0][0] ^= KECCAKF1600_ROUND_CONSTANTS[2]; + C[0] = E[0][0]; + E[0][1] = B[0][1] ^ ((~B[0][2]) & B[0][3]); + C[1] = E[0][1]; + E[0][2] = B[0][2] ^ ((~B[0][3]) & B[0][4]); + C[2] = E[0][2]; + E[0][3] = B[0][3] ^ ((~B[0][4]) & B[0][0]); + C[3] = E[0][3]; + E[0][4] = B[0][4] ^ ((~B[0][0]) & B[0][1]); + C[4] = E[0][4]; + A[0][3] ^= D[3]; + B[1][0] = (((A[0][3]) << 28) ^ ((A[0][3]) >> (64 - 28))); + A[1][4] ^= D[4]; + B[1][1] = (((A[1][4]) << 20) ^ ((A[1][4]) >> (64 - 20))); + A[2][0] ^= D[0]; + B[1][2] = (((A[2][0]) << 3) ^ ((A[2][0]) >> (64 - 3))); + A[3][1] ^= D[1]; + B[1][3] = 
(((A[3][1]) << 45) ^ ((A[3][1]) >> (64 - 45))); + A[4][2] ^= D[2]; + B[1][4] = (((A[4][2]) << 61) ^ ((A[4][2]) >> (64 - 61))); + E[1][0] = B[1][0] ^ ((~B[1][1]) & B[1][2]); + C[0] ^= E[1][0]; + E[1][1] = B[1][1] ^ ((~B[1][2]) & B[1][3]); + C[1] ^= E[1][1]; + E[1][2] = B[1][2] ^ ((~B[1][3]) & B[1][4]); + C[2] ^= E[1][2]; + E[1][3] = B[1][3] ^ ((~B[1][4]) & B[1][0]); + C[3] ^= E[1][3]; + E[1][4] = B[1][4] ^ ((~B[1][0]) & B[1][1]); + C[4] ^= E[1][4]; + A[0][1] ^= D[1]; + B[2][0] = (((A[0][1]) << 1) ^ ((A[0][1]) >> (64 - 1))); + A[1][2] ^= D[2]; + B[2][1] = (((A[1][2]) << 6) ^ ((A[1][2]) >> (64 - 6))); + A[2][3] ^= D[3]; + B[2][2] = (((A[2][3]) << 25) ^ ((A[2][3]) >> (64 - 25))); + A[3][4] ^= D[4]; + B[2][3] = (((A[3][4]) << 8) ^ ((A[3][4]) >> (64 - 8))); + A[4][0] ^= D[0]; + B[2][4] = (((A[4][0]) << 18) ^ ((A[4][0]) >> (64 - 18))); + E[2][0] = B[2][0] ^ ((~B[2][1]) & B[2][2]); + C[0] ^= E[2][0]; + E[2][1] = B[2][1] ^ ((~B[2][2]) & B[2][3]); + C[1] ^= E[2][1]; + E[2][2] = B[2][2] ^ ((~B[2][3]) & B[2][4]); + C[2] ^= E[2][2]; + E[2][3] = B[2][3] ^ ((~B[2][4]) & B[2][0]); + C[3] ^= E[2][3]; + E[2][4] = B[2][4] ^ ((~B[2][0]) & B[2][1]); + C[4] ^= E[2][4]; + A[0][4] ^= D[4]; + B[3][0] = (((A[0][4]) << 27) ^ ((A[0][4]) >> (64 - 27))); + A[1][0] ^= D[0]; + B[3][1] = (((A[1][0]) << 36) ^ ((A[1][0]) >> (64 - 36))); + A[2][1] ^= D[1]; + B[3][2] = (((A[2][1]) << 10) ^ ((A[2][1]) >> (64 - 10))); + A[3][2] ^= D[2]; + B[3][3] = (((A[3][2]) << 15) ^ ((A[3][2]) >> (64 - 15))); + A[4][3] ^= D[3]; + B[3][4] = (((A[4][3]) << 56) ^ ((A[4][3]) >> (64 - 56))); + E[3][0] = B[3][0] ^ ((~B[3][1]) & B[3][2]); + C[0] ^= E[3][0]; + E[3][1] = B[3][1] ^ ((~B[3][2]) & B[3][3]); + C[1] ^= E[3][1]; + E[3][2] = B[3][2] ^ ((~B[3][3]) & B[3][4]); + C[2] ^= E[3][2]; + E[3][3] = B[3][3] ^ ((~B[3][4]) & B[3][0]); + C[3] ^= E[3][3]; + E[3][4] = B[3][4] ^ ((~B[3][0]) & B[3][1]); + C[4] ^= E[3][4]; + A[0][2] ^= D[2]; + B[4][0] = (((A[0][2]) << 62) ^ ((A[0][2]) >> (64 - 62))); + A[1][3] ^= D[3]; + B[4][1] = 
(((A[1][3]) << 55) ^ ((A[1][3]) >> (64 - 55))); + A[2][4] ^= D[4]; + B[4][2] = (((A[2][4]) << 39) ^ ((A[2][4]) >> (64 - 39))); + A[3][0] ^= D[0]; + B[4][3] = (((A[3][0]) << 41) ^ ((A[3][0]) >> (64 - 41))); + A[4][1] ^= D[1]; + B[4][4] = (((A[4][1]) << 2) ^ ((A[4][1]) >> (64 - 2))); + E[4][0] = B[4][0] ^ ((~B[4][1]) & B[4][2]); + C[0] ^= E[4][0]; + E[4][1] = B[4][1] ^ ((~B[4][2]) & B[4][3]); + C[1] ^= E[4][1]; + E[4][2] = B[4][2] ^ ((~B[4][3]) & B[4][4]); + C[2] ^= E[4][2]; + E[4][3] = B[4][3] ^ ((~B[4][4]) & B[4][0]); + C[3] ^= E[4][3]; + E[4][4] = B[4][4] ^ ((~B[4][0]) & B[4][1]); + C[4] ^= E[4][4]; + D[0] = C[4] ^ (((C[1]) << 1) ^ ((C[1]) >> (64 - 1))); + D[1] = C[0] ^ (((C[2]) << 1) ^ ((C[2]) >> (64 - 1))); + D[2] = C[1] ^ (((C[3]) << 1) ^ ((C[3]) >> (64 - 1))); + D[3] = C[2] ^ (((C[4]) << 1) ^ ((C[4]) >> (64 - 1))); + D[4] = C[3] ^ (((C[0]) << 1) ^ ((C[0]) >> (64 - 1))); + E[0][0] ^= D[0]; + B[0][0] = E[0][0]; + E[1][1] ^= D[1]; + B[0][1] = (((E[1][1]) << 44) ^ ((E[1][1]) >> (64 - 44))); + E[2][2] ^= D[2]; + B[0][2] = (((E[2][2]) << 43) ^ ((E[2][2]) >> (64 - 43))); + E[3][3] ^= D[3]; + B[0][3] = (((E[3][3]) << 21) ^ ((E[3][3]) >> (64 - 21))); + E[4][4] ^= D[4]; + B[0][4] = (((E[4][4]) << 14) ^ ((E[4][4]) >> (64 - 14))); + A[0][0] = B[0][0] ^ ((~B[0][1]) & B[0][2]); + A[0][0] ^= KECCAKF1600_ROUND_CONSTANTS[3]; + C[0] = A[0][0]; + A[0][1] = B[0][1] ^ ((~B[0][2]) & B[0][3]); + C[1] = A[0][1]; + A[0][2] = B[0][2] ^ ((~B[0][3]) & B[0][4]); + C[2] = A[0][2]; + A[0][3] = B[0][3] ^ ((~B[0][4]) & B[0][0]); + C[3] = A[0][3]; + A[0][4] = B[0][4] ^ ((~B[0][0]) & B[0][1]); + C[4] = A[0][4]; + E[0][3] ^= D[3]; + B[1][0] = (((E[0][3]) << 28) ^ ((E[0][3]) >> (64 - 28))); + E[1][4] ^= D[4]; + B[1][1] = (((E[1][4]) << 20) ^ ((E[1][4]) >> (64 - 20))); + E[2][0] ^= D[0]; + B[1][2] = (((E[2][0]) << 3) ^ ((E[2][0]) >> (64 - 3))); + E[3][1] ^= D[1]; + B[1][3] = (((E[3][1]) << 45) ^ ((E[3][1]) >> (64 - 45))); + E[4][2] ^= D[2]; + B[1][4] = (((E[4][2]) << 61) ^ ((E[4][2]) >> (64 - 
61))); + A[1][0] = B[1][0] ^ ((~B[1][1]) & B[1][2]); + C[0] ^= A[1][0]; + A[1][1] = B[1][1] ^ ((~B[1][2]) & B[1][3]); + C[1] ^= A[1][1]; + A[1][2] = B[1][2] ^ ((~B[1][3]) & B[1][4]); + C[2] ^= A[1][2]; + A[1][3] = B[1][3] ^ ((~B[1][4]) & B[1][0]); + C[3] ^= A[1][3]; + A[1][4] = B[1][4] ^ ((~B[1][0]) & B[1][1]); + C[4] ^= A[1][4]; + E[0][1] ^= D[1]; + B[2][0] = (((E[0][1]) << 1) ^ ((E[0][1]) >> (64 - 1))); + E[1][2] ^= D[2]; + B[2][1] = (((E[1][2]) << 6) ^ ((E[1][2]) >> (64 - 6))); + E[2][3] ^= D[3]; + B[2][2] = (((E[2][3]) << 25) ^ ((E[2][3]) >> (64 - 25))); + E[3][4] ^= D[4]; + B[2][3] = (((E[3][4]) << 8) ^ ((E[3][4]) >> (64 - 8))); + E[4][0] ^= D[0]; + B[2][4] = (((E[4][0]) << 18) ^ ((E[4][0]) >> (64 - 18))); + A[2][0] = B[2][0] ^ ((~B[2][1]) & B[2][2]); + C[0] ^= A[2][0]; + A[2][1] = B[2][1] ^ ((~B[2][2]) & B[2][3]); + C[1] ^= A[2][1]; + A[2][2] = B[2][2] ^ ((~B[2][3]) & B[2][4]); + C[2] ^= A[2][2]; + A[2][3] = B[2][3] ^ ((~B[2][4]) & B[2][0]); + C[3] ^= A[2][3]; + A[2][4] = B[2][4] ^ ((~B[2][0]) & B[2][1]); + C[4] ^= A[2][4]; + E[0][4] ^= D[4]; + B[3][0] = (((E[0][4]) << 27) ^ ((E[0][4]) >> (64 - 27))); + E[1][0] ^= D[0]; + B[3][1] = (((E[1][0]) << 36) ^ ((E[1][0]) >> (64 - 36))); + E[2][1] ^= D[1]; + B[3][2] = (((E[2][1]) << 10) ^ ((E[2][1]) >> (64 - 10))); + E[3][2] ^= D[2]; + B[3][3] = (((E[3][2]) << 15) ^ ((E[3][2]) >> (64 - 15))); + E[4][3] ^= D[3]; + B[3][4] = (((E[4][3]) << 56) ^ ((E[4][3]) >> (64 - 56))); + A[3][0] = B[3][0] ^ ((~B[3][1]) & B[3][2]); + C[0] ^= A[3][0]; + A[3][1] = B[3][1] ^ ((~B[3][2]) & B[3][3]); + C[1] ^= A[3][1]; + A[3][2] = B[3][2] ^ ((~B[3][3]) & B[3][4]); + C[2] ^= A[3][2]; + A[3][3] = B[3][3] ^ ((~B[3][4]) & B[3][0]); + C[3] ^= A[3][3]; + A[3][4] = B[3][4] ^ ((~B[3][0]) & B[3][1]); + C[4] ^= A[3][4]; + E[0][2] ^= D[2]; + B[4][0] = (((E[0][2]) << 62) ^ ((E[0][2]) >> (64 - 62))); + E[1][3] ^= D[3]; + B[4][1] = (((E[1][3]) << 55) ^ ((E[1][3]) >> (64 - 55))); + E[2][4] ^= D[4]; + B[4][2] = (((E[2][4]) << 39) ^ ((E[2][4]) >> (64 - 
39))); + E[3][0] ^= D[0]; + B[4][3] = (((E[3][0]) << 41) ^ ((E[3][0]) >> (64 - 41))); + E[4][1] ^= D[1]; + B[4][4] = (((E[4][1]) << 2) ^ ((E[4][1]) >> (64 - 2))); + A[4][0] = B[4][0] ^ ((~B[4][1]) & B[4][2]); + C[0] ^= A[4][0]; + A[4][1] = B[4][1] ^ ((~B[4][2]) & B[4][3]); + C[1] ^= A[4][1]; + A[4][2] = B[4][2] ^ ((~B[4][3]) & B[4][4]); + C[2] ^= A[4][2]; + A[4][3] = B[4][3] ^ ((~B[4][4]) & B[4][0]); + C[3] ^= A[4][3]; + A[4][4] = B[4][4] ^ ((~B[4][0]) & B[4][1]); + C[4] ^= A[4][4]; + D[0] = C[4] ^ (((C[1]) << 1) ^ ((C[1]) >> (64 - 1))); + D[1] = C[0] ^ (((C[2]) << 1) ^ ((C[2]) >> (64 - 1))); + D[2] = C[1] ^ (((C[3]) << 1) ^ ((C[3]) >> (64 - 1))); + D[3] = C[2] ^ (((C[4]) << 1) ^ ((C[4]) >> (64 - 1))); + D[4] = C[3] ^ (((C[0]) << 1) ^ ((C[0]) >> (64 - 1))); + A[0][0] ^= D[0]; + B[0][0] = A[0][0]; + A[1][1] ^= D[1]; + B[0][1] = (((A[1][1]) << 44) ^ ((A[1][1]) >> (64 - 44))); + A[2][2] ^= D[2]; + B[0][2] = (((A[2][2]) << 43) ^ ((A[2][2]) >> (64 - 43))); + A[3][3] ^= D[3]; + B[0][3] = (((A[3][3]) << 21) ^ ((A[3][3]) >> (64 - 21))); + A[4][4] ^= D[4]; + B[0][4] = (((A[4][4]) << 14) ^ ((A[4][4]) >> (64 - 14))); + E[0][0] = B[0][0] ^ ((~B[0][1]) & B[0][2]); + E[0][0] ^= KECCAKF1600_ROUND_CONSTANTS[4]; + C[0] = E[0][0]; + E[0][1] = B[0][1] ^ ((~B[0][2]) & B[0][3]); + C[1] = E[0][1]; + E[0][2] = B[0][2] ^ ((~B[0][3]) & B[0][4]); + C[2] = E[0][2]; + E[0][3] = B[0][3] ^ ((~B[0][4]) & B[0][0]); + C[3] = E[0][3]; + E[0][4] = B[0][4] ^ ((~B[0][0]) & B[0][1]); + C[4] = E[0][4]; + A[0][3] ^= D[3]; + B[1][0] = (((A[0][3]) << 28) ^ ((A[0][3]) >> (64 - 28))); + A[1][4] ^= D[4]; + B[1][1] = (((A[1][4]) << 20) ^ ((A[1][4]) >> (64 - 20))); + A[2][0] ^= D[0]; + B[1][2] = (((A[2][0]) << 3) ^ ((A[2][0]) >> (64 - 3))); + A[3][1] ^= D[1]; + B[1][3] = (((A[3][1]) << 45) ^ ((A[3][1]) >> (64 - 45))); + A[4][2] ^= D[2]; + B[1][4] = (((A[4][2]) << 61) ^ ((A[4][2]) >> (64 - 61))); + E[1][0] = B[1][0] ^ ((~B[1][1]) & B[1][2]); + C[0] ^= E[1][0]; + E[1][1] = B[1][1] ^ ((~B[1][2]) & B[1][3]); + C[1] 
^= E[1][1]; + E[1][2] = B[1][2] ^ ((~B[1][3]) & B[1][4]); + C[2] ^= E[1][2]; + E[1][3] = B[1][3] ^ ((~B[1][4]) & B[1][0]); + C[3] ^= E[1][3]; + E[1][4] = B[1][4] ^ ((~B[1][0]) & B[1][1]); + C[4] ^= E[1][4]; + A[0][1] ^= D[1]; + B[2][0] = (((A[0][1]) << 1) ^ ((A[0][1]) >> (64 - 1))); + A[1][2] ^= D[2]; + B[2][1] = (((A[1][2]) << 6) ^ ((A[1][2]) >> (64 - 6))); + A[2][3] ^= D[3]; + B[2][2] = (((A[2][3]) << 25) ^ ((A[2][3]) >> (64 - 25))); + A[3][4] ^= D[4]; + B[2][3] = (((A[3][4]) << 8) ^ ((A[3][4]) >> (64 - 8))); + A[4][0] ^= D[0]; + B[2][4] = (((A[4][0]) << 18) ^ ((A[4][0]) >> (64 - 18))); + E[2][0] = B[2][0] ^ ((~B[2][1]) & B[2][2]); + C[0] ^= E[2][0]; + E[2][1] = B[2][1] ^ ((~B[2][2]) & B[2][3]); + C[1] ^= E[2][1]; + E[2][2] = B[2][2] ^ ((~B[2][3]) & B[2][4]); + C[2] ^= E[2][2]; + E[2][3] = B[2][3] ^ ((~B[2][4]) & B[2][0]); + C[3] ^= E[2][3]; + E[2][4] = B[2][4] ^ ((~B[2][0]) & B[2][1]); + C[4] ^= E[2][4]; + A[0][4] ^= D[4]; + B[3][0] = (((A[0][4]) << 27) ^ ((A[0][4]) >> (64 - 27))); + A[1][0] ^= D[0]; + B[3][1] = (((A[1][0]) << 36) ^ ((A[1][0]) >> (64 - 36))); + A[2][1] ^= D[1]; + B[3][2] = (((A[2][1]) << 10) ^ ((A[2][1]) >> (64 - 10))); + A[3][2] ^= D[2]; + B[3][3] = (((A[3][2]) << 15) ^ ((A[3][2]) >> (64 - 15))); + A[4][3] ^= D[3]; + B[3][4] = (((A[4][3]) << 56) ^ ((A[4][3]) >> (64 - 56))); + E[3][0] = B[3][0] ^ ((~B[3][1]) & B[3][2]); + C[0] ^= E[3][0]; + E[3][1] = B[3][1] ^ ((~B[3][2]) & B[3][3]); + C[1] ^= E[3][1]; + E[3][2] = B[3][2] ^ ((~B[3][3]) & B[3][4]); + C[2] ^= E[3][2]; + E[3][3] = B[3][3] ^ ((~B[3][4]) & B[3][0]); + C[3] ^= E[3][3]; + E[3][4] = B[3][4] ^ ((~B[3][0]) & B[3][1]); + C[4] ^= E[3][4]; + A[0][2] ^= D[2]; + B[4][0] = (((A[0][2]) << 62) ^ ((A[0][2]) >> (64 - 62))); + A[1][3] ^= D[3]; + B[4][1] = (((A[1][3]) << 55) ^ ((A[1][3]) >> (64 - 55))); + A[2][4] ^= D[4]; + B[4][2] = (((A[2][4]) << 39) ^ ((A[2][4]) >> (64 - 39))); + A[3][0] ^= D[0]; + B[4][3] = (((A[3][0]) << 41) ^ ((A[3][0]) >> (64 - 41))); + A[4][1] ^= D[1]; + B[4][4] = (((A[4][1]) 
<< 2) ^ ((A[4][1]) >> (64 - 2))); + E[4][0] = B[4][0] ^ ((~B[4][1]) & B[4][2]); + C[0] ^= E[4][0]; + E[4][1] = B[4][1] ^ ((~B[4][2]) & B[4][3]); + C[1] ^= E[4][1]; + E[4][2] = B[4][2] ^ ((~B[4][3]) & B[4][4]); + C[2] ^= E[4][2]; + E[4][3] = B[4][3] ^ ((~B[4][4]) & B[4][0]); + C[3] ^= E[4][3]; + E[4][4] = B[4][4] ^ ((~B[4][0]) & B[4][1]); + C[4] ^= E[4][4]; + D[0] = C[4] ^ (((C[1]) << 1) ^ ((C[1]) >> (64 - 1))); + D[1] = C[0] ^ (((C[2]) << 1) ^ ((C[2]) >> (64 - 1))); + D[2] = C[1] ^ (((C[3]) << 1) ^ ((C[3]) >> (64 - 1))); + D[3] = C[2] ^ (((C[4]) << 1) ^ ((C[4]) >> (64 - 1))); + D[4] = C[3] ^ (((C[0]) << 1) ^ ((C[0]) >> (64 - 1))); + E[0][0] ^= D[0]; + B[0][0] = E[0][0]; + E[1][1] ^= D[1]; + B[0][1] = (((E[1][1]) << 44) ^ ((E[1][1]) >> (64 - 44))); + E[2][2] ^= D[2]; + B[0][2] = (((E[2][2]) << 43) ^ ((E[2][2]) >> (64 - 43))); + E[3][3] ^= D[3]; + B[0][3] = (((E[3][3]) << 21) ^ ((E[3][3]) >> (64 - 21))); + E[4][4] ^= D[4]; + B[0][4] = (((E[4][4]) << 14) ^ ((E[4][4]) >> (64 - 14))); + A[0][0] = B[0][0] ^ ((~B[0][1]) & B[0][2]); + A[0][0] ^= KECCAKF1600_ROUND_CONSTANTS[5]; + C[0] = A[0][0]; + A[0][1] = B[0][1] ^ ((~B[0][2]) & B[0][3]); + C[1] = A[0][1]; + A[0][2] = B[0][2] ^ ((~B[0][3]) & B[0][4]); + C[2] = A[0][2]; + A[0][3] = B[0][3] ^ ((~B[0][4]) & B[0][0]); + C[3] = A[0][3]; + A[0][4] = B[0][4] ^ ((~B[0][0]) & B[0][1]); + C[4] = A[0][4]; + E[0][3] ^= D[3]; + B[1][0] = (((E[0][3]) << 28) ^ ((E[0][3]) >> (64 - 28))); + E[1][4] ^= D[4]; + B[1][1] = (((E[1][4]) << 20) ^ ((E[1][4]) >> (64 - 20))); + E[2][0] ^= D[0]; + B[1][2] = (((E[2][0]) << 3) ^ ((E[2][0]) >> (64 - 3))); + E[3][1] ^= D[1]; + B[1][3] = (((E[3][1]) << 45) ^ ((E[3][1]) >> (64 - 45))); + E[4][2] ^= D[2]; + B[1][4] = (((E[4][2]) << 61) ^ ((E[4][2]) >> (64 - 61))); + A[1][0] = B[1][0] ^ ((~B[1][1]) & B[1][2]); + C[0] ^= A[1][0]; + A[1][1] = B[1][1] ^ ((~B[1][2]) & B[1][3]); + C[1] ^= A[1][1]; + A[1][2] = B[1][2] ^ ((~B[1][3]) & B[1][4]); + C[2] ^= A[1][2]; + A[1][3] = B[1][3] ^ ((~B[1][4]) & B[1][0]); + 
C[3] ^= A[1][3]; + A[1][4] = B[1][4] ^ ((~B[1][0]) & B[1][1]); + C[4] ^= A[1][4]; + E[0][1] ^= D[1]; + B[2][0] = (((E[0][1]) << 1) ^ ((E[0][1]) >> (64 - 1))); + E[1][2] ^= D[2]; + B[2][1] = (((E[1][2]) << 6) ^ ((E[1][2]) >> (64 - 6))); + E[2][3] ^= D[3]; + B[2][2] = (((E[2][3]) << 25) ^ ((E[2][3]) >> (64 - 25))); + E[3][4] ^= D[4]; + B[2][3] = (((E[3][4]) << 8) ^ ((E[3][4]) >> (64 - 8))); + E[4][0] ^= D[0]; + B[2][4] = (((E[4][0]) << 18) ^ ((E[4][0]) >> (64 - 18))); + A[2][0] = B[2][0] ^ ((~B[2][1]) & B[2][2]); + C[0] ^= A[2][0]; + A[2][1] = B[2][1] ^ ((~B[2][2]) & B[2][3]); + C[1] ^= A[2][1]; + A[2][2] = B[2][2] ^ ((~B[2][3]) & B[2][4]); + C[2] ^= A[2][2]; + A[2][3] = B[2][3] ^ ((~B[2][4]) & B[2][0]); + C[3] ^= A[2][3]; + A[2][4] = B[2][4] ^ ((~B[2][0]) & B[2][1]); + C[4] ^= A[2][4]; + E[0][4] ^= D[4]; + B[3][0] = (((E[0][4]) << 27) ^ ((E[0][4]) >> (64 - 27))); + E[1][0] ^= D[0]; + B[3][1] = (((E[1][0]) << 36) ^ ((E[1][0]) >> (64 - 36))); + E[2][1] ^= D[1]; + B[3][2] = (((E[2][1]) << 10) ^ ((E[2][1]) >> (64 - 10))); + E[3][2] ^= D[2]; + B[3][3] = (((E[3][2]) << 15) ^ ((E[3][2]) >> (64 - 15))); + E[4][3] ^= D[3]; + B[3][4] = (((E[4][3]) << 56) ^ ((E[4][3]) >> (64 - 56))); + A[3][0] = B[3][0] ^ ((~B[3][1]) & B[3][2]); + C[0] ^= A[3][0]; + A[3][1] = B[3][1] ^ ((~B[3][2]) & B[3][3]); + C[1] ^= A[3][1]; + A[3][2] = B[3][2] ^ ((~B[3][3]) & B[3][4]); + C[2] ^= A[3][2]; + A[3][3] = B[3][3] ^ ((~B[3][4]) & B[3][0]); + C[3] ^= A[3][3]; + A[3][4] = B[3][4] ^ ((~B[3][0]) & B[3][1]); + C[4] ^= A[3][4]; + E[0][2] ^= D[2]; + B[4][0] = (((E[0][2]) << 62) ^ ((E[0][2]) >> (64 - 62))); + E[1][3] ^= D[3]; + B[4][1] = (((E[1][3]) << 55) ^ ((E[1][3]) >> (64 - 55))); + E[2][4] ^= D[4]; + B[4][2] = (((E[2][4]) << 39) ^ ((E[2][4]) >> (64 - 39))); + E[3][0] ^= D[0]; + B[4][3] = (((E[3][0]) << 41) ^ ((E[3][0]) >> (64 - 41))); + E[4][1] ^= D[1]; + B[4][4] = (((E[4][1]) << 2) ^ ((E[4][1]) >> (64 - 2))); + A[4][0] = B[4][0] ^ ((~B[4][1]) & B[4][2]); + C[0] ^= A[4][0]; + A[4][1] = B[4][1] ^ 
((~B[4][2]) & B[4][3]); + C[1] ^= A[4][1]; + A[4][2] = B[4][2] ^ ((~B[4][3]) & B[4][4]); + C[2] ^= A[4][2]; + A[4][3] = B[4][3] ^ ((~B[4][4]) & B[4][0]); + C[3] ^= A[4][3]; + A[4][4] = B[4][4] ^ ((~B[4][0]) & B[4][1]); + C[4] ^= A[4][4]; + D[0] = C[4] ^ (((C[1]) << 1) ^ ((C[1]) >> (64 - 1))); + D[1] = C[0] ^ (((C[2]) << 1) ^ ((C[2]) >> (64 - 1))); + D[2] = C[1] ^ (((C[3]) << 1) ^ ((C[3]) >> (64 - 1))); + D[3] = C[2] ^ (((C[4]) << 1) ^ ((C[4]) >> (64 - 1))); + D[4] = C[3] ^ (((C[0]) << 1) ^ ((C[0]) >> (64 - 1))); + A[0][0] ^= D[0]; + B[0][0] = A[0][0]; + A[1][1] ^= D[1]; + B[0][1] = (((A[1][1]) << 44) ^ ((A[1][1]) >> (64 - 44))); + A[2][2] ^= D[2]; + B[0][2] = (((A[2][2]) << 43) ^ ((A[2][2]) >> (64 - 43))); + A[3][3] ^= D[3]; + B[0][3] = (((A[3][3]) << 21) ^ ((A[3][3]) >> (64 - 21))); + A[4][4] ^= D[4]; + B[0][4] = (((A[4][4]) << 14) ^ ((A[4][4]) >> (64 - 14))); + E[0][0] = B[0][0] ^ ((~B[0][1]) & B[0][2]); + E[0][0] ^= KECCAKF1600_ROUND_CONSTANTS[6]; + C[0] = E[0][0]; + E[0][1] = B[0][1] ^ ((~B[0][2]) & B[0][3]); + C[1] = E[0][1]; + E[0][2] = B[0][2] ^ ((~B[0][3]) & B[0][4]); + C[2] = E[0][2]; + E[0][3] = B[0][3] ^ ((~B[0][4]) & B[0][0]); + C[3] = E[0][3]; + E[0][4] = B[0][4] ^ ((~B[0][0]) & B[0][1]); + C[4] = E[0][4]; + A[0][3] ^= D[3]; + B[1][0] = (((A[0][3]) << 28) ^ ((A[0][3]) >> (64 - 28))); + A[1][4] ^= D[4]; + B[1][1] = (((A[1][4]) << 20) ^ ((A[1][4]) >> (64 - 20))); + A[2][0] ^= D[0]; + B[1][2] = (((A[2][0]) << 3) ^ ((A[2][0]) >> (64 - 3))); + A[3][1] ^= D[1]; + B[1][3] = (((A[3][1]) << 45) ^ ((A[3][1]) >> (64 - 45))); + A[4][2] ^= D[2]; + B[1][4] = (((A[4][2]) << 61) ^ ((A[4][2]) >> (64 - 61))); + E[1][0] = B[1][0] ^ ((~B[1][1]) & B[1][2]); + C[0] ^= E[1][0]; + E[1][1] = B[1][1] ^ ((~B[1][2]) & B[1][3]); + C[1] ^= E[1][1]; + E[1][2] = B[1][2] ^ ((~B[1][3]) & B[1][4]); + C[2] ^= E[1][2]; + E[1][3] = B[1][3] ^ ((~B[1][4]) & B[1][0]); + C[3] ^= E[1][3]; + E[1][4] = B[1][4] ^ ((~B[1][0]) & B[1][1]); + C[4] ^= E[1][4]; + A[0][1] ^= D[1]; + B[2][0] = (((A[0][1]) 
<< 1) ^ ((A[0][1]) >> (64 - 1))); + A[1][2] ^= D[2]; + B[2][1] = (((A[1][2]) << 6) ^ ((A[1][2]) >> (64 - 6))); + A[2][3] ^= D[3]; + B[2][2] = (((A[2][3]) << 25) ^ ((A[2][3]) >> (64 - 25))); + A[3][4] ^= D[4]; + B[2][3] = (((A[3][4]) << 8) ^ ((A[3][4]) >> (64 - 8))); + A[4][0] ^= D[0]; + B[2][4] = (((A[4][0]) << 18) ^ ((A[4][0]) >> (64 - 18))); + E[2][0] = B[2][0] ^ ((~B[2][1]) & B[2][2]); + C[0] ^= E[2][0]; + E[2][1] = B[2][1] ^ ((~B[2][2]) & B[2][3]); + C[1] ^= E[2][1]; + E[2][2] = B[2][2] ^ ((~B[2][3]) & B[2][4]); + C[2] ^= E[2][2]; + E[2][3] = B[2][3] ^ ((~B[2][4]) & B[2][0]); + C[3] ^= E[2][3]; + E[2][4] = B[2][4] ^ ((~B[2][0]) & B[2][1]); + C[4] ^= E[2][4]; + A[0][4] ^= D[4]; + B[3][0] = (((A[0][4]) << 27) ^ ((A[0][4]) >> (64 - 27))); + A[1][0] ^= D[0]; + B[3][1] = (((A[1][0]) << 36) ^ ((A[1][0]) >> (64 - 36))); + A[2][1] ^= D[1]; + B[3][2] = (((A[2][1]) << 10) ^ ((A[2][1]) >> (64 - 10))); + A[3][2] ^= D[2]; + B[3][3] = (((A[3][2]) << 15) ^ ((A[3][2]) >> (64 - 15))); + A[4][3] ^= D[3]; + B[3][4] = (((A[4][3]) << 56) ^ ((A[4][3]) >> (64 - 56))); + E[3][0] = B[3][0] ^ ((~B[3][1]) & B[3][2]); + C[0] ^= E[3][0]; + E[3][1] = B[3][1] ^ ((~B[3][2]) & B[3][3]); + C[1] ^= E[3][1]; + E[3][2] = B[3][2] ^ ((~B[3][3]) & B[3][4]); + C[2] ^= E[3][2]; + E[3][3] = B[3][3] ^ ((~B[3][4]) & B[3][0]); + C[3] ^= E[3][3]; + E[3][4] = B[3][4] ^ ((~B[3][0]) & B[3][1]); + C[4] ^= E[3][4]; + A[0][2] ^= D[2]; + B[4][0] = (((A[0][2]) << 62) ^ ((A[0][2]) >> (64 - 62))); + A[1][3] ^= D[3]; + B[4][1] = (((A[1][3]) << 55) ^ ((A[1][3]) >> (64 - 55))); + A[2][4] ^= D[4]; + B[4][2] = (((A[2][4]) << 39) ^ ((A[2][4]) >> (64 - 39))); + A[3][0] ^= D[0]; + B[4][3] = (((A[3][0]) << 41) ^ ((A[3][0]) >> (64 - 41))); + A[4][1] ^= D[1]; + B[4][4] = (((A[4][1]) << 2) ^ ((A[4][1]) >> (64 - 2))); + E[4][0] = B[4][0] ^ ((~B[4][1]) & B[4][2]); + C[0] ^= E[4][0]; + E[4][1] = B[4][1] ^ ((~B[4][2]) & B[4][3]); + C[1] ^= E[4][1]; + E[4][2] = B[4][2] ^ ((~B[4][3]) & B[4][4]); + C[2] ^= E[4][2]; + E[4][3] = B[4][3] 
^ ((~B[4][4]) & B[4][0]); + C[3] ^= E[4][3]; + E[4][4] = B[4][4] ^ ((~B[4][0]) & B[4][1]); + C[4] ^= E[4][4]; + D[0] = C[4] ^ (((C[1]) << 1) ^ ((C[1]) >> (64 - 1))); + D[1] = C[0] ^ (((C[2]) << 1) ^ ((C[2]) >> (64 - 1))); + D[2] = C[1] ^ (((C[3]) << 1) ^ ((C[3]) >> (64 - 1))); + D[3] = C[2] ^ (((C[4]) << 1) ^ ((C[4]) >> (64 - 1))); + D[4] = C[3] ^ (((C[0]) << 1) ^ ((C[0]) >> (64 - 1))); + E[0][0] ^= D[0]; + B[0][0] = E[0][0]; + E[1][1] ^= D[1]; + B[0][1] = (((E[1][1]) << 44) ^ ((E[1][1]) >> (64 - 44))); + E[2][2] ^= D[2]; + B[0][2] = (((E[2][2]) << 43) ^ ((E[2][2]) >> (64 - 43))); + E[3][3] ^= D[3]; + B[0][3] = (((E[3][3]) << 21) ^ ((E[3][3]) >> (64 - 21))); + E[4][4] ^= D[4]; + B[0][4] = (((E[4][4]) << 14) ^ ((E[4][4]) >> (64 - 14))); + A[0][0] = B[0][0] ^ ((~B[0][1]) & B[0][2]); + A[0][0] ^= KECCAKF1600_ROUND_CONSTANTS[7]; + C[0] = A[0][0]; + A[0][1] = B[0][1] ^ ((~B[0][2]) & B[0][3]); + C[1] = A[0][1]; + A[0][2] = B[0][2] ^ ((~B[0][3]) & B[0][4]); + C[2] = A[0][2]; + A[0][3] = B[0][3] ^ ((~B[0][4]) & B[0][0]); + C[3] = A[0][3]; + A[0][4] = B[0][4] ^ ((~B[0][0]) & B[0][1]); + C[4] = A[0][4]; + E[0][3] ^= D[3]; + B[1][0] = (((E[0][3]) << 28) ^ ((E[0][3]) >> (64 - 28))); + E[1][4] ^= D[4]; + B[1][1] = (((E[1][4]) << 20) ^ ((E[1][4]) >> (64 - 20))); + E[2][0] ^= D[0]; + B[1][2] = (((E[2][0]) << 3) ^ ((E[2][0]) >> (64 - 3))); + E[3][1] ^= D[1]; + B[1][3] = (((E[3][1]) << 45) ^ ((E[3][1]) >> (64 - 45))); + E[4][2] ^= D[2]; + B[1][4] = (((E[4][2]) << 61) ^ ((E[4][2]) >> (64 - 61))); + A[1][0] = B[1][0] ^ ((~B[1][1]) & B[1][2]); + C[0] ^= A[1][0]; + A[1][1] = B[1][1] ^ ((~B[1][2]) & B[1][3]); + C[1] ^= A[1][1]; + A[1][2] = B[1][2] ^ ((~B[1][3]) & B[1][4]); + C[2] ^= A[1][2]; + A[1][3] = B[1][3] ^ ((~B[1][4]) & B[1][0]); + C[3] ^= A[1][3]; + A[1][4] = B[1][4] ^ ((~B[1][0]) & B[1][1]); + C[4] ^= A[1][4]; + E[0][1] ^= D[1]; + B[2][0] = (((E[0][1]) << 1) ^ ((E[0][1]) >> (64 - 1))); + E[1][2] ^= D[2]; + B[2][1] = (((E[1][2]) << 6) ^ ((E[1][2]) >> (64 - 6))); + E[2][3] ^= 
D[3]; + B[2][2] = (((E[2][3]) << 25) ^ ((E[2][3]) >> (64 - 25))); + E[3][4] ^= D[4]; + B[2][3] = (((E[3][4]) << 8) ^ ((E[3][4]) >> (64 - 8))); + E[4][0] ^= D[0]; + B[2][4] = (((E[4][0]) << 18) ^ ((E[4][0]) >> (64 - 18))); + A[2][0] = B[2][0] ^ ((~B[2][1]) & B[2][2]); + C[0] ^= A[2][0]; + A[2][1] = B[2][1] ^ ((~B[2][2]) & B[2][3]); + C[1] ^= A[2][1]; + A[2][2] = B[2][2] ^ ((~B[2][3]) & B[2][4]); + C[2] ^= A[2][2]; + A[2][3] = B[2][3] ^ ((~B[2][4]) & B[2][0]); + C[3] ^= A[2][3]; + A[2][4] = B[2][4] ^ ((~B[2][0]) & B[2][1]); + C[4] ^= A[2][4]; + E[0][4] ^= D[4]; + B[3][0] = (((E[0][4]) << 27) ^ ((E[0][4]) >> (64 - 27))); + E[1][0] ^= D[0]; + B[3][1] = (((E[1][0]) << 36) ^ ((E[1][0]) >> (64 - 36))); + E[2][1] ^= D[1]; + B[3][2] = (((E[2][1]) << 10) ^ ((E[2][1]) >> (64 - 10))); + E[3][2] ^= D[2]; + B[3][3] = (((E[3][2]) << 15) ^ ((E[3][2]) >> (64 - 15))); + E[4][3] ^= D[3]; + B[3][4] = (((E[4][3]) << 56) ^ ((E[4][3]) >> (64 - 56))); + A[3][0] = B[3][0] ^ ((~B[3][1]) & B[3][2]); + C[0] ^= A[3][0]; + A[3][1] = B[3][1] ^ ((~B[3][2]) & B[3][3]); + C[1] ^= A[3][1]; + A[3][2] = B[3][2] ^ ((~B[3][3]) & B[3][4]); + C[2] ^= A[3][2]; + A[3][3] = B[3][3] ^ ((~B[3][4]) & B[3][0]); + C[3] ^= A[3][3]; + A[3][4] = B[3][4] ^ ((~B[3][0]) & B[3][1]); + C[4] ^= A[3][4]; + E[0][2] ^= D[2]; + B[4][0] = (((E[0][2]) << 62) ^ ((E[0][2]) >> (64 - 62))); + E[1][3] ^= D[3]; + B[4][1] = (((E[1][3]) << 55) ^ ((E[1][3]) >> (64 - 55))); + E[2][4] ^= D[4]; + B[4][2] = (((E[2][4]) << 39) ^ ((E[2][4]) >> (64 - 39))); + E[3][0] ^= D[0]; + B[4][3] = (((E[3][0]) << 41) ^ ((E[3][0]) >> (64 - 41))); + E[4][1] ^= D[1]; + B[4][4] = (((E[4][1]) << 2) ^ ((E[4][1]) >> (64 - 2))); + A[4][0] = B[4][0] ^ ((~B[4][1]) & B[4][2]); + C[0] ^= A[4][0]; + A[4][1] = B[4][1] ^ ((~B[4][2]) & B[4][3]); + C[1] ^= A[4][1]; + A[4][2] = B[4][2] ^ ((~B[4][3]) & B[4][4]); + C[2] ^= A[4][2]; + A[4][3] = B[4][3] ^ ((~B[4][4]) & B[4][0]); + C[3] ^= A[4][3]; + A[4][4] = B[4][4] ^ ((~B[4][0]) & B[4][1]); + C[4] ^= A[4][4]; + D[0] = C[4] 
^ (((C[1]) << 1) ^ ((C[1]) >> (64 - 1))); + D[1] = C[0] ^ (((C[2]) << 1) ^ ((C[2]) >> (64 - 1))); + D[2] = C[1] ^ (((C[3]) << 1) ^ ((C[3]) >> (64 - 1))); + D[3] = C[2] ^ (((C[4]) << 1) ^ ((C[4]) >> (64 - 1))); + D[4] = C[3] ^ (((C[0]) << 1) ^ ((C[0]) >> (64 - 1))); + A[0][0] ^= D[0]; + B[0][0] = A[0][0]; + A[1][1] ^= D[1]; + B[0][1] = (((A[1][1]) << 44) ^ ((A[1][1]) >> (64 - 44))); + A[2][2] ^= D[2]; + B[0][2] = (((A[2][2]) << 43) ^ ((A[2][2]) >> (64 - 43))); + A[3][3] ^= D[3]; + B[0][3] = (((A[3][3]) << 21) ^ ((A[3][3]) >> (64 - 21))); + A[4][4] ^= D[4]; + B[0][4] = (((A[4][4]) << 14) ^ ((A[4][4]) >> (64 - 14))); + E[0][0] = B[0][0] ^ ((~B[0][1]) & B[0][2]); + E[0][0] ^= KECCAKF1600_ROUND_CONSTANTS[8]; + C[0] = E[0][0]; + E[0][1] = B[0][1] ^ ((~B[0][2]) & B[0][3]); + C[1] = E[0][1]; + E[0][2] = B[0][2] ^ ((~B[0][3]) & B[0][4]); + C[2] = E[0][2]; + E[0][3] = B[0][3] ^ ((~B[0][4]) & B[0][0]); + C[3] = E[0][3]; + E[0][4] = B[0][4] ^ ((~B[0][0]) & B[0][1]); + C[4] = E[0][4]; + A[0][3] ^= D[3]; + B[1][0] = (((A[0][3]) << 28) ^ ((A[0][3]) >> (64 - 28))); + A[1][4] ^= D[4]; + B[1][1] = (((A[1][4]) << 20) ^ ((A[1][4]) >> (64 - 20))); + A[2][0] ^= D[0]; + B[1][2] = (((A[2][0]) << 3) ^ ((A[2][0]) >> (64 - 3))); + A[3][1] ^= D[1]; + B[1][3] = (((A[3][1]) << 45) ^ ((A[3][1]) >> (64 - 45))); + A[4][2] ^= D[2]; + B[1][4] = (((A[4][2]) << 61) ^ ((A[4][2]) >> (64 - 61))); + E[1][0] = B[1][0] ^ ((~B[1][1]) & B[1][2]); + C[0] ^= E[1][0]; + E[1][1] = B[1][1] ^ ((~B[1][2]) & B[1][3]); + C[1] ^= E[1][1]; + E[1][2] = B[1][2] ^ ((~B[1][3]) & B[1][4]); + C[2] ^= E[1][2]; + E[1][3] = B[1][3] ^ ((~B[1][4]) & B[1][0]); + C[3] ^= E[1][3]; + E[1][4] = B[1][4] ^ ((~B[1][0]) & B[1][1]); + C[4] ^= E[1][4]; + A[0][1] ^= D[1]; + B[2][0] = (((A[0][1]) << 1) ^ ((A[0][1]) >> (64 - 1))); + A[1][2] ^= D[2]; + B[2][1] = (((A[1][2]) << 6) ^ ((A[1][2]) >> (64 - 6))); + A[2][3] ^= D[3]; + B[2][2] = (((A[2][3]) << 25) ^ ((A[2][3]) >> (64 - 25))); + A[3][4] ^= D[4]; + B[2][3] = (((A[3][4]) << 8) ^ ((A[3][4]) 
>> (64 - 8))); + A[4][0] ^= D[0]; + B[2][4] = (((A[4][0]) << 18) ^ ((A[4][0]) >> (64 - 18))); + E[2][0] = B[2][0] ^ ((~B[2][1]) & B[2][2]); + C[0] ^= E[2][0]; + E[2][1] = B[2][1] ^ ((~B[2][2]) & B[2][3]); + C[1] ^= E[2][1]; + E[2][2] = B[2][2] ^ ((~B[2][3]) & B[2][4]); + C[2] ^= E[2][2]; + E[2][3] = B[2][3] ^ ((~B[2][4]) & B[2][0]); + C[3] ^= E[2][3]; + E[2][4] = B[2][4] ^ ((~B[2][0]) & B[2][1]); + C[4] ^= E[2][4]; + A[0][4] ^= D[4]; + B[3][0] = (((A[0][4]) << 27) ^ ((A[0][4]) >> (64 - 27))); + A[1][0] ^= D[0]; + B[3][1] = (((A[1][0]) << 36) ^ ((A[1][0]) >> (64 - 36))); + A[2][1] ^= D[1]; + B[3][2] = (((A[2][1]) << 10) ^ ((A[2][1]) >> (64 - 10))); + A[3][2] ^= D[2]; + B[3][3] = (((A[3][2]) << 15) ^ ((A[3][2]) >> (64 - 15))); + A[4][3] ^= D[3]; + B[3][4] = (((A[4][3]) << 56) ^ ((A[4][3]) >> (64 - 56))); + E[3][0] = B[3][0] ^ ((~B[3][1]) & B[3][2]); + C[0] ^= E[3][0]; + E[3][1] = B[3][1] ^ ((~B[3][2]) & B[3][3]); + C[1] ^= E[3][1]; + E[3][2] = B[3][2] ^ ((~B[3][3]) & B[3][4]); + C[2] ^= E[3][2]; + E[3][3] = B[3][3] ^ ((~B[3][4]) & B[3][0]); + C[3] ^= E[3][3]; + E[3][4] = B[3][4] ^ ((~B[3][0]) & B[3][1]); + C[4] ^= E[3][4]; + A[0][2] ^= D[2]; + B[4][0] = (((A[0][2]) << 62) ^ ((A[0][2]) >> (64 - 62))); + A[1][3] ^= D[3]; + B[4][1] = (((A[1][3]) << 55) ^ ((A[1][3]) >> (64 - 55))); + A[2][4] ^= D[4]; + B[4][2] = (((A[2][4]) << 39) ^ ((A[2][4]) >> (64 - 39))); + A[3][0] ^= D[0]; + B[4][3] = (((A[3][0]) << 41) ^ ((A[3][0]) >> (64 - 41))); + A[4][1] ^= D[1]; + B[4][4] = (((A[4][1]) << 2) ^ ((A[4][1]) >> (64 - 2))); + E[4][0] = B[4][0] ^ ((~B[4][1]) & B[4][2]); + C[0] ^= E[4][0]; + E[4][1] = B[4][1] ^ ((~B[4][2]) & B[4][3]); + C[1] ^= E[4][1]; + E[4][2] = B[4][2] ^ ((~B[4][3]) & B[4][4]); + C[2] ^= E[4][2]; + E[4][3] = B[4][3] ^ ((~B[4][4]) & B[4][0]); + C[3] ^= E[4][3]; + E[4][4] = B[4][4] ^ ((~B[4][0]) & B[4][1]); + C[4] ^= E[4][4]; + D[0] = C[4] ^ (((C[1]) << 1) ^ ((C[1]) >> (64 - 1))); + D[1] = C[0] ^ (((C[2]) << 1) ^ ((C[2]) >> (64 - 1))); + D[2] = C[1] ^ (((C[3]) << 1) 
^ ((C[3]) >> (64 - 1))); + D[3] = C[2] ^ (((C[4]) << 1) ^ ((C[4]) >> (64 - 1))); + D[4] = C[3] ^ (((C[0]) << 1) ^ ((C[0]) >> (64 - 1))); + E[0][0] ^= D[0]; + B[0][0] = E[0][0]; + E[1][1] ^= D[1]; + B[0][1] = (((E[1][1]) << 44) ^ ((E[1][1]) >> (64 - 44))); + E[2][2] ^= D[2]; + B[0][2] = (((E[2][2]) << 43) ^ ((E[2][2]) >> (64 - 43))); + E[3][3] ^= D[3]; + B[0][3] = (((E[3][3]) << 21) ^ ((E[3][3]) >> (64 - 21))); + E[4][4] ^= D[4]; + B[0][4] = (((E[4][4]) << 14) ^ ((E[4][4]) >> (64 - 14))); + A[0][0] = B[0][0] ^ ((~B[0][1]) & B[0][2]); + A[0][0] ^= KECCAKF1600_ROUND_CONSTANTS[9]; + C[0] = A[0][0]; + A[0][1] = B[0][1] ^ ((~B[0][2]) & B[0][3]); + C[1] = A[0][1]; + A[0][2] = B[0][2] ^ ((~B[0][3]) & B[0][4]); + C[2] = A[0][2]; + A[0][3] = B[0][3] ^ ((~B[0][4]) & B[0][0]); + C[3] = A[0][3]; + A[0][4] = B[0][4] ^ ((~B[0][0]) & B[0][1]); + C[4] = A[0][4]; + E[0][3] ^= D[3]; + B[1][0] = (((E[0][3]) << 28) ^ ((E[0][3]) >> (64 - 28))); + E[1][4] ^= D[4]; + B[1][1] = (((E[1][4]) << 20) ^ ((E[1][4]) >> (64 - 20))); + E[2][0] ^= D[0]; + B[1][2] = (((E[2][0]) << 3) ^ ((E[2][0]) >> (64 - 3))); + E[3][1] ^= D[1]; + B[1][3] = (((E[3][1]) << 45) ^ ((E[3][1]) >> (64 - 45))); + E[4][2] ^= D[2]; + B[1][4] = (((E[4][2]) << 61) ^ ((E[4][2]) >> (64 - 61))); + A[1][0] = B[1][0] ^ ((~B[1][1]) & B[1][2]); + C[0] ^= A[1][0]; + A[1][1] = B[1][1] ^ ((~B[1][2]) & B[1][3]); + C[1] ^= A[1][1]; + A[1][2] = B[1][2] ^ ((~B[1][3]) & B[1][4]); + C[2] ^= A[1][2]; + A[1][3] = B[1][3] ^ ((~B[1][4]) & B[1][0]); + C[3] ^= A[1][3]; + A[1][4] = B[1][4] ^ ((~B[1][0]) & B[1][1]); + C[4] ^= A[1][4]; + E[0][1] ^= D[1]; + B[2][0] = (((E[0][1]) << 1) ^ ((E[0][1]) >> (64 - 1))); + E[1][2] ^= D[2]; + B[2][1] = (((E[1][2]) << 6) ^ ((E[1][2]) >> (64 - 6))); + E[2][3] ^= D[3]; + B[2][2] = (((E[2][3]) << 25) ^ ((E[2][3]) >> (64 - 25))); + E[3][4] ^= D[4]; + B[2][3] = (((E[3][4]) << 8) ^ ((E[3][4]) >> (64 - 8))); + E[4][0] ^= D[0]; + B[2][4] = (((E[4][0]) << 18) ^ ((E[4][0]) >> (64 - 18))); + A[2][0] = B[2][0] ^ ((~B[2][1]) 
& B[2][2]); + C[0] ^= A[2][0]; + A[2][1] = B[2][1] ^ ((~B[2][2]) & B[2][3]); + C[1] ^= A[2][1]; + A[2][2] = B[2][2] ^ ((~B[2][3]) & B[2][4]); + C[2] ^= A[2][2]; + A[2][3] = B[2][3] ^ ((~B[2][4]) & B[2][0]); + C[3] ^= A[2][3]; + A[2][4] = B[2][4] ^ ((~B[2][0]) & B[2][1]); + C[4] ^= A[2][4]; + E[0][4] ^= D[4]; + B[3][0] = (((E[0][4]) << 27) ^ ((E[0][4]) >> (64 - 27))); + E[1][0] ^= D[0]; + B[3][1] = (((E[1][0]) << 36) ^ ((E[1][0]) >> (64 - 36))); + E[2][1] ^= D[1]; + B[3][2] = (((E[2][1]) << 10) ^ ((E[2][1]) >> (64 - 10))); + E[3][2] ^= D[2]; + B[3][3] = (((E[3][2]) << 15) ^ ((E[3][2]) >> (64 - 15))); + E[4][3] ^= D[3]; + B[3][4] = (((E[4][3]) << 56) ^ ((E[4][3]) >> (64 - 56))); + A[3][0] = B[3][0] ^ ((~B[3][1]) & B[3][2]); + C[0] ^= A[3][0]; + A[3][1] = B[3][1] ^ ((~B[3][2]) & B[3][3]); + C[1] ^= A[3][1]; + A[3][2] = B[3][2] ^ ((~B[3][3]) & B[3][4]); + C[2] ^= A[3][2]; + A[3][3] = B[3][3] ^ ((~B[3][4]) & B[3][0]); + C[3] ^= A[3][3]; + A[3][4] = B[3][4] ^ ((~B[3][0]) & B[3][1]); + C[4] ^= A[3][4]; + E[0][2] ^= D[2]; + B[4][0] = (((E[0][2]) << 62) ^ ((E[0][2]) >> (64 - 62))); + E[1][3] ^= D[3]; + B[4][1] = (((E[1][3]) << 55) ^ ((E[1][3]) >> (64 - 55))); + E[2][4] ^= D[4]; + B[4][2] = (((E[2][4]) << 39) ^ ((E[2][4]) >> (64 - 39))); + E[3][0] ^= D[0]; + B[4][3] = (((E[3][0]) << 41) ^ ((E[3][0]) >> (64 - 41))); + E[4][1] ^= D[1]; + B[4][4] = (((E[4][1]) << 2) ^ ((E[4][1]) >> (64 - 2))); + A[4][0] = B[4][0] ^ ((~B[4][1]) & B[4][2]); + C[0] ^= A[4][0]; + A[4][1] = B[4][1] ^ ((~B[4][2]) & B[4][3]); + C[1] ^= A[4][1]; + A[4][2] = B[4][2] ^ ((~B[4][3]) & B[4][4]); + C[2] ^= A[4][2]; + A[4][3] = B[4][3] ^ ((~B[4][4]) & B[4][0]); + C[3] ^= A[4][3]; + A[4][4] = B[4][4] ^ ((~B[4][0]) & B[4][1]); + C[4] ^= A[4][4]; + D[0] = C[4] ^ (((C[1]) << 1) ^ ((C[1]) >> (64 - 1))); + D[1] = C[0] ^ (((C[2]) << 1) ^ ((C[2]) >> (64 - 1))); + D[2] = C[1] ^ (((C[3]) << 1) ^ ((C[3]) >> (64 - 1))); + D[3] = C[2] ^ (((C[4]) << 1) ^ ((C[4]) >> (64 - 1))); + D[4] = C[3] ^ (((C[0]) << 1) ^ ((C[0]) >> 
(64 - 1))); + A[0][0] ^= D[0]; + B[0][0] = A[0][0]; + A[1][1] ^= D[1]; + B[0][1] = (((A[1][1]) << 44) ^ ((A[1][1]) >> (64 - 44))); + A[2][2] ^= D[2]; + B[0][2] = (((A[2][2]) << 43) ^ ((A[2][2]) >> (64 - 43))); + A[3][3] ^= D[3]; + B[0][3] = (((A[3][3]) << 21) ^ ((A[3][3]) >> (64 - 21))); + A[4][4] ^= D[4]; + B[0][4] = (((A[4][4]) << 14) ^ ((A[4][4]) >> (64 - 14))); + E[0][0] = B[0][0] ^ ((~B[0][1]) & B[0][2]); + E[0][0] ^= KECCAKF1600_ROUND_CONSTANTS[10]; + C[0] = E[0][0]; + E[0][1] = B[0][1] ^ ((~B[0][2]) & B[0][3]); + C[1] = E[0][1]; + E[0][2] = B[0][2] ^ ((~B[0][3]) & B[0][4]); + C[2] = E[0][2]; + E[0][3] = B[0][3] ^ ((~B[0][4]) & B[0][0]); + C[3] = E[0][3]; + E[0][4] = B[0][4] ^ ((~B[0][0]) & B[0][1]); + C[4] = E[0][4]; + A[0][3] ^= D[3]; + B[1][0] = (((A[0][3]) << 28) ^ ((A[0][3]) >> (64 - 28))); + A[1][4] ^= D[4]; + B[1][1] = (((A[1][4]) << 20) ^ ((A[1][4]) >> (64 - 20))); + A[2][0] ^= D[0]; + B[1][2] = (((A[2][0]) << 3) ^ ((A[2][0]) >> (64 - 3))); + A[3][1] ^= D[1]; + B[1][3] = (((A[3][1]) << 45) ^ ((A[3][1]) >> (64 - 45))); + A[4][2] ^= D[2]; + B[1][4] = (((A[4][2]) << 61) ^ ((A[4][2]) >> (64 - 61))); + E[1][0] = B[1][0] ^ ((~B[1][1]) & B[1][2]); + C[0] ^= E[1][0]; + E[1][1] = B[1][1] ^ ((~B[1][2]) & B[1][3]); + C[1] ^= E[1][1]; + E[1][2] = B[1][2] ^ ((~B[1][3]) & B[1][4]); + C[2] ^= E[1][2]; + E[1][3] = B[1][3] ^ ((~B[1][4]) & B[1][0]); + C[3] ^= E[1][3]; + E[1][4] = B[1][4] ^ ((~B[1][0]) & B[1][1]); + C[4] ^= E[1][4]; + A[0][1] ^= D[1]; + B[2][0] = (((A[0][1]) << 1) ^ ((A[0][1]) >> (64 - 1))); + A[1][2] ^= D[2]; + B[2][1] = (((A[1][2]) << 6) ^ ((A[1][2]) >> (64 - 6))); + A[2][3] ^= D[3]; + B[2][2] = (((A[2][3]) << 25) ^ ((A[2][3]) >> (64 - 25))); + A[3][4] ^= D[4]; + B[2][3] = (((A[3][4]) << 8) ^ ((A[3][4]) >> (64 - 8))); + A[4][0] ^= D[0]; + B[2][4] = (((A[4][0]) << 18) ^ ((A[4][0]) >> (64 - 18))); + E[2][0] = B[2][0] ^ ((~B[2][1]) & B[2][2]); + C[0] ^= E[2][0]; + E[2][1] = B[2][1] ^ ((~B[2][2]) & B[2][3]); + C[1] ^= E[2][1]; + E[2][2] = B[2][2] ^ 
((~B[2][3]) & B[2][4]); + C[2] ^= E[2][2]; + E[2][3] = B[2][3] ^ ((~B[2][4]) & B[2][0]); + C[3] ^= E[2][3]; + E[2][4] = B[2][4] ^ ((~B[2][0]) & B[2][1]); + C[4] ^= E[2][4]; + A[0][4] ^= D[4]; + B[3][0] = (((A[0][4]) << 27) ^ ((A[0][4]) >> (64 - 27))); + A[1][0] ^= D[0]; + B[3][1] = (((A[1][0]) << 36) ^ ((A[1][0]) >> (64 - 36))); + A[2][1] ^= D[1]; + B[3][2] = (((A[2][1]) << 10) ^ ((A[2][1]) >> (64 - 10))); + A[3][2] ^= D[2]; + B[3][3] = (((A[3][2]) << 15) ^ ((A[3][2]) >> (64 - 15))); + A[4][3] ^= D[3]; + B[3][4] = (((A[4][3]) << 56) ^ ((A[4][3]) >> (64 - 56))); + E[3][0] = B[3][0] ^ ((~B[3][1]) & B[3][2]); + C[0] ^= E[3][0]; + E[3][1] = B[3][1] ^ ((~B[3][2]) & B[3][3]); + C[1] ^= E[3][1]; + E[3][2] = B[3][2] ^ ((~B[3][3]) & B[3][4]); + C[2] ^= E[3][2]; + E[3][3] = B[3][3] ^ ((~B[3][4]) & B[3][0]); + C[3] ^= E[3][3]; + E[3][4] = B[3][4] ^ ((~B[3][0]) & B[3][1]); + C[4] ^= E[3][4]; + A[0][2] ^= D[2]; + B[4][0] = (((A[0][2]) << 62) ^ ((A[0][2]) >> (64 - 62))); + A[1][3] ^= D[3]; + B[4][1] = (((A[1][3]) << 55) ^ ((A[1][3]) >> (64 - 55))); + A[2][4] ^= D[4]; + B[4][2] = (((A[2][4]) << 39) ^ ((A[2][4]) >> (64 - 39))); + A[3][0] ^= D[0]; + B[4][3] = (((A[3][0]) << 41) ^ ((A[3][0]) >> (64 - 41))); + A[4][1] ^= D[1]; + B[4][4] = (((A[4][1]) << 2) ^ ((A[4][1]) >> (64 - 2))); + E[4][0] = B[4][0] ^ ((~B[4][1]) & B[4][2]); + C[0] ^= E[4][0]; + E[4][1] = B[4][1] ^ ((~B[4][2]) & B[4][3]); + C[1] ^= E[4][1]; + E[4][2] = B[4][2] ^ ((~B[4][3]) & B[4][4]); + C[2] ^= E[4][2]; + E[4][3] = B[4][3] ^ ((~B[4][4]) & B[4][0]); + C[3] ^= E[4][3]; + E[4][4] = B[4][4] ^ ((~B[4][0]) & B[4][1]); + C[4] ^= E[4][4]; + D[0] = C[4] ^ (((C[1]) << 1) ^ ((C[1]) >> (64 - 1))); + D[1] = C[0] ^ (((C[2]) << 1) ^ ((C[2]) >> (64 - 1))); + D[2] = C[1] ^ (((C[3]) << 1) ^ ((C[3]) >> (64 - 1))); + D[3] = C[2] ^ (((C[4]) << 1) ^ ((C[4]) >> (64 - 1))); + D[4] = C[3] ^ (((C[0]) << 1) ^ ((C[0]) >> (64 - 1))); + E[0][0] ^= D[0]; + B[0][0] = E[0][0]; + E[1][1] ^= D[1]; + B[0][1] = (((E[1][1]) << 44) ^ ((E[1][1]) >> 
(64 - 44))); + E[2][2] ^= D[2]; + B[0][2] = (((E[2][2]) << 43) ^ ((E[2][2]) >> (64 - 43))); + E[3][3] ^= D[3]; + B[0][3] = (((E[3][3]) << 21) ^ ((E[3][3]) >> (64 - 21))); + E[4][4] ^= D[4]; + B[0][4] = (((E[4][4]) << 14) ^ ((E[4][4]) >> (64 - 14))); + A[0][0] = B[0][0] ^ ((~B[0][1]) & B[0][2]); + A[0][0] ^= KECCAKF1600_ROUND_CONSTANTS[11]; + C[0] = A[0][0]; + A[0][1] = B[0][1] ^ ((~B[0][2]) & B[0][3]); + C[1] = A[0][1]; + A[0][2] = B[0][2] ^ ((~B[0][3]) & B[0][4]); + C[2] = A[0][2]; + A[0][3] = B[0][3] ^ ((~B[0][4]) & B[0][0]); + C[3] = A[0][3]; + A[0][4] = B[0][4] ^ ((~B[0][0]) & B[0][1]); + C[4] = A[0][4]; + E[0][3] ^= D[3]; + B[1][0] = (((E[0][3]) << 28) ^ ((E[0][3]) >> (64 - 28))); + E[1][4] ^= D[4]; + B[1][1] = (((E[1][4]) << 20) ^ ((E[1][4]) >> (64 - 20))); + E[2][0] ^= D[0]; + B[1][2] = (((E[2][0]) << 3) ^ ((E[2][0]) >> (64 - 3))); + E[3][1] ^= D[1]; + B[1][3] = (((E[3][1]) << 45) ^ ((E[3][1]) >> (64 - 45))); + E[4][2] ^= D[2]; + B[1][4] = (((E[4][2]) << 61) ^ ((E[4][2]) >> (64 - 61))); + A[1][0] = B[1][0] ^ ((~B[1][1]) & B[1][2]); + C[0] ^= A[1][0]; + A[1][1] = B[1][1] ^ ((~B[1][2]) & B[1][3]); + C[1] ^= A[1][1]; + A[1][2] = B[1][2] ^ ((~B[1][3]) & B[1][4]); + C[2] ^= A[1][2]; + A[1][3] = B[1][3] ^ ((~B[1][4]) & B[1][0]); + C[3] ^= A[1][3]; + A[1][4] = B[1][4] ^ ((~B[1][0]) & B[1][1]); + C[4] ^= A[1][4]; + E[0][1] ^= D[1]; + B[2][0] = (((E[0][1]) << 1) ^ ((E[0][1]) >> (64 - 1))); + E[1][2] ^= D[2]; + B[2][1] = (((E[1][2]) << 6) ^ ((E[1][2]) >> (64 - 6))); + E[2][3] ^= D[3]; + B[2][2] = (((E[2][3]) << 25) ^ ((E[2][3]) >> (64 - 25))); + E[3][4] ^= D[4]; + B[2][3] = (((E[3][4]) << 8) ^ ((E[3][4]) >> (64 - 8))); + E[4][0] ^= D[0]; + B[2][4] = (((E[4][0]) << 18) ^ ((E[4][0]) >> (64 - 18))); + A[2][0] = B[2][0] ^ ((~B[2][1]) & B[2][2]); + C[0] ^= A[2][0]; + A[2][1] = B[2][1] ^ ((~B[2][2]) & B[2][3]); + C[1] ^= A[2][1]; + A[2][2] = B[2][2] ^ ((~B[2][3]) & B[2][4]); + C[2] ^= A[2][2]; + A[2][3] = B[2][3] ^ ((~B[2][4]) & B[2][0]); + C[3] ^= A[2][3]; + A[2][4] = 
B[2][4] ^ ((~B[2][0]) & B[2][1]); + C[4] ^= A[2][4]; + E[0][4] ^= D[4]; + B[3][0] = (((E[0][4]) << 27) ^ ((E[0][4]) >> (64 - 27))); + E[1][0] ^= D[0]; + B[3][1] = (((E[1][0]) << 36) ^ ((E[1][0]) >> (64 - 36))); + E[2][1] ^= D[1]; + B[3][2] = (((E[2][1]) << 10) ^ ((E[2][1]) >> (64 - 10))); + E[3][2] ^= D[2]; + B[3][3] = (((E[3][2]) << 15) ^ ((E[3][2]) >> (64 - 15))); + E[4][3] ^= D[3]; + B[3][4] = (((E[4][3]) << 56) ^ ((E[4][3]) >> (64 - 56))); + A[3][0] = B[3][0] ^ ((~B[3][1]) & B[3][2]); + C[0] ^= A[3][0]; + A[3][1] = B[3][1] ^ ((~B[3][2]) & B[3][3]); + C[1] ^= A[3][1]; + A[3][2] = B[3][2] ^ ((~B[3][3]) & B[3][4]); + C[2] ^= A[3][2]; + A[3][3] = B[3][3] ^ ((~B[3][4]) & B[3][0]); + C[3] ^= A[3][3]; + A[3][4] = B[3][4] ^ ((~B[3][0]) & B[3][1]); + C[4] ^= A[3][4]; + E[0][2] ^= D[2]; + B[4][0] = (((E[0][2]) << 62) ^ ((E[0][2]) >> (64 - 62))); + E[1][3] ^= D[3]; + B[4][1] = (((E[1][3]) << 55) ^ ((E[1][3]) >> (64 - 55))); + E[2][4] ^= D[4]; + B[4][2] = (((E[2][4]) << 39) ^ ((E[2][4]) >> (64 - 39))); + E[3][0] ^= D[0]; + B[4][3] = (((E[3][0]) << 41) ^ ((E[3][0]) >> (64 - 41))); + E[4][1] ^= D[1]; + B[4][4] = (((E[4][1]) << 2) ^ ((E[4][1]) >> (64 - 2))); + A[4][0] = B[4][0] ^ ((~B[4][1]) & B[4][2]); + C[0] ^= A[4][0]; + A[4][1] = B[4][1] ^ ((~B[4][2]) & B[4][3]); + C[1] ^= A[4][1]; + A[4][2] = B[4][2] ^ ((~B[4][3]) & B[4][4]); + C[2] ^= A[4][2]; + A[4][3] = B[4][3] ^ ((~B[4][4]) & B[4][0]); + C[3] ^= A[4][3]; + A[4][4] = B[4][4] ^ ((~B[4][0]) & B[4][1]); + C[4] ^= A[4][4]; + D[0] = C[4] ^ (((C[1]) << 1) ^ ((C[1]) >> (64 - 1))); + D[1] = C[0] ^ (((C[2]) << 1) ^ ((C[2]) >> (64 - 1))); + D[2] = C[1] ^ (((C[3]) << 1) ^ ((C[3]) >> (64 - 1))); + D[3] = C[2] ^ (((C[4]) << 1) ^ ((C[4]) >> (64 - 1))); + D[4] = C[3] ^ (((C[0]) << 1) ^ ((C[0]) >> (64 - 1))); + A[0][0] ^= D[0]; + B[0][0] = A[0][0]; + A[1][1] ^= D[1]; + B[0][1] = (((A[1][1]) << 44) ^ ((A[1][1]) >> (64 - 44))); + A[2][2] ^= D[2]; + B[0][2] = (((A[2][2]) << 43) ^ ((A[2][2]) >> (64 - 43))); + A[3][3] ^= D[3]; + B[0][3] = 
(((A[3][3]) << 21) ^ ((A[3][3]) >> (64 - 21))); + A[4][4] ^= D[4]; + B[0][4] = (((A[4][4]) << 14) ^ ((A[4][4]) >> (64 - 14))); + E[0][0] = B[0][0] ^ ((~B[0][1]) & B[0][2]); + E[0][0] ^= KECCAKF1600_ROUND_CONSTANTS[12]; + C[0] = E[0][0]; + E[0][1] = B[0][1] ^ ((~B[0][2]) & B[0][3]); + C[1] = E[0][1]; + E[0][2] = B[0][2] ^ ((~B[0][3]) & B[0][4]); + C[2] = E[0][2]; + E[0][3] = B[0][3] ^ ((~B[0][4]) & B[0][0]); + C[3] = E[0][3]; + E[0][4] = B[0][4] ^ ((~B[0][0]) & B[0][1]); + C[4] = E[0][4]; + A[0][3] ^= D[3]; + B[1][0] = (((A[0][3]) << 28) ^ ((A[0][3]) >> (64 - 28))); + A[1][4] ^= D[4]; + B[1][1] = (((A[1][4]) << 20) ^ ((A[1][4]) >> (64 - 20))); + A[2][0] ^= D[0]; + B[1][2] = (((A[2][0]) << 3) ^ ((A[2][0]) >> (64 - 3))); + A[3][1] ^= D[1]; + B[1][3] = (((A[3][1]) << 45) ^ ((A[3][1]) >> (64 - 45))); + A[4][2] ^= D[2]; + B[1][4] = (((A[4][2]) << 61) ^ ((A[4][2]) >> (64 - 61))); + E[1][0] = B[1][0] ^ ((~B[1][1]) & B[1][2]); + C[0] ^= E[1][0]; + E[1][1] = B[1][1] ^ ((~B[1][2]) & B[1][3]); + C[1] ^= E[1][1]; + E[1][2] = B[1][2] ^ ((~B[1][3]) & B[1][4]); + C[2] ^= E[1][2]; + E[1][3] = B[1][3] ^ ((~B[1][4]) & B[1][0]); + C[3] ^= E[1][3]; + E[1][4] = B[1][4] ^ ((~B[1][0]) & B[1][1]); + C[4] ^= E[1][4]; + A[0][1] ^= D[1]; + B[2][0] = (((A[0][1]) << 1) ^ ((A[0][1]) >> (64 - 1))); + A[1][2] ^= D[2]; + B[2][1] = (((A[1][2]) << 6) ^ ((A[1][2]) >> (64 - 6))); + A[2][3] ^= D[3]; + B[2][2] = (((A[2][3]) << 25) ^ ((A[2][3]) >> (64 - 25))); + A[3][4] ^= D[4]; + B[2][3] = (((A[3][4]) << 8) ^ ((A[3][4]) >> (64 - 8))); + A[4][0] ^= D[0]; + B[2][4] = (((A[4][0]) << 18) ^ ((A[4][0]) >> (64 - 18))); + E[2][0] = B[2][0] ^ ((~B[2][1]) & B[2][2]); + C[0] ^= E[2][0]; + E[2][1] = B[2][1] ^ ((~B[2][2]) & B[2][3]); + C[1] ^= E[2][1]; + E[2][2] = B[2][2] ^ ((~B[2][3]) & B[2][4]); + C[2] ^= E[2][2]; + E[2][3] = B[2][3] ^ ((~B[2][4]) & B[2][0]); + C[3] ^= E[2][3]; + E[2][4] = B[2][4] ^ ((~B[2][0]) & B[2][1]); + C[4] ^= E[2][4]; + A[0][4] ^= D[4]; + B[3][0] = (((A[0][4]) << 27) ^ ((A[0][4]) >> (64 - 
27))); + A[1][0] ^= D[0]; + B[3][1] = (((A[1][0]) << 36) ^ ((A[1][0]) >> (64 - 36))); + A[2][1] ^= D[1]; + B[3][2] = (((A[2][1]) << 10) ^ ((A[2][1]) >> (64 - 10))); + A[3][2] ^= D[2]; + B[3][3] = (((A[3][2]) << 15) ^ ((A[3][2]) >> (64 - 15))); + A[4][3] ^= D[3]; + B[3][4] = (((A[4][3]) << 56) ^ ((A[4][3]) >> (64 - 56))); + E[3][0] = B[3][0] ^ ((~B[3][1]) & B[3][2]); + C[0] ^= E[3][0]; + E[3][1] = B[3][1] ^ ((~B[3][2]) & B[3][3]); + C[1] ^= E[3][1]; + E[3][2] = B[3][2] ^ ((~B[3][3]) & B[3][4]); + C[2] ^= E[3][2]; + E[3][3] = B[3][3] ^ ((~B[3][4]) & B[3][0]); + C[3] ^= E[3][3]; + E[3][4] = B[3][4] ^ ((~B[3][0]) & B[3][1]); + C[4] ^= E[3][4]; + A[0][2] ^= D[2]; + B[4][0] = (((A[0][2]) << 62) ^ ((A[0][2]) >> (64 - 62))); + A[1][3] ^= D[3]; + B[4][1] = (((A[1][3]) << 55) ^ ((A[1][3]) >> (64 - 55))); + A[2][4] ^= D[4]; + B[4][2] = (((A[2][4]) << 39) ^ ((A[2][4]) >> (64 - 39))); + A[3][0] ^= D[0]; + B[4][3] = (((A[3][0]) << 41) ^ ((A[3][0]) >> (64 - 41))); + A[4][1] ^= D[1]; + B[4][4] = (((A[4][1]) << 2) ^ ((A[4][1]) >> (64 - 2))); + E[4][0] = B[4][0] ^ ((~B[4][1]) & B[4][2]); + C[0] ^= E[4][0]; + E[4][1] = B[4][1] ^ ((~B[4][2]) & B[4][3]); + C[1] ^= E[4][1]; + E[4][2] = B[4][2] ^ ((~B[4][3]) & B[4][4]); + C[2] ^= E[4][2]; + E[4][3] = B[4][3] ^ ((~B[4][4]) & B[4][0]); + C[3] ^= E[4][3]; + E[4][4] = B[4][4] ^ ((~B[4][0]) & B[4][1]); + C[4] ^= E[4][4]; + D[0] = C[4] ^ (((C[1]) << 1) ^ ((C[1]) >> (64 - 1))); + D[1] = C[0] ^ (((C[2]) << 1) ^ ((C[2]) >> (64 - 1))); + D[2] = C[1] ^ (((C[3]) << 1) ^ ((C[3]) >> (64 - 1))); + D[3] = C[2] ^ (((C[4]) << 1) ^ ((C[4]) >> (64 - 1))); + D[4] = C[3] ^ (((C[0]) << 1) ^ ((C[0]) >> (64 - 1))); + E[0][0] ^= D[0]; + B[0][0] = E[0][0]; + E[1][1] ^= D[1]; + B[0][1] = (((E[1][1]) << 44) ^ ((E[1][1]) >> (64 - 44))); + E[2][2] ^= D[2]; + B[0][2] = (((E[2][2]) << 43) ^ ((E[2][2]) >> (64 - 43))); + E[3][3] ^= D[3]; + B[0][3] = (((E[3][3]) << 21) ^ ((E[3][3]) >> (64 - 21))); + E[4][4] ^= D[4]; + B[0][4] = (((E[4][4]) << 14) ^ ((E[4][4]) >> (64 - 
14))); + A[0][0] = B[0][0] ^ ((~B[0][1]) & B[0][2]); + A[0][0] ^= KECCAKF1600_ROUND_CONSTANTS[13]; + C[0] = A[0][0]; + A[0][1] = B[0][1] ^ ((~B[0][2]) & B[0][3]); + C[1] = A[0][1]; + A[0][2] = B[0][2] ^ ((~B[0][3]) & B[0][4]); + C[2] = A[0][2]; + A[0][3] = B[0][3] ^ ((~B[0][4]) & B[0][0]); + C[3] = A[0][3]; + A[0][4] = B[0][4] ^ ((~B[0][0]) & B[0][1]); + C[4] = A[0][4]; + E[0][3] ^= D[3]; + B[1][0] = (((E[0][3]) << 28) ^ ((E[0][3]) >> (64 - 28))); + E[1][4] ^= D[4]; + B[1][1] = (((E[1][4]) << 20) ^ ((E[1][4]) >> (64 - 20))); + E[2][0] ^= D[0]; + B[1][2] = (((E[2][0]) << 3) ^ ((E[2][0]) >> (64 - 3))); + E[3][1] ^= D[1]; + B[1][3] = (((E[3][1]) << 45) ^ ((E[3][1]) >> (64 - 45))); + E[4][2] ^= D[2]; + B[1][4] = (((E[4][2]) << 61) ^ ((E[4][2]) >> (64 - 61))); + A[1][0] = B[1][0] ^ ((~B[1][1]) & B[1][2]); + C[0] ^= A[1][0]; + A[1][1] = B[1][1] ^ ((~B[1][2]) & B[1][3]); + C[1] ^= A[1][1]; + A[1][2] = B[1][2] ^ ((~B[1][3]) & B[1][4]); + C[2] ^= A[1][2]; + A[1][3] = B[1][3] ^ ((~B[1][4]) & B[1][0]); + C[3] ^= A[1][3]; + A[1][4] = B[1][4] ^ ((~B[1][0]) & B[1][1]); + C[4] ^= A[1][4]; + E[0][1] ^= D[1]; + B[2][0] = (((E[0][1]) << 1) ^ ((E[0][1]) >> (64 - 1))); + E[1][2] ^= D[2]; + B[2][1] = (((E[1][2]) << 6) ^ ((E[1][2]) >> (64 - 6))); + E[2][3] ^= D[3]; + B[2][2] = (((E[2][3]) << 25) ^ ((E[2][3]) >> (64 - 25))); + E[3][4] ^= D[4]; + B[2][3] = (((E[3][4]) << 8) ^ ((E[3][4]) >> (64 - 8))); + E[4][0] ^= D[0]; + B[2][4] = (((E[4][0]) << 18) ^ ((E[4][0]) >> (64 - 18))); + A[2][0] = B[2][0] ^ ((~B[2][1]) & B[2][2]); + C[0] ^= A[2][0]; + A[2][1] = B[2][1] ^ ((~B[2][2]) & B[2][3]); + C[1] ^= A[2][1]; + A[2][2] = B[2][2] ^ ((~B[2][3]) & B[2][4]); + C[2] ^= A[2][2]; + A[2][3] = B[2][3] ^ ((~B[2][4]) & B[2][0]); + C[3] ^= A[2][3]; + A[2][4] = B[2][4] ^ ((~B[2][0]) & B[2][1]); + C[4] ^= A[2][4]; + E[0][4] ^= D[4]; + B[3][0] = (((E[0][4]) << 27) ^ ((E[0][4]) >> (64 - 27))); + E[1][0] ^= D[0]; + B[3][1] = (((E[1][0]) << 36) ^ ((E[1][0]) >> (64 - 36))); + E[2][1] ^= D[1]; + B[3][2] = 
(((E[2][1]) << 10) ^ ((E[2][1]) >> (64 - 10))); + E[3][2] ^= D[2]; + B[3][3] = (((E[3][2]) << 15) ^ ((E[3][2]) >> (64 - 15))); + E[4][3] ^= D[3]; + B[3][4] = (((E[4][3]) << 56) ^ ((E[4][3]) >> (64 - 56))); + A[3][0] = B[3][0] ^ ((~B[3][1]) & B[3][2]); + C[0] ^= A[3][0]; + A[3][1] = B[3][1] ^ ((~B[3][2]) & B[3][3]); + C[1] ^= A[3][1]; + A[3][2] = B[3][2] ^ ((~B[3][3]) & B[3][4]); + C[2] ^= A[3][2]; + A[3][3] = B[3][3] ^ ((~B[3][4]) & B[3][0]); + C[3] ^= A[3][3]; + A[3][4] = B[3][4] ^ ((~B[3][0]) & B[3][1]); + C[4] ^= A[3][4]; + E[0][2] ^= D[2]; + B[4][0] = (((E[0][2]) << 62) ^ ((E[0][2]) >> (64 - 62))); + E[1][3] ^= D[3]; + B[4][1] = (((E[1][3]) << 55) ^ ((E[1][3]) >> (64 - 55))); + E[2][4] ^= D[4]; + B[4][2] = (((E[2][4]) << 39) ^ ((E[2][4]) >> (64 - 39))); + E[3][0] ^= D[0]; + B[4][3] = (((E[3][0]) << 41) ^ ((E[3][0]) >> (64 - 41))); + E[4][1] ^= D[1]; + B[4][4] = (((E[4][1]) << 2) ^ ((E[4][1]) >> (64 - 2))); + A[4][0] = B[4][0] ^ ((~B[4][1]) & B[4][2]); + C[0] ^= A[4][0]; + A[4][1] = B[4][1] ^ ((~B[4][2]) & B[4][3]); + C[1] ^= A[4][1]; + A[4][2] = B[4][2] ^ ((~B[4][3]) & B[4][4]); + C[2] ^= A[4][2]; + A[4][3] = B[4][3] ^ ((~B[4][4]) & B[4][0]); + C[3] ^= A[4][3]; + A[4][4] = B[4][4] ^ ((~B[4][0]) & B[4][1]); + C[4] ^= A[4][4]; + D[0] = C[4] ^ (((C[1]) << 1) ^ ((C[1]) >> (64 - 1))); + D[1] = C[0] ^ (((C[2]) << 1) ^ ((C[2]) >> (64 - 1))); + D[2] = C[1] ^ (((C[3]) << 1) ^ ((C[3]) >> (64 - 1))); + D[3] = C[2] ^ (((C[4]) << 1) ^ ((C[4]) >> (64 - 1))); + D[4] = C[3] ^ (((C[0]) << 1) ^ ((C[0]) >> (64 - 1))); + A[0][0] ^= D[0]; + B[0][0] = A[0][0]; + A[1][1] ^= D[1]; + B[0][1] = (((A[1][1]) << 44) ^ ((A[1][1]) >> (64 - 44))); + A[2][2] ^= D[2]; + B[0][2] = (((A[2][2]) << 43) ^ ((A[2][2]) >> (64 - 43))); + A[3][3] ^= D[3]; + B[0][3] = (((A[3][3]) << 21) ^ ((A[3][3]) >> (64 - 21))); + A[4][4] ^= D[4]; + B[0][4] = (((A[4][4]) << 14) ^ ((A[4][4]) >> (64 - 14))); + E[0][0] = B[0][0] ^ ((~B[0][1]) & B[0][2]); + E[0][0] ^= KECCAKF1600_ROUND_CONSTANTS[14]; + C[0] = E[0][0]; + 
E[0][1] = B[0][1] ^ ((~B[0][2]) & B[0][3]); + C[1] = E[0][1]; + E[0][2] = B[0][2] ^ ((~B[0][3]) & B[0][4]); + C[2] = E[0][2]; + E[0][3] = B[0][3] ^ ((~B[0][4]) & B[0][0]); + C[3] = E[0][3]; + E[0][4] = B[0][4] ^ ((~B[0][0]) & B[0][1]); + C[4] = E[0][4]; + A[0][3] ^= D[3]; + B[1][0] = (((A[0][3]) << 28) ^ ((A[0][3]) >> (64 - 28))); + A[1][4] ^= D[4]; + B[1][1] = (((A[1][4]) << 20) ^ ((A[1][4]) >> (64 - 20))); + A[2][0] ^= D[0]; + B[1][2] = (((A[2][0]) << 3) ^ ((A[2][0]) >> (64 - 3))); + A[3][1] ^= D[1]; + B[1][3] = (((A[3][1]) << 45) ^ ((A[3][1]) >> (64 - 45))); + A[4][2] ^= D[2]; + B[1][4] = (((A[4][2]) << 61) ^ ((A[4][2]) >> (64 - 61))); + E[1][0] = B[1][0] ^ ((~B[1][1]) & B[1][2]); + C[0] ^= E[1][0]; + E[1][1] = B[1][1] ^ ((~B[1][2]) & B[1][3]); + C[1] ^= E[1][1]; + E[1][2] = B[1][2] ^ ((~B[1][3]) & B[1][4]); + C[2] ^= E[1][2]; + E[1][3] = B[1][3] ^ ((~B[1][4]) & B[1][0]); + C[3] ^= E[1][3]; + E[1][4] = B[1][4] ^ ((~B[1][0]) & B[1][1]); + C[4] ^= E[1][4]; + A[0][1] ^= D[1]; + B[2][0] = (((A[0][1]) << 1) ^ ((A[0][1]) >> (64 - 1))); + A[1][2] ^= D[2]; + B[2][1] = (((A[1][2]) << 6) ^ ((A[1][2]) >> (64 - 6))); + A[2][3] ^= D[3]; + B[2][2] = (((A[2][3]) << 25) ^ ((A[2][3]) >> (64 - 25))); + A[3][4] ^= D[4]; + B[2][3] = (((A[3][4]) << 8) ^ ((A[3][4]) >> (64 - 8))); + A[4][0] ^= D[0]; + B[2][4] = (((A[4][0]) << 18) ^ ((A[4][0]) >> (64 - 18))); + E[2][0] = B[2][0] ^ ((~B[2][1]) & B[2][2]); + C[0] ^= E[2][0]; + E[2][1] = B[2][1] ^ ((~B[2][2]) & B[2][3]); + C[1] ^= E[2][1]; + E[2][2] = B[2][2] ^ ((~B[2][3]) & B[2][4]); + C[2] ^= E[2][2]; + E[2][3] = B[2][3] ^ ((~B[2][4]) & B[2][0]); + C[3] ^= E[2][3]; + E[2][4] = B[2][4] ^ ((~B[2][0]) & B[2][1]); + C[4] ^= E[2][4]; + A[0][4] ^= D[4]; + B[3][0] = (((A[0][4]) << 27) ^ ((A[0][4]) >> (64 - 27))); + A[1][0] ^= D[0]; + B[3][1] = (((A[1][0]) << 36) ^ ((A[1][0]) >> (64 - 36))); + A[2][1] ^= D[1]; + B[3][2] = (((A[2][1]) << 10) ^ ((A[2][1]) >> (64 - 10))); + A[3][2] ^= D[2]; + B[3][3] = (((A[3][2]) << 15) ^ ((A[3][2]) >> (64 - 
15))); + A[4][3] ^= D[3]; + B[3][4] = (((A[4][3]) << 56) ^ ((A[4][3]) >> (64 - 56))); + E[3][0] = B[3][0] ^ ((~B[3][1]) & B[3][2]); + C[0] ^= E[3][0]; + E[3][1] = B[3][1] ^ ((~B[3][2]) & B[3][3]); + C[1] ^= E[3][1]; + E[3][2] = B[3][2] ^ ((~B[3][3]) & B[3][4]); + C[2] ^= E[3][2]; + E[3][3] = B[3][3] ^ ((~B[3][4]) & B[3][0]); + C[3] ^= E[3][3]; + E[3][4] = B[3][4] ^ ((~B[3][0]) & B[3][1]); + C[4] ^= E[3][4]; + A[0][2] ^= D[2]; + B[4][0] = (((A[0][2]) << 62) ^ ((A[0][2]) >> (64 - 62))); + A[1][3] ^= D[3]; + B[4][1] = (((A[1][3]) << 55) ^ ((A[1][3]) >> (64 - 55))); + A[2][4] ^= D[4]; + B[4][2] = (((A[2][4]) << 39) ^ ((A[2][4]) >> (64 - 39))); + A[3][0] ^= D[0]; + B[4][3] = (((A[3][0]) << 41) ^ ((A[3][0]) >> (64 - 41))); + A[4][1] ^= D[1]; + B[4][4] = (((A[4][1]) << 2) ^ ((A[4][1]) >> (64 - 2))); + E[4][0] = B[4][0] ^ ((~B[4][1]) & B[4][2]); + C[0] ^= E[4][0]; + E[4][1] = B[4][1] ^ ((~B[4][2]) & B[4][3]); + C[1] ^= E[4][1]; + E[4][2] = B[4][2] ^ ((~B[4][3]) & B[4][4]); + C[2] ^= E[4][2]; + E[4][3] = B[4][3] ^ ((~B[4][4]) & B[4][0]); + C[3] ^= E[4][3]; + E[4][4] = B[4][4] ^ ((~B[4][0]) & B[4][1]); + C[4] ^= E[4][4]; + D[0] = C[4] ^ (((C[1]) << 1) ^ ((C[1]) >> (64 - 1))); + D[1] = C[0] ^ (((C[2]) << 1) ^ ((C[2]) >> (64 - 1))); + D[2] = C[1] ^ (((C[3]) << 1) ^ ((C[3]) >> (64 - 1))); + D[3] = C[2] ^ (((C[4]) << 1) ^ ((C[4]) >> (64 - 1))); + D[4] = C[3] ^ (((C[0]) << 1) ^ ((C[0]) >> (64 - 1))); + E[0][0] ^= D[0]; + B[0][0] = E[0][0]; + E[1][1] ^= D[1]; + B[0][1] = (((E[1][1]) << 44) ^ ((E[1][1]) >> (64 - 44))); + E[2][2] ^= D[2]; + B[0][2] = (((E[2][2]) << 43) ^ ((E[2][2]) >> (64 - 43))); + E[3][3] ^= D[3]; + B[0][3] = (((E[3][3]) << 21) ^ ((E[3][3]) >> (64 - 21))); + E[4][4] ^= D[4]; + B[0][4] = (((E[4][4]) << 14) ^ ((E[4][4]) >> (64 - 14))); + A[0][0] = B[0][0] ^ ((~B[0][1]) & B[0][2]); + A[0][0] ^= KECCAKF1600_ROUND_CONSTANTS[15]; + C[0] = A[0][0]; + A[0][1] = B[0][1] ^ ((~B[0][2]) & B[0][3]); + C[1] = A[0][1]; + A[0][2] = B[0][2] ^ ((~B[0][3]) & B[0][4]); + C[2] = 
A[0][2]; + A[0][3] = B[0][3] ^ ((~B[0][4]) & B[0][0]); + C[3] = A[0][3]; + A[0][4] = B[0][4] ^ ((~B[0][0]) & B[0][1]); + C[4] = A[0][4]; + E[0][3] ^= D[3]; + B[1][0] = (((E[0][3]) << 28) ^ ((E[0][3]) >> (64 - 28))); + E[1][4] ^= D[4]; + B[1][1] = (((E[1][4]) << 20) ^ ((E[1][4]) >> (64 - 20))); + E[2][0] ^= D[0]; + B[1][2] = (((E[2][0]) << 3) ^ ((E[2][0]) >> (64 - 3))); + E[3][1] ^= D[1]; + B[1][3] = (((E[3][1]) << 45) ^ ((E[3][1]) >> (64 - 45))); + E[4][2] ^= D[2]; + B[1][4] = (((E[4][2]) << 61) ^ ((E[4][2]) >> (64 - 61))); + A[1][0] = B[1][0] ^ ((~B[1][1]) & B[1][2]); + C[0] ^= A[1][0]; + A[1][1] = B[1][1] ^ ((~B[1][2]) & B[1][3]); + C[1] ^= A[1][1]; + A[1][2] = B[1][2] ^ ((~B[1][3]) & B[1][4]); + C[2] ^= A[1][2]; + A[1][3] = B[1][3] ^ ((~B[1][4]) & B[1][0]); + C[3] ^= A[1][3]; + A[1][4] = B[1][4] ^ ((~B[1][0]) & B[1][1]); + C[4] ^= A[1][4]; + E[0][1] ^= D[1]; + B[2][0] = (((E[0][1]) << 1) ^ ((E[0][1]) >> (64 - 1))); + E[1][2] ^= D[2]; + B[2][1] = (((E[1][2]) << 6) ^ ((E[1][2]) >> (64 - 6))); + E[2][3] ^= D[3]; + B[2][2] = (((E[2][3]) << 25) ^ ((E[2][3]) >> (64 - 25))); + E[3][4] ^= D[4]; + B[2][3] = (((E[3][4]) << 8) ^ ((E[3][4]) >> (64 - 8))); + E[4][0] ^= D[0]; + B[2][4] = (((E[4][0]) << 18) ^ ((E[4][0]) >> (64 - 18))); + A[2][0] = B[2][0] ^ ((~B[2][1]) & B[2][2]); + C[0] ^= A[2][0]; + A[2][1] = B[2][1] ^ ((~B[2][2]) & B[2][3]); + C[1] ^= A[2][1]; + A[2][2] = B[2][2] ^ ((~B[2][3]) & B[2][4]); + C[2] ^= A[2][2]; + A[2][3] = B[2][3] ^ ((~B[2][4]) & B[2][0]); + C[3] ^= A[2][3]; + A[2][4] = B[2][4] ^ ((~B[2][0]) & B[2][1]); + C[4] ^= A[2][4]; + E[0][4] ^= D[4]; + B[3][0] = (((E[0][4]) << 27) ^ ((E[0][4]) >> (64 - 27))); + E[1][0] ^= D[0]; + B[3][1] = (((E[1][0]) << 36) ^ ((E[1][0]) >> (64 - 36))); + E[2][1] ^= D[1]; + B[3][2] = (((E[2][1]) << 10) ^ ((E[2][1]) >> (64 - 10))); + E[3][2] ^= D[2]; + B[3][3] = (((E[3][2]) << 15) ^ ((E[3][2]) >> (64 - 15))); + E[4][3] ^= D[3]; + B[3][4] = (((E[4][3]) << 56) ^ ((E[4][3]) >> (64 - 56))); + A[3][0] = B[3][0] ^ ((~B[3][1]) & 
B[3][2]); + C[0] ^= A[3][0]; + A[3][1] = B[3][1] ^ ((~B[3][2]) & B[3][3]); + C[1] ^= A[3][1]; + A[3][2] = B[3][2] ^ ((~B[3][3]) & B[3][4]); + C[2] ^= A[3][2]; + A[3][3] = B[3][3] ^ ((~B[3][4]) & B[3][0]); + C[3] ^= A[3][3]; + A[3][4] = B[3][4] ^ ((~B[3][0]) & B[3][1]); + C[4] ^= A[3][4]; + E[0][2] ^= D[2]; + B[4][0] = (((E[0][2]) << 62) ^ ((E[0][2]) >> (64 - 62))); + E[1][3] ^= D[3]; + B[4][1] = (((E[1][3]) << 55) ^ ((E[1][3]) >> (64 - 55))); + E[2][4] ^= D[4]; + B[4][2] = (((E[2][4]) << 39) ^ ((E[2][4]) >> (64 - 39))); + E[3][0] ^= D[0]; + B[4][3] = (((E[3][0]) << 41) ^ ((E[3][0]) >> (64 - 41))); + E[4][1] ^= D[1]; + B[4][4] = (((E[4][1]) << 2) ^ ((E[4][1]) >> (64 - 2))); + A[4][0] = B[4][0] ^ ((~B[4][1]) & B[4][2]); + C[0] ^= A[4][0]; + A[4][1] = B[4][1] ^ ((~B[4][2]) & B[4][3]); + C[1] ^= A[4][1]; + A[4][2] = B[4][2] ^ ((~B[4][3]) & B[4][4]); + C[2] ^= A[4][2]; + A[4][3] = B[4][3] ^ ((~B[4][4]) & B[4][0]); + C[3] ^= A[4][3]; + A[4][4] = B[4][4] ^ ((~B[4][0]) & B[4][1]); + C[4] ^= A[4][4]; + D[0] = C[4] ^ (((C[1]) << 1) ^ ((C[1]) >> (64 - 1))); + D[1] = C[0] ^ (((C[2]) << 1) ^ ((C[2]) >> (64 - 1))); + D[2] = C[1] ^ (((C[3]) << 1) ^ ((C[3]) >> (64 - 1))); + D[3] = C[2] ^ (((C[4]) << 1) ^ ((C[4]) >> (64 - 1))); + D[4] = C[3] ^ (((C[0]) << 1) ^ ((C[0]) >> (64 - 1))); + A[0][0] ^= D[0]; + B[0][0] = A[0][0]; + A[1][1] ^= D[1]; + B[0][1] = (((A[1][1]) << 44) ^ ((A[1][1]) >> (64 - 44))); + A[2][2] ^= D[2]; + B[0][2] = (((A[2][2]) << 43) ^ ((A[2][2]) >> (64 - 43))); + A[3][3] ^= D[3]; + B[0][3] = (((A[3][3]) << 21) ^ ((A[3][3]) >> (64 - 21))); + A[4][4] ^= D[4]; + B[0][4] = (((A[4][4]) << 14) ^ ((A[4][4]) >> (64 - 14))); + E[0][0] = B[0][0] ^ ((~B[0][1]) & B[0][2]); + E[0][0] ^= KECCAKF1600_ROUND_CONSTANTS[16]; + C[0] = E[0][0]; + E[0][1] = B[0][1] ^ ((~B[0][2]) & B[0][3]); + C[1] = E[0][1]; + E[0][2] = B[0][2] ^ ((~B[0][3]) & B[0][4]); + C[2] = E[0][2]; + E[0][3] = B[0][3] ^ ((~B[0][4]) & B[0][0]); + C[3] = E[0][3]; + E[0][4] = B[0][4] ^ ((~B[0][0]) & B[0][1]); + C[4] = 
E[0][4]; + A[0][3] ^= D[3]; + B[1][0] = (((A[0][3]) << 28) ^ ((A[0][3]) >> (64 - 28))); + A[1][4] ^= D[4]; + B[1][1] = (((A[1][4]) << 20) ^ ((A[1][4]) >> (64 - 20))); + A[2][0] ^= D[0]; + B[1][2] = (((A[2][0]) << 3) ^ ((A[2][0]) >> (64 - 3))); + A[3][1] ^= D[1]; + B[1][3] = (((A[3][1]) << 45) ^ ((A[3][1]) >> (64 - 45))); + A[4][2] ^= D[2]; + B[1][4] = (((A[4][2]) << 61) ^ ((A[4][2]) >> (64 - 61))); + E[1][0] = B[1][0] ^ ((~B[1][1]) & B[1][2]); + C[0] ^= E[1][0]; + E[1][1] = B[1][1] ^ ((~B[1][2]) & B[1][3]); + C[1] ^= E[1][1]; + E[1][2] = B[1][2] ^ ((~B[1][3]) & B[1][4]); + C[2] ^= E[1][2]; + E[1][3] = B[1][3] ^ ((~B[1][4]) & B[1][0]); + C[3] ^= E[1][3]; + E[1][4] = B[1][4] ^ ((~B[1][0]) & B[1][1]); + C[4] ^= E[1][4]; + A[0][1] ^= D[1]; + B[2][0] = (((A[0][1]) << 1) ^ ((A[0][1]) >> (64 - 1))); + A[1][2] ^= D[2]; + B[2][1] = (((A[1][2]) << 6) ^ ((A[1][2]) >> (64 - 6))); + A[2][3] ^= D[3]; + B[2][2] = (((A[2][3]) << 25) ^ ((A[2][3]) >> (64 - 25))); + A[3][4] ^= D[4]; + B[2][3] = (((A[3][4]) << 8) ^ ((A[3][4]) >> (64 - 8))); + A[4][0] ^= D[0]; + B[2][4] = (((A[4][0]) << 18) ^ ((A[4][0]) >> (64 - 18))); + E[2][0] = B[2][0] ^ ((~B[2][1]) & B[2][2]); + C[0] ^= E[2][0]; + E[2][1] = B[2][1] ^ ((~B[2][2]) & B[2][3]); + C[1] ^= E[2][1]; + E[2][2] = B[2][2] ^ ((~B[2][3]) & B[2][4]); + C[2] ^= E[2][2]; + E[2][3] = B[2][3] ^ ((~B[2][4]) & B[2][0]); + C[3] ^= E[2][3]; + E[2][4] = B[2][4] ^ ((~B[2][0]) & B[2][1]); + C[4] ^= E[2][4]; + A[0][4] ^= D[4]; + B[3][0] = (((A[0][4]) << 27) ^ ((A[0][4]) >> (64 - 27))); + A[1][0] ^= D[0]; + B[3][1] = (((A[1][0]) << 36) ^ ((A[1][0]) >> (64 - 36))); + A[2][1] ^= D[1]; + B[3][2] = (((A[2][1]) << 10) ^ ((A[2][1]) >> (64 - 10))); + A[3][2] ^= D[2]; + B[3][3] = (((A[3][2]) << 15) ^ ((A[3][2]) >> (64 - 15))); + A[4][3] ^= D[3]; + B[3][4] = (((A[4][3]) << 56) ^ ((A[4][3]) >> (64 - 56))); + E[3][0] = B[3][0] ^ ((~B[3][1]) & B[3][2]); + C[0] ^= E[3][0]; + E[3][1] = B[3][1] ^ ((~B[3][2]) & B[3][3]); + C[1] ^= E[3][1]; + E[3][2] = B[3][2] ^ ((~B[3][3]) 
& B[3][4]); + C[2] ^= E[3][2]; + E[3][3] = B[3][3] ^ ((~B[3][4]) & B[3][0]); + C[3] ^= E[3][3]; + E[3][4] = B[3][4] ^ ((~B[3][0]) & B[3][1]); + C[4] ^= E[3][4]; + A[0][2] ^= D[2]; + B[4][0] = (((A[0][2]) << 62) ^ ((A[0][2]) >> (64 - 62))); + A[1][3] ^= D[3]; + B[4][1] = (((A[1][3]) << 55) ^ ((A[1][3]) >> (64 - 55))); + A[2][4] ^= D[4]; + B[4][2] = (((A[2][4]) << 39) ^ ((A[2][4]) >> (64 - 39))); + A[3][0] ^= D[0]; + B[4][3] = (((A[3][0]) << 41) ^ ((A[3][0]) >> (64 - 41))); + A[4][1] ^= D[1]; + B[4][4] = (((A[4][1]) << 2) ^ ((A[4][1]) >> (64 - 2))); + E[4][0] = B[4][0] ^ ((~B[4][1]) & B[4][2]); + C[0] ^= E[4][0]; + E[4][1] = B[4][1] ^ ((~B[4][2]) & B[4][3]); + C[1] ^= E[4][1]; + E[4][2] = B[4][2] ^ ((~B[4][3]) & B[4][4]); + C[2] ^= E[4][2]; + E[4][3] = B[4][3] ^ ((~B[4][4]) & B[4][0]); + C[3] ^= E[4][3]; + E[4][4] = B[4][4] ^ ((~B[4][0]) & B[4][1]); + C[4] ^= E[4][4]; + D[0] = C[4] ^ (((C[1]) << 1) ^ ((C[1]) >> (64 - 1))); + D[1] = C[0] ^ (((C[2]) << 1) ^ ((C[2]) >> (64 - 1))); + D[2] = C[1] ^ (((C[3]) << 1) ^ ((C[3]) >> (64 - 1))); + D[3] = C[2] ^ (((C[4]) << 1) ^ ((C[4]) >> (64 - 1))); + D[4] = C[3] ^ (((C[0]) << 1) ^ ((C[0]) >> (64 - 1))); + E[0][0] ^= D[0]; + B[0][0] = E[0][0]; + E[1][1] ^= D[1]; + B[0][1] = (((E[1][1]) << 44) ^ ((E[1][1]) >> (64 - 44))); + E[2][2] ^= D[2]; + B[0][2] = (((E[2][2]) << 43) ^ ((E[2][2]) >> (64 - 43))); + E[3][3] ^= D[3]; + B[0][3] = (((E[3][3]) << 21) ^ ((E[3][3]) >> (64 - 21))); + E[4][4] ^= D[4]; + B[0][4] = (((E[4][4]) << 14) ^ ((E[4][4]) >> (64 - 14))); + A[0][0] = B[0][0] ^ ((~B[0][1]) & B[0][2]); + A[0][0] ^= KECCAKF1600_ROUND_CONSTANTS[17]; + C[0] = A[0][0]; + A[0][1] = B[0][1] ^ ((~B[0][2]) & B[0][3]); + C[1] = A[0][1]; + A[0][2] = B[0][2] ^ ((~B[0][3]) & B[0][4]); + C[2] = A[0][2]; + A[0][3] = B[0][3] ^ ((~B[0][4]) & B[0][0]); + C[3] = A[0][3]; + A[0][4] = B[0][4] ^ ((~B[0][0]) & B[0][1]); + C[4] = A[0][4]; + E[0][3] ^= D[3]; + B[1][0] = (((E[0][3]) << 28) ^ ((E[0][3]) >> (64 - 28))); + E[1][4] ^= D[4]; + B[1][1] = 
(((E[1][4]) << 20) ^ ((E[1][4]) >> (64 - 20))); + E[2][0] ^= D[0]; + B[1][2] = (((E[2][0]) << 3) ^ ((E[2][0]) >> (64 - 3))); + E[3][1] ^= D[1]; + B[1][3] = (((E[3][1]) << 45) ^ ((E[3][1]) >> (64 - 45))); + E[4][2] ^= D[2]; + B[1][4] = (((E[4][2]) << 61) ^ ((E[4][2]) >> (64 - 61))); + A[1][0] = B[1][0] ^ ((~B[1][1]) & B[1][2]); + C[0] ^= A[1][0]; + A[1][1] = B[1][1] ^ ((~B[1][2]) & B[1][3]); + C[1] ^= A[1][1]; + A[1][2] = B[1][2] ^ ((~B[1][3]) & B[1][4]); + C[2] ^= A[1][2]; + A[1][3] = B[1][3] ^ ((~B[1][4]) & B[1][0]); + C[3] ^= A[1][3]; + A[1][4] = B[1][4] ^ ((~B[1][0]) & B[1][1]); + C[4] ^= A[1][4]; + E[0][1] ^= D[1]; + B[2][0] = (((E[0][1]) << 1) ^ ((E[0][1]) >> (64 - 1))); + E[1][2] ^= D[2]; + B[2][1] = (((E[1][2]) << 6) ^ ((E[1][2]) >> (64 - 6))); + E[2][3] ^= D[3]; + B[2][2] = (((E[2][3]) << 25) ^ ((E[2][3]) >> (64 - 25))); + E[3][4] ^= D[4]; + B[2][3] = (((E[3][4]) << 8) ^ ((E[3][4]) >> (64 - 8))); + E[4][0] ^= D[0]; + B[2][4] = (((E[4][0]) << 18) ^ ((E[4][0]) >> (64 - 18))); + A[2][0] = B[2][0] ^ ((~B[2][1]) & B[2][2]); + C[0] ^= A[2][0]; + A[2][1] = B[2][1] ^ ((~B[2][2]) & B[2][3]); + C[1] ^= A[2][1]; + A[2][2] = B[2][2] ^ ((~B[2][3]) & B[2][4]); + C[2] ^= A[2][2]; + A[2][3] = B[2][3] ^ ((~B[2][4]) & B[2][0]); + C[3] ^= A[2][3]; + A[2][4] = B[2][4] ^ ((~B[2][0]) & B[2][1]); + C[4] ^= A[2][4]; + E[0][4] ^= D[4]; + B[3][0] = (((E[0][4]) << 27) ^ ((E[0][4]) >> (64 - 27))); + E[1][0] ^= D[0]; + B[3][1] = (((E[1][0]) << 36) ^ ((E[1][0]) >> (64 - 36))); + E[2][1] ^= D[1]; + B[3][2] = (((E[2][1]) << 10) ^ ((E[2][1]) >> (64 - 10))); + E[3][2] ^= D[2]; + B[3][3] = (((E[3][2]) << 15) ^ ((E[3][2]) >> (64 - 15))); + E[4][3] ^= D[3]; + B[3][4] = (((E[4][3]) << 56) ^ ((E[4][3]) >> (64 - 56))); + A[3][0] = B[3][0] ^ ((~B[3][1]) & B[3][2]); + C[0] ^= A[3][0]; + A[3][1] = B[3][1] ^ ((~B[3][2]) & B[3][3]); + C[1] ^= A[3][1]; + A[3][2] = B[3][2] ^ ((~B[3][3]) & B[3][4]); + C[2] ^= A[3][2]; + A[3][3] = B[3][3] ^ ((~B[3][4]) & B[3][0]); + C[3] ^= A[3][3]; + A[3][4] = B[3][4] ^ 
((~B[3][0]) & B[3][1]); + C[4] ^= A[3][4]; + E[0][2] ^= D[2]; + B[4][0] = (((E[0][2]) << 62) ^ ((E[0][2]) >> (64 - 62))); + E[1][3] ^= D[3]; + B[4][1] = (((E[1][3]) << 55) ^ ((E[1][3]) >> (64 - 55))); + E[2][4] ^= D[4]; + B[4][2] = (((E[2][4]) << 39) ^ ((E[2][4]) >> (64 - 39))); + E[3][0] ^= D[0]; + B[4][3] = (((E[3][0]) << 41) ^ ((E[3][0]) >> (64 - 41))); + E[4][1] ^= D[1]; + B[4][4] = (((E[4][1]) << 2) ^ ((E[4][1]) >> (64 - 2))); + A[4][0] = B[4][0] ^ ((~B[4][1]) & B[4][2]); + C[0] ^= A[4][0]; + A[4][1] = B[4][1] ^ ((~B[4][2]) & B[4][3]); + C[1] ^= A[4][1]; + A[4][2] = B[4][2] ^ ((~B[4][3]) & B[4][4]); + C[2] ^= A[4][2]; + A[4][3] = B[4][3] ^ ((~B[4][4]) & B[4][0]); + C[3] ^= A[4][3]; + A[4][4] = B[4][4] ^ ((~B[4][0]) & B[4][1]); + C[4] ^= A[4][4]; + D[0] = C[4] ^ (((C[1]) << 1) ^ ((C[1]) >> (64 - 1))); + D[1] = C[0] ^ (((C[2]) << 1) ^ ((C[2]) >> (64 - 1))); + D[2] = C[1] ^ (((C[3]) << 1) ^ ((C[3]) >> (64 - 1))); + D[3] = C[2] ^ (((C[4]) << 1) ^ ((C[4]) >> (64 - 1))); + D[4] = C[3] ^ (((C[0]) << 1) ^ ((C[0]) >> (64 - 1))); + A[0][0] ^= D[0]; + B[0][0] = A[0][0]; + A[1][1] ^= D[1]; + B[0][1] = (((A[1][1]) << 44) ^ ((A[1][1]) >> (64 - 44))); + A[2][2] ^= D[2]; + B[0][2] = (((A[2][2]) << 43) ^ ((A[2][2]) >> (64 - 43))); + A[3][3] ^= D[3]; + B[0][3] = (((A[3][3]) << 21) ^ ((A[3][3]) >> (64 - 21))); + A[4][4] ^= D[4]; + B[0][4] = (((A[4][4]) << 14) ^ ((A[4][4]) >> (64 - 14))); + E[0][0] = B[0][0] ^ ((~B[0][1]) & B[0][2]); + E[0][0] ^= KECCAKF1600_ROUND_CONSTANTS[18]; + C[0] = E[0][0]; + E[0][1] = B[0][1] ^ ((~B[0][2]) & B[0][3]); + C[1] = E[0][1]; + E[0][2] = B[0][2] ^ ((~B[0][3]) & B[0][4]); + C[2] = E[0][2]; + E[0][3] = B[0][3] ^ ((~B[0][4]) & B[0][0]); + C[3] = E[0][3]; + E[0][4] = B[0][4] ^ ((~B[0][0]) & B[0][1]); + C[4] = E[0][4]; + A[0][3] ^= D[3]; + B[1][0] = (((A[0][3]) << 28) ^ ((A[0][3]) >> (64 - 28))); + A[1][4] ^= D[4]; + B[1][1] = (((A[1][4]) << 20) ^ ((A[1][4]) >> (64 - 20))); + A[2][0] ^= D[0]; + B[1][2] = (((A[2][0]) << 3) ^ ((A[2][0]) >> (64 - 3))); + 
A[3][1] ^= D[1]; + B[1][3] = (((A[3][1]) << 45) ^ ((A[3][1]) >> (64 - 45))); + A[4][2] ^= D[2]; + B[1][4] = (((A[4][2]) << 61) ^ ((A[4][2]) >> (64 - 61))); + E[1][0] = B[1][0] ^ ((~B[1][1]) & B[1][2]); + C[0] ^= E[1][0]; + E[1][1] = B[1][1] ^ ((~B[1][2]) & B[1][3]); + C[1] ^= E[1][1]; + E[1][2] = B[1][2] ^ ((~B[1][3]) & B[1][4]); + C[2] ^= E[1][2]; + E[1][3] = B[1][3] ^ ((~B[1][4]) & B[1][0]); + C[3] ^= E[1][3]; + E[1][4] = B[1][4] ^ ((~B[1][0]) & B[1][1]); + C[4] ^= E[1][4]; + A[0][1] ^= D[1]; + B[2][0] = (((A[0][1]) << 1) ^ ((A[0][1]) >> (64 - 1))); + A[1][2] ^= D[2]; + B[2][1] = (((A[1][2]) << 6) ^ ((A[1][2]) >> (64 - 6))); + A[2][3] ^= D[3]; + B[2][2] = (((A[2][3]) << 25) ^ ((A[2][3]) >> (64 - 25))); + A[3][4] ^= D[4]; + B[2][3] = (((A[3][4]) << 8) ^ ((A[3][4]) >> (64 - 8))); + A[4][0] ^= D[0]; + B[2][4] = (((A[4][0]) << 18) ^ ((A[4][0]) >> (64 - 18))); + E[2][0] = B[2][0] ^ ((~B[2][1]) & B[2][2]); + C[0] ^= E[2][0]; + E[2][1] = B[2][1] ^ ((~B[2][2]) & B[2][3]); + C[1] ^= E[2][1]; + E[2][2] = B[2][2] ^ ((~B[2][3]) & B[2][4]); + C[2] ^= E[2][2]; + E[2][3] = B[2][3] ^ ((~B[2][4]) & B[2][0]); + C[3] ^= E[2][3]; + E[2][4] = B[2][4] ^ ((~B[2][0]) & B[2][1]); + C[4] ^= E[2][4]; + A[0][4] ^= D[4]; + B[3][0] = (((A[0][4]) << 27) ^ ((A[0][4]) >> (64 - 27))); + A[1][0] ^= D[0]; + B[3][1] = (((A[1][0]) << 36) ^ ((A[1][0]) >> (64 - 36))); + A[2][1] ^= D[1]; + B[3][2] = (((A[2][1]) << 10) ^ ((A[2][1]) >> (64 - 10))); + A[3][2] ^= D[2]; + B[3][3] = (((A[3][2]) << 15) ^ ((A[3][2]) >> (64 - 15))); + A[4][3] ^= D[3]; + B[3][4] = (((A[4][3]) << 56) ^ ((A[4][3]) >> (64 - 56))); + E[3][0] = B[3][0] ^ ((~B[3][1]) & B[3][2]); + C[0] ^= E[3][0]; + E[3][1] = B[3][1] ^ ((~B[3][2]) & B[3][3]); + C[1] ^= E[3][1]; + E[3][2] = B[3][2] ^ ((~B[3][3]) & B[3][4]); + C[2] ^= E[3][2]; + E[3][3] = B[3][3] ^ ((~B[3][4]) & B[3][0]); + C[3] ^= E[3][3]; + E[3][4] = B[3][4] ^ ((~B[3][0]) & B[3][1]); + C[4] ^= E[3][4]; + A[0][2] ^= D[2]; + B[4][0] = (((A[0][2]) << 62) ^ ((A[0][2]) >> (64 - 62))); + 
A[1][3] ^= D[3]; + B[4][1] = (((A[1][3]) << 55) ^ ((A[1][3]) >> (64 - 55))); + A[2][4] ^= D[4]; + B[4][2] = (((A[2][4]) << 39) ^ ((A[2][4]) >> (64 - 39))); + A[3][0] ^= D[0]; + B[4][3] = (((A[3][0]) << 41) ^ ((A[3][0]) >> (64 - 41))); + A[4][1] ^= D[1]; + B[4][4] = (((A[4][1]) << 2) ^ ((A[4][1]) >> (64 - 2))); + E[4][0] = B[4][0] ^ ((~B[4][1]) & B[4][2]); + C[0] ^= E[4][0]; + E[4][1] = B[4][1] ^ ((~B[4][2]) & B[4][3]); + C[1] ^= E[4][1]; + E[4][2] = B[4][2] ^ ((~B[4][3]) & B[4][4]); + C[2] ^= E[4][2]; + E[4][3] = B[4][3] ^ ((~B[4][4]) & B[4][0]); + C[3] ^= E[4][3]; + E[4][4] = B[4][4] ^ ((~B[4][0]) & B[4][1]); + C[4] ^= E[4][4]; + D[0] = C[4] ^ (((C[1]) << 1) ^ ((C[1]) >> (64 - 1))); + D[1] = C[0] ^ (((C[2]) << 1) ^ ((C[2]) >> (64 - 1))); + D[2] = C[1] ^ (((C[3]) << 1) ^ ((C[3]) >> (64 - 1))); + D[3] = C[2] ^ (((C[4]) << 1) ^ ((C[4]) >> (64 - 1))); + D[4] = C[3] ^ (((C[0]) << 1) ^ ((C[0]) >> (64 - 1))); + E[0][0] ^= D[0]; + B[0][0] = E[0][0]; + E[1][1] ^= D[1]; + B[0][1] = (((E[1][1]) << 44) ^ ((E[1][1]) >> (64 - 44))); + E[2][2] ^= D[2]; + B[0][2] = (((E[2][2]) << 43) ^ ((E[2][2]) >> (64 - 43))); + E[3][3] ^= D[3]; + B[0][3] = (((E[3][3]) << 21) ^ ((E[3][3]) >> (64 - 21))); + E[4][4] ^= D[4]; + B[0][4] = (((E[4][4]) << 14) ^ ((E[4][4]) >> (64 - 14))); + A[0][0] = B[0][0] ^ ((~B[0][1]) & B[0][2]); + A[0][0] ^= KECCAKF1600_ROUND_CONSTANTS[19]; + C[0] = A[0][0]; + A[0][1] = B[0][1] ^ ((~B[0][2]) & B[0][3]); + C[1] = A[0][1]; + A[0][2] = B[0][2] ^ ((~B[0][3]) & B[0][4]); + C[2] = A[0][2]; + A[0][3] = B[0][3] ^ ((~B[0][4]) & B[0][0]); + C[3] = A[0][3]; + A[0][4] = B[0][4] ^ ((~B[0][0]) & B[0][1]); + C[4] = A[0][4]; + E[0][3] ^= D[3]; + B[1][0] = (((E[0][3]) << 28) ^ ((E[0][3]) >> (64 - 28))); + E[1][4] ^= D[4]; + B[1][1] = (((E[1][4]) << 20) ^ ((E[1][4]) >> (64 - 20))); + E[2][0] ^= D[0]; + B[1][2] = (((E[2][0]) << 3) ^ ((E[2][0]) >> (64 - 3))); + E[3][1] ^= D[1]; + B[1][3] = (((E[3][1]) << 45) ^ ((E[3][1]) >> (64 - 45))); + E[4][2] ^= D[2]; + B[1][4] = (((E[4][2]) << 
61) ^ ((E[4][2]) >> (64 - 61))); + A[1][0] = B[1][0] ^ ((~B[1][1]) & B[1][2]); + C[0] ^= A[1][0]; + A[1][1] = B[1][1] ^ ((~B[1][2]) & B[1][3]); + C[1] ^= A[1][1]; + A[1][2] = B[1][2] ^ ((~B[1][3]) & B[1][4]); + C[2] ^= A[1][2]; + A[1][3] = B[1][3] ^ ((~B[1][4]) & B[1][0]); + C[3] ^= A[1][3]; + A[1][4] = B[1][4] ^ ((~B[1][0]) & B[1][1]); + C[4] ^= A[1][4]; + E[0][1] ^= D[1]; + B[2][0] = (((E[0][1]) << 1) ^ ((E[0][1]) >> (64 - 1))); + E[1][2] ^= D[2]; + B[2][1] = (((E[1][2]) << 6) ^ ((E[1][2]) >> (64 - 6))); + E[2][3] ^= D[3]; + B[2][2] = (((E[2][3]) << 25) ^ ((E[2][3]) >> (64 - 25))); + E[3][4] ^= D[4]; + B[2][3] = (((E[3][4]) << 8) ^ ((E[3][4]) >> (64 - 8))); + E[4][0] ^= D[0]; + B[2][4] = (((E[4][0]) << 18) ^ ((E[4][0]) >> (64 - 18))); + A[2][0] = B[2][0] ^ ((~B[2][1]) & B[2][2]); + C[0] ^= A[2][0]; + A[2][1] = B[2][1] ^ ((~B[2][2]) & B[2][3]); + C[1] ^= A[2][1]; + A[2][2] = B[2][2] ^ ((~B[2][3]) & B[2][4]); + C[2] ^= A[2][2]; + A[2][3] = B[2][3] ^ ((~B[2][4]) & B[2][0]); + C[3] ^= A[2][3]; + A[2][4] = B[2][4] ^ ((~B[2][0]) & B[2][1]); + C[4] ^= A[2][4]; + E[0][4] ^= D[4]; + B[3][0] = (((E[0][4]) << 27) ^ ((E[0][4]) >> (64 - 27))); + E[1][0] ^= D[0]; + B[3][1] = (((E[1][0]) << 36) ^ ((E[1][0]) >> (64 - 36))); + E[2][1] ^= D[1]; + B[3][2] = (((E[2][1]) << 10) ^ ((E[2][1]) >> (64 - 10))); + E[3][2] ^= D[2]; + B[3][3] = (((E[3][2]) << 15) ^ ((E[3][2]) >> (64 - 15))); + E[4][3] ^= D[3]; + B[3][4] = (((E[4][3]) << 56) ^ ((E[4][3]) >> (64 - 56))); + A[3][0] = B[3][0] ^ ((~B[3][1]) & B[3][2]); + C[0] ^= A[3][0]; + A[3][1] = B[3][1] ^ ((~B[3][2]) & B[3][3]); + C[1] ^= A[3][1]; + A[3][2] = B[3][2] ^ ((~B[3][3]) & B[3][4]); + C[2] ^= A[3][2]; + A[3][3] = B[3][3] ^ ((~B[3][4]) & B[3][0]); + C[3] ^= A[3][3]; + A[3][4] = B[3][4] ^ ((~B[3][0]) & B[3][1]); + C[4] ^= A[3][4]; + E[0][2] ^= D[2]; + B[4][0] = (((E[0][2]) << 62) ^ ((E[0][2]) >> (64 - 62))); + E[1][3] ^= D[3]; + B[4][1] = (((E[1][3]) << 55) ^ ((E[1][3]) >> (64 - 55))); + E[2][4] ^= D[4]; + B[4][2] = (((E[2][4]) << 39) 
^ ((E[2][4]) >> (64 - 39))); + E[3][0] ^= D[0]; + B[4][3] = (((E[3][0]) << 41) ^ ((E[3][0]) >> (64 - 41))); + E[4][1] ^= D[1]; + B[4][4] = (((E[4][1]) << 2) ^ ((E[4][1]) >> (64 - 2))); + A[4][0] = B[4][0] ^ ((~B[4][1]) & B[4][2]); + C[0] ^= A[4][0]; + A[4][1] = B[4][1] ^ ((~B[4][2]) & B[4][3]); + C[1] ^= A[4][1]; + A[4][2] = B[4][2] ^ ((~B[4][3]) & B[4][4]); + C[2] ^= A[4][2]; + A[4][3] = B[4][3] ^ ((~B[4][4]) & B[4][0]); + C[3] ^= A[4][3]; + A[4][4] = B[4][4] ^ ((~B[4][0]) & B[4][1]); + C[4] ^= A[4][4]; + D[0] = C[4] ^ (((C[1]) << 1) ^ ((C[1]) >> (64 - 1))); + D[1] = C[0] ^ (((C[2]) << 1) ^ ((C[2]) >> (64 - 1))); + D[2] = C[1] ^ (((C[3]) << 1) ^ ((C[3]) >> (64 - 1))); + D[3] = C[2] ^ (((C[4]) << 1) ^ ((C[4]) >> (64 - 1))); + D[4] = C[3] ^ (((C[0]) << 1) ^ ((C[0]) >> (64 - 1))); + A[0][0] ^= D[0]; + B[0][0] = A[0][0]; + A[1][1] ^= D[1]; + B[0][1] = (((A[1][1]) << 44) ^ ((A[1][1]) >> (64 - 44))); + A[2][2] ^= D[2]; + B[0][2] = (((A[2][2]) << 43) ^ ((A[2][2]) >> (64 - 43))); + A[3][3] ^= D[3]; + B[0][3] = (((A[3][3]) << 21) ^ ((A[3][3]) >> (64 - 21))); + A[4][4] ^= D[4]; + B[0][4] = (((A[4][4]) << 14) ^ ((A[4][4]) >> (64 - 14))); + E[0][0] = B[0][0] ^ ((~B[0][1]) & B[0][2]); + E[0][0] ^= KECCAKF1600_ROUND_CONSTANTS[20]; + C[0] = E[0][0]; + E[0][1] = B[0][1] ^ ((~B[0][2]) & B[0][3]); + C[1] = E[0][1]; + E[0][2] = B[0][2] ^ ((~B[0][3]) & B[0][4]); + C[2] = E[0][2]; + E[0][3] = B[0][3] ^ ((~B[0][4]) & B[0][0]); + C[3] = E[0][3]; + E[0][4] = B[0][4] ^ ((~B[0][0]) & B[0][1]); + C[4] = E[0][4]; + A[0][3] ^= D[3]; + B[1][0] = (((A[0][3]) << 28) ^ ((A[0][3]) >> (64 - 28))); + A[1][4] ^= D[4]; + B[1][1] = (((A[1][4]) << 20) ^ ((A[1][4]) >> (64 - 20))); + A[2][0] ^= D[0]; + B[1][2] = (((A[2][0]) << 3) ^ ((A[2][0]) >> (64 - 3))); + A[3][1] ^= D[1]; + B[1][3] = (((A[3][1]) << 45) ^ ((A[3][1]) >> (64 - 45))); + A[4][2] ^= D[2]; + B[1][4] = (((A[4][2]) << 61) ^ ((A[4][2]) >> (64 - 61))); + E[1][0] = B[1][0] ^ ((~B[1][1]) & B[1][2]); + C[0] ^= E[1][0]; + E[1][1] = B[1][1] ^ 
((~B[1][2]) & B[1][3]); + C[1] ^= E[1][1]; + E[1][2] = B[1][2] ^ ((~B[1][3]) & B[1][4]); + C[2] ^= E[1][2]; + E[1][3] = B[1][3] ^ ((~B[1][4]) & B[1][0]); + C[3] ^= E[1][3]; + E[1][4] = B[1][4] ^ ((~B[1][0]) & B[1][1]); + C[4] ^= E[1][4]; + A[0][1] ^= D[1]; + B[2][0] = (((A[0][1]) << 1) ^ ((A[0][1]) >> (64 - 1))); + A[1][2] ^= D[2]; + B[2][1] = (((A[1][2]) << 6) ^ ((A[1][2]) >> (64 - 6))); + A[2][3] ^= D[3]; + B[2][2] = (((A[2][3]) << 25) ^ ((A[2][3]) >> (64 - 25))); + A[3][4] ^= D[4]; + B[2][3] = (((A[3][4]) << 8) ^ ((A[3][4]) >> (64 - 8))); + A[4][0] ^= D[0]; + B[2][4] = (((A[4][0]) << 18) ^ ((A[4][0]) >> (64 - 18))); + E[2][0] = B[2][0] ^ ((~B[2][1]) & B[2][2]); + C[0] ^= E[2][0]; + E[2][1] = B[2][1] ^ ((~B[2][2]) & B[2][3]); + C[1] ^= E[2][1]; + E[2][2] = B[2][2] ^ ((~B[2][3]) & B[2][4]); + C[2] ^= E[2][2]; + E[2][3] = B[2][3] ^ ((~B[2][4]) & B[2][0]); + C[3] ^= E[2][3]; + E[2][4] = B[2][4] ^ ((~B[2][0]) & B[2][1]); + C[4] ^= E[2][4]; + A[0][4] ^= D[4]; + B[3][0] = (((A[0][4]) << 27) ^ ((A[0][4]) >> (64 - 27))); + A[1][0] ^= D[0]; + B[3][1] = (((A[1][0]) << 36) ^ ((A[1][0]) >> (64 - 36))); + A[2][1] ^= D[1]; + B[3][2] = (((A[2][1]) << 10) ^ ((A[2][1]) >> (64 - 10))); + A[3][2] ^= D[2]; + B[3][3] = (((A[3][2]) << 15) ^ ((A[3][2]) >> (64 - 15))); + A[4][3] ^= D[3]; + B[3][4] = (((A[4][3]) << 56) ^ ((A[4][3]) >> (64 - 56))); + E[3][0] = B[3][0] ^ ((~B[3][1]) & B[3][2]); + C[0] ^= E[3][0]; + E[3][1] = B[3][1] ^ ((~B[3][2]) & B[3][3]); + C[1] ^= E[3][1]; + E[3][2] = B[3][2] ^ ((~B[3][3]) & B[3][4]); + C[2] ^= E[3][2]; + E[3][3] = B[3][3] ^ ((~B[3][4]) & B[3][0]); + C[3] ^= E[3][3]; + E[3][4] = B[3][4] ^ ((~B[3][0]) & B[3][1]); + C[4] ^= E[3][4]; + A[0][2] ^= D[2]; + B[4][0] = (((A[0][2]) << 62) ^ ((A[0][2]) >> (64 - 62))); + A[1][3] ^= D[3]; + B[4][1] = (((A[1][3]) << 55) ^ ((A[1][3]) >> (64 - 55))); + A[2][4] ^= D[4]; + B[4][2] = (((A[2][4]) << 39) ^ ((A[2][4]) >> (64 - 39))); + A[3][0] ^= D[0]; + B[4][3] = (((A[3][0]) << 41) ^ ((A[3][0]) >> (64 - 41))); + A[4][1] 
^= D[1]; + B[4][4] = (((A[4][1]) << 2) ^ ((A[4][1]) >> (64 - 2))); + E[4][0] = B[4][0] ^ ((~B[4][1]) & B[4][2]); + C[0] ^= E[4][0]; + E[4][1] = B[4][1] ^ ((~B[4][2]) & B[4][3]); + C[1] ^= E[4][1]; + E[4][2] = B[4][2] ^ ((~B[4][3]) & B[4][4]); + C[2] ^= E[4][2]; + E[4][3] = B[4][3] ^ ((~B[4][4]) & B[4][0]); + C[3] ^= E[4][3]; + E[4][4] = B[4][4] ^ ((~B[4][0]) & B[4][1]); + C[4] ^= E[4][4]; + D[0] = C[4] ^ (((C[1]) << 1) ^ ((C[1]) >> (64 - 1))); + D[1] = C[0] ^ (((C[2]) << 1) ^ ((C[2]) >> (64 - 1))); + D[2] = C[1] ^ (((C[3]) << 1) ^ ((C[3]) >> (64 - 1))); + D[3] = C[2] ^ (((C[4]) << 1) ^ ((C[4]) >> (64 - 1))); + D[4] = C[3] ^ (((C[0]) << 1) ^ ((C[0]) >> (64 - 1))); + E[0][0] ^= D[0]; + B[0][0] = E[0][0]; + E[1][1] ^= D[1]; + B[0][1] = (((E[1][1]) << 44) ^ ((E[1][1]) >> (64 - 44))); + E[2][2] ^= D[2]; + B[0][2] = (((E[2][2]) << 43) ^ ((E[2][2]) >> (64 - 43))); + E[3][3] ^= D[3]; + B[0][3] = (((E[3][3]) << 21) ^ ((E[3][3]) >> (64 - 21))); + E[4][4] ^= D[4]; + B[0][4] = (((E[4][4]) << 14) ^ ((E[4][4]) >> (64 - 14))); + A[0][0] = B[0][0] ^ ((~B[0][1]) & B[0][2]); + A[0][0] ^= KECCAKF1600_ROUND_CONSTANTS[21]; + C[0] = A[0][0]; + A[0][1] = B[0][1] ^ ((~B[0][2]) & B[0][3]); + C[1] = A[0][1]; + A[0][2] = B[0][2] ^ ((~B[0][3]) & B[0][4]); + C[2] = A[0][2]; + A[0][3] = B[0][3] ^ ((~B[0][4]) & B[0][0]); + C[3] = A[0][3]; + A[0][4] = B[0][4] ^ ((~B[0][0]) & B[0][1]); + C[4] = A[0][4]; + E[0][3] ^= D[3]; + B[1][0] = (((E[0][3]) << 28) ^ ((E[0][3]) >> (64 - 28))); + E[1][4] ^= D[4]; + B[1][1] = (((E[1][4]) << 20) ^ ((E[1][4]) >> (64 - 20))); + E[2][0] ^= D[0]; + B[1][2] = (((E[2][0]) << 3) ^ ((E[2][0]) >> (64 - 3))); + E[3][1] ^= D[1]; + B[1][3] = (((E[3][1]) << 45) ^ ((E[3][1]) >> (64 - 45))); + E[4][2] ^= D[2]; + B[1][4] = (((E[4][2]) << 61) ^ ((E[4][2]) >> (64 - 61))); + A[1][0] = B[1][0] ^ ((~B[1][1]) & B[1][2]); + C[0] ^= A[1][0]; + A[1][1] = B[1][1] ^ ((~B[1][2]) & B[1][3]); + C[1] ^= A[1][1]; + A[1][2] = B[1][2] ^ ((~B[1][3]) & B[1][4]); + C[2] ^= A[1][2]; + A[1][3] = 
B[1][3] ^ ((~B[1][4]) & B[1][0]); + C[3] ^= A[1][3]; + A[1][4] = B[1][4] ^ ((~B[1][0]) & B[1][1]); + C[4] ^= A[1][4]; + E[0][1] ^= D[1]; + B[2][0] = (((E[0][1]) << 1) ^ ((E[0][1]) >> (64 - 1))); + E[1][2] ^= D[2]; + B[2][1] = (((E[1][2]) << 6) ^ ((E[1][2]) >> (64 - 6))); + E[2][3] ^= D[3]; + B[2][2] = (((E[2][3]) << 25) ^ ((E[2][3]) >> (64 - 25))); + E[3][4] ^= D[4]; + B[2][3] = (((E[3][4]) << 8) ^ ((E[3][4]) >> (64 - 8))); + E[4][0] ^= D[0]; + B[2][4] = (((E[4][0]) << 18) ^ ((E[4][0]) >> (64 - 18))); + A[2][0] = B[2][0] ^ ((~B[2][1]) & B[2][2]); + C[0] ^= A[2][0]; + A[2][1] = B[2][1] ^ ((~B[2][2]) & B[2][3]); + C[1] ^= A[2][1]; + A[2][2] = B[2][2] ^ ((~B[2][3]) & B[2][4]); + C[2] ^= A[2][2]; + A[2][3] = B[2][3] ^ ((~B[2][4]) & B[2][0]); + C[3] ^= A[2][3]; + A[2][4] = B[2][4] ^ ((~B[2][0]) & B[2][1]); + C[4] ^= A[2][4]; + E[0][4] ^= D[4]; + B[3][0] = (((E[0][4]) << 27) ^ ((E[0][4]) >> (64 - 27))); + E[1][0] ^= D[0]; + B[3][1] = (((E[1][0]) << 36) ^ ((E[1][0]) >> (64 - 36))); + E[2][1] ^= D[1]; + B[3][2] = (((E[2][1]) << 10) ^ ((E[2][1]) >> (64 - 10))); + E[3][2] ^= D[2]; + B[3][3] = (((E[3][2]) << 15) ^ ((E[3][2]) >> (64 - 15))); + E[4][3] ^= D[3]; + B[3][4] = (((E[4][3]) << 56) ^ ((E[4][3]) >> (64 - 56))); + A[3][0] = B[3][0] ^ ((~B[3][1]) & B[3][2]); + C[0] ^= A[3][0]; + A[3][1] = B[3][1] ^ ((~B[3][2]) & B[3][3]); + C[1] ^= A[3][1]; + A[3][2] = B[3][2] ^ ((~B[3][3]) & B[3][4]); + C[2] ^= A[3][2]; + A[3][3] = B[3][3] ^ ((~B[3][4]) & B[3][0]); + C[3] ^= A[3][3]; + A[3][4] = B[3][4] ^ ((~B[3][0]) & B[3][1]); + C[4] ^= A[3][4]; + E[0][2] ^= D[2]; + B[4][0] = (((E[0][2]) << 62) ^ ((E[0][2]) >> (64 - 62))); + E[1][3] ^= D[3]; + B[4][1] = (((E[1][3]) << 55) ^ ((E[1][3]) >> (64 - 55))); + E[2][4] ^= D[4]; + B[4][2] = (((E[2][4]) << 39) ^ ((E[2][4]) >> (64 - 39))); + E[3][0] ^= D[0]; + B[4][3] = (((E[3][0]) << 41) ^ ((E[3][0]) >> (64 - 41))); + E[4][1] ^= D[1]; + B[4][4] = (((E[4][1]) << 2) ^ ((E[4][1]) >> (64 - 2))); + A[4][0] = B[4][0] ^ ((~B[4][1]) & B[4][2]); + C[0] 
^= A[4][0]; + A[4][1] = B[4][1] ^ ((~B[4][2]) & B[4][3]); + C[1] ^= A[4][1]; + A[4][2] = B[4][2] ^ ((~B[4][3]) & B[4][4]); + C[2] ^= A[4][2]; + A[4][3] = B[4][3] ^ ((~B[4][4]) & B[4][0]); + C[3] ^= A[4][3]; + A[4][4] = B[4][4] ^ ((~B[4][0]) & B[4][1]); + C[4] ^= A[4][4]; + D[0] = C[4] ^ (((C[1]) << 1) ^ ((C[1]) >> (64 - 1))); + D[1] = C[0] ^ (((C[2]) << 1) ^ ((C[2]) >> (64 - 1))); + D[2] = C[1] ^ (((C[3]) << 1) ^ ((C[3]) >> (64 - 1))); + D[3] = C[2] ^ (((C[4]) << 1) ^ ((C[4]) >> (64 - 1))); + D[4] = C[3] ^ (((C[0]) << 1) ^ ((C[0]) >> (64 - 1))); + A[0][0] ^= D[0]; + B[0][0] = A[0][0]; + A[1][1] ^= D[1]; + B[0][1] = (((A[1][1]) << 44) ^ ((A[1][1]) >> (64 - 44))); + A[2][2] ^= D[2]; + B[0][2] = (((A[2][2]) << 43) ^ ((A[2][2]) >> (64 - 43))); + A[3][3] ^= D[3]; + B[0][3] = (((A[3][3]) << 21) ^ ((A[3][3]) >> (64 - 21))); + A[4][4] ^= D[4]; + B[0][4] = (((A[4][4]) << 14) ^ ((A[4][4]) >> (64 - 14))); + E[0][0] = B[0][0] ^ ((~B[0][1]) & B[0][2]); + E[0][0] ^= KECCAKF1600_ROUND_CONSTANTS[22]; + C[0] = E[0][0]; + E[0][1] = B[0][1] ^ ((~B[0][2]) & B[0][3]); + C[1] = E[0][1]; + E[0][2] = B[0][2] ^ ((~B[0][3]) & B[0][4]); + C[2] = E[0][2]; + E[0][3] = B[0][3] ^ ((~B[0][4]) & B[0][0]); + C[3] = E[0][3]; + E[0][4] = B[0][4] ^ ((~B[0][0]) & B[0][1]); + C[4] = E[0][4]; + A[0][3] ^= D[3]; + B[1][0] = (((A[0][3]) << 28) ^ ((A[0][3]) >> (64 - 28))); + A[1][4] ^= D[4]; + B[1][1] = (((A[1][4]) << 20) ^ ((A[1][4]) >> (64 - 20))); + A[2][0] ^= D[0]; + B[1][2] = (((A[2][0]) << 3) ^ ((A[2][0]) >> (64 - 3))); + A[3][1] ^= D[1]; + B[1][3] = (((A[3][1]) << 45) ^ ((A[3][1]) >> (64 - 45))); + A[4][2] ^= D[2]; + B[1][4] = (((A[4][2]) << 61) ^ ((A[4][2]) >> (64 - 61))); + E[1][0] = B[1][0] ^ ((~B[1][1]) & B[1][2]); + C[0] ^= E[1][0]; + E[1][1] = B[1][1] ^ ((~B[1][2]) & B[1][3]); + C[1] ^= E[1][1]; + E[1][2] = B[1][2] ^ ((~B[1][3]) & B[1][4]); + C[2] ^= E[1][2]; + E[1][3] = B[1][3] ^ ((~B[1][4]) & B[1][0]); + C[3] ^= E[1][3]; + E[1][4] = B[1][4] ^ ((~B[1][0]) & B[1][1]); + C[4] ^= E[1][4]; + 
A[0][1] ^= D[1]; + B[2][0] = (((A[0][1]) << 1) ^ ((A[0][1]) >> (64 - 1))); + A[1][2] ^= D[2]; + B[2][1] = (((A[1][2]) << 6) ^ ((A[1][2]) >> (64 - 6))); + A[2][3] ^= D[3]; + B[2][2] = (((A[2][3]) << 25) ^ ((A[2][3]) >> (64 - 25))); + A[3][4] ^= D[4]; + B[2][3] = (((A[3][4]) << 8) ^ ((A[3][4]) >> (64 - 8))); + A[4][0] ^= D[0]; + B[2][4] = (((A[4][0]) << 18) ^ ((A[4][0]) >> (64 - 18))); + E[2][0] = B[2][0] ^ ((~B[2][1]) & B[2][2]); + C[0] ^= E[2][0]; + E[2][1] = B[2][1] ^ ((~B[2][2]) & B[2][3]); + C[1] ^= E[2][1]; + E[2][2] = B[2][2] ^ ((~B[2][3]) & B[2][4]); + C[2] ^= E[2][2]; + E[2][3] = B[2][3] ^ ((~B[2][4]) & B[2][0]); + C[3] ^= E[2][3]; + E[2][4] = B[2][4] ^ ((~B[2][0]) & B[2][1]); + C[4] ^= E[2][4]; + A[0][4] ^= D[4]; + B[3][0] = (((A[0][4]) << 27) ^ ((A[0][4]) >> (64 - 27))); + A[1][0] ^= D[0]; + B[3][1] = (((A[1][0]) << 36) ^ ((A[1][0]) >> (64 - 36))); + A[2][1] ^= D[1]; + B[3][2] = (((A[2][1]) << 10) ^ ((A[2][1]) >> (64 - 10))); + A[3][2] ^= D[2]; + B[3][3] = (((A[3][2]) << 15) ^ ((A[3][2]) >> (64 - 15))); + A[4][3] ^= D[3]; + B[3][4] = (((A[4][3]) << 56) ^ ((A[4][3]) >> (64 - 56))); + E[3][0] = B[3][0] ^ ((~B[3][1]) & B[3][2]); + C[0] ^= E[3][0]; + E[3][1] = B[3][1] ^ ((~B[3][2]) & B[3][3]); + C[1] ^= E[3][1]; + E[3][2] = B[3][2] ^ ((~B[3][3]) & B[3][4]); + C[2] ^= E[3][2]; + E[3][3] = B[3][3] ^ ((~B[3][4]) & B[3][0]); + C[3] ^= E[3][3]; + E[3][4] = B[3][4] ^ ((~B[3][0]) & B[3][1]); + C[4] ^= E[3][4]; + A[0][2] ^= D[2]; + B[4][0] = (((A[0][2]) << 62) ^ ((A[0][2]) >> (64 - 62))); + A[1][3] ^= D[3]; + B[4][1] = (((A[1][3]) << 55) ^ ((A[1][3]) >> (64 - 55))); + A[2][4] ^= D[4]; + B[4][2] = (((A[2][4]) << 39) ^ ((A[2][4]) >> (64 - 39))); + A[3][0] ^= D[0]; + B[4][3] = (((A[3][0]) << 41) ^ ((A[3][0]) >> (64 - 41))); + A[4][1] ^= D[1]; + B[4][4] = (((A[4][1]) << 2) ^ ((A[4][1]) >> (64 - 2))); + E[4][0] = B[4][0] ^ ((~B[4][1]) & B[4][2]); + C[0] ^= E[4][0]; + E[4][1] = B[4][1] ^ ((~B[4][2]) & B[4][3]); + C[1] ^= E[4][1]; + E[4][2] = B[4][2] ^ ((~B[4][3]) & 
B[4][4]); + C[2] ^= E[4][2]; + E[4][3] = B[4][3] ^ ((~B[4][4]) & B[4][0]); + C[3] ^= E[4][3]; + E[4][4] = B[4][4] ^ ((~B[4][0]) & B[4][1]); + C[4] ^= E[4][4]; + D[0] = C[4] ^ (((C[1]) << 1) ^ ((C[1]) >> (64 - 1))); + D[1] = C[0] ^ (((C[2]) << 1) ^ ((C[2]) >> (64 - 1))); + D[2] = C[1] ^ (((C[3]) << 1) ^ ((C[3]) >> (64 - 1))); + D[3] = C[2] ^ (((C[4]) << 1) ^ ((C[4]) >> (64 - 1))); + D[4] = C[3] ^ (((C[0]) << 1) ^ ((C[0]) >> (64 - 1))); + E[0][0] ^= D[0]; + B[0][0] = E[0][0]; + E[1][1] ^= D[1]; + B[0][1] = (((E[1][1]) << 44) ^ ((E[1][1]) >> (64 - 44))); + E[2][2] ^= D[2]; + B[0][2] = (((E[2][2]) << 43) ^ ((E[2][2]) >> (64 - 43))); + E[3][3] ^= D[3]; + B[0][3] = (((E[3][3]) << 21) ^ ((E[3][3]) >> (64 - 21))); + E[4][4] ^= D[4]; + B[0][4] = (((E[4][4]) << 14) ^ ((E[4][4]) >> (64 - 14))); + A[0][0] = B[0][0] ^ ((~B[0][1]) & B[0][2]); + A[0][0] ^= KECCAKF1600_ROUND_CONSTANTS[23]; + A[0][1] = B[0][1] ^ ((~B[0][2]) & B[0][3]); + A[0][2] = B[0][2] ^ ((~B[0][3]) & B[0][4]); + A[0][3] = B[0][3] ^ ((~B[0][4]) & B[0][0]); + A[0][4] = B[0][4] ^ ((~B[0][0]) & B[0][1]); + E[0][3] ^= D[3]; + B[1][0] = (((E[0][3]) << 28) ^ ((E[0][3]) >> (64 - 28))); + E[1][4] ^= D[4]; + B[1][1] = (((E[1][4]) << 20) ^ ((E[1][4]) >> (64 - 20))); + E[2][0] ^= D[0]; + B[1][2] = (((E[2][0]) << 3) ^ ((E[2][0]) >> (64 - 3))); + E[3][1] ^= D[1]; + B[1][3] = (((E[3][1]) << 45) ^ ((E[3][1]) >> (64 - 45))); + E[4][2] ^= D[2]; + B[1][4] = (((E[4][2]) << 61) ^ ((E[4][2]) >> (64 - 61))); + A[1][0] = B[1][0] ^ ((~B[1][1]) & B[1][2]); + A[1][1] = B[1][1] ^ ((~B[1][2]) & B[1][3]); + A[1][2] = B[1][2] ^ ((~B[1][3]) & B[1][4]); + A[1][3] = B[1][3] ^ ((~B[1][4]) & B[1][0]); + A[1][4] = B[1][4] ^ ((~B[1][0]) & B[1][1]); + E[0][1] ^= D[1]; + B[2][0] = (((E[0][1]) << 1) ^ ((E[0][1]) >> (64 - 1))); + E[1][2] ^= D[2]; + B[2][1] = (((E[1][2]) << 6) ^ ((E[1][2]) >> (64 - 6))); + E[2][3] ^= D[3]; + B[2][2] = (((E[2][3]) << 25) ^ ((E[2][3]) >> (64 - 25))); + E[3][4] ^= D[4]; + B[2][3] = (((E[3][4]) << 8) ^ ((E[3][4]) >> (64 - 
8))); + E[4][0] ^= D[0]; + B[2][4] = (((E[4][0]) << 18) ^ ((E[4][0]) >> (64 - 18))); + A[2][0] = B[2][0] ^ ((~B[2][1]) & B[2][2]); + A[2][1] = B[2][1] ^ ((~B[2][2]) & B[2][3]); + A[2][2] = B[2][2] ^ ((~B[2][3]) & B[2][4]); + A[2][3] = B[2][3] ^ ((~B[2][4]) & B[2][0]); + A[2][4] = B[2][4] ^ ((~B[2][0]) & B[2][1]); + E[0][4] ^= D[4]; + B[3][0] = (((E[0][4]) << 27) ^ ((E[0][4]) >> (64 - 27))); + E[1][0] ^= D[0]; + B[3][1] = (((E[1][0]) << 36) ^ ((E[1][0]) >> (64 - 36))); + E[2][1] ^= D[1]; + B[3][2] = (((E[2][1]) << 10) ^ ((E[2][1]) >> (64 - 10))); + E[3][2] ^= D[2]; + B[3][3] = (((E[3][2]) << 15) ^ ((E[3][2]) >> (64 - 15))); + E[4][3] ^= D[3]; + B[3][4] = (((E[4][3]) << 56) ^ ((E[4][3]) >> (64 - 56))); + A[3][0] = B[3][0] ^ ((~B[3][1]) & B[3][2]); + A[3][1] = B[3][1] ^ ((~B[3][2]) & B[3][3]); + A[3][2] = B[3][2] ^ ((~B[3][3]) & B[3][4]); + A[3][3] = B[3][3] ^ ((~B[3][4]) & B[3][0]); + A[3][4] = B[3][4] ^ ((~B[3][0]) & B[3][1]); + E[0][2] ^= D[2]; + B[4][0] = (((E[0][2]) << 62) ^ ((E[0][2]) >> (64 - 62))); + E[1][3] ^= D[3]; + B[4][1] = (((E[1][3]) << 55) ^ ((E[1][3]) >> (64 - 55))); + E[2][4] ^= D[4]; + B[4][2] = (((E[2][4]) << 39) ^ ((E[2][4]) >> (64 - 39))); + E[3][0] ^= D[0]; + B[4][3] = (((E[3][0]) << 41) ^ ((E[3][0]) >> (64 - 41))); + E[4][1] ^= D[1]; + B[4][4] = (((E[4][1]) << 2) ^ ((E[4][1]) >> (64 - 2))); + A[4][0] = B[4][0] ^ ((~B[4][1]) & B[4][2]); + A[4][1] = B[4][1] ^ ((~B[4][2]) & B[4][3]); + A[4][2] = B[4][2] ^ ((~B[4][3]) & B[4][4]); + A[4][3] = B[4][3] ^ ((~B[4][4]) & B[4][0]); + A[4][4] = B[4][4] ^ ((~B[4][0]) & B[4][1]); +} + +} // namespace cartesi + +#endif // KECCAKF_H diff --git a/src/machine-c-api.cpp b/src/machine-c-api.cpp index de25eece0..66a894a7d 100644 --- a/src/machine-c-api.cpp +++ b/src/machine-c-api.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -38,6 +39,7 @@ #include "i-machine.h" #include "interpret.h" #include "json-util.h" +#include "keccak-256-hasher.h" #include "local-machine.h" 
#include "machine-c-api-internal.h" #include "machine-config.h" @@ -46,6 +48,7 @@ #include "machine-runtime-config.h" #include "machine.h" #include "os-features.h" +#include "sha-256-hasher.h" static std::string &get_last_err_msg_storage() { static THREAD_LOCAL std::string last_err_msg; @@ -478,10 +481,20 @@ static cartesi::machine_hash convert_from_c(const cm_hash *c_hash) { throw std::invalid_argument("invalid hash"); } cartesi::machine_hash cpp_hash; // In emulator this is std::array; - memcpy(cpp_hash.data(), c_hash, sizeof(cm_hash)); + std::memcpy(cpp_hash.data(), c_hash, sizeof(cm_hash)); return cpp_hash; } +static void convert_to_c(cartesi::machine_hash cpp_hash, cm_hash *c_hash) { + if (c_hash == nullptr) { + throw std::invalid_argument("invalid hash output"); + } + using elem_t = std::ranges::range_value_t; + constexpr auto elem_n = std::extent_v; + static_assert(std::ranges::size(cpp_hash) == elem_n); + std::ranges::copy(cpp_hash | cartesi::views::cast_to, std::ranges::data(*c_hash)); +} + // ---------------------------------------------- // The C API implementation // ---------------------------------------------- @@ -774,30 +787,18 @@ cm_error cm_get_proof(const cm_machine *m, uint64_t address, int32_t log2_size, } cm_error cm_get_root_hash(const cm_machine *m, cm_hash *hash) try { - if (hash == nullptr) { - throw std::invalid_argument("invalid hash output"); - } const auto *cpp_m = convert_from_c(m); - cartesi::machine_hash cpp_hash = cpp_m->get_root_hash(); - using elem_t = std::ranges::range_value_t; - constexpr auto elem_n = std::extent_v; - static_assert(std::ranges::size(cpp_hash) == elem_n); - std::ranges::copy(cpp_hash | cartesi::views::cast_to, std::ranges::data(*hash)); + const cartesi::machine_hash cpp_hash = cpp_m->get_root_hash(); + convert_to_c(cpp_hash, hash); return cm_result_success(); } catch (...) 
{ return cm_result_failure(); } cm_error cm_get_node_hash(const cm_machine *m, uint64_t address, int log2_size, cm_hash *hash) try { - if (hash == nullptr) { - throw std::invalid_argument("invalid hash output"); - } const auto *cpp_m = convert_from_c(m); - cartesi::machine_hash cpp_hash = cpp_m->get_node_hash(address, log2_size); - using elem_t = std::ranges::range_value_t; - constexpr auto elem_n = std::extent_v; - static_assert(std::ranges::size(cpp_hash) == elem_n); - std::ranges::copy(cpp_hash | cartesi::views::cast_to, std::ranges::data(*hash)); + const cartesi::machine_hash cpp_hash = cpp_m->get_node_hash(address, log2_size); + convert_to_c(cpp_hash, hash); return cm_result_success(); } catch (...) { return cm_result_failure(); @@ -874,17 +875,17 @@ cm_error cm_read_word(const cm_machine *m, uint64_t address, uint64_t *val) try return cm_result_failure(); } -cm_error cm_read_memory(const cm_machine *m, uint64_t address, uint8_t *data, uint64_t length) try { +cm_error cm_read_memory(const cm_machine *m, uint64_t paddr, uint8_t *data, uint64_t length) try { const auto *cpp_m = convert_from_c(m); - cpp_m->read_memory(address, data, length); + cpp_m->read_memory(paddr, data, length); return cm_result_success(); } catch (...) { return cm_result_failure(); } -cm_error cm_write_memory(cm_machine *m, uint64_t address, const uint8_t *data, uint64_t length) try { +cm_error cm_write_memory(cm_machine *m, uint64_t paddr, const uint8_t *data, uint64_t length) try { auto *cpp_m = convert_from_c(m); - cpp_m->write_memory(address, data, length); + cpp_m->write_memory(paddr, data, length); return cm_result_success(); } catch (...) { return cm_result_failure(); @@ -1125,3 +1126,58 @@ cm_error cm_verify_send_cmio_response(const cm_machine *m, uint16_t reason, cons } catch (...) 
{ return cm_result_failure(); } + +cm_error cm_get_hash(cm_hash_function hash_function, const uint8_t *data, uint64_t length, cm_hash *result) try { + if (data == nullptr && length > 0) { + throw std::invalid_argument("invalid data input"); + } + const std::span data_span{data, length}; + cartesi::machine_hash cpp_result; + + switch (hash_function) { + case CM_HASH_KECCAK256: { + cartesi::keccak_256_hasher hasher; + hasher.hash(data_span, cpp_result); + break; + } + case CM_HASH_SHA256: { + cartesi::sha_256_hasher hasher; + hasher.hash(data_span, cpp_result); + break; + } + default: + throw std::invalid_argument("invalid hash function"); + } + + convert_to_c(cpp_result, result); + return cm_result_success(); +} catch (...) { + return cm_result_failure(); +} + +cm_error cm_get_concat_hash(cm_hash_function hash_function, const cm_hash *left, const cm_hash *right, + cm_hash *result) try { + cartesi::machine_hash cpp_left = convert_from_c(left); + cartesi::machine_hash cpp_right = convert_from_c(right); + cartesi::machine_hash cpp_result; + + switch (hash_function) { + case CM_HASH_KECCAK256: { + cartesi::keccak_256_hasher h; + h.concat_hash(cpp_left, cpp_right, cpp_result); + break; + } + case CM_HASH_SHA256: { + cartesi::sha_256_hasher h; + h.concat_hash(cpp_left, cpp_right, cpp_result); + break; + } + default: + throw std::invalid_argument("invalid hash function"); + } + + convert_to_c(cpp_result, result); + return cm_result_success(); +} catch (...) { + return cm_result_failure(); +} diff --git a/src/machine-c-api.h b/src/machine-c-api.h index c2928766c..24725220b 100644 --- a/src/machine-c-api.h +++ b/src/machine-c-api.h @@ -291,6 +291,12 @@ typedef enum cm_reg { CM_REG_LAST_ = CM_REG_UARCH_HALT_FLAG, } cm_reg; +/// \brief Hash function types. 
+typedef enum cm_hash_function { + CM_HASH_KECCAK256 = 0, ///< Keccak-256 (recommended for fraud proofs using microarchitecture) + CM_HASH_SHA256 = 1, ///< SHA-256 (recommended for fraud proofs using zkVMs) +} cm_hash_function; + /// \brief Storage for machine hash. typedef uint8_t cm_hash[CM_HASH_SIZE]; @@ -743,6 +749,29 @@ CM_API cm_error cm_verify_hash_tree(cm_machine *m, bool *result); /// \returns 0 for success, non zero code for error. CM_API cm_error cm_get_hash_tree_stats(cm_machine *m, bool clear, const char **stats); +// ------------------------------------ +// Hashing +// ------------------------------------ + +/// \brief Gets the hash of data. +/// \param hash_function Hash function to use. +/// \param data Data to hash. +/// \param length Length of data in bytes. +/// \param result Valid pointer to cm_hash (32-byte array) that receives the hash. +/// \returns 0 for success, non zero code for error. +/// \details This function is optimized to hash words of hash trees, but can be used for any data. +CM_API cm_error cm_get_hash(cm_hash_function hash_function, const uint8_t *data, uint64_t length, cm_hash *result); + +/// \brief Gets the hash of a concatenation of two hashes. +/// \param hash_function Hash function to use. +/// \param left Left hash to concatenate. +/// \param right Right hash to concatenate. +/// \param result Valid pointer to cm_hash (32-byte array) that receives the hash. +/// \returns 0 for success, non zero code for error. +/// \details This function is optimized and intended to be used for concatenating hashes of hash trees. 
+CM_API cm_error cm_get_concat_hash(cm_hash_function hash_function, const cm_hash *left, const cm_hash *right, + cm_hash *result); + #ifdef __cplusplus } #endif diff --git a/src/machine-config.h b/src/machine-config.h index 285bc3156..fe10c7ec6 100644 --- a/src/machine-config.h +++ b/src/machine-config.h @@ -26,6 +26,7 @@ #include "riscv-constants.h" #include "shadow-registers.h" #include "shadow-uarch-state.h" +#include "variant-hasher.h" namespace cartesi { @@ -156,7 +157,8 @@ struct hash_tree_config final { bool truncate{false}; ///< Should backing store be truncated to correct size? std::string sht_filename; ///< Backing storage for sparse hash-tree std::string phtc_filename; ///< Backing storage for page hash-tree cache - uint64_t phtc_size{2048}; ///< Max number of pages in page hash-tree cache + uint64_t phtc_size{8192}; ///< Max number of pages in page hash-tree cache + hash_function_type hash_function{hash_function_type::keccak256}; ///< Hash function type to use in the hash-tree }; /// \brief Machine state config @@ -168,7 +170,7 @@ struct machine_config final { virtio_configs virtio; ///< VirtIO devices config cmio_config cmio{}; ///< Cartesi Machine IO config pmas_config pmas{}; ///< Physical Memory Attributes config - uarch_config uarch{}; ///< Microarchition config + uarch_config uarch{}; ///< Microarchitecture config hash_tree_config hash_tree{}; ///< Hash-tree config /// \brief Get the name where config will be stored in a directory diff --git a/src/machine-hash.h b/src/machine-hash.h index fb68820b5..ce2749c13 100644 --- a/src/machine-hash.h +++ b/src/machine-hash.h @@ -25,10 +25,10 @@ namespace cartesi { -static constexpr size_t machine_hash_size = 32; -using machine_hash = std::array; -using machine_hash_view = std::span; -using const_machine_hash_view = std::span; +static constexpr size_t MACHINE_HASH_SIZE = 32; +using machine_hash = std::array; +using machine_hash_view = std::span; +using const_machine_hash_view = std::span; } // namespace 
cartesi diff --git a/src/machine-reg.h b/src/machine-reg.h index 277b69bac..277ef2071 100644 --- a/src/machine-reg.h +++ b/src/machine-reg.h @@ -17,6 +17,7 @@ #ifndef MACHINE_REG_H #define MACHINE_REG_H +#include "shadow-registers.h" #include "shadow-uarch-state.h" /// \file diff --git a/src/machine.cpp b/src/machine.cpp index 2ac5de47b..23b59dd2c 100644 --- a/src/machine.cpp +++ b/src/machine.cpp @@ -19,7 +19,6 @@ #include #include -#include #include #include #include @@ -76,6 +75,7 @@ #include "uarch-state-access.h" #include "uarch-step.h" #include "unique-c-ptr.h" +#include "variant-hasher.h" #include "virtio-address-range.h" /// \file @@ -268,9 +268,12 @@ machine::machine(machine_config c, machine_runtime_config r) : m_c{std::move(c)}, // NOLINT(hicpp-move-const-arg,performance-move-const-arg) m_r{std::move(r)}, // NOLINT(hicpp-move-const-arg,performance-move-const-arg) m_ars{m_c}, - m_ht{m_c.hash_tree, m_r.concurrency.update_hash_tree, m_ars}, - m_s{std::bit_cast(m_ars.find(AR_SHADOW_STATE_START, AR_SHADOW_STATE_LENGTH).get_host_memory())}, - m_us{std::bit_cast( + m_ht{m_c.hash_tree, m_r.concurrency.update_hash_tree, m_ars, m_c.hash_tree.hash_function}, + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + m_s{reinterpret_cast( + m_ars.find(AR_SHADOW_STATE_START, AR_SHADOW_STATE_LENGTH).get_host_memory())}, + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + m_us{reinterpret_cast( m_ars.find(AR_SHADOW_UARCH_STATE_START, AR_SHADOW_UARCH_STATE_LENGTH).get_host_memory())} { init_processor(m_c.processor, m_r); init_uarch_processor(m_c.uarch.processor); @@ -1557,7 +1560,7 @@ void machine::fill_memory(uint64_t paddr, uint8_t val, uint64_t length) { foreach_aligned_chunk(paddr, length, AR_PAGE_SIZE, [&ar, val](auto chunk_start, auto chunk_length) { const auto offset = chunk_start - ar.get_start(); const auto dest = ar.get_host_memory() + offset; - if (val != 0 || !is_pristine(dest, chunk_length)) { + if (val != 0 || 
!is_pristine(std::span{dest, chunk_length})) { memset(dest, val, chunk_length); ar.get_dirty_page_tree().mark_dirty_page_and_up(offset); } @@ -1684,6 +1687,10 @@ void machine::send_cmio_response(uint16_t reason, const unsigned char *data, uin access_log machine::log_send_cmio_response(uint16_t reason, const unsigned char *data, uint64_t length, const access_log::type &log_type) { + if (m_c.hash_tree.hash_function != hash_function_type::keccak256) { + throw std::runtime_error{ + "access logs can only be used with hash tree configured with Keccak-256 hash function"}; + } auto root_hash_before = get_root_hash(); access_log log(log_type); // Call send_cmio_response with the recording state accessor @@ -1692,10 +1699,6 @@ access_log machine::log_send_cmio_response(uint16_t reason, const unsigned char [[maybe_unused]] auto note = a.make_scoped_note("send_cmio_response"); cartesi::send_cmio_response(a, reason, data, length); } - // Verify access log before returning - if (!update_hash_tree()) { - throw std::runtime_error{"update hash-tree failed"}; - } auto root_hash_after = get_root_hash(); verify_send_cmio_response(reason, data, length, root_hash_before, log, root_hash_after); return log; @@ -1703,7 +1706,7 @@ access_log machine::log_send_cmio_response(uint16_t reason, const unsigned char void machine::verify_send_cmio_response(uint16_t reason, const unsigned char *data, uint64_t length, const machine_hash &root_hash_before, const access_log &log, const machine_hash &root_hash_after) { - replay_send_cmio_state_access::context context(log, root_hash_before); + replay_send_cmio_state_access::context context{log, root_hash_before, hash_function_type::keccak256}; // Verify all intermediate state transitions replay_send_cmio_state_access a(context); cartesi::send_cmio_response(a, reason, data, length); @@ -1716,6 +1719,10 @@ void machine::verify_send_cmio_response(uint16_t reason, const unsigned char *da } void machine::reset_uarch() { + if (m_c.hash_tree.hash_function != 
hash_function_type::keccak256) { + throw std::runtime_error{ + "microarchitecture can only be used with hash tree configured with Keccak-256 hash function"}; + } write_reg(reg::uarch_halt_flag, UARCH_HALT_FLAG_INIT); write_reg(reg::uarch_pc, UARCH_PC_INIT); write_reg(reg::uarch_cycle, UARCH_CYCLE_INIT); @@ -1731,6 +1738,10 @@ void machine::reset_uarch() { } access_log machine::log_reset_uarch(const access_log::type &log_type) { + if (m_c.hash_tree.hash_function != hash_function_type::keccak256) { + throw std::runtime_error{ + "microarchitecture can only be used with hash tree configured with Keccak-256 hash function"}; + } const machine_hash root_hash_before = get_root_hash(); // Call uarch_reset_state with a uarch_record_state_access object access_log log(log_type); @@ -1739,10 +1750,6 @@ access_log machine::log_reset_uarch(const access_log::type &log_type) { [[maybe_unused]] auto note = a.make_scoped_note("reset_uarch_state"); uarch_reset_state(a); } - // Verify access log before returning - if (!update_hash_tree()) { - throw std::runtime_error{"update hash-tree failed"}; - } const auto root_hash_after = get_root_hash(); verify_reset_uarch(root_hash_before, log, root_hash_after); return log; @@ -1769,6 +1776,10 @@ access_log machine::log_step_uarch(const access_log::type &log_type) { if (read_reg(reg::iunrep) != 0) { throw std::runtime_error("microarchitecture cannot be used with unreproducible machines"); } + if (m_c.hash_tree.hash_function != hash_function_type::keccak256) { + throw std::runtime_error{ + "microarchitecture can only be used with hash tree configured with Keccak-256 hash function"}; + } auto root_hash_before = get_root_hash(); access_log log(log_type); // Call interpret with a logged state access object @@ -1807,6 +1818,10 @@ machine_config machine::get_default_config() { // NOLINTNEXTLINE(readability-convert-member-functions-to-static) uarch_interpreter_break_reason machine::run_uarch(uint64_t uarch_cycle_end) { + if (m_c.hash_tree.hash_function 
!= hash_function_type::keccak256) { + throw std::runtime_error{ + "microarchitecture can only be used with hash tree configured with Keccak-256 hash function"}; + } if (read_reg(reg::iunrep) != 0) { throw std::runtime_error("microarchitecture cannot be used with unreproducible machines"); } @@ -1816,17 +1831,15 @@ uarch_interpreter_break_reason machine::run_uarch(uint64_t uarch_cycle_end) { } interpreter_break_reason machine::log_step(uint64_t mcycle_count, const std::string &filename) { - if (!update_hash_tree()) { - throw std::runtime_error{"update hash-tree failed"}; - } - // Ensure that the microarchitecture is reset - auto current_uarch_state_hash = - get_node_hash(AR_SHADOW_UARCH_STATE_START, UARCH_STATE_LOG2_SIZE, skip_hash_tree_update); - if (current_uarch_state_hash != get_uarch_pristine_state_hash()) { - throw std::runtime_error{"microarchitecture is not reset"}; + if (m_c.hash_tree.hash_function == hash_function_type::keccak256) { + // Ensure that the microarchitecture is reset + auto current_uarch_state_hash = get_node_hash(AR_SHADOW_UARCH_STATE_START, UARCH_STATE_LOG2_SIZE); + if (current_uarch_state_hash != get_uarch_pristine_state_hash()) { + throw std::runtime_error{"microarchitecture is not reset"}; + } } auto root_hash_before = get_root_hash(); - record_step_state_access::context context(filename); + record_step_state_access::context context(filename, m_c.hash_tree.hash_function); record_step_state_access a(context, *this); uint64_t mcycle_end{}; if (__builtin_add_overflow(a.read_mcycle(), mcycle_count, &mcycle_end)) { diff --git a/src/machine.h b/src/machine.h index 8d1b56049..1fee254a3 100644 --- a/src/machine.h +++ b/src/machine.h @@ -45,6 +45,7 @@ #include "shadow-tlb.h" #include "uarch-interpret.h" #include "uarch-processor-state.h" +#include "variant-hasher.h" #include "virtio-address-range.h" namespace cartesi { @@ -553,6 +554,11 @@ class machine final { bool get_soft_yield() const { return m_r.soft_yield; } + + /// \brief Returns hash tree 
hash function + hash_function_type get_hash_function() const { + return m_c.hash_tree.hash_function; + } }; } // namespace cartesi diff --git a/src/merkle-tree-hash.cpp b/src/merkle-tree-hash.cpp index 314917562..5797bf9ba 100644 --- a/src/merkle-tree-hash.cpp +++ b/src/merkle-tree-hash.cpp @@ -21,16 +21,16 @@ #include #include #include +#include #include #include #include "back-merkle-tree.h" #include "i-hasher.h" -#include "keccak-256-hasher.h" #include "unique-c-ptr.h" +#include "variant-hasher.h" using namespace cartesi; -using hasher_type = keccak_256_hasher; /// \brief Checks if string matches prefix and captures remaninder /// \param pre Prefix to match in str. @@ -80,12 +80,12 @@ static void print_hash(const machine_hash &hash, FILE *f) { /// \param f File to read from /// \returns Hash if successful, nothing otherwise static std::optional read_hash(FILE *f) { - std::array hex_hash{}; + std::array hex_hash{}; if (fread(hex_hash.data(), 1, hex_hash.size(), f) != hex_hash.size()) { return {}; } machine_hash h; - for (size_t i = 0; i < hasher_type::hash_size; ++i) { + for (size_t i = 0; i < variant_hasher::hash_size; ++i) { std::array hex_c{hex_hash[2 * i], hex_hash[2 * i + 1], '\0'}; unsigned c = 0; // NOLINTNEXTLINE(cert-err34-c): we just generated the string so we don't need to verify it @@ -115,10 +115,8 @@ __attribute__((__format__(__printf__, 1, 2))) static void error(const char *fmt, /// \param word Pointer to word data. 
Must contain 2^log2_word_size bytes /// \param log2_word_size Log2 of word size /// \param hash Receives the word hash -static void get_word_hash(hasher_type &h, const unsigned char *word, int log2_word_size, machine_hash &hash) { - h.begin(); - h.add_data(std::span(word, 1 << log2_word_size)); - h.end(hash); +static void get_word_hash(variant_hasher &h, const unsigned char *word, int log2_word_size, machine_hash &hash) { + h.hash(std::span(word, 1 << log2_word_size), hash); } /// \brief Computes the Merkle hash of a leaf of data @@ -128,7 +126,7 @@ static void get_word_hash(hasher_type &h, const unsigned char *word, int log2_wo /// \param log2_leaf_size Log2 of leaf size /// \param log2_word_size Log2 of word size /// \returns Merkle hash of leaf data -static machine_hash get_leaf_hash(hasher_type &h, const unsigned char *leaf_data, int log2_leaf_size, +static machine_hash get_leaf_hash(variant_hasher &h, const unsigned char *leaf_data, int log2_leaf_size, int log2_word_size) { assert(log2_word_size >= 1); assert(log2_leaf_size >= log2_word_size); @@ -144,17 +142,6 @@ static machine_hash get_leaf_hash(hasher_type &h, const unsigned char *leaf_data return leaf; } -/// \brief Computes the Merkle hash of a leaf of data -/// \param leaf_data Pointer to buffer containing leaf data with -/// at least 2^log2_leaf_size bytes -/// \param log2_leaf_size Log2 of leaf size -/// \param log2_word_size Log2 of word size -/// \returns Merkle hash of leaf data -static machine_hash get_leaf_hash(const unsigned char *leaf_data, int log2_leaf_size, int log2_word_size) { - hasher_type h; - return get_leaf_hash(h, leaf_data, log2_leaf_size, log2_word_size); -} - /// \brief Prints help message static void help(const char *name) { std::ignore = fprintf(stderr, R"(Usage: @@ -198,7 +185,7 @@ The hash function used is Keccak-256. 
exit(0); } -int main(int argc, char *argv[]) { +int main(int argc, char *argv[]) try { const char *input_name = nullptr; int log2_word_size = 3; int log2_leaf_size = 12; @@ -246,7 +233,9 @@ int main(int argc, char *argv[]) { return 1; } - back_merkle_tree back_tree{log2_root_size, log2_leaf_size, log2_word_size}; + const auto hash_function = hash_function_type::keccak256; + back_merkle_tree back_tree{log2_root_size, log2_leaf_size, log2_word_size, hash_function}; + variant_hasher h{hash_function}; const uint64_t max_leaves = UINT64_C(1) << (log2_root_size - log2_leaf_size); uint64_t leaf_count = 0; @@ -266,7 +255,7 @@ int main(int argc, char *argv[]) { // Pad leaf with zeros if file ended before next leaf boundary memset(leaf_buf.get() + got, 0, leaf_size - got); // Compute leaf hash - auto leaf_hash = get_leaf_hash(leaf_buf.get(), log2_leaf_size, log2_word_size); + auto leaf_hash = get_leaf_hash(h, leaf_buf.get(), log2_leaf_size, log2_word_size); // Add leaf to incremental tree back_tree.push_back(leaf_hash); // Compare the root hash for the incremental tree and the @@ -275,4 +264,10 @@ int main(int argc, char *argv[]) { } print_hash(back_tree.get_root_hash(), stdout); return 0; +} catch (const std::exception &e) { + error("exception: %s\n", e.what()); + return 1; +} catch (...) 
{ + error("unknown exception\n"); + return 1; } diff --git a/src/os-mmap.cpp b/src/os-mmap.cpp index a49ec013e..74bb92b04 100644 --- a/src/os-mmap.cpp +++ b/src/os-mmap.cpp @@ -231,7 +231,9 @@ os_mmapd os_mmap(uint64_t length, const os_mmap_flags &flags, const std::string throw std::system_error{errno, std::generic_category(), "possible non zero partial page when mapping backing file '"s + backing_filename + "' to memory"s}; } - std::memset(std::bit_cast(backing_host_memory) + backing_file_length, 0, partial_page_remaining); + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + std::memset(reinterpret_cast(backing_host_memory) + backing_file_length, 0, + partial_page_remaining); } } else { // Can perform a single mmap() host_memory = mmap(nullptr, length, mprot, mflags, backing_fd, 0); diff --git a/src/page-hash-tree-cache-stats.h b/src/page-hash-tree-cache-stats.h index 8d2977f5e..346544ebb 100644 --- a/src/page-hash-tree-cache-stats.h +++ b/src/page-hash-tree-cache-stats.h @@ -22,13 +22,14 @@ namespace cartesi { struct page_hash_tree_cache_stats { - uint64_t page_hits; ///\< Number of pages looked up and found in cache - uint64_t page_misses; ///\< Number of pages looked up but missing from cache - uint64_t word_hits; ///\< Number of words equal to corresponding word in cache entry - uint64_t word_misses; ///\< Number of words differing from corresponding word in cache entry - uint64_t inner_page_hashes; ///\< Number of inner page hashing operations performed - uint64_t pristine_pages; ///\< Number of pages found to be pristine during updates - uint64_t non_pristine_pages; ///\< Number of pages found not to be pristine during updates + uint64_t page_hits{0}; ///\< Number of pages looked up and found in cache + uint64_t page_misses{0}; ///\< Number of pages looked up but missing from cache + uint64_t word_hits{0}; ///\< Number of words equal to corresponding word in cache entry + uint64_t word_misses{0}; ///\< Number of words differing from 
corresponding word in cache entry + uint64_t page_changes{0}; ///\< Number of page changes during updates + uint64_t inner_page_hashes{0}; ///\< Number of inner page hashing operations performed + uint64_t pristine_pages{0}; ///\< Number of pages found to be pristine during updates + uint64_t non_pristine_pages{0}; ///\< Number of pages found not to be pristine during updates }; } // namespace cartesi diff --git a/src/page-hash-tree-cache.h b/src/page-hash-tree-cache.h index a97212ea8..ac8c998ee 100644 --- a/src/page-hash-tree-cache.h +++ b/src/page-hash-tree-cache.h @@ -20,7 +20,7 @@ /// \file /// \brief Page hash-tree cache interface -#include +#include #include #include #include @@ -33,6 +33,7 @@ #include "address-range-constants.h" #include "circular-buffer.h" +#include "compiler-defines.h" #include "hash-tree-constants.h" #include "i-hasher.h" #include "is-pristine.h" @@ -41,6 +42,7 @@ #include "page-hash-tree-cache-stats.h" #include "ranges.h" #include "signposts.h" +#include "simd-hasher.h" #include "strict-aliasing.h" namespace cartesi { @@ -62,6 +64,7 @@ class page_hash_tree_cache { using const_page_view = std::span; using page_hash_tree = std::array; + using page_hash_tree_view = std::span; static_assert(POD, "hash_tree must be trivially copyable and standard layout"); static_assert(POD, "machine_hash must be trivially copyable and standard layout"); static_assert(POD, "page_hash_tree must be trivially copyable and standard layout"); @@ -146,6 +149,13 @@ class page_hash_tree_cache { return i / 2; } + /// \brief Returns the log2 level of a node in the page hash tree (its height in the tree) + /// \param i Node index + /// \returns Log 2 level of the node + static int log2_level(int i) { + return std::countl_zero(static_cast(i)) - 24; + } + /// \brief Returns a pristine page tree for a given hasher /// \tparam H Hasher type /// \param h Hasher object @@ -234,7 +244,72 @@ class page_hash_tree_cache { } }; - /// \brief Updates and entry with new page data + 
template + class simd_page_hasher { + struct leaf_entry { + const_hash_tree_word_view data; + machine_hash_view result; + }; + + struct node_entry { + const_machine_hash_view left; + const_machine_hash_view right; + machine_hash_view result; + }; + + struct dirty_entry { + page_hash_tree_view page_tree; + int node_index{0}; + bool operator==(const dirty_entry &other) const noexcept { + return page_tree.data() == other.page_tree.data() && node_index == other.node_index; + } + }; + + static constexpr size_t QUEUE_MAX_SIZE = + ((UINT64_C(1) << ((HASH_TREE_LOG2_PAGE_SIZE - HASH_TREE_LOG2_WORD_SIZE) - 1))) * + hasher_type::MAX_LANE_COUNT; + + simd_data_hasher m_leaves_queue; + simd_concat_hasher m_concat_queue; + circular_buffer m_dirty_queue; + + public: + static constexpr int QUEUE_MAX_PAGE_COUNT = hasher_type::MAX_LANE_COUNT; + + explicit simd_page_hasher(hasher_type &hasher) : m_leaves_queue{hasher}, m_concat_queue{hasher} {} + + /// \brief Enqueues a leaf for hashing + void enqueue_leaf(const_hash_tree_word_view data, page_hash_tree_view page_tree, int word_index) noexcept { + m_leaves_queue.enqueue(data, page_tree[word_index]); + m_dirty_queue.try_push_back(dirty_entry{.page_tree = page_tree, .node_index = entry::parent(word_index)}); + } + + /// \brief Flushes the entire queue + int flush() noexcept { + m_leaves_queue.flush(); + int hashes = 0; + + while (!m_dirty_queue.empty()) { + for (size_t i = 0, n = m_dirty_queue.size(); i < n; ++i) { + const auto &d = m_dirty_queue.front(); + const auto node_index = d.node_index; + const auto page_tree = d.page_tree; + m_dirty_queue.pop_front(); + ++hashes; + m_concat_queue.enqueue(page_tree[entry::left_child(node_index)], + page_tree[entry::right_child(node_index)], page_tree[node_index]); + if (node_index > 1) [[likely]] { + m_dirty_queue.try_push_back( + dirty_entry{.page_tree = page_tree, .node_index = entry::parent(node_index)}); + } + } + m_concat_queue.flush(); + } + return hashes; + } + }; + + /// \brief Enqueue 
entry to be hashed with new page data /// \tparam H Hasher type /// \tparam D Data range type /// \param h Hasher object @@ -243,7 +318,8 @@ class page_hash_tree_cache { /// \returns True if update succeeded, false otherwise template // NOLINTNEXTLINE(cppcoreguidelines-missing-std-forward) - bool update_entry(H &&h, D &&d, entry &e) noexcept { + bool enqueue_hash_entry(page_hash_tree_cache::simd_page_hasher &queue, D &&d, entry &e, + page_hash_tree_cache_stats &stats) noexcept { if (std::ranges::size(d) != m_page_size) { return false; } @@ -253,14 +329,12 @@ class page_hash_tree_cache { SCOPED_SIGNPOST(m_log, m_spid_pristine_update, "phtc: pristine update", ""); e.clear_hash_tree(m_pristine_page_hash_tree); e.clear_page(); - ++m_pristine_pages; + ++stats.pristine_pages; return true; } } SCOPED_SIGNPOST(m_log, m_spid_non_pristine_update, "phtc: non-pristine update", ""); - ++m_non_pristine_pages; const const_page_view page{std::ranges::data(d), std::ranges::size(d)}; - circular_buffer dirty_nodes; // Go over all words in the entry page, comparing with updated page, // and updating the hashes for the modified words //??D In C++23, we would use std::views::slide and std::views::zip to write this in declarative style. 
@@ -269,31 +343,19 @@ class page_hash_tree_cache { int miss = 0; auto &page_tree = e.get_page_hash_tree(); for (int offset = 0, index = m_page_word_count; offset < m_page_size; offset += m_word_size, ++index) { - const auto entry_word = entry_page.subspan(offset, m_word_size); - const auto page_word = page.subspan(offset, m_word_size); - if (!std::ranges::equal(entry_word, page_word)) { - get_hash(h, page_word, page_tree[index]); + const auto entry_word = std::span{entry_page.subspan(offset, m_word_size)}; + const auto page_word = std::span{page.subspan(offset, m_word_size)}; + if (!std::ranges::equal(entry_word, page_word)) [[unlikely]] { std::ranges::copy(page_word, entry_word.begin()); - dirty_nodes.try_push_back(e.parent(index)); + queue.enqueue_leaf(page_word, page_tree, index); ++miss; } else { ++hit; } } - m_word_hits += hit; - m_word_misses += miss; - // Now go over fifo, taking a node, updating its from its children, and enqueueing its parent for update - int inner_page_hashes = 0; - while (!dirty_nodes.empty()) { - const int index = dirty_nodes.front(); - dirty_nodes.pop_front(); - ++inner_page_hashes; - get_concat_hash(h, page_tree[e.left_child(index)], page_tree[e.right_child(index)], page_tree[index]); - if (index != 1) { - dirty_nodes.try_push_back(e.parent(index)); - } - } - m_inner_page_hashes += inner_page_hashes; + stats.word_hits += hit; + stats.word_misses += miss; + ++stats.non_pristine_pages; return true; } @@ -304,7 +366,7 @@ class page_hash_tree_cache { const page_view entry_page{e.get_page()}; auto &page_tree = e.get_page_hash_tree(); for (int offset = 0, index = m_page_word_count; offset < m_page_size; offset += m_word_size, ++index) { - const auto page_word = entry_page.subspan(offset, m_word_size); + const auto page_word = std::span{entry_page.subspan(offset, m_word_size)}; const auto page_word_hash = get_hash(h, page_word); if (page_word_hash != page_tree[index]) { const int log2_size = HASH_TREE_LOG2_WORD_SIZE; @@ -368,14 +430,14 @@ 
class page_hash_tree_cache { // Make it most recently used m_lru.splice(m_lru.begin(), m_lru, it->second.first); hit = true; - ++m_page_hits; + ++m_stats.page_hits; // Return borrowed entry return std::ref(e.set_borrowed(true)); } hit = false; // Not in map, but we still have unused entries to lend if (m_used < m_entries.size()) { - ++m_page_misses; + ++m_stats.page_misses; entry &e = m_entries[m_used++]; if (e.get_borrowed()) { throw std::runtime_error{"page hash-tree cache entry already borrowed"}; @@ -391,6 +453,7 @@ class page_hash_tree_cache { if (e.get_borrowed()) { return {}; } + ++m_stats.page_misses; m_map.erase(e.get_paddr_page()); m_lru.pop_back(); m_lru.push_front(e); @@ -435,27 +498,24 @@ class page_hash_tree_cache { /// \param clear Whether to clear statistics after retrieving them /// \returns Statistics page_hash_tree_cache_stats get_stats(bool clear = false) noexcept { - auto s = page_hash_tree_cache_stats{ - .page_hits = m_page_hits.load(), - .page_misses = m_page_misses.load(), - .word_hits = m_word_hits.load(), - .word_misses = m_word_misses.load(), - .inner_page_hashes = m_inner_page_hashes.load(), - .pristine_pages = m_pristine_pages.load(), - .non_pristine_pages = m_non_pristine_pages.load(), - }; + auto s = m_stats; if (clear) { - m_page_hits = 0; - m_page_misses = 0; - m_word_hits = 0; - m_word_misses = 0; - m_inner_page_hashes = 0; - m_pristine_pages = 0; - m_non_pristine_pages = 0; + m_stats.page_hits = 0; + m_stats.page_misses = 0; + m_stats.word_hits = 0; + m_stats.word_misses = 0; + m_stats.page_changes = 0; + m_stats.inner_page_hashes = 0; + m_stats.pristine_pages = 0; + m_stats.non_pristine_pages = 0; } return s; } + page_hash_tree_cache_stats &get_stats_ref() noexcept { + return m_stats; + } + /// \brief Destructor ~page_hash_tree_cache() { #ifdef DUMP_HASH_TREE_STATS @@ -474,11 +534,12 @@ class page_hash_tree_cache { std::cerr << "word hit rate: " << 100.0 * static_cast(s.word_hits) / static_cast(word_all) << '\n'; } + std::cerr << 
"changed pages: " << s.page_changes << '\n'; std::cerr << "inner page hashes: " << s.inner_page_hashes << '\n'; auto pristine_all = s.pristine_pages + s.non_pristine_pages; if (pristine_all > 0) { - std::cerr << "pristine pages: " << s.inner_page_hashes << '\n'; - std::cerr << "non-pristine pages: " << s.inner_page_hashes << '\n'; + std::cerr << "pristine pages: " << s.pristine_pages << '\n'; + std::cerr << "non-pristine pages: " << s.non_pristine_pages << '\n'; std::cerr << "pristine page ratio: " << 100.0 * static_cast(s.pristine_pages) / static_cast(pristine_all) << '\n'; } @@ -533,13 +594,7 @@ class page_hash_tree_cache { size_t m_used{0}; ///< How many entries have already been used // Statistics - std::atomic m_page_hits{0}; ///\< Number of pages looked up and found in cache - std::atomic m_page_misses{0}; ///\< Number of pages looked up but missing from cache - std::atomic m_word_hits{0}; ///\< Number of words equal to corresponding word in cache entry - std::atomic m_word_misses{0}; ///\< Number of words differing from corresponding word in cache entry - std::atomic m_inner_page_hashes{0}; ///\< Number of inner page hashing operations performed - std::atomic m_pristine_pages{0}; ///\< Number of pages found to be pristine - std::atomic m_non_pristine_pages{0}; ///\< Number of pages found not to be pristine + page_hash_tree_cache_stats m_stats; }; } // namespace cartesi diff --git a/src/pristine-merkle-tree.cpp b/src/pristine-merkle-tree.cpp index ef31d26b4..57edd6cf1 100644 --- a/src/pristine-merkle-tree.cpp +++ b/src/pristine-merkle-tree.cpp @@ -29,7 +29,7 @@ namespace cartesi { -pristine_merkle_tree::pristine_merkle_tree(int log2_root_size, int log2_word_size) : +pristine_merkle_tree::pristine_merkle_tree(int log2_root_size, int log2_word_size, hash_function_type hash_function) : m_log2_root_size{log2_root_size}, m_log2_word_size{log2_word_size}, m_hashes(std::max(0, log2_root_size - log2_word_size + 1)) { @@ -44,10 +44,8 @@ 
pristine_merkle_tree::pristine_merkle_tree(int log2_root_size, int log2_word_siz } std::vector word(1 << log2_word_size, 0); assert(word.size() == (UINT64_C(1) << log2_word_size)); - hasher_type h; - h.begin(); - h.add_data(word); - h.end(m_hashes[0]); + variant_hasher h{hash_function}; + h.hash(word, m_hashes[0]); for (unsigned i = 1; i < m_hashes.size(); ++i) { get_concat_hash(h, m_hashes[i - 1], m_hashes[i - 1], m_hashes[i]); } diff --git a/src/pristine-merkle-tree.h b/src/pristine-merkle-tree.h index f40b289fb..9f65bf565 100644 --- a/src/pristine-merkle-tree.h +++ b/src/pristine-merkle-tree.h @@ -20,8 +20,8 @@ #include #include -#include "keccak-256-hasher.h" #include "machine-hash.h" +#include "variant-hasher.h" /// \file /// \brief Pristine Merkle tree interface. @@ -31,13 +31,10 @@ namespace cartesi { /// \brief Hashes of pristine subtrees for a range of sizes class pristine_merkle_tree { public: - /// \brief Hasher class. - using hasher_type = keccak_256_hasher; - /// \brief Constructor /// \param log2_root_size Log2 of root node /// \param log2_word_size Log2 of word - pristine_merkle_tree(int log2_root_size, int log2_word_size); + pristine_merkle_tree(int log2_root_size, int log2_word_size, hash_function_type hash_function); /// \brief Returns hash of pristine subtree /// \param log2_size Log2 of subtree size. 
Must be between diff --git a/src/record-send-cmio-state-access.h b/src/record-send-cmio-state-access.h index 55fdeda38..00f1b024c 100644 --- a/src/record-send-cmio-state-access.h +++ b/src/record-send-cmio-state-access.h @@ -53,16 +53,15 @@ struct i_state_access_fast_addr { class record_send_cmio_state_access : public i_state_access, public i_accept_scoped_notes { - using hasher_type = hash_tree::hasher_type; // NOLINTBEGIN(cppcoreguidelines-avoid-const-or-ref-data-members) machine &m_m; ///< Associated machine access_log &m_log; ///< Pointer to access log // NOLINTEND(cppcoreguidelines-avoid-const-or-ref-data-members) - static void get_hash(const access_data &data, machine_hash &hash) { - hasher_type hasher; - get_merkle_tree_hash(hasher, std::span{data.data(), data.size()}, - HASH_TREE_WORD_SIZE, hash); + template + static void get_hash(H &h, const access_data &data, machine_hash &hash) { + get_merkle_tree_hash(h, std::span{data.data(), data.size()}, HASH_TREE_WORD_SIZE, + hash); } public: @@ -103,7 +102,8 @@ class record_send_cmio_state_access : a.get_read().value().resize(HASH_TREE_WORD_SIZE); // read the entire leaf where the word is located m_m.read_memory(pleaf_aligned, a.get_read().value().data(), HASH_TREE_WORD_SIZE); - get_hash(a.get_read().value(), a.get_read_hash()); + variant_hasher h(m_m.get_hash_function()); + get_hash(h, a.get_read().value(), a.get_read_hash()); // NOLINTEND(bugprone-unchecked-optional-access) m_log.push_access(std::move(a), text); } @@ -138,14 +138,15 @@ class record_send_cmio_state_access : a.get_read().emplace(); a.get_read().value().resize(HASH_TREE_WORD_SIZE); m_m.read_memory(pleaf_aligned, a.get_read().value().data(), HASH_TREE_WORD_SIZE); - get_hash(a.get_read().value(), a.get_read_hash()); + variant_hasher h(m_m.get_hash_function()); + get_hash(h, a.get_read().value(), a.get_read_hash()); // the logged written data is the same as the read data, but with the word at paligned replaced by word 
a.set_written(access_data(a.get_read().value())); // copy the read data const int word_offset = static_cast(paligned - pleaf_aligned); // offset of word in leaf replace_word_access_data(val, a.get_written().value(), word_offset); // replace the word // compute the hash of the written data a.get_written_hash().emplace(); - get_hash(a.get_written().value(), a.get_written_hash().value()); + get_hash(h, a.get_written().value(), a.get_written_hash().value()); // NOLINTEND(bugprone-unchecked-optional-access) m_log.push_access(std::move(a), text); } @@ -247,9 +248,9 @@ class record_send_cmio_state_access : // log hash and written data // NOLINTBEGIN(bugprone-unchecked-optional-access) a.get_written_hash().emplace(); - hasher_type hasher{}; + variant_hasher h(m_m.get_hash_function()); const auto offset = paddr - ar.get_start(); - get_merkle_tree_hash(hasher, std::span{ar.get_host_memory() + offset, write_length}, + get_merkle_tree_hash(h, std::span{ar.get_host_memory() + offset, write_length}, HASH_TREE_WORD_SIZE, a.get_written_hash().value()); if (m_log.get_log_type().has_large_data()) { access_data &data = a.get_written().emplace(write_length); diff --git a/src/record-step-state-access.h b/src/record-step-state-access.h index 58c90a7fd..7eb3e5920 100644 --- a/src/record-step-state-access.h +++ b/src/record-step-state-access.h @@ -31,6 +31,7 @@ #include "pmas.h" #include "shadow-tlb.h" #include "unique-c-ptr.h" +#include "variant-hasher.h" namespace cartesi { @@ -58,10 +59,13 @@ class record_step_state_access : struct context { /// \brief Constructor of record step state access context /// \param filename where to save the log - explicit context(std::string filename) : filename(std::move(filename)) { + explicit context(std::string filename, hash_function_type hash_function) : + filename(std::move(filename)), + hash_function(hash_function) { ; } std::string filename; ///< where to save the log + hash_function_type hash_function; ///< hash function type to use for the log 
mutable pages_type touched_pages; ///< copy of all pages touched during execution }; @@ -91,11 +95,16 @@ class record_step_state_access : // Write log file. // The log format is as follows: - // page_count, [(page_index, data, scratch_area), ...], sibling_count, [sibling_hash, ...] + // hash_function, page_count, [(page_index, data, scratch_area), ...], sibling_count, [sibling_hash, ...] // We store the page index, instead of the page address. // Scratch area is used by the replay to store page hashes, which change during replay // This is to work around the lack of dynamic memory allocation when replaying the log in microarchitectures auto fp = make_unique_fopen(m_context.filename.c_str(), "wb"); + // write the hash function type so the hasher can be recreated by the replay + auto hash_function = static_cast(m_context.hash_function); + if (fwrite(&hash_function, sizeof(hash_function), 1, fp.get()) != 1) { + throw std::runtime_error("Could not write hash function type to log file"); + } if (fwrite(&page_count, sizeof(page_count), 1, fp.get()) != 1) { throw std::runtime_error("Could not write page count to log file"); } @@ -129,7 +138,7 @@ class record_step_state_access : /// \param address address of the page void touch_page(uint64_t address) const { auto page = address & ~PAGE_OFFSET_MASK; - if (m_context.touched_pages.find(page) != m_context.touched_pages.end()) { + if (m_context.touched_pages.contains(page)) { return; // already saved } auto [it, _] = m_context.touched_pages.emplace(page, page_data_type()); diff --git a/src/replay-send-cmio-state-access.h b/src/replay-send-cmio-state-access.h index ffa8ae46b..52ba8f38c 100644 --- a/src/replay-send-cmio-state-access.h +++ b/src/replay-send-cmio-state-access.h @@ -32,10 +32,12 @@ #include "hash-tree.h" #include "i-hasher.h" #include "i-state-access.h" +#include "machine-reg.h" #include "meta.h" #include "riscv-constants.h" #include "shadow-registers.h" #include "unique-c-ptr.h" +#include "variant-hasher.h" 
namespace cartesi { @@ -50,15 +52,14 @@ struct i_state_access_fast_addr { /// \brief Allows replaying a machine::send_cmio_response() from an access log. class replay_send_cmio_state_access : public i_state_access { public: - using hasher_type = hash_tree::hasher_type; - struct context { /// \brief Constructor replay_send_cmio_state_access context /// \param log Access log to be replayed /// \param initial_hash Initial root hash - context(const access_log &log, const machine_hash &initial_hash) : + context(const access_log &log, const machine_hash &initial_hash, hash_function_type hash_function) : accesses(log.get_accesses()), - root_hash(initial_hash) { + root_hash(initial_hash), + hash_function(hash_function) { ; } const std::vector &accesses; // NOLINT(cppcoreguidelines-avoid-const-or-ref-data-members) @@ -66,8 +67,8 @@ class replay_send_cmio_state_access : public i_state_access + template static void get_hash(H &h, const access_data &data, machine_hash &hash) { get_merkle_tree_hash(h, std::span{data.data(), data.size()}, HASH_TREE_WORD_SIZE, hash); } @@ -153,9 +154,10 @@ class replay_send_cmio_state_access : public i_state_access data_length) { memset(scratch.get() + data_length, 0, write_length - data_length); } - get_merkle_tree_hash(hasher, std::span{scratch.get(), write_length}, HASH_TREE_WORD_SIZE, + get_merkle_tree_hash(h, std::span{scratch.get(), write_length}, HASH_TREE_WORD_SIZE, computed_data_hash); // check if logged written hash matches the computed data hash if (written_hash != computed_data_hash) { @@ -334,7 +337,7 @@ class replay_send_cmio_state_access : public i_state_access), "hash_type size mismatch"); -extern "C" void interop_merkle_tree_hash(const unsigned char *data, size_t size, interop_hash_type hash) { - hash_tree::hasher_type hasher{}; - get_merkle_tree_hash(hasher, std::span{data, size}, HASH_TREE_WORD_SIZE, +extern "C" void interop_merkle_tree_hash(cartesi::hash_function_type hash_function, const unsigned char *data, + size_t size, 
interop_hash_type hash) { + variant_hasher h{hash_function}; + get_merkle_tree_hash(h, std::span{data, size}, HASH_TREE_WORD_SIZE, machine_hash_view{*hash, interop_machine_hash_byte_size}); } -extern "C" void interop_concat_hash(interop_const_hash_type left, interop_const_hash_type right, - interop_hash_type result) { - hash_tree::hasher_type hasher{}; +extern "C" void interop_concat_hash(cartesi::hash_function_type hash_function, interop_const_hash_type left, + interop_const_hash_type right, interop_hash_type result) { + variant_hasher h{hash_function}; // NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast) - get_concat_hash(hasher, *reinterpret_cast(left), - *reinterpret_cast(right), *reinterpret_cast(result)); + get_concat_hash(h, *reinterpret_cast(left), *reinterpret_cast(right), + *reinterpret_cast(result)); // NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast) } diff --git a/src/replay-step-state-access-interop.h b/src/replay-step-state-access-interop.h index b8e4d3428..2e4f63f51 100644 --- a/src/replay-step-state-access-interop.h +++ b/src/replay-step-state-access-interop.h @@ -17,10 +17,11 @@ #ifndef REPLAY_STEP_STATE_ACCESS_INTEROP_H #define REPLAY_STEP_STATE_ACCESS_INTEROP_H -#include "compiler-defines.h" #include #include -#include + +#include "compiler-defines.h" +#include "variant-hasher.h" const static uint64_t interop_log2_root_size = 64; constexpr size_t interop_machine_hash_byte_size = 32; @@ -32,9 +33,10 @@ NO_RETURN inline void interop_throw_runtime_error(const char *msg) { throw std::runtime_error(msg); } -extern "C" void interop_merkle_tree_hash(const unsigned char *data, size_t size, interop_hash_type hash); +extern "C" void interop_merkle_tree_hash(cartesi::hash_function_type hash_function, const unsigned char *data, + size_t size, interop_hash_type hash); -extern "C" void interop_concat_hash(interop_const_hash_type left, interop_const_hash_type right, - interop_hash_type result); +extern "C" void 
interop_concat_hash(cartesi::hash_function_type hash_function, interop_const_hash_type left, + interop_const_hash_type right, interop_hash_type result); #endif diff --git a/src/replay-step-state-access.h b/src/replay-step-state-access.h index e8219924c..60de9271c 100644 --- a/src/replay-step-state-access.h +++ b/src/replay-step-state-access.h @@ -17,8 +17,10 @@ #ifndef REPLAY_STEP_STATE_ACCESS_H #define REPLAY_STEP_STATE_ACCESS_H +#include #include #include +#include #include #include @@ -27,6 +29,7 @@ #include "i-accept-scoped-notes.h" #include "i-prefer-shadow-state.h" #include "i-state-access.h" +#include "machine-reg.h" #include "mock-address-range.h" #include "pmas.h" #include "replay-step-state-access-interop.h" @@ -37,6 +40,7 @@ #include "strict-aliasing.h" #include "uarch-constants.h" #include "uarch-defines.h" +#include "variant-hasher.h" namespace cartesi { @@ -78,21 +82,21 @@ class replay_step_state_access : public: using address_type = uint64_t; using data_type = unsigned char[AR_PAGE_SIZE]; - using hash_type = std::array; - static_assert(sizeof(hash_type) == interop_machine_hash_byte_size); + static_assert(sizeof(machine_hash) == interop_machine_hash_byte_size); struct PACKED page_type { address_type index; data_type data; - hash_type hash; + machine_hash hash; }; struct context { - uint64_t page_count{0}; ///< Number of pages in the step log - page_type *pages{nullptr}; ///< Array of page data - uint64_t sibling_count{0}; ///< Number of sibling hashes in the step log - hash_type *sibling_hashes{nullptr}; ///< Array of sibling hashes - mock_address_ranges ars{}; ///< Array of address ranges + hash_function_type hash_function{hash_function_type::keccak256}; ///< Hash function used for the step log + uint64_t page_count{0}; ///< Number of pages in the step log + page_type *pages{nullptr}; ///< Array of page data + uint64_t sibling_count{0}; ///< Number of sibling hashes in the step log + machine_hash *sibling_hashes{nullptr}; ///< Array of sibling hashes 
+ mock_address_ranges ars{}; ///< Array of address ranges }; private: @@ -106,19 +110,38 @@ class replay_step_state_access : // \param root_hash_before The expected machine root hash before the replay // \throw runtime_error if the initial root hash does not match or the log data is invalid replay_step_state_access(context &context, unsigned char *log_image, uint64_t log_size, - const hash_type &root_hash_before) : + const machine_hash &root_hash_before) : m_context(context) { // relevant offsets in the log data + uint64_t page_count_offset{}; uint64_t first_page_offset{}; uint64_t first_sibling_offset{}; uint64_t sibling_count_offset{}; uint64_t end_offset{}; // end of the log data + // hash function type + uint64_t temp_hash_function{}; + if (!validate_and_advance_offset(log_size, 0, sizeof(temp_hash_function), 1, &page_count_offset)) { + interop_throw_runtime_error("hash function type past end of step log"); + } + temp_hash_function = aliased_aligned_read(log_image + 0); + switch (temp_hash_function) { + case static_cast(hash_function_type::keccak256): + m_context.hash_function = hash_function_type::keccak256; + break; + case static_cast(hash_function_type::sha256): + m_context.hash_function = hash_function_type::sha256; + break; + default: + interop_throw_runtime_error("invalid log format: unsupported hash function type"); + } + // set page count - if (!validate_and_advance_offset(log_size, 0, sizeof(m_context.page_count), 1, &first_page_offset)) { + if (!validate_and_advance_offset(log_size, page_count_offset, sizeof(m_context.page_count), 1, + &first_page_offset)) { interop_throw_runtime_error("page count past end of step log"); } - m_context.page_count = aliased_aligned_read(log_image); + m_context.page_count = aliased_aligned_read(log_image + page_count_offset); if (m_context.page_count == 0) { interop_throw_runtime_error("page count is zero"); } @@ -138,12 +161,12 @@ class replay_step_state_access : m_context.sibling_count = aliased_aligned_read(log_image 
+ sibling_count_offset); // set sibling hashes - if (!validate_and_advance_offset(log_size, first_sibling_offset, sizeof(hash_type), m_context.sibling_count, + if (!validate_and_advance_offset(log_size, first_sibling_offset, sizeof(machine_hash), m_context.sibling_count, &end_offset)) { interop_throw_runtime_error("sibling hashes past end of step log"); } // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - m_context.sibling_hashes = reinterpret_cast(log_image + first_sibling_offset); + m_context.sibling_hashes = reinterpret_cast(log_image + first_sibling_offset); // ensure that we read exactly the expected log size if (end_offset != log_size) { @@ -152,7 +175,7 @@ class replay_step_state_access : // ensure that the page indexes are in increasing order // and that the scratch hash area is all zeros - static const hash_type all_zeros{}; + static const machine_hash all_zeros{}; for (uint64_t i = 0; i < m_context.page_count; i++) { if (i > 0 && m_context.pages[i - 1].index >= m_context.pages[i].index) { interop_throw_runtime_error("invalid log format: page index is not in increasing order"); @@ -181,7 +204,7 @@ class replay_step_state_access : // \brief Finish the replay and check the final machine root hash // \param final_root_hash The expected final machine root hash // \throw runtime_error if the final root hash does not match - void finish(const hash_type &root_hash_after) { + void finish(const machine_hash &root_hash_after) { // reset all tlb vh offsets to zero // this is to mimic peek behavior of tlb pma device relocate_tlb_vh_offset_to_vp_offset(); @@ -314,13 +337,13 @@ class replay_step_state_access : } // \brief Compute the current machine root hash - hash_type compute_root_hash() { + machine_hash compute_root_hash() { //??D Here we should only do this for dirty pages, right? 
//??D Initially, all pages are dirty, because we don't know their hashes //??D But in the end, we should only update those pages that we touched //??D May improve performance when we are running this on ZK for (uint64_t i = 0; i < m_context.page_count; i++) { - interop_merkle_tree_hash(m_context.pages[i].data, AR_PAGE_SIZE, + interop_merkle_tree_hash(m_context.hash_function, m_context.pages[i].data, AR_PAGE_SIZE, // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) reinterpret_cast(&m_context.pages[i].hash)); } @@ -342,7 +365,7 @@ class replay_step_state_access : // \param next_page Index of the next page to be visited // \param next_sibling Index of the next sibling hash to be visited // \return Resulting root hash of the range - hash_type compute_root_hash_impl(address_type page_index, int log2_page_count, size_t &next_page, + machine_hash compute_root_hash_impl(address_type page_index, int log2_page_count, size_t &next_page, size_t &next_sibling) { // NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast)) auto page_count = UINT64_C(1) << log2_page_count; @@ -356,9 +379,9 @@ class replay_step_state_access : auto left = compute_root_hash_impl(page_index, log2_page_count - 1, next_page, next_sibling); const auto halfway_page_index = page_index + (page_count >> 1); auto right = compute_root_hash_impl(halfway_page_index, log2_page_count - 1, next_page, next_sibling); - hash_type hash{}; - interop_concat_hash(reinterpret_cast(&left), reinterpret_cast(&right), - reinterpret_cast(&hash)); + machine_hash hash{}; + interop_concat_hash(m_context.hash_function, reinterpret_cast(&left), + reinterpret_cast(&right), reinterpret_cast(&hash)); return hash; } if (m_context.pages[next_page].index == page_index) { diff --git a/src/sha-256-hasher.cpp b/src/sha-256-hasher.cpp new file mode 100644 index 000000000..b4a19033b --- /dev/null +++ b/src/sha-256-hasher.cpp @@ -0,0 +1,356 @@ +// Copyright Cartesi and individual authors (see AUTHORS) +// SPDX-License-Identifier: 
LGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) any +// later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +// PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License along +// with this program (see COPYING). If not, see . +// + +#include "sha-256-hasher.h" +#include "compiler-defines.h" +#include "i-hasher.h" +#include "machine-hash.h" +#include "simd-vector-type.h" + +#include +#include +#include +#include + +namespace cartesi { + +// This code is not portable to big-endian architectures. +// NOLINTNEXTLINE(misc-redundant-expression) +static_assert(std::endian::native == std::endian::little, "code assumes little-endian byte ordering"); + +static constexpr size_t SHA256_ROUND_COUNT = 64; +static constexpr size_t SHA256_STATE_WORD_COUNT = 8; +static constexpr size_t SHA256_BUF_WORD_COUNT = 16; +static constexpr size_t SHA256_BUF_SIZE = 64; +static constexpr size_t SHA256_LENGTH_WORD_INDEX = 14; + +template +struct alignas(uint32_vector_type::align) sha_256_context final { + using word_vector_type = uint32_vector_type::type; + using word_bytes_array = uint8_t[SHA256_BUF_WORD_COUNT][LaneCount][sizeof(uint32_t)]; + using data_span = std::span; + + static constexpr size_t word_vector_align = uint32_vector_type::align; + + word_vector_type m_words[SHA256_BUF_WORD_COUNT]{}; ///< Buffer for SHA-256 words, interleaved by lanes + word_vector_type m_state[SHA256_STATE_WORD_COUNT] = { + ///< SHA-256 state, interleaved by lanes + word_vector_type{} | 0x6a09e667, + word_vector_type{} | 0xbb67ae85, + word_vector_type{} | 
0x3c6ef372, + word_vector_type{} | 0xa54ff53a, + word_vector_type{} | 0x510e527f, + word_vector_type{} | 0x9b05688c, + word_vector_type{} | 0x1f83d9ab, + word_vector_type{} | 0x5be0cd19, + }; + + FORCE_INLINE void update(const std::array &data, size_t &pos, size_t &len) noexcept { + [[maybe_unused]] auto words_bytes = // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + reinterpret_cast(m_words); + // Assume all data spans have the same length + const size_t data_len = data[0].size(); + for (size_t i = 0; i < data_len;) { + // Interleave data + const size_t step = std::min(SHA256_BUF_SIZE - pos, data_len - i); + if constexpr (DataExtent != std::dynamic_extent && DataExtent % sizeof(uint32_t) == 0) { + // If data length is a multiple of word size, process a word at time + UNROLL_LOOP(64) + for (size_t j = 0; j < step; j += sizeof(uint32_t)) { + word_vector_type data_word; + UNROLL_LOOP_FULL() + for (size_t l = 0; l < LaneCount; ++l) { + uint32_t lane_word{}; + __builtin_memcpy(&lane_word, &data[l][i + j], sizeof(lane_word)); + data_word[l] = __builtin_bswap32(lane_word); + } + m_words[(pos + j) / sizeof(uint32_t)] = word_vector_type{} | data_word; + } + } else { // Otherwise, process a byte at time + UNROLL_LOOP(64) + for (size_t j = 0; j < step; j++) { + const size_t bi = (pos + j) / sizeof(uint32_t); + const size_t bj = sizeof(uint32_t) - 1 - ((pos + j) % sizeof(uint32_t)); + UNROLL_LOOP_FULL() + for (size_t l = 0; l < LaneCount; ++l) { + words_bytes[bi][l][bj] = data[l][i + j]; + } + } + } + i += step; + pos += step; + // Perform SHA-256 compression + if (pos >= SHA256_BUF_SIZE) [[unlikely]] { + compress(); + len++; + pos = 0; + } + } + } + + FORCE_INLINE void finish(const std::array &hashes, size_t pos, size_t len) noexcept { + // Pad and append the 1 bit in the last word + const size_t bi = pos / sizeof(uint32_t); + const size_t bj = sizeof(uint32_t) - 1 - (pos % sizeof(uint32_t)); + m_words[bi] &= static_cast(0xffffff00) << (bj * 8); + m_words[bi] 
|= static_cast(0x00000080) << (bj * 8); + // Pad remaining words with zeros + UNROLL_LOOP(64) + for (size_t x = (pos / sizeof(uint32_t)) + 1; x < SHA256_BUF_WORD_COUNT; ++x) { + m_words[x] = word_vector_type{}; + } + // Compress if there is no space left to store the length + if (pos >= SHA256_LENGTH_WORD_INDEX * sizeof(uint32_t)) [[unlikely]] { + compress(); + // Clear all words + UNROLL_LOOP_FULL() + for (size_t x = 0; x < SHA256_BUF_WORD_COUNT; ++x) { // NOLINT(modernize-loop-convert) + m_words[x] = word_vector_type{}; + } + } + // Store length in the last two words + const uint64_t bit_len = ((static_cast(len) * SHA256_BUF_SIZE) + static_cast(pos)) * 8; + m_words[SHA256_LENGTH_WORD_INDEX + 0] |= static_cast(bit_len >> 32); + m_words[SHA256_LENGTH_WORD_INDEX + 1] |= static_cast(bit_len); + // Perform final compression + compress(); + // Deinterleave hash + UNROLL_LOOP_FULL() + for (size_t l = 0; l < LaneCount; ++l) { + UNROLL_LOOP_FULL() + for (size_t i = 0; i < MACHINE_HASH_SIZE; i += sizeof(uint32_t)) { + const uint32_t word = __builtin_bswap32(m_state[i / sizeof(uint32_t)][l]); + __builtin_memcpy(&hashes[l][i], &word, sizeof(uint32_t)); + } + } + } + + template + FORCE_INLINE static void simd_concat_hash(array2d data, + std::array hashes) noexcept { + sha_256_context ctx; + // Position and length are kept local to allow the compiler optimize them out + // when DataExtent is a compile time constant. + size_t pos = 0; // Current position in the buffer in bytes + size_t len = 0; // Current position in the buffer in 64-byte blocks + UNROLL_LOOP(4) + for (size_t i = 0; i < ConcatCount; ++i) { + ctx.update(data[i], pos, len); + } + ctx.finish(hashes, pos, len); + } + +private: + FORCE_INLINE void compress() noexcept { + // This code is inspired by SHA-256 pseudo-code from Wikipedia: + // https://en.wikipedia.org/wiki/SHA-2#Pseudocode + // Selected for its simplicity and vectorization-friendly structure. 
+ // However it was optimized to use circular buffers in words array to minimize memory bandwidth, + // similar how is done in the generic SHA-256 implementation of OpenSSL. + alignas(word_vector_align) static constexpr uint32_t SHA256_K[SHA256_ROUND_COUNT] = {0x428a2f98, 0x71374491, + 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, + 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, + 0xd5a79147, 0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, + 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, + 0x106aa070, 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2}; + // Unfortunately we can't use C++ functions that take vectors as arguments + // because according to GCC it would violate ABI rules, so we have to use macros instead. 
+ // NOLINTBEGIN(cppcoreguidelines-macro-usage,cppcoreguidelines-macro-to-enum,modernize-macro-to-enum) +#define SHA256_CH(x, y, z) (((x) & (y)) ^ ((~(x)) & (z))) +#define SHA256_MAJ(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) +#define SHA256_ROR(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) +#define SHA256_S0(x) (SHA256_ROR(x, 2) ^ SHA256_ROR(x, 13) ^ SHA256_ROR(x, 22)) +#define SHA256_S1(x) (SHA256_ROR(x, 6) ^ SHA256_ROR(x, 11) ^ SHA256_ROR(x, 25)) +#define SHA256_G0(x) (SHA256_ROR(x, 7) ^ SHA256_ROR(x, 18) ^ ((x) >> 3)) +#define SHA256_G1(x) (SHA256_ROR(x, 17) ^ SHA256_ROR(x, 19) ^ ((x) >> 10)) +#define SHA256_WI(i, k) (((i) - (k)) & 15) // Circular buffer index for words +#define SHA256_SI(i, k) (((k) - (i)) & 7) // Circular buffer index for state + // NOLINTEND(cppcoreguidelines-macro-usage,cppcoreguidelines-macro-to-enum,modernize-macro-to-enum) + // Load state + word_vector_type s[8]{m_state[0], m_state[1], m_state[2], m_state[3], m_state[4], m_state[5], m_state[6], + m_state[7]}; + word_vector_type w[16]; + // Perform SHA-256 rounds + UNROLL_LOOP_FULL() + for (size_t r = 0; r < SHA256_ROUND_COUNT; r++) { + const size_t i = r % 16; + const size_t j = r - i; + if (j == 0) { + w[i] = m_words[i]; + } else { + w[i] += SHA256_G1(w[SHA256_WI(i, 2)]) + w[SHA256_WI(i, 7)] + SHA256_G0(w[SHA256_WI(i, 15)]); + } + s[SHA256_SI(i, 7)] += w[i] + SHA256_S1(s[SHA256_SI(i, 4)]) + + SHA256_CH(s[SHA256_SI(i, 4)], s[SHA256_SI(i, 5)], s[SHA256_SI(i, 6)]) + SHA256_K[i + j]; + s[SHA256_SI(i, 3)] += s[SHA256_SI(i, 7)]; + s[SHA256_SI(i, 7)] += + SHA256_S0(s[SHA256_SI(i, 0)]) + SHA256_MAJ(s[SHA256_SI(i, 0)], s[SHA256_SI(i, 1)], s[SHA256_SI(i, 2)]); + } + // Store state + UNROLL_LOOP_FULL() + for (size_t i = 0; i < SHA256_STATE_WORD_COUNT; ++i) { + m_state[i] += s[i]; + } + } +}; + +// Generic implementations + +MULTIVERSION_GENERIC size_t sha_256_get_optimal_lane_count() noexcept { +#if defined(__x86_64__) + // On AMD64, SSE2 has 128-bit registers, supporting up to 4 lanes. 
+ return 4; +#elif defined(__aarch64__) + // On ARM64, NEON has 128-bit registers, supporting up to 4 lanes. + return 4; +#elif defined(__riscv) && defined(__riscv_v) + // RISC-V with Vector extension, we assume 128-bit registers are available, supporting up to 4 lanes. + return 4; +#elif defined(__wasm_simd128__) + // WebAssembly with SIMD extension has 128-bit registers, supporting up to 4 lanes. + return 4; +#else + // For other architectures, we assume vector instructions are not available and use scalar implementation. + return 1; +#endif +} +MULTIVERSION_GENERIC void sha_256_data_1x1(const array2d, 1, 1> &data, + const std::array &hash) noexcept { + sha_256_context<1>::simd_concat_hash<1>(data, hash); +} +MULTIVERSION_GENERIC void sha_256_word_1x1(const array2d &data, + const std::array &hash) noexcept { + sha_256_context<1, const_hash_tree_word_view::extent>::simd_concat_hash(data, hash); +} +MULTIVERSION_GENERIC void sha_256_word_1x2(const array2d &data, + const std::array &hash) noexcept { + sha_256_context<2, const_hash_tree_word_view::extent>::simd_concat_hash(data, hash); +} +MULTIVERSION_GENERIC void sha_256_word_1x4(const array2d &data, + const std::array &hash) noexcept { + sha_256_context<4, const_hash_tree_word_view::extent>::simd_concat_hash(data, hash); +} +MULTIVERSION_GENERIC void sha_256_word_1x8(const array2d &data, + const std::array &hash) noexcept { + sha_256_context<8, const_hash_tree_word_view::extent>::simd_concat_hash(data, hash); +} +MULTIVERSION_GENERIC void sha_256_word_1x16(const array2d &data, + const std::array &hash) noexcept { + sha_256_context<16, const_hash_tree_word_view::extent>::simd_concat_hash(data, hash); +} +MULTIVERSION_GENERIC void sha_256_data_2x1(const array2d, 2, 1> &data, + const std::array &hash) noexcept { + sha_256_context<1>::simd_concat_hash(data, hash); +} +MULTIVERSION_GENERIC void sha_256_hash_2x1(const array2d &data, + const std::array &hash) noexcept { + sha_256_context<1, 
const_machine_hash_view::extent>::simd_concat_hash(data, hash); +} +MULTIVERSION_GENERIC void sha_256_hash_2x2(const array2d &data, + const std::array &hash) noexcept { + sha_256_context<2, const_machine_hash_view::extent>::simd_concat_hash(data, hash); +} +MULTIVERSION_GENERIC void sha_256_hash_2x4(const array2d &data, + const std::array &hash) noexcept { + sha_256_context<4, const_machine_hash_view::extent>::simd_concat_hash(data, hash); +} +MULTIVERSION_GENERIC void sha_256_hash_2x8(const array2d &data, + const std::array &hash) noexcept { + sha_256_context<8, const_machine_hash_view::extent>::simd_concat_hash(data, hash); +} +MULTIVERSION_GENERIC void sha_256_hash_2x16(const array2d &data, + const std::array &hash) noexcept { + sha_256_context<16, const_machine_hash_view::extent>::simd_concat_hash(data, hash); +} + +// x86_64 implementations + +#ifdef USE_MULTIVERSINING_AMD64 + +// AVX2 +MULTIVERSION_AMD64_AVX2_BMI_BMI2 size_t sha_256_get_optimal_lane_count() noexcept { + // AVX2 has 256-bit registers, supporting up to 8 lanes. 
+ return 8; +} +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void sha_256_data_1x1(const array2d, 1, 1> &data, + const std::array &hash) noexcept { + sha_256_context<1>::simd_concat_hash<1>(data, hash); +} +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void sha_256_word_1x1(const array2d &data, + const std::array &hash) noexcept { + sha_256_context<1, const_hash_tree_word_view::extent>::simd_concat_hash(data, hash); +} +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void sha_256_word_1x2(const array2d &data, + const std::array &hash) noexcept { + sha_256_context<2, const_hash_tree_word_view::extent>::simd_concat_hash(data, hash); +} +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void sha_256_word_1x4(const array2d &data, + const std::array &hash) noexcept { + sha_256_context<4, const_hash_tree_word_view::extent>::simd_concat_hash(data, hash); +} +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void sha_256_word_1x8(const array2d &data, + const std::array &hash) noexcept { + sha_256_context<8, const_hash_tree_word_view::extent>::simd_concat_hash(data, hash); +} +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void sha_256_word_1x16(const array2d &data, + const std::array &hash) noexcept { + sha_256_context<16, const_hash_tree_word_view::extent>::simd_concat_hash(data, hash); +} +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void sha_256_data_2x1(const array2d, 2, 1> &data, + const std::array &hash) noexcept { + sha_256_context<1>::simd_concat_hash(data, hash); +} +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void sha_256_hash_2x1(const array2d &data, + const std::array &hash) noexcept { + sha_256_context<1, const_machine_hash_view::extent>::simd_concat_hash(data, hash); +} +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void sha_256_hash_2x2(const array2d &data, + const std::array &hash) noexcept { + sha_256_context<2, const_machine_hash_view::extent>::simd_concat_hash(data, hash); +} +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void sha_256_hash_2x4(const array2d &data, + const std::array &hash) noexcept { + sha_256_context<4, const_machine_hash_view::extent>::simd_concat_hash(data, hash); +} 
+MULTIVERSION_AMD64_AVX2_BMI_BMI2 void sha_256_hash_2x8(const array2d &data, + const std::array &hash) noexcept { + sha_256_context<8, const_machine_hash_view::extent>::simd_concat_hash(data, hash); +} +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void sha_256_hash_2x16(const array2d &data, + const std::array &hash) noexcept { + sha_256_context<16, const_machine_hash_view::extent>::simd_concat_hash(data, hash); +} + +// AVX-512 + +MULTIVERSION_AMD64_AVX512_BMI_BMI2 size_t sha_256_get_optimal_lane_count() noexcept { + // AVX-512 has 512-bit registers, supporting up to 16 lanes. + return 16; +} +MULTIVERSION_AMD64_AVX512_BMI_BMI2 void sha_256_word_1x16(const array2d &data, + const std::array &hash) noexcept { + sha_256_context<16, const_hash_tree_word_view::extent>::simd_concat_hash(data, hash); +} +MULTIVERSION_AMD64_AVX512_BMI_BMI2 void sha_256_hash_2x16(const array2d &data, + const std::array &hash) noexcept { + sha_256_context<16, const_machine_hash_view::extent>::simd_concat_hash(data, hash); +} + +#endif // USE_MULTIVERSINING_AMD64 + +} // namespace cartesi diff --git a/src/sha-256-hasher.h b/src/sha-256-hasher.h new file mode 100644 index 000000000..d81e4bbf5 --- /dev/null +++ b/src/sha-256-hasher.h @@ -0,0 +1,177 @@ +// Copyright Cartesi and individual authors (see AUTHORS) +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) any +// later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +// PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License along +// with this program (see COPYING). If not, see . 
+// + +#ifndef SHA_256_HASHER_H +#define SHA_256_HASHER_H + +#include +#include +#include +#include +#include + +#include "array2d.h" +#include "compiler-defines.h" +#include "i-hasher.h" +#include "machine-hash.h" + +namespace cartesi { + +// Generic implementations +MULTIVERSION_GENERIC size_t sha_256_get_optimal_lane_count() noexcept; +MULTIVERSION_GENERIC void sha_256_data_1x1(const array2d, 1, 1> &data, + const std::array &hash) noexcept; +MULTIVERSION_GENERIC void sha_256_word_1x1(const array2d &data, + const std::array &hash) noexcept; +MULTIVERSION_GENERIC void sha_256_word_1x2(const array2d &data, + const std::array &hash) noexcept; +MULTIVERSION_GENERIC void sha_256_word_1x4(const array2d &data, + const std::array &hash) noexcept; +MULTIVERSION_GENERIC void sha_256_word_1x8(const array2d &data, + const std::array &hash) noexcept; +MULTIVERSION_GENERIC void sha_256_word_1x16(const array2d &data, + const std::array &hash) noexcept; +MULTIVERSION_GENERIC void sha_256_data_2x1(const array2d, 2, 1> &data, + const std::array &hash) noexcept; +MULTIVERSION_GENERIC void sha_256_hash_2x1(const array2d &data, + const std::array &hash) noexcept; +MULTIVERSION_GENERIC void sha_256_hash_2x2(const array2d &data, + const std::array &hash) noexcept; +MULTIVERSION_GENERIC void sha_256_hash_2x4(const array2d &data, + const std::array &hash) noexcept; +MULTIVERSION_GENERIC void sha_256_hash_2x8(const array2d &data, + const std::array &hash) noexcept; +MULTIVERSION_GENERIC void sha_256_hash_2x16(const array2d &data, + const std::array &hash) noexcept; + +// Optimized implementation for x86_64 architecture leveraging modern CPU instruction sets: +// - BMI1/BMI2 (Bit Manipulation Instructions) provide specialized bit operations: +// * RORX performs optimized bitwise rotation without requiring separate shift operations +// * ANDN efficiently computes (~x & y) in a single instruction +// - AVX2 for x8 SIMD hashing +// - AVX-512 for x16 SIMD hashing +#ifdef 
USE_MULTIVERSINING_AMD64 +// AVX2 implementation for x1, x2, x4, x8, x16 SIMD hashing +MULTIVERSION_AMD64_AVX2_BMI_BMI2 size_t sha_256_get_optimal_lane_count() noexcept; +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void sha_256_data_1x1(const array2d, 1, 1> &data, + const std::array &hash) noexcept; +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void sha_256_word_1x1(const array2d &data, + const std::array &hash) noexcept; +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void sha_256_word_1x2(const array2d &data, + const std::array &hash) noexcept; +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void sha_256_word_1x4(const array2d &data, + const std::array &hash) noexcept; +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void sha_256_word_1x8(const array2d &data, + const std::array &hash) noexcept; +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void sha_256_word_1x16(const array2d &data, + const std::array &hash) noexcept; +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void sha_256_data_2x1(const array2d, 2, 1> &data, + const std::array &hash) noexcept; +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void sha_256_hash_2x1(const array2d &data, + const std::array &hash) noexcept; +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void sha_256_hash_2x2(const array2d &data, + const std::array &hash) noexcept; +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void sha_256_hash_2x4(const array2d &data, + const std::array &hash) noexcept; +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void sha_256_hash_2x8(const array2d &data, + const std::array &hash) noexcept; +MULTIVERSION_AMD64_AVX2_BMI_BMI2 void sha_256_hash_2x16(const array2d &data, + const std::array &hash) noexcept; +// AVX-512 implementation for x16 SIMD hashing +MULTIVERSION_AMD64_AVX512_BMI_BMI2 size_t sha_256_get_optimal_lane_count() noexcept; +MULTIVERSION_AMD64_AVX512_BMI_BMI2 void sha_256_word_1x16(const array2d &data, + const std::array &hash) noexcept; +MULTIVERSION_AMD64_AVX512_BMI_BMI2 void sha_256_hash_2x16(const array2d &data, + const std::array &hash) noexcept; +#endif + +class sha_256_hasher final : public i_hasher { +public: + static constexpr int 
MAX_LANE_COUNT = 16; ///< Number of maximum supported SIMD lanes + + template + static void do_simd_concat_hash(const array2d, ConcatCount, LaneCount> &data, + const std::array &hash) noexcept; + + static size_t do_get_optimal_lane_count() noexcept { + return sha_256_get_optimal_lane_count(); + } +}; + +template <> +inline void sha_256_hasher::do_simd_concat_hash<1, 1, std::dynamic_extent>( + const array2d, 1, 1> &data, const std::array &hash) noexcept { + sha_256_data_1x1(data, hash); +} +template <> +inline void sha_256_hasher::do_simd_concat_hash<1, 1, HASH_TREE_WORD_SIZE>( + const array2d &data, const std::array &hash) noexcept { + sha_256_word_1x1(data, hash); +} +template <> +inline void sha_256_hasher::do_simd_concat_hash<1, 2, HASH_TREE_WORD_SIZE>( + const array2d &data, const std::array &hash) noexcept { + sha_256_word_1x2(data, hash); +} +template <> +inline void sha_256_hasher::do_simd_concat_hash<1, 4, HASH_TREE_WORD_SIZE>( + const array2d &data, const std::array &hash) noexcept { + sha_256_word_1x4(data, hash); +} +template <> +inline void sha_256_hasher::do_simd_concat_hash<1, 8, HASH_TREE_WORD_SIZE>( + const array2d &data, const std::array &hash) noexcept { + sha_256_word_1x8(data, hash); +} +template <> +inline void sha_256_hasher::do_simd_concat_hash<1, 16, HASH_TREE_WORD_SIZE>( + const array2d &data, const std::array &hash) noexcept { + sha_256_word_1x16(data, hash); +} +template <> +inline void sha_256_hasher::do_simd_concat_hash<2, 1, std::dynamic_extent>( + const array2d, 2, 1> &data, const std::array &hash) noexcept { + sha_256_data_2x1(data, hash); +} +template <> +inline void sha_256_hasher::do_simd_concat_hash<2, 1, MACHINE_HASH_SIZE>( + const array2d &data, const std::array &hash) noexcept { + sha_256_hash_2x1(data, hash); +} +template <> +inline void sha_256_hasher::do_simd_concat_hash<2, 2, MACHINE_HASH_SIZE>( + const array2d &data, const std::array &hash) noexcept { + sha_256_hash_2x2(data, hash); +} +template <> +inline void 
sha_256_hasher::do_simd_concat_hash<2, 4, MACHINE_HASH_SIZE>( + const array2d &data, const std::array &hash) noexcept { + sha_256_hash_2x4(data, hash); +} +template <> +inline void sha_256_hasher::do_simd_concat_hash<2, 8, MACHINE_HASH_SIZE>( + const array2d &data, const std::array &hash) noexcept { + sha_256_hash_2x8(data, hash); +} +template <> +inline void sha_256_hasher::do_simd_concat_hash<2, 16, MACHINE_HASH_SIZE>( + const array2d &data, const std::array &hash) noexcept { + sha_256_hash_2x16(data, hash); +} + +} // namespace cartesi + +#endif diff --git a/src/simd-hasher.h b/src/simd-hasher.h new file mode 100644 index 000000000..7afeb5c2a --- /dev/null +++ b/src/simd-hasher.h @@ -0,0 +1,363 @@ +// Copyright Cartesi and individual authors (see AUTHORS) +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) any +// later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +// PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License along +// with this program (see COPYING). If not, see . +// + +#ifndef SIMD_HASHER_H +#define SIMD_HASHER_H + +/// \file +/// \brief SIMD hasher interface +/// +/// This file provides template classes for SIMD-accelerated hashing operations. +/// It includes specialized hashers for single data items and concatenated data pairs, +/// both utilizing queue-based batching to maximize SIMD efficiency. 
+ +#include + +#include + +#include "compiler-defines.h" +#include "i-hasher.h" +#include "machine-hash.h" + +namespace cartesi { + +/// \brief SIMD-accelerated data hasher with queue-based batching +/// \tparam hasher_type The underlying SIMD hasher implementation +/// \tparam data_type The type of data to be hashed +/// \tparam MaxQueueSize Maximum number of items that can be queued (defaults to hasher's max lane count) +template +class simd_data_hasher { + struct data_entry { + data_type data; ///< Data to be hashed + machine_hash_view result; ///< View where the hash result will be stored + }; + + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + hasher_type &m_hasher; ///< Underlying hasher instance + boost::container::static_vector m_queue{}; ///< Queue of pending hash operations + +public: + explicit simd_data_hasher(hasher_type &hasher) : m_hasher(hasher) {} + + /// \brief Enqueues data for hashing + /// \param data Data to hash + /// \param result Receives the hash of data + /// \details If the queue reaches the optimal size, it is automatically flushed. 
+ void enqueue(data_type data, machine_hash_view result) noexcept { + m_queue.emplace_back(data_entry{.data = data, .result = result}); + static const size_t optimal_queue_size = std::min(MaxQueueSize, m_hasher.get_optimal_lane_count()); + if (m_queue.size() >= optimal_queue_size) [[unlikely]] { // Queue is full, auto flush it + flush(); + } + } + + /// \brief Flushes the queue, clearing it in the process + void flush() noexcept { + auto &q = m_queue; + size_t i = q.size(); + if constexpr (hasher_type::MAX_LANE_COUNT >= 16) { + while (i >= 16) { // x16 parallel hashing + i -= 16; + m_hasher.simd_concat_hash(array2d{{{ + q[i + 0].data, + q[i + 1].data, + q[i + 2].data, + q[i + 3].data, + q[i + 4].data, + q[i + 5].data, + q[i + 6].data, + q[i + 7].data, + q[i + 8].data, + q[i + 9].data, + q[i + 10].data, + q[i + 11].data, + q[i + 12].data, + q[i + 13].data, + q[i + 14].data, + q[i + 15].data, + }}}, + std::array{{ + q[i + 0].result, + q[i + 1].result, + q[i + 2].result, + q[i + 3].result, + q[i + 4].result, + q[i + 5].result, + q[i + 6].result, + q[i + 7].result, + q[i + 8].result, + q[i + 9].result, + q[i + 10].result, + q[i + 11].result, + q[i + 12].result, + q[i + 13].result, + q[i + 14].result, + q[i + 15].result, + }}); + } + } + while (i >= 8) { // x8 parallel hashing + i -= 8; + m_hasher.simd_concat_hash(array2d{{{ + q[i + 0].data, + q[i + 1].data, + q[i + 2].data, + q[i + 3].data, + q[i + 4].data, + q[i + 5].data, + q[i + 6].data, + q[i + 7].data, + }}}, + std::array{{ + q[i + 0].result, + q[i + 1].result, + q[i + 2].result, + q[i + 3].result, + q[i + 4].result, + q[i + 5].result, + q[i + 6].result, + q[i + 7].result, + }}); + } + if (i >= 4) { // x4 parallel hashing + i -= 4; + m_hasher.simd_concat_hash(array2d{{{ + q[i + 0].data, + q[i + 1].data, + q[i + 2].data, + q[i + 3].data, + }}}, + std::array{{ + q[i + 0].result, + q[i + 1].result, + q[i + 2].result, + q[i + 3].result, + }}); + } + if (i >= 2) { // x2 parallel hashing + i -= 2; + 
m_hasher.simd_concat_hash(array2d{{{ + q[i + 0].data, + q[i + 1].data, + }}}, + std::array{{ + q[i + 0].result, + q[i + 1].result, + }}); + } + if (i >= 1) { // x1 scalar hashing + i -= 1; + m_hasher.simd_concat_hash(array2d{{ + {q[i + 0].data}, + }}, + std::array{ + {q[i + 0].result}, + }); + } + q.clear(); + } +}; + +/// \brief SIMD-accelerated concatenation hasher with queue-based batching +/// \tparam hasher_type The underlying SIMD hasher implementation +/// \tparam data_type The type of data to be hashed +/// \tparam MaxQueueSize Maximum number of pairs that can be queued (defaults to hasher's max lane count) +template +class simd_concat_hasher { + struct concat_entry { + data_type left; ///< Left data to be concatenated and hashed + data_type right; ///< Right data to be concatenated and hashed + machine_hash_view result; ///< View where the hash result will be stored + }; + + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + hasher_type &m_hasher; ///< The underlying SIMD hasher instance + boost::container::static_vector + m_queue{}; ///< Queue of pending concatenation hash operations + +public: + explicit simd_concat_hasher(hasher_type &hasher) : m_hasher(hasher) {} + + /// \brief Enqueues data pair for concat hashing + /// \param left Left data to hash + /// \param right Right data to hash + /// \param result Receives the hash of concatenated data + /// \details If the queue reaches the optimal size, it is automatically flushed. 
+ void enqueue(data_type left, data_type right, machine_hash_view result) noexcept { + m_queue.emplace_back(concat_entry{.left = left, .right = right, .result = result}); + static const size_t optimal_queue_size = std::min(MaxQueueSize, m_hasher.get_optimal_lane_count()); + if (m_queue.size() >= optimal_queue_size) [[unlikely]] { // Queue is full, auto flush it + flush(); + } + } + + /// \brief Flushes the queue, clearing it in the process + void flush() noexcept { + auto &q = m_queue; + size_t i = q.size(); + if constexpr (hasher_type::MAX_LANE_COUNT >= 16) { + while (i >= 16) { // x16 parallel hashing + i -= 16; + m_hasher.simd_concat_hash(array2d{{ + { + q[i + 0].left, + q[i + 1].left, + q[i + 2].left, + q[i + 3].left, + q[i + 4].left, + q[i + 5].left, + q[i + 6].left, + q[i + 7].left, + q[i + 8].left, + q[i + 9].left, + q[i + 10].left, + q[i + 11].left, + q[i + 12].left, + q[i + 13].left, + q[i + 14].left, + q[i + 15].left, + }, + { + q[i + 0].right, + q[i + 1].right, + q[i + 2].right, + q[i + 3].right, + q[i + 4].right, + q[i + 5].right, + q[i + 6].right, + q[i + 7].right, + q[i + 8].right, + q[i + 9].right, + q[i + 10].right, + q[i + 11].right, + q[i + 12].right, + q[i + 13].right, + q[i + 14].right, + q[i + 15].right, + }, + }}, + std::array{ + q[i + 0].result, + q[i + 1].result, + q[i + 2].result, + q[i + 3].result, + q[i + 4].result, + q[i + 5].result, + q[i + 6].result, + q[i + 7].result, + q[i + 8].result, + q[i + 9].result, + q[i + 10].result, + q[i + 11].result, + q[i + 12].result, + q[i + 13].result, + q[i + 14].result, + q[i + 15].result, + }); + } + } + while (i >= 8) { // x8 parallel hashing + i -= 8; + m_hasher.simd_concat_hash(array2d{{ + { + q[i + 0].left, + q[i + 1].left, + q[i + 2].left, + q[i + 3].left, + q[i + 4].left, + q[i + 5].left, + q[i + 6].left, + q[i + 7].left, + }, + { + q[i + 0].right, + q[i + 1].right, + q[i + 2].right, + q[i + 3].right, + q[i + 4].right, + q[i + 5].right, + q[i + 6].right, + q[i + 7].right, + }, + }}, + 
std::array{ + q[i + 0].result, + q[i + 1].result, + q[i + 2].result, + q[i + 3].result, + q[i + 4].result, + q[i + 5].result, + q[i + 6].result, + q[i + 7].result, + }); + } + if (i >= 4) { // x4 parallel hashing + i -= 4; + m_hasher.simd_concat_hash(array2d{{ + { + q[i + 0].left, + q[i + 1].left, + q[i + 2].left, + q[i + 3].left, + }, + { + q[i + 0].right, + q[i + 1].right, + q[i + 2].right, + q[i + 3].right, + }, + }}, + std::array{ + q[i + 0].result, + q[i + 1].result, + q[i + 2].result, + q[i + 3].result, + }); + } + if (i >= 2) { // x2 parallel hashing + i -= 2; + m_hasher.simd_concat_hash(array2d{{ + { + q[i + 0].left, + q[i + 1].left, + }, + { + q[i + 0].right, + q[i + 1].right, + }, + }}, + std::array{ + q[i + 0].result, + q[i + 1].result, + }); + } + if (i >= 1) { // x1 scalar hashing + i -= 1; + m_hasher.simd_concat_hash(array2d{{ + {q[i + 0].left}, + {q[i + 0].right}, + }}, + std::array{{ + q[i + 0].result, + }}); + } + q.clear(); + } +}; + +} // namespace cartesi + +#endif // SIMD_HASHER_H diff --git a/src/simd-vector-type.h b/src/simd-vector-type.h new file mode 100644 index 000000000..d8e107f40 --- /dev/null +++ b/src/simd-vector-type.h @@ -0,0 +1,114 @@ +// Copyright Cartesi and individual authors (see AUTHORS) +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) any +// later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +// PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License along +// with this program (see COPYING). If not, see . 
+// + +/// \file simd_vector_type.h +/// \brief Defines SIMD vector type traits for uint64_t and uint32_t operations +/// +/// This header provides template specializations that map lane counts to appropriate +/// SIMD vector types with their corresponding alignment requirements. +/// It supports both scalar (single lane) and vector operations for efficient SIMD computations. + +#ifndef SIMD_VECTOR_TYPE +#define SIMD_VECTOR_TYPE + +#include +#include + +namespace cartesi { + +/// \brief Template struct to define uint64_t vector types based on lane count +/// \tparam LaneCount Number of 64-bit lanes in the vector +template +struct uint64_vector_type; + +/// \brief Specialization for single uint64_t value (1 lane) +template <> +struct uint64_vector_type<1> { + using type = uint64_t __attribute__((vector_size(8))); ///< Scalar uint64_t type + static constexpr size_t align = 16; ///< Recommended memory alignment requirement +}; + +/// \brief Specialization for 2-lane uint64_t vector (128-bit) +template <> +struct uint64_vector_type<2> { + using type = uint64_t __attribute__((vector_size(16))); ///< 2x64-bit vector type + static constexpr size_t align = 16; ///< Recommended memory alignment requirement +}; + +/// \brief Specialization for 4-lane uint64_t vector (256-bit) +template <> +struct uint64_vector_type<4> { + using type = uint64_t __attribute__((vector_size(32))); ///< 4x64-bit vector type + static constexpr size_t align = 32; ///< Recommended memory alignment requirement +}; + +/// \brief Specialization for 8-lane uint64_t vector (512-bit) +template <> +struct uint64_vector_type<8> { + using type = uint64_t __attribute__((vector_size(64))); ///< 8x64-bit vector type + static constexpr size_t align = 64; ///< Recommended memory alignment requirement +}; + +/// \brief Specialization for 16-lane uint64_t vector (1024-bit) +template <> +struct uint64_vector_type<16> { + using type = uint64_t __attribute__((vector_size(128))); ///< 16x64-bit vector type + static 
constexpr size_t align = 128; ///< Recommended memory alignment requirement +}; + +/// \brief Template struct to define uint32_t vector types based on lane count +/// \tparam LaneCount Number of 32-bit lanes in the vector +template +struct uint32_vector_type; + +/// \brief Specialization for single uint32_t value (1 lane) +template <> +struct uint32_vector_type<1> { + using type = uint32_t __attribute__((vector_size(4))); ///< Scalar uint32_t type + static constexpr size_t align = 16; ///< Recommended memory alignment requirement +}; + +/// \brief Specialization for 2-lane uint32_t vector (64-bit) +template <> +struct uint32_vector_type<2> { + using type = uint32_t __attribute__((vector_size(8))); ///< 2x32-bit vector type + static constexpr size_t align = 16; ///< Recommended memory alignment requirement +}; + +/// \brief Specialization for 4-lane uint32_t vector (128-bit) +template <> +struct uint32_vector_type<4> { + using type = uint32_t __attribute__((vector_size(16))); ///< 4x32-bit vector type + static constexpr size_t align = 16; ///< Recommended memory alignment requirement +}; + +/// \brief Specialization for 8-lane uint32_t vector (256-bit) +template <> +struct uint32_vector_type<8> { + using type = uint32_t __attribute__((vector_size(32))); ///< 8x32-bit vector type + static constexpr size_t align = 32; ///< Recommended memory alignment requirement +}; + +/// \brief Specialization for 16-lane uint32_t vector (512-bit) +template <> +struct uint32_vector_type<16> { + using type = uint32_t __attribute__((vector_size(64))); ///< 16x32-bit vector type + static constexpr size_t align = 64; ///< Recommended memory alignment requirement +}; + +} // namespace cartesi + +#endif diff --git a/src/uarch-record-state-access.h b/src/uarch-record-state-access.h index 2e2785851..ec79c2067 100644 --- a/src/uarch-record-state-access.h +++ b/src/uarch-record-state-access.h @@ -52,20 +52,11 @@ class uarch_record_state_access : public i_accept_scoped_notes, public 
i_prefer_shadow_uarch_state { - using hasher_type = hash_tree::hasher_type; - // NOLINTBEGIN(cppcoreguidelines-avoid-const-or-ref-data-members) machine &m_m; ///< Macro machine access_log &m_log; ///< Access log // NOLINTEND(cppcoreguidelines-avoid-const-or-ref-data-members) - template - static auto get_hash(H &h, const access_data &data) { - machine_hash hash{}; - get_merkle_tree_hash(h, data.data(), data.size(), HASH_TREE_WORD_SIZE, hash); - return hash; - } - public: /// \brief Constructor from machine and uarch states. /// \param m Reference to machine state. diff --git a/src/uarch-replay-state-access.h b/src/uarch-replay-state-access.h index faa72d128..7fc66b24a 100644 --- a/src/uarch-replay-state-access.h +++ b/src/uarch-replay-state-access.h @@ -33,6 +33,9 @@ #include "i-hasher.h" #include "i-prefer-shadow-uarch-state.h" #include "i-uarch-state-access.h" +#include "keccak-256-hasher.h" +#include "machine-reg.h" +#include "machine.h" #include "meta.h" #include "shadow-tlb.h" #include "shadow-uarch-state.h" @@ -49,8 +52,6 @@ class uarch_replay_state_access : using proof_type = hash_tree::proof_type; public: - using hasher_type = hash_tree::hasher_type; - struct context { /// \brief Constructor replay_send_cmio_state_access context /// \param log Access log to be replayed @@ -66,7 +67,7 @@ class uarch_replay_state_access : ///< Root hash before next access machine_hash root_hash; ///< Hasher needed to verify proofs - hasher_type hasher; + keccak_256_hasher hasher; }; private: @@ -89,10 +90,10 @@ class uarch_replay_state_access : } private: - static auto get_hash(hasher_type &hasher, const access_data &data) { + template + static auto get_hash(H &h, const access_data &data) { machine_hash hash{}; - get_merkle_tree_hash(hasher, std::span{data.data(), data.size()}, HASH_TREE_WORD_SIZE, - hash); + get_merkle_tree_hash(h, std::span{data.data(), data.size()}, HASH_TREE_WORD_SIZE, hash); return hash; } diff --git a/src/variant-hasher.h b/src/variant-hasher.h new file 
mode 100644 index 000000000..91a27914a --- /dev/null +++ b/src/variant-hasher.h @@ -0,0 +1,71 @@ +// Copyright Cartesi and individual authors (see AUTHORS) +// SPDX-License-Identifier: LGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) any +// later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +// PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License along +// with this program (see COPYING). If not, see . +// + +#ifndef VARIANT_HASHER_H +#define VARIANT_HASHER_H + +#include +#include + +#include "i-hasher.h" +#include "keccak-256-hasher.h" +#include "sha-256-hasher.h" + +namespace cartesi { + +/// \brief Hash function +enum class hash_function_type : uint64_t { + keccak256, ///< Keccak-256 (recommended for fraud proofs based on Microarchitecture) + sha256, ///< SHA-256 (recommended for fraud proofs using zkVMs) +}; + +class variant_hasher final : public i_hasher { + std::variant m_hasher_impl; + +public: + static constexpr int MAX_LANE_COUNT = std::max(keccak_256_hasher::MAX_LANE_COUNT, sha_256_hasher::MAX_LANE_COUNT); + + explicit variant_hasher(hash_function_type algo) { + switch (algo) { + case hash_function_type::keccak256: + m_hasher_impl = keccak_256_hasher{}; + break; + case hash_function_type::sha256: + m_hasher_impl = sha_256_hasher{}; + break; + default: + throw std::invalid_argument("unsupported hash function type"); + } + } + + variant_hasher() = delete; ///< Default constructor is not allowed + + template + // NOLINTNEXTLINE(bugprone-exception-escape) + void do_simd_concat_hash(const array2d, ConcatCount, 
LaneCount> &data, + const std::array &hash) noexcept { + std::visit([&](auto &h) noexcept { h.do_simd_concat_hash(data, hash); }, m_hasher_impl); + } + + // NOLINTNEXTLINE(bugprone-exception-escape) + size_t do_get_optimal_lane_count() const noexcept { + return std::visit([](auto &h) noexcept { return h.do_get_optimal_lane_count(); }, m_hasher_impl); + } +}; + +} // namespace cartesi + +#endif diff --git a/tests/Makefile b/tests/Makefile index 07d006819..2b2dd9ac1 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -203,7 +203,8 @@ test-uarch-interpreter: $(MAKE) -C ../third-party/riscv-arch-test verify XLEN=64 RISCV_TARGET=cartesi RISCV_ISA=rv64i RISCV_DEVICE=I WORK=$(BUILDDIR)/uarch-riscv-arch-test RUN_ARCH_TEST='$(RUN_ARCH_TEST)' test-hash: - $(LD_PRELOAD_PREFIX) ./build/misc/test-merkle-tree-hash --log2-root-size=30 --log2-leaf-size=12 --input=build/misc/test-merkle-tree-hash + $(LD_PRELOAD_PREFIX) ./build/misc/test-merkle-tree-hash --log2-root-size=30 --log2-leaf-size=12 --input=build/misc/test-merkle-tree-hash --hash-function=keccak256 + $(LD_PRELOAD_PREFIX) ./build/misc/test-merkle-tree-hash --log2-root-size=30 --log2-leaf-size=12 --input=build/misc/test-merkle-tree-hash --hash-function=sha256 test-jsonrpc: ./scripts/test-jsonrpc-server.sh ../src/cartesi-jsonrpc-machine '$(LUA) ../src/cartesi-machine.lua' '$(LUA) ./lua/cartesi-machine-tests.lua' '$(LUA)' diff --git a/tests/lua/cartesi/tests/util.lua b/tests/lua/cartesi/tests/util.lua index b5398d0f7..5c9690a4f 100644 --- a/tests/lua/cartesi/tests/util.lua +++ b/tests/lua/cartesi/tests/util.lua @@ -32,19 +32,23 @@ local test_util = { tests_uarch_path = adjust_path(assert(os.getenv("CARTESI_TESTS_UARCH_PATH"), "must set CARTESI_TESTS_UARCH_PATH")), } -local zero_keccak_hash_table = { - "", - "", -} - -do - local hash = cartesi.keccak(string.rep("\0", 1 << WORD_LOG2_SIZE)) +local function compute_zero_hash_table(hash_fn) + local zero_hash_table = { + "", + "", + } + local hash = 
cartesi[hash_fn](string.rep("\0", 1 << WORD_LOG2_SIZE)) for i = WORD_LOG2_SIZE, ROOT_LOG2_SIZE - 1 do - zero_keccak_hash_table[i] = hash - hash = cartesi.keccak(hash, hash) + zero_hash_table[i] = hash + hash = cartesi[hash_fn](hash, hash) end + return zero_hash_table end +local zero_hash_tables = { + keccak256 = compute_zero_hash_table("keccak256"), + sha256 = compute_zero_hash_table("sha256"), +} local ZERO_PAGE = string.rep("\x00", PAGE_SIZE) test_util.uarch_programs = { @@ -91,7 +95,7 @@ function back_merkle_tree_meta.__index:push_back(new_leaf_hash) for i = 0, depth do if self.m_leaf_count & (0x01 << i) ~= 0x0 then local left = self.m_context[i] - right = cartesi.keccak(left, right) + right = cartesi[self.hash_fn](left, right) else self.m_context[i] = right break @@ -116,12 +120,12 @@ function back_merkle_tree_meta.__index:pad_back(new_leaf_count) -- is our smallest tree at depth j? if (self.m_leaf_count & j_span) ~= 0x0 then -- if so, we can add 2^j pristine leaves directly - local right = zero_keccak_hash_table[self.m_log2_leaf_size + j] + local right = self.zero_hash_table[self.m_log2_leaf_size + j] for i = j, depth do local i_span = 0x1 << i if (self.m_leaf_count & i_span) ~= 0x0 then local left = self.m_context[i] - right = cartesi.keccak(left, right) + right = cartesi[self.hash_fn](left, right) else self.m_context[i] = right -- next outer loop starts again from where inner loop left off @@ -139,7 +143,7 @@ function back_merkle_tree_meta.__index:pad_back(new_leaf_count) for i = 0, depth do local i_span = 0x1 << i if (new_leaf_count & i_span) ~= 0x0 then - self.m_context[i] = zero_keccak_hash_table[self.m_log2_leaf_size + i] + self.m_context[i] = self.zero_hash_table[self.m_log2_leaf_size + i] new_leaf_count = new_leaf_count - i_span self.m_leaf_count = self.m_leaf_count + i_span end @@ -150,14 +154,14 @@ function back_merkle_tree_meta.__index:get_root_hash() assert(self.m_leaf_count <= self.m_max_leaves, "too many leaves") local depth = 
self.m_log2_root_size - self.m_log2_leaf_size if self.m_leaf_count < self.m_max_leaves then - local root = zero_keccak_hash_table[self.m_log2_leaf_size] + local root = self.zero_hash_table[self.m_log2_leaf_size] for i = 0, depth - 1 do if (self.m_leaf_count & (0x01 << i)) ~= 0 then local left = self.m_context[i] - root = cartesi.keccak(left, root) + root = cartesi[self.hash_fn](left, root) else - local right = zero_keccak_hash_table[self.m_log2_leaf_size + i] - root = cartesi.keccak(root, right) + local right = self.zero_hash_table[self.m_log2_leaf_size + i] + root = cartesi[self.hash_fn](root, right) end end return root @@ -166,8 +170,10 @@ function back_merkle_tree_meta.__index:get_root_hash() end end -function test_util.new_back_merkle_tree(log2_root_size, log2_leaf_size) +function test_util.new_back_merkle_tree(log2_root_size, log2_leaf_size, hash_fn) local self = {} + self.hash_fn = hash_fn + self.zero_hash_table = zero_hash_tables[hash_fn] self.m_context = {} self.m_log2_leaf_size = log2_leaf_size self.m_log2_root_size = log2_root_size @@ -204,7 +210,8 @@ function test_util.split_string(inputstr, sep) return t end -function test_util.check_proof(proof) +function test_util.check_proof(proof, hash_fn) + assert(hash_fn, "hash_fn is nil") local hash = proof.target_hash for log2_size = proof.log2_target_size, proof.log2_root_size - 1 do local bit = (proof.target_address & (1 << log2_size)) ~= 0 @@ -214,7 +221,7 @@ function test_util.check_proof(proof) else first, second = hash, proof.sibling_hashes[log2_size - proof.log2_target_size + 1] end - hash = cartesi.keccak(first, second) + hash = cartesi[hash_fn](first, second) end return hash == proof.root_hash end @@ -229,16 +236,18 @@ function test_util.load_file(filename) return data end -local function merkle_hash(data, start, log2_size) +local function merkle_hash(data, start, log2_size, hash_fn) + assert(hash_fn, "hash_fn is nil") + local zero_hash_table = zero_hash_tables[hash_fn] if log2_size == PAGE_LOG2_SIZE 
and data:sub(start + 1, start + PAGE_SIZE) == ZERO_PAGE then - return zero_keccak_hash_table[PAGE_LOG2_SIZE] + return zero_hash_table[PAGE_LOG2_SIZE] elseif log2_size > WORD_LOG2_SIZE then local child_log2_size = log2_size - 1 - local left = merkle_hash(data, start, child_log2_size) - local right = merkle_hash(data, start + (1 << child_log2_size), child_log2_size) - return cartesi.keccak(left, right) + local left = merkle_hash(data, start, child_log2_size, hash_fn) + local right = merkle_hash(data, start + (1 << child_log2_size), child_log2_size, hash_fn) + return cartesi[hash_fn](left, right) else - return cartesi.keccak(data:sub(start + 1, start + (1 << WORD_LOG2_SIZE))) + return cartesi[hash_fn](data:sub(start + 1, start + (1 << WORD_LOG2_SIZE)), nil) end end @@ -246,14 +255,15 @@ test_util.merkle_hash = merkle_hash -- Take data from dumped memory files -- and calculate root hash of the machine -function test_util.calculate_emulator_hash(machine) - local tree = test_util.new_back_merkle_tree(64, PAGE_LOG2_SIZE) +function test_util.calculate_emulator_hash(machine, hash_fn) + hash_fn = hash_fn or machine:get_initial_config().hash_tree.hash_function + local tree = test_util.new_back_merkle_tree(64, PAGE_LOG2_SIZE, hash_fn) local last = 0 for _, v in ipairs(machine:get_address_ranges()) do tree:pad_back((v.start - last) >> PAGE_LOG2_SIZE) local finish = v.start + v.length for j = v.start, finish - 1, PAGE_SIZE do - local page_hash = merkle_hash(machine:read_memory(j, PAGE_SIZE), 0, PAGE_LOG2_SIZE) + local page_hash = merkle_hash(machine:read_memory(j, PAGE_SIZE), 0, PAGE_LOG2_SIZE, hash_fn) tree:push_back(page_hash) end last = finish @@ -263,11 +273,12 @@ end -- Read memory from given machine and calculate uarch state hash function test_util.calculate_uarch_state_hash(machine) + local hash_fn = machine:get_initial_config().hash_tree.hash_function local shadow_data = machine:read_memory(cartesi.UARCH_SHADOW_START_ADDRESS, cartesi.UARCH_SHADOW_LENGTH) local ram_data = 
machine:read_memory(cartesi.UARCH_RAM_START_ADDRESS, cartesi.UARCH_RAM_LENGTH) - local tree = test_util.new_back_merkle_tree(cartesi.UARCH_STATE_LOG2_SIZE, PAGE_LOG2_SIZE) + local tree = test_util.new_back_merkle_tree(cartesi.UARCH_STATE_LOG2_SIZE, PAGE_LOG2_SIZE, hash_fn) for j = 0, #shadow_data - 1, PAGE_SIZE do - local page_hash = merkle_hash(shadow_data, j, PAGE_LOG2_SIZE) + local page_hash = merkle_hash(shadow_data, j, PAGE_LOG2_SIZE, hash_fn) tree:push_back(page_hash) end -- pad the region between the end of shadow data and start of ram @@ -275,7 +286,7 @@ function test_util.calculate_uarch_state_hash(machine) (cartesi.UARCH_RAM_START_ADDRESS - cartesi.UARCH_SHADOW_START_ADDRESS - #shadow_data) >> PAGE_LOG2_SIZE ) for j = 0, #ram_data - 1, PAGE_SIZE do - local page_hash = merkle_hash(ram_data, j, PAGE_LOG2_SIZE) + local page_hash = merkle_hash(ram_data, j, PAGE_LOG2_SIZE, hash_fn) tree:push_back(page_hash) end return tree:get_root_hash() diff --git a/tests/lua/cmio-test.lua b/tests/lua/cmio-test.lua index f89f2def8..0d298bb24 100755 --- a/tests/lua/cmio-test.lua +++ b/tests/lua/cmio-test.lua @@ -160,7 +160,7 @@ local function check_output(machine, expected) end assert(expected == output) - return cartesi.keccak(output) + return cartesi.keccak256(output) end local function check_report(machine, expected) @@ -195,14 +195,14 @@ local function check_outputs_root_hash(root_hash, output_hashes) end local c2 = output_hashes[child + 1] if c2 then - parent_output_hashes[parent] = cartesi.keccak(c1, c2) + parent_output_hashes[parent] = cartesi.keccak256(c1, c2) else - parent_output_hashes[parent] = cartesi.keccak(c1, z) + parent_output_hashes[parent] = cartesi.keccak256(c1, z) end parent = parent + 1 child = child + 2 end - z = cartesi.keccak(z, z) + z = cartesi.keccak256(z, z) output_hashes = parent_output_hashes end assert(root_hash == output_hashes[1], "output root hash mismatch") diff --git a/tests/lua/hash-tree.lua b/tests/lua/hash-tree.lua index 
55ac01f95..4eeb05cc7 100755 --- a/tests/lua/hash-tree.lua +++ b/tests/lua/hash-tree.lua @@ -101,7 +101,7 @@ local function compare_proofs(p1, p2, padding) return true end -local function check_proof(proof) +local function check_proof(proof, hash_fn) local hash = proof.target_hash for log2_size = proof.log2_target_size, proof.log2_root_size - 1 do local bit = 1 << log2_size @@ -112,7 +112,7 @@ local function check_proof(proof) else first, second = hash, sibling_hash end - hash = cartesi.keccak(first, second) + hash = cartesi[hash_fn](first, second) end return hash == proof.root_hash end @@ -188,10 +188,11 @@ table.insert(interesting_pages, { -PAGE_SIZE, { start = last_end, length = -last -- check all page hashes print("checking all page hashes") +local hash_fn = machine:get_initial_config().hash_tree.hash_function for _, v in ipairs(machine:get_address_ranges()) do for address = v.start, v.start + v.length - 1, PAGE_SIZE do local h1 = machine:get_node_hash(address, LOG2_PAGE_SIZE) - local h2 = test_util.merkle_hash(machine:read_memory(address, PAGE_SIZE), 0, LOG2_PAGE_SIZE) + local h2 = test_util.merkle_hash(machine:read_memory(address, PAGE_SIZE), 0, LOG2_PAGE_SIZE, hash_fn) if h1 ~= h2 then stderr("hash mismatch on page 0x%016x (offset 0x%016x in %s)\n", address, address - v.start, v.description) stderr(" 0x%.16s... vs 0x%.16s...\n", tohex(h1), tohex(h2)) @@ -208,7 +209,7 @@ for _, p in ipairs(interesting_pages) do for address = page, page + PAGE_SIZE - 1, WORD_SIZE do local h1 = machine:get_node_hash(address, LOG2_WORD_SIZE) local word = machine:read_memory(address, WORD_SIZE) - local h2 = cartesi.keccak(word) + local h2 = cartesi[hash_fn](word) if h1 ~= h2 then stderr(" hash mismatch on word 0x%016x (%u)\n", address, address) stderr(" 0x%.16s... 
vs 0x%.16s...\n", tohex(h1), tohex(h2)) @@ -219,7 +220,7 @@ for _, p in ipairs(interesting_pages) do for log2_size = LOG2_WORD_SIZE + 1, LOG2_PAGE_SIZE do local new_hashes = {} for i = 1, #hashes - 1, 2 do - new_hashes[#new_hashes + 1] = cartesi.keccak(hashes[i], hashes[i + 1]) + new_hashes[#new_hashes + 1] = cartesi[hash_fn](hashes[i], hashes[i + 1]) end hashes = new_hashes for i = 1, #hashes do @@ -252,11 +253,11 @@ for log2_size = LOG2_WORD_SIZE, LOG2_ROOT_SIZE - 1 do if not compare_proofs(mproof, oproof, " ") then stderr(" proof mismatch for offset 0x%016x (%u)\n", address, address) end - if not check_proof(oproof) then + if not check_proof(oproof, hash_fn) then stderr(" test proof for offset 0x%016x (%u) failed\n", address, address) os.exit(1) end - if not check_proof(mproof) then + if not check_proof(mproof, hash_fn) then stderr(" machine proof for offset 0x%016x (%u) failed\n", address, address) os.exit(1) end diff --git a/tests/lua/machine-bind.lua b/tests/lua/machine-bind.lua index c37609128..b0e90a8a3 100755 --- a/tests/lua/machine-bind.lua +++ b/tests/lua/machine-bind.lua @@ -221,6 +221,7 @@ local function build_machine_config(config_options) -- Create new machine local initial_reg_values = get_cpu_reg_test_values() local config = { + hash_tree = config_options.hash_tree or nil, processor = config_options.processor or { registers = initial_reg_values, }, @@ -266,6 +267,59 @@ local do_test = test_util.make_do_test(build_machine, machine_type) print("Testing machine bindings for type " .. 
machine_type) +print("\n\nDifferent hash tree hash targets") + +do_test("Hash tree hash function should be keccak256 by default", function(machine) + assert( + machine:get_initial_config().hash_tree.hash_function == "keccak256", + "hash tree hash function should be uarch" + ) +end) + +test_util.make_do_test(build_machine, machine_type, { hash_tree = { hash_function = "keccak256" } })( + "Hash tree hash function keccak256 should work properly", + function(machine) + assert( + machine:get_initial_config().hash_tree.hash_function == "keccak256", + "hash tree hash function should be uarch" + ) + local root_hash = machine:get_root_hash() + local keccak256_calculated = test_util.calculate_emulator_hash(machine, "keccak256") + assert(root_hash == keccak256_calculated, "initial root hash does not match") + local sha256_calculated = test_util.calculate_emulator_hash(machine, "sha256") + assert(root_hash ~= sha256_calculated, "initial root hash should not match sha256") + end +) + +test_util.make_do_test(build_machine, machine_type, { hash_tree = { hash_function = "sha256" } })( + "Hash tree hash function sha256 should work properly", + function(machine) + assert( + machine:get_initial_config().hash_tree.hash_function == "sha256", + "hash tree hash function should be sha256" + ) + local root_hash = machine:get_root_hash() + local sha256_calculated = test_util.calculate_emulator_hash(machine, "sha256") + assert(root_hash == sha256_calculated, "initial root hash does not match") + local keccak256_calculated = test_util.calculate_emulator_hash(machine, "keccak256") + assert(root_hash ~= keccak256_calculated, "initial root hash should not match keccak256") + end +) + +test_util.make_do_test(function() end, machine_type, {})( + "Fails to construct machine of unsupported hash tree hash function", + function() + local success, err = pcall(function() + build_machine(machine_type, { + hash_tree = { hash_function = "invalid" }, + }) + end) + assert(success == false) + print(err) + 
assert(err and err:match("invalid hash function type")) + end +) + print("\n\ntesting machine initial flags") do_test("machine should not have halt and yield initial flags set", function(machine) -- Check machine is not halted @@ -294,10 +348,11 @@ do_test("should provide proof for values in registers", function(machine) initial_reg_values.mimpid = nil -- Check proofs + local hash_fn = machine:get_initial_config().hash_tree.hash_function for _, v in pairs(initial_reg_values) do for el = cartesi.TREE_LOG2_WORD_SIZE, cartesi.TREE_LOG2_ROOT_SIZE - 1 do local a = test_util.align(v, el) - assert(test_util.check_proof(assert(machine:get_proof(a, el), "no proof")), "proof failed") + assert(test_util.check_proof(assert(machine:get_proof(a, el), "no proof"), hash_fn), "proof failed") end end end) @@ -455,6 +510,7 @@ local function test_config(config) assertfield(config, "hash_tree.sht_filename", "string") assertfield(config, "hash_tree.phtc_filename", "string") assertfield(config, "hash_tree.phtc_size", "number") + assertfield(config, "hash_tree.hash_function", "string") end print("\n\ntesting get_default_config function binding") @@ -718,7 +774,7 @@ do_test("machine step should pass verifications", function(machine) end) print("\n\ntesting step and verification") -do_test("Step log must contain conssitent data hashes", function(machine) +do_test("Step log must contain consistent data hashes", function(machine) local wrong_hash = string.rep("\0", cartesi.HASH_SIZE) local initial_hash = machine:get_root_hash() local log = machine:log_step_uarch() @@ -964,6 +1020,42 @@ test_util.make_do_test(build_machine, machine_type, { uarch = test_reset_uarch_c end ) +test_util.make_do_test(build_machine, machine_type, { hash_tree = { hash_function = "sha256" } })( + "Uarch operations should fail if hash tree hash function is not keccak256", + function(machine) + assert( + machine:get_initial_config().hash_tree.hash_function == "sha256", + "hash tree hash function should be sha256" + ) + 
-- The machine is configured for sha256, therefore: + -- run_uarch should fail + local success, err = pcall(machine.run_uarch, machine, 1) + assert( + success == false and err:match("can only be used with hash tree configured with Keccak%-256 hash function") + ) + -- reset_uarch should fail + success, err = pcall(machine.reset_uarch, machine) + assert( + success == false and err:match("can only be used with hash tree configured with Keccak%-256 hash function") + ) + -- log_reset_uarch should fail + success, err = pcall(machine.log_reset_uarch, machine) + assert( + success == false and err:match("can only be used with hash tree configured with Keccak%-256 hash function") + ) + -- log_step_uarch should fail + success, err = pcall(machine.log_step_uarch, machine) + assert( + success == false and err:match("can only be used with hash tree configured with Keccak%-256 hash function") + ) + -- log_send_cmio_response should fail + success, err = pcall(machine.log_send_cmio_response, machine, 0, 0) + assert( + success == false and err:match("can only be used with hash tree configured with Keccak%-256 hash function") + ) + end +) + test_util.make_do_test(build_machine, machine_type, { uarch = test_reset_uarch_config })( "Dump of log produced by log_reset_uarch should match", function(machine) @@ -1160,8 +1252,8 @@ do_test("uarch ecall putchar should print char to console", function() os.remove(uarch_ram_path) machine:run_uarch(3) -- run 3 instructions " 2>&1]] - local p = io.popen(lua_cmd .. 
lua_code)) + local output = assert(p:read(2000)) p:close() local expected_output = "X" print("Output of uarch ecall putchar:") @@ -1211,9 +1303,10 @@ local function test_send_cmio_input_with_different_arguments() local data = string.rep("a", 1 << cartesi.AR_CMIO_RX_BUFFER_LOG2_SIZE) local reason = 1 local max_rx_buffer_len = 1 << cartesi.AR_CMIO_RX_BUFFER_LOG2_SIZE - local data_hash = test_util.merkle_hash(data, 0, cartesi.AR_CMIO_RX_BUFFER_LOG2_SIZE) + local hash_fn = "keccak256" + local data_hash = test_util.merkle_hash(data, 0, cartesi.AR_CMIO_RX_BUFFER_LOG2_SIZE, hash_fn) local all_zeros = string.rep("\0", max_rx_buffer_len) - local all_zeros_hash = test_util.merkle_hash(all_zeros, 0, cartesi.AR_CMIO_RX_BUFFER_LOG2_SIZE) + local all_zeros_hash = test_util.merkle_hash(all_zeros, 0, cartesi.AR_CMIO_RX_BUFFER_LOG2_SIZE, hash_fn) -- prepares and asserts the state before send_cmio_response is called local function assert_before_cmio_response_sent(machine) machine:write_reg("iflags_Y", 1) @@ -1564,8 +1657,9 @@ end) -- helper function to load a step log file into a table local function read_step_log_file(filename) local file = assert(io.open(filename, "rb")) + local hash_function = string.unpack(">(proof_str, "proof").value(); auto proof_root_hash = proof.get_root_hash(); - auto verification = calculate_proof_root_hash(proof); + cartesi::variant_hasher h(get_machine_hash_function(_machine)); + auto verification = calculate_proof_root_hash(h, proof); BOOST_CHECK_EQUAL_COLLECTIONS(verification.begin(), verification.end(), proof_root_hash.begin(), proof_root_hash.end()); verification = calculate_emulator_hash(_machine); @@ -589,7 +597,7 @@ BOOST_FIXTURE_TEST_CASE_NOLINT(read_write_memory_scattered_data, ordinary_machin // we are going to write data on a page junction: // one byte at the end of the third page and one byte // at the beginning of the fourth - uint64_t address = 0x80004000 - sizeof(write_value) / 2; + uint64_t address = 0x80004000 - (sizeof(write_value) / 
2); std::array write_data{}; std::array read_data{}; @@ -671,7 +679,7 @@ BOOST_FIXTURE_TEST_CASE_NOLINT(read_write_virtual_memory_scattered_data, ordinar // we are going to write data on a page junction: // one byte at the end of the third page and one byte // at the beginning of the fourth - uint64_t address = 0x80004000 - sizeof(write_value) / 2; + uint64_t address = 0x80004000 - (sizeof(write_value) / 2); std::array write_data{}; std::array read_data{}; @@ -849,7 +857,7 @@ class flash_drive_machine_fixture : public machine_flash_simple_fixture { flash_drive_machine_fixture &operator=(flash_drive_machine_fixture &&other) noexcept = delete; protected: - size_t _flash_size; + size_t _flash_size{}; std::string _flash_file; std::string _flash_data; std::string _machine_dir_path; @@ -1082,8 +1090,7 @@ BOOST_FIXTURE_TEST_CASE_NOLINT(verify_step_uarch_log_null_log_test, default_mach class access_log_machine_fixture : public incomplete_machine_fixture { public: - access_log_machine_fixture() { - _log_type = CM_ACCESS_LOG_TYPE_ANNOTATIONS; + access_log_machine_fixture() : _log_type(CM_ACCESS_LOG_TYPE_ANNOTATIONS) { _machine_dir_path = (std::filesystem::temp_directory_path() / "661b6096c377cdc07756df488059f4407c8f4").string(); uint32_t test_uarch_ram[] = { @@ -1092,7 +1099,8 @@ class access_log_machine_fixture : public incomplete_machine_fixture { 0x00000073, // ecall }; std::ofstream of(_uarch_ram_path, std::ios::binary); - of.write(static_cast(static_cast(&test_uarch_ram)), sizeof(test_uarch_ram)); + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + of.write(reinterpret_cast(&test_uarch_ram), sizeof(test_uarch_ram)); of.close(); _machine_config["uarch"]["ram"] = {{"backing_store", {{"data_filename", _uarch_ram_path}}}}; const auto dumped_config = _machine_config.dump(); @@ -1549,7 +1557,8 @@ BOOST_FIXTURE_TEST_CASE_NOLINT(machine_verify_hash_tree_proof_updates_test, ordi auto proof = cartesi::from_json>(proof_str, "proof").value(); auto proof_root_hash = 
proof.get_root_hash(); - auto verification = calculate_proof_root_hash(proof); + cartesi::variant_hasher h(get_machine_hash_function(_machine)); + auto verification = calculate_proof_root_hash(h, proof); BOOST_CHECK_EQUAL_COLLECTIONS(verification.begin(), verification.end(), proof_root_hash.begin(), proof_root_hash.end()); verification = calculate_emulator_hash(_machine); @@ -1566,7 +1575,7 @@ BOOST_FIXTURE_TEST_CASE_NOLINT(machine_verify_hash_tree_proof_updates_test, ordi proof = cartesi::from_json>(proof_str, "proof").value(); proof_root_hash = proof.get_root_hash(); - verification = calculate_proof_root_hash(proof); + verification = calculate_proof_root_hash(h, proof); BOOST_CHECK_EQUAL_COLLECTIONS(verification.begin(), verification.end(), proof_root_hash.begin(), proof_root_hash.end()); verification = calculate_emulator_hash(_machine); @@ -1685,4 +1694,4 @@ BOOST_AUTO_TEST_CASE_NOLINT(uarch_solidity_compatibility_layer) { BOOST_CHECK_EQUAL(int8ToUint64(int8(127)), 127); BOOST_CHECK_EQUAL(int8ToUint64(int8(-128)), 0xffffffffffffff80ULL); } -// NOLINTEND(cppcoreguidelines-avoid-do-while) +// NOLINTEND(cppcoreguidelines-avoid-do-while,cppcoreguidelines-non-private-member-variables-in-classes) diff --git a/tests/misc/test-merkle-tree-hash.cpp b/tests/misc/test-merkle-tree-hash.cpp index 863a2ca13..1f0198ccb 100644 --- a/tests/misc/test-merkle-tree-hash.cpp +++ b/tests/misc/test-merkle-tree-hash.cpp @@ -32,12 +32,11 @@ #include #include #include -#include #include +#include using namespace cartesi; -using hasher_type = keccak_256_hasher; -using hash_type = machine_hash; +using namespace std::string_literals; namespace { /// \brief Checks if string matches prefix and captures remaninder @@ -75,7 +74,7 @@ bool intval(const char *pre, const char *str, int *val) { /// \brief Prints hash in hex to file /// \param hash Hash to be printed. 
/// \param f File to print to -void print_hash(const hash_type &hash, FILE *f) { +void print_hash(const machine_hash &hash, FILE *f) { for (auto b : hash) { std::ignore = fprintf(f, "%02x", static_cast(b)); } @@ -87,13 +86,13 @@ void print_hash(const hash_type &hash, FILE *f) { /// \brief Reads a hash in hex from file /// \param f File to read from /// \returns Hash if successful, nothing otherwise -static std::optional read_hash(FILE *f) { - std::array hex_hash{}; +static std::optional read_hash(FILE *f) { + std::array hex_hash{}; if (fread(hex_hash.data(), 1, hex_hash.size(), f) != hex_hash.size()) { return {}; } - hash_type h; - for (size_t i = 0; i < hasher_type::hash_size; ++i) { + machine_hash h; + for (size_t i = 0; i < variant_hasher::hash_size; ++i) { std::array hex_c = {hex_hash[2 * i], hex_hash[2 * i + 1], '\0'}; unsigned c = 0; // NOLINTNEXTLINE(cert-err34-c): we just generated the string so we don't need to verify it @@ -123,10 +122,8 @@ __attribute__((format(printf, 1, 2))) void error(const char *fmt, ...) { /// \param leaf Pointer to leaf data. 
Must contain 2^log2_word_size bytes /// \param log2_word Log2 of word size /// \param hash Receives the leaf hash -void get_word_hash(hasher_type &h, const unsigned char *word, int log2_word_size, hash_type &hash) { - h.begin(); - h.add_data(std::span(word, 1 << log2_word_size)); - h.end(hash); +void get_word_hash(variant_hasher &h, const unsigned char *word, int log2_word_size, machine_hash &hash) { + h.hash(std::span(word, 1 << log2_word_size), hash); } /// \brief Computes the Merkle hash of a leaf of data @@ -136,37 +133,27 @@ void get_word_hash(hasher_type &h, const unsigned char *word, int log2_word_size /// at least 2^log2_leaf_size bytes /// \param log2_leaf_size Log2 of leaf size /// \returns Merkle hash of leaf data -hash_type get_leaf_hash(hasher_type &h, int log2_word_size, const unsigned char *leaf_data, int log2_leaf_size) { +machine_hash get_leaf_hash(variant_hasher &h, int log2_word_size, const unsigned char *leaf_data, int log2_leaf_size) { assert(log2_leaf_size >= log2_word_size); if (log2_leaf_size > log2_word_size) { - hash_type left = get_leaf_hash(h, log2_word_size, leaf_data, log2_leaf_size - 1); - const hash_type right = + machine_hash left = get_leaf_hash(h, log2_word_size, leaf_data, log2_leaf_size - 1); + const machine_hash right = get_leaf_hash(h, log2_word_size, leaf_data + (1 << (log2_leaf_size - 1)), log2_leaf_size - 1); get_concat_hash(h, left, right, left); return left; } - hash_type leaf; + machine_hash leaf; get_word_hash(h, leaf_data, log2_word_size, leaf); return leaf; } -/// \brief Computes the Merkle hash of a leaf of data -/// \param log2_word_size Log2 of word size -/// \param leaf_data Pointer to buffer containing leaf data with -/// at least 2^log2_leaf_size bytes -/// \param log2_leaf_size Log2 of leaf size -/// \returns Merkle hash of leaf data -hash_type get_leaf_hash(int log2_word_size, const unsigned char *leaf_data, int log2_leaf_size) { - hasher_type h; - return get_leaf_hash(h, log2_word_size, leaf_data, 
log2_leaf_size); -} - /// \brief Prints help message void help(const char *name) { std::ignore = fprintf(stderr, "Usage:\n %s [--input=] " "[--log2-word-size=] [--log2-leaf-size=

] " - "[--log2-root-size=]\n", + "[--log2-root-size=]\n" + "[--hash-function=(keccak256|sha256)]\n", name); exit(0); } @@ -174,6 +161,7 @@ int main(int argc, char *argv[]) try { const char *input_name = nullptr; + const char *hash_function_name = nullptr; int log2_word_size = 3; int log2_leaf_size = 12; int log2_root_size = 30; @@ -186,7 +174,8 @@ int main(int argc, char *argv[]) try { } if (stringval("--input=", argv[i], &input_name) || intval("--log2-word-size=", argv[i], &log2_word_size) || intval("--log2-leaf-size=", argv[i], &log2_leaf_size) || - intval("--log2-root-size=", argv[i], &log2_root_size)) { + intval("--log2-root-size=", argv[i], &log2_root_size) || + stringval("--hash-function=", argv[i], &hash_function_name)) { ; } else { error("unrecognized option '%s'\n", argv[i]); @@ -199,6 +188,15 @@ log2_root_size); return 1; } + hash_function_type hash_function{hash_function_type::keccak256}; + // hash_function_name is nullptr when --hash-function is omitted; treat that as the + // keccak256 default instead of comparing a null const char* against std::string (UB). + if (hash_function_name == nullptr || hash_function_name == "keccak256"s) { + hash_function = hash_function_type::keccak256; + } else if (hash_function_name == "sha256"s) { + hash_function = hash_function_type::sha256; + } else { + error("invalid hash function type '%s'\n", hash_function_name); + return 1; + } // Read from stdin if no input name was given auto input_file = unique_file_ptr{stdin}; if (input_name != nullptr) { @@ -218,12 +216,12 @@ } std::cerr << "instantiating back tree\n"; - back_merkle_tree back_tree{log2_root_size, log2_leaf_size, log2_word_size}; + back_merkle_tree back_tree{log2_root_size, log2_leaf_size, log2_word_size, hash_function}; std::cerr << "instantiating complete tree\n"; - complete_merkle_tree complete_tree{log2_root_size, log2_leaf_size, log2_word_size}; + complete_merkle_tree complete_tree{log2_root_size, log2_leaf_size, log2_word_size, hash_function}; - std::vector leaf_hashes; + std::vector leaf_hashes; const uint64_t max_leaves = UINT64_C(1) << 
(log2_root_size - log2_leaf_size); // NOLINT(misc-include-cleaner) uint64_t leaf_count = 0; @@ -239,7 +237,7 @@ int main(int argc, char *argv[]) try { // the root hash in log time keeping only constant size state. // 4) The complete_merkle_tree can receive leaf hashes and maintain // only the part of the tree that is not pristine - hasher_type h; + variant_hasher h{hash_function}; while (true) { auto got = fread(leaf_buf.get(), 1, leaf_size, input_file.get()); if (got == 0) { @@ -256,7 +254,7 @@ int main(int argc, char *argv[]) try { // Pad leaf with zeros if file ended before next leaf boundary memset(leaf_buf.get() + got, 0, leaf_size - got); // Compute leaf hash - auto leaf_hash = get_leaf_hash(log2_word_size, leaf_buf.get(), log2_leaf_size); + auto leaf_hash = get_leaf_hash(h, log2_word_size, leaf_buf.get(), log2_leaf_size); // Add to array of leaf hashes leaf_hashes.push_back(leaf_hash); // Print leaf hash @@ -266,7 +264,8 @@ int main(int argc, char *argv[]) try { // Add new leaf to back tree back_tree.push_back(leaf_hash); // Build full tree from array of leaf hashes - const full_merkle_tree tree_from_scratch(log2_root_size, log2_leaf_size, log2_word_size, leaf_hashes); + const full_merkle_tree tree_from_scratch(log2_root_size, log2_leaf_size, log2_word_size, leaf_hashes, + hash_function); // Compare the root hash for the back tree and the tree // from scratch if (back_tree.get_root_hash() != tree_from_scratch.get_root_hash()) { diff --git a/tests/misc/test-utils.h b/tests/misc/test-utils.h index 31e869201..4d1af3d01 100644 --- a/tests/misc/test-utils.h +++ b/tests/misc/test-utils.h @@ -19,11 +19,9 @@ #include #include #include -#include #include #include - -using hash_type = cartesi::machine_hash; +#include // Calculate root hash for data buffer of log2_size namespace detail { @@ -32,26 +30,35 @@ constexpr int WORD_LOG2_SIZE = 5; constexpr int MERKLE_PAGE_LOG2_SIZE = 12; constexpr int MERKLE_PAGE_SIZE = (UINT64_C(1) << MERKLE_PAGE_LOG2_SIZE); -static 
hash_type merkle_hash(cartesi::keccak_256_hasher &h, const std::string_view &data, int log2_size) { - hash_type result; +static cartesi::machine_hash merkle_hash(cartesi::variant_hasher &h, const std::string_view &data, int log2_size) { + cartesi::machine_hash result; if (log2_size > WORD_LOG2_SIZE) { --log2_size; auto half_size = data.size() / 2; - auto left = merkle_hash(h, std::string_view{data.data(), half_size}, log2_size); + auto left = merkle_hash(h, std::string_view{data.data() + 0, half_size}, log2_size); auto right = merkle_hash(h, std::string_view{data.data() + half_size, half_size}, log2_size); get_concat_hash(h, left, right, result); } else { - h.begin(); // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - h.add_data(data); - h.end(result); + h.hash(data, result); } return result; } } // namespace detail -static hash_type merkle_hash(const std::string_view &data, int log2_size) { +// \brief Creates a hasher object compatible with the one used by the machine's config +static cartesi::hash_function_type get_machine_hash_function(cm_machine *machine) { + const char *cfg_jsonstr{}; + cm_error error_code = cm_get_initial_config(machine, &cfg_jsonstr); + if (error_code != 0) { + throw std::runtime_error{cm_get_last_error_message()}; + } + const auto cfg = cartesi::from_json(cfg_jsonstr, "config"); + return cfg.hash_tree.hash_function; +} + +static cartesi::machine_hash merkle_hash(cartesi::variant_hasher &h, const std::string_view &data, int log2_size) { if (log2_size > 63) { throw std::domain_error("log2_size is too large"); } @@ -61,20 +68,18 @@ static hash_type merkle_hash(const std::string_view &data, int log2_size) { if ((UINT64_C(1) << log2_size) != data.size()) { throw std::invalid_argument("log2_size does not match data size"); } - cartesi::keccak_256_hasher h; return detail::merkle_hash(h, data, log2_size); } -static hash_type calculate_proof_root_hash(const cartesi::hash_tree_proof &proof) { - hash_type hash; +static 
cartesi::machine_hash calculate_proof_root_hash(cartesi::variant_hasher &h, + const cartesi::hash_tree_proof &proof) { + cartesi::machine_hash hash; memcpy(hash.data(), proof.get_target_hash().data(), sizeof(cm_hash)); - for (int log2_size = static_cast(proof.get_log2_target_size()); - log2_size < static_cast(proof.get_log2_root_size()); ++log2_size) { - cartesi::keccak_256_hasher h; + for (int log2_size = proof.get_log2_target_size(); log2_size < proof.get_log2_root_size(); ++log2_size) { auto bit = (proof.get_target_address() & (UINT64_C(1) << log2_size)); - hash_type first; - hash_type second; - if (bit) { + cartesi::machine_hash first; + cartesi::machine_hash second; + if (bit != 0) { memcpy(first.data(), proof.get_sibling_hashes()[log2_size - proof.get_log2_target_size()].data(), sizeof(cm_hash)); second = hash; @@ -88,8 +93,11 @@ static hash_type calculate_proof_root_hash(const cartesi::hash_tree_proof &proof return hash; } -static hash_type calculate_emulator_hash(cm_machine *machine) { - cartesi::back_merkle_tree tree(CM_TREE_LOG2_ROOT_SIZE, CM_TREE_LOG2_PAGE_SIZE, CM_TREE_LOG2_WORD_SIZE); +static cartesi::machine_hash calculate_emulator_hash(cm_machine *machine) { + const auto hash_function = get_machine_hash_function(machine); + cartesi::variant_hasher h(hash_function); + cartesi::back_merkle_tree tree(CM_TREE_LOG2_ROOT_SIZE, CM_TREE_LOG2_PAGE_SIZE, CM_TREE_LOG2_WORD_SIZE, + hash_function); std::string page; page.resize(detail::MERKLE_PAGE_SIZE); const char *ranges_jsonstr{}; @@ -98,7 +106,7 @@ static hash_type calculate_emulator_hash(cm_machine *machine) { } const auto mrds = cartesi::from_json(ranges_jsonstr, "memory_ranges"); uint64_t last = 0; - for (auto m : mrds) { + for (const auto &m : mrds) { tree.pad_back((m.start - last) >> detail::MERKLE_PAGE_LOG2_SIZE); auto end = m.start + m.length; for (uint64_t s = m.start; s < end; s += detail::MERKLE_PAGE_SIZE) { @@ -106,7 +114,7 @@ static hash_type calculate_emulator_hash(cm_machine *machine) { if 
(cm_read_memory(machine, s, reinterpret_cast(page.data()), page.size()) != 0) { throw std::runtime_error{cm_get_last_error_message()}; } - auto page_hash = merkle_hash(page, detail::MERKLE_PAGE_LOG2_SIZE); + auto page_hash = merkle_hash(h, page, detail::MERKLE_PAGE_LOG2_SIZE); tree.push_back(page_hash); } last = end; diff --git a/third-party/tiny_sha3/LICENSE b/third-party/tiny_sha3/LICENSE deleted file mode 100644 index d2d484d88..000000000 --- a/third-party/tiny_sha3/LICENSE +++ /dev/null @@ -1,22 +0,0 @@ -The MIT License (MIT) - -Copyright (c) 2015 Markku-Juhani O. Saarinen - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - diff --git a/third-party/tiny_sha3/sha3.c b/third-party/tiny_sha3/sha3.c deleted file mode 100644 index 7277efa14..000000000 --- a/third-party/tiny_sha3/sha3.c +++ /dev/null @@ -1,205 +0,0 @@ -// sha3.c -// 19-Nov-11 Markku-Juhani O. 
Saarinen - -// Revised 07-Aug-15 to match with official release of FIPS PUB 202 "SHA3" -// Revised 03-Sep-15 for portability + OpenSSL - style API - -#include "sha3.h" - -// Helper macros for stringification -#define TO_STRING_HELPER(X) #X -#define TO_STRING(X) TO_STRING_HELPER(X) - -// Define loop unrolling depending on the compiler -#if defined(__clang__) -#define UNROLL_LOOP(n) _Pragma(TO_STRING(unroll(n))) -#elif defined(__GNUC__) && !defined(__clang__) -#define UNROLL_LOOP(n) _Pragma(TO_STRING(GCC unroll(n))) -#else -#define UNROLL_LOOP(n) -#endif - -#ifndef KECCAKF_ROUNDS -#define KECCAKF_ROUNDS 24 -#endif - -#ifndef ROTL64 -#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y)))) -#endif - -// update the state with given number of rounds - -void sha3_keccakf(uint64_t st[25]) -{ - // constants - const uint64_t keccakf_rndc[24] = { - 0x0000000000000001, 0x0000000000008082, 0x800000000000808a, - 0x8000000080008000, 0x000000000000808b, 0x0000000080000001, - 0x8000000080008081, 0x8000000000008009, 0x000000000000008a, - 0x0000000000000088, 0x0000000080008009, 0x000000008000000a, - 0x000000008000808b, 0x800000000000008b, 0x8000000000008089, - 0x8000000000008003, 0x8000000000008002, 0x8000000000000080, - 0x000000000000800a, 0x800000008000000a, 0x8000000080008081, - 0x8000000000008080, 0x0000000080000001, 0x8000000080008008 - }; - const int keccakf_rotc[24] = { - 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, - 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44 - }; - const int keccakf_piln[24] = { - 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, - 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1 - }; - - // variables - int i, j, r; - uint64_t t, bc[5]; - -#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ - // endianess conversion. 
this is redundant on little-endian targets - for (i = 0; i < 25; i++) { - st[i] = __builtin_bswap64(st[i]); - } -#endif - - // actual iteration - for (r = 0; r < KECCAKF_ROUNDS; r++) { - - // Theta - UNROLL_LOOP(5) - for (i = 0; i < 5; i++) - bc[i] = st[i] ^ st[i + 5] ^ st[i + 10] ^ st[i + 15] ^ st[i + 20]; - - UNROLL_LOOP(5) - for (i = 0; i < 5; i++) { - t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1); - UNROLL_LOOP(25) - for (j = 0; j < 25; j += 5) - st[j + i] ^= t; - } - - // Rho Pi - t = st[1]; - UNROLL_LOOP(24) - for (i = 0; i < 24; i++) { - j = keccakf_piln[i]; - bc[0] = st[j]; - st[j] = ROTL64(t, keccakf_rotc[i]); - t = bc[0]; - } - - // Chi - UNROLL_LOOP(25) - for (j = 0; j < 25; j += 5) { - UNROLL_LOOP(5) - for (i = 0; i < 5; i++) - bc[i] = st[j + i]; - UNROLL_LOOP(5) - for (i = 0; i < 5; i++) - st[j + i] ^= (~bc[(i + 1) % 5]) & bc[(i + 2) % 5]; - } - - // Iota - st[0] ^= keccakf_rndc[r]; - } - -#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ - // endianess conversion. this is redundant on little-endian targets - for (i = 0; i < 25; i++) { - st[i] = __builtin_bswap64(st[i]); - } -#endif -} - -// Initialize the context for SHA3 - -int sha3_init(sha3_ctx_t *c, int mdlen, int dsuffix) -{ - int i; - - for (i = 0; i < 25; i++) - c->st.q[i] = 0; - c->mdlen = mdlen; - c->rsiz = 200 - 2 * mdlen; - c->pt = 0; - c->dsuffix = dsuffix; - - return 1; -} - -// update state with more data - -int sha3_update(sha3_ctx_t *c, const void *data, size_t len) -{ - size_t i; - int j; - - j = c->pt; - for (i = 0; i < len; i++) { - c->st.b[j++] ^= ((const uint8_t *) data)[i]; - if (j >= c->rsiz) { - sha3_keccakf(c->st.q); - j = 0; - } - } - c->pt = j; - - return 1; -} - -// finalize and output a hash - -int sha3_final(void *md, sha3_ctx_t *c) -{ - int i; - - c->st.b[c->pt] ^= c->dsuffix; - c->st.b[c->rsiz - 1] ^= 0x80; - sha3_keccakf(c->st.q); - - for (i = 0; i < c->mdlen; i++) { - ((uint8_t *) md)[i] = c->st.b[i]; - } - - return 1; -} - -// compute a SHA-3 hash (md) of given byte 
length from "in" - -void *sha3(const void *in, size_t inlen, void *md, int mdlen) -{ - sha3_ctx_t sha3; - - sha3_init(&sha3, mdlen, 0x06); - sha3_update(&sha3, in, inlen); - sha3_final(md, &sha3); - - return md; -} - -// SHAKE128 and SHAKE256 extensible-output functionality - -void shake_xof(sha3_ctx_t *c) -{ - c->st.b[c->pt] ^= 0x1F; - c->st.b[c->rsiz - 1] ^= 0x80; - sha3_keccakf(c->st.q); - c->pt = 0; -} - -void shake_out(sha3_ctx_t *c, void *out, size_t len) -{ - size_t i; - int j; - - j = c->pt; - for (i = 0; i < len; i++) { - if (j >= c->rsiz) { - sha3_keccakf(c->st.q); - j = 0; - } - ((uint8_t *) out)[i] = c->st.b[j++]; - } - c->pt = j; -} - diff --git a/third-party/tiny_sha3/sha3.h b/third-party/tiny_sha3/sha3.h deleted file mode 100644 index d130afdf0..000000000 --- a/third-party/tiny_sha3/sha3.h +++ /dev/null @@ -1,39 +0,0 @@ -// sha3.h -// 19-Nov-11 Markku-Juhani O. Saarinen - -#ifndef SHA3_H -#define SHA3_H - -#include -#include - -// state context -typedef struct { - union { // state: - uint8_t b[200]; // 8-bit bytes - uint64_t q[25]; // 64-bit words - } st; - int pt, rsiz, mdlen, dsuffix; // these don't overflow -} sha3_ctx_t; - -// Compression function. 
-void sha3_keccakf(uint64_t st[25]); - -// OpenSSL - like interfece -int sha3_init(sha3_ctx_t *c, int mdlen, int dsuffix); // mdlen = hash output in bytes -int sha3_update(sha3_ctx_t *c, const void *data, size_t len); -int sha3_final(void *md, sha3_ctx_t *c); // digest goes to md - -// compute a sha3 hash (md) of given byte length from "in" -void *sha3(const void *in, size_t inlen, void *md, int mdlen); - -// SHAKE128 and SHAKE256 extensible-output functions -#define shake128_init(c) sha3_init(c, 16, 0x06) -#define shake256_init(c) sha3_init(c, 32, 0x06) -#define shake_update sha3_update - -void shake_xof(sha3_ctx_t *c); -void shake_out(sha3_ctx_t *c, void *out, size_t len); - -#endif - diff --git a/uarch/Makefile b/uarch/Makefile index c8ce13112..9e1eb6966 100644 --- a/uarch/Makefile +++ b/uarch/Makefile @@ -20,7 +20,7 @@ else HOST_CXX := g++ HOST_CC := gcc endif -HOST_CFLAGS := -I$(THIRD_PARTY_DIR)/tiny_sha3 -I$(EMULATOR_SRC_DIR) +HOST_CFLAGS := -I$(EMULATOR_SRC_DIR) HOST_CXXFLAGS := -std=c++20 CC := $(TOOLCHAIN_PREFIX)gcc @@ -80,7 +80,9 @@ COMPUTE_SOURCES=\ $(EMULATOR_SRC_DIR)/pristine-merkle-tree.cpp \ $(EMULATOR_SRC_DIR)/complete-merkle-tree.cpp \ $(EMULATOR_SRC_DIR)/full-merkle-tree.cpp \ - $(THIRD_PARTY_DIR)/tiny_sha3/sha3.c \ + $(EMULATOR_SRC_DIR)/keccak-256-hasher.cpp \ + $(EMULATOR_SRC_DIR)/sha-256-hasher.cpp \ + $(EMULATOR_SRC_DIR)/is-pristine.cpp \ uarch-pristine-ram.c UARCH_OBJS = $(patsubst %.c,%.uarch_c.o,$(patsubst %.cpp,%.uarch_cpp.o,$(UARCH_SOURCES))) @@ -164,4 +166,4 @@ clean-auto-generated: @rm -f uarch-pristine-hash.c uarch-pristine-ram.c clean: clean-executables clean-auto-generated - @rm -f *.ld *.elf *.bin *.tmp link.ld $(UARCH_OBJS) $(EMULATOR_OBJS) $(COMPUTE_OBJS) uarch-ram-entry.o *.clang-tidy *.insn.txt *.objdump + @rm -f *.ld *.elf *.bin *.tmp *.o $(UARCH_OBJS) $(EMULATOR_OBJS) $(COMPUTE_OBJS) *.clang-tidy *.insn.txt *.objdump diff --git a/uarch/compute-uarch-pristine-hash.cpp b/uarch/compute-uarch-pristine-hash.cpp index 
68aecbda6..b8052993b 100644 --- a/uarch/compute-uarch-pristine-hash.cpp +++ b/uarch/compute-uarch-pristine-hash.cpp @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -31,10 +32,6 @@ using namespace cartesi; -using tree_type = back_merkle_tree; -using hash_type = tree_type::hash_type; -using hasher_type = tree_type::hasher_type; - static constexpr auto word_size = HASH_TREE_WORD_SIZE; static constexpr auto log2_word_size = HASH_TREE_LOG2_WORD_SIZE; static constexpr auto page_size = HASH_TREE_PAGE_SIZE; @@ -68,9 +65,9 @@ Computes the hash of the pristine uarch state. } int main(int argc, char *argv[]) try { - tree_type tree{UARCH_STATE_LOG2_SIZE, log2_page_size, log2_word_size}; - hasher_type hasher{}; - hash_type hash{}; + back_merkle_tree tree{UARCH_STATE_LOG2_SIZE, log2_page_size, log2_word_size, hash_function_type::keccak256}; + keccak_256_hasher hasher{}; + machine_hash hash{}; // Process command line arguments for (int i = 1; i < argc; ++i) { @@ -88,7 +85,7 @@ int main(int argc, char *argv[]) try { throw std::runtime_error("Could not allocate scratch memory"); } auto scratch_span = std::span{scratch.get(), page_size}; - hash_type pristine_hash; + machine_hash pristine_hash; get_merkle_tree_hash(hasher, scratch_span, word_size, pristine_hash); // Build pristine shadow uarch state @@ -123,7 +120,7 @@ int main(int argc, char *argv[]) try { // Get uarch state hash auto uarch_state_hash = tree.get_root_hash(); // Print header - std::cout << "// This file is auto-generated and should not be modified" << std::endl; + std::cout << "// This file is auto-generated and should not be modified\n"; // Print hash std::cout << "unsigned char uarch_pristine_hash[] = {\n "; int i = 0; @@ -136,8 +133,7 @@ int main(int argc, char *argv[]) try { std::cout << "0x" << std::setw(2) << std::setfill('0') << std::hex << static_cast(c); i++; } - std::cout << "\n};\nunsigned int uarch_pristine_hash_len = " << std::dec << uarch_state_hash.size() << ";" - << 
std::endl; + std::cout << "\n};\nunsigned int uarch_pristine_hash_len = " << std::dec << uarch_state_hash.size() << ";\n"; return 0; } catch (std::exception &e) { std::cerr << "Caught exception: " << e.what() << '\n';