From d7eb36e0fac8b89c1d58cfa519d144c35bfc431d Mon Sep 17 00:00:00 2001 From: vird Date: Sat, 14 Dec 2024 04:01:48 +0000 Subject: [PATCH 1/8] fused rx2 --- .../c_src/randomx/pack_randomx_square.cpp | 212 ++++++++++++++++++ .../c_src/randomx/pack_randomx_square.h | 14 ++ .../randomx/rxsquared/ar_rxsquared_nif.c | 140 ++++++++++++ apps/arweave/src/ar_bench_2_9.erl | 86 +++++-- apps/arweave/src/ar_rxsquared_nif.erl | 16 ++ apps/arweave/test/ar_audit_tests.erl | 20 +- 6 files changed, 463 insertions(+), 25 deletions(-) diff --git a/apps/arweave/c_src/randomx/pack_randomx_square.cpp b/apps/arweave/c_src/randomx/pack_randomx_square.cpp index f3e846380..f361f0fbc 100644 --- a/apps/arweave/c_src/randomx/pack_randomx_square.cpp +++ b/apps/arweave/c_src/randomx/pack_randomx_square.cpp @@ -1,4 +1,5 @@ #include +#include #include "crc32.h" #include "pack_randomx_square.h" #include "feistel_msgsize_key_cipher.h" @@ -156,5 +157,216 @@ extern "C" { } } + int rsp_fused_entropy( + randomx_vm** vmList, + size_t scratchpadSize, + int replicaEntropySubChunkCount, + int compositePackingSubChunkSize, + int laneCount, + int rxDepth, + int randomxProgramCount, + int blockSize, + const unsigned char* keyData, + size_t keySize, + unsigned char* outAllScratchpads + ) + { + // 1) Define the aligned struct for tempHash + struct vm_hash_t { + alignas(16) uint64_t tempHash[8]; // 64 bytes + }; + + // 2) Allocate the vm_hash_t array here in C++ + vm_hash_t* vmHashes = new (std::nothrow) vm_hash_t[2*laneCount]; + if (!vmHashes) { + return 0; // indicates allocation failure + } + + // 3) Initialize each VM scratchpad + for (int i = 0; i < laneCount; i++) { + unsigned char laneSeed[32]; + { + SHA256_CTX sha256; + SHA256_Init(&sha256); + SHA256_Update(&sha256, keyData, keySize); + unsigned char laneIndex = (unsigned char)i + 1; + SHA256_Update(&sha256, &laneIndex, 1); + SHA256_Final(laneSeed, &sha256); + } + int blakeResult = randomx_blake2b( + vmHashes[i].tempHash, sizeof(vmHashes[i].tempHash), + 
laneSeed, 32, + nullptr, 0 + ); + if (blakeResult != 0) { + // Free memory and return error if hashing fails + delete[] vmHashes; + return 0; + } + fillAes1Rx4( + vmHashes[i].tempHash, + scratchpadSize, + (void*)vmList[i]->getScratchpad() + ); + } + + // 4) Inline exec + auto randomx_squared_exec_inplace = [&](randomx_vm* machine, uint64_t* srcTempHash, uint64_t* dstTempHash, int programCount, size_t scratchpadSize) { + machine->resetRoundingMode(); + for (int chain = 0; chain < programCount-1; chain++) { + machine->run(srcTempHash); + int br = randomx_blake2b( + srcTempHash, 64, + machine->getRegisterFile(), + sizeof(randomx::RegisterFile), + nullptr, 0 + ); + assert(br == 0); + } + machine->run(srcTempHash); + int br = randomx_blake2b( + dstTempHash, 64, + machine->getRegisterFile(), + sizeof(randomx::RegisterFile), + nullptr, 0 + ); + assert(br == 0); + packing_mix_entropy_crc32( + (const unsigned char*)machine->getScratchpad(), + (unsigned char*)(void*)machine->getScratchpad(), + scratchpadSize); + }; + + // 5) Inline packing mix + auto packing_mix_entropy_far_sets = [&](randomx_vm** inSet, + randomx_vm** outSet, + int count, + size_t scratchpadSize, + size_t jumpSize, + size_t blockSize) + { + size_t totalSize = (size_t)count * scratchpadSize; // total bytes across all lanes + // DEBUG + // for(int i=0;igetScratchpad(), 0, scratchpadSize); + // } + + // A helper function to copy `length` bytes from global offset srcPos to dstPos in cross-lane memory. + auto copyChunkCrossLane = [&](size_t srcPos, size_t dstPos, size_t length) { + while (length > 0) { + // Find source lane + offset + int srcLane = (int)(srcPos / scratchpadSize); + size_t offsetInSrcLane = srcPos % scratchpadSize; + + // Find destination lane + offset + int dstLane = (int)(dstPos / scratchpadSize); + size_t offsetInDstLane = dstPos % scratchpadSize; + + // How many bytes remain in source lane from offsetInSrcLane? 
+ size_t srcLaneRemain = scratchpadSize - offsetInSrcLane; + // How many bytes remain in destination lane from offsetInDstLane? + size_t dstLaneRemain = scratchpadSize - offsetInDstLane; + + // The chunk we can safely copy (without crossing a lane boundary) + size_t chunkSize = length; + if (chunkSize > srcLaneRemain) { + chunkSize = srcLaneRemain; + } + if (chunkSize > dstLaneRemain) { + chunkSize = dstLaneRemain; + } + + // Perform the memcpy for this sub-chunk + unsigned char* srcSp = (unsigned char*)(void*) inSet[srcLane]->getScratchpad(); + unsigned char* dstSp = (unsigned char*)(void*) outSet[dstLane]->getScratchpad(); + memcpy(dstSp + offsetInDstLane, srcSp + offsetInSrcLane, chunkSize); + + // Advance + srcPos += chunkSize; + dstPos += chunkSize; + length -= chunkSize; + } + }; + + // Now we replicate your leftover logic from the original packing_mix_entropy_far() + size_t entropySize = totalSize; + size_t numJumps = entropySize / jumpSize; + size_t numBlocksPerJump = jumpSize / blockSize; + size_t leftover = jumpSize % blockSize; + + size_t outOffset = 0; // global offset in outSet + for (size_t offset = 0; offset < numBlocksPerJump; ++offset) { + for (size_t i = 0; i < numJumps; ++i) { + size_t srcPos = i * jumpSize + offset * blockSize; // global source offset + copyChunkCrossLane(srcPos, outOffset, blockSize); + outOffset += blockSize; + } + } + + if (leftover > 0) { + for (size_t i = 0; i < numJumps; ++i) { + size_t srcPos = i * jumpSize + numBlocksPerJump * blockSize; + copyChunkCrossLane(srcPos, outOffset, leftover); + outOffset += leftover; + } + } + }; + + // 6) Main depth iteration + for (int d = 0; d < rxDepth; d++) { + if ((d % 2) == 0) { + // Even iteration => run Set-A, mix -> Set-B + for (int lane = 0; lane < laneCount; lane++) { + randomx_squared_exec_inplace(vmList[lane], vmHashes[lane].tempHash, vmHashes[lane+laneCount].tempHash, randomxProgramCount, scratchpadSize); + } + packing_mix_entropy_far_sets(&vmList[0], &vmList[laneCount], + 
laneCount, scratchpadSize, scratchpadSize, + blockSize); + + if (d + 1 < rxDepth) { + d++; // second iteration in the pair + for (int lane = laneCount; lane < 2*laneCount; lane++) { + randomx_squared_exec_inplace(vmList[lane], vmHashes[lane].tempHash, vmHashes[lane-laneCount].tempHash, randomxProgramCount, scratchpadSize); + } + packing_mix_entropy_far_sets(&vmList[laneCount], &vmList[0], + laneCount, scratchpadSize, scratchpadSize, + blockSize); + } + } else { + // Odd iteration + for (int lane = 0; lane < laneCount; lane++) { + randomx_squared_exec_inplace(vmList[lane], vmHashes[lane].tempHash, vmHashes[lane+laneCount].tempHash, randomxProgramCount, scratchpadSize); + } + packing_mix_entropy_far_sets(&vmList[0], &vmList[laneCount], + laneCount, scratchpadSize, scratchpadSize, + blockSize); + } + } + // NOTE still unoptimal. Last copy can be performed from scratchpad to output. But requires +1 variation (set to buffer) + + // 7) Copy final scratchpads into outAllScratchpads + if ((rxDepth % 2) == 0) { + unsigned char* outAllScratchpadsPtr = outAllScratchpads; + for (int i = 0; i < laneCount; i++) { + void* sp = (void*)vmList[i]->getScratchpad(); + memcpy(outAllScratchpadsPtr, sp, scratchpadSize); + outAllScratchpadsPtr += scratchpadSize; + } + } else { + unsigned char* outAllScratchpadsPtr = outAllScratchpads; + for (int i = laneCount; i < 2*laneCount; i++) { + void* sp = (void*)vmList[i]->getScratchpad(); + memcpy(outAllScratchpadsPtr, sp, scratchpadSize); + outAllScratchpadsPtr += scratchpadSize; + } + } + + // 8) Free the vm_hash_t array + delete[] vmHashes; + + // If we made it here, success + return 1; + } + // TODO optimized packing_apply_to_subchunk (NIF only uses slice) } diff --git a/apps/arweave/c_src/randomx/pack_randomx_square.h b/apps/arweave/c_src/randomx/pack_randomx_square.h index 47038e839..cefc41c90 100755 --- a/apps/arweave/c_src/randomx/pack_randomx_square.h +++ b/apps/arweave/c_src/randomx/pack_randomx_square.h @@ -25,6 +25,20 @@ 
RANDOMX_EXPORT void packing_mix_entropy_far( const unsigned char *inEntropy, unsigned char *outEntropy, const size_t entropySize, const size_t jumpSize, const size_t blockSize); +RANDOMX_EXPORT int rsp_fused_entropy( + randomx_vm** vmList, + size_t scratchpadSize, + int replicaEntropySubChunkCount, + int compositePackingSubChunkSize, + int laneCount, + int rxDepth, + int randomxProgramCount, + int blockSize, + const unsigned char* keyData, + size_t keySize, + unsigned char* outAllScratchpads // We'll pass in a pointer for final scratchpad data +); + // TODO optimized packing_apply_to_subchunk (NIF only uses slice) #if defined(__cplusplus) diff --git a/apps/arweave/c_src/randomx/rxsquared/ar_rxsquared_nif.c b/apps/arweave/c_src/randomx/rxsquared/ar_rxsquared_nif.c index 810ea8134..d2f525264 100755 --- a/apps/arweave/c_src/randomx/rxsquared/ar_rxsquared_nif.c +++ b/apps/arweave/c_src/randomx/rxsquared/ar_rxsquared_nif.c @@ -429,6 +429,144 @@ static ERL_NIF_TERM rsp_feistel_decrypt_nif( return ok_tuple(envPtr, outMsgTerm); } +static ERL_NIF_TERM rsp_fused_entropy_nif(ErlNifEnv* envPtr, int argc, const ERL_NIF_TERM argv[]) { + if (argc != 10) { + return enif_make_badarg(envPtr); + } + + // 1. Parse the state resource + struct state* statePtr; + if (!enif_get_resource(envPtr, argv[0], stateType, (void**)&statePtr)) { + return error_tuple(envPtr, "failed_to_read_state"); + } + + // 2. 
Parse each integer + int replicaEntropySubChunkCount; + if (!enif_get_int(envPtr, argv[1], &replicaEntropySubChunkCount)) { + return enif_make_badarg(envPtr); + } + + int compositePackingSubChunkSize; + if (!enif_get_int(envPtr, argv[2], &compositePackingSubChunkSize)) { + return enif_make_badarg(envPtr); + } + + int laneCount; + if (!enif_get_int(envPtr, argv[3], &laneCount)) { + return enif_make_badarg(envPtr); + } + + int rxDepth; + if (!enif_get_int(envPtr, argv[4], &rxDepth)) { + return enif_make_badarg(envPtr); + } + + int jitEnabled; + if (!enif_get_int(envPtr, argv[5], &jitEnabled)) { + return enif_make_badarg(envPtr); + } + + int largePagesEnabled; + if (!enif_get_int(envPtr, argv[6], &largePagesEnabled)) { + return enif_make_badarg(envPtr); + } + + int hardwareAESEnabled; + if (!enif_get_int(envPtr, argv[7], &hardwareAESEnabled)) { + return enif_make_badarg(envPtr); + } + + int randomxProgramCount; + if (!enif_get_int(envPtr, argv[8], &randomxProgramCount)) { + return enif_make_badarg(envPtr); + } + + // 3. Parse key as a binary + ErlNifBinary keyBin; + if (!enif_inspect_binary(envPtr, argv[9], &keyBin)) { + return enif_make_badarg(envPtr); + } + + // 4. Create VMs + int totalVMs = 2 * laneCount; + randomx_vm** vmList = (randomx_vm**)calloc(totalVMs, sizeof(randomx_vm*)); + if (!vmList) { + return error_tuple(envPtr, "vmList_alloc_failed"); + } + + size_t scratchpadSize = randomx_get_scratchpad_size(); + + // 5. Pre-allocate the final output binary to store all scratchpads + size_t totalSpSize = scratchpadSize * laneCount; + ERL_NIF_TERM outScratchpadsTerm; + unsigned char* outAllScratchpads = + enif_make_new_binary(envPtr, totalSpSize, &outScratchpadsTerm); + if (!outAllScratchpads) { + free(vmList); + return enif_make_badarg(envPtr); + } + + // 6. 
Create the randomx_vm objects + int isRandomxReleased = 0; + for (int i = 0; i < totalVMs; i++) { + vmList[i] = create_vm( + statePtr, + (statePtr->mode == HASHING_MODE_FAST), + jitEnabled, + largePagesEnabled, + hardwareAESEnabled, + &isRandomxReleased + ); + if (!vmList[i]) { + // Clean up partial + for (int j = 0; j < i; j++) { + destroy_vm(statePtr, vmList[j]); + } + free(vmList); + if (isRandomxReleased != 0) { + return error_tuple(envPtr, "state_has_been_released"); + } + return error_tuple(envPtr, "randomx_create_vm_failed"); + } + } + + // 7. Call the pure C++ function that does the heavy logic and returns bool + int success = rsp_fused_entropy( + vmList, + scratchpadSize, + replicaEntropySubChunkCount, + compositePackingSubChunkSize, + laneCount, + rxDepth, + randomxProgramCount, + 6, + keyBin.data, + keyBin.size, + outAllScratchpads // final buffer for the scratchpads + ); + + // 8. If the function returned false, we interpret that as an error + if (!success) { + // Cleanup + for (int i = 0; i < totalVMs; i++) { + if (vmList[i]) { + destroy_vm(statePtr, vmList[i]); + } + } + free(vmList); + return error_tuple(envPtr, "cxx_fused_entropy_failed"); + } + + // 9. 
If success, destroy VMs and return {ok, ScratchpadsBin} + for (int i = 0; i < totalVMs; i++) { + destroy_vm(statePtr, vmList[i]); + } + free(vmList); + + return ok_tuple(envPtr, outScratchpadsTerm); +} + + static ErlNifFunc rxsquared_funcs[] = { {"rxsquared_info_nif", 1, rxsquared_info_nif, ERL_NIF_DIRTY_JOB_CPU_BOUND}, {"rxsquared_init_nif", 5, rxsquared_init_nif, ERL_NIF_DIRTY_JOB_CPU_BOUND}, @@ -446,6 +584,8 @@ static ErlNifFunc rxsquared_funcs[] = { rsp_mix_entropy_far_nif, ERL_NIF_DIRTY_JOB_CPU_BOUND}, {"rsp_mix_entropy_far_test_nif", 3, rsp_mix_entropy_far_test_nif, ERL_NIF_DIRTY_JOB_CPU_BOUND}, + {"rsp_fused_entropy_nif", 10, + rsp_fused_entropy_nif, ERL_NIF_DIRTY_JOB_CPU_BOUND}, {"rsp_feistel_encrypt_nif", 2, rsp_feistel_encrypt_nif, ERL_NIF_DIRTY_JOB_CPU_BOUND}, {"rsp_feistel_decrypt_nif", 2, rsp_feistel_decrypt_nif, ERL_NIF_DIRTY_JOB_CPU_BOUND} }; diff --git a/apps/arweave/src/ar_bench_2_9.erl b/apps/arweave/src/ar_bench_2_9.erl index de464c16d..71402b24b 100644 --- a/apps/arweave/src/ar_bench_2_9.erl +++ b/apps/arweave/src/ar_bench_2_9.erl @@ -6,49 +6,49 @@ -include_lib("arweave/include/ar_config.hrl"). run_benchmark_from_cli(Args) -> - Threads = list_to_integer(get_flag_value(Args, "threads", "1")), + Threads = list_to_integer(get_flag_value(Args, "threads", "1")), DataMiB = list_to_integer(get_flag_value(Args, "mib", "1024")), Format= case get_flag_value(Args, "format", "replica_2_9") of "replica_2_9" -> replica_2_9; + "replica_2_9p" -> replica_2_9p; "composite.1" -> {composite, 1}; "composite.10" -> {composite, 10}; "spora_2_6" -> spora_2_6; _ -> show_help() end, - % Collect all directory values - Dirs = collect_dirs(Args), - - % Ensure each directory exists - lists:foreach(fun(Dir) -> - case filelib:ensure_dir(filename:join(Dir, "dummy")) of - ok -> ok; - {error, Reason} -> - io:format("Error: Could not ensure directory ~p exists. 
Reason: ~p~n", [Dir, Reason]), - show_help(), - erlang:halt(1) - end - end, Dirs), - - run_benchmark({Format, Dirs, Threads, DataMiB}). + % Collect all directory values + Dirs = collect_dirs(Args), + + % Ensure each directory exists + lists:foreach(fun(Dir) -> + case filelib:ensure_dir(filename:join(Dir, "dummy")) of + ok -> ok; + {error, Reason} -> + io:format("Error: Could not ensure directory ~p exists. Reason: ~p~n", [Dir, Reason]), + show_help(), + erlang:halt(1) + end + end, Dirs), + run_benchmark({Format, Dirs, Threads, DataMiB}). collect_dirs([]) -> - []; + []; collect_dirs(["dir", Dir | Tail]) -> - [Dir | collect_dirs(Tail)]; + [Dir | collect_dirs(Tail)]; collect_dirs([_ | Tail]) -> - collect_dirs(Tail). + collect_dirs(Tail). get_flag_value([], _, DefaultValue) -> - DefaultValue; + DefaultValue; get_flag_value([Flag, Value | Tail], TargetFlag, _DefaultValue) when Flag == TargetFlag -> - Value; + Value; get_flag_value([_ | Tail], TargetFlag, DefaultValue) -> - get_flag_value(Tail, TargetFlag, DefaultValue). + get_flag_value(Tail, TargetFlag, DefaultValue). show_help() -> - io:format("~nUsage: benchmark-2.9 [format replica_2_9|composite.1|composite.10|spora_2_6] [threads N] [mib N] [dir path1 dir path2 dir path3 ...]~n~n"), + io:format("~nUsage: benchmark-2.9 [format replica_2_9|replica_2_9p|composite.1|composite.10|spora_2_6] [threads N] [mib N] [dir path1 dir path2 dir path3 ...]~n~n"), io:format("format: format to pack. replica_2_9, composite.1, composite.10, or spora_2_6. Default: replica_2_9.~n"), io:format("threads: number of threads to run. 
Default: 1.~n"), @@ -116,6 +116,8 @@ prepare_context(replica_2_9, Threads, DataMiB) -> rxsquared, ?RANDOMX_PACKING_KEY, 1, 1, erlang:system_info(dirty_cpu_schedulers_online)), {RandomXState, SubChunk, Key, EntropyPerThread}; +prepare_context(replica_2_9p, Threads, DataMiB)-> + prepare_context(replica_2_9, Threads, DataMiB); prepare_context(spora_2_6, Threads, DataMiB) -> Root = crypto:strong_rand_bytes(32), Address = crypto:strong_rand_bytes(32), @@ -142,12 +144,16 @@ prepare_context({composite, Difficulty}, Threads, DataMiB) -> get_total_data(replica_2_9, Threads, {_,_, _, EntropyPerThread}) -> Threads * EntropyPerThread * ?REPLICA_2_9_ENTROPY_SUB_CHUNK_COUNT * ?COMPOSITE_PACKING_SUB_CHUNK_SIZE / ?MiB; +get_total_data(replica_2_9p, Threads, Context) -> + get_total_data(replica_2_9, Threads, Context); get_total_data(spora_2_6, Threads, {_, _, _, ChunksPerThread}) -> Threads * ChunksPerThread * ?DATA_CHUNK_SIZE / ?MiB; get_total_data({composite, _}, Threads, {_, _, _, ChunksPerThread}) -> Threads * ChunksPerThread * ?DATA_CHUNK_SIZE / ?MiB. get_iterations(replica_2_9, _Threads, {_,_, _, EntropyPerThread}) -> EntropyPerThread; +get_iterations(replica_2_9p, _Threads, {_,_, _, EntropyPerThread}) -> + EntropyPerThread; get_iterations(spora_2_6, _Threads, {_, _, _, ChunksPerThread}) -> ChunksPerThread; get_iterations({composite, _}, _Threads, {_, _, _, ChunksPerThread}) -> @@ -169,6 +175,40 @@ pack_chunks(replica_2_9, Thread, Dir, Context, Count) -> file:write_file(Path, PackedSubChunks) end, pack_chunks(replica_2_9, Thread, Dir, Context, Count-1); +% pack_chunks(replica_2_9p, Thread, Dir, Context, Count) -> +% pack_chunks(replica_2_9, Thread, Dir, Context, Count); +pack_chunks(replica_2_9p, _Thread, _Dir, _Context, 0) -> + ok; +pack_chunks(replica_2_9p, Thread, Dir, Context, Count) -> + {RandomXState, SubChunk, Key, _EntropyPerThread} = Context, + + %% This is where we call the new fused NIF: + %% Suppose the new NIF returns {ok, EntropyBin} or something similar. 
+ {ok, Entropy} = ar_rxsquared_nif:rsp_fused_entropy_nif( + element(2, RandomXState), + ?REPLICA_2_9_ENTROPY_SUB_CHUNK_COUNT, + ?COMPOSITE_PACKING_SUB_CHUNK_SIZE, + ?REPLICA_2_9_RANDOMX_LANE_COUNT, + ?REPLICA_2_9_RANDOMX_DEPTH, + 1, %% jitEnabled, + 1, %% largePagesEnabled + 1, %% hardwareAESEnabled + ?REPLICA_2_9_RANDOMX_ROUND_COUNT, + Key + ), + + %% Then we can reuse pack_sub_chunks as before + PackedSubChunks = pack_sub_chunks(SubChunk, Entropy, 0, RandomXState, []), + case Dir of + undefined -> + ok; + _ -> + Filename = io_lib:format("t~p_e~p.bin", [Thread, Count]), + Path = filename:join(Dir, Filename), + file:write_file(Path, PackedSubChunks) + end, + pack_chunks(replica_2_9p, Thread, Dir, Context, Count - 1); + pack_chunks(spora_2_6, Thread, Dir, Context, Count) -> {RandomXState, Chunk, Key, _ChunksPerThread} = Context, {ok, PackedChunk} = ar_rx512_nif:rx512_encrypt_chunk_nif( diff --git a/apps/arweave/src/ar_rxsquared_nif.erl b/apps/arweave/src/ar_rxsquared_nif.erl index 0bce83374..172056039 100755 --- a/apps/arweave/src/ar_rxsquared_nif.erl +++ b/apps/arweave/src/ar_rxsquared_nif.erl @@ -11,6 +11,7 @@ rsp_mix_entropy_crc32_nif/1, rsp_mix_entropy_far_nif/1, rsp_mix_entropy_far_test_nif/3, + rsp_fused_entropy_nif/10, rsp_feistel_encrypt_nif/2, rsp_feistel_decrypt_nif/2]). @@ -63,6 +64,21 @@ rsp_mix_entropy_far_test_nif(_Entropy, _JumpSize, _BlockSize) -> ?LOG_ERROR("rsp_mix_entropy_far_test_nif"), erlang:nif_error(nif_not_loaded). +rsp_fused_entropy_nif( + _RandomxState, + _ReplicaEntropySubChunkCount, + _CompositePackingSubChunkSize, + _LaneCount, + _RxDepth, + _JitEnabled, + _LargePagesEnabled, + _HardwareAESEnabled, + _RandomxProgramCount, + _Key +) -> + ?LOG_ERROR("randomx_generate_replica_2_9_entropy_nif"), + erlang:nif_error(nif_not_loaded). + rsp_feistel_encrypt_nif(_InMsg, _Key) -> ?LOG_ERROR("rsp_feistel_encrypt_nif"), erlang:nif_error(nif_not_loaded). 
diff --git a/apps/arweave/test/ar_audit_tests.erl b/apps/arweave/test/ar_audit_tests.erl index 6414a0123..1ac309abc 100644 --- a/apps/arweave/test/ar_audit_tests.erl +++ b/apps/arweave/test/ar_audit_tests.erl @@ -2,12 +2,13 @@ -include_lib("eunit/include/eunit.hrl"). +-include_lib("arweave/include/ar.hrl"). -include_lib("arweave/include/ar_consensus.hrl"). setup_replica_2_9() -> - FastState = ar_mine_randomx:init_fast2(rxsquared, ?RANDOMX_PACKING_KEY, 0, 0, + FastState = ar_mine_randomx:init_fast2(rxsquared, ?RANDOMX_PACKING_KEY, 0, 0, erlang:system_info(dirty_cpu_schedulers_online)), - LightState = ar_mine_randomx:init_light2(rxsquared, ?RANDOMX_PACKING_KEY, 0, 0), + LightState = ar_mine_randomx:init_light2(rxsquared, ?RANDOMX_PACKING_KEY, 0, 0), {FastState, LightState}. test_register(TestFun, Fixture) -> @@ -92,6 +93,21 @@ test_vectors({FastState, _LightState}) -> EntropySubChunkIndex}), ?assertEqual(SubChunk, SubChunkReal), + {ok, EntropyFused} = ar_rxsquared_nif:rsp_fused_entropy_nif( + element(2, FastState), + ?REPLICA_2_9_ENTROPY_SUB_CHUNK_COUNT, + ?COMPOSITE_PACKING_SUB_CHUNK_SIZE, + ?REPLICA_2_9_RANDOMX_LANE_COUNT, + ?REPLICA_2_9_RANDOMX_DEPTH, + 0, + 0, + 0, + ?REPLICA_2_9_RANDOMX_ROUND_COUNT, + Key + ), + EntropyFusedHash = crypto:hash(sha256, EntropyFused), + ?assertEqual(EntropyHashExpd, EntropyFusedHash), + ok. 
test_quick({FastState, _LightState}) -> From 94e809820c43b08b9d6c78f6d41b0cf218dc844c Mon Sep 17 00:00:00 2001 From: vird Date: Sun, 29 Dec 2024 22:34:11 +0000 Subject: [PATCH 2/8] cosmetic fixes; fix test --- .../c_src/randomx/pack_randomx_square.cpp | 67 +++++-------------- apps/arweave/test/ar_audit_tests.erl | 8 +-- 2 files changed, 20 insertions(+), 55 deletions(-) diff --git a/apps/arweave/c_src/randomx/pack_randomx_square.cpp b/apps/arweave/c_src/randomx/pack_randomx_square.cpp index f361f0fbc..18e637b97 100644 --- a/apps/arweave/c_src/randomx/pack_randomx_square.cpp +++ b/apps/arweave/c_src/randomx/pack_randomx_square.cpp @@ -169,20 +169,16 @@ extern "C" { const unsigned char* keyData, size_t keySize, unsigned char* outAllScratchpads - ) - { - // 1) Define the aligned struct for tempHash + ) { struct vm_hash_t { alignas(16) uint64_t tempHash[8]; // 64 bytes }; - // 2) Allocate the vm_hash_t array here in C++ vm_hash_t* vmHashes = new (std::nothrow) vm_hash_t[2*laneCount]; if (!vmHashes) { - return 0; // indicates allocation failure + return 0; } - // 3) Initialize each VM scratchpad for (int i = 0; i < laneCount; i++) { unsigned char laneSeed[32]; { @@ -199,7 +195,6 @@ extern "C" { nullptr, 0 ); if (blakeResult != 0) { - // Free memory and return error if hashing fails delete[] vmHashes; return 0; } @@ -210,7 +205,6 @@ extern "C" { ); } - // 4) Inline exec auto randomx_squared_exec_inplace = [&](randomx_vm* machine, uint64_t* srcTempHash, uint64_t* dstTempHash, int programCount, size_t scratchpadSize) { machine->resetRoundingMode(); for (int chain = 0; chain < programCount-1; chain++) { @@ -237,7 +231,6 @@ extern "C" { scratchpadSize); }; - // 5) Inline packing mix auto packing_mix_entropy_far_sets = [&](randomx_vm** inSet, randomx_vm** outSet, int count, @@ -245,29 +238,19 @@ extern "C" { size_t jumpSize, size_t blockSize) { - size_t totalSize = (size_t)count * scratchpadSize; // total bytes across all lanes - // DEBUG - // for(int i=0;igetScratchpad(), 0, 
scratchpadSize); - // } + size_t totalSize = (size_t)count * scratchpadSize; - // A helper function to copy `length` bytes from global offset srcPos to dstPos in cross-lane memory. auto copyChunkCrossLane = [&](size_t srcPos, size_t dstPos, size_t length) { while (length > 0) { - // Find source lane + offset int srcLane = (int)(srcPos / scratchpadSize); size_t offsetInSrcLane = srcPos % scratchpadSize; - // Find destination lane + offset int dstLane = (int)(dstPos / scratchpadSize); size_t offsetInDstLane = dstPos % scratchpadSize; - // How many bytes remain in source lane from offsetInSrcLane? size_t srcLaneRemain = scratchpadSize - offsetInSrcLane; - // How many bytes remain in destination lane from offsetInDstLane? size_t dstLaneRemain = scratchpadSize - offsetInDstLane; - // The chunk we can safely copy (without crossing a lane boundary) size_t chunkSize = length; if (chunkSize > srcLaneRemain) { chunkSize = srcLaneRemain; @@ -276,28 +259,25 @@ extern "C" { chunkSize = dstLaneRemain; } - // Perform the memcpy for this sub-chunk unsigned char* srcSp = (unsigned char*)(void*) inSet[srcLane]->getScratchpad(); unsigned char* dstSp = (unsigned char*)(void*) outSet[dstLane]->getScratchpad(); memcpy(dstSp + offsetInDstLane, srcSp + offsetInSrcLane, chunkSize); - // Advance srcPos += chunkSize; dstPos += chunkSize; length -= chunkSize; } }; - // Now we replicate your leftover logic from the original packing_mix_entropy_far() size_t entropySize = totalSize; size_t numJumps = entropySize / jumpSize; size_t numBlocksPerJump = jumpSize / blockSize; size_t leftover = jumpSize % blockSize; - size_t outOffset = 0; // global offset in outSet + size_t outOffset = 0; for (size_t offset = 0; offset < numBlocksPerJump; ++offset) { for (size_t i = 0; i < numJumps; ++i) { - size_t srcPos = i * jumpSize + offset * blockSize; // global source offset + size_t srcPos = i * jumpSize + offset * blockSize; copyChunkCrossLane(srcPos, outOffset, blockSize); outOffset += blockSize; } @@ 
-312,39 +292,26 @@ extern "C" { } }; - // 6) Main depth iteration for (int d = 0; d < rxDepth; d++) { - if ((d % 2) == 0) { - // Even iteration => run Set-A, mix -> Set-B - for (int lane = 0; lane < laneCount; lane++) { - randomx_squared_exec_inplace(vmList[lane], vmHashes[lane].tempHash, vmHashes[lane+laneCount].tempHash, randomxProgramCount, scratchpadSize); - } - packing_mix_entropy_far_sets(&vmList[0], &vmList[laneCount], - laneCount, scratchpadSize, scratchpadSize, - blockSize); + for (int lane = 0; lane < laneCount; lane++) { + randomx_squared_exec_inplace(vmList[lane], vmHashes[lane].tempHash, vmHashes[lane+laneCount].tempHash, randomxProgramCount, scratchpadSize); + } + packing_mix_entropy_far_sets(&vmList[0], &vmList[laneCount], + laneCount, scratchpadSize, scratchpadSize, + blockSize); - if (d + 1 < rxDepth) { - d++; // second iteration in the pair - for (int lane = laneCount; lane < 2*laneCount; lane++) { - randomx_squared_exec_inplace(vmList[lane], vmHashes[lane].tempHash, vmHashes[lane-laneCount].tempHash, randomxProgramCount, scratchpadSize); - } - packing_mix_entropy_far_sets(&vmList[laneCount], &vmList[0], - laneCount, scratchpadSize, scratchpadSize, - blockSize); - } - } else { - // Odd iteration - for (int lane = 0; lane < laneCount; lane++) { - randomx_squared_exec_inplace(vmList[lane], vmHashes[lane].tempHash, vmHashes[lane+laneCount].tempHash, randomxProgramCount, scratchpadSize); + if (d + 1 < rxDepth) { + d++; + for (int lane = laneCount; lane < 2*laneCount; lane++) { + randomx_squared_exec_inplace(vmList[lane], vmHashes[lane].tempHash, vmHashes[lane-laneCount].tempHash, randomxProgramCount, scratchpadSize); } - packing_mix_entropy_far_sets(&vmList[0], &vmList[laneCount], + packing_mix_entropy_far_sets(&vmList[laneCount], &vmList[0], laneCount, scratchpadSize, scratchpadSize, blockSize); } } // NOTE still unoptimal. Last copy can be performed from scratchpad to output. 
But requires +1 variation (set to buffer) - // 7) Copy final scratchpads into outAllScratchpads if ((rxDepth % 2) == 0) { unsigned char* outAllScratchpadsPtr = outAllScratchpads; for (int i = 0; i < laneCount; i++) { @@ -361,10 +328,8 @@ extern "C" { } } - // 8) Free the vm_hash_t array delete[] vmHashes; - // If we made it here, success return 1; } diff --git a/apps/arweave/test/ar_audit_tests.erl b/apps/arweave/test/ar_audit_tests.erl index 1ac309abc..cfa624676 100644 --- a/apps/arweave/test/ar_audit_tests.erl +++ b/apps/arweave/test/ar_audit_tests.erl @@ -75,7 +75,7 @@ test_vectors({FastState, _LightState}) -> Key = << 1 >>, Entropy = ar_mine_randomx:randomx_generate_replica_2_9_entropy(FastState, Key), EntropyHash = crypto:hash(sha256, Entropy), - EntropyHashExpd = <<56,199,231,119,170,151,220,154,45,204,70,193,80,68, + EntropyHashExpd = << 56,199,231,119,170,151,220,154,45,204,70,193,80,68, 46,50,136,31,35,102,141,77,19,66,191,127,97,183,230, 119,243,151 >>, ?assertEqual(EntropyHashExpd, EntropyHash), @@ -85,9 +85,9 @@ test_vectors({FastState, _LightState}) -> {ok, PackedOut} = ar_mine_randomx:randomx_encrypt_replica_2_9_sub_chunk({FastState, Entropy, SubChunk, EntropySubChunkIndex}), PackedOutHashReal = crypto:hash(sha256, PackedOut), - PackedOutHashExpd = << 25,148,72,35,27,27,6,222,247,71,104,10,58,78,178,211, - 204,199,238,124,237,101,100,96,27,64,234,145,250,78, - 75,207>>, + PackedOutHashExpd = << 15,46,184,11,124,31,150,77,199,107,221,0,136,154,61, + 146,193,198,126,52,19,7,211,28,121,108,176,15,124,33, + 48,99 >>, ?assertEqual(PackedOutHashExpd, PackedOutHashReal), {ok, SubChunkReal} = ar_mine_randomx:randomx_decrypt_replica_2_9_sub_chunk({FastState, Key, PackedOut, EntropySubChunkIndex}), From 4cb8a80150e1eb4e88bf5f93b1032b0440c94546 Mon Sep 17 00:00:00 2001 From: vird Date: Mon, 30 Dec 2024 09:28:51 +0000 Subject: [PATCH 3/8] rx fused: new wrapper, optimized by default in packing, in bench --- apps/arweave/src/ar_bench_2_9.erl | 24 
+++++++++++------------- apps/arweave/src/ar_mine_randomx.erl | 19 +++++++++++++++++++ apps/arweave/src/ar_packing_server.erl | 2 +- 3 files changed, 31 insertions(+), 14 deletions(-) diff --git a/apps/arweave/src/ar_bench_2_9.erl b/apps/arweave/src/ar_bench_2_9.erl index 71402b24b..4cf198fd2 100644 --- a/apps/arweave/src/ar_bench_2_9.erl +++ b/apps/arweave/src/ar_bench_2_9.erl @@ -11,7 +11,7 @@ run_benchmark_from_cli(Args) -> Format= case get_flag_value(Args, "format", "replica_2_9") of "replica_2_9" -> replica_2_9; - "replica_2_9p" -> replica_2_9p; + "replica_2_9_baseline" -> replica_2_9_baseline; "composite.1" -> {composite, 1}; "composite.10" -> {composite, 10}; "spora_2_6" -> spora_2_6; @@ -48,9 +48,9 @@ get_flag_value([_ | Tail], TargetFlag, DefaultValue) -> get_flag_value(Tail, TargetFlag, DefaultValue). show_help() -> - io:format("~nUsage: benchmark-2.9 [format replica_2_9|replica_2_9p|composite.1|composite.10|spora_2_6] [threads N] [mib N] [dir path1 dir path2 dir path3 ...]~n~n"), + io:format("~nUsage: benchmark-2.9 [format replica_2_9|replica_2_9_baseline|composite.1|composite.10|spora_2_6] [threads N] [mib N] [dir path1 dir path2 dir path3 ...]~n~n"), - io:format("format: format to pack. replica_2_9, composite.1, composite.10, or spora_2_6. Default: replica_2_9.~n"), + io:format("format: format to pack. replica_2_9, replica_2_9_baseline, composite.1, composite.10, or spora_2_6. Default: replica_2_9.~n"), io:format("threads: number of threads to run. Default: 1.~n"), io:format("mib: total amount of data to pack in MiB. 
Default: 1024.~n"), io:format(" Will be divided evenly between threads, so the final number may be~n"), @@ -116,7 +116,7 @@ prepare_context(replica_2_9, Threads, DataMiB) -> rxsquared, ?RANDOMX_PACKING_KEY, 1, 1, erlang:system_info(dirty_cpu_schedulers_online)), {RandomXState, SubChunk, Key, EntropyPerThread}; -prepare_context(replica_2_9p, Threads, DataMiB)-> +prepare_context(replica_2_9_baseline, Threads, DataMiB)-> prepare_context(replica_2_9, Threads, DataMiB); prepare_context(spora_2_6, Threads, DataMiB) -> Root = crypto:strong_rand_bytes(32), @@ -144,7 +144,7 @@ prepare_context({composite, Difficulty}, Threads, DataMiB) -> get_total_data(replica_2_9, Threads, {_,_, _, EntropyPerThread}) -> Threads * EntropyPerThread * ?REPLICA_2_9_ENTROPY_SUB_CHUNK_COUNT * ?COMPOSITE_PACKING_SUB_CHUNK_SIZE / ?MiB; -get_total_data(replica_2_9p, Threads, Context) -> +get_total_data(replica_2_9_baseline, Threads, Context) -> get_total_data(replica_2_9, Threads, Context); get_total_data(spora_2_6, Threads, {_, _, _, ChunksPerThread}) -> Threads * ChunksPerThread * ?DATA_CHUNK_SIZE / ?MiB; @@ -152,7 +152,7 @@ get_total_data({composite, _}, Threads, {_, _, _, ChunksPerThread}) -> Threads * ChunksPerThread * ?DATA_CHUNK_SIZE / ?MiB. 
get_iterations(replica_2_9, _Threads, {_,_, _, EntropyPerThread}) -> EntropyPerThread; -get_iterations(replica_2_9p, _Threads, {_,_, _, EntropyPerThread}) -> +get_iterations(replica_2_9_baseline, _Threads, {_,_, _, EntropyPerThread}) -> EntropyPerThread; get_iterations(spora_2_6, _Threads, {_, _, _, ChunksPerThread}) -> ChunksPerThread; @@ -161,7 +161,7 @@ get_iterations({composite, _}, _Threads, {_, _, _, ChunksPerThread}) -> pack_chunks(_Format, _Thread, _Dir, _Context, 0) -> ok; -pack_chunks(replica_2_9, Thread, Dir, Context, Count) -> +pack_chunks(replica_2_9_baseline, Thread, Dir, Context, Count) -> {RandomXState, SubChunk, Key, _EntropyPerThread} = Context, Entropy = ar_mine_randomx:randomx_generate_replica_2_9_entropy(RandomXState, Key), PackedSubChunks = pack_sub_chunks(SubChunk, Entropy, 0, RandomXState, []), @@ -174,12 +174,10 @@ pack_chunks(replica_2_9, Thread, Dir, Context, Count) -> Path = filename:join(Dir, Filename), file:write_file(Path, PackedSubChunks) end, - pack_chunks(replica_2_9, Thread, Dir, Context, Count-1); -% pack_chunks(replica_2_9p, Thread, Dir, Context, Count) -> -% pack_chunks(replica_2_9, Thread, Dir, Context, Count); -pack_chunks(replica_2_9p, _Thread, _Dir, _Context, 0) -> + pack_chunks(replica_2_9_baseline, Thread, Dir, Context, Count-1); +pack_chunks(replica_2_9, _Thread, _Dir, _Context, 0) -> ok; -pack_chunks(replica_2_9p, Thread, Dir, Context, Count) -> +pack_chunks(replica_2_9, Thread, Dir, Context, Count) -> {RandomXState, SubChunk, Key, _EntropyPerThread} = Context, %% This is where we call the new fused NIF: @@ -207,7 +205,7 @@ pack_chunks(replica_2_9p, Thread, Dir, Context, Count) -> Path = filename:join(Dir, Filename), file:write_file(Path, PackedSubChunks) end, - pack_chunks(replica_2_9p, Thread, Dir, Context, Count - 1); + pack_chunks(replica_2_9, Thread, Dir, Context, Count - 1); pack_chunks(spora_2_6, Thread, Dir, Context, Count) -> {RandomXState, Chunk, Key, _ChunksPerThread} = Context, diff --git 
a/apps/arweave/src/ar_mine_randomx.erl b/apps/arweave/src/ar_mine_randomx.erl index 2da40f377..6d115086d 100755 --- a/apps/arweave/src/ar_mine_randomx.erl +++ b/apps/arweave/src/ar_mine_randomx.erl @@ -7,6 +7,7 @@ randomx_reencrypt_chunk/7, randomx_generate_replica_2_9_entropy/2, + randomx_generate_replica_2_9_entropy_opt/2, randomx_encrypt_replica_2_9_sub_chunk/1, randomx_decrypt_replica_2_9_sub_chunk/1, randomx_decrypt_replica_2_9_sub_chunk2/1, @@ -131,6 +132,24 @@ randomx_generate_replica_2_9_entropy({rxsquared, RandomxState}, Key) -> randomx_generate_replica_2_9_entropy(RandomxState, Key, HashesScratchpads0, 1, ?REPLICA_2_9_RANDOMX_DEPTH). +%% Optimized wrapper +randomx_generate_replica_2_9_entropy_opt({_, {debug_state, _}} = State, Key) -> + % fallback for tests + randomx_generate_replica_2_9_entropy(State, Key); +randomx_generate_replica_2_9_entropy_opt({rxsquared, RandomxState}, Key) -> + {ok, EntropyFused} = ar_rxsquared_nif:rsp_fused_entropy_nif( + RandomxState, + ?REPLICA_2_9_ENTROPY_SUB_CHUNK_COUNT, + ?COMPOSITE_PACKING_SUB_CHUNK_SIZE, + ?REPLICA_2_9_RANDOMX_LANE_COUNT, + ?REPLICA_2_9_RANDOMX_DEPTH, + jit(), + large_pages(), + hardware_aes(), + ?REPLICA_2_9_RANDOMX_ROUND_COUNT, + Key + ), + EntropyFused. 
write_scratchpad_to_disk(Type, Hash0, Scratchpad0) -> HashHex = ar_util:encode(Hash0), diff --git a/apps/arweave/src/ar_packing_server.erl b/apps/arweave/src/ar_packing_server.erl index a0d6dff47..cc0323cec 100644 --- a/apps/arweave/src/ar_packing_server.erl +++ b/apps/arweave/src/ar_packing_server.erl @@ -642,7 +642,7 @@ get_replica_2_9_entropy(Key, RandomXState) -> SubChunkSize = ?COMPOSITE_PACKING_SUB_CHUNK_SIZE, EntropySize = ?REPLICA_2_9_ENTROPY_SUB_CHUNK_COUNT * SubChunkSize, ar_shared_entropy_cache:allocate_space(EntropySize, MaxCacheSize), - Entropy = ar_mine_randomx:randomx_generate_replica_2_9_entropy(RandomXState, Key), + Entropy = ar_mine_randomx:randomx_generate_replica_2_9_entropy_opt(RandomXState, Key), ar_shared_entropy_cache:put(Key, Entropy, EntropySize), Entropy; {ok, Entropy} -> From 285987ef62fbf6b3d11d9214dd6de1f045aa8361 Mon Sep 17 00:00:00 2001 From: vird Date: Mon, 30 Dec 2024 19:49:52 +0000 Subject: [PATCH 4/8] refactor: remove lambdas --- .../c_src/randomx/pack_randomx_square.cpp | 183 +++++++++--------- 1 file changed, 96 insertions(+), 87 deletions(-) diff --git a/apps/arweave/c_src/randomx/pack_randomx_square.cpp b/apps/arweave/c_src/randomx/pack_randomx_square.cpp index 18e637b97..ad0dfbe4a 100644 --- a/apps/arweave/c_src/randomx/pack_randomx_square.cpp +++ b/apps/arweave/c_src/randomx/pack_randomx_square.cpp @@ -157,6 +157,102 @@ extern "C" { } } + + void randomx_squared_exec_inplace(randomx_vm* machine, uint64_t* srcTempHash, uint64_t* dstTempHash, int programCount, size_t scratchpadSize) { + machine->resetRoundingMode(); + for (int chain = 0; chain < programCount-1; chain++) { + machine->run(srcTempHash); + int br = randomx_blake2b( + srcTempHash, 64, + machine->getRegisterFile(), + sizeof(randomx::RegisterFile), + nullptr, 0 + ); + assert(br == 0); + } + machine->run(srcTempHash); + int br = randomx_blake2b( + dstTempHash, 64, + machine->getRegisterFile(), + sizeof(randomx::RegisterFile), + nullptr, 0 + ); + assert(br == 0); + 
packing_mix_entropy_crc32( + (const unsigned char*)machine->getScratchpad(), + (unsigned char*)(void*)machine->getScratchpad(), + scratchpadSize); + } + + void copyChunkCrossLane( + randomx_vm** inSet, + randomx_vm** outSet, + size_t srcPos, + size_t dstPos, + size_t length, + size_t scratchpadSize + ) { + while (length > 0) { + int srcLane = (int)(srcPos / scratchpadSize); + size_t offsetInSrcLane = srcPos % scratchpadSize; + + int dstLane = (int)(dstPos / scratchpadSize); + size_t offsetInDstLane = dstPos % scratchpadSize; + + size_t srcLaneRemain = scratchpadSize - offsetInSrcLane; + size_t dstLaneRemain = scratchpadSize - offsetInDstLane; + + size_t chunkSize = length; + if (chunkSize > srcLaneRemain) { + chunkSize = srcLaneRemain; + } + if (chunkSize > dstLaneRemain) { + chunkSize = dstLaneRemain; + } + + unsigned char* srcSp = (unsigned char*)(void*) inSet[srcLane]->getScratchpad(); + unsigned char* dstSp = (unsigned char*)(void*) outSet[dstLane]->getScratchpad(); + memcpy(dstSp + offsetInDstLane, srcSp + offsetInSrcLane, chunkSize); + + srcPos += chunkSize; + dstPos += chunkSize; + length -= chunkSize; + } + } + + void packing_mix_entropy_far_sets( + randomx_vm** inSet, + randomx_vm** outSet, + int count, + size_t scratchpadSize, + size_t jumpSize, + size_t blockSize) + { + size_t totalSize = (size_t)count * scratchpadSize; + + size_t entropySize = totalSize; + size_t numJumps = entropySize / jumpSize; + size_t numBlocksPerJump = jumpSize / blockSize; + size_t leftover = jumpSize % blockSize; + + size_t outOffset = 0; + for (size_t offset = 0; offset < numBlocksPerJump; ++offset) { + for (size_t i = 0; i < numJumps; ++i) { + size_t srcPos = i * jumpSize + offset * blockSize; + copyChunkCrossLane(inSet, outSet, srcPos, outOffset, blockSize, scratchpadSize); + outOffset += blockSize; + } + } + + if (leftover > 0) { + for (size_t i = 0; i < numJumps; ++i) { + size_t srcPos = i * jumpSize + numBlocksPerJump * blockSize; + copyChunkCrossLane(inSet, outSet, 
srcPos, outOffset, leftover, scratchpadSize); + outOffset += leftover; + } + } + } + int rsp_fused_entropy( randomx_vm** vmList, size_t scratchpadSize, @@ -205,93 +301,6 @@ extern "C" { ); } - auto randomx_squared_exec_inplace = [&](randomx_vm* machine, uint64_t* srcTempHash, uint64_t* dstTempHash, int programCount, size_t scratchpadSize) { - machine->resetRoundingMode(); - for (int chain = 0; chain < programCount-1; chain++) { - machine->run(srcTempHash); - int br = randomx_blake2b( - srcTempHash, 64, - machine->getRegisterFile(), - sizeof(randomx::RegisterFile), - nullptr, 0 - ); - assert(br == 0); - } - machine->run(srcTempHash); - int br = randomx_blake2b( - dstTempHash, 64, - machine->getRegisterFile(), - sizeof(randomx::RegisterFile), - nullptr, 0 - ); - assert(br == 0); - packing_mix_entropy_crc32( - (const unsigned char*)machine->getScratchpad(), - (unsigned char*)(void*)machine->getScratchpad(), - scratchpadSize); - }; - - auto packing_mix_entropy_far_sets = [&](randomx_vm** inSet, - randomx_vm** outSet, - int count, - size_t scratchpadSize, - size_t jumpSize, - size_t blockSize) - { - size_t totalSize = (size_t)count * scratchpadSize; - - auto copyChunkCrossLane = [&](size_t srcPos, size_t dstPos, size_t length) { - while (length > 0) { - int srcLane = (int)(srcPos / scratchpadSize); - size_t offsetInSrcLane = srcPos % scratchpadSize; - - int dstLane = (int)(dstPos / scratchpadSize); - size_t offsetInDstLane = dstPos % scratchpadSize; - - size_t srcLaneRemain = scratchpadSize - offsetInSrcLane; - size_t dstLaneRemain = scratchpadSize - offsetInDstLane; - - size_t chunkSize = length; - if (chunkSize > srcLaneRemain) { - chunkSize = srcLaneRemain; - } - if (chunkSize > dstLaneRemain) { - chunkSize = dstLaneRemain; - } - - unsigned char* srcSp = (unsigned char*)(void*) inSet[srcLane]->getScratchpad(); - unsigned char* dstSp = (unsigned char*)(void*) outSet[dstLane]->getScratchpad(); - memcpy(dstSp + offsetInDstLane, srcSp + offsetInSrcLane, chunkSize); - - 
srcPos += chunkSize; - dstPos += chunkSize; - length -= chunkSize; - } - }; - - size_t entropySize = totalSize; - size_t numJumps = entropySize / jumpSize; - size_t numBlocksPerJump = jumpSize / blockSize; - size_t leftover = jumpSize % blockSize; - - size_t outOffset = 0; - for (size_t offset = 0; offset < numBlocksPerJump; ++offset) { - for (size_t i = 0; i < numJumps; ++i) { - size_t srcPos = i * jumpSize + offset * blockSize; - copyChunkCrossLane(srcPos, outOffset, blockSize); - outOffset += blockSize; - } - } - - if (leftover > 0) { - for (size_t i = 0; i < numJumps; ++i) { - size_t srcPos = i * jumpSize + numBlocksPerJump * blockSize; - copyChunkCrossLane(srcPos, outOffset, leftover); - outOffset += leftover; - } - } - }; - for (int d = 0; d < rxDepth; d++) { for (int lane = 0; lane < laneCount; lane++) { randomx_squared_exec_inplace(vmList[lane], vmHashes[lane].tempHash, vmHashes[lane+laneCount].tempHash, randomxProgramCount, scratchpadSize); From a78a946c6ea82d515ab50497a837c495da5d0408 Mon Sep 17 00:00:00 2001 From: vird Date: Mon, 30 Dec 2024 21:11:52 +0000 Subject: [PATCH 5/8] remove non-optimized version; remove quick tests; add feistel tests; rename methods to single style --- .../c_src/randomx/pack_randomx_square.cpp | 80 ++----- .../c_src/randomx/pack_randomx_square.h | 13 +- .../randomx/rxsquared/ar_rxsquared_nif.c | 201 +----------------- apps/arweave/src/ar_bench_2_9.erl | 46 +--- apps/arweave/src/ar_mine_randomx.erl | 60 ------ apps/arweave/src/ar_packing_server.erl | 2 +- apps/arweave/src/ar_rxsquared_nif.erl | 23 +- apps/arweave/test/ar_audit_tests.erl | 38 +--- apps/arweave/test/ar_packing_tests.erl | 18 +- 9 files changed, 55 insertions(+), 426 deletions(-) diff --git a/apps/arweave/c_src/randomx/pack_randomx_square.cpp b/apps/arweave/c_src/randomx/pack_randomx_square.cpp index ad0dfbe4a..7570cb8b3 100644 --- a/apps/arweave/c_src/randomx/pack_randomx_square.cpp +++ b/apps/arweave/c_src/randomx/pack_randomx_square.cpp @@ -10,40 +10,7 @@ 
#include "aes_hash.hpp" extern "C" { - void randomx_squared_exec( - randomx_vm *machine, - const unsigned char *inHash, const unsigned char *inScratchpad, - unsigned char *outHash, unsigned char *outScratchpad, - const int randomxProgramCount) { - assert(machine != nullptr); - alignas(16) uint64_t tempHash[8]; - memcpy(tempHash, inHash, sizeof(tempHash)); - void* scratchpad = (void*)machine->getScratchpad(); - memcpy(scratchpad, inScratchpad, randomx_get_scratchpad_size()); - machine->resetRoundingMode(); - int blakeResult; - for (int chain = 0; chain < randomxProgramCount - 1; ++chain) { - machine->run(&tempHash); - blakeResult = randomx_blake2b( - tempHash, sizeof(tempHash), machine->getRegisterFile(), - sizeof(randomx::RegisterFile), nullptr, 0); - assert(blakeResult == 0); - } - machine->run(&tempHash); - - blakeResult = randomx_blake2b( - tempHash, sizeof(tempHash), machine->getRegisterFile(), - sizeof(randomx::RegisterFile), nullptr, 0); - assert(blakeResult == 0); - - memcpy(outHash, tempHash, sizeof(tempHash)); - - packing_mix_entropy_crc32( - (const unsigned char*)machine->getScratchpad(), - outScratchpad, randomx_get_scratchpad_size()); - } - - void randomx_squared_exec_test( + void rsp_exec_test( randomx_vm *machine, const unsigned char *inHash, const unsigned char *inScratchpad, unsigned char *outHash, unsigned char *outScratchpad, @@ -73,26 +40,7 @@ extern "C" { memcpy(outScratchpad, machine->getScratchpad(), randomx_get_scratchpad_size()); } - // init_msg + hash - void randomx_squared_init_scratchpad( - randomx_vm *machine, const unsigned char *input, const size_t inputSize, - unsigned char *outHash, unsigned char *outScratchpad, - const int randomxProgramCount) { - assert(machine != nullptr); - assert(inputSize == 0 || input != nullptr); - alignas(16) uint64_t tempHash[8]; - int blakeResult = randomx_blake2b( - tempHash, sizeof(tempHash), input, inputSize, nullptr, 0); - assert(blakeResult == 0); - void* scratchpad = (void*)machine->getScratchpad(); 
- // bool softAes = false - fillAes1Rx4(tempHash, randomx_get_scratchpad_size(), scratchpad); - - memcpy(outHash, tempHash, sizeof(tempHash)); - memcpy(outScratchpad, machine->getScratchpad(), randomx_get_scratchpad_size()); - } - - void packing_mix_entropy_crc32( + void rsp_mix_entropy_crc32( const unsigned char *inEntropy, unsigned char *outEntropy, const size_t entropySize) { // NOTE we can't use _mm_crc32_u64, because it output only final 32-bit result @@ -132,7 +80,7 @@ extern "C" { } } - void packing_mix_entropy_far( + void rsp_mix_entropy_far( const unsigned char *inEntropy, unsigned char *outEntropy, const size_t entropySize, const size_t jumpSize, const size_t blockSize) { @@ -157,8 +105,8 @@ extern "C" { } } - - void randomx_squared_exec_inplace(randomx_vm* machine, uint64_t* srcTempHash, uint64_t* dstTempHash, int programCount, size_t scratchpadSize) { + // Group of functions related to rsp_fused_entropy + void _rsp_exec_inplace(randomx_vm* machine, uint64_t* srcTempHash, uint64_t* dstTempHash, int programCount, size_t scratchpadSize) { machine->resetRoundingMode(); for (int chain = 0; chain < programCount-1; chain++) { machine->run(srcTempHash); @@ -178,13 +126,13 @@ extern "C" { nullptr, 0 ); assert(br == 0); - packing_mix_entropy_crc32( + rsp_mix_entropy_crc32( (const unsigned char*)machine->getScratchpad(), (unsigned char*)(void*)machine->getScratchpad(), scratchpadSize); } - void copyChunkCrossLane( + void _copy_chunk_cross_lane( randomx_vm** inSet, randomx_vm** outSet, size_t srcPos, @@ -220,7 +168,7 @@ extern "C" { } } - void packing_mix_entropy_far_sets( + void packing_mix_entropy_direct( randomx_vm** inSet, randomx_vm** outSet, int count, @@ -239,7 +187,7 @@ extern "C" { for (size_t offset = 0; offset < numBlocksPerJump; ++offset) { for (size_t i = 0; i < numJumps; ++i) { size_t srcPos = i * jumpSize + offset * blockSize; - copyChunkCrossLane(inSet, outSet, srcPos, outOffset, blockSize, scratchpadSize); + _copy_chunk_cross_lane(inSet, outSet, 
srcPos, outOffset, blockSize, scratchpadSize); outOffset += blockSize; } } @@ -247,7 +195,7 @@ extern "C" { if (leftover > 0) { for (size_t i = 0; i < numJumps; ++i) { size_t srcPos = i * jumpSize + numBlocksPerJump * blockSize; - copyChunkCrossLane(inSet, outSet, srcPos, outOffset, leftover, scratchpadSize); + _copy_chunk_cross_lane(inSet, outSet, srcPos, outOffset, leftover, scratchpadSize); outOffset += leftover; } } @@ -303,18 +251,18 @@ extern "C" { for (int d = 0; d < rxDepth; d++) { for (int lane = 0; lane < laneCount; lane++) { - randomx_squared_exec_inplace(vmList[lane], vmHashes[lane].tempHash, vmHashes[lane+laneCount].tempHash, randomxProgramCount, scratchpadSize); + _rsp_exec_inplace(vmList[lane], vmHashes[lane].tempHash, vmHashes[lane+laneCount].tempHash, randomxProgramCount, scratchpadSize); } - packing_mix_entropy_far_sets(&vmList[0], &vmList[laneCount], + packing_mix_entropy_direct(&vmList[0], &vmList[laneCount], laneCount, scratchpadSize, scratchpadSize, blockSize); if (d + 1 < rxDepth) { d++; for (int lane = laneCount; lane < 2*laneCount; lane++) { - randomx_squared_exec_inplace(vmList[lane], vmHashes[lane].tempHash, vmHashes[lane-laneCount].tempHash, randomxProgramCount, scratchpadSize); + _rsp_exec_inplace(vmList[lane], vmHashes[lane].tempHash, vmHashes[lane-laneCount].tempHash, randomxProgramCount, scratchpadSize); } - packing_mix_entropy_far_sets(&vmList[laneCount], &vmList[0], + packing_mix_entropy_direct(&vmList[laneCount], &vmList[0], laneCount, scratchpadSize, scratchpadSize, blockSize); } diff --git a/apps/arweave/c_src/randomx/pack_randomx_square.h b/apps/arweave/c_src/randomx/pack_randomx_square.h index cefc41c90..de1cabe0d 100755 --- a/apps/arweave/c_src/randomx/pack_randomx_square.h +++ b/apps/arweave/c_src/randomx/pack_randomx_square.h @@ -7,21 +7,14 @@ extern "C" { #endif -RANDOMX_EXPORT void randomx_squared_exec( +RANDOMX_EXPORT void rsp_exec_test( randomx_vm *machine, const unsigned char *inHash, const unsigned char *inScratchpad, 
unsigned char *outHash, unsigned char *outScratchpad, const int randomxProgramCount); -RANDOMX_EXPORT void randomx_squared_exec_test( - randomx_vm *machine, const unsigned char *inHash, const unsigned char *inScratchpad, - unsigned char *outHash, unsigned char *outScratchpad, const int randomxProgramCount); -// init_msg + hash -RANDOMX_EXPORT void randomx_squared_init_scratchpad( - randomx_vm *machine, const unsigned char *input, const size_t inputSize, - unsigned char *outHash, unsigned char *outScratchpad, const int randomxProgramCount); -RANDOMX_EXPORT void packing_mix_entropy_crc32( +RANDOMX_EXPORT void rsp_mix_entropy_crc32( const unsigned char *inEntropy, unsigned char *outEntropy, const size_t entropySize); -RANDOMX_EXPORT void packing_mix_entropy_far( +RANDOMX_EXPORT void rsp_mix_entropy_far( const unsigned char *inEntropy, unsigned char *outEntropy, const size_t entropySize, const size_t jumpSize, const size_t blockSize); diff --git a/apps/arweave/c_src/randomx/rxsquared/ar_rxsquared_nif.c b/apps/arweave/c_src/randomx/rxsquared/ar_rxsquared_nif.c index d2f525264..a2d40655a 100755 --- a/apps/arweave/c_src/randomx/rxsquared/ar_rxsquared_nif.c +++ b/apps/arweave/c_src/randomx/rxsquared/ar_rxsquared_nif.c @@ -15,101 +15,22 @@ static ERL_NIF_TERM rxsquared_info_nif(ErlNifEnv* envPtr, int argc, const ERL_NI static ERL_NIF_TERM rxsquared_init_nif(ErlNifEnv* envPtr, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM rxsquared_hash_nif(ErlNifEnv* envPtr, int argc, const ERL_NIF_TERM argv[]); -static int rxsquared_load(ErlNifEnv* envPtr, void** priv, ERL_NIF_TERM info) -{ +static int rxsquared_load(ErlNifEnv* envPtr, void** priv, ERL_NIF_TERM info) { return load(envPtr, priv, info); } -static ERL_NIF_TERM rxsquared_info_nif(ErlNifEnv* envPtr, int argc, const ERL_NIF_TERM argv[]) -{ +static ERL_NIF_TERM rxsquared_info_nif(ErlNifEnv* envPtr, int argc, const ERL_NIF_TERM argv[]) { return info_nif("rxsquared", envPtr, argc, argv); } -static ERL_NIF_TERM 
rxsquared_init_nif(ErlNifEnv* envPtr, int argc, const ERL_NIF_TERM argv[]) -{ +static ERL_NIF_TERM rxsquared_init_nif(ErlNifEnv* envPtr, int argc, const ERL_NIF_TERM argv[]) { return init_nif(envPtr, argc, argv); } -static ERL_NIF_TERM rxsquared_hash_nif(ErlNifEnv* envPtr, int argc, const ERL_NIF_TERM argv[]) -{ +static ERL_NIF_TERM rxsquared_hash_nif(ErlNifEnv* envPtr, int argc, const ERL_NIF_TERM argv[]) { return hash_nif(envPtr, argc, argv); } -static ERL_NIF_TERM rsp_exec_nif( - ErlNifEnv* envPtr, int argc, const ERL_NIF_TERM argv[]) { - if (argc != 7) { - return enif_make_badarg(envPtr); - } - - int randomxProgramCount; - int jitEnabled, largePagesEnabled, hardwareAESEnabled; - struct state* statePtr; - ErlNifBinary inHashBin; - ErlNifBinary inScratchpadBin; - ERL_NIF_TERM outHashTerm; - unsigned char* outHashData; - ERL_NIF_TERM outScratchpadTerm; - unsigned char* outScratchpadData; - - if (!enif_get_resource(envPtr, argv[0], stateType, (void**) &statePtr)) { - return error_tuple(envPtr, "failed to read state"); - } - if (!enif_inspect_binary(envPtr, argv[1], &inHashBin)) { - return enif_make_badarg(envPtr); - } - if (!enif_inspect_binary(envPtr, argv[2], &inScratchpadBin)) { - return enif_make_badarg(envPtr); - } - if (!enif_get_int(envPtr, argv[3], &jitEnabled)) { - return enif_make_badarg(envPtr); - } - if (!enif_get_int(envPtr, argv[4], &largePagesEnabled)) { - return enif_make_badarg(envPtr); - } - if (!enif_get_int(envPtr, argv[5], &hardwareAESEnabled)) { - return enif_make_badarg(envPtr); - } - if (!enif_get_int(envPtr, argv[6], &randomxProgramCount)) { - return enif_make_badarg(envPtr); - } - - if (inHashBin.size != 64) { - return enif_make_badarg(envPtr); - } - if (inScratchpadBin.size != randomx_get_scratchpad_size()) { - return enif_make_badarg(envPtr); - } - - int isRandomxReleased; - randomx_vm *vmPtr = create_vm(statePtr, (statePtr->mode == HASHING_MODE_FAST), - jitEnabled, largePagesEnabled, hardwareAESEnabled, &isRandomxReleased); - if (vmPtr 
== NULL) { - if (isRandomxReleased != 0) { - return error_tuple(envPtr, "state has been released"); - } - return error_tuple(envPtr, "randomx_create_vm failed"); - } - - outScratchpadData = enif_make_new_binary( - envPtr, randomx_get_scratchpad_size(), &outScratchpadTerm); - if (outScratchpadData == NULL) { - return enif_make_badarg(envPtr); - } - - outHashData = enif_make_new_binary(envPtr, 64, &outHashTerm); - if (outHashData == NULL) { - return enif_make_badarg(envPtr); - } - - randomx_squared_exec( - vmPtr, inHashBin.data, inScratchpadBin.data, outHashData, outScratchpadData, - randomxProgramCount); - - destroy_vm(statePtr, vmPtr); - - return ok_tuple2(envPtr, outHashTerm, outScratchpadTerm); -} - static ERL_NIF_TERM rsp_exec_test_nif( ErlNifEnv* envPtr, int argc, const ERL_NIF_TERM argv[]) { if (argc != 7) { @@ -176,7 +97,7 @@ static ERL_NIF_TERM rsp_exec_test_nif( return enif_make_badarg(envPtr); } - randomx_squared_exec_test( + rsp_exec_test( vmPtr, inHashBin.data, inScratchpadBin.data, outHashData, outScratchpadData, randomxProgramCount); @@ -185,103 +106,8 @@ static ERL_NIF_TERM rsp_exec_test_nif( return ok_tuple2(envPtr, outHashTerm, outScratchpadTerm); } -static ERL_NIF_TERM rsp_init_scratchpad_nif( - ErlNifEnv* envPtr, int argc, const ERL_NIF_TERM argv[]) { - if (argc != 6) { - return enif_make_badarg(envPtr); - } - - int randomxProgramCount; - int jitEnabled, largePagesEnabled, hardwareAESEnabled; - struct state* statePtr; - ErlNifBinary inputBin; - ERL_NIF_TERM outHashTerm; - unsigned char* outHashData; - ERL_NIF_TERM outScratchpadTerm; - unsigned char* outScratchpadData; - - if (!enif_get_resource(envPtr, argv[0], stateType, (void**) &statePtr)) { - return error_tuple(envPtr, "failed to read state"); - } - if (!enif_inspect_binary(envPtr, argv[1], &inputBin)) { - return enif_make_badarg(envPtr); - } - if (!enif_get_int(envPtr, argv[2], &jitEnabled)) { - return enif_make_badarg(envPtr); - } - if (!enif_get_int(envPtr, argv[3], &largePagesEnabled)) { - 
return enif_make_badarg(envPtr); - } - if (!enif_get_int(envPtr, argv[4], &hardwareAESEnabled)) { - return enif_make_badarg(envPtr); - } - if (!enif_get_int(envPtr, argv[5], &randomxProgramCount)) { - return enif_make_badarg(envPtr); - } - - int isRandomxReleased; - randomx_vm *vmPtr = create_vm(statePtr, (statePtr->mode == HASHING_MODE_FAST), - jitEnabled, largePagesEnabled, hardwareAESEnabled, &isRandomxReleased); - if (vmPtr == NULL) { - if (isRandomxReleased != 0) { - return error_tuple(envPtr, "state has been released"); - } - return error_tuple(envPtr, "randomx_create_vm failed"); - } - - outScratchpadData = enif_make_new_binary( - envPtr, randomx_get_scratchpad_size(), &outScratchpadTerm); - if (outScratchpadData == NULL) { - return enif_make_badarg(envPtr); - } - - outHashData = enif_make_new_binary(envPtr, 64, &outHashTerm); - if (outHashData == NULL) { - return enif_make_badarg(envPtr); - } - - randomx_squared_init_scratchpad( - vmPtr, inputBin.data, inputBin.size, outHashData, outScratchpadData, - randomxProgramCount); - - destroy_vm(statePtr, vmPtr); - - return ok_tuple2(envPtr, outHashTerm, outScratchpadTerm); -} - - // pack randomx square randomx independent -static ERL_NIF_TERM rsp_mix_entropy_crc32_nif( - ErlNifEnv* envPtr, int argc, const ERL_NIF_TERM argv[]) { - ErlNifBinary inEntropyBin; - ERL_NIF_TERM outEntropyTerm; - unsigned char* outEntropyData; - - if (argc != 1) { - return enif_make_badarg(envPtr); - } - - if (!enif_inspect_binary(envPtr, argv[0], &inEntropyBin)) { - return enif_make_badarg(envPtr); - } - - size_t entropySize = inEntropyBin.size; - - if (entropySize % 8 != 0) { - return enif_make_badarg(envPtr); - } - - outEntropyData = enif_make_new_binary(envPtr, entropySize, &outEntropyTerm); - if (outEntropyData == NULL) { - return enif_make_badarg(envPtr); - } - - packing_mix_entropy_crc32(inEntropyBin.data, outEntropyData, entropySize); - - return ok_tuple(envPtr, outEntropyTerm); -} - -static ERL_NIF_TERM rsp_mix_entropy_far_nif( 
+static ERL_NIF_TERM rsp_mix_entropy_crc32_test_nif( ErlNifEnv* envPtr, int argc, const ERL_NIF_TERM argv[]) { ErlNifBinary inEntropyBin; ERL_NIF_TERM outEntropyTerm; @@ -306,8 +132,7 @@ static ERL_NIF_TERM rsp_mix_entropy_far_nif( return enif_make_badarg(envPtr); } - packing_mix_entropy_far(inEntropyBin.data, outEntropyData, entropySize, - randomx_get_scratchpad_size(), 6); + rsp_mix_entropy_crc32(inEntropyBin.data, outEntropyData, entropySize); return ok_tuple(envPtr, outEntropyTerm); } @@ -345,7 +170,7 @@ static ERL_NIF_TERM rsp_mix_entropy_far_test_nif( return enif_make_badarg(envPtr); } - packing_mix_entropy_far(inEntropyBin.data, outEntropyData, entropySize, + rsp_mix_entropy_far(inEntropyBin.data, outEntropyData, entropySize, jumpSize, blockSize); return ok_tuple(envPtr, outEntropyTerm); @@ -572,16 +397,10 @@ static ErlNifFunc rxsquared_funcs[] = { {"rxsquared_init_nif", 5, rxsquared_init_nif, ERL_NIF_DIRTY_JOB_CPU_BOUND}, {"rxsquared_hash_nif", 5, rxsquared_hash_nif, ERL_NIF_DIRTY_JOB_CPU_BOUND}, - {"rsp_exec_nif", 7, - rsp_exec_nif, ERL_NIF_DIRTY_JOB_CPU_BOUND}, {"rsp_exec_test_nif", 7, rsp_exec_test_nif, ERL_NIF_DIRTY_JOB_CPU_BOUND}, - {"rsp_init_scratchpad_nif", 6, - rsp_init_scratchpad_nif, ERL_NIF_DIRTY_JOB_CPU_BOUND}, - {"rsp_mix_entropy_crc32_nif", 1, - rsp_mix_entropy_crc32_nif, ERL_NIF_DIRTY_JOB_CPU_BOUND}, - {"rsp_mix_entropy_far_nif", 1, - rsp_mix_entropy_far_nif, ERL_NIF_DIRTY_JOB_CPU_BOUND}, + {"rsp_mix_entropy_crc32_test_nif", 1, + rsp_mix_entropy_crc32_test_nif, ERL_NIF_DIRTY_JOB_CPU_BOUND}, {"rsp_mix_entropy_far_test_nif", 3, rsp_mix_entropy_far_test_nif, ERL_NIF_DIRTY_JOB_CPU_BOUND}, {"rsp_fused_entropy_nif", 10, diff --git a/apps/arweave/src/ar_bench_2_9.erl b/apps/arweave/src/ar_bench_2_9.erl index 08b027aac..d1d3e7ac7 100644 --- a/apps/arweave/src/ar_bench_2_9.erl +++ b/apps/arweave/src/ar_bench_2_9.erl @@ -11,7 +11,6 @@ run_benchmark_from_cli(Args) -> Format= case get_flag_value(Args, "format", "replica_2_9") of "replica_2_9" -> 
replica_2_9; - "replica_2_9_baseline" -> replica_2_9_baseline; "composite.1" -> {composite, 1}; "composite.10" -> {composite, 10}; "spora_2_6" -> spora_2_6; @@ -48,9 +47,9 @@ get_flag_value([_ | Tail], TargetFlag, DefaultValue) -> get_flag_value(Tail, TargetFlag, DefaultValue). show_help() -> - io:format("~nUsage: benchmark-2.9 [format replica_2_9|replica_2_9_baseline|composite.1|composite.10|spora_2_6] [threads N] [mib N] [dir path1 dir path2 dir path3 ...]~n~n"), + io:format("~nUsage: benchmark-2.9 [format replica_2_9|composite.1|composite.10|spora_2_6] [threads N] [mib N] [dir path1 dir path2 dir path3 ...]~n~n"), - io:format("format: format to pack. replica_2_9, replica_2_9_baseline, composite.1, composite.10, or spora_2_6. Default: replica_2_9.~n"), + io:format("format: format to pack. replica_2_9, composite.1, composite.10, or spora_2_6. Default: replica_2_9.~n"), io:format("threads: number of threads to run. Default: 1.~n"), io:format("mib: total amount of data to pack in MiB. Default: 1024.~n"), io:format(" Will be divided evenly between threads, so the final number may be~n"), @@ -115,8 +114,6 @@ prepare_context(replica_2_9, Threads, DataMiB) -> rxsquared, ?RANDOMX_PACKING_KEY, 1, 1, erlang:system_info(dirty_cpu_schedulers_online)), {RandomXState, SubChunk, Key, EntropyPerThread}; -prepare_context(replica_2_9_baseline, Threads, DataMiB)-> - prepare_context(replica_2_9, Threads, DataMiB); prepare_context(spora_2_6, Threads, DataMiB) -> Root = crypto:strong_rand_bytes(32), Address = crypto:strong_rand_bytes(32), @@ -142,16 +139,12 @@ prepare_context({composite, Difficulty}, Threads, DataMiB) -> get_total_data(replica_2_9, Threads, {_,_, _, EntropyPerThread}) -> Threads * EntropyPerThread * ?REPLICA_2_9_ENTROPY_SIZE / ?MiB; -get_total_data(replica_2_9_baseline, Threads, Context) -> - get_total_data(replica_2_9, Threads, Context); get_total_data(spora_2_6, Threads, {_, _, _, ChunksPerThread}) -> Threads * ChunksPerThread * ?DATA_CHUNK_SIZE / ?MiB; 
get_total_data({composite, _}, Threads, {_, _, _, ChunksPerThread}) -> Threads * ChunksPerThread * ?DATA_CHUNK_SIZE / ?MiB. get_iterations(replica_2_9, _Threads, {_,_, _, EntropyPerThread}) -> EntropyPerThread; -get_iterations(replica_2_9_baseline, _Threads, {_,_, _, EntropyPerThread}) -> - EntropyPerThread; get_iterations(spora_2_6, _Threads, {_, _, _, ChunksPerThread}) -> ChunksPerThread; get_iterations({composite, _}, _Threads, {_, _, _, ChunksPerThread}) -> @@ -159,7 +152,7 @@ get_iterations({composite, _}, _Threads, {_, _, _, ChunksPerThread}) -> pack_chunks(_Format, _Thread, _Dir, _Context, 0) -> ok; -pack_chunks(replica_2_9_baseline, Thread, Dir, Context, Count) -> +pack_chunks(replica_2_9, Thread, Dir, Context, Count) -> {RandomXState, SubChunk, Key, _EntropyPerThread} = Context, Entropy = ar_mine_randomx:randomx_generate_replica_2_9_entropy(RandomXState, Key), PackedSubChunks = pack_sub_chunks(SubChunk, Entropy, 0, RandomXState, []), @@ -172,38 +165,7 @@ pack_chunks(replica_2_9_baseline, Thread, Dir, Context, Count) -> Path = filename:join(Dir, Filename), file:write_file(Path, PackedSubChunks) end, - pack_chunks(replica_2_9_baseline, Thread, Dir, Context, Count-1); -pack_chunks(replica_2_9, _Thread, _Dir, _Context, 0) -> - ok; -pack_chunks(replica_2_9, Thread, Dir, Context, Count) -> - {RandomXState, SubChunk, Key, _EntropyPerThread} = Context, - - %% This is where we call the new fused NIF: - %% Suppose the new NIF returns {ok, EntropyBin} or something similar. 
- {ok, Entropy} = ar_rxsquared_nif:rsp_fused_entropy_nif( - element(2, RandomXState), - ?COMPOSITE_PACKING_SUB_CHUNK_COUNT, - ?COMPOSITE_PACKING_SUB_CHUNK_SIZE, - ?REPLICA_2_9_RANDOMX_LANE_COUNT, - ?REPLICA_2_9_RANDOMX_DEPTH, - 1, %% jitEnabled, - 1, %% largePagesEnabled - 1, %% hardwareAESEnabled - ?REPLICA_2_9_RANDOMX_ROUND_COUNT, - Key - ), - - %% Then we can reuse pack_sub_chunks as before - PackedSubChunks = pack_sub_chunks(SubChunk, Entropy, 0, RandomXState, []), - case Dir of - undefined -> - ok; - _ -> - Filename = io_lib:format("t~p_e~p.bin", [Thread, Count]), - Path = filename:join(Dir, Filename), - file:write_file(Path, PackedSubChunks) - end, - pack_chunks(replica_2_9, Thread, Dir, Context, Count - 1); + pack_chunks(replica_2_9, Thread, Dir, Context, Count-1); pack_chunks(spora_2_6, Thread, Dir, Context, Count) -> {RandomXState, Chunk, Key, _ChunksPerThread} = Context, diff --git a/apps/arweave/src/ar_mine_randomx.erl b/apps/arweave/src/ar_mine_randomx.erl index 5f913805f..33c38628e 100755 --- a/apps/arweave/src/ar_mine_randomx.erl +++ b/apps/arweave/src/ar_mine_randomx.erl @@ -7,7 +7,6 @@ randomx_reencrypt_chunk/7, randomx_generate_replica_2_9_entropy/2, - randomx_generate_replica_2_9_entropy_opt/2, randomx_encrypt_replica_2_9_sub_chunk/1, randomx_decrypt_replica_2_9_sub_chunk/1, randomx_decrypt_replica_2_9_sub_chunk2/1, @@ -127,17 +126,6 @@ randomx_generate_replica_2_9_entropy({_, {debug_state, _}}, Key) -> %% Non-DEBUG implementation randomx_generate_replica_2_9_entropy({rxsquared, RandomxState}, Key) -> - Inputs = [crypto:hash(sha256, << Key/binary, LaneNumber:8 >>) - || LaneNumber <- lists:seq(1, ?REPLICA_2_9_RANDOMX_LANE_COUNT)], - HashesScratchpads0 = randomx_initialize_replica_2_9_scratchpads(RandomxState, Key, Inputs), - randomx_generate_replica_2_9_entropy(RandomxState, Key, HashesScratchpads0, - 1, ?REPLICA_2_9_RANDOMX_DEPTH). 
- -%% Optimized wrapper -randomx_generate_replica_2_9_entropy_opt({_, {debug_state, _}} = State, Key) -> - % fallback for tests - randomx_generate_replica_2_9_entropy(State, Key); -randomx_generate_replica_2_9_entropy_opt({rxsquared, RandomxState}, Key) -> {ok, EntropyFused} = ar_rxsquared_nif:rsp_fused_entropy_nif( RandomxState, ?COMPOSITE_PACKING_SUB_CHUNK_COUNT, @@ -152,54 +140,6 @@ randomx_generate_replica_2_9_entropy_opt({rxsquared, RandomxState}, Key) -> ), EntropyFused. -write_scratchpad_to_disk(Type, Hash0, Scratchpad0) -> - HashHex = ar_util:encode(Hash0), - FileName = io_lib:format("~s_~s.bin", [Type, HashHex]), - file:write_file(FileName, Scratchpad0). - -randomx_initialize_replica_2_9_scratchpads(_RandomxState, _Key, []) -> - []; -randomx_initialize_replica_2_9_scratchpads(RandomxState, Key, [Input | Inputs]) -> - {ok, Hash0, Scratchpad0} = - ar_rxsquared_nif:rsp_init_scratchpad_nif(RandomxState, Input, - jit(), large_pages(), hardware_aes(), ?REPLICA_2_9_RANDOMX_ROUND_COUNT), - [{Hash0, Scratchpad0} | randomx_initialize_replica_2_9_scratchpads(RandomxState, Key, - Inputs)]. - -randomx_generate_replica_2_9_entropy(RandomxState, Key, HashesScratchpads, - Depth, MaxDepth) -> - HashesScratchpads2 = randomx_process_replica_2_9_scratchpads( - RandomxState, HashesScratchpads), - - Scratchpad = iolist_to_binary([S || {_H, S} <- HashesScratchpads2]), - {ok, MixedScratchpad} = ar_rxsquared_nif:rsp_mix_entropy_far_nif(Scratchpad), - - case Depth == MaxDepth of - true -> - MixedScratchpad; - false -> - Scratchpads = split_scratchpads(MixedScratchpad), - HashesScratchpads3 = lists:zip([H || {H, _S} <- HashesScratchpads2], Scratchpads), - randomx_generate_replica_2_9_entropy(RandomxState, Key, HashesScratchpads3, - Depth + 1, MaxDepth) - end. - -split_scratchpads(<<>>) -> - []; -split_scratchpads(<< Scratchpad:(?RANDOMX_SCRATCHPAD_SIZE)/binary, Rest/binary >>) -> - [Scratchpad | split_scratchpads(Rest)]. 
- -randomx_process_replica_2_9_scratchpads(_RandomxState, []) -> - []; -randomx_process_replica_2_9_scratchpads(RandomxState, - [{Input, Scratchpad} | HashesScratchpads]) -> - {ok, Hash2, Scratchpad2} = - ar_rxsquared_nif:rsp_exec_nif(RandomxState, - Input, Scratchpad, jit(), large_pages(), hardware_aes(), - ?REPLICA_2_9_RANDOMX_ROUND_COUNT), - [{Hash2, Scratchpad2} | randomx_process_replica_2_9_scratchpads( - RandomxState, HashesScratchpads)]. - %%% DEBUG implementation randomx_decrypt_replica_2_9_sub_chunk({{_, {debug_state, _}} = State, Key, SubChunk, EntropySubChunkIndex}) -> diff --git a/apps/arweave/src/ar_packing_server.erl b/apps/arweave/src/ar_packing_server.erl index 84360d1e9..8b7ee1373 100644 --- a/apps/arweave/src/ar_packing_server.erl +++ b/apps/arweave/src/ar_packing_server.erl @@ -243,7 +243,7 @@ get_replica_2_9_entropy(RewardAddr, AbsoluteEndOffset, SubChunkStartOffset) -> Entropy = prometheus_histogram:observe_duration( replica_2_9_entropy_duration_milliseconds, [], fun() -> - ar_mine_randomx:randomx_generate_replica_2_9_entropy_opt(RandomXState, Key) + ar_mine_randomx:randomx_generate_replica_2_9_entropy(RandomXState, Key) end), ar_shared_entropy_cache:put(Key, Entropy, EntropySize), Entropy; diff --git a/apps/arweave/src/ar_rxsquared_nif.erl b/apps/arweave/src/ar_rxsquared_nif.erl index 172056039..89c877265 100755 --- a/apps/arweave/src/ar_rxsquared_nif.erl +++ b/apps/arweave/src/ar_rxsquared_nif.erl @@ -5,11 +5,8 @@ -on_load(init_nif/0). 
-export([rxsquared_hash_nif/5, rxsquared_info_nif/1, rxsquared_init_nif/5, - rsp_exec_nif/7, rsp_exec_test_nif/7, - rsp_init_scratchpad_nif/6, - rsp_mix_entropy_crc32_nif/1, - rsp_mix_entropy_far_nif/1, + rsp_mix_entropy_crc32_test_nif/1, rsp_mix_entropy_far_test_nif/3, rsp_fused_entropy_nif/10, rsp_feistel_encrypt_nif/2, @@ -39,27 +36,15 @@ init_nif() -> %%% Randomx square packing %%%=================================================================== -rsp_exec_nif(_State, _Hash, _Scratchpad, _JIT, _LargePages, _HardwareAES, _RoundCount) -> - ?LOG_ERROR("rsp_exec_nif"), - erlang:nif_error(nif_not_loaded). - rsp_exec_test_nif(_State, _Hash, _Scratchpad, _JIT, _LargePages, _HardwareAES, _RoundCount) -> ?LOG_ERROR("rsp_exec_test_nif"), erlang:nif_error(nif_not_loaded). -rsp_init_scratchpad_nif(_State, _Input, _JIT, _LargePages, _HardwareAES, _RoundCount) -> - ?LOG_ERROR("rsp_init_scratchpad_nif"), - erlang:nif_error(nif_not_loaded). - -rsp_mix_entropy_crc32_nif(_Entropy) -> - ?LOG_ERROR("rsp_mix_entropy_crc32_nif"), - erlang:nif_error(nif_not_loaded). -rsp_mix_entropy_far_nif(_Entropy) -> - ?LOG_ERROR("rsp_mix_entropy_far_nif"), +rsp_mix_entropy_crc32_test_nif(_Entropy) -> + ?LOG_ERROR("rsp_mix_entropy_crc32_test_nif"), erlang:nif_error(nif_not_loaded). -% NOTE maybe this impl will replace rsp_mix_entropy_far_nif rsp_mix_entropy_far_test_nif(_Entropy, _JumpSize, _BlockSize) -> ?LOG_ERROR("rsp_mix_entropy_far_test_nif"), erlang:nif_error(nif_not_loaded). @@ -76,7 +61,7 @@ rsp_fused_entropy_nif( _RandomxProgramCount, _Key ) -> - ?LOG_ERROR("randomx_generate_replica_2_9_entropy_nif"), + ?LOG_ERROR("rsp_fused_entropy_nif"), erlang:nif_error(nif_not_loaded). 
rsp_feistel_encrypt_nif(_InMsg, _Key) -> diff --git a/apps/arweave/test/ar_audit_tests.erl b/apps/arweave/test/ar_audit_tests.erl index 318af81f3..84ea86c7a 100644 --- a/apps/arweave/test/ar_audit_tests.erl +++ b/apps/arweave/test/ar_audit_tests.erl @@ -20,7 +20,6 @@ randomx_replica_2_9_suite_test_() -> [ test_register(fun test_vectors/1, SetupData), % TODO move bottom test_register(fun test_state/1, SetupData), - test_register(fun test_quick/1, SetupData), test_register(fun test_pack_unpack_sub_chunks/1, SetupData) ] end @@ -53,10 +52,8 @@ test_vectors({FastState, _LightState}) -> 176,71,171,120,18,186,252,150,107,106,65,5,197,85, 108,100,151,250 >>, - {ok, Output2v1} = ar_rxsquared_nif:rsp_mix_entropy_crc32_nif(Output1), - Output2v1HashReal = crypto:hash(sha256, Output2v1), - {ok, OutHash2Real, Output2v2} = ar_rxsquared_nif:rsp_exec_nif(element(2, FastState), Hash, Scratchpad, 0, 0, 0, 8), - Output2v2HashReal = crypto:hash(sha256, Output2v2), + {ok, Output2} = ar_rxsquared_nif:rsp_mix_entropy_crc32_test_nif(Output1), + Output2HashReal = crypto:hash(sha256, Output2), Output2HashExpd = << 133,226,122,189,170,63,128,182,242,28,50,204,85,179, 230,105,98,187,39,24,30,133,84,135,70,85,220,145,30, 165,161,242 >>, @@ -67,10 +64,8 @@ test_vectors({FastState, _LightState}) -> 206,247,3,124,167,34,75 >>, ?assertEqual(Output1HashExpd, Output1HashReal), - ?assertEqual(Output2HashExpd, Output2v1HashReal), - ?assertEqual(Output2HashExpd, Output2v2HashReal), + ?assertEqual(Output2HashExpd, Output2HashReal), ?assertEqual(OutHashExpd, OutHash1Real), - ?assertEqual(OutHashExpd, OutHash2Real), Key = << 1 >>, Entropy = ar_mine_randomx:randomx_generate_replica_2_9_entropy(FastState, Key), @@ -93,35 +88,8 @@ test_vectors({FastState, _LightState}) -> EntropySubChunkIndex}), ?assertEqual(SubChunk, SubChunkReal), - {ok, EntropyFused} = ar_rxsquared_nif:rsp_fused_entropy_nif( - element(2, FastState), - ?COMPOSITE_PACKING_SUB_CHUNK_COUNT, - ?COMPOSITE_PACKING_SUB_CHUNK_SIZE, - 
?REPLICA_2_9_RANDOMX_LANE_COUNT, - ?REPLICA_2_9_RANDOMX_DEPTH, - 0, - 0, - 0, - ?REPLICA_2_9_RANDOMX_ROUND_COUNT, - Key - ), - EntropyFusedHash = crypto:hash(sha256, EntropyFused), - ?assertEqual(EntropyHashExpd, EntropyFusedHash), - ok. -test_quick({FastState, _LightState}) -> - {ok, _, _} = ar_rxsquared_nif:rsp_exec_nif( - element(2, FastState), << 0:(8*64) >>, << 0:(8*2097152) >>, 0, 0, 0, 8), - {ok, _, _} = ar_rxsquared_nif:rsp_init_scratchpad_nif( - element(2, FastState), <<"Some input">>, 0, 0, 0, 8), - {ok, _} = ar_rxsquared_nif:rsp_mix_entropy_crc32_nif(<< 0:(8*2097152) >>), - {ok, _} = ar_rxsquared_nif:rsp_mix_entropy_far_nif(<< 0:(8*2097152) >>), - {ok, _} = ar_rxsquared_nif:rsp_feistel_encrypt_nif( - << 0:(8*2097152) >>, << 0:(8*2097152) >>), - {ok, _} = ar_rxsquared_nif:rsp_feistel_decrypt_nif( - << 0:(8*2097152) >>, << 0:(8*2097152) >>). - test_pack_unpack_sub_chunks({State, _LightState}) -> Key = << 0:256 >>, SubChunk = << 0:(8192 * 8) >>, diff --git a/apps/arweave/test/ar_packing_tests.erl b/apps/arweave/test/ar_packing_tests.erl index c4f7fc945..6dc0ed256 100644 --- a/apps/arweave/test/ar_packing_tests.erl +++ b/apps/arweave/test/ar_packing_tests.erl @@ -30,6 +30,7 @@ packing_test_() -> fun teardown/1, [fun test_mix_crc/0, fun test_mix_far/0, + fun test_feistel/0, fun test_full_chunk/0, fun test_partial_chunk/0, fun test_full_chunk_repack/0, @@ -50,12 +51,12 @@ teardown(_) -> test_mix_crc() -> Input1 = << 0:(8*8)>>, - {ok, RealOutput1} = ar_rxsquared_nif:rsp_mix_entropy_crc32_nif(Input1), + {ok, RealOutput1} = ar_rxsquared_nif:rsp_mix_entropy_crc32_test_nif(Input1), ExpdOutput1 = << 199,75,103,72,178,6,176,59 >>, ?assertEqual(ExpdOutput1, RealOutput1), Input2 = << 1,2,3,4,5,6,7,8 >>, - {ok, RealOutput2} = ar_rxsquared_nif:rsp_mix_entropy_crc32_nif(Input2), + {ok, RealOutput2} = ar_rxsquared_nif:rsp_mix_entropy_crc32_test_nif(Input2), ExpdOutput2 = << 245,142,51,45,188,173,22,249 >>, ?assertEqual(ExpdOutput2, RealOutput2), ok. 
@@ -84,6 +85,19 @@ test_mix_far() -> ?assertEqual(ExodOutput4, RealOutput4), ok. +test_feistel()-> + Unpacked = << 1:(8*2097152) >>, + Entropy = << 2:(8*2097152) >>, + {ok, Packed} = ar_rxsquared_nif:rsp_feistel_encrypt_nif(Unpacked, Entropy), + PackedHashReal = crypto:hash(sha256, Packed), + PackedHashExpd = << 73,123,99,202,146,24,95,220,127,228,210,8,106,220,94, + 251,234,166,63,206,16,213,64,208,35,104,15,144,215, + 139,183,59 >>, + ?assertEqual(PackedHashExpd, PackedHashReal), + {ok, UnpackedReal} = ar_rxsquared_nif:rsp_feistel_decrypt_nif(Packed, Entropy), + ?assertEqual(Unpacked, UnpackedReal), + ok. + test_full_chunk() -> UnpackedData = ar_test_node:load_fixture("ar_packing_tests/unpacked.256kb"), Spora25Data = ar_test_node:load_fixture("ar_packing_tests/spora25.256kb"), From b988bacc3942a13b433871c03e1be2cf6e1093b5 Mon Sep 17 00:00:00 2001 From: vird Date: Wed, 1 Jan 2025 17:53:57 +0000 Subject: [PATCH 6/8] remove excessive tests + test functions for better readability; +1 test vector for each case --- .../randomx/rxsquared/ar_rxsquared_nif.c | 150 ------------------ apps/arweave/src/ar_rxsquared_nif.erl | 16 -- apps/arweave/test/ar_audit_tests.erl | 31 +--- apps/arweave/test/ar_packing_tests.erl | 51 ++---- 4 files changed, 20 insertions(+), 228 deletions(-) diff --git a/apps/arweave/c_src/randomx/rxsquared/ar_rxsquared_nif.c b/apps/arweave/c_src/randomx/rxsquared/ar_rxsquared_nif.c index a2d40655a..420ec7ad0 100755 --- a/apps/arweave/c_src/randomx/rxsquared/ar_rxsquared_nif.c +++ b/apps/arweave/c_src/randomx/rxsquared/ar_rxsquared_nif.c @@ -31,150 +31,6 @@ static ERL_NIF_TERM rxsquared_hash_nif(ErlNifEnv* envPtr, int argc, const ERL_NI return hash_nif(envPtr, argc, argv); } -static ERL_NIF_TERM rsp_exec_test_nif( - ErlNifEnv* envPtr, int argc, const ERL_NIF_TERM argv[]) { - if (argc != 7) { - return enif_make_badarg(envPtr); - } - - int randomxProgramCount; - int jitEnabled, largePagesEnabled, hardwareAESEnabled; - struct state* statePtr; - ErlNifBinary 
inHashBin; - ErlNifBinary inScratchpadBin; - ERL_NIF_TERM outHashTerm; - unsigned char* outHashData; - ERL_NIF_TERM outScratchpadTerm; - unsigned char* outScratchpadData; - - if (!enif_get_resource(envPtr, argv[0], stateType, (void**) &statePtr)) { - return error_tuple(envPtr, "failed to read state"); - } - if (!enif_inspect_binary(envPtr, argv[1], &inHashBin)) { - return enif_make_badarg(envPtr); - } - if (!enif_inspect_binary(envPtr, argv[2], &inScratchpadBin)) { - return enif_make_badarg(envPtr); - } - if (!enif_get_int(envPtr, argv[3], &jitEnabled)) { - return enif_make_badarg(envPtr); - } - if (!enif_get_int(envPtr, argv[4], &largePagesEnabled)) { - return enif_make_badarg(envPtr); - } - if (!enif_get_int(envPtr, argv[5], &hardwareAESEnabled)) { - return enif_make_badarg(envPtr); - } - if (!enif_get_int(envPtr, argv[6], &randomxProgramCount)) { - return enif_make_badarg(envPtr); - } - - if (inHashBin.size != 64) { - return enif_make_badarg(envPtr); - } - if (inScratchpadBin.size != randomx_get_scratchpad_size()) { - return enif_make_badarg(envPtr); - } - - int isRandomxReleased; - randomx_vm *vmPtr = create_vm(statePtr, (statePtr->mode == HASHING_MODE_FAST), - jitEnabled, largePagesEnabled, hardwareAESEnabled, &isRandomxReleased); - if (vmPtr == NULL) { - if (isRandomxReleased != 0) { - return error_tuple(envPtr, "state has been released"); - } - return error_tuple(envPtr, "randomx_create_vm failed"); - } - - outScratchpadData = enif_make_new_binary( - envPtr, randomx_get_scratchpad_size(), &outScratchpadTerm); - if (outScratchpadData == NULL) { - return enif_make_badarg(envPtr); - } - - outHashData = enif_make_new_binary(envPtr, 64, &outHashTerm); - if (outHashData == NULL) { - return enif_make_badarg(envPtr); - } - - rsp_exec_test( - vmPtr, inHashBin.data, inScratchpadBin.data, outHashData, outScratchpadData, - randomxProgramCount); - - destroy_vm(statePtr, vmPtr); - - return ok_tuple2(envPtr, outHashTerm, outScratchpadTerm); -} - -// pack randomx square 
randomx independent -static ERL_NIF_TERM rsp_mix_entropy_crc32_test_nif( - ErlNifEnv* envPtr, int argc, const ERL_NIF_TERM argv[]) { - ErlNifBinary inEntropyBin; - ERL_NIF_TERM outEntropyTerm; - unsigned char* outEntropyData; - - if (argc != 1) { - return enif_make_badarg(envPtr); - } - - if (!enif_inspect_binary(envPtr, argv[0], &inEntropyBin)) { - return enif_make_badarg(envPtr); - } - - size_t entropySize = inEntropyBin.size; - - if (entropySize % 8 != 0) { - return enif_make_badarg(envPtr); - } - - outEntropyData = enif_make_new_binary(envPtr, entropySize, &outEntropyTerm); - if (outEntropyData == NULL) { - return enif_make_badarg(envPtr); - } - - rsp_mix_entropy_crc32(inEntropyBin.data, outEntropyData, entropySize); - - return ok_tuple(envPtr, outEntropyTerm); -} - -static ERL_NIF_TERM rsp_mix_entropy_far_test_nif( - ErlNifEnv* envPtr, int argc, const ERL_NIF_TERM argv[]) { - ErlNifBinary inEntropyBin; - ERL_NIF_TERM outEntropyTerm; - unsigned char* outEntropyData; - unsigned int jumpSize; - unsigned int blockSize; - - if (argc != 3) { - return enif_make_badarg(envPtr); - } - - if (!enif_inspect_binary(envPtr, argv[0], &inEntropyBin)) { - return enif_make_badarg(envPtr); - } - if (!enif_get_uint(envPtr, argv[1], &jumpSize)) { - return enif_make_badarg(envPtr); - } - if (!enif_get_uint(envPtr, argv[2], &blockSize)) { - return enif_make_badarg(envPtr); - } - - size_t entropySize = inEntropyBin.size; - - if (entropySize % 8 != 0) { - return enif_make_badarg(envPtr); - } - - outEntropyData = enif_make_new_binary(envPtr, entropySize, &outEntropyTerm); - if (outEntropyData == NULL) { - return enif_make_badarg(envPtr); - } - - rsp_mix_entropy_far(inEntropyBin.data, outEntropyData, entropySize, - jumpSize, blockSize); - - return ok_tuple(envPtr, outEntropyTerm); -} static ERL_NIF_TERM rsp_feistel_encrypt_nif( ErlNifEnv* envPtr, int argc, const ERL_NIF_TERM argv[]) { @@ -397,12 +253,6 @@ static ErlNifFunc rxsquared_funcs[] = { {"rxsquared_init_nif", 5, 
rxsquared_init_nif, ERL_NIF_DIRTY_JOB_CPU_BOUND}, {"rxsquared_hash_nif", 5, rxsquared_hash_nif, ERL_NIF_DIRTY_JOB_CPU_BOUND}, - {"rsp_exec_test_nif", 7, - rsp_exec_test_nif, ERL_NIF_DIRTY_JOB_CPU_BOUND}, - {"rsp_mix_entropy_crc32_test_nif", 1, - rsp_mix_entropy_crc32_test_nif, ERL_NIF_DIRTY_JOB_CPU_BOUND}, - {"rsp_mix_entropy_far_test_nif", 3, - rsp_mix_entropy_far_test_nif, ERL_NIF_DIRTY_JOB_CPU_BOUND}, {"rsp_fused_entropy_nif", 10, rsp_fused_entropy_nif, ERL_NIF_DIRTY_JOB_CPU_BOUND}, {"rsp_feistel_encrypt_nif", 2, rsp_feistel_encrypt_nif, ERL_NIF_DIRTY_JOB_CPU_BOUND}, diff --git a/apps/arweave/src/ar_rxsquared_nif.erl b/apps/arweave/src/ar_rxsquared_nif.erl index 89c877265..d0a5dce65 100755 --- a/apps/arweave/src/ar_rxsquared_nif.erl +++ b/apps/arweave/src/ar_rxsquared_nif.erl @@ -5,9 +5,6 @@ -on_load(init_nif/0). -export([rxsquared_hash_nif/5, rxsquared_info_nif/1, rxsquared_init_nif/5, - rsp_exec_test_nif/7, - rsp_mix_entropy_crc32_test_nif/1, - rsp_mix_entropy_far_test_nif/3, rsp_fused_entropy_nif/10, rsp_feistel_encrypt_nif/2, rsp_feistel_decrypt_nif/2]). @@ -36,19 +33,6 @@ init_nif() -> %%% Randomx square packing %%%=================================================================== -rsp_exec_test_nif(_State, _Hash, _Scratchpad, _JIT, _LargePages, _HardwareAES, _RoundCount) -> - ?LOG_ERROR("rsp_exec_test_nif"), - erlang:nif_error(nif_not_loaded). - - -rsp_mix_entropy_crc32_test_nif(_Entropy) -> - ?LOG_ERROR("rsp_mix_entropy_crc32_test_nif"), - erlang:nif_error(nif_not_loaded). - -rsp_mix_entropy_far_test_nif(_Entropy, _JumpSize, _BlockSize) -> - ?LOG_ERROR("rsp_mix_entropy_far_test_nif"), - erlang:nif_error(nif_not_loaded). 
- rsp_fused_entropy_nif( _RandomxState, _ReplicaEntropySubChunkCount, diff --git a/apps/arweave/test/ar_audit_tests.erl b/apps/arweave/test/ar_audit_tests.erl index 84ea86c7a..e528e753c 100644 --- a/apps/arweave/test/ar_audit_tests.erl +++ b/apps/arweave/test/ar_audit_tests.erl @@ -44,29 +44,6 @@ test_state({FastState, LightState}) -> ?assertEqual(?RANDOMX_SCRATCHPAD_SIZE, ScratchpadSize). test_vectors({FastState, _LightState}) -> - Hash = << 255:(8*64) >>, - Scratchpad = << 255:(8*2097152) >>, - {ok, OutHash1Real, Output1} = ar_rxsquared_nif:rsp_exec_test_nif(element(2, FastState), Hash, Scratchpad, 0, 0, 0, 8), - Output1HashReal = crypto:hash(sha256, Output1), - Output1HashExpd = << 23,173,31,182,17,62,103,254,86,234,161,194,62,234, - 176,71,171,120,18,186,252,150,107,106,65,5,197,85, - 108,100,151,250 >>, - - {ok, Output2} = ar_rxsquared_nif:rsp_mix_entropy_crc32_test_nif(Output1), - Output2HashReal = crypto:hash(sha256, Output2), - Output2HashExpd = << 133,226,122,189,170,63,128,182,242,28,50,204,85,179, - 230,105,98,187,39,24,30,133,84,135,70,85,220,145,30, - 165,161,242 >>, - OutHashExpd = << 137,100,229,43,87,136,2,64,101,172,17,65,106,94,24, - 209,195,201,194,250,35,211,175,73,15,102,11,25,12, - 147,231,196,194,48,55,194,181,47,41,136,101,215,179, - 124,181,223,195,140,217,56,206,55,144,184,44,131,86, - 206,247,3,124,167,34,75 >>, - - ?assertEqual(Output1HashExpd, Output1HashReal), - ?assertEqual(Output2HashExpd, Output2HashReal), - ?assertEqual(OutHashExpd, OutHash1Real), - Key = << 1 >>, Entropy = ar_mine_randomx:randomx_generate_replica_2_9_entropy(FastState, Key), EntropyHash = crypto:hash(sha256, Entropy), @@ -75,6 +52,14 @@ test_vectors({FastState, _LightState}) -> 119,243,151 >>, ?assertEqual(EntropyHashExpd, EntropyHash), + Key2 = << 2 >>, + Entropy2 = ar_mine_randomx:randomx_generate_replica_2_9_entropy(FastState, Key2), + EntropyHash2 = crypto:hash(sha256, Entropy2), + EntropyHashExpd2 = << 206,47,133,111,139,20,31,64,185,33,107,29,14,10,252, + 
76,201,75,203,186,131,32,20,45,34,125,76,248,64,90, + 220,196 >>, + ?assertEqual(EntropyHashExpd2, EntropyHash2), + SubChunk = << 255:(8*8192) >>, EntropySubChunkIndex = 1, {ok, PackedOut} = ar_mine_randomx:randomx_encrypt_replica_2_9_sub_chunk({FastState, Entropy, SubChunk, diff --git a/apps/arweave/test/ar_packing_tests.erl b/apps/arweave/test/ar_packing_tests.erl index 6dc0ed256..5e8443874 100644 --- a/apps/arweave/test/ar_packing_tests.erl +++ b/apps/arweave/test/ar_packing_tests.erl @@ -28,9 +28,7 @@ packing_test_() -> {setup, fun setup/0, fun teardown/1, - [fun test_mix_crc/0, - fun test_mix_far/0, - fun test_feistel/0, + [fun test_feistel/0, fun test_full_chunk/0, fun test_partial_chunk/0, fun test_full_chunk_repack/0, @@ -49,42 +47,6 @@ teardown(_) -> % optional cleanup code ok. -test_mix_crc() -> - Input1 = << 0:(8*8)>>, - {ok, RealOutput1} = ar_rxsquared_nif:rsp_mix_entropy_crc32_test_nif(Input1), - ExpdOutput1 = << 199,75,103,72,178,6,176,59 >>, - ?assertEqual(ExpdOutput1, RealOutput1), - - Input2 = << 1,2,3,4,5,6,7,8 >>, - {ok, RealOutput2} = ar_rxsquared_nif:rsp_mix_entropy_crc32_test_nif(Input2), - ExpdOutput2 = << 245,142,51,45,188,173,22,249 >>, - ?assertEqual(ExpdOutput2, RealOutput2), - ok. 
- -test_mix_far() -> - % divisible - Input1 = << 11, 12, 21, 22, 31, 32, 41, 42 >>, - ExodOutput1 = << 11, 21, 31, 41, 12, 22, 32, 42 >>, - {ok, RealOutput1} = ar_rxsquared_nif:rsp_mix_entropy_far_test_nif(Input1, 2, 1), - ?assertEqual(ExodOutput1, RealOutput1), - - Input2 = << 11, 12, 13, 14, 21, 22, 23, 24 >>, - ExodOutput2 = << 11, 21, 12, 22, 13, 23, 14, 24 >>, - {ok, RealOutput2} = ar_rxsquared_nif:rsp_mix_entropy_far_test_nif(Input2, 4, 1), - ?assertEqual(ExodOutput2, RealOutput2), - - Input3 = << 11, 12, 13, 14, 21, 22, 23, 24 >>, - ExodOutput3 = << 11, 12, 21, 22, 13, 14, 23, 24 >>, - {ok, RealOutput3} = ar_rxsquared_nif:rsp_mix_entropy_far_test_nif(Input3, 4, 2), - ?assertEqual(ExodOutput3, RealOutput3), - - % not divisible - Input4 = << 11, 12, 13, 14, 21, 22, 23, 24 >>, - ExodOutput4 = << 11, 12, 13, 21, 22, 23, 14, 24 >>, - {ok, RealOutput4} = ar_rxsquared_nif:rsp_mix_entropy_far_test_nif(Input4, 4, 3), - ?assertEqual(ExodOutput4, RealOutput4), - ok. - test_feistel()-> Unpacked = << 1:(8*2097152) >>, Entropy = << 2:(8*2097152) >>, @@ -96,6 +58,17 @@ test_feistel()-> ?assertEqual(PackedHashExpd, PackedHashReal), {ok, UnpackedReal} = ar_rxsquared_nif:rsp_feistel_decrypt_nif(Packed, Entropy), ?assertEqual(Unpacked, UnpackedReal), + + Unpacked2 = << 3:(8*2097152) >>, + Entropy2 = << 4:(8*2097152) >>, + {ok, Packed2} = ar_rxsquared_nif:rsp_feistel_encrypt_nif(Unpacked2, Entropy2), + PackedHashReal2 = crypto:hash(sha256, Packed2), + PackedHashExpd2 = << 226,95,254,246,118,154,133,215,229,243,245,255,18,48, + 130,246,98,240,207,197,188,161,222,66,140,47,110,18, + 193,145,96,210 >>, + ?assertEqual(PackedHashExpd2, PackedHashReal2), + {ok, UnpackedReal2} = ar_rxsquared_nif:rsp_feistel_decrypt_nif(Packed2, Entropy2), + ?assertEqual(Unpacked2, UnpackedReal2), ok. 
test_full_chunk() -> From c03439cd93ca0f09a208528b450cb9cd30dccd8e Mon Sep 17 00:00:00 2001 From: James Piechota Date: Wed, 1 Jan 2025 18:37:11 +0000 Subject: [PATCH 7/8] refactor: renaming and deadcode removal --- .github/workflows/test.yml | 1 + .../c_src/randomx/pack_randomx_square.h | 41 ------ ...randomx_square.cpp => randomx_squared.cpp} | 130 +++++++----------- apps/arweave/c_src/randomx/randomx_squared.h | 30 ++++ .../randomx/rxsquared/ar_rxsquared_nif.c | 30 ++-- apps/arweave/include/ar_consensus.hrl | 11 +- apps/arweave/src/ar_mine_randomx.erl | 2 +- ...tests.erl => ar_replica_2_9_nif_tests.erl} | 25 ++-- apps/randomx_square_latency_tester/main.cpp | 2 +- 9 files changed, 118 insertions(+), 154 deletions(-) delete mode 100755 apps/arweave/c_src/randomx/pack_randomx_square.h rename apps/arweave/c_src/randomx/{pack_randomx_square.cpp => randomx_squared.cpp} (63%) create mode 100755 apps/arweave/c_src/randomx/randomx_squared.h rename apps/arweave/test/{ar_audit_tests.erl => ar_replica_2_9_nif_tests.erl} (83%) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 580d66153..5da3f8474 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -315,6 +315,7 @@ jobs: ar_mining_io_tests, ar_poller_tests, ar_reject_chunks_tests, + ar_replica_2_9_nif_tests, ar_semaphore_tests, ar_start_from_block_tests, ar_tx_blacklist_tests, diff --git a/apps/arweave/c_src/randomx/pack_randomx_square.h b/apps/arweave/c_src/randomx/pack_randomx_square.h deleted file mode 100755 index de1cabe0d..000000000 --- a/apps/arweave/c_src/randomx/pack_randomx_square.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef PACK_RANDOMX_SQUARE_H -#define PACK_RANDOMX_SQUARE_H - -#include "randomx.h" - -#if defined(__cplusplus) -extern "C" { -#endif - -RANDOMX_EXPORT void rsp_exec_test( - randomx_vm *machine, const unsigned char *inHash, const unsigned char *inScratchpad, - unsigned char *outHash, unsigned char *outScratchpad, const int randomxProgramCount); - 
-RANDOMX_EXPORT void rsp_mix_entropy_crc32( - const unsigned char *inEntropy, unsigned char *outEntropy, const size_t entropySize); - -RANDOMX_EXPORT void rsp_mix_entropy_far( - const unsigned char *inEntropy, unsigned char *outEntropy, const size_t entropySize, - const size_t jumpSize, const size_t blockSize); - -RANDOMX_EXPORT int rsp_fused_entropy( - randomx_vm** vmList, - size_t scratchpadSize, - int replicaEntropySubChunkCount, - int compositePackingSubChunkSize, - int laneCount, - int rxDepth, - int randomxProgramCount, - int blockSize, - const unsigned char* keyData, - size_t keySize, - unsigned char* outAllScratchpads // We'll pass in a pointer for final scratchpad data -); - -// TODO optimized packing_apply_to_subchunk (NIF only uses slice) - -#if defined(__cplusplus) -} -#endif - -#endif // PACK_RANDOMX_SQUARE_H diff --git a/apps/arweave/c_src/randomx/pack_randomx_square.cpp b/apps/arweave/c_src/randomx/randomx_squared.cpp similarity index 63% rename from apps/arweave/c_src/randomx/pack_randomx_square.cpp rename to apps/arweave/c_src/randomx/randomx_squared.cpp index 7570cb8b3..ced22c56e 100644 --- a/apps/arweave/c_src/randomx/pack_randomx_square.cpp +++ b/apps/arweave/c_src/randomx/randomx_squared.cpp @@ -1,7 +1,7 @@ #include #include #include "crc32.h" -#include "pack_randomx_square.h" +#include "randomx_squared.h" #include "feistel_msgsize_key_cipher.h" // imports from randomx @@ -10,39 +10,12 @@ #include "aes_hash.hpp" extern "C" { - void rsp_exec_test( - randomx_vm *machine, - const unsigned char *inHash, const unsigned char *inScratchpad, - unsigned char *outHash, unsigned char *outScratchpad, - const int randomxProgramCount) { - assert(machine != nullptr); - alignas(16) uint64_t tempHash[8]; - memcpy(tempHash, inHash, sizeof(tempHash)); - void* scratchpad = (void*)machine->getScratchpad(); - memcpy(scratchpad, inScratchpad, randomx_get_scratchpad_size()); - machine->resetRoundingMode(); - int blakeResult; - for (int chain = 0; chain < 
randomxProgramCount - 1; ++chain) { - machine->run(&tempHash); - blakeResult = randomx_blake2b( - tempHash, sizeof(tempHash), machine->getRegisterFile(), - sizeof(randomx::RegisterFile), nullptr, 0); - assert(blakeResult == 0); - } - machine->run(&tempHash); - - blakeResult = randomx_blake2b( - tempHash, sizeof(tempHash), machine->getRegisterFile(), - sizeof(randomx::RegisterFile), nullptr, 0); - assert(blakeResult == 0); - - memcpy(outHash, tempHash, sizeof(tempHash)); - memcpy(outScratchpad, machine->getScratchpad(), randomx_get_scratchpad_size()); - } - void rsp_mix_entropy_crc32( - const unsigned char *inEntropy, - unsigned char *outEntropy, const size_t entropySize) { + void _rsp_mix_entropy_crc32( + const unsigned char *inEntropy, + unsigned char *outEntropy, + const size_t entropySize + ) { // NOTE we can't use _mm_crc32_u64, because it output only final 32-bit result // NOTE commented variant is more readable but unoptimized unsigned int state = ~0; @@ -80,53 +53,35 @@ extern "C" { } } - void rsp_mix_entropy_far( - const unsigned char *inEntropy, - unsigned char *outEntropy, const size_t entropySize, - const size_t jumpSize, const size_t blockSize) { - unsigned char *outEntropyPtr = outEntropy; - size_t numJumps = entropySize / jumpSize; - size_t numBlocksPerJump = jumpSize / blockSize; - size_t leftover = jumpSize % blockSize; - - for (size_t offset = 0; offset < numBlocksPerJump; ++offset) { - for (size_t i = 0; i < numJumps; ++i) { - size_t srcPos = i * jumpSize + offset * blockSize; - memcpy(outEntropyPtr, &inEntropy[srcPos], blockSize); - outEntropyPtr += blockSize; - } - } - if (leftover > 0) { - for (size_t i = 0; i < numJumps; ++i) { - size_t srcPos = i * jumpSize + numBlocksPerJump * blockSize; - memcpy(outEntropyPtr, &inEntropy[srcPos], leftover); - outEntropyPtr += leftover; - } - } - } - - // Group of functions related to rsp_fused_entropy - void _rsp_exec_inplace(randomx_vm* machine, uint64_t* srcTempHash, uint64_t* dstTempHash, int 
programCount, size_t scratchpadSize) { + // Runs 1 RX2 round of programCount RandomX execs + 1 CRC mix on a single lane. + // VM scratchpad is updated in place. + void _rsp_exec_inplace( + randomx_vm* machine, + uint64_t* srcTempHash, + uint64_t* dstTempHash, + int programCount, + size_t scratchpadSize + ) { machine->resetRoundingMode(); for (int chain = 0; chain < programCount-1; chain++) { machine->run(srcTempHash); - int br = randomx_blake2b( + int blakeResult = randomx_blake2b( srcTempHash, 64, machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0 ); - assert(br == 0); + assert(blakeResult == 0); } machine->run(srcTempHash); - int br = randomx_blake2b( + int blakeResult = randomx_blake2b( dstTempHash, 64, machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0 ); - assert(br == 0); - rsp_mix_entropy_crc32( + assert(blakeResult == 0); + _rsp_mix_entropy_crc32( (const unsigned char*)machine->getScratchpad(), (unsigned char*)(void*)machine->getScratchpad(), scratchpadSize); @@ -168,7 +123,7 @@ extern "C" { } } - void packing_mix_entropy_direct( + void _rsp_mix_entropy_far( randomx_vm** inSet, randomx_vm** outSet, int count, @@ -204,15 +159,15 @@ extern "C" { int rsp_fused_entropy( randomx_vm** vmList, size_t scratchpadSize, - int replicaEntropySubChunkCount, - int compositePackingSubChunkSize, + int subChunkCount, + int subChunkSize, int laneCount, int rxDepth, int randomxProgramCount, int blockSize, const unsigned char* keyData, size_t keySize, - unsigned char* outAllScratchpads + unsigned char* outEntropy ) { struct vm_hash_t { alignas(16) uint64_t tempHash[8]; // 64 bytes @@ -223,7 +178,13 @@ extern "C" { return 0; } + // Initialize the scratchpads for each lane for (int i = 0; i < laneCount; i++) { + // laneSeed = sha256(<< keyData, i + 1 >>) + // laneSeed should be unique - i.e. no two lanes across all entropies and all + // replicas should have the same seed.
Current key (as of 2025-01-01) is + // <> where entropy index is unique within + // a given partition. unsigned char laneSeed[32]; { SHA256_CTX sha256; @@ -242,6 +203,8 @@ extern "C" { delete[] vmHashes; return 0; } + // This replaces the default `randomx_vm::initScratchpad()` and overwrites + // the VM's internal `scratchpad` member variable. fillAes1Rx4( vmHashes[i].tempHash, scratchpadSize, @@ -251,37 +214,44 @@ extern "C" { for (int d = 0; d < rxDepth; d++) { for (int lane = 0; lane < laneCount; lane++) { - _rsp_exec_inplace(vmList[lane], vmHashes[lane].tempHash, vmHashes[lane+laneCount].tempHash, randomxProgramCount, scratchpadSize); + _rsp_exec_inplace( + vmList[lane], + vmHashes[lane].tempHash, vmHashes[lane+laneCount].tempHash, + randomxProgramCount, scratchpadSize); } - packing_mix_entropy_direct(&vmList[0], &vmList[laneCount], + _rsp_mix_entropy_far(&vmList[0], &vmList[laneCount], laneCount, scratchpadSize, scratchpadSize, blockSize); if (d + 1 < rxDepth) { d++; for (int lane = laneCount; lane < 2*laneCount; lane++) { - _rsp_exec_inplace(vmList[lane], vmHashes[lane].tempHash, vmHashes[lane-laneCount].tempHash, randomxProgramCount, scratchpadSize); + _rsp_exec_inplace( + vmList[lane], + vmHashes[lane].tempHash, vmHashes[lane-laneCount].tempHash, + randomxProgramCount, scratchpadSize); } - packing_mix_entropy_direct(&vmList[laneCount], &vmList[0], + _rsp_mix_entropy_far(&vmList[laneCount], &vmList[0], laneCount, scratchpadSize, scratchpadSize, blockSize); } } - // NOTE still unoptimal. Last copy can be performed from scratchpad to output. But requires +1 variation (set to buffer) + // NOTE still unoptimal. Last copy can be performed from scratchpad to output.
+ // But requires +1 variation (set to buffer) if ((rxDepth % 2) == 0) { - unsigned char* outAllScratchpadsPtr = outAllScratchpads; + unsigned char* outEntropyPtr = outEntropy; for (int i = 0; i < laneCount; i++) { void* sp = (void*)vmList[i]->getScratchpad(); - memcpy(outAllScratchpadsPtr, sp, scratchpadSize); - outAllScratchpadsPtr += scratchpadSize; + memcpy(outEntropyPtr, sp, scratchpadSize); + outEntropyPtr += scratchpadSize; } } else { - unsigned char* outAllScratchpadsPtr = outAllScratchpads; + unsigned char* outEntropyPtr = outEntropy; for (int i = laneCount; i < 2*laneCount; i++) { void* sp = (void*)vmList[i]->getScratchpad(); - memcpy(outAllScratchpadsPtr, sp, scratchpadSize); - outAllScratchpadsPtr += scratchpadSize; + memcpy(outEntropyPtr, sp, scratchpadSize); + outEntropyPtr += scratchpadSize; } } diff --git a/apps/arweave/c_src/randomx/randomx_squared.h b/apps/arweave/c_src/randomx/randomx_squared.h new file mode 100755 index 000000000..d98560333 --- /dev/null +++ b/apps/arweave/c_src/randomx/randomx_squared.h @@ -0,0 +1,30 @@ +#ifndef RANDOMX_SQUARED_H +#define RANDOMX_SQUARED_H + +#include "randomx.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +RANDOMX_EXPORT int rsp_fused_entropy( + randomx_vm** vmList, + size_t scratchpadSize, + int subChunkCount, + int subChunkSize, + int laneCount, + int rxDepth, + int randomxProgramCount, + int blockSize, + const unsigned char* keyData, + size_t keySize, + unsigned char* outEntropy // We'll pass in a pointer for final scratchpad data +); + +// TODO optimized packing_apply_to_subchunk (NIF only uses slice) + +#if defined(__cplusplus) +} +#endif + +#endif // RANDOMX_SQUARED_H diff --git a/apps/arweave/c_src/randomx/rxsquared/ar_rxsquared_nif.c b/apps/arweave/c_src/randomx/rxsquared/ar_rxsquared_nif.c index 420ec7ad0..c555139e3 100755 --- a/apps/arweave/c_src/randomx/rxsquared/ar_rxsquared_nif.c +++ b/apps/arweave/c_src/randomx/rxsquared/ar_rxsquared_nif.c @@ -3,7 +3,7 @@ #include #include 
"../randomx_long_with_entropy.h" #include "../feistel_msgsize_key_cipher.h" -#include "../pack_randomx_square.h" +#include "../randomx_squared.h" #include "../ar_randomx_impl.h" @@ -122,13 +122,13 @@ static ERL_NIF_TERM rsp_fused_entropy_nif(ErlNifEnv* envPtr, int argc, const ERL } // 2. Parse each integer - int replicaEntropySubChunkCount; - if (!enif_get_int(envPtr, argv[1], &replicaEntropySubChunkCount)) { + int subChunkCount; + if (!enif_get_int(envPtr, argv[1], &subChunkCount)) { return enif_make_badarg(envPtr); } - int compositePackingSubChunkSize; - if (!enif_get_int(envPtr, argv[2], &compositePackingSubChunkSize)) { + int subChunkSize; + if (!enif_get_int(envPtr, argv[2], &subChunkSize)) { return enif_make_badarg(envPtr); } @@ -178,11 +178,11 @@ static ERL_NIF_TERM rsp_fused_entropy_nif(ErlNifEnv* envPtr, int argc, const ERL size_t scratchpadSize = randomx_get_scratchpad_size(); // 5. Pre-allocate the final output binary to store all scratchpads - size_t totalSpSize = scratchpadSize * laneCount; - ERL_NIF_TERM outScratchpadsTerm; - unsigned char* outAllScratchpads = - enif_make_new_binary(envPtr, totalSpSize, &outScratchpadsTerm); - if (!outAllScratchpads) { + size_t outEntropySize = scratchpadSize * laneCount; + ERL_NIF_TERM outEntropyTerm; + unsigned char* outEntropy = + enif_make_new_binary(envPtr, outEntropySize, &outEntropyTerm); + if (!outEntropy) { free(vmList); return enif_make_badarg(envPtr); } @@ -215,15 +215,15 @@ static ERL_NIF_TERM rsp_fused_entropy_nif(ErlNifEnv* envPtr, int argc, const ERL int success = rsp_fused_entropy( vmList, scratchpadSize, - replicaEntropySubChunkCount, - compositePackingSubChunkSize, + subChunkCount, + subChunkSize, laneCount, rxDepth, randomxProgramCount, 6, keyBin.data, keyBin.size, - outAllScratchpads // final buffer for the scratchpads + outEntropy // final buffer for the output entropy ); // 8. 
If the function returned false, we interpret that as an error @@ -238,13 +238,13 @@ static ERL_NIF_TERM rsp_fused_entropy_nif(ErlNifEnv* envPtr, int argc, const ERL return error_tuple(envPtr, "cxx_fused_entropy_failed"); } - // 9. If success, destroy VMs and return {ok, ScratchpadsBin} + // 9. If success, destroy VMs and return {ok, outEntropyTerm} for (int i = 0; i < totalVMs; i++) { destroy_vm(statePtr, vmList[i]); } free(vmList); - return ok_tuple(envPtr, outScratchpadsTerm); + return ok_tuple(envPtr, outEntropyTerm); } diff --git a/apps/arweave/include/ar_consensus.hrl b/apps/arweave/include/ar_consensus.hrl index d471dccc3..fe3c705f7 100755 --- a/apps/arweave/include/ar_consensus.hrl +++ b/apps/arweave/include/ar_consensus.hrl @@ -21,15 +21,20 @@ %% The number of times we apply an RX hash in each RX2 lane in-between every pair %% of mixings. --define(REPLICA_2_9_RANDOMX_ROUND_COUNT, 6). +-define(REPLICA_2_9_RANDOMX_PROGRAM_COUNT, 6). %% The number of RX2 lanes. -define(REPLICA_2_9_RANDOMX_LANE_COUNT, 4). -%% The RX2 depth. +%% The RX2 depth: the number of RX2 rounds. A round of RX2 has: +%% 1. REPLICA_2_9_RANDOMX_LANE_COUNT lanes +%% 2. Each lane evaluates REPLICA_2_9_RANDOMX_PROGRAM_COUNT RandomX programs +%% 3. The output entropy of each lane is then mixed with crc32 (aka "near mix") +%% 4. The mixed output from all lanes is then shuffled (aka "far mix") -define(REPLICA_2_9_RANDOMX_DEPTH, 3). -%% The size in bytes of the component (NOT the total) RX2 scratchpad. +%% The size in bytes of the component RX2 scratchpad (aka the output from each RX2 lane). This +%% is NOT the total output entropy (that size is defined in REPLICA_2_9_ENTROPY_SIZE). -define(RANDOMX_SCRATCHPAD_SIZE, 2097152). %% The size in bytes of the total RX2 entropy (# of lanes * scratchpad size).
diff --git a/apps/arweave/src/ar_mine_randomx.erl b/apps/arweave/src/ar_mine_randomx.erl index 33c38628e..992642113 100755 --- a/apps/arweave/src/ar_mine_randomx.erl +++ b/apps/arweave/src/ar_mine_randomx.erl @@ -135,7 +135,7 @@ randomx_generate_replica_2_9_entropy({rxsquared, RandomxState}, Key) -> jit(), large_pages(), hardware_aes(), - ?REPLICA_2_9_RANDOMX_ROUND_COUNT, + ?REPLICA_2_9_RANDOMX_PROGRAM_COUNT, Key ), EntropyFused. diff --git a/apps/arweave/test/ar_audit_tests.erl b/apps/arweave/test/ar_replica_2_9_nif_tests.erl similarity index 83% rename from apps/arweave/test/ar_audit_tests.erl rename to apps/arweave/test/ar_replica_2_9_nif_tests.erl index e528e753c..607404240 100644 --- a/apps/arweave/test/ar_audit_tests.erl +++ b/apps/arweave/test/ar_replica_2_9_nif_tests.erl @@ -1,8 +1,7 @@ --module(ar_audit_tests). +-module(ar_replica_2_9_nif_tests). -include_lib("eunit/include/eunit.hrl"). --include_lib("arweave/include/ar.hrl"). -include_lib("arweave/include/ar_consensus.hrl"). setup_replica_2_9() -> @@ -32,7 +31,7 @@ randomx_replica_2_9_suite_test_() -> test_state({FastState, LightState}) -> ?assertEqual( - {ok, {rxsquared, fast, 67602036, 2097152}}, + {ok, {rxsquared, fast, 34047604, 2097152}}, ar_mine_randomx:info(FastState) ), ?assertEqual( @@ -62,16 +61,16 @@ test_vectors({FastState, _LightState}) -> SubChunk = << 255:(8*8192) >>, EntropySubChunkIndex = 1, - {ok, PackedOut} = ar_mine_randomx:randomx_encrypt_replica_2_9_sub_chunk({FastState, Entropy, SubChunk, - EntropySubChunkIndex}), - PackedOutHashReal = crypto:hash(sha256, PackedOut), - PackedOutHashExpd = << 15,46,184,11,124,31,150,77,199,107,221,0,136,154,61, + {ok, Packed} = ar_mine_randomx:randomx_encrypt_replica_2_9_sub_chunk( + {FastState, Entropy, SubChunk, EntropySubChunkIndex}), + PackedHashReal = crypto:hash(sha256, Packed), + PackedHashExpd = << 15,46,184,11,124,31,150,77,199,107,221,0,136,154,61, 146,193,198,126,52,19,7,211,28,121,108,176,15,124,33, 48,99 >>, - 
?assertEqual(PackedOutHashExpd, PackedOutHashReal), - {ok, SubChunkReal} = ar_mine_randomx:randomx_decrypt_replica_2_9_sub_chunk({FastState, Key, PackedOut, - EntropySubChunkIndex}), - ?assertEqual(SubChunk, SubChunkReal), + ?assertEqual(PackedHashExpd, PackedHashReal), + {ok, Unpacked} = ar_mine_randomx:randomx_decrypt_replica_2_9_sub_chunk( + {FastState, Key, Packed, EntropySubChunkIndex}), + ?assertEqual(SubChunk, Unpacked), ok. @@ -79,14 +78,14 @@ test_pack_unpack_sub_chunks({State, _LightState}) -> Key = << 0:256 >>, SubChunk = << 0:(8192 * 8) >>, Entropy = ar_mine_randomx:randomx_generate_replica_2_9_entropy(State, Key), - ?assertEqual(268435456, byte_size(Entropy)), + ?assertEqual(8388608, byte_size(Entropy)), PackedSubChunks = pack_sub_chunks(SubChunk, Entropy, 0, SubChunk, State), ?assert(lists:all(fun(PackedSubChunk) -> byte_size(PackedSubChunk) == 8192 end, PackedSubChunks)), unpack_sub_chunks(PackedSubChunks, 0, SubChunk, Entropy). pack_sub_chunks(_SubChunk, _Entropy, Index, _PreviousSubChunk, _State) - when Index == 32768 -> + when Index == 1024 -> []; pack_sub_chunks(SubChunk, Entropy, Index, PreviousSubChunk, State) -> {ok, PackedSubChunk} = ar_mine_randomx:randomx_encrypt_replica_2_9_sub_chunk( diff --git a/apps/randomx_square_latency_tester/main.cpp b/apps/randomx_square_latency_tester/main.cpp index 0b7ff4f94..a9f603f80 100755 --- a/apps/randomx_square_latency_tester/main.cpp +++ b/apps/randomx_square_latency_tester/main.cpp @@ -2,7 +2,7 @@ #include #include #include -#include "pack_randomx_square.h" +#include "randomx_squared.h" int main() { return 0; From 40bfbf50f0085e6f213feeaf60ba1c2209fd1bfa Mon Sep 17 00:00:00 2001 From: vird Date: Thu, 2 Jan 2025 12:01:23 +0000 Subject: [PATCH 8/8] refactor crc->near; aes fix; less space for rx temp hash --- .../arweave/c_src/randomx/randomx_squared.cpp | 34 +++++++------------ 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/apps/arweave/c_src/randomx/randomx_squared.cpp 
b/apps/arweave/c_src/randomx/randomx_squared.cpp index ced22c56e..473370cfb 100644 --- a/apps/arweave/c_src/randomx/randomx_squared.cpp +++ b/apps/arweave/c_src/randomx/randomx_squared.cpp @@ -7,11 +7,10 @@ // imports from randomx #include "vm_compiled.hpp" #include "blake2/blake2.h" -#include "aes_hash.hpp" extern "C" { - void _rsp_mix_entropy_crc32( + void _rsp_mix_entropy_near( const unsigned char *inEntropy, unsigned char *outEntropy, const size_t entropySize @@ -57,31 +56,30 @@ extern "C" { // VM scratchpad is updated in place. void _rsp_exec_inplace( randomx_vm* machine, - uint64_t* srcTempHash, - uint64_t* dstTempHash, + uint64_t* tempHash, int programCount, size_t scratchpadSize ) { machine->resetRoundingMode(); for (int chain = 0; chain < programCount-1; chain++) { - machine->run(srcTempHash); + machine->run(tempHash); int blakeResult = randomx_blake2b( - srcTempHash, 64, + tempHash, 64, machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0 ); assert(blakeResult == 0); } - machine->run(srcTempHash); + machine->run(tempHash); int blakeResult = randomx_blake2b( - dstTempHash, 64, + tempHash, 64, machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0 ); assert(blakeResult == 0); - _rsp_mix_entropy_crc32( + _rsp_mix_entropy_near( (const unsigned char*)machine->getScratchpad(), (unsigned char*)(void*)machine->getScratchpad(), scratchpadSize); @@ -173,7 +171,7 @@ extern "C" { alignas(16) uint64_t tempHash[8]; // 64 bytes }; - vm_hash_t* vmHashes = new (std::nothrow) vm_hash_t[2*laneCount]; + vm_hash_t* vmHashes = new (std::nothrow) vm_hash_t[laneCount]; if (!vmHashes) { return 0; } @@ -203,20 +201,14 @@ extern "C" { delete[] vmHashes; return 0; } - // This replaces the default `randomx_vm::initScratchpad()` and overwrites - // the VM's internal `scratchpad` member variable. 
- fillAes1Rx4( - vmHashes[i].tempHash, - scratchpadSize, - (void*)vmList[i]->getScratchpad() - ); + vmList[i]->initScratchpad(&vmHashes[i].tempHash); } for (int d = 0; d < rxDepth; d++) { for (int lane = 0; lane < laneCount; lane++) { _rsp_exec_inplace( vmList[lane], - vmHashes[lane].tempHash, vmHashes[lane+laneCount].tempHash, + vmHashes[lane].tempHash, randomxProgramCount, scratchpadSize); } _rsp_mix_entropy_far(&vmList[0], &vmList[laneCount], @@ -225,10 +217,10 @@ extern "C" { if (d + 1 < rxDepth) { d++; - for (int lane = laneCount; lane < 2*laneCount; lane++) { + for (int lane = 0; lane < laneCount; lane++) { _rsp_exec_inplace( - vmList[lane], - vmHashes[lane].tempHash, vmHashes[lane-laneCount].tempHash, + vmList[lane+laneCount], + vmHashes[lane].tempHash, randomxProgramCount, scratchpadSize); } _rsp_mix_entropy_far(&vmList[laneCount], &vmList[0],