@@ -18,7 +18,6 @@ use super::TweakableHash;
1818
1919use p3_koala_bear:: Poseidon2KoalaBear ;
2020use std:: cell:: RefCell ;
21- use thread_local:: ThreadLocal ;
2221
2322const DOMAIN_PARAMETERS_LENGTH : usize = 4 ;
2423/// The state width for compressing a single hash in a chain.
@@ -517,9 +516,11 @@ impl<
517516 let sponge_chains_offset = PARAMETER_LEN + TWEAK_LEN ;
518517 let sponge_input_len = PARAMETER_LEN + TWEAK_LEN + NUM_CHUNKS * HASH_LEN ;
519518
520- // We use a thread local storage to guarantee the `packed_leaf_input` vector is only allocated
519+ // We use thread-local storage to guarantee the `packed_leaf_input` vector is only allocated
521520 // once per thread
522- let tls: ThreadLocal < RefCell < Vec < PackedF > > > = ThreadLocal :: new ( ) ;
521+ thread_local ! {
522+ static PACKED_LEAF_INPUT : RefCell <Vec <PackedF >> = const { RefCell :: new( Vec :: new( ) ) } ;
523+ }
523524
524525 // PARALLEL SIMD PROCESSING
525526 //
@@ -537,18 +538,11 @@ impl<
537538 //
538539 // This layout enables efficient SIMD operations across epochs.
539540
540- let cell = tls. get_or ( || {
541- RefCell :: new ( vec ! [ PackedF :: ZERO ; sponge_input_len] )
542- } ) ;
543- let mut packed_leaf_input = cell. borrow_mut ( ) ;
544- // reset not needed
545-
546541 let mut packed_chains: [ [ PackedF ; HASH_LEN ] ; NUM_CHUNKS ] =
547542 array:: from_fn ( |c_idx| {
548543 // Generate starting points for this chain across all epochs.
549544 let starts: [ _ ; PackedF :: WIDTH ] = array:: from_fn ( |lane| {
550- PRF :: get_domain_element ( prf_key, epoch_chunk[ lane] , c_idx as u64 )
551- . into ( )
545+ PRF :: get_domain_element ( prf_key, epoch_chunk[ lane] , c_idx as u64 ) . into ( )
552546 } ) ;
553547
554548 // Transpose to vertical packing for SIMD efficiency.
@@ -601,10 +595,10 @@ impl<
601595 // Apply the hash function to advance the chain.
602596 // This single call processes all epochs in parallel.
603597 * packed_chain =
604- poseidon_compress :: < PackedF , _ , CHAIN_COMPRESSION_WIDTH , HASH_LEN > (
605- & chain_perm,
606- & packed_input,
607- ) ;
598+ poseidon_compress :: < PackedF , _ , CHAIN_COMPRESSION_WIDTH , HASH_LEN > (
599+ & chain_perm,
600+ & packed_input,
601+ ) ;
608602 }
609603 }
610604
@@ -619,45 +613,49 @@ impl<
619613 // Layout: [parameter | tree_tweak | all_chain_ends]
620614 // NOTE: `packed_leaf_input` is preallocated per thread. We overwrite the entire
621615 // vector in each iteration, so no need to `fill(0)`!
616+ let packed_leaves = PACKED_LEAF_INPUT . with_borrow_mut ( |packed_leaf_input| {
617+ // Resize whenever the length differs from `sponge_input_len` (e.g. on first use for this thread)
618+ if packed_leaf_input. len ( ) != sponge_input_len {
619+ packed_leaf_input. resize ( sponge_input_len, PackedF :: ZERO ) ;
620+ }
622621
623- // Copy pre-packed parameter
624- packed_leaf_input[ ..PARAMETER_LEN ] . copy_from_slice ( & packed_parameter) ;
625-
626- // Pack tree tweaks directly (level 0 for bottom-layer leaves)
627- pack_fn_into :: < TWEAK_LEN > (
628- & mut packed_leaf_input,
629- sponge_tweak_offset,
630- |t_idx, lane| {
631- Self :: tree_tweak ( 0 , epoch_chunk[ lane] ) . to_field_elements :: < TWEAK_LEN > ( )
622+ // Copy pre-packed parameter
623+ packed_leaf_input[ ..PARAMETER_LEN ] . copy_from_slice ( & packed_parameter) ;
624+
625+ // Pack tree tweaks directly (level 0 for bottom-layer leaves)
626+ pack_fn_into :: < TWEAK_LEN > (
627+ packed_leaf_input,
628+ sponge_tweak_offset,
629+ |t_idx, lane| {
630+ Self :: tree_tweak ( 0 , epoch_chunk[ lane] ) . to_field_elements :: < TWEAK_LEN > ( )
632631 [ t_idx]
633- } ,
634- ) ;
632+ } ,
633+ ) ;
635634
636- // Copy all chain ends (already packed)
637- let dst = & mut packed_leaf_input[ sponge_chains_offset
635+ // Copy all chain ends (already packed)
636+ let dst = & mut packed_leaf_input[ sponge_chains_offset
638637 ..sponge_chains_offset + packed_chains. len ( ) * HASH_LEN ] ;
639- for ( dst_chunk, src_chain) in
638+ for ( dst_chunk, src_chain) in
640639 dst. chunks_exact_mut ( HASH_LEN ) . zip ( packed_chains. iter ( ) )
641640 {
642641 dst_chunk. copy_from_slice ( src_chain) ;
643642 }
644643
645- // Apply the sponge hash to produce the leaf.
646- // This absorbs all chain ends and squeezes out the final hash.
647- let packed_leaves =
644+ // Apply the sponge hash to produce the leaf.
645+ // This absorbs all chain ends and squeezes out the final hash.
648646 poseidon_sponge :: < PackedF , _ , MERGE_COMPRESSION_WIDTH , HASH_LEN > (
649647 & sponge_perm,
650648 & capacity_val,
651- & packed_leaf_input,
652- ) ;
649+ packed_leaf_input,
650+ )
651+ } ) ;
653652
654653 // STEP 4: UNPACK RESULTS TO SCALAR REPRESENTATION
655654 //
656655 // Convert from vertical packing back to scalar layout.
657656 // Each lane becomes one leaf in the output slice.
658657 unpack_array ( & packed_leaves, leaves_chunk) ;
659- } ,
660- ) ;
658+ } ) ;
661659
662660 // HANDLE REMAINDER EPOCHS
663661 //
0 commit comments