More efficient looping and caching reduced to only the common values

maneatingape · maneatingape · commit 8d90441d1feb · 2025-10-26T18:18:22.000Z
diff --git a/README.md b/README.md
@@ -1,7 +1,7 @@
 # Advent of Code [![checks-badge]][checks-link] [![docs-badge]][docs-link]
 
 Blazing fast Rust solutions for every [Advent of Code] puzzle from 2015 to 2024, taking
-**497 milliseconds** to solve all 500 stars. Each solution is carefully optimized for performance
+**493 milliseconds** to solve all 500 stars. Each solution is carefully optimized for performance
 while ensuring the code remains concise, readable, and idiomatic.
 
 ## Features
@@ -67,7 +67,7 @@ Performance is reasonable even on older hardware, for example a 2011 MacBook Pro
 
 | Year | [2015](#2015) | [2016](#2016) | [2017](#2017) | [2018](#2018) | [2019](#2019) | [2020](#2020) | [2021](#2021) | [2022](#2022) | [2023](#2023) | [2024](#2024) |
 | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
-| Benchmark (ms) | 15 | 109 | 82 | 35 | 14 | 220 | 8 | 5 | 5 | 4 |
+| Benchmark (ms) | 15 | 109 | 78 | 35 | 14 | 220 | 8 | 5 | 5 | 4 |
 
 ## 2024
 
@@ -303,7 +303,7 @@ Performance is reasonable even on older hardware, for example a 2011 MacBook Pro
 | 2 | [Corruption Checksum](https://adventofcode.com/2017/day/2) | [Source](src/year2017/day02.rs) | 2 |
 | 3 | [Spiral Memory](https://adventofcode.com/2017/day/3) | [Source](src/year2017/day03.rs) | 2 |
 | 4 | [High-Entropy Passphrases](https://adventofcode.com/2017/day/4) | [Source](src/year2017/day04.rs) | 86 |
-| 5 | [A Maze of Twisty Trampolines, All Alike](https://adventofcode.com/2017/day/5) | [Source](src/year2017/day05.rs) | 22000 |
+| 5 | [A Maze of Twisty Trampolines, All Alike](https://adventofcode.com/2017/day/5) | [Source](src/year2017/day05.rs) | 18000 |
 | 6 | [Memory Reallocation](https://adventofcode.com/2017/day/6) | [Source](src/year2017/day06.rs) | 81 |
 | 7 | [Recursive Circus](https://adventofcode.com/2017/day/7) | [Source](src/year2017/day07.rs) | 93 |
 | 8 | [I Heard You Like Registers](https://adventofcode.com/2017/day/8) | [Source](src/year2017/day08.rs) | 47 |
diff --git a/src/year2017/day05.rs b/src/year2017/day05.rs
@@ -16,9 +16,7 @@
 //! We then precompute all possible combination for blocks of size 16, using this to accelerate
 //! part two.
 use crate::util::parse::*;
-
-const WIDTH: usize = 16;
-const LENGTH: usize = 1 << WIDTH;
+use std::array::from_fn;
 
 pub fn parse(input: &str) -> Vec<i32> {
     input.iter_signed().collect()
@@ -49,37 +47,32 @@ pub fn part2(input: &[i32]) -> usize {
     let mut fine = 0;
     let mut coarse = 0;
     let mut compact = Vec::new();
-    let mut cache = vec![[(0_u16, 0_u8, 0_u8); LENGTH]; WIDTH];
-
-    // Precompute all possible combinations. For each binary starting number we can start at any
-    // offset from 0..16.
-    for i in 0..WIDTH {
-        for j in 0..LENGTH {
-            let mut offset = i as u16;
-            let mut value = j as u16;
-            let mut steps = 0;
-
-            while offset < 16 {
-                value ^= 1 << offset;
-                steps += 1;
-                offset += 3 - ((value >> offset) & 1);
-            }
 
-            cache[i][j] = (value, steps, offset as u8 - i as u8);
-        }
-    }
+    // Precompute all possible combinations for each binary starting number from 0 to 2^16,
+    // starting at any offset from 0..3. Higher offsets are rarer and are computed on demand.
+    let cache: Vec<[_; 0x10000]> =
+        (0..3).map(|offset| from_fn(|value| compute_block(value, offset))).collect();
 
     while index < jump.len() {
         if index < coarse {
+            if index % 16 >= 3 {
+                let j = index / 16;
+                let (next, steps, delta) = compute_block(compact[j], index % 16);
+
+                compact[j] = next as usize;
+                total += steps as usize;
+                index += delta as usize;
+            }
+
             // Index lies within precomputed blocks.
-            let base = index / 16;
-            let offset = index % 16;
-            let value = compact[base] as usize;
-            let (next, steps, delta) = cache[offset][value];
-
-            compact[base] = next;
-            total += steps as usize;
-            index += delta as usize;
+            for j in (index / 16)..(coarse / 16) {
+                let value = compact[j];
+                let (next, steps, delta) = cache[index % 16][value];
+
+                compact[j] = next as usize;
+                total += steps as usize;
+                index += delta as usize;
+            }
         } else {
             // Fall back to part one approach.
             let next = index.wrapping_add(jump[index] as usize);
@@ -93,7 +86,7 @@ pub fn part2(input: &[i32]) -> usize {
                 if fine.is_multiple_of(16) {
                     let value = (coarse..fine).rev().fold(0, |acc, i| (acc << 1) | (jump[i] & 1));
                     coarse = fine;
-                    compact.push(value as u16);
+                    compact.push(value as usize);
                 }
             }
 
@@ -103,3 +96,17 @@ pub fn part2(input: &[i32]) -> usize {
 
     total
 }
+
+#[inline]
+fn compute_block(mut value: usize, mut offset: usize) -> (u16, u8, u8) {
+    let start = offset;
+    let mut steps = 0;
+
+    while offset < 16 {
+        value ^= 1 << offset;
+        steps += 1;
+        offset += 3 - ((value >> offset) & 1);
+    }
+
+    (value as u16, steps, (offset - start) as u8)
+}