Faster approach using custom modulo function

maneatingape · maneatingape · commit 354e84705a38 · 2025-10-03T20:11:44.000+01:00
diff --git a/README.md b/README.md
@@ -313,7 +313,7 @@ Performance is reasonable even on older hardware, for example a 2011 MacBook Pro
 | 12 | [Digital Plumber](https://adventofcode.com/2017/day/12) | [Source](src/year2017/day12.rs) | 61 |
 | 13 | [Packet Scanners](https://adventofcode.com/2017/day/13) | [Source](src/year2017/day13.rs) | 1 |
 | 14 | [Disk Defragmentation](https://adventofcode.com/2017/day/14) | [Source](src/year2017/day14.rs) | 438 |
-| 15 | [Dueling Generators](https://adventofcode.com/2017/day/15) | [Source](src/year2017/day15.rs) | 26000 |
+| 15 | [Dueling Generators](https://adventofcode.com/2017/day/15) | [Source](src/year2017/day15.rs) | 20000 |
 | 16 | [Permutation Promenade](https://adventofcode.com/2017/day/16) | [Source](src/year2017/day16.rs) | 68 |
 | 17 | [Spinlock](https://adventofcode.com/2017/day/17) | [Source](src/year2017/day17.rs) | 85 |
 | 18 | [Duet](https://adventofcode.com/2017/day/18) | [Source](src/year2017/day18.rs) | 7 |
diff --git a/src/year2017/day15.rs b/src/year2017/day15.rs
@@ -14,11 +14,12 @@ use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
 use std::sync::mpsc::{Receiver, Sender, channel};
 use std::thread;
 
+const MOD: usize = 0x7fffffff;
 const PART_ONE: usize = 40_000_000;
 const PART_TWO: usize = 5_000_000;
 const BLOCK: usize = 50_000;
 
-type Input = (u32, u32);
+type Input = (usize, usize);
 
 /// State shared between all threads.
 pub struct Shared {
@@ -31,7 +32,7 @@ pub struct Shared {
 /// Generated numbers from `start` to `start + BLOCK`.
 struct Block {
     start: usize,
-    ones: u32,
+    ones: usize,
     fours: Vec<u16>,
     eights: Vec<u16>,
 }
@@ -51,30 +52,30 @@ pub fn parse(input: &str) -> Input {
     })
 }
 
-pub fn part1(input: &Input) -> u32 {
+pub fn part1(input: &Input) -> usize {
     input.0
 }
 
-pub fn part2(input: &Input) -> u32 {
+pub fn part2(input: &Input) -> usize {
     input.1
 }
 
 fn sender(shared: &Shared, tx: &Sender<Block>) {
     while !shared.done.load(Ordering::Relaxed) {
         // Start at any point in the sequence using modular exponentiation.
         let start = shared.start.fetch_add(BLOCK, Ordering::Relaxed);
-        let mut first = shared.first * 16807.mod_pow(start, 0x7fffffff);
-        let mut second = shared.second * 48271.mod_pow(start, 0x7fffffff);
+        let mut first = shared.first * 16807.mod_pow(start, MOD);
+        let mut second = shared.second * 48271.mod_pow(start, MOD);
 
-        // Estimate capacity at one quarter or one eight, plus a little extra for variance.
+        // Estimate capacity at one quarter or one eighth of the block.
         let mut ones = 0;
-        let mut fours = Vec::with_capacity((BLOCK * 30) / 100);
-        let mut eights = Vec::with_capacity((BLOCK * 15) / 100);
+        let mut fours = Vec::with_capacity(BLOCK / 4);
+        let mut eights = Vec::with_capacity(BLOCK / 8);
 
         // Check part one pairs immediately while queueing part two pairs.
         for _ in 0..BLOCK {
-            first = (first * 16807) % 0x7fffffff;
-            second = (second * 48271) % 0x7fffffff;
+            first = fast_mod(first * 16807);
+            second = fast_mod(second * 48271);
 
             let left = first as u16;
             let right = second as u16;
@@ -94,76 +95,56 @@ fn sender(shared: &Shared, tx: &Sender<Block>) {
     }
 }
 
-fn receiver(shared: &Shared, rx: &Receiver<Block>) -> (u32, u32) {
-    let mut remaining = PART_TWO;
-    let mut part_two = 0;
-
+fn receiver(shared: &Shared, rx: &Receiver<Block>) -> Input {
     let mut required = 0;
     let mut out_of_order = FastMap::new();
-    let mut blocks = Vec::new();
 
-    let mut fours_block = 0;
-    let mut fours_index = 0;
+    let mut fours = Vec::with_capacity(PART_TWO + BLOCK);
+    let mut eights = Vec::with_capacity(PART_TWO + BLOCK);
+    let mut start = 0;
 
-    let mut eights_block = 0;
-    let mut eights_index = 0;
+    let mut part_one = 0;
+    let mut part_two = 0;
 
-    while remaining > 0 {
+    while required < PART_ONE || fours.len() < PART_TWO || eights.len() < PART_TWO {
         // Blocks could be received in any order, as there's no guarantee threads will finish
         // processing at the same time. The `start` field of the block defines the order they
         // must be added to the vec.
-        while fours_block >= blocks.len() || eights_block >= blocks.len() {
-            let block = rx.recv().unwrap();
-            out_of_order.insert(block.start, block);
-
-            while let Some(next) = out_of_order.remove(&required) {
-                blocks.push(next);
-                required += BLOCK;
-            }
-        }
-
-        // Iterate over the minimum block size or numbers left to check.
-        let fours = &blocks[fours_block].fours;
-        let eights = &blocks[eights_block].eights;
-        let iterations = remaining.min(fours.len() - fours_index).min(eights.len() - eights_index);
+        let block = rx.recv().unwrap();
+        out_of_order.insert(block.start, block);
 
-        remaining -= iterations;
+        while let Some(block) = out_of_order.remove(&required) {
+            required += BLOCK;
 
-        for _ in 0..iterations {
-            if fours[fours_index] == eights[eights_index] {
-                part_two += 1;
+            if required <= PART_ONE {
+                part_one += block.ones;
             }
-            fours_index += 1;
-            eights_index += 1;
-        }
 
-        // If we've checked all the numbers in a block, advance to the next one.
-        // This may require waiting for a worker thread to create it first.
-        if fours_index == fours.len() {
-            fours_block += 1;
-            fours_index = 0;
-        }
-        if eights_index == eights.len() {
-            eights_block += 1;
-            eights_index = 0;
-        }
-    }
+            if fours.len() < PART_TWO {
+                fours.extend_from_slice(&block.fours);
+            }
 
-    // Just in case, make sure we have enough blocks for part one.
-    while required < PART_ONE {
-        let block = rx.recv().unwrap();
-        out_of_order.insert(block.start, block);
+            if eights.len() < PART_TWO {
+                eights.extend_from_slice(&block.eights);
+            }
 
-        while let Some(next) = out_of_order.remove(&required) {
-            blocks.push(next);
-            required += BLOCK;
+            let end = PART_TWO.min(fours.len()).min(eights.len());
+            part_two +=
+                fours[start..end].iter().zip(&eights[start..end]).filter(|(a, b)| a == b).count();
+            start = end;
         }
     }
 
-    // Signal worker thread to finish.
+    // Signal worker threads to finish.
     shared.done.store(true, Ordering::Relaxed);
 
-    // Return results.
-    let part_one = blocks.iter().take(PART_ONE / BLOCK).map(|p| p.ones).sum();
     (part_one, part_two)
 }
+
+#[inline]
+fn fast_mod(n: usize) -> usize {
+    let low = n & MOD;
+    let high = n >> 31;
+    let sum = low + high;
+    if sum < MOD { sum } else { sum - MOD }
+}