
Commit c8d73c1

Convert many_small_writes_delayed_acks into a bench
1 parent: dcfd862

7 files changed: +157, -75 lines


Cargo.lock

Lines changed: 1 addition & 0 deletions
Generated file; diff not rendered by default.

quinn-proto/Cargo.toml

Lines changed: 7 additions & 0 deletions
@@ -35,6 +35,7 @@ tracing-log = ["tracing/log"]
 rustls-log = ["rustls?/logging"]
 # Enable qlog support
 qlog = ["dep:qlog"]
+bench = ["dep:bencher"]
 
 # Internal (PRIVATE!) features used to aid testing.
 # Don't rely on these whatsoever. They may disappear at any time.
@@ -44,6 +45,7 @@ __rustls-post-quantum-test = []
 [dependencies]
 arbitrary = { workspace = true, optional = true }
 aws-lc-rs = { workspace = true, optional = true }
+bencher = { workspace = true, optional = true }
 bytes = { workspace = true }
 fastbloom = { workspace = true, optional = true }
 identity-hash = { workspace = true }
@@ -82,6 +84,11 @@ wasm-bindgen-test = { workspace = true }
 proptest = { workspace = true }
 test-strategy = { workspace = true }
 
+[[bench]]
+name = "send_buffer"
+harness = false
+required-features = ["bench"]
+
 [lints.rust]
 # https://rust-fuzz.github.io/book/cargo-fuzz/guide.html#cfgfuzzing
 unexpected_cfgs = { level = "warn", check-cfg = ['cfg(fuzzing)'] }

quinn-proto/benches/send_buffer.rs

Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
+use bencher::{benchmark_group, benchmark_main};
+use iroh_quinn_proto::bench_exports::send_buffer::*;
+
+// Since we can't easily access test utilities, this is a minimal benchmark
+// that measures the actual problematic operations directly
+
+benchmark_group!(
+    benches,
+    get_into_many_segments,
+    get_loop_many_segments,
+);
+benchmark_main!(benches);

quinn-proto/src/connection/mod.rs

Lines changed: 1 addition & 2 deletions
@@ -73,8 +73,7 @@ pub use paths::{ClosedPath, PathEvent, PathId, PathStatus, RttEstimator, SetPath
 use paths::{PathData, PathState};
 
 pub(crate) mod qlog;
-
-mod send_buffer;
+pub(crate) mod send_buffer;
 
 mod spaces;
 #[cfg(fuzzing)]

quinn-proto/src/connection/send_buffer.rs

Lines changed: 60 additions & 2 deletions
@@ -93,7 +93,6 @@ impl SendBufferData {
     /// Returns data which is associated with a range
     ///
     /// Requesting a range outside of the buffered data will panic.
-    #[cfg(test)]
     fn get(&self, offsets: Range<u64>) -> &[u8] {
         assert!(
             offsets.start >= self.range().start && offsets.end <= self.range().end,
@@ -245,7 +244,6 @@
     /// in noncontiguous fashion in the send buffer. In this case callers
     /// should call the function again with an incremented start offset to
    /// retrieve more data.
-    #[cfg(test)]
     pub(super) fn get(&self, offsets: Range<u64>) -> &[u8] {
         self.data.get(offsets)
     }
@@ -502,3 +500,63 @@ mod tests {
         data.get_into(0..1, &mut buf);
     }
 }
+
+#[cfg(feature = "bench")]
+pub mod send_buffer {
+    //! Bench fns for SendBuffer
+    //!
+    //! These are defined here and re-exported via `bench_exports` in lib.rs,
+    //! so we can access the private `SendBuffer` struct.
+    use bencher::Bencher;
+    use bytes::Bytes;
+    use super::SendBuffer;
+
+    /// Pathological case: many segments, get from end
+    pub fn get_into_many_segments(bench: &mut Bencher) {
+        let mut buf = SendBuffer::new();
+
+        const SEGMENTS: u64 = 10000;
+        const SEGMENT_SIZE: u64 = 10;
+        const PACKET_SIZE: u64 = 1200;
+        const BYTES: u64 = SEGMENTS * SEGMENT_SIZE;
+
+        // 10000 segments of 10 bytes each = 100KB total (same data size)
+        for i in 0..SEGMENTS {
+            buf.write(Bytes::from(vec![i as u8; SEGMENT_SIZE as usize]));
+        }
+
+        let mut tgt = Vec::with_capacity(PACKET_SIZE as usize);
+        bench.iter(|| {
+            // Get from end (very slow - scans through all 10000 segments)
+            tgt.clear();
+            buf.get_into(BYTES - PACKET_SIZE..BYTES, bencher::black_box(&mut tgt));
+        });
+    }
+
+    /// Get segments in the old way, using a loop of get calls
+    pub fn get_loop_many_segments(bench: &mut Bencher) {
+        let mut buf = SendBuffer::new();
+
+        const SEGMENTS: u64 = 10000;
+        const SEGMENT_SIZE: u64 = 10;
+        const PACKET_SIZE: u64 = 1200;
+        const BYTES: u64 = SEGMENTS * SEGMENT_SIZE;
+
+        // 10000 segments of 10 bytes each = 100KB total (same data size)
+        for i in 0..SEGMENTS {
+            buf.write(Bytes::from(vec![i as u8; SEGMENT_SIZE as usize]));
+        }
+
+        let mut tgt = Vec::with_capacity(PACKET_SIZE as usize);
+        bench.iter(|| {
+            // Get from end (very slow - each get call scans through all 10000 segments)
+            tgt.clear();
+            let mut range = BYTES - PACKET_SIZE..BYTES;
+            while range.start < range.end {
+                let slice = bencher::black_box(buf.get(range.clone()));
+                range.start += slice.len() as u64;
+                tgt.extend_from_slice(slice);
+            }
+        });
+    }
+}
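The two bench functions contrast a single `get_into` call against the old pattern of assembling a packet with repeated `get` calls; the cost difference comes from where each lookup starts its scan. Below is a minimal, self-contained sketch of that access pattern, using a toy `ToyBuffer` with illustrative `write`/`get`/`get_into` methods; it is not quinn-proto's actual `SendBuffer`, which works with `Range<u64>` offsets and tracks acked/unacked state.

```rust
// Illustrative toy only (not quinn-proto's SendBuffer): each write() becomes
// one segment, and a range near the tail is assembled in two ways.
struct ToyBuffer {
    segments: Vec<Vec<u8>>,
}

impl ToyBuffer {
    fn new() -> Self {
        Self { segments: Vec::new() }
    }

    fn write(&mut self, data: Vec<u8>) {
        self.segments.push(data);
    }

    /// Longest contiguous slice starting at `offset`.
    /// Re-scans segments from the front on every call: O(segments).
    fn get(&self, mut offset: usize) -> &[u8] {
        for seg in &self.segments {
            if offset < seg.len() {
                return &seg[offset..];
            }
            offset -= seg.len();
        }
        &[]
    }

    /// Copies `len` bytes starting at `offset` into `out`.
    /// Finds the starting segment once, then walks forward: O(segments + len).
    fn get_into(&self, mut offset: usize, mut len: usize, out: &mut Vec<u8>) {
        let mut idx = 0;
        while idx < self.segments.len() && offset >= self.segments[idx].len() {
            offset -= self.segments[idx].len();
            idx += 1;
        }
        while len > 0 && idx < self.segments.len() {
            let seg = &self.segments[idx][offset..];
            let take = seg.len().min(len);
            out.extend_from_slice(&seg[..take]);
            len -= take;
            offset = 0;
            idx += 1;
        }
    }
}

fn main() {
    let mut buf = ToyBuffer::new();
    for i in 0..10_000u64 {
        buf.write(vec![i as u8; 10]); // 10,000 tiny segments, as in the bench
    }
    let (start, end) = (10_000 * 10 - 1_200, 10_000 * 10);

    // Old pattern: every `get` rescans ~9,880 segments before reaching the
    // tail, and each call yields only one 10-byte segment, so it loops 120 times.
    let mut slow = Vec::new();
    let mut pos = start;
    while pos < end {
        let slice = buf.get(pos);
        let take = slice.len().min(end - pos);
        slow.extend_from_slice(&slice[..take]);
        pos += take;
    }

    // New pattern: one scan to the starting segment, then a forward copy.
    let mut fast = Vec::new();
    buf.get_into(start, end - start, &mut fast);

    assert_eq!(slow, fast);
}
```

Under these assumptions, assembling a 1200-byte range near the tail costs roughly 120 front-to-back scans with the loop, versus one scan plus a forward copy with `get_into`; that ratio is what the two benchmarks are meant to expose.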

quinn-proto/src/lib.rs

Lines changed: 7 additions & 1 deletion
@@ -41,7 +41,7 @@ mod bloom_token_log;
 #[cfg(feature = "bloom")]
 pub use bloom_token_log::BloomTokenLog;
 
-mod connection;
+pub(crate) mod connection;
 pub use crate::connection::{
     Chunk, Chunks, ClosePathError, ClosedPath, ClosedStream, Connection, ConnectionError,
     ConnectionStats, Datagrams, Event, FinishError, FrameStats, PathError, PathEvent, PathId,
@@ -113,6 +113,12 @@ pub(crate) use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
 #[cfg(all(target_family = "wasm", target_os = "unknown"))]
 pub(crate) use web_time::{Duration, Instant, SystemTime, UNIX_EPOCH};
 
+#[cfg(feature = "bench")]
+pub mod bench_exports {
+    //! Exports for benchmarks
+    pub use crate::connection::send_buffer::send_buffer;
+}
+
 #[cfg(fuzzing)]
 pub mod fuzzing {
     pub use crate::connection::{Retransmits, State as ConnectionState, StreamsState};

quinn-proto/src/tests/mod.rs

Lines changed: 69 additions & 70 deletions
@@ -538,76 +538,75 @@ fn high_latency_handshake()
     assert!(pair.server_conn_mut(server_ch).using_ecn());
 }
 
-// // Test to expose O(n²) behavior in SendBuffer with many small writes and delayed ACKs
-// #[test]
-// #[cfg(not(wasm_browser))]
-// fn many_small_writes_delayed_acks() {
-//     let _guard = subscribe();
-//     let mut pair = Pair::default();
-
-//     // Simulate high latency to delay ACKs
-//     pair.latency = Duration::from_millis(500);
-
-//     let (client_ch, server_ch) = pair.connect();
-
-//     let s = pair.client_streams(client_ch).open(Dir::Uni).unwrap();
-
-//     // Write many small messages (simulate fragmented buffer)
-//     const NUM_WRITES: usize = 100000;
-//     const WRITE_SIZE: usize = 10;
-
-//     for i in 0..NUM_WRITES {
-//         let data = vec![i as u8; WRITE_SIZE];
-//         pair.client_send(client_ch, s).write(&data).unwrap();
-//     }
-
-//     // The key insight: with high latency, the client will send many packets
-//     // before any ACKs arrive. This causes SendBuffer to accumulate many
-//     // unacked segments. We don't need to artificially limit driving -
-//     // the latency naturally creates the pathological state.
-
-//     // The high latency means:
-//     // 1. Client sends many packets quickly (all 500 writes)
-//     // 2. ACKs are delayed by 500ms RTT
-//     // 3. SendBuffer accumulates many unacked segments
-//     // 4. When retransmission or late transmission happens, get() scans are expensive
-
-//     let start = std::time::Instant::now();
-
-//     // Drive to completion
-//     // With O(n²) get() behavior, this will be slow due to many segments
-//     pair.drive();
-
-//     let elapsed = start.elapsed();
-
-//     // With O(n²) behavior and 500 segments, this could take 10-100ms
-//     // With O(n) or O(1), should be < 5ms
-//     // This is a performance regression test
-//     info!(
-//         "Time to drive {} small writes with delayed ACKs: {:?}",
-//         NUM_WRITES, elapsed
-//     );
-
-//     // Verify correctness - all data should be received
-//     let total_written = (NUM_WRITES * WRITE_SIZE) as u64;
-//     pair.client_send(client_ch, s).finish().unwrap();
-//     pair.drive();
-
-//     let mut recv = pair.server_recv(server_ch, s);
-//     let mut chunks = recv.read(false).unwrap();
-//     let mut received = 0;
-
-//     while let Ok(Some(chunk)) = chunks.next(usize::MAX) {
-//         received += chunk.bytes.len();
-//     }
-//     let _ = chunks.finalize();
-
-//     assert_eq!(received, total_written as usize);
-
-//     // This test exposes the pathology but doesn't strictly assert on timing
-//     // because timing tests are flaky in CI. The println! shows the issue.
-//     // To properly test, we'd need to instrument SendBuffer::get() to count scans.
-// }
+// Test to expose O(n²) behavior in SendBuffer with many small writes and delayed ACKs
+#[test]
+fn many_small_writes_delayed_acks() {
+    let _guard = subscribe();
+    let mut pair = Pair::default();
+
+    // Simulate high latency to delay ACKs
+    pair.latency = Duration::from_millis(500);
+
+    let (client_ch, server_ch) = pair.connect();
+
+    let s = pair.client_streams(client_ch).open(Dir::Uni).unwrap();
+
+    // Write many small messages (simulate fragmented buffer)
+    const NUM_WRITES: usize = 100000;
+    const WRITE_SIZE: usize = 10;
+
+    for i in 0..NUM_WRITES {
+        let data = vec![i as u8; WRITE_SIZE];
+        pair.client_send(client_ch, s).write(&data).unwrap();
+    }
+
+    // The key insight: with high latency, the client will send many packets
+    // before any ACKs arrive. This causes SendBuffer to accumulate many
+    // unacked segments. We don't need to artificially limit driving -
+    // the latency naturally creates the pathological state.
+
+    // The high latency means:
+    // 1. Client sends many packets quickly (all NUM_WRITES writes)
+    // 2. ACKs are delayed by 500ms RTT
+    // 3. SendBuffer accumulates many unacked segments
+    // 4. When retransmission or late transmission happens, get() scans are expensive
+
+    let start = std::time::Instant::now();
+
+    // Drive to completion
+    // With O(n²) get() behavior, this will be slow due to many segments
+    pair.drive();
+
+    let elapsed = start.elapsed();
+
+    // With O(n²) behavior and many segments, driving is measurably slow
+    // With O(n) or O(1), should be < 5ms
+    // This is a performance regression test
+    info!(
+        "Time to drive {} small writes with delayed ACKs: {:?}",
+        NUM_WRITES, elapsed
+    );
+
+    // Verify correctness - all data should be received
+    let total_written = (NUM_WRITES * WRITE_SIZE) as u64;
+    pair.client_send(client_ch, s).finish().unwrap();
+    pair.drive();
+
+    let mut recv = pair.server_recv(server_ch, s);
+    let mut chunks = recv.read(false).unwrap();
+    let mut received = 0;
+
+    while let Ok(Some(chunk)) = chunks.next(usize::MAX) {
+        received += chunk.bytes.len();
+    }
+    let _ = chunks.finalize();
+
+    assert_eq!(received, total_written as usize);
+
+    // This test exposes the pathology but doesn't strictly assert on timing
+    // because timing tests are flaky in CI. The info! log shows the issue.
+    // To properly test, we'd need to instrument SendBuffer::get() to count scans.
+}
 
 #[test]
 fn zero_rtt_happypath() {
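For a rough sense of the pathological state the test drives into, here is a back-of-the-envelope sketch (assuming roughly 1200-byte STREAM payloads per packet and that every 10-byte write stays buffered as its own unacked segment while ACKs are delayed; real pacing and congestion control will shift the exact figures):

```rust
fn main() {
    let writes: u64 = 100_000; // NUM_WRITES in the test
    let write_size: u64 = 10; // WRITE_SIZE
    let packet_payload: u64 = 1_200; // assumed STREAM payload per packet

    let total_bytes = writes * write_size; // 1_000_000 bytes buffered
    let packets = total_bytes.div_ceil(packet_payload); // ~834 packets to send
    let segments_per_packet = packet_payload / write_size; // 120 segments each

    // If assembling each packet's payload rescans the unacked segments from
    // the front, packet k skips roughly k * 120 segments, so total work grows
    // quadratically with the number of packets outstanding.
    let visits: u64 = (0..packets).map(|k| k * segments_per_packet).sum();

    println!("{total_bytes} bytes across {writes} segments -> {packets} packets");
    println!("~{visits} segment visits if every lookup scans from the front");
}
```

Tens of millions of segment visits for a single megabyte of buffered data is the behavior the new `send_buffer` bench above measures in isolation, without relying on wall-clock assertions that would be flaky in CI.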
