use std::collections::HashMap;
use std::time::{Duration, Instant};

use anyhow::*;
use rivet_ups_protocol::versioned::UpsMessage;
use rivet_ups_protocol::{MessageBody, MessageChunk, MessageStart, PROTOCOL_VERSION};
use versioned_data_util::OwnedVersionedData;

/// Interval at which `ChunkTracker::gc` is expected to be called.
pub const CHUNK_BUFFER_GC_INTERVAL: Duration = Duration::from_secs(60);
/// Maximum time a partially received message is retained before `gc` drops it.
pub const CHUNK_BUFFER_MAX_AGE: Duration = Duration::from_secs(300);

/// Accumulates the payload of a multi-chunk message while its chunks arrive.
#[derive(Debug)]
pub struct ChunkBuffer {
	pub message_id: [u8; 16],
	/// Number of chunks received so far (also the next expected chunk index).
	pub received_chunks: u32,
	/// When the most recent chunk arrived; used by GC to drop stale buffers.
	pub last_chunk_ts: Instant,
	pub buffer: Vec<u8>,
	/// Total number of chunks announced by the MessageStart.
	pub chunk_count: u32,
	pub reply_subject: Option<String>,
}

/// Reassembles multi-chunk UPS messages from individually received chunk messages.
pub struct ChunkTracker {
	chunks_in_process: HashMap<[u8; 16], ChunkBuffer>,
}

impl ChunkTracker {
	pub fn new() -> Self {
		Self {
			chunks_in_process: HashMap::new(),
		}
	}

	/// Processes a single encoded chunk message.
	///
	/// Returns `Ok(Some((payload, reply_subject)))` once every chunk of a message has been
	/// received, and `Ok(None)` while more chunks are still expected.
	pub fn process_chunk(
		&mut self,
		raw_message: &[u8],
	) -> Result<Option<(Vec<u8>, Option<String>)>> {
		let message = UpsMessage::deserialize_with_embedded_version(raw_message)?;

		match message.body {
			MessageBody::MessageStart(msg) => {
				// If only one chunk, return immediately
				if msg.chunk_count == 1 {
					return Ok(Some((msg.payload, msg.reply_subject)));
				}

				// Start of a multi-chunk message
				let buffer = ChunkBuffer {
					message_id: msg.message_id,
					received_chunks: 1,
					last_chunk_ts: Instant::now(),
					buffer: msg.payload,
					chunk_count: msg.chunk_count,
					reply_subject: msg.reply_subject,
				};
				self.chunks_in_process.insert(msg.message_id, buffer);
				Ok(None)
			}
			MessageBody::MessageChunk(msg) => {
				// Find the matching buffer using message_id
				let buffer = self.chunks_in_process.get_mut(&msg.message_id);

				let Some(buffer) = buffer else {
					bail!(
						"received chunk {} for message {:?} but no matching buffer found",
						msg.chunk_index,
						msg.message_id
					);
				};

				// Validate chunk order
				if buffer.received_chunks != msg.chunk_index {
					bail!(
						"received chunk {} but expected chunk {} for message {:?}",
						msg.chunk_index,
						buffer.received_chunks,
						msg.message_id
					);
				}

				// Update buffer
				buffer.buffer.extend_from_slice(&msg.payload);
				buffer.received_chunks += 1;
				buffer.last_chunk_ts = Instant::now();
				let is_complete = buffer.received_chunks == buffer.chunk_count;

				if is_complete {
					let completed_buffer = self.chunks_in_process.remove(&msg.message_id).unwrap();
					Ok(Some((
						completed_buffer.buffer,
						completed_buffer.reply_subject,
					)))
				} else {
					Ok(None)
				}
			}
		}
	}

	/// Drops partially received messages whose last chunk arrived more than
	/// `CHUNK_BUFFER_MAX_AGE` ago.
	pub fn gc(&mut self) {
		let now = Instant::now();
		let size_before = self.chunks_in_process.len();
		self.chunks_in_process
			.retain(|_, buffer| now.duration_since(buffer.last_chunk_ts) < CHUNK_BUFFER_MAX_AGE);
		let size_after = self.chunks_in_process.len();

		tracing::debug!(
			?size_before,
			?size_after,
			"performed chunk buffer garbage collection"
		);
	}
}
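
// Illustrative sketch, not used elsewhere in this module: one way to drive `ChunkTracker::gc`
// on `CHUNK_BUFFER_GC_INTERVAL`. It assumes the tracker is shared behind a std `Mutex`; a
// caller on an async runtime would typically use a timer task instead of a dedicated thread.
#[allow(dead_code)]
pub fn spawn_gc_thread(tracker: std::sync::Arc<std::sync::Mutex<ChunkTracker>>) {
	std::thread::spawn(move || loop {
		std::thread::sleep(CHUNK_BUFFER_GC_INTERVAL);
		// Drop stale partial messages; a poisoned mutex means the owning side already panicked.
		tracker
			.lock()
			.expect("chunk tracker mutex poisoned")
			.gc();
	});
}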

/// Splits a payload into chunks that fit within message size limits.
///
/// This function handles chunking by accounting for different overhead
/// between the first chunk (MessageStart) and subsequent chunks (MessageChunk).
///
/// The first chunk carries additional metadata like the reply_subject and chunk_count,
/// which means it has more protocol overhead and less room for payload data.
/// Subsequent chunks only carry a chunk_index, allowing them to fit more payload.
///
/// This optimization ensures:
/// - Reply subject is only transmitted once (in MessageStart)
/// - Maximum payload utilization in each chunk
/// - Efficient bandwidth usage for multi-chunk messages
///
/// # Returns
/// A vector of payload chunks, where each chunk is sized to fit within the message limit
/// after accounting for protocol overhead.
pub fn split_payload_into_chunks(
	payload: &[u8],
	max_message_size: usize,
	message_id: [u8; 16],
	reply_subject: Option<&str>,
) -> Result<Vec<Vec<u8>>> {
	// Calculate overhead for MessageStart (first chunk)
	let start_message = MessageStart {
		message_id,
		chunk_count: 1,
		reply_subject: reply_subject.map(|s| s.to_string()),
		payload: vec![],
	};
	let start_ups_message = rivet_ups_protocol::UpsMessage {
		body: MessageBody::MessageStart(start_message),
	};
	let start_overhead = UpsMessage::latest(start_ups_message)
		.serialize_with_embedded_version(PROTOCOL_VERSION)?
		.len();

	// Calculate overhead for MessageChunk (subsequent chunks)
	let chunk_message = MessageChunk {
		message_id,
		chunk_index: 0,
		payload: vec![],
	};
	let chunk_ups_message = rivet_ups_protocol::UpsMessage {
		body: MessageBody::MessageChunk(chunk_message),
	};
	let chunk_overhead = UpsMessage::latest(chunk_ups_message)
		.serialize_with_embedded_version(PROTOCOL_VERSION)?
		.len();

	// Calculate max payload sizes
	let first_chunk_max_payload = max_message_size.saturating_sub(start_overhead);
	let other_chunk_max_payload = max_message_size.saturating_sub(chunk_overhead);

	if first_chunk_max_payload == 0 || other_chunk_max_payload == 0 {
		bail!("message overhead exceeds max message size");
	}

	// Calculate how many chunks we need
	if payload.len() <= first_chunk_max_payload {
		// Single chunk - all data fits in first message
		return Ok(vec![payload.to_vec()]);
	}

	// Multi-chunk: first chunk + remaining chunks
	let remaining_after_first = payload.len() - first_chunk_max_payload;
	let additional_chunks =
		(remaining_after_first + other_chunk_max_payload - 1) / other_chunk_max_payload;

	let mut chunks = Vec::new();

	// First chunk (smaller due to reply_subject overhead)
	chunks.push(payload[..first_chunk_max_payload].to_vec());

	// Subsequent chunks
	let mut offset = first_chunk_max_payload;
	for _ in 0..additional_chunks {
		let end = std::cmp::min(offset + other_chunk_max_payload, payload.len());
		chunks.push(payload[offset..end].to_vec());
		offset = end;
	}

	Ok(chunks)
}
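
// Illustrative sketch (test-only) of how `split_payload_into_chunks` behaves: the payload is
// split into multiple chunks whose concatenation reproduces the original bytes. The 16 KiB
// payload, 1 KiB limit, and reply subject below are placeholder values invented for the test.
#[cfg(test)]
mod split_payload_tests {
	use super::*;

	#[test]
	fn split_covers_entire_payload() -> anyhow::Result<()> {
		let payload: Vec<u8> = (0..16_384u32).map(|i| i as u8).collect();
		let message_id = [7u8; 16];

		let chunks =
			split_payload_into_chunks(&payload, 1024, message_id, Some("test.reply"))?;

		// Multi-chunk split: rejoining the chunks must reproduce the original payload.
		assert!(chunks.len() > 1);
		assert_eq!(chunks.concat(), payload);

		Ok(())
	}
}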

/// Encodes a chunk to the resulting BARE message.
pub fn encode_chunk(
	payload: Vec<u8>,
	chunk_idx: u32,
	chunk_count: u32,
	message_id: [u8; 16],
	reply_subject: Option<String>,
) -> Result<Vec<u8>> {
	let body = if chunk_idx == 0 {
		// First chunk - MessageStart
		MessageBody::MessageStart(MessageStart {
			message_id,
			chunk_count,
			reply_subject,
			payload,
		})
	} else {
		// Subsequent chunks - MessageChunk
		MessageBody::MessageChunk(MessageChunk {
			message_id,
			chunk_index: chunk_idx,
			payload,
		})
	};

	let ups_message = rivet_ups_protocol::UpsMessage { body };
	UpsMessage::latest(ups_message).serialize_with_embedded_version(PROTOCOL_VERSION)
}
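
// Illustrative sketch (test-only) of the full send/receive path: split a payload, encode each
// chunk to its BARE message, then feed the encoded messages through `ChunkTracker` and check
// that the original payload and reply subject are recovered. All sizes and the subject are
// placeholder values invented for the test.
#[cfg(test)]
mod reassembly_tests {
	use super::*;

	#[test]
	fn chunks_reassemble_to_original_payload() -> anyhow::Result<()> {
		let payload: Vec<u8> = (0..8_192u32).map(|i| (i % 251) as u8).collect();
		let message_id = [42u8; 16];
		let reply_subject = Some("test.rpc.reply".to_string());

		let chunks =
			split_payload_into_chunks(&payload, 1024, message_id, reply_subject.as_deref())?;
		let chunk_count = chunks.len() as u32;

		let mut tracker = ChunkTracker::new();
		let mut completed = None;
		for (idx, chunk) in chunks.into_iter().enumerate() {
			// Only the first chunk (MessageStart) carries the reply subject.
			let subject = if idx == 0 { reply_subject.clone() } else { None };
			let encoded = encode_chunk(chunk, idx as u32, chunk_count, message_id, subject)?;
			completed = tracker.process_chunk(&encoded)?;
		}

		let (reassembled, subject) =
			completed.expect("final chunk should complete the message");
		assert_eq!(reassembled, payload);
		assert_eq!(subject, reply_subject);

		Ok(())
	}
}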