Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve inlining for bitwriter #47

Merged
merged 2 commits into from
Sep 28, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 34 additions & 22 deletions src/structs/bit_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,30 +22,43 @@ impl BitWriter {
};
}

#[inline(never)]
fn flush_bytes_slowly(&mut self) {
let mut tmp_current_bit = self.current_bit;
let mut tmp_fill_register = self.fill_register;

while tmp_current_bit <= 56 {
let b = (tmp_fill_register >> 56) as u8;
/// flushes whole bytes from the register into the data buffer
fn flush_whole_bytes(&mut self) {
while self.current_bit <= 56 {
let b = (self.fill_register >> 56) as u8;
if b != 0xff {
self.data_buffer.push(b);
} else {
// escape 0xff here to avoid multiple scans of the same data
self.data_buffer.extend_from_slice(&[0xff, 0]);
}

tmp_fill_register <<= 8;
tmp_current_bit += 8;
self.fill_register <<= 8;
self.current_bit += 8;
}

self.fill_register = tmp_fill_register;
self.current_bit = tmp_current_bit;
}

#[inline(always)]
pub fn write(&mut self, mut val: u32, mut new_bits: u32) {
pub fn write(&mut self, val: u32, new_bits: u32) {
/// Slow path: appends the eight bytes of `fill_register` to `data_buffer`,
/// most significant byte first, inserting a `0x00` stuffing byte after every
/// `0xff` byte.
///
/// This path is taken rarely but expands into a lot of code when inlined, so it
/// is kept out of line to leave the caller small and with few branches.
///
/// It is also used when the buffer is about to overflow, so that the buffer
/// growing logic (much larger than a plain insert) is not inlined into the
/// caller either.
#[inline(never)]
#[cold]
fn write_ff_encoded(data_buffer: &mut Vec<u8>, fill_register: u64) {
    // big-endian order yields the same bytes as shifting by 56, 48, ..., 0
    for &byte in fill_register.to_be_bytes().iter() {
        match byte {
            // escape 0xff right away so the data never has to be scanned again
            0xff => data_buffer.extend_from_slice(&[0xff, 0]),
            plain => data_buffer.push(plain),
        }
    }
}

debug_assert!(
val < (1 << new_bits),
"value {0} should fit into the number of {1} bits provided",
Expand All @@ -61,8 +74,8 @@ impl BitWriter {
// if not, fill the register up to the 64 bit boundary so that we can flush it, hopefully without any 0xff bytes
let fill = self.fill_register | (val as u64).wrapping_shr(new_bits - self.current_bit);

new_bits -= self.current_bit;
val &= (1 << new_bits) - 1;
let leftover_new_bits = new_bits - self.current_bit;
let leftover_val = val & (1 << leftover_new_bits) - 1;

// flush bytes slowly if we have any 0xff bytes or if we are about to overflow the buffer
// (overflow check matches implementation in RawVec so that the optimizer can remove the buffer growing code)
Expand All @@ -73,14 +86,13 @@ impl BitWriter {
.wrapping_sub(self.data_buffer.len())
< 8
{
self.fill_register = fill;
self.current_bit = 0;
self.flush_bytes_slowly();
write_ff_encoded(&mut self.data_buffer, fill);
} else {
self.data_buffer.extend_from_slice(&fill.to_be_bytes());
}
self.fill_register = (val as u64).wrapping_shl(64 - new_bits); // support corner case where new_bits is zero, we don't want to panic
self.current_bit = 64 - new_bits;

self.fill_register = (leftover_val as u64).wrapping_shl(64 - leftover_new_bits); // support corner case where new_bits is zero, we don't want to panic
self.current_bit = 64 - leftover_new_bits;
}
}

Expand All @@ -91,7 +103,7 @@ impl BitWriter {
offset <<= 1;
}

self.flush_bytes_slowly();
self.flush_whole_bytes();

debug_assert!(
self.current_bit == 64,
Expand All @@ -102,7 +114,7 @@ impl BitWriter {
// flushes the data buffer while escaping all 0xff characters
pub fn flush_with_escape<W: Write>(&mut self, w: &mut W) -> anyhow::Result<()> {
// flush any remaining whole bytes
self.flush_bytes_slowly();
self.flush_whole_bytes();

w.write_all(&self.data_buffer[..])?;

Expand Down