Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parser improvements #5

Merged
merged 10 commits into from
Jun 16, 2023
49 changes: 1 addition & 48 deletions benches/benchmarks.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use breakwater::{
framebuffer::FrameBuffer,
parser::{from_hex_char_lookup, from_hex_char_map, parse_pixelflut_commands, ParserState},
parser::{parse_pixelflut_commands, ParserState},
test::helpers::{get_commands_to_draw_rect, DevNullTcpStream},
};
use criterion::{
Expand All @@ -20,46 +20,6 @@ async fn invoke_parse_pixelflut_commands(
parse_pixelflut_commands(input, fb, &mut stream, parser_state).await;
}

#[allow(unused)] // Benchmarks are commented out by default
fn invoke_from_hex_char_map() -> u8 {
// So that we actually compute something
let mut result = 0;
for char in b'0'..=b'9' {
result |= from_hex_char_map(char);
}
for char in b'a'..=b'f' {
result |= from_hex_char_map(char);
}
for char in b'A'..=b'F' {
result |= from_hex_char_map(char);
}
result |= from_hex_char_map(b'\n');
result |= from_hex_char_map(b' ');
result |= from_hex_char_map(b';');
result |= from_hex_char_map(b'%');
result
}

#[allow(unused)] // Benchmarks are commented out by default
fn invoke_from_hex_char_lookup() -> u8 {
// So that we actually compute something
let mut result = 0;
for char in b'0'..=b'9' {
result |= from_hex_char_lookup(char);
}
for char in b'a'..=b'f' {
result |= from_hex_char_lookup(char);
}
for char in b'A'..=b'F' {
result |= from_hex_char_lookup(char);
}
result |= from_hex_char_lookup(b'\n');
result |= from_hex_char_lookup(b' ');
result |= from_hex_char_lookup(b';');
result |= from_hex_char_lookup(b'%');
result
}

fn from_elem(c: &mut Criterion) {
let draw_commands = get_commands_to_draw_rect(FRAMEBUFFER_WIDTH, FRAMEBUFFER_HEIGHT, 0x123456);
let draw_commands = draw_commands.as_bytes();
Expand Down Expand Up @@ -94,13 +54,6 @@ fn from_elem(c: &mut Criterion) {
// .iter(|| invoke_parse_pixelflut_commands(input, &fb, parser_state.clone()));
// },
// );

// c.bench_function("from_hex_char_map", |b: &mut criterion::Bencher| {
// b.iter(invoke_from_hex_char_map)
// });
// c.bench_function("from_hex_char_lookup", |b: &mut criterion::Bencher| {
// b.iter(invoke_from_hex_char_lookup)
// });
}

criterion_group!(
Expand Down
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#![feature(portable_simd)]

pub mod args;
pub mod framebuffer;
pub mod network;
Expand Down
16 changes: 16 additions & 0 deletions src/network.rs
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,22 @@ mod test {
assert_eq!(expected, stream.get_output());
}

/// Sends a six-digit and a two-digit `PX` colour command for pixel (0, 0)
/// through `handle_connection`.
///
/// The connection handling must not panic, and afterwards the lower 24 bits
/// of the pixel must read `0xaaaaaa` in both cases.
#[rstest]
#[case("PX 0 0 aaaaaa\n")]
#[case("PX 0 0 aa\n")]
#[tokio::test]
async fn test_safe(
    #[case] input: &str,
    ip: IpAddr,
    fb: Arc<FrameBuffer>,
    statistics_channel: (Sender<StatisticsEvent>, Receiver<StatisticsEvent>),
) {
    // Keep the receiving half alive until the end of the test.
    let (statistics_tx, _statistics_rx) = statistics_channel;
    let mut client = MockTcpStream::from_input(input);

    // A panic in here fails the test.
    handle_connection(&mut client, ip, Arc::clone(&fb), statistics_tx).await;

    let pixel = fb.get(0, 0).unwrap();
    assert_eq!(pixel & 0x00ff_ffff, 0xaaaaaa);
}

#[rstest]
#[case(5, 5, 0, 0)]
#[case(6, 6, 0, 0)]
Expand Down
200 changes: 75 additions & 125 deletions src/parser.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::framebuffer::FrameBuffer;
use const_format::formatcp;
use std::simd::{u32x8, Simd, SimdUint};
use std::sync::Arc;
use tokio::io::AsyncWriteExt;

Expand Down Expand Up @@ -35,6 +36,17 @@ impl ParserState {
}
}

/// Packs the first eight bytes of `input` into a `u64`, least significant byte first.
///
/// `input[0]` occupies bits 0..8 and `input[7]` bits 56..64. The parser uses
/// this to build the command constants it compares its masked 8-byte reads against.
///
/// # Panics
///
/// Panics if `input` holds fewer than eight bytes.
const fn string_to_number(input: &[u8]) -> u64 {
    let mut packed = 0u64;
    // `for` loops are not allowed in a `const fn`, so count down by hand,
    // starting with the highest byte.
    let mut position: u32 = 8;
    while position > 0 {
        position -= 1;
        packed |= (input[position as usize] as u64) << (8 * position);
    }
    packed
}

/// Returns the offset (think of index in [u8]) of the last byte of the last fully parsed command.
///
/// TODO: Implement support for 16K (15360 × 8640).
Expand All @@ -59,10 +71,8 @@ pub async fn parse_pixelflut_commands(
let loop_end = buffer.len().saturating_sub(PARSER_LOOKAHEAD); // Let's extract the .len() call and the subtraction into it's own variable so we only compute it once

while i < loop_end {
// Check for buffer[i] = "PX "
if unsafe { (buffer.as_ptr().add(i) as *const u32).read_unaligned() } & 0x00ff_ffff
== 0x50582000_u32.swap_bytes()
{
let current_command = unsafe { (buffer.as_ptr().add(i) as *const u64).read_unaligned() };
if current_command & 0x00ff_ffff == string_to_number(b"PX \0\0\0\0\0") {
i += 3;
// Parse first x coordinate char
if buffer[i] >= b'0' && buffer[i] <= b'9' {
Expand Down Expand Up @@ -134,25 +144,9 @@ pub async fn parse_pixelflut_commands(
last_byte_parsed = i + 6;
i += 7; // We can advance one byte more than normal as we use continue and therefore not get incremented at the end of the loop

// 30% slower (38,334 ms vs 29,385 ms)
// let str = unsafe {
// std::str::from_utf8_unchecked(&buffer[i - 7..i - 2])
// };
// let rgba = u32::from_str_radix(str, 16).unwrap();

let rgba: u32 =
(ASCII_HEXADECIMAL_VALUES[buffer[i - 3] as usize] as u32) << 20
| (ASCII_HEXADECIMAL_VALUES[buffer[i - 2] as usize] as u32)
<< 16
| (ASCII_HEXADECIMAL_VALUES[buffer[i - 5] as usize] as u32)
<< 12
| (ASCII_HEXADECIMAL_VALUES[buffer[i - 4] as usize] as u32)
<< 8
| (ASCII_HEXADECIMAL_VALUES[buffer[i - 7] as usize] as u32)
<< 4
| (ASCII_HEXADECIMAL_VALUES[buffer[i - 6] as usize] as u32);
let rgba: u32 = simd_unhex(&buffer[i - 7..i + 1]);

fb.set(x, y, rgba);
fb.set(x, y, rgba & 0x00ff_ffff);
continue;
}

Expand All @@ -162,46 +156,29 @@ pub async fn parse_pixelflut_commands(
last_byte_parsed = i + 8;
i += 9; // We can advance one byte more than normal as we use continue and therefore not get incremented at the end of the loop

let rgba: u32 =
(ASCII_HEXADECIMAL_VALUES[buffer[i - 5] as usize] as u32) << 20
| (ASCII_HEXADECIMAL_VALUES[buffer[i - 4] as usize] as u32)
<< 16
| (ASCII_HEXADECIMAL_VALUES[buffer[i - 7] as usize] as u32)
<< 12
| (ASCII_HEXADECIMAL_VALUES[buffer[i - 6] as usize] as u32)
<< 8
| (ASCII_HEXADECIMAL_VALUES[buffer[i - 9] as usize] as u32)
<< 4
| (ASCII_HEXADECIMAL_VALUES[buffer[i - 8] as usize] as u32);

fb.set(x, y, rgba);
let rgba: u32 = simd_unhex(&buffer[i - 9..i - 1]);

fb.set(x, y, rgba & 0x00ff_ffff);
continue;
}
#[cfg(feature = "alpha")]
if buffer[i + 8] == b'\n' {
last_byte_parsed = i + 8;
i += 9; // We can advance one byte more than normal as we use continue and therefore not get incremented at the end of the loop

let alpha =
(ASCII_HEXADECIMAL_VALUES[buffer[i - 3] as usize] as u32) << 4
| (ASCII_HEXADECIMAL_VALUES[buffer[i - 2] as usize] as u32);
let rgba = simd_unhex(&buffer[i - 9..i - 1]);

let alpha = (rgba >> 24) & 0xff;

if alpha == 0 || x >= fb.get_width() || y >= fb.get_height() {
continue;
}

let alpha_comp = 0xff - alpha;
let current = fb.get_unchecked(x, y);
let r = (ASCII_HEXADECIMAL_VALUES[buffer[i - 5] as usize] as u32)
<< 4
| (ASCII_HEXADECIMAL_VALUES[buffer[i - 4] as usize] as u32);
let g = (ASCII_HEXADECIMAL_VALUES[buffer[i - 7] as usize] as u32)
<< 4
| (ASCII_HEXADECIMAL_VALUES[buffer[i - 6] as usize] as u32);
let b = (ASCII_HEXADECIMAL_VALUES[buffer[i - 9] as usize] as u32)
<< 4
| (ASCII_HEXADECIMAL_VALUES[buffer[i - 8] as usize] as u32);
let r = (rgba >> 16) & 0xff;
let g = (rgba >> 8) & 0xff;
let b = rgba & 0xff;

let r: u32 =
(((current >> 24) & 0xff) * alpha_comp + r * alpha) / 0xff;
Expand All @@ -219,17 +196,9 @@ pub async fn parse_pixelflut_commands(
last_byte_parsed = i + 2;
i += 3; // We can advance one byte more than normal as we use continue and therefore not get incremented at the end of the loop

let rgba: u32 =
(ASCII_HEXADECIMAL_VALUES[buffer[i - 3] as usize] as u32) << 20
| (ASCII_HEXADECIMAL_VALUES[buffer[i - 2] as usize] as u32)
<< 16
| (ASCII_HEXADECIMAL_VALUES[buffer[i - 3] as usize] as u32)
<< 12
| (ASCII_HEXADECIMAL_VALUES[buffer[i - 2] as usize] as u32)
<< 8
| (ASCII_HEXADECIMAL_VALUES[buffer[i - 3] as usize] as u32)
<< 4
| (ASCII_HEXADECIMAL_VALUES[buffer[i - 2] as usize] as u32);
let base = simd_unhex(&buffer[i - 3..i + 5]) & 0xff;

let rgba: u32 = base << 16 | base << 8 | base;

fb.set(x, y, rgba);

Expand Down Expand Up @@ -264,35 +233,7 @@ pub async fn parse_pixelflut_commands(
}
}
}
// Check for buffer[i] = "SIZE"
} else if unsafe { (buffer.as_ptr().add(i) as *const u32).read_unaligned() }
== 0x53495a45_u32.swap_bytes()
{
i += 4;
last_byte_parsed = i - 1;

stream
.write_all(format!("SIZE {} {}\n", fb.get_width(), fb.get_height()).as_bytes())
.await
.expect("Failed to write bytes to tcp socket");
continue;
// Check for buffer[i] = "HELP"
} else if unsafe { (buffer.as_ptr().add(i) as *const u32).read_unaligned() }
== 0x48454c50_u32.swap_bytes()
{
i += 4;
last_byte_parsed = i - 1;

stream
.write_all(HELP_TEXT)
.await
.expect("Failed to write bytes to tcp socket");
continue;
// Check for buffer[i] = "OFFSET "
} else if unsafe { (buffer.as_ptr().add(i) as *const u64).read_unaligned() }
& 0x0000_ffff_ffff_ffff
== 0x4f464653455420_u64.swap_bytes()
{
} else if current_command & 0x0000_ffff_ffff_ffff == string_to_number(b"OFFSET \0\0") {
i += 7;
// Parse first x coordinate char
if buffer[i] >= b'0' && buffer[i] <= b'9' {
Expand Down Expand Up @@ -354,6 +295,24 @@ pub async fn parse_pixelflut_commands(
}
}
}
} else if current_command & 0xffff_ffff == string_to_number(b"SIZE\0\0\0\0") {
i += 4;
last_byte_parsed = i - 1;

stream
.write_all(format!("SIZE {} {}\n", fb.get_width(), fb.get_height()).as_bytes())
.await
.expect("Failed to write bytes to tcp socket");
continue;
} else if current_command & 0xffff_ffff == string_to_number(b"HELP\0\0\0\0") {
i += 4;
last_byte_parsed = i - 1;

stream
.write_all(HELP_TEXT)
.await
.expect("Failed to write bytes to tcp socket");
continue;
}

i += 1;
Expand All @@ -366,43 +325,35 @@ pub async fn parse_pixelflut_commands(
}
}

/// Converts one ASCII hexadecimal digit into its numeric value.
///
/// Accepts `0-9`, `a-f` and `A-F`; every other byte yields `0`.
#[inline(always)]
pub fn from_hex_char_map(char: u8) -> u8 {
    // `to_digit(16)` recognises exactly the digits plus `a..=f` / `A..=F`
    // and returns `None` for everything else.
    (char as char)
        .to_digit(16)
        .map_or(0, |digit| digit as u8)
}

// fn main() {
// let numbers = (0..=255)
// .map(|char| match char {
// b'0'..=b'9' => char - b'0',
// b'a'..=b'f' => char - b'a' + 10,
// b'A'..=b'F' => char - b'A' + 10,
// _ => 0,
// })
// .map(|number| number.to_string())
// .collect::<Vec<String>>();
// println!("{}", numbers.join(", "));
// }
/// Lookup table indexed by byte value: entries for `'0'..='9'` hold 0..=9,
/// entries for `'A'..='F'` and `'a'..='f'` hold 10..=15, and every other
/// entry is 0.
const ASCII_HEXADECIMAL_VALUES: [u8; 256] = [
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0,
0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0,
];
/// Per-lane left shift applied by `simd_unhex` to each decoded nibble.
/// Within each pair of characters the first one becomes the high nibble, and
/// every following pair lands one byte higher in the resulting u32.
const SHIFT_PATTERN: Simd<u32, 8> = u32x8::from_array([4, 0, 12, 8, 20, 16, 28, 24]);
/// Right-shift amount `simd_unhex` uses to tell letters (result 1) from digits (result 0).
const SIMD_6: Simd<u32, 8> = u32x8::from_array([6; 8]);
/// Mask `simd_unhex` uses to keep only the low nibble of every character.
const SIMD_F: Simd<u32, 8> = u32x8::from_array([0xf; 8]);
/// Multiplier `simd_unhex` uses to add 9 to the low nibble of letters.
const SIMD_9: Simd<u32, 8> = u32x8::from_array([9; 8]);

/// Parse a slice of 8 hexadecimal ASCII characters into a single u32 number.
/// The result is unspecified (but still memory-safe) for characters that are not hexadecimal digits.
#[inline(always)]
pub fn from_hex_char_lookup(char: u8) -> u8 {
ASCII_HEXADECIMAL_VALUES[char as usize]
fn simd_unhex(value: &[u8]) -> u32 {
// The length is only verified in debug builds; this check is compiled out otherwise.
#[cfg(debug_assertions)]
assert_eq!(value.len(), 8);
// Widen every input byte into its own u32 lane.
// Feel free to find a better, but still fast, way to cast all bytes to u32.
let input = u32x8::from_array([
value[0] as u32,
value[1] as u32,
value[2] as u32,
value[3] as u32,
value[4] as u32,
value[5] as u32,
value[6] as u32,
value[7] as u32,
]);
// Heavily inspired by https://github.com/nervosnetwork/faster-hex/blob/master/src/decode.rs#L80
// 0 for '0'..='9' (0x30..=0x39), 1 for 'A'..='F' (0x41..) and 'a'..='f' (0x61..).
let sr6 = input >> SIMD_6;
// Low nibble: the digit value for '0'..='9', 1..=6 for the letters.
let and15 = input & SIMD_F;
// Letters get +9 so that 'a'/'A' becomes 10; digits get +0.
let mul = sr6 * SIMD_9;
let hexed = and15 + mul;
// Move every nibble to its position in the result (see SHIFT_PATTERN).
let shifted = hexed << SHIFT_PATTERN;
// For valid hex input the nibbles occupy disjoint bits, so OR-ing all lanes assembles the u32.
shifted.reduce_or()
}

#[cfg(test)]
Expand All @@ -411,8 +362,7 @@ mod test {

#[test]
fn test_from_hex_char() {
// NOTE(review): both sides of this assertion call `from_hex_char_map`, so it can
// never fail — presumably one side was meant to be `from_hex_char_lookup`; confirm.
for c in 0..=255 {
assert_eq!(from_hex_char_map(c), from_hex_char_map(c));
}
// Each pair of characters forms one byte; the first pair ends up in the lowest byte.
assert_eq!(simd_unhex(b"01234567"), 0x67452301);
assert_eq!(simd_unhex(b"fedcba98"), 0x98badcfe);
}
}
Loading