Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

simd digit parsing #8

Closed
wants to merge 4 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
278 changes: 143 additions & 135 deletions src/parser.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use crate::framebuffer::FrameBuffer;
use const_format::formatcp;
use log::{info, warn};
use std::simd::{u32x8, Simd, SimdUint};
use std::simd::{u32x8, Simd, SimdUint, u8x16, u32x4, SimdPartialOrd, ToBitMask};
use std::sync::Arc;
use log::{info, warn};
use tokio::io::AsyncWriteExt;

pub const PARSER_LOOKAHEAD: usize = "PX 1234 1234 rrggbbaa\n".len(); // Longest possible command
Expand Down Expand Up @@ -75,163 +75,106 @@ pub async fn parse_pixelflut_commands(
let current_command = unsafe { (buffer.as_ptr().add(i) as *const u64).read_unaligned() };
if current_command & 0x00ff_ffff == string_to_number(b"PX \0\0\0\0\0") {
i += 3;
// Parse first x coordinate char
if buffer[i] >= b'0' && buffer[i] <= b'9' {
x = (buffer[i] - b'0') as usize;
i += 1;

// Parse optional second x coordinate char
if buffer[i] >= b'0' && buffer[i] <= b'9' {
// TODO: Test bitshifts and add instead of multiplication
// i = (i << 3) + (i << 1);
// i = (i * 8) + (i * 2);
// i = 8i + 2i
// i = 10i
x = 10 * x + (buffer[i] - b'0') as usize;
i += 1;

// Parse optional third x coordinate char
if buffer[i] >= b'0' && buffer[i] <= b'9' {
x = 10 * x + (buffer[i] - b'0') as usize;
i += 1;
let (mut x, x_size) = simd_digit_parsing(unsafe {buffer.as_ptr().add(i)});
i += x_size + 1;
let (mut y, y_size) = simd_digit_parsing(unsafe {buffer.as_ptr().add(i)});
i += y_size;

// Parse optional forth x coordinate char
if buffer[i] >= b'0' && buffer[i] <= b'9' {
x = 10 * x + (buffer[i] - b'0') as usize;
i += 1;
}
}
}
if x_size != 0 && y_size != 0 {
x += connection_x_offset;
y += connection_y_offset;

// Separator between x and y
// Separator between coordinates and color
if buffer[i] == b' ' {
i += 1;

// Parse first y coordinate char
if buffer[i] >= b'0' && buffer[i] <= b'9' {
y = (buffer[i] - b'0') as usize;
i += 1;
// TODO: Determine what clients use more: RGB, RGBA or gg variant.
// If RGBA is used more often move the RGB code below the RGBA code

// Parse optional second y coordinate char
if buffer[i] >= b'0' && buffer[i] <= b'9' {
y = 10 * y + (buffer[i] - b'0') as usize;
i += 1;
// Must be followed by 6 bytes RGB and newline or ...
if buffer[i + 6] == b'\n' {
last_byte_parsed = i + 6;
i += 7; // We can advance one byte more than normal as we use continue and therefore not get incremented at the end of the loop

// Parse optional third y coordinate char
if buffer[i] >= b'0' && buffer[i] <= b'9' {
y = 10 * y + (buffer[i] - b'0') as usize;
i += 1;
let rgba: u32 = simd_unhex(&buffer[i - 7..i + 1]);

// Parse optional forth y coordinate char
if buffer[i] >= b'0' && buffer[i] <= b'9' {
y = 10 * y + (buffer[i] - b'0') as usize;
i += 1;
}
}
}

x += connection_x_offset;
y += connection_y_offset;

// Separator between coordinates and color
if buffer[i] == b' ' {
i += 1;

// TODO: Determine what clients use more: RGB, RGBA or gg variant.
// If RGBA is used more often move the RGB code below the RGBA code

// Must be followed by 6 bytes RGB and newline or ...
if buffer[i + 6] == b'\n' {
last_byte_parsed = i + 6;
i += 7; // We can advance one byte more than normal as we use continue and therefore not get incremented at the end of the loop

let rgba: u32 = simd_unhex(&buffer[i - 7..i + 1]);
fb.set(x, y, rgba & 0x00ff_ffff);
continue;
}

fb.set(x, y, rgba & 0x00ff_ffff);
continue;
}
// ... or must be followed by 8 bytes RGBA and newline
#[cfg(not(feature = "alpha"))]
if buffer[i + 8] == b'\n' {
last_byte_parsed = i + 8;
i += 9; // We can advance one byte more than normal as we use continue and therefore not get incremented at the end of the loop

// ... or must be followed by 8 bytes RGBA and newline
#[cfg(not(feature = "alpha"))]
if buffer[i + 8] == b'\n' {
last_byte_parsed = i + 8;
i += 9; // We can advance one byte more than normal as we use continue and therefore not get incremented at the end of the loop
let rgba: u32 = simd_unhex(&buffer[i - 9..i - 1]);

let rgba: u32 = simd_unhex(&buffer[i - 9..i - 1]);
fb.set(x, y, rgba & 0x00ff_ffff);
continue;
}
#[cfg(feature = "alpha")]
if buffer[i + 8] == b'\n' {
last_byte_parsed = i + 8;
i += 9; // We can advance one byte more than normal as we use continue and therefore not get incremented at the end of the loop

fb.set(x, y, rgba & 0x00ff_ffff);
continue;
}
#[cfg(feature = "alpha")]
if buffer[i + 8] == b'\n' {
last_byte_parsed = i + 8;
i += 9; // We can advance one byte more than normal as we use continue and therefore not get incremented at the end of the loop
let rgba = simd_unhex(&buffer[i - 9..i - 1]);

let rgba = simd_unhex(&buffer[i - 9..i - 1]);
let alpha = (rgba >> 24) & 0xff;

let alpha = (rgba >> 24) & 0xff;
if alpha == 0 || x >= fb.get_width() || y >= fb.get_height() {
continue;
}

if alpha == 0 || x >= fb.get_width() || y >= fb.get_height() {
continue;
}
let alpha_comp = 0xff - alpha;
let current = fb.get_unchecked(x, y);
let r = (rgba >> 16) & 0xff;
let g = (rgba >> 8) & 0xff;
let b = rgba & 0xff;

let alpha_comp = 0xff - alpha;
let current = fb.get_unchecked(x, y);
let r = (rgba >> 16) & 0xff;
let g = (rgba >> 8) & 0xff;
let b = rgba & 0xff;

let r: u32 =
(((current >> 24) & 0xff) * alpha_comp + r * alpha) / 0xff;
let g: u32 =
(((current >> 16) & 0xff) * alpha_comp + g * alpha) / 0xff;
let b: u32 =
(((current >> 8) & 0xff) * alpha_comp + b * alpha) / 0xff;

fb.set(x, y, r << 16 | g << 8 | b);
continue;
}
let r: u32 = (((current >> 24) & 0xff) * alpha_comp + r * alpha) / 0xff;
let g: u32 = (((current >> 16) & 0xff) * alpha_comp + g * alpha) / 0xff;
let b: u32 = (((current >> 8) & 0xff) * alpha_comp + b * alpha) / 0xff;

// ... for the efficient/lazy clients
if buffer[i + 2] == b'\n' {
last_byte_parsed = i + 2;
i += 3; // We can advance one byte more than normal as we use continue and therefore not get incremented at the end of the loop
fb.set(x, y, r << 16 | g << 8 | b);
continue;
}

let base = simd_unhex(&buffer[i - 3..i + 5]) & 0xff;
// ... for the efficient/lazy clients
if buffer[i + 2] == b'\n' {
last_byte_parsed = i + 2;
i += 3; // We can advance one byte more than normal as we use continue and therefore not get incremented at the end of the loop

let rgba: u32 = base << 16 | base << 8 | base;
let base = simd_unhex(&buffer[i - 3..i + 5]) & 0xff;

fb.set(x, y, rgba);
let rgba: u32 = base << 16 | base << 8 | base;

continue;
}
}
fb.set(x, y, rgba);

// End of command to read Pixel value
if buffer[i] == b'\n' {
last_byte_parsed = i;
i += 1;
if let Some(rgb) = fb.get(x, y) {
match stream
.write_all(
format!(
"PX {} {} {:06x}\n",
// We don't want to return the actual (absolute) coordinates, the client should also get the result offseted
x - connection_x_offset,
y - connection_y_offset,
rgb.to_be() >> 8
)
.as_bytes(),
)
.await
{
Ok(_) => (),
Err(_) => continue,
}
}
continue;
continue;
}
} else if buffer[i] == b'\n' {
last_byte_parsed = i;
i += 1;
if let Some(rgb) = fb.get(x, y) {
match stream
.write_all(
format!(
"PX {} {} {:06x}\n",
// We don't want to return the actual (absolute) coordinates, the client should also get the result offseted
x - connection_x_offset,
y - connection_y_offset,
rgb.to_be() >> 8
)
.as_bytes(),
)
.await
{
Ok(_) => (),
Err(_) => continue,
}
}
continue;
}
}
} else if current_command & 0x0000_ffff_ffff_ffff == string_to_number(b"OFFSET \0\0") {
Expand Down Expand Up @@ -376,6 +319,38 @@ pub fn check_cpu_support() {
}
}

/// Base byte-shuffle indices used by `simd_digit_parsing`.
///
/// `simd_digit_parsing` adds the detected digit count `d` (0..=4) to every lane
/// and passes the result to `swizzle_dyn`. With wrapping `u8` addition the first
/// byte of each 4-byte group becomes `d - 1`, `d - 2`, `d - 3`, `d - 4`, i.e. the
/// position of the ones, tens, hundreds and thousands digit. Whenever `d` is too
/// small for a group its index ends up in 251..=255 instead, as does the filler
/// value 251 for every `d`.
///
/// NOTE(review): this relies on `swizzle_dyn` yielding 0 for out-of-range
/// indices and on lane-wise `+` wrapping instead of panicking — confirm against
/// the `std::simd` documentation of the toolchain in use.
const SIMD_POS: Simd<u8, 16> = u8x16::from_array([
255, 251, 251, 251, // group 0: 255 + d selects the last digit (ones); filler lanes stay out of range
254, 251, 251, 251, // group 1: 254 + d selects the tens digit once d >= 2
253, 251, 251, 251, // group 2: 253 + d selects the hundreds digit once d >= 3
252, 251, 251, 251, // group 3: 252 + d selects the thousands digit only when d == 4
]);
/// Decimal weight applied to each 32-bit group of the shuffled digits
/// (group 0 = ones ... group 3 = thousands) before they are summed up.
const FACTORS: Simd<u32, 4> = u32x4::from_array([1, 10, 100, 1000]);

/// Returns how many of the first four characters form the leading run of digits.
///
/// `space_mask` carries one bit per input character, lowest bit first: bit `n`
/// is set when character `n` terminates the number (the caller sets it for
/// every byte that is not an ASCII digit, not only for spaces).
///
/// Only the position of the lowest set bit matters. A sentinel at bit 4 caps
/// the result, so the return value is always within `0..=4`, even when the
/// lower four bits are all clear.
#[inline(always)]
fn count_digits(space_mask: u16) -> u32 {
    // Pretend the fifth character always ends the number.
    const SENTINEL: u16 = 1 << 4;

    let capped_mask = space_mask | SENTINEL;
    capped_mask.trailing_zeros()
}

/// Parses the run of up to four ASCII digits that starts at `value`.
///
/// Returns `(number, digit_count)`. `digit_count` is the length of the leading
/// digit run (0..=4) and `number` is its decimal value; when the first byte is
/// not a digit the result is `(0, 0)`.
///
/// The function always loads 16 bytes from `value` (unaligned). It performs no
/// bounds checking itself, so the caller must make sure that `value` points to
/// at least 16 readable bytes.
#[inline(never)]
fn simd_digit_parsing(value: *const u8) -> (usize, usize) {
    // Note kept from the original author: using u16 instead of u32 for the
    // simd pipeline takes 20% longer for some reason.

    // SAFETY: not verified here; soundness depends on the caller handing in a
    // pointer with 16 readable bytes behind it. `read_unaligned` removes any
    // alignment requirement.
    let raw_bytes: [u8; 16] = unsafe { value.cast::<[u8; 16]>().read_unaligned() };
    let bytes = u8x16::from_array(raw_bytes);

    // Map '0'..='9' to 0..=9. Anything greater than 9 afterwards is treated as
    // the end of the number (bytes below b'0' are expected to wrap around to
    // large values).
    let digit_values = bytes - u8x16::splat(b'0');
    let terminator_mask = digit_values.simd_gt(u8x16::splat(9)).to_bitmask();
    let digit_count = count_digits(terminator_mask);

    // Move the digits into the lowest byte of each 32-bit group, ones first;
    // groups without a digit are expected to come out as zero.
    let shuffle_indices = SIMD_POS + u8x16::splat(digit_count as u8);
    let placed_digits = digit_values.swizzle_dyn(shuffle_indices);

    // SAFETY: `u8x16` and `u32x4` are both 16-byte vectors of plain integers,
    // so reinterpreting the bits is valid. Like the pointer cast it replaces,
    // this assumes a little-endian lane layout.
    let digit_groups: u32x4 = unsafe { std::mem::transmute::<u8x16, u32x4>(placed_digits) };

    let weighted = digit_groups * FACTORS;
    (weighted.reduce_sum() as usize, digit_count as usize)
}

#[cfg(test)]
mod test {
use super::*;
Expand All @@ -385,4 +360,37 @@ mod test {
assert_eq!(simd_unhex(b"01234567"), 0x67452301);
assert_eq!(simd_unhex(b"fedcba98"), 0x98badcfe);
}

#[test]
fn test_count_digits() {
    // EXPECTED[mask] is the digit count for every possible 4-bit terminator
    // mask: the index of the lowest set bit, or 4 when no bit is set.
    const EXPECTED: [u32; 16] = [4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0];

    for (mask, &expected) in EXPECTED.iter().enumerate() {
        assert_eq!(count_digits(mask as u16), expected, "mask {mask:#06b}");
    }
}

#[test]
fn test_digit_parsing() {
    /// Copies `input` into a 16-byte buffer before parsing it.
    ///
    /// `simd_digit_parsing` always loads 16 bytes from the pointer it is given,
    /// so passing the 4-5 byte literals directly would read past their end.
    /// The padding consists of spaces, which are not digits.
    fn parse(input: &[u8]) -> (usize, usize) {
        let mut padded = [b' '; 16];
        padded[..input.len()].copy_from_slice(input);
        simd_digit_parsing(padded.as_ptr())
    }

    assert_eq!(parse(b"0123"), (123, 4));
    assert_eq!(parse(b"0 23"), (0, 1));
    assert_eq!(parse(b"5555"), (5555, 4));
    assert_eq!(parse(b"12 3"), (12, 2));
    assert_eq!(parse(b"123 "), (123, 3));
    assert_eq!(parse(b"1123"), (1123, 4));
    assert_eq!(parse(b" 123"), (0, 0));
    assert_eq!(parse(b"1\n123"), (1, 1));
    assert_eq!(parse(b"1a23"), (1, 1));
}
}
Loading