Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions wincode/benches/benchmarks.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use {
criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput},
rand::{Rng as _, SeedableRng},
serde::{Deserialize, Serialize},
std::{collections::HashMap, hint::black_box},
wincode::{
Expand Down Expand Up @@ -96,6 +97,25 @@ fn bench_primitives_comparison(c: &mut Criterion) {
group.finish();
}

/// Benchmarks decoding a stream of `char` values with `wincode::deserialize_from`.
///
/// The input is the UTF-8 text of 10,000 chars sampled from a `SmallRng` seeded
/// with a fixed value, so every run decodes the same bytes.
fn bench_char_deserialization(c: &mut Criterion) {
    c.bench_function("char/wincode/deserialize", |bencher| {
        // Fixed seed keeps the sampled text identical across runs.
        let rng = rand::prelude::SmallRng::seed_from_u64(0x42);
        let text = rng
            .sample_iter::<char, _>(rand::distr::StandardUniform)
            .take(10_000)
            .collect::<String>();

        bencher.iter(|| {
            // `black_box` hides the input from the optimizer.
            let mut remaining = black_box(text.as_bytes());
            let mut checksum = 0u32;
            loop {
                if remaining.is_empty() {
                    break;
                }
                // Each call advances `remaining` past the char it decoded.
                let decoded: char = wincode::deserialize_from(&mut remaining).unwrap();
                checksum = checksum.wrapping_add(u32::from(decoded));
            }
            // Consume the folded result so the decode loop is not optimized away.
            black_box(checksum);
        });
    });
}

fn bench_vec_comparison(c: &mut Criterion) {
let mut group = c.benchmark_group("Vec<u64>");

Expand Down Expand Up @@ -862,6 +882,7 @@ criterion_group!(
bench_vec_unit_enum_comparison,
bench_vec_same_sized_enum_comparison,
bench_vec_mixed_sized_enum_comparison,
bench_char_deserialization,
);

#[cfg(feature = "solana-short-vec")]
Expand Down
2 changes: 2 additions & 0 deletions wincode/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ pub enum ReadError {
Io(#[from] io::ReadError),
#[error(transparent)]
InvalidUtf8Encoding(#[from] Utf8Error),
#[error("Decoded UTF-8 value {0} is not a valid character")]
InvalidUtf8Code(u32),
#[error("Could not cast integer type to pointer sized type")]
PointerSizedReadError,
#[error(
Expand Down
66 changes: 46 additions & 20 deletions wincode/src/schema/impls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -349,30 +349,56 @@ unsafe impl<'de, C: ConfigCore> SchemaRead<'de, C> for char {

/// Reads one UTF-8 encoded `char` from `reader` and writes it into `dst`.
///
/// The lead byte is inspected first to select the encoding length. Lead bytes
/// outside `0x00..=0x7F`, `0xC2..=0xDF`, `0xE0..=0xEF` and `0xF0..=0xF4` are
/// rejected with `invalid_char_lead`.
///
/// NOTE(review): this span appears to interleave the pre-change and post-change
/// bodies of a diff (`b0` is bound twice and two `match b0` headers are present)
/// — confirm against the merged file before relying on it.
#[inline]
fn read(mut reader: impl Reader<'de>, dst: &mut MaybeUninit<Self::Dst>) -> ReadResult<()> {
// Inspect the lead byte; presumably `peek` does not consume it, since the
// single-byte path below consumes one byte explicitly — TODO confirm.
let b0 = *reader.peek()?;
use crate::error::ReadError;

// NOTE(review): the next five lines map the lead byte to an encoded length and
// look like the pre-change implementation.
let len = match b0 {
0x00..=0x7F => 1,
0xC2..=0xDF => 2,
0xE0..=0xEF => 3,
0xF0..=0xF4 => 4,
// We re-validate with from_utf8 only on error path to get proper Utf8Error.
// Marked cold: only called after one of the checks below has already failed.
#[cold]
fn utf8_error(buf: &[u8]) -> ReadError {
invalid_utf8_encoding(core::str::from_utf8(buf).unwrap_err())
}
let b0 = *reader.peek()?;
// Decode by lead-byte class; every multi-byte arm yields the raw code point.
let code_point = match b0 {
// Single-byte (ASCII) case: the byte value is the code point, so return early.
0x00..=0x7F => {
// SAFETY (presumed): `peek` just succeeded, so one byte should be available
// to consume — confirm against the `Reader` contract.
unsafe { reader.consume_unchecked(1) };
dst.write(b0 as char);
return Ok(());
}
// Two-byte sequence.
0xC2..=0xDF => {
let [b0, b1] = reader.take_array()?;
// Validate continuation byte (must be 10xxxxxx)
if (b1 & 0xC0) != 0x80 {
return Err(utf8_error(&[b0, b1]));
}
// 5 payload bits from the lead byte, 6 from the continuation byte.
((b0 & 0x1F) as u32) << 6 | ((b1 & 0x3F) as u32)
}
// Three-byte sequence.
0xE0..=0xEF => {
let [b0, b1, b2] = reader.take_array()?;
// Both trailing bytes must be continuation bytes (10xxxxxx).
if (b1 & 0xC0) != 0x80 || (b2 & 0xC0) != 0x80 {
return Err(utf8_error(&[b0, b1, b2]));
}
// Check for overlong encodings (< U+0800) and surrogates (U+D800..U+DFFF)
if (b0 == 0xE0 && b1 < 0xA0) || (b0 == 0xED && b1 >= 0xA0) {
return Err(utf8_error(&[b0, b1, b2]));
}
// 4 payload bits from the lead byte, 6 from each continuation byte.
((b0 & 0x0F) as u32) << 12 | ((b1 & 0x3F) as u32) << 6 | ((b2 & 0x3F) as u32)
}
// Four-byte sequence.
0xF0..=0xF4 => {
let [b0, b1, b2, b3] = reader.take_array()?;
// All three trailing bytes must be continuation bytes (10xxxxxx).
if (b1 & 0xC0) != 0x80 || (b2 & 0xC0) != 0x80 || (b3 & 0xC0) != 0x80 {
return Err(utf8_error(&[b0, b1, b2, b3]));
}
// Reject overlong encodings (< U+10000) and values above U+10FFFF.
if (b0 == 0xF0 && b1 < 0x90) || (b0 == 0xF4 && b1 > 0x8F) {
return Err(utf8_error(&[b0, b1, b2, b3]));
}
// 3 payload bits from the lead byte, 6 from each continuation byte.
((b0 & 0x07) as u32) << 18
| ((b1 & 0x3F) as u32) << 12
| ((b2 & 0x3F) as u32) << 6
| ((b3 & 0x3F) as u32)
}
// Any other lead byte cannot start a sequence accepted here.
_ => return Err(invalid_char_lead(b0)),
};

// NOTE(review): from here through the `consume_unchecked(len)` line the code
// uses `len` and `from_utf8`, which looks like the pre-change implementation.
if len == 1 {
unsafe { reader.consume_unchecked(1) };
dst.write(b0 as char);
return Ok(());
}

let buf = reader.fill_exact(len)?;
// TODO: Could implement a manual decoder that avoids UTF-8 validate + chars()
// and instead performs the UTF-8 validity checks and produces a `char` directly.
// Some quick micro-benchmarking revealed a roughly 2x speedup is possible,
// but this is on the order of a 1-2ns/byte delta.
let str = core::str::from_utf8(buf).map_err(invalid_utf8_encoding)?;
let c = str.chars().next().unwrap();
unsafe { reader.consume_unchecked(len) };
// Final guard: `char::from_u32` returns `None` for surrogates and values above
// U+10FFFF. NOTE(review): the range checks in the match arms appear to exclude
// both already, so this error is presumably unreachable — confirm.
let c = char::from_u32(code_point).ok_or(ReadError::InvalidUtf8Code(code_point))?;
dst.write(c);
Ok(())
}
Expand Down