From 5286b9d43e016694927beb65ebdf6844131a15f0 Mon Sep 17 00:00:00 2001
From: tanmay4l <mrspot67@gmail.com>
Date: Mon, 17 Nov 2025 15:49:02 +0530
Subject: [PATCH 1/4] Optimize char deserialization with manual UTF-8 decoder

---
 wincode/src/schema/impls.rs | 60 +++++++++++++++++++++++++++++++++----
 1 file changed, 54 insertions(+), 6 deletions(-)

diff --git a/wincode/src/schema/impls.rs b/wincode/src/schema/impls.rs
index edda6d2f..9eb34a4f 100644
--- a/wincode/src/schema/impls.rs
+++ b/wincode/src/schema/impls.rs
@@ -366,12 +366,60 @@ unsafe impl<'de, C: ConfigCore> SchemaRead<'de, C> for char {
         }
 
         let buf = reader.fill_exact(len)?;
-        // TODO: Could implement a manual decoder that avoids UTF-8 validate + chars()
-        // and instead performs the UTF-8 validity checks and produces a `char` directly.
-        // Some quick micro-benchmarking revealed a roughly 2x speedup is possible,
-        // but this is on the order of a 1-2ns/byte delta.
-        let str = core::str::from_utf8(buf).map_err(invalid_utf8_encoding)?;
-        let c = str.chars().next().unwrap();
+
+        // Manual UTF-8 decoder for 2x speedup by avoiding intermediate str allocation
+        let code_point = match len {
+            2 => {
+                let b1 = buf[1];
+                // Validate continuation byte (must be 10xxxxxx)
+                if (b1 & 0xC0) != 0x80 {
+                    core::str::from_utf8(buf).map_err(invalid_utf8_encoding)?;
+                    unreachable!();
+                }
+                ((b0 & 0x1F) as u32) << 6 | ((b1 & 0x3F) as u32)
+            }
+            3 => {
+                let b1 = buf[1];
+                let b2 = buf[2];
+                if (b1 & 0xC0) != 0x80 || (b2 & 0xC0) != 0x80 {
+                    core::str::from_utf8(buf).map_err(invalid_utf8_encoding)?;
+                    unreachable!();
+                }
+                // Check for overlong encodings (< U+0800) and surrogates (U+D800..U+DFFF)
+                if (b0 == 0xE0 && b1 < 0xA0) || (b0 == 0xED && b1 >= 0xA0) {
+                    core::str::from_utf8(buf).map_err(invalid_utf8_encoding)?;
+                    unreachable!();
+                }
+                ((b0 & 0x0F) as u32) << 12 | ((b1 & 0x3F) as u32) << 6 | ((b2 & 0x3F) as u32)
+            }
+            4 => {
+                let b1 = buf[1];
+                let b2 = buf[2];
+                let b3 = buf[3];
+                if (b1 & 0xC0) != 0x80 || (b2 & 0xC0) != 0x80 || (b3 & 0xC0) != 0x80 {
+                    core::str::from_utf8(buf).map_err(invalid_utf8_encoding)?;
+                    unreachable!();
+                }
+                if (b0 == 0xF0 && b1 < 0x90) || (b0 == 0xF4 && b1 > 0x8F) {
+                    core::str::from_utf8(buf).map_err(invalid_utf8_encoding)?;
+                    unreachable!();
+                }
+                ((b0 & 0x07) as u32) << 18
+                    | ((b1 & 0x3F) as u32) << 12
+                    | ((b2 & 0x3F) as u32) << 6
+                    | ((b3 & 0x3F) as u32)
+            }
+            _ => unreachable!(),
+        };
+
+        let c = match char::from_u32(code_point) {
+            Some(c) => c,
+            None => {
+                core::str::from_utf8(buf).map_err(invalid_utf8_encoding)?;
+                unreachable!();
+            }
+        };
+
         unsafe { reader.consume_unchecked(len) };
         dst.write(c);
         Ok(())

From ac9807bee97c10c370ea369723987195d109fe19 Mon Sep 17 00:00:00 2001
From: tanmay4l <mrspot67@gmail.com>
Date: Mon, 17 Nov 2025 21:05:03 +0530
Subject: [PATCH 2/4] Clippy-clean

---
 wincode/src/schema/impls.rs | 30 +++++++++++++-----------------
 1 file changed, 13 insertions(+), 17 deletions(-)

diff --git a/wincode/src/schema/impls.rs b/wincode/src/schema/impls.rs
index 9eb34a4f..f5f53b8b 100644
--- a/wincode/src/schema/impls.rs
+++ b/wincode/src/schema/impls.rs
@@ -367,14 +367,20 @@ unsafe impl<'de, C: ConfigCore> SchemaRead<'de, C> for char {
 
         let buf = reader.fill_exact(len)?;
 
+        // We re-validate with from_utf8 only on error path to get proper Utf8Error.
+        #[inline]
+        #[cold]
+        fn utf8_error(buf: &[u8]) -> crate::error::ReadError {
+            invalid_utf8_encoding(core::str::from_utf8(buf).unwrap_err())
+        }
+
         // Manual UTF-8 decoder for 2x speedup by avoiding intermediate str allocation
         let code_point = match len {
             2 => {
                 let b1 = buf[1];
                 // Validate continuation byte (must be 10xxxxxx)
                 if (b1 & 0xC0) != 0x80 {
-                    core::str::from_utf8(buf).map_err(invalid_utf8_encoding)?;
-                    unreachable!();
+                    return Err(utf8_error(buf));
                 }
                 ((b0 & 0x1F) as u32) << 6 | ((b1 & 0x3F) as u32)
             }
@@ -382,13 +388,11 @@ unsafe impl<'de, C: ConfigCore> SchemaRead<'de, C> for char {
                 let b1 = buf[1];
                 let b2 = buf[2];
                 if (b1 & 0xC0) != 0x80 || (b2 & 0xC0) != 0x80 {
-                    core::str::from_utf8(buf).map_err(invalid_utf8_encoding)?;
-                    unreachable!();
+                    return Err(utf8_error(buf));
                 }
                 // Check for overlong encodings (< U+0800) and surrogates (U+D800..U+DFFF)
                 if (b0 == 0xE0 && b1 < 0xA0) || (b0 == 0xED && b1 >= 0xA0) {
-                    core::str::from_utf8(buf).map_err(invalid_utf8_encoding)?;
-                    unreachable!();
+                    return Err(utf8_error(buf));
                 }
                 ((b0 & 0x0F) as u32) << 12 | ((b1 & 0x3F) as u32) << 6 | ((b2 & 0x3F) as u32)
             }
@@ -397,12 +401,10 @@ unsafe impl<'de, C: ConfigCore> SchemaRead<'de, C> for char {
                 let b2 = buf[2];
                 let b3 = buf[3];
                 if (b1 & 0xC0) != 0x80 || (b2 & 0xC0) != 0x80 || (b3 & 0xC0) != 0x80 {
-                    core::str::from_utf8(buf).map_err(invalid_utf8_encoding)?;
-                    unreachable!();
+                    return Err(utf8_error(buf));
                 }
                 if (b0 == 0xF0 && b1 < 0x90) || (b0 == 0xF4 && b1 > 0x8F) {
-                    core::str::from_utf8(buf).map_err(invalid_utf8_encoding)?;
-                    unreachable!();
+                    return Err(utf8_error(buf));
                 }
                 ((b0 & 0x07) as u32) << 18
                     | ((b1 & 0x3F) as u32) << 12
@@ -412,13 +414,7 @@ unsafe impl<'de, C: ConfigCore> SchemaRead<'de, C> for char {
             _ => unreachable!(),
         };
 
-        let c = match char::from_u32(code_point) {
-            Some(c) => c,
-            None => {
-                core::str::from_utf8(buf).map_err(invalid_utf8_encoding)?;
-                unreachable!();
-            }
-        };
+        let c = char::from_u32(code_point).ok_or_else(|| utf8_error(buf))?;
 
         unsafe { reader.consume_unchecked(len) };
         dst.write(c);

From c7c9b19eee799cdbcdc2a29505a5006cbc3dc21e Mon Sep 17 00:00:00 2001
From: Kamil Skalski <kamil.skalski@gmail.com>
Date: Thu, 19 Feb 2026 07:09:34 +0800
Subject: [PATCH 3/4] Add benchmark for char deserialization

---
 wincode/benches/benchmarks.rs | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/wincode/benches/benchmarks.rs b/wincode/benches/benchmarks.rs
index 4b38fc65..802a92a9 100644
--- a/wincode/benches/benchmarks.rs
+++ b/wincode/benches/benchmarks.rs
@@ -1,5 +1,6 @@
 use {
     criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput},
+    rand::{Rng as _, SeedableRng},
     serde::{Deserialize, Serialize},
     std::{collections::HashMap, hint::black_box},
     wincode::{
@@ -96,6 +97,25 @@ fn bench_primitives_comparison(c: &mut Criterion) {
     group.finish();
 }
 
+fn bench_char_deserialization(c: &mut Criterion) {
+    c.bench_function("char/wincode/deserialize", |b| {
+        let str: String = rand::prelude::SmallRng::seed_from_u64(0x42)
+            .sample_iter::<char, _>(rand::distr::StandardUniform)
+            .take(10_000)
+            .collect();
+
+        b.iter(|| {
+            let mut bytes = black_box(str.as_bytes());
+            let mut sum: u32 = 0;
+            while !bytes.is_empty() {
+                let ch: char = wincode::deserialize_from(&mut bytes).unwrap();
+                sum = sum.wrapping_add(ch as u32);
+            }
+            black_box(sum);
+        });
+    });
+}
+
 fn bench_vec_comparison(c: &mut Criterion) {
     let mut group = c.benchmark_group("Vec<u64>");
 
@@ -862,6 +882,7 @@ criterion_group!(
     bench_vec_unit_enum_comparison,
     bench_vec_same_sized_enum_comparison,
     bench_vec_mixed_sized_enum_comparison,
+    bench_char_deserialization,
 );
 
 #[cfg(feature = "solana-short-vec")]

From 090e66991dceb504497720215fdd38e0a3f30529 Mon Sep 17 00:00:00 2001
From: Kamil Skalski <kamil.skalski@gmail.com>
Date: Thu, 19 Feb 2026 07:10:16 +0800
Subject: [PATCH 4/4] use take_array

---
 wincode/src/error.rs        |  2 ++
 wincode/src/schema/impls.rs | 62 +++++++++++++------------------------
 2 files changed, 24 insertions(+), 40 deletions(-)

diff --git a/wincode/src/error.rs b/wincode/src/error.rs
index a759e06d..ce5c8bb7 100644
--- a/wincode/src/error.rs
+++ b/wincode/src/error.rs
@@ -34,6 +34,8 @@ pub enum ReadError {
     Io(#[from] io::ReadError),
     #[error(transparent)]
     InvalidUtf8Encoding(#[from] Utf8Error),
+    #[error("Decoded UTF-8 value {0} is not a valid character")]
+    InvalidUtf8Code(u32),
     #[error("Could not cast integer type to pointer sized type")]
     PointerSizedReadError,
     #[error(
diff --git a/wincode/src/schema/impls.rs b/wincode/src/schema/impls.rs
index f5f53b8b..f1d8b53e 100644
--- a/wincode/src/schema/impls.rs
+++ b/wincode/src/schema/impls.rs
@@ -349,74 +349,56 @@ unsafe impl<'de, C: ConfigCore> SchemaRead<'de, C> for char {
 
     #[inline]
     fn read(mut reader: impl Reader<'de>, dst: &mut MaybeUninit<Self::Dst>) -> ReadResult<()> {
-        let b0 = *reader.peek()?;
-
-        let len = match b0 {
-            0x00..=0x7F => 1,
-            0xC2..=0xDF => 2,
-            0xE0..=0xEF => 3,
-            0xF0..=0xF4 => 4,
-            _ => return Err(invalid_char_lead(b0)),
-        };
-
-        if len == 1 {
-            unsafe { reader.consume_unchecked(1) };
-            dst.write(b0 as char);
-            return Ok(());
-        }
-
-        let buf = reader.fill_exact(len)?;
+        use crate::error::ReadError;
 
         // We re-validate with from_utf8 only on error path to get proper Utf8Error.
-        #[inline]
         #[cold]
-        fn utf8_error(buf: &[u8]) -> crate::error::ReadError {
+        fn utf8_error(buf: &[u8]) -> ReadError {
             invalid_utf8_encoding(core::str::from_utf8(buf).unwrap_err())
         }
-
-        // Manual UTF-8 decoder for 2x speedup by avoiding intermediate str allocation
-        let code_point = match len {
-            2 => {
-                let b1 = buf[1];
+        let b0 = *reader.peek()?;
+        let code_point = match b0 {
+            0x00..=0x7F => {
+                unsafe { reader.consume_unchecked(1) };
+                dst.write(b0 as char);
+                return Ok(());
+            }
+            0xC2..=0xDF => {
+                let [b0, b1] = reader.take_array()?;
                 // Validate continuation byte (must be 10xxxxxx)
                 if (b1 & 0xC0) != 0x80 {
-                    return Err(utf8_error(buf));
+                    return Err(utf8_error(&[b0, b1]));
                 }
                 ((b0 & 0x1F) as u32) << 6 | ((b1 & 0x3F) as u32)
             }
-            3 => {
-                let b1 = buf[1];
-                let b2 = buf[2];
+            0xE0..=0xEF => {
+                let [b0, b1, b2] = reader.take_array()?;
                 if (b1 & 0xC0) != 0x80 || (b2 & 0xC0) != 0x80 {
-                    return Err(utf8_error(buf));
+                    return Err(utf8_error(&[b0, b1, b2]));
                 }
                 // Check for overlong encodings (< U+0800) and surrogates (U+D800..U+DFFF)
                 if (b0 == 0xE0 && b1 < 0xA0) || (b0 == 0xED && b1 >= 0xA0) {
-                    return Err(utf8_error(buf));
+                    return Err(utf8_error(&[b0, b1, b2]));
                 }
                 ((b0 & 0x0F) as u32) << 12 | ((b1 & 0x3F) as u32) << 6 | ((b2 & 0x3F) as u32)
             }
-            4 => {
-                let b1 = buf[1];
-                let b2 = buf[2];
-                let b3 = buf[3];
+            0xF0..=0xF4 => {
+                let [b0, b1, b2, b3] = reader.take_array()?;
                 if (b1 & 0xC0) != 0x80 || (b2 & 0xC0) != 0x80 || (b3 & 0xC0) != 0x80 {
-                    return Err(utf8_error(buf));
+                    return Err(utf8_error(&[b0, b1, b2, b3]));
                 }
                 if (b0 == 0xF0 && b1 < 0x90) || (b0 == 0xF4 && b1 > 0x8F) {
-                    return Err(utf8_error(buf));
+                    return Err(utf8_error(&[b0, b1, b2, b3]));
                 }
                 ((b0 & 0x07) as u32) << 18
                     | ((b1 & 0x3F) as u32) << 12
                     | ((b2 & 0x3F) as u32) << 6
                     | ((b3 & 0x3F) as u32)
             }
-            _ => unreachable!(),
+            _ => return Err(invalid_char_lead(b0)),
         };
 
-        let c = char::from_u32(code_point).ok_or_else(|| utf8_error(buf))?;
-
-        unsafe { reader.consume_unchecked(len) };
+        let c = char::from_u32(code_point).ok_or(ReadError::InvalidUtf8Code(code_point))?;
         dst.write(c);
         Ok(())
     }