From e6e80ac8a10edd24ca9fdb2ec9f24ba71ffc0b65 Mon Sep 17 00:00:00 2001
From: tanmay4l <mrspot67@gmail.com>
Date: Mon, 17 Nov 2025 15:49:02 +0530
Subject: [PATCH 1/2] Optimize char deserialization with manual UTF-8 decoder

---
 wincode/src/schema/impls.rs | 60 +++++++++++++++++++++++++++++++++----
 1 file changed, 54 insertions(+), 6 deletions(-)

diff --git a/wincode/src/schema/impls.rs b/wincode/src/schema/impls.rs
index af326e85..f7c9fef2 100644
--- a/wincode/src/schema/impls.rs
+++ b/wincode/src/schema/impls.rs
@@ -244,12 +244,60 @@ impl<'de> SchemaRead<'de> for char {
         }
 
         let buf = reader.fill_exact(len)?;
-        // TODO: Could implement a manual decoder that avoids UTF-8 validate + chars()
-        // and instead performs the UTF-8 validity checks and produces a `char` directly.
-        // Some quick micro-benchmarking revealed a roughly 2x speedup is possible,
-        // but this is on the order of a 1-2ns/byte delta.
-        let str = core::str::from_utf8(buf).map_err(invalid_utf8_encoding)?;
-        let c = str.chars().next().unwrap();
+
+        // Manual UTF-8 decoder for 2x speedup by avoiding intermediate str allocation
+        let code_point = match len {
+            2 => {
+                let b1 = buf[1];
+                // Validate continuation byte (must be 10xxxxxx)
+                if (b1 & 0xC0) != 0x80 {
+                    core::str::from_utf8(buf).map_err(invalid_utf8_encoding)?;
+                    unreachable!();
+                }
+                ((b0 & 0x1F) as u32) << 6 | ((b1 & 0x3F) as u32)
+            }
+            3 => {
+                let b1 = buf[1];
+                let b2 = buf[2];
+                if (b1 & 0xC0) != 0x80 || (b2 & 0xC0) != 0x80 {
+                    core::str::from_utf8(buf).map_err(invalid_utf8_encoding)?;
+                    unreachable!();
+                }
+                // Check for overlong encodings (< U+0800) and surrogates (U+D800..U+DFFF)
+                if (b0 == 0xE0 && b1 < 0xA0) || (b0 == 0xED && b1 >= 0xA0) {
+                    core::str::from_utf8(buf).map_err(invalid_utf8_encoding)?;
+                    unreachable!();
+                }
+                ((b0 & 0x0F) as u32) << 12 | ((b1 & 0x3F) as u32) << 6 | ((b2 & 0x3F) as u32)
+            }
+            4 => {
+                let b1 = buf[1];
+                let b2 = buf[2];
+                let b3 = buf[3];
+                if (b1 & 0xC0) != 0x80 || (b2 & 0xC0) != 0x80 || (b3 & 0xC0) != 0x80 {
+                    core::str::from_utf8(buf).map_err(invalid_utf8_encoding)?;
+                    unreachable!();
+                }
+                if (b0 == 0xF0 && b1 < 0x90) || (b0 == 0xF4 && b1 > 0x8F) {
+                    core::str::from_utf8(buf).map_err(invalid_utf8_encoding)?;
+                    unreachable!();
+                }
+                ((b0 & 0x07) as u32) << 18
+                    | ((b1 & 0x3F) as u32) << 12
+                    | ((b2 & 0x3F) as u32) << 6
+                    | ((b3 & 0x3F) as u32)
+            }
+            _ => unreachable!(),
+        };
+
+        let c = match char::from_u32(code_point) {
+            Some(c) => c,
+            None => {
+                core::str::from_utf8(buf).map_err(invalid_utf8_encoding)?;
+                unreachable!();
+            }
+        };
+
         unsafe { reader.consume_unchecked(len) };
         dst.write(c);
         Ok(())

From 3a2c574bcc7d94116d9d2e0348299df87e6a12b5 Mon Sep 17 00:00:00 2001
From: tanmay4l <mrspot67@gmail.com>
Date: Mon, 17 Nov 2025 21:05:03 +0530
Subject: [PATCH 2/2] Clippy-clean

---
 wincode/src/schema/impls.rs | 30 +++++++++++++-----------------
 1 file changed, 13 insertions(+), 17 deletions(-)

diff --git a/wincode/src/schema/impls.rs b/wincode/src/schema/impls.rs
index f7c9fef2..c62d36e2 100644
--- a/wincode/src/schema/impls.rs
+++ b/wincode/src/schema/impls.rs
@@ -245,14 +245,20 @@ impl<'de> SchemaRead<'de> for char {
 
         let buf = reader.fill_exact(len)?;
 
+        // Error path only: re-run `from_utf8` on the bad bytes to get the precise `Utf8Error`.
+        #[inline]
+        #[cold]
+        fn utf8_error(buf: &[u8]) -> crate::error::ReadError {
+            invalid_utf8_encoding(core::str::from_utf8(buf).unwrap_err())
+        }
+
         // Manual UTF-8 decoder for 2x speedup by avoiding intermediate str allocation
         let code_point = match len {
             2 => {
                 let b1 = buf[1];
                 // Validate continuation byte (must be 10xxxxxx)
                 if (b1 & 0xC0) != 0x80 {
-                    core::str::from_utf8(buf).map_err(invalid_utf8_encoding)?;
-                    unreachable!();
+                    return Err(utf8_error(buf));
                 }
                 ((b0 & 0x1F) as u32) << 6 | ((b1 & 0x3F) as u32)
             }
@@ -260,13 +266,11 @@ impl<'de> SchemaRead<'de> for char {
                 let b1 = buf[1];
                 let b2 = buf[2];
                 if (b1 & 0xC0) != 0x80 || (b2 & 0xC0) != 0x80 {
-                    core::str::from_utf8(buf).map_err(invalid_utf8_encoding)?;
-                    unreachable!();
+                    return Err(utf8_error(buf));
                 }
                 // Check for overlong encodings (< U+0800) and surrogates (U+D800..U+DFFF)
                 if (b0 == 0xE0 && b1 < 0xA0) || (b0 == 0xED && b1 >= 0xA0) {
-                    core::str::from_utf8(buf).map_err(invalid_utf8_encoding)?;
-                    unreachable!();
+                    return Err(utf8_error(buf));
                 }
                 ((b0 & 0x0F) as u32) << 12 | ((b1 & 0x3F) as u32) << 6 | ((b2 & 0x3F) as u32)
             }
@@ -275,12 +279,10 @@ impl<'de> SchemaRead<'de> for char {
                 let b2 = buf[2];
                 let b3 = buf[3];
                 if (b1 & 0xC0) != 0x80 || (b2 & 0xC0) != 0x80 || (b3 & 0xC0) != 0x80 {
-                    core::str::from_utf8(buf).map_err(invalid_utf8_encoding)?;
-                    unreachable!();
+                    return Err(utf8_error(buf));
                 }
                 if (b0 == 0xF0 && b1 < 0x90) || (b0 == 0xF4 && b1 > 0x8F) {
-                    core::str::from_utf8(buf).map_err(invalid_utf8_encoding)?;
-                    unreachable!();
+                    return Err(utf8_error(buf));
                 }
                 ((b0 & 0x07) as u32) << 18
                     | ((b1 & 0x3F) as u32) << 12
@@ -290,13 +292,7 @@ impl<'de> SchemaRead<'de> for char {
             _ => unreachable!(),
         };
 
-        let c = match char::from_u32(code_point) {
-            Some(c) => c,
-            None => {
-                core::str::from_utf8(buf).map_err(invalid_utf8_encoding)?;
-                unreachable!();
-            }
-        };
+        let c = char::from_u32(code_point).ok_or_else(|| utf8_error(buf))?;
 
         unsafe { reader.consume_unchecked(len) };
         dst.write(c);