|
1 | 1 | const std = @import("std");
|
2 |
| -const neon = @import("neon"); |
| 2 | +const neon = @import("zeon"); |
| 3 | + |
/// Lowercase hexadecimal digits: entry `n` holds the ASCII character for the
/// nibble value `n` (0 through 15).
const hex_lookup: neon.u8x16 = .{
    '0', '1', '2', '3', '4', '5', '6', '7',
    '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
};

/// Every entry is 0x0f; AND-ing a byte with it keeps only that byte's low nibble.
const mask_low: neon.u8x16 = @splat(0x0f);
| 6 | + |
/// Hex-encodes one already-loaded 16-byte chunk and stores the 32 resulting
/// characters at `dst[0..32]`.
inline fn storeHex16(chunk: neon.u8x16, dst: [*]u8) void {
    // Split each byte into its high and low nibble.
    const high_nibbles = neon.vshrq_n_u8(chunk, 4);
    const low_nibbles = neon.vandq_u8(chunk, mask_low);

    // Map every nibble to its hex character through the digit table.
    const high_chars = neon.vqtbl1q_u8(hex_lookup, high_nibbles);
    const low_chars = neon.vqtbl1q_u8(hex_lookup, low_nibbles);

    // Interleave so each source byte becomes "high digit, low digit".
    const interleaved = neon.vzipq_u8(high_chars, low_chars);
    neon.vst1q_u8(dst, interleaved[0]);
    neon.vst1q_u8(dst + 16, interleaved[1]);
}

/// Writes the lowercase hexadecimal encoding of `input[0..len]` to
/// `output[0 .. 2 * len]`.
///
/// `len` is comptime-known, so all loops below are unrolled at compile time.
/// Neither pointer carries a length: the caller must guarantee that `len`
/// bytes are readable at `input` and `2 * len` bytes are writable at `output`.
/// No terminator is written.
fn buftohex(input: [*]const u8, output: [*]u8, comptime len: usize) void {
    // Plain array copy of the digit table for the scalar tail, so the tail
    // indexes an array rather than a vector with a runtime-known value.
    const hex_digits: [16]u8 = hex_lookup;

    comptime var i: usize = 0;

    // Bulk path: 32 input bytes (two chunks) per step. Both chunks are loaded
    // before anything is stored, matching a load-all-then-store-all order.
    inline while (i + 32 <= len) : (i += 32) {
        const chunk_a = neon.vld1q_u8(input + i);
        const chunk_b = neon.vld1q_u8(input + i + 16);
        storeHex16(chunk_a, output + i * 2);
        storeHex16(chunk_b, output + i * 2 + 32);
    }

    // At most one 16-byte chunk can remain after the bulk path.
    const remaining = len - i;
    if (remaining >= 16) {
        storeHex16(neon.vld1q_u8(input + i), output + i * 2);
        i += 16;
    }

    // Scalar tail: fewer than 16 bytes left, encoded one byte at a time.
    inline while (i < len) : (i += 1) {
        const byte = input[i];
        output[i * 2] = hex_digits[byte >> 4];
        output[i * 2 + 1] = hex_digits[byte & 0x0F];
    }
}
| 64 | + |
// Encodes a fixed 32-byte buffer under each backend configuration
// (`use_asm` / `use_builtins` toggles on the `neon` module) and checks the
// output against the known lowercase hex string.
test buftohex {
    const buf: [32]u8 = .{
        0x0c, 0x62, 0x68, 0xf8,
        0x71, 0x29, 0xd7, 0x64,
        0xac, 0x73, 0xf7, 0x7b,
        0x1a, 0x4f, 0x95, 0xf5,
        0x16, 0x67, 0x83, 0xa7,
        0xe4, 0x1e, 0xfc, 0x83,
        0x02, 0xf6, 0x10, 0x30,
        0xee, 0xcc, 0x63, 0xee,
    };
    const expected = "0c6268f87129d764ac73f77b1a4f95f5166783a7e41efc8302f61030eecc63ee";

    // Each tuple is { use_asm, use_builtins }.
    inline for (.{ .{ true, false }, .{ false, true }, .{ false, false } }) |opt| {
        neon.use_asm = opt[0];
        neon.use_builtins = opt[1];

        // Fresh, zeroed buffer per configuration: a stale result left over
        // from an earlier iteration must not let a broken backend pass.
        var result = [_]u8{0} ** 64;
        buftohex(&buf, &result, buf.len);

        try std.testing.expectEqualStrings(expected, &result);
    }
}
| 87 | + |
/// Demo entry point: hex-encodes a fixed 32-byte sample with `buftohex` and
/// prints the 64-character result via `std.debug.print`.
pub fn main() void {
    const sample: [32]u8 = .{
        0xb1, 0x35, 0xf9, 0xff, 0x16, 0x49, 0xb6, 0x49,
        0xa3, 0x4e, 0xf7, 0x7c, 0xff, 0xd7, 0xf7, 0x57,
        0x5e, 0x7d, 0xe1, 0xb4, 0x7f, 0x84, 0x52, 0xc3,
        0x62, 0x9b, 0x6a, 0xd3, 0xc6, 0x67, 0xab, 0xbe,
    };

    std.debug.print("Buffer to Hex:\n", .{});

    // Expected output:
    // b135f9ff1649b649a34ef77cffd7f7575e7de1b47f8452c3629b6ad3c667abbe
    var hex: [64]u8 = undefined;
    buftohex(&sample, &hex, 32);
    std.debug.print("{s}\n", .{hex});
}
0 commit comments