Skip to content

Commit 908e472

Browse files
committed
some fixes
1 parent ff2f416 commit 908e472

File tree

7 files changed

+604
-71
lines changed

7 files changed

+604
-71
lines changed

.github/workflows/tests.yml

+1
Original file line numberDiff line numberDiff line change
@@ -44,3 +44,4 @@ jobs:
4444
- name: Build and Test
4545
run: |
4646
make test
47+
DEBUG=1 make test

Makefile

+8-3
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
1-
TEST_FLAGS := -fqemu -freference-trace --summary all # --release=fast
1+
TEST_FLAGS := -fqemu -freference-trace --summary all
2+
RUN_FLAGS :=
23
TEST_TARGET_FILTER ?= none
34

5+
ifndef DEBUG
6+
TEST_FLAGS += --release=fast
7+
RUN_FLAGS += --release=fast
8+
endif
9+
410
.PHONY: test
511
test:
612
zig build test $(TEST_FLAGS) -Dtarget-filter="$(TEST_TARGET_FILTER)"
713

814
.PHONY: examples
915
examples:
10-
zig build run --release=fast
11-
16+
zig build run $(RUN_FLAGS)

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ Zeon aims to provide high-performance `Neon` intrinsics for `ARM` and `ARM64` ar
88

99
## Status
1010

11-
🚧 This project is under active development(502/3803 implemented). Contributions and feedback are welcome!
11+
🚧 This project is under active development(522/3803 implemented). Contributions and feedback are welcome!
1212

1313
## Roadmap
1414

build.zig

+23-14
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@ const examples: []const Example = &.{
2323
.path = "matrixVerticalFlip/main.zig",
2424
.name = "matrix-vertical-flip",
2525
},
26+
.{
27+
.path = "bufferToHex/main.zig",
28+
.name = "buffer-to-hex",
29+
},
2630
};
2731

2832
const test_targets = [_]std.Target.Query{
@@ -32,7 +36,7 @@ const test_targets = [_]std.Target.Query{
3236
.os_tag = .linux,
3337
.cpu_features_add = arm_target_features,
3438
},
35-
// TODO: When 0.14.0 officially releases, we need to cover armeb
39+
// TODO: Figure out how to test armeb
3640
// .{
3741
// .cpu_arch = .armeb,
3842
// .os_tag = .linux,
@@ -59,6 +63,12 @@ const test_targets = [_]std.Target.Query{
5963
.os_tag = .linux,
6064
.cpu_features_add = aarch64_target_features,
6165
},
66+
// For personal use(macos doesnt have qemu userspace)
67+
.{
68+
.cpu_arch = .aarch64,
69+
.os_tag = .macos,
70+
.cpu_features_add = aarch64_target_features,
71+
},
6272
// Not needed until we add x86 assembly fallbacks
6373
// .{
6474
// .cpu_arch = .x86,
@@ -96,9 +106,8 @@ pub fn build(b: *std.Build) void {
96106
const target = b.standardTargetOptions(.{});
97107
const optimize = b.standardOptimizeOption(.{ .preferred_optimize_mode = .ReleaseFast });
98108

99-
const path = "src/zeon.zig";
100109
const module = b.addModule("zeon", .{
101-
.root_source_file = b.path(path),
110+
.root_source_file = b.path("src/zeon.zig"),
102111
.target = target,
103112
.optimize = optimize,
104113
});
@@ -176,6 +185,7 @@ fn addExample(
176185
target_filter,
177186
);
178187

188+
run_step.dependOn(&run_cmd.step);
179189
example_run_step.dependOn(&run_cmd.step);
180190
run_step.dependOn(&run_cmd.step);
181191
}
@@ -198,15 +208,7 @@ fn addTest(
198208
const filter = std.mem.trim(u8, unprocessed_filter, " ");
199209
const target_group = findTargetGroup(filter);
200210

201-
if (target_group == null) {
202-
const fmt =
203-
\\Invalid filter: {s}
204-
\\Valid filters: native, arm, aarch64, and aarch64_be
205-
;
206-
std.debug.print(fmt, .{filter});
207-
std.process.exit(1);
208-
}
209-
for (target_group.?) |t| {
211+
for (target_group) |t| {
210212
addUnitTest(b, path, modules, optimize, test_steps, t);
211213
}
212214
}
@@ -215,12 +217,19 @@ fn addTest(
215217

216218
fn findTargetGroup(
217219
filter: []const u8,
218-
) ?[]const std.Target.Query {
220+
) []const std.Target.Query {
219221
if (std.mem.eql(u8, filter, "native")) return &.{test_targets[0]};
220222
if (std.mem.eql(u8, filter, "arm")) return &.{test_targets[1]};
221223
if (std.mem.eql(u8, filter, "aarch64")) return &.{test_targets[2]};
222224
if (std.mem.eql(u8, filter, "aarch64_be")) return &.{test_targets[3]};
223-
return null;
225+
if (std.mem.eql(u8, filter, "personal")) return &.{test_targets[4]};
226+
227+
const fmt =
228+
\\Invalid filter: {s}
229+
\\Valid filters: native, arm, aarch64, aarch64_be and personal
230+
;
231+
std.debug.print(fmt, .{filter});
232+
std.process.exit(1);
224233
}
225234

226235
fn addUnitTest(

examples/bufferToHex/main.zig

+103-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,104 @@
11
const std = @import("std");
2-
const neon = @import("neon");
2+
const neon = @import("zeon");
3+
4+
const hex_lookup: neon.u8x16 = .{ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
5+
const mask_low: neon.u8x16 = @splat(0x0f);
6+
7+
fn buftohex(input: [*]const u8, output: [*]u8, comptime len: usize) void {
8+
comptime var i: usize = 0;
9+
inline while (i + 32 <= len) : (i += 32) {
10+
const input_chunk1 = neon.vld1q_u8(input + i);
11+
const input_chunk2 = neon.vld1q_u8(input + i + 16);
12+
13+
// Split bytes into high and low nibbles
14+
const high_nibbles1 = neon.vshrq_n_u8(input_chunk1, 4);
15+
const low_nibbles1 = neon.vandq_u8(input_chunk1, mask_low);
16+
const high_nibbles2 = neon.vshrq_n_u8(input_chunk2, 4);
17+
const low_nibbles2 = neon.vandq_u8(input_chunk2, mask_low);
18+
19+
// Lookup high and low nibbles
20+
const high_chars1 = neon.vqtbl1q_u8(hex_lookup, high_nibbles1);
21+
const low_chars1 = neon.vqtbl1q_u8(hex_lookup, low_nibbles1);
22+
const high_chars2 = neon.vqtbl1q_u8(hex_lookup, high_nibbles2);
23+
const low_chars2 = neon.vqtbl1q_u8(hex_lookup, low_nibbles2);
24+
25+
// Interleave high and low hex characters
26+
const interleaved1 = neon.vzipq_u8(high_chars1, low_chars1);
27+
const interleaved2 = neon.vzipq_u8(high_chars2, low_chars2);
28+
29+
// Store the interleaved results
30+
neon.vst1q_u8(output + i * 2, interleaved1[0]);
31+
neon.vst1q_u8(output + i * 2 + 16, interleaved1[1]);
32+
neon.vst1q_u8(output + i * 2 + 32, interleaved2[0]);
33+
neon.vst1q_u8(output + i * 2 + 48, interleaved2[1]);
34+
}
35+
36+
const remaining = len - i;
37+
if (remaining >= 16) {
38+
const input_chunk = neon.vld1q_u8(input + i);
39+
40+
// Split bytes into high and low nibbles
41+
const high_nibbles = neon.vshrq_n_u8(input_chunk, 4);
42+
const low_nibbles = neon.vandq_u8(input_chunk, mask_low);
43+
44+
// Lookup high and low nibbles in the hex table
45+
const high_chars = neon.vqtbl1q_u8(hex_lookup, high_nibbles);
46+
const low_chars = neon.vqtbl1q_u8(hex_lookup, low_nibbles);
47+
48+
// Interleave the high and low hex characters
49+
const interleaved = neon.vzipq_u8(high_chars, low_chars);
50+
51+
// Store the result
52+
neon.vst1q_u8(output + i * 2, interleaved[0]);
53+
neon.vst1q_u8(output + i * 2 + 16, interleaved[1]);
54+
55+
i += 16;
56+
}
57+
58+
inline while (i < len) : (i += 1) {
59+
const byte = input[i];
60+
output[i * 2] = hex_lookup[byte >> 4];
61+
output[i * 2 + 1] = hex_lookup[byte & 0x0F];
62+
}
63+
}
64+
65+
test buftohex {
66+
const buf: [32]u8 = .{
67+
0x0c, 0x62, 0x68, 0xf8,
68+
0x71, 0x29, 0xd7, 0x64,
69+
0xac, 0x73, 0xf7, 0x7b,
70+
0x1a, 0x4f, 0x95, 0xf5,
71+
0x16, 0x67, 0x83, 0xa7,
72+
0xe4, 0x1e, 0xfc, 0x83,
73+
0x02, 0xf6, 0x10, 0x30,
74+
0xee, 0xcc, 0x63, 0xee,
75+
};
76+
const expected = "0c6268f87129d764ac73f77b1a4f95f5166783a7e41efc8302f61030eecc63ee";
77+
78+
var result: [64]u8 = undefined;
79+
inline for (.{ .{ true, false }, .{ false, true }, .{ false, false } }) |opt| {
80+
neon.use_asm = opt[0];
81+
neon.use_builtins = opt[1];
82+
buftohex(buf[0..].ptr, result[0..].ptr, 32);
83+
84+
try std.testing.expectEqualStrings(expected, &result);
85+
}
86+
}
87+
88+
pub fn main() void {
89+
std.debug.print("Buffer to Hex:\n", .{});
90+
const buf: [32]u8 = .{
91+
0xb1, 0x35, 0xf9, 0xff,
92+
0x16, 0x49, 0xb6, 0x49,
93+
0xa3, 0x4e, 0xf7, 0x7c,
94+
0xff, 0xd7, 0xf7, 0x57,
95+
0x5e, 0x7d, 0xe1, 0xb4,
96+
0x7f, 0x84, 0x52, 0xc3,
97+
0x62, 0x9b, 0x6a, 0xd3,
98+
0xc6, 0x67, 0xab, 0xbe,
99+
};
100+
var result: [64]u8 = undefined;
101+
// b135f9ff1649b649a34ef77cffd7f7575e7de1b47f8452c3629b6ad3c667abbe
102+
buftohex(buf[0..].ptr, result[0..].ptr, 32);
103+
std.debug.print("{s}\n", .{result});
104+
}

src/aarch64.zig

-50
Original file line numberDiff line numberDiff line change
@@ -14,54 +14,4 @@ pub fn hasFeatures(comptime aarch64_features: []const std.Target.aarch64.Feature
1414
if (!has_feature) return false;
1515
}
1616
return true;
17-
}
18-
19-
/// Get the vector register suffix for a given vector type
20-
pub inline fn vectorSuffix(comptime T: type) []const u8 {
21-
return switch (T) {
22-
// 128-bit Quadword vectors
23-
neon.i8x16, neon.u8x16 => ".16b",
24-
neon.i16x8, neon.u16x8, neon.f16x8 => ".8h",
25-
neon.i32x4, neon.u32x4, neon.f32x4 => ".4s",
26-
neon.i64x2, neon.u64x2, neon.f64x2 => ".2d",
27-
// 64-bit Doubleword vectors
28-
neon.i8x8, neon.u8x8 => ".8b",
29-
neon.i16x4, neon.u16x4, neon.f16x4 => ".4h",
30-
neon.i32x2, neon.u32x2, neon.f32x2 => ".2s",
31-
neon.i64x1, neon.u64x1, neon.f64x1 => ".1d",
32-
else => @compileError("Unsupported vector type for NEON register suffix"),
33-
};
34-
}
35-
36-
/// Performs a Endianness swap of the provided `vec`
37-
pub inline fn byteSwap(vec: anytype) @TypeOf(vec) {
38-
const T = comptime @TypeOf(vec);
39-
comptime {
40-
switch (T) {
41-
neon.i8x16, neon.u8x16, neon.i16x8, neon.u16x8, neon.f16x8, neon.i32x4, neon.u32x4, neon.f32x4, neon.i64x2, neon.u64x2, neon.f64x2, neon.i8x8, neon.u8x8, neon.i16x4, neon.u16x4, neon.f16x4, neon.i32x2, neon.u32x2, neon.f32x2, neon.i64x1, neon.u64x1, neon.f64x1 => {},
42-
else => @compileError("Unsupported element type for byteswap."),
43-
}
44-
}
45-
46-
const bits = comptime switch (@typeInfo(std.meta.Child(T))) {
47-
.Int => |i| i.bits,
48-
.Float => |f| f.bits,
49-
else => unreachable,
50-
};
51-
comptime if (bits == 8) return vec;
52-
53-
const suffix = comptime switch (bits * @typeInfo(T).Vector.len) {
54-
64 => ".8b",
55-
128 => ".16b",
56-
else => unreachable,
57-
};
58-
59-
return asm ("rev" ++ std.fmt.comptimePrint("{d}", .{bits}) ++ " %[result]" ++ suffix ++ ", %[input]" ++ suffix
60-
: [result] "=w" (-> T),
61-
: [input] "w" (vec),
62-
);
63-
}
64-
65-
test {
66-
std.testing.refAllDecls(@This());
6717
}

0 commit comments

Comments
 (0)