Skip to content

Commit 47853d7

Browse files
authored
perf(simd): avx2 fallback to swar instead of sse4.2 (#181)
This has massive implications for the default runtime perf, improving how the code is lowered/inlined. (Falling back to SSE4.2 for a handful of bytes was wasteful.) Should supersede #175 and #156.
1 parent fff851f commit 47853d7

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

src/simd/avx2.rs

+6-6
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use crate::iter::Bytes;
22

33
#[inline]
4-
#[target_feature(enable = "avx2", enable = "sse4.2")]
4+
#[target_feature(enable = "avx2")]
55
pub unsafe fn match_uri_vectored(bytes: &mut Bytes) {
66
while bytes.as_ref().len() >= 32 {
77
let advance = match_url_char_32_avx(bytes.as_ref());
@@ -11,8 +11,8 @@ pub unsafe fn match_uri_vectored(bytes: &mut Bytes) {
1111
return;
1212
}
1313
}
14-
// do both, since avx2 only works when bytes.len() >= 32
15-
super::sse42::match_uri_vectored(bytes)
14+
// NOTE: use SWAR for <32B, more efficient than falling back to SSE4.2
15+
super::swar::match_uri_vectored(bytes)
1616
}
1717

1818
#[inline(always)]
@@ -56,7 +56,7 @@ unsafe fn match_url_char_32_avx(buf: &[u8]) -> usize {
5656
r.trailing_zeros() as usize
5757
}
5858

59-
#[target_feature(enable = "avx2", enable = "sse4.2")]
59+
#[target_feature(enable = "avx2")]
6060
pub unsafe fn match_header_value_vectored(bytes: &mut Bytes) {
6161
while bytes.as_ref().len() >= 32 {
6262
let advance = match_header_value_char_32_avx(bytes.as_ref());
@@ -66,8 +66,8 @@ pub unsafe fn match_header_value_vectored(bytes: &mut Bytes) {
6666
return;
6767
}
6868
}
69-
// do both, since avx2 only works when bytes.len() >= 32
70-
super::sse42::match_header_value_vectored(bytes)
69+
// NOTE: use SWAR for <32B, more efficient than falling back to SSE4.2
70+
super::swar::match_header_value_vectored(bytes)
7171
}
7272

7373
#[inline(always)]

0 commit comments

Comments
 (0)