NOTE(review): this patch was reconstructed from a copy whose line breaks,
indentation and #include <...> header names had been stripped. Context-line
whitespace and the restored header names (<nmmintrin.h>, <x86intrin.h>,
<stdint.h>) are assumptions - confirm them against the repository before
applying. The unrecoverable #include context lines of the first
picohttpparser.h hunk were dropped, which moves its start from line 30 to 33.

diff --git a/cmake/develop.cmake b/cmake/develop.cmake
index 47bf6baa..8b450cae 100644
--- a/cmake/develop.cmake
+++ b/cmake/develop.cmake
@@ -70,6 +70,32 @@ if(ENABLE_CLIENT_SSL)
     add_definitions(-DCINATRA_ENABLE_CLIENT_SSL)
 endif()
 
+
+# Optional SIMD support: ENABLE_SIMD selects the instruction set (SSE42, AVX2
+# or AARCH64). Which CINATRA_* macro gets defined depends on the target
+# processor: CINATRA_AARCH64 on AArch64, CINATRA_SSE / CINATRA_AVX2 on x86-64.
+if(ENABLE_SIMD STREQUAL "SSE42" OR ENABLE_SIMD STREQUAL "AVX2" OR ENABLE_SIMD STREQUAL "AARCH64")
+    # Test the target CPU (CMAKE_SYSTEM_PROCESSOR), not the build host, so that
+    # cross builds are handled; Windows reports AMD64/ARM64, macOS reports arm64.
+    if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64|ARM64")
+        message(STATUS "Build with simd in aarch64")
+        add_definitions(-DCINATRA_AARCH64)
+    elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64")
+        message(STATUS "Build with simd in X86_64")
+        if (ENABLE_SIMD STREQUAL "SSE42")
+            message(STATUS "Build with SSE4.2 ISA")
+            add_definitions(-DCINATRA_SSE)
+            # GCC and Clang reject SSE4.2 intrinsics unless the ISA is enabled.
+            add_compile_options($<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-msse4.2>)
+        elseif (ENABLE_SIMD STREQUAL "AVX2")
+            message(STATUS "Build with AVX2 ISA")
+            add_definitions(-DCINATRA_AVX2)
+            add_compile_options($<$<CXX_COMPILER_ID:MSVC>:/arch:AVX2>
+                                $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-mavx2>)
+        endif ()
+    endif ()
+endif()
+
 add_definitions(-DASIO_STANDALONE)
 
 if (ENABLE_SSL)
diff --git a/include/cinatra/picohttpparser.h b/include/cinatra/picohttpparser.h
index 19b81ded..d00770aa 100644
--- a/include/cinatra/picohttpparser.h
+++ b/include/cinatra/picohttpparser.h
@@ -33,3 +33,13 @@
+#ifdef CINATRA_SSE
+#ifdef _MSC_VER
+#include <nmmintrin.h>
+#else
+#include <x86intrin.h>
+#endif
+#endif
+
+#include <stdint.h>
+
 #ifdef _MSC_VER
 #define ssize_t intptr_t
 #endif
@@ -163,10 +173,31 @@ static const char *findchar_fast(const char *buf, const char *buf_end,
                                  const char *ranges, int ranges_size,
                                  int *found) {
   *found = 0;
+#ifdef CINATRA_SSE
+  if (likely(buf_end - buf >= 16)) {
+    __m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges);
+
+    size_t left = (buf_end - buf) & ~15;
+    do {
+      __m128i b16 = _mm_loadu_si128((const __m128i *)buf);
+      int r = _mm_cmpestri(
+          ranges16, ranges_size, b16, 16,
+          _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
+      if (unlikely(r != 16)) {
+        buf += r;
+        *found = 1;
+        break;
+      }
+      buf += 16;
+      left -= 16;
+    } while (likely(left != 0));
+  }
+#else
   /* suppress unused parameter warning */
   (void)buf_end;
   (void)ranges;
   (void)ranges_size;
+#endif
   return buf;
 }
 
@@ -174,7 +205,22 @@ static const char *get_token_to_eol(const char *buf, const char *buf_end,
                                     const char **token, size_t *token_len,
                                     int *ret) {
   const char *token_start = buf;
-
+#ifdef CINATRA_SSE
+  /* findchar_fast() loads a full 16-byte vector from `ranges`, so the table is
+   * padded to 16 bytes; only the first 6 bytes (3 range pairs) are significant. */
+  static const char ranges1[16] =
+      "\0\010"
+      /* allow HT */
+      "\012\037"
+      /* allow SP and up to but not including DEL */
+      "\177\177"
+      /* allow chars w. MSB set */
+      ;
+  int found;
+  buf = findchar_fast(buf, buf_end, ranges1, 6, &found);
+  if (found)
+    goto FOUND_CTL;
+#else
   /* find non-printable char within the next 8 bytes, this is the hottest code;
    * manually inlined */
   while (likely(buf_end - buf >= 8)) {
@@ -201,7 +247,7 @@ static const char *get_token_to_eol(const char *buf, const char *buf_end,
     }
     ++buf;
   }
-
+#endif
   for (;; ++buf) {
     CHECK_EOF();
     if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {
diff --git a/press_tool/CMakeLists.txt b/press_tool/CMakeLists.txt
index 589f57b5..61b13f32 100644
--- a/press_tool/CMakeLists.txt
+++ b/press_tool/CMakeLists.txt
@@ -22,6 +22,28 @@ endif()
 if (ENABLE_GZIP)
     target_link_libraries(${project_name} ${ZLIB_LIBRARIES})
 endif()
+
+# Turn on the instruction set requested through ENABLE_SIMD for this target.
+# -msse4.2 / -mavx2 are compiler switches, not libraries, so they are passed
+# via target_compile_options rather than target_link_libraries.
+if (ENABLE_SIMD STREQUAL "AARCH64")
+    if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64|ARM64")
+        #TODO
+    endif ()
+elseif (ENABLE_SIMD STREQUAL "SSE42")
+    if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64")
+        # MSVC needs no switch for SSE4.2 intrinsics, hence the NOT-MSVC guard.
+        target_compile_options(${project_name} PRIVATE
+            $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-msse4.2>)
+    endif ()
+elseif (ENABLE_SIMD STREQUAL "AVX2")
+    if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64")
+        target_compile_options(${project_name} PRIVATE
+            $<$<CXX_COMPILER_ID:MSVC>:/arch:AVX2>
+            $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-mavx2>)
+    endif ()
+endif ()
+
 install(TARGETS ${project_name} DESTINATION include)
 
 set(unittest_press_tool test_cinatra_press_tool)