From 4437829db75449d5ac5ee41cce8130f37496f582 Mon Sep 17 00:00:00 2001 From: Wang Xiang W Date: Tue, 19 Jan 2021 12:31:52 +0000 Subject: [PATCH 01/13] fat runtime: fix libc symbol parsing fixes github issue #292 --- cmake/build_wrapper.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/build_wrapper.sh b/cmake/build_wrapper.sh index 1962813fe..895610c00 100755 --- a/cmake/build_wrapper.sh +++ b/cmake/build_wrapper.sh @@ -17,7 +17,7 @@ KEEPSYMS=$(mktemp -p /tmp keep.syms.XXXXX) LIBC_SO=$("$@" --print-file-name=libc.so.6) cp ${KEEPSYMS_IN} ${KEEPSYMS} # get all symbols from libc and turn them into patterns -nm -f p -g -D ${LIBC_SO} | sed -s 's/\([^ ]*\).*/^\1$/' >> ${KEEPSYMS} +nm -f p -g -D ${LIBC_SO} | sed -s 's/\([^ @]*\).*/^\1$/' >> ${KEEPSYMS} # build the object "$@" # rename the symbols in the object From a5a78f660d9c1d1654479e4e7b84beb565057933 Mon Sep 17 00:00:00 2001 From: Wang Xiang W Date: Wed, 27 Jan 2021 11:57:51 +0000 Subject: [PATCH 02/13] simd_utils: fix undefined instruction issue for 32-bit system fixes github issue #292 --- src/util/simd_utils.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h index d1f060b07..5fa727e5d 100644 --- a/src/util/simd_utils.h +++ b/src/util/simd_utils.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2015-2021, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -156,6 +156,16 @@ static really_inline u32 movd(const m128 in) { return _mm_cvtsi128_si32(in); } +static really_inline u64a movq(const m128 in) { +#if defined(ARCH_X86_64) + return _mm_cvtsi128_si64(in); +#else // 32-bit - this is horrific + u32 lo = movd(in); + u32 hi = movd(_mm_srli_epi64(in, 32)); + return (u64a)hi << 32 | lo; +#endif +} + #if defined(HAVE_AVX512) static really_inline u32 movd512(const m512 in) { // NOTE: seems gcc doesn't support _mm512_cvtsi512_si32(in), @@ -166,20 +176,10 @@ static really_inline u32 movd512(const m512 in) { static really_inline u64a movq512(const m512 in) { // NOTE: seems AVX512 doesn't support _mm512_cvtsi512_si64(in), // so we use 2-step convertions to work around. - return _mm_cvtsi128_si64(_mm512_castsi512_si128(in)); + return movq(_mm512_castsi512_si128(in)); } #endif -static really_inline u64a movq(const m128 in) { -#if defined(ARCH_X86_64) - return _mm_cvtsi128_si64(in); -#else // 32-bit - this is horrific - u32 lo = movd(in); - u32 hi = movd(_mm_srli_epi64(in, 32)); - return (u64a)hi << 32 | lo; -#endif -} - /* another form of movq */ static really_inline m128 load_m128_from_u64a(const u64a *p) { @@ -791,7 +791,7 @@ m128 movdq_lo(m256 x) { #define lshift128_m256(a, count_immed) _mm256_slli_si256(a, count_immed) #define extract64from256(a, imm) _mm_extract_epi64(_mm256_extracti128_si256(a, imm >> 1), imm % 2) #define extract32from256(a, imm) _mm_extract_epi32(_mm256_extracti128_si256(a, imm >> 2), imm % 4) -#define extractlow64from256(a) _mm_cvtsi128_si64(cast256to128(a)) +#define extractlow64from256(a) movq(cast256to128(a)) #define extractlow32from256(a) movd(cast256to128(a)) #define interleave256hi(a, b) _mm256_unpackhi_epi8(a, b) #define interleave256lo(a, b) _mm256_unpacklo_epi8(a, b) From 5b2e1ebf9f7a26f7194d58acd3fbaa75a32b7c59 Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Thu, 4 Mar 2021 16:13:46 +0000 Subject: [PATCH 03/13] literal API: add empty string check. fixes github issue #302, #304 --- src/compiler/compiler.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index 5751bd64f..15a130dd6 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -416,6 +416,10 @@ void addLitExpression(NG &ng, unsigned index, const char *expression, "HS_FLAG_SOM_LEFTMOST are supported in literal API."); } + if (!strcmp(expression, "")) { + throw CompileError("Pure literal API doesn't support empty string."); + } + // This expression must be a pure literal, we can build ue2_literal // directly based on expression text. ParsedLitExpression ple(index, expression, expLength, flags, id); From 4c43e3ca36addb2c42ed420f4f357995a8609eb2 Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Thu, 4 Mar 2021 16:50:14 +0000 Subject: [PATCH 04/13] literal API: add instruction support fixes github issue #303 --- src/rose/program_runtime.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/rose/program_runtime.c b/src/rose/program_runtime.c index ff5a5099c..24a6cbeae 100644 --- a/src/rose/program_runtime.c +++ b/src/rose/program_runtime.c @@ -3110,6 +3110,7 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t, const char in_catchup = prog_flags & ROSE_PROG_FLAG_IN_CATCHUP; const char from_mpv = prog_flags & ROSE_PROG_FLAG_FROM_MPV; + const char skip_mpv_catchup = prog_flags & ROSE_PROG_FLAG_SKIP_MPV_CATCHUP; const char *pc_base = getByOffset(t, programOffset); const char *pc = pc_base; @@ -3206,6 +3207,17 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t, } L_PROGRAM_NEXT_INSTRUCTION + L_PROGRAM_CASE(CATCH_UP_MPV) { + if (from_mpv || skip_mpv_catchup) { + DEBUG_PRINTF("skipping mpv catchup\n"); + } else if (roseCatchUpMPV(t, + end - scratch->core_info.buf_offset, + scratch) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + L_PROGRAM_NEXT_INSTRUCTION + L_PROGRAM_CASE(SOM_FROM_REPORT) { som = handleSomExternal(scratch, &ri->som, end); DEBUG_PRINTF("som from report %u is %llu\n", ri->som.onmatch, @@ -3213,6 +3225,15 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t, } L_PROGRAM_NEXT_INSTRUCTION + L_PROGRAM_CASE(TRIGGER_SUFFIX) { + if (roseTriggerSuffix(t, scratch, ri->queue, ri->event, som, + end) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + work_done = 1; + } + L_PROGRAM_NEXT_INSTRUCTION + L_PROGRAM_CASE(DEDUPE) { updateSeqPoint(tctxt, end, from_mpv); const char do_som = t->hasSom; // TODO: constant propagate From 29e452cc6329a39ee8b878f93c575e7ea7fed1ac Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Thu, 4 Mar 2021 17:00:34 +0000 Subject: [PATCH 05/13] mcclellan: improve wide-state checking in Sherman optimization fixes github issue #305 --- src/nfa/mcclellancompile.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 27ec1716e..432c7fd3b 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -1082,7 +1082,9 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit, // Use the daddy already set for this state so long as it isn't already // a Sherman state. dstate_id_t daddy = currState.daddy; - if (!info.is_sherman(daddy) && !info.is_widestate(daddy)) { + if (info.is_widestate(daddy)) { + return; + } else if (!info.is_sherman(daddy)) { hinted.insert(currState.daddy); } else { // Fall back to granddaddy, which has already been processed (due From eb688bbfa65576f8e86530f38fa99961faa23fd7 Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Thu, 11 Mar 2021 15:20:55 +0000 Subject: [PATCH 06/13] update year for bugfix #302-#305 --- src/compiler/compiler.cpp | 2 +- src/nfa/mcclellancompile.cpp | 2 +- src/rose/program_runtime.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index 15a130dd6..6f993ffe5 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2015-2021, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 432c7fd3b..6ae9558ce 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2015-2021, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: diff --git a/src/rose/program_runtime.c b/src/rose/program_runtime.c index 24a6cbeae..579ce2783 100644 --- a/src/rose/program_runtime.c +++ b/src/rose/program_runtime.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2015-2021, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: From 90a5f90698390dc3312b3d872457605fc3250fd5 Mon Sep 17 00:00:00 2001 From: "Chang, Harry" Date: Wed, 10 Mar 2021 07:20:01 +0000 Subject: [PATCH 07/13] Logical Combination: bypass combination flag in hs_expression_info. Fixes github issue #291 --- src/hs.cpp | 8 +++++++- src/hs_compile.h | 12 +++--------- src/hs_internal.h | 6 ++++-- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/hs.cpp b/src/hs.cpp index eac588891..ae9cdf146 100644 --- a/src/hs.cpp +++ b/src/hs.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2015-2021, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -514,6 +514,12 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags, return HS_COMPILER_ERROR; } + if (flags & HS_FLAG_COMBINATION) { + *error = generateCompileError("Invalid parameter: unsupported " + "logical combination expression", -1); + return HS_COMPILER_ERROR; + } + *info = nullptr; *error = nullptr; diff --git a/src/hs_compile.h b/src/hs_compile.h index b318c29db..5aa241886 100644 --- a/src/hs_compile.h +++ b/src/hs_compile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2015-2021, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -748,10 +748,7 @@ hs_error_t HS_CDECL hs_free_compile_error(hs_compile_error_t *error); * - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode. * - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset * when a match is found. - * - HS_FLAG_COMBINATION - Parse the expression in logical combination - * syntax. - * - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for - * the sub-expressions in logical combinations. + * - HS_FLAG_QUIET - This flag will be ignored. * * @param info * On success, a pointer to the pattern information will be returned in @@ -814,10 +811,7 @@ hs_error_t HS_CDECL hs_expression_info(const char *expression, * - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode. * - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset * when a match is found. - * - HS_FLAG_COMBINATION - Parse the expression in logical combination - * syntax. - * - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for - * the sub-expressions in logical combinations. + * - HS_FLAG_QUIET - This flag will be ignored. * * @param ext * A pointer to a filled @ref hs_expr_ext_t structure that defines diff --git a/src/hs_internal.h b/src/hs_internal.h index adf07b22c..4eb5e157c 100644 --- a/src/hs_internal.h +++ b/src/hs_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, Intel Corporation + * Copyright (c) 2019-2021, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -80,7 +80,9 @@ extern "C" | HS_FLAG_PREFILTER \ | HS_FLAG_SINGLEMATCH \ | HS_FLAG_ALLOWEMPTY \ - | HS_FLAG_SOM_LEFTMOST) + | HS_FLAG_SOM_LEFTMOST \ + | HS_FLAG_COMBINATION \ + | HS_FLAG_QUIET) #ifdef __cplusplus } /* extern "C" */ From 7d644e7ba27eaadda753febf0b142faa9affbbca Mon Sep 17 00:00:00 2001 From: hongyang7 Date: Thu, 16 Dec 2021 19:02:17 +0800 Subject: [PATCH 08/13] Fix segfaults on allocation failure (#4) Throw std::bad_alloc instead of returning nullptr from ue2::AlignedAllocator. Allocators for STL containers are expected never to return with an invalid pointer, and instead must throw on failure. Violating this expectation can lead to invalid pointer dereferences. Co-authored-by: johanngan fixes github issue #317 (PR #320) --- src/util/alloc.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/util/alloc.h b/src/util/alloc.h index de20c8d02..49b4a824d 100644 --- a/src/util/alloc.h +++ b/src/util/alloc.h @@ -76,7 +76,11 @@ class AlignedAllocator { T *allocate(std::size_t size) const { size_t alloc_size = size * sizeof(T); - return static_cast(aligned_malloc_internal(alloc_size, N)); + T *ptr = static_cast(aligned_malloc_internal(alloc_size, N)); + if (!ptr) { + throw std::bad_alloc(); + } + return ptr; } void deallocate(T *x, std::size_t) const noexcept { From 3070f11991cc2014685a28c0eaa1e033ffa8fe30 Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Thu, 28 Apr 2022 10:11:32 +0000 Subject: [PATCH 09/13] bugfix: fix overflow risk of strlen function --- src/compiler/compiler.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index 6f993ffe5..35f46b3fe 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -323,7 +323,8 @@ void addExpression(NG &ng, unsigned index, const char *expression, } // Ensure that our pattern isn't too long (in characters). - if (strlen(expression) > cc.grey.limitPatternLength) { + size_t maxlen = cc.grey.limitPatternLength + 1; + if (strnlen(expression, maxlen) >= maxlen) { throw CompileError("Pattern length exceeds limit."); } From 010b803d042e69b8e8077ec45c23cba024f2b77e Mon Sep 17 00:00:00 2001 From: "Chang, Harry" Date: Thu, 12 May 2022 02:15:07 +0000 Subject: [PATCH 10/13] Corpus generator: fix random char value of UTF-8. fixes github issue #184 --- util/ng_corpus_generator.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/util/ng_corpus_generator.cpp b/util/ng_corpus_generator.cpp index e5e8e06cd..f796cd45f 100644 --- a/util/ng_corpus_generator.cpp +++ b/util/ng_corpus_generator.cpp @@ -477,14 +477,14 @@ void CorpusGeneratorUtf8::generateCorpus(vector &data) { * that we've been asked for. */ unichar CorpusGeneratorUtf8::getRandomChar() { u32 range = MAX_UNICODE + 1 - - (UNICODE_SURROGATE_MAX + UNICODE_SURROGATE_MIN + 1); + - (UNICODE_SURROGATE_MAX - UNICODE_SURROGATE_MIN + 1); range = min(cProps.alphabetSize, range); assert(range); unichar c = 'a' + cProps.rand(0, range - 1); if (c >= UNICODE_SURROGATE_MIN) { - c =+ UNICODE_SURROGATE_MAX + 1; + c += UNICODE_SURROGATE_MAX - UNICODE_SURROGATE_MIN + 1; } return c % (MAX_UNICODE + 1); From 1dc964dafe1f6969702a722afff33e65559e8e65 Mon Sep 17 00:00:00 2001 From: "Chang, Harry" Date: Thu, 12 May 2022 08:20:29 +0000 Subject: [PATCH 11/13] Corpus editor: fix random char value of UTF-8. --- util/ng_corpus_editor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/util/ng_corpus_editor.cpp b/util/ng_corpus_editor.cpp index ac4f8b654..c1149216d 100644 --- a/util/ng_corpus_editor.cpp +++ b/util/ng_corpus_editor.cpp @@ -268,12 +268,12 @@ void CorpusEditorUtf8::flip_case(vector &corpus) { unichar CorpusEditorUtf8::chooseCodePoint(void) { /* We need to ensure that we don't pick a surrogate cp */ const u32 range = - MAX_UNICODE + 1 - (UNICODE_SURROGATE_MAX + UNICODE_SURROGATE_MIN + 1); + MAX_UNICODE + 1 - (UNICODE_SURROGATE_MAX - UNICODE_SURROGATE_MIN + 1); unichar raw = props.rand(0, range - 1); if (raw < UNICODE_SURROGATE_MIN) { return raw; } else { - return raw + UNICODE_SURROGATE_MAX + 1; + return raw + UNICODE_SURROGATE_MAX - UNICODE_SURROGATE_MIN + 1; } } From 10b52255fc821a5a40dd62fe41b2be4645828634 Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Thu, 28 Jul 2022 04:59:34 +0000 Subject: [PATCH 12/13] chimera: fix SKIP flag issue fix github issue #360 --- chimera/ch_runtime.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/chimera/ch_runtime.c b/chimera/ch_runtime.c index fdb5b992b..1009036b5 100644 --- a/chimera/ch_runtime.c +++ b/chimera/ch_runtime.c @@ -326,6 +326,10 @@ ch_error_t catchupPcre(struct HybridContext *hyctx, unsigned int id, } else if (cbrv == CH_CALLBACK_SKIP_PATTERN) { DEBUG_PRINTF("user callback told us to skip this pattern\n"); pd->scanStart = hyctx->length; + if (top_id == id) { + break; + } + continue; } if (top_id == id) { From d9f0c27091659708374f40ab2d042e76a30bc322 Mon Sep 17 00:00:00 2001 From: Krzysztof Czapla Date: Thu, 15 Sep 2022 16:22:09 +0200 Subject: [PATCH 13/13] Port to web assembly I added HYPERSCAN_WASM32 cmake option. If enabled, it sets appropriate headers and compile definitions that enables WASM compilation; I conditionaly replace some of the codeblocks that are not compilable into WASM. This blocks fetch cpu information in order to enable some extra features (that how I understand it). In the WASM conext there is no "CPU" hence no CPU info; In order to build wasm hyperscan you need to do following: 1. Prepare environment Install and activate emscripten sdk. Follow steps in this instruction: https://emscripten.org/docs/getting_started/downloads.html. Use version that supports wasm exceptions proposal e.g. 3.1.16. Install nodejs version that supports wasm exceptions e.g. v16.16.0. 2. Configure Hyperscan ``` cd mkdir cd emcmake cmake -DFAT_RUNTIME=FALSE -DBUILD_EXAMPLES=OFF -DHYPERSCAN_WASM32=ON -DHYPERSCAN_EMSCRIPTEN_BOOST=ON -S ../ -B . ``` 3. Build hscheck ``` emmake make -j hscheck ``` 4. Run hscheck ``` node --experimental-wasm-simd --experimental-wasm-eh .//bin/hscheck.js ``` --- CMakeLists.txt | 34 ++++++++++++++++++++++++++++++++-- cmake/arch.cmake | 2 ++ cmake/config.h.in | 6 ++++++ cmake/platform.cmake | 4 ++++ src/util/arch.h | 2 +- src/util/cpuid_flags.c | 2 +- src/util/cpuid_flags.h | 2 +- src/util/cpuid_inline.h | 16 +++++++++++++--- src/util/intrinsics.h | 12 ++++++++++++ 9 files changed, 72 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8bc6077fb..f095fcee6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,6 +6,9 @@ set (HS_MINOR_VERSION 4) set (HS_PATCH_VERSION 0) set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION}) +option(HYPERSCAN_WASM32 "Enable hyperscan compilation to wasm32" OFF) +option(HYPERSCAN_EMSCRIPTEN_BOOST "Use boost library that is bundled with emscripten" OFF) + set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) include(CheckCCompilerFlag) include(CheckCXXCompilerFlag) @@ -40,6 +43,11 @@ endif() set(BINDIR "${PROJECT_BINARY_DIR}/bin") set(LIBDIR "${PROJECT_BINARY_DIR}/lib") +if (HYPERSCAN_WASM32) + string(APPEND CMAKE_C_FLAGS " -msse -msse2 -mssse3 -msimd128") + string(APPEND CMAKE_CXX_FLAGS " -msse -msse2 -mssse3 -msimd128 -fwasm-exceptions") +endif() + set(INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR}) # First for the generic no-config case @@ -64,12 +72,21 @@ if (XCODE OR CMAKE_CXX_COMPILER_ID MATCHES "Intel") set(CMAKE_INCLUDE_SYSTEM_FLAG_CXX "-isystem") endif () +if (HYPERSCAN_WASM32) + set(CMAKE_INCLUDE_SYSTEM_FLAG_CXX "-Xclang -isystem") +endif() + set(CMAKE_INCLUDE_CURRENT_DIR 1) include_directories(${PROJECT_SOURCE_DIR}/src) include_directories(${PROJECT_BINARY_DIR}) include_directories(SYSTEM include) -include (${CMAKE_MODULE_PATH}/boost.cmake) +if (HYPERSCAN_EMSCRIPTEN_BOOST) + string(APPEND CMAKE_CXX_FLAGS " -sUSE_BOOST_HEADERS") +else() + include (${CMAKE_MODULE_PATH}/boost.cmake) +endif() + # -- make this work? set(python_ADDITIONAL_VERSIONS 2.7 2.6) find_package(PythonInterp) @@ -294,6 +311,11 @@ CHECK_INCLUDE_FILE_CXX(intrin.h HAVE_CXX_INTRIN_H) CHECK_INCLUDE_FILES(x86intrin.h HAVE_C_X86INTRIN_H) CHECK_INCLUDE_FILE_CXX(x86intrin.h HAVE_CXX_X86INTRIN_H) +if (HYPERSCAN_WASM32) + CHECK_INCLUDE_FILES(tmmintrin.h HAVE_C_TMMINTRIN_H) + set(HAVE_CXX_TMMINTRIN_H 1) +endif() + CHECK_FUNCTION_EXISTS(posix_memalign HAVE_POSIX_MEMALIGN) CHECK_FUNCTION_EXISTS(_aligned_malloc HAVE__ALIGNED_MALLOC) @@ -482,7 +504,9 @@ if (CORRECT_PCRE_VERSION AND PCRE_BUILD_SOURCE AND BUILD_STATIC_LIBS) set(BUILD_CHIMERA TRUE) endif() -add_subdirectory(unit) +if (NOT HYPERSCAN_WASM32) + add_subdirectory(unit) +endif() if (EXISTS ${CMAKE_SOURCE_DIR}/tools/CMakeLists.txt) add_subdirectory(tools) endif() @@ -1190,6 +1214,12 @@ if (NOT FAT_RUNTIME) src/hs_valid_platform.c $ $) + if (HYPERSCAN_WASM32) + target_compile_definitions(hs_exec PRIVATE WASM32) + target_compile_definitions(hs_runtime PRIVATE WASM32) + target_compile_definitions(hs_compile PRIVATE WASM32) + target_compile_definitions(hs PRIVATE WASM32) + endif() endif (BUILD_STATIC_LIBS) if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) diff --git a/cmake/arch.cmake b/cmake/arch.cmake index eb4791e6b..3207f1450 100644 --- a/cmake/arch.cmake +++ b/cmake/arch.cmake @@ -6,6 +6,8 @@ if (HAVE_C_X86INTRIN_H) set (INTRIN_INC_H "x86intrin.h") elseif (HAVE_C_INTRIN_H) set (INTRIN_INC_H "intrin.h") +elseif (HAVE_C_TMMINTRIN_H) + set (INTRIN_INC_H "tmmintrin.h") else () message (FATAL_ERROR "No intrinsics header found") endif () diff --git a/cmake/config.h.in b/cmake/config.h.in index 5454643e7..d52e29424 100644 --- a/cmake/config.h.in +++ b/cmake/config.h.in @@ -48,6 +48,12 @@ /* C compiler has intrin.h */ #cmakedefine HAVE_C_INTRIN_H +/* C++ compiler has tmmintrin.h */ +#cmakedefine HAVE_CXX_TMMINTRIN_H + +/* C compiler has tmmintrin.h */ +#cmakedefine HAVE_C_TMMINTRIN_H + /* Define to 1 if you have the declaration of `pthread_setaffinity_np', and to 0 if you don't. */ #cmakedefine HAVE_DECL_PTHREAD_SETAFFINITY_NP diff --git a/cmake/platform.cmake b/cmake/platform.cmake index 593c544b5..10096dbd4 100644 --- a/cmake/platform.cmake +++ b/cmake/platform.cmake @@ -5,5 +5,9 @@ CHECK_C_SOURCE_COMPILES("#if !(defined(__x86_64__) || defined(_M_X64))\n#error n CHECK_C_SOURCE_COMPILES("#if !(defined(__i386__) || defined(_M_IX86))\n#error not 64bit\n#endif\nint main(void) { return 0; }" ARCH_32_BIT) +if (HYPERSCAN_WASM32) + set(ARCH_32_BIT 1) +endif() + set(ARCH_X86_64 ${ARCH_64_BIT}) set(ARCH_IA32 ${ARCH_32_BIT}) diff --git a/src/util/arch.h b/src/util/arch.h index 985fec6ac..46d81219f 100644 --- a/src/util/arch.h +++ b/src/util/arch.h @@ -83,7 +83,7 @@ /* * MSVC uses a different form of inline asm */ -#if defined(_WIN32) && defined(_MSC_VER) +#if defined(WASM32) || defined(_WIN32) && defined(_MSC_VER) #define NO_ASM #endif diff --git a/src/util/cpuid_flags.c b/src/util/cpuid_flags.c index c00ce58e2..048375ae2 100644 --- a/src/util/cpuid_flags.c +++ b/src/util/cpuid_flags.c @@ -33,7 +33,7 @@ #include "hs_internal.h" #include "util/arch.h" -#if !defined(_WIN32) && !defined(CPUID_H_) +#if !defined(_WIN32) && !defined(CPUID_H_) && !defined(WASM32) #include #endif diff --git a/src/util/cpuid_flags.h b/src/util/cpuid_flags.h index 527c6d52f..305470f6c 100644 --- a/src/util/cpuid_flags.h +++ b/src/util/cpuid_flags.h @@ -31,7 +31,7 @@ #include "ue2common.h" -#if !defined(_WIN32) && !defined(CPUID_H_) +#if !defined(_WIN32) && !defined(CPUID_H_) && !defined(WASM32) #include /* system header doesn't have a header guard */ #define CPUID_H_ diff --git a/src/util/cpuid_inline.h b/src/util/cpuid_inline.h index b7b424528..0f0b44007 100644 --- a/src/util/cpuid_inline.h +++ b/src/util/cpuid_inline.h @@ -32,7 +32,7 @@ #include "ue2common.h" #include "cpuid_flags.h" -#if !defined(_WIN32) && !defined(CPUID_H_) +#if !defined(_WIN32) && !defined(CPUID_H_) && !defined(WASM32) #include /* system header doesn't have a header guard */ #define CPUID_H_ @@ -46,7 +46,14 @@ extern "C" static inline void cpuid(unsigned int op, unsigned int leaf, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { -#ifndef _WIN32 +#if defined(WASM32) + (void)(op); + (void)(leaf); + *eax = 0; + *ebx = 0; + *ecx = 0; + *edx = 0; +#elif !defined(_WIN32) __cpuid_count(op, leaf, *eax, *ebx, *ecx, *edx); #else int a[4]; @@ -95,7 +102,10 @@ void cpuid(unsigned int op, unsigned int leaf, unsigned int *eax, static inline u64a xgetbv(u32 op) { -#if defined(_WIN32) || defined(__INTEL_COMPILER) +#if defined(WASM32) + (void) op; + return 0; +#elif defined(_WIN32) || defined(__INTEL_COMPILER) return _xgetbv(op); #else u32 a, d; diff --git a/src/util/intrinsics.h b/src/util/intrinsics.h index edc4f6efb..402566e40 100644 --- a/src/util/intrinsics.h +++ b/src/util/intrinsics.h @@ -55,10 +55,22 @@ # endif #endif +#ifdef __cplusplus +# if defined(HAVE_CXX_TMMINTRIN_H) +# define USE_TMMINTRIN_H +# endif +#else // C +# if defined(HAVE_C_TMMINTRIN_H) +# define USE_TMMINTRIN_H +# endif +#endif + #if defined(USE_X86INTRIN_H) #include #elif defined(USE_INTRIN_H) #include +#elif defined(USE_TMMINTRIN_H) +#include #else #error no intrinsics file #endif