Skip to content

Commit

Permalink
Merge pull request #58 from ashvardanian/main-dev
Browse files Browse the repository at this point in the history
AVX-512, Bindings for C++ & Rust & Swift, Levenshtein distances, Needleman-Wunsch scores, and Fingerprinting
  • Loading branch information
ashvardanian authored Feb 6, 2024
2 parents d728848 + aa7bbc2 commit 9fde435
Show file tree
Hide file tree
Showing 56 changed files with 18,003 additions and 2,500 deletions.
13 changes: 7 additions & 6 deletions .clang-format
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Language: Cpp
BasedOnStyle: LLVM
BasedOnStyle: LLVM
IndentWidth: 4
TabWidth: 4
NamespaceIndentation: All
NamespaceIndentation: None
ColumnLimit: 120
ReflowComments: true
UseTab: Never
Expand Down Expand Up @@ -44,9 +44,8 @@ BraceWrapping:
SplitEmptyNamespace: false
IndentBraces: false


SortIncludes: true
SortUsingDeclarations: true
SortUsingDeclarations: true

SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
Expand All @@ -65,5 +64,7 @@ SpacesInContainerLiterals: false
SpacesInParentheses: false
SpacesInSquareBrackets: false

BinPackArguments: false
BinPackParameters: false
BinPackArguments: true
BinPackParameters: true
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakArgument: 1
44 changes: 44 additions & 0 deletions .github/workflows/build_tools.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/bin/bash
#
# Configure and build the StringZilla tests and benchmarks with CMake.
#
# Usage: build_tools.sh <BUILD_TYPE> <COMPILER>
#   BUILD_TYPE  Debug or Release
#   COMPILER    GCC, LLVM, or MSVC
#
# Exits with status 1 if either argument is not one of the values listed
# above; otherwise the exit status is that of the cmake configure/build chain.

# Assign arguments to variables
BUILD_TYPE=$1 # Debug or Release
COMPILER=$2   # GCC, LLVM, or MSVC

# Set common flags
COMMON_FLAGS="-DSTRINGZILLA_BUILD_TEST=1 -DSTRINGZILLA_BUILD_BENCHMARK=1 -DSTRINGZILLA_BUILD_SHARED=0"

# Compiler specific settings
case "$COMPILER" in
"GCC")
    # The C and C++ compilers are separate CMake variables: gcc-12 goes to
    # CMAKE_C_COMPILER and g++-12 to CMAKE_CXX_COMPILER. Passing both as
    # CMAKE_CXX_COMPILER would leave the C compiler at CMake's default.
    COMPILER_FLAGS="-DCMAKE_C_COMPILER=gcc-12 -DCMAKE_CXX_COMPILER=g++-12"
    ;;
"LLVM")
    COMPILER_FLAGS="-DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++"
    ;;
"MSVC")
    # No explicit compiler selection; CMake picks its default toolchain.
    COMPILER_FLAGS=""
    ;;
*)
    echo "Unknown compiler: $COMPILER"
    exit 1
    ;;
esac

# Set build type
case "$BUILD_TYPE" in
"Debug")
    BUILD_DIR="./build_debug"
    BUILD_FLAGS="-DCMAKE_BUILD_TYPE=Debug"
    ;;
"Release")
    # "Release" is configured as RelWithDebInfo, i.e. optimized with debug symbols.
    BUILD_DIR="./build_release"
    BUILD_FLAGS="-DCMAKE_BUILD_TYPE=RelWithDebInfo"
    ;;
*)
    echo "Unknown build type: $BUILD_TYPE"
    exit 1
    ;;
esac

# Execute commands
# The *_FLAGS variables are intentionally left unquoted: each holds several
# space-separated -D options that must be word-split into separate arguments
# (and an empty COMPILER_FLAGS must expand to nothing, not to an empty argument).
cmake $COMMON_FLAGS $COMPILER_FLAGS $BUILD_FLAGS -B "$BUILD_DIR" && cmake --build "$BUILD_DIR" --config "$BUILD_TYPE"
287 changes: 230 additions & 57 deletions .github/workflows/prerelease.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,83 +9,256 @@ on:
# Workflow-wide environment variables, visible to every job and step.
env:
  BUILD_TYPE: Release
  GH_TOKEN: ${{ secrets.SEMANTIC_RELEASE_TOKEN }}
  # Version numbers are quoted so YAML keeps them as strings: an unquoted
  # value is parsed as a float, and a later bump to e.g. 3.10 would silently
  # turn into 3.1.
  PYTHON_VERSION: "3.11"
  SWIFT_VERSION: "5.9"
  # Environment values are always passed to processes as strings; quoting
  # makes that explicit.
  PYTHONUTF8: "1"

# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
permissions:
contents: read

jobs:

test_python_311:
name: Test Python
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-20.04, macOS-11, windows-2022]
python-version: ["3.11"]
test_ubuntu_gcc:
name: Ubuntu (GCC 12)
runs-on: ubuntu-22.04
env:
CC: gcc-12
CXX: g++-12

steps:
- uses: actions/checkout@v3
- run: git submodule update --init --recursive
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
- uses: actions/checkout@v4

# C/C++
# If the compilation fails, we want to log the compilation commands in addition to
# the standard output.
- name: Build C/C++
run: |
sudo apt update
sudo apt install -y cmake build-essential libjemalloc-dev libomp-dev gcc-12 g++-12
cmake -B build_artifacts \
-DCMAKE_BUILD_TYPE=RelWithDebInfo \
-DCMAKE_EXPORT_COMPILE_COMMANDS=1 \
-DSTRINGZILLA_BUILD_BENCHMARK=1 \
-DSTRINGZILLA_BUILD_TEST=1
cmake --build build_artifacts --config RelWithDebInfo > build_artifacts/logs.txt 2>&1 || {
echo "Compilation failed. Here are the logs:"
cat build_artifacts/logs.txt
echo "The original compilation commands:"
cat build_artifacts/compile_commands.json
echo "CPU Features:"
lscpu
echo "GCC Version:"
gcc-12 --version
echo "G++ Version:"
g++-12 --version
exit 1
}
- name: Test C++
run: ./build_artifacts/stringzilla_test_cpp20
- name: Test on Real World Data
run: |
./build_artifacts/stringzilla_bench_search ${DATASET_PATH} # for substring search
./build_artifacts/stringzilla_bench_token ${DATASET_PATH} # for hashing, equality comparisons, etc.
./build_artifacts/stringzilla_bench_similarity ${DATASET_PATH} # for edit distances and alignment scores
./build_artifacts/stringzilla_bench_sort ${DATASET_PATH} # for sorting arrays of strings
./build_artifacts/stringzilla_bench_container ${DATASET_PATH} # for STL containers with string keys
env:
DATASET_PATH: ./README.md
# Don't overload GitHub with our benchmarks.
# The results in such an unstable environment will be meaningless anyway.
if: 0

# Python
- name: Set up Python ${{ env.PYTHON_VERSION }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
python-version: ${{ env.PYTHON_VERSION }}
- name: Build Python
run: |
python -m pip install --no-cache-dir --upgrade pip numpy
pip install --no-cache-dir pytest
- name: Build locally
run: python -m pip install .
- name: Test with PyTest
run: pytest scripts/


test_python_37:
name: Test Python 3.7
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-20.04]
python-version: ["3.7"]
python -m pip install --upgrade pip
pip install pytest pytest-repeat numpy
python -m pip install .
- name: Test Python
run: pytest scripts/test.py -s -x

# JavaScript
# - name: Set up Node.js
# uses: actions/setup-node
# with:
# node-version: 18
# - name: Build and test JavaScript
# run: npm ci && npm test

# Rust
- name: Test Rust
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true

test_ubuntu_clang:
name: Ubuntu (Clang 16)
runs-on: ubuntu-22.04
env:
CC: clang-16
CXX: clang++-16

steps:
- uses: actions/checkout@v3
- run: git submodule update --init --recursive

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
- uses: actions/checkout@v4
with:
python-version: ${{ matrix.python-version }}
ref: main-dev
- run: git submodule update --init --recursive

- name: Install dependencies
# C/C++
# Clang 16 isn't available from default repos on Ubuntu 22.04, so we have to install it manually
- name: Build C/C++
run: |
python -m pip install --no-cache-dir --upgrade pip numpy
pip install --no-cache-dir pytest
sudo apt update
sudo apt install -y cmake build-essential libjemalloc-dev
wget https://apt.llvm.org/llvm.sh
chmod +x llvm.sh
sudo ./llvm.sh 16
- name: Build locally
run: python -m pip install .
cmake -B build_artifacts \
-DCMAKE_BUILD_TYPE=RelWithDebInfo \
-DCMAKE_EXPORT_COMPILE_COMMANDS=1 \
-DSTRINGZILLA_BUILD_BENCHMARK=1 \
-DSTRINGZILLA_BUILD_TEST=1
- name: Test with PyTest
run: pytest scripts/
cmake --build build_artifacts --config RelWithDebInfo > build_artifacts/logs.txt 2>&1 || {
echo "Compilation failed. Here are the logs:"
cat build_artifacts/logs.txt
echo "The original compilation commands:"
cat build_artifacts/compile_commands.json
echo "CPU Features:"
lscpu
echo "Clang Version:"
clang-16 --version
echo "Clang++ Version:"
clang++-16 --version
exit 1
}
- name: Test C++
run: ./build_artifacts/stringzilla_test_cpp20
- name: Test on Real World Data
run: |
./build_artifacts/stringzilla_bench_search ${DATASET_PATH} # for substring search
./build_artifacts/stringzilla_bench_token ${DATASET_PATH} # for hashing, equality comparisons, etc.
./build_artifacts/stringzilla_bench_similarity ${DATASET_PATH} # for edit distances and alignment scores
./build_artifacts/stringzilla_bench_sort ${DATASET_PATH} # for sorting arrays of strings
./build_artifacts/stringzilla_bench_container ${DATASET_PATH} # for STL containers with string keys
env:
DATASET_PATH: ./README.md
# Don't overload GitHub with our benchmarks.
# The results in such an unstable environment will be meaningless anyway.
if: 0

test_javascript:
name: Test JavaScript
runs-on: ubuntu-latest
strategy:
matrix:
node-version: [18.x]
# Python
- name: Set up Python ${{ env.PYTHON_VERSION }}
uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
- name: Build Python
run: |
python -m pip install --upgrade pip
pip install pytest pytest-repeat numpy
python -m pip install .
- name: Test Python
run: pytest scripts/test.py -s -x

# Rust
- name: Test Rust
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true

# Swift
# Fails due to: https://github.com/swift-actions/setup-swift/issues/591
# - name: Set up Swift ${{ env.SWIFT_VERSION }}
# uses: swift-actions/setup-swift@v1
# with:
# swift-version: ${{ env.SWIFT_VERSION }}
# - name: Build Swift
# run: swift build -c release --static-swift-stdlib
# - name: Test Swift
# run: swift test -c release --enable-test-discovery

# Temporary workaround to run Swift tests on Linux
# Based on: https://github.com/swift-actions/setup-swift/issues/591#issuecomment-1685710678
test_ubuntu_swift:
name: Ubuntu (Swift)
runs-on: ubuntu-22.04
container: swift:5.9
steps:

- uses: actions/checkout@v4
- name: Set up Node.js
uses: actions/setup-node@v3
- name: Test Swift
run: swift test

test_macos:
name: MacOS
runs-on: macos-12

steps:
- uses: actions/checkout@v4
with:
ref: main-dev
- run: git submodule update --init --recursive

# C/C++
- name: Build C/C++
run: |
brew update
brew install cmake
cmake -B build_artifacts \
-DCMAKE_BUILD_TYPE=RelWithDebInfo \
-DCMAKE_EXPORT_COMPILE_COMMANDS=1 \
-DSTRINGZILLA_BUILD_BENCHMARK=1 \
-DSTRINGZILLA_BUILD_TEST=1
cmake --build build_artifacts --config RelWithDebInfo
- name: Test C++
run: ./build_artifacts/stringzilla_test_cpp17
- name: Test on Real World Data
run: |
./build_artifacts/stringzilla_bench_search ${DATASET_PATH} # for substring search
./build_artifacts/stringzilla_bench_token ${DATASET_PATH} # for hashing, equality comparisons, etc.
./build_artifacts/stringzilla_bench_similarity ${DATASET_PATH} # for edit distances and alignment scores
./build_artifacts/stringzilla_bench_sort ${DATASET_PATH} # for sorting arrays of strings
./build_artifacts/stringzilla_bench_container ${DATASET_PATH} # for STL containers with string keys
env:
DATASET_PATH: ./README.md
# Don't overload GitHub with our benchmarks.
# The results in such an unstable environment will be meaningless anyway.
if: 0

# Python
- name: Set up Python ${{ env.PYTHON_VERSION }}
uses: actions/setup-python@v5
with:
node-version: '18.x'

- name: Build locally
run: npm i
python-version: ${{ env.PYTHON_VERSION }}
- name: Build Python
run: |
python -m pip install --upgrade pip
pip install pytest pytest-repeat numpy
python -m pip install .
- name: Test Python
run: pytest scripts/test.py -s -x

# Swift
- name: Set up Swift ${{ env.SWIFT_VERSION }}
uses: swift-actions/setup-swift@v1
with:
swift-version: ${{ env.SWIFT_VERSION }}
- name: Build Swift
run: swift build
- name: Test Swift
run: swift test

- name: Test
run: npm test
# Rust
- name: Test Rust
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
Loading

0 comments on commit 9fde435

Please sign in to comment.