Skip to content

Commit b40399b

Browse files
authored
Merge pull request #266 from ashvardanian/main-dev
Build Issues & Warnings
2 parents eca3997 + 36e16da commit b40399b

File tree

7 files changed

+215
-176
lines changed

7 files changed

+215
-176
lines changed

.github/workflows/prerelease.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,9 @@ jobs:
140140
env:
141141
CC: gcc-12
142142
CXX: g++-12
143+
# `node-gyp-build` will look at this first; setting it prevents an
144+
# unintended rebuild and lets Deno just `dlopen` the prebuilt `.node`.
145+
PREBUILDS_ONLY: "1"
143146

144147
steps:
145148
- name: Checkout
@@ -168,7 +171,7 @@ jobs:
168171
deno-version: vx.x.x
169172

170173
- name: Test with Deno
171-
run: deno test --allow-read
174+
run: deno test -A
172175

173176
test_rust:
174177
name: Test Rust

.github/workflows/release.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,7 @@ jobs:
401401
name: prebuilds
402402
path: prebuilds
403403
retention-days: 1
404+
overwrite: true
404405

405406
publish_javascript:
406407
name: Publish JavaScript Package

CONTRIBUTING.md

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,15 +94,23 @@ export BLIS_NUM_THREADS=1 # for BLIS
9494

9595
## Python
9696

97+
Python bindings are implemented using pure CPython, so you don't need to install SWIG, PyBind11, or any other third-party library.
98+
Still, you need a virtual environment, and it's recommended to use `uv` to create one.
99+
100+
```sh
101+
uv venv --python 3.11 # Or your preferred Python version
102+
source .venv/bin/activate # To activate the virtual environment
103+
uv pip install -e . # To build locally from source
104+
```
105+
97106
Testing:
98107

99108
```sh
100-
pip install -e . # to install the package in editable mode
101109
pip install pytest pytest-repeat tabulate # testing dependencies
102110
pytest scripts/test.py -s -x -Wd # to run tests
103111

104112
# to check supported SIMD instructions:
105-
python -c "import simsimd; print(simsimd.get_capabilities())"
113+
python -c "import simsimd; print(simsimd.get_capabilities())"
106114
```
107115

108116
Here, `-s` will output the logs.
@@ -234,7 +242,7 @@ irm https://deno.land/install.ps1 | iex # Windows
234242
Testing:
235243
236244
```sh
237-
deno test --allow-read
245+
deno test -A
238246
```
239247
240248
### Bun

include/simsimd/elementwise.h

Lines changed: 26 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1469,10 +1469,19 @@ SIMSIMD_PUBLIC void simsimd_fma_bf16_skylake(
14691469

14701470
#if SIMSIMD_TARGET_SAPPHIRE
14711471
#pragma GCC push_options
1472-
#pragma GCC target("avx2", "avx512f", "avx512vl", "bmi2", "avx512bw", "avx512fp16")
1473-
#pragma clang attribute push(__attribute__((target("avx2,avx512f,avx512vl,bmi2,avx512bw,avx512fp16"))), \
1472+
#pragma GCC target("avx2", "avx512f", "avx512vl", "bmi2", "avx512bw", "avx512fp16", "f16c")
1473+
#pragma clang attribute push(__attribute__((target("avx2,avx512f,avx512vl,bmi2,avx512bw,avx512fp16,f16c"))), \
14741474
apply_to = function)
14751475

1476+
/**
1477+
* Broadcasts the single-precision scalar `a`, converted to half precision, into every 16-bit lane of a `__m512h`. Used instead of `_mm512_set1_ph((_Float16)1.f)`, which results in compilation warnings if we are pedantic.
1478+
* https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-8/details-about-intrinsics-for-half-floats.html
1479+
*/
1480+
SIMSIMD_INTERNAL __m512h _mm512_set1_ph_from_ps(float a) {
1481+
unsigned short h = _cvtss_sh(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); // Raw 16-bit half-float pattern of `a`, rounded to nearest.
1482+
return (__m512h)_mm512_set1_epi16(h); // Replicate the bit pattern across all lanes; the cast only reinterprets the integer vector.
1483+
}
1484+
14761485
SIMSIMD_PUBLIC void simsimd_sum_f16_sapphire(simsimd_f16_t const *a, simsimd_f16_t const *b, simsimd_size_t n,
14771486
simsimd_f16_t *result) {
14781487
__mmask32 mask = 0xFFFFFFFF;
@@ -1500,7 +1509,7 @@ SIMSIMD_PUBLIC void simsimd_scale_f16_sapphire(simsimd_f16_t const *a, simsimd_s
15001509
simsimd_f16_t *result) {
15011510

15021511
__mmask32 mask = 0xFFFFFFFF;
1503-
__m512h alpha_vec = _mm512_set1_ph((_Float16)alpha);
1512+
__m512h alpha_vec = _mm512_set1_ph_from_ps(alpha);
15041513
__m512h a_f16_vec, b_f16_vec;
15051514
__m512h sum_f16_vec;
15061515
simsimd_scale_f16_sapphire_cycle:
@@ -1540,8 +1549,8 @@ SIMSIMD_PUBLIC void simsimd_wsum_f16_sapphire( //
15401549

15411550
// The general case.
15421551
__mmask32 mask = 0xFFFFFFFF;
1543-
__m512h alpha_vec = _mm512_set1_ph((_Float16)alpha);
1544-
__m512h beta_vec = _mm512_set1_ph((_Float16)beta);
1552+
__m512h alpha_vec = _mm512_set1_ph_from_ps(alpha);
1553+
__m512h beta_vec = _mm512_set1_ph_from_ps(beta);
15451554
__m512h a_f16_vec, b_f16_vec;
15461555
__m512h a_scaled_f16_vec, sum_f16_vec;
15471556
simsimd_wsum_f16_sapphire_cycle:
@@ -1568,8 +1577,8 @@ SIMSIMD_PUBLIC void simsimd_fma_f16_sapphire(
15681577
simsimd_distance_t alpha, simsimd_distance_t beta, simsimd_f16_t *result) {
15691578

15701579
__mmask32 mask = 0xFFFFFFFF;
1571-
__m512h alpha_vec = _mm512_set1_ph((_Float16)alpha);
1572-
__m512h beta_vec = _mm512_set1_ph((_Float16)beta);
1580+
__m512h alpha_vec = _mm512_set1_ph_from_ps(alpha);
1581+
__m512h beta_vec = _mm512_set1_ph_from_ps(beta);
15731582
__m512h a_f16_vec, b_f16_vec, c_f16_vec;
15741583
__m512h ab_f16_vec, ab_scaled_f16_vec, sum_f16_vec;
15751584
simsimd_fma_f16_sapphire_cycle:
@@ -1619,7 +1628,7 @@ SIMSIMD_PUBLIC void simsimd_sum_u8_sapphire(simsimd_u8_t const *a, simsimd_u8_t
16191628
SIMSIMD_PUBLIC void simsimd_scale_u8_sapphire(simsimd_u8_t const *a, simsimd_size_t n, simsimd_distance_t alpha,
16201629
simsimd_u8_t *result) {
16211630
__mmask64 mask = 0xFFFFFFFFFFFFFFFFull;
1622-
__m512h alpha_vec = _mm512_set1_ph((_Float16)alpha);
1631+
__m512h alpha_vec = _mm512_set1_ph_from_ps(alpha);
16231632
__m512i a_u8_vec, b_u8_vec, sum_u8_vec;
16241633
__m512h a_f16_low_vec, a_f16_high_vec;
16251634
__m512h a_scaled_f16_low_vec, a_scaled_f16_high_vec, sum_f16_low_vec, sum_f16_high_vec;
@@ -1670,8 +1679,8 @@ SIMSIMD_PUBLIC void simsimd_wsum_u8_sapphire( //
16701679

16711680
// The general case.
16721681
__mmask64 mask = 0xFFFFFFFFFFFFFFFFull;
1673-
__m512h alpha_vec = _mm512_set1_ph((_Float16)alpha);
1674-
__m512h beta_vec = _mm512_set1_ph((_Float16)beta);
1682+
__m512h alpha_vec = _mm512_set1_ph_from_ps(alpha);
1683+
__m512h beta_vec = _mm512_set1_ph_from_ps(beta);
16751684
__m512i a_u8_vec, b_u8_vec, sum_u8_vec;
16761685
__m512h a_f16_low_vec, a_f16_high_vec, b_f16_low_vec, b_f16_high_vec;
16771686
__m512h a_scaled_f16_low_vec, a_scaled_f16_high_vec, sum_f16_low_vec, sum_f16_high_vec;
@@ -1739,7 +1748,7 @@ SIMSIMD_PUBLIC void simsimd_scale_i8_sapphire(simsimd_i8_t const *a, simsimd_siz
17391748
simsimd_i8_t *result) {
17401749

17411750
__mmask64 mask = 0xFFFFFFFFFFFFFFFFull;
1742-
__m512h alpha_vec = _mm512_set1_ph((_Float16)alpha);
1751+
__m512h alpha_vec = _mm512_set1_ph_from_ps(alpha);
17431752
__m512i a_i8_vec, sum_i8_vec;
17441753
__m512h a_f16_low_vec, a_f16_high_vec;
17451754
__m512h sum_f16_low_vec, sum_f16_high_vec;
@@ -1791,8 +1800,8 @@ SIMSIMD_PUBLIC void simsimd_wsum_i8_sapphire( //
17911800

17921801
// The general case.
17931802
__mmask64 mask = 0xFFFFFFFFFFFFFFFFull;
1794-
__m512h alpha_vec = _mm512_set1_ph((_Float16)alpha);
1795-
__m512h beta_vec = _mm512_set1_ph((_Float16)beta);
1803+
__m512h alpha_vec = _mm512_set1_ph_from_ps(alpha);
1804+
__m512h beta_vec = _mm512_set1_ph_from_ps(beta);
17961805
__m512i a_i8_vec, b_i8_vec, sum_i8_vec;
17971806
__m512h a_f16_low_vec, a_f16_high_vec, b_f16_low_vec, b_f16_high_vec;
17981807
__m512h a_scaled_f16_low_vec, a_scaled_f16_high_vec, sum_f16_low_vec, sum_f16_high_vec;
@@ -1836,8 +1845,8 @@ SIMSIMD_PUBLIC void simsimd_fma_i8_sapphire(
18361845
simsimd_distance_t alpha, simsimd_distance_t beta, simsimd_i8_t *result) {
18371846

18381847
__mmask64 mask = 0xFFFFFFFFFFFFFFFF;
1839-
__m512h alpha_vec = _mm512_set1_ph((_Float16)alpha);
1840-
__m512h beta_vec = _mm512_set1_ph((_Float16)beta);
1848+
__m512h alpha_vec = _mm512_set1_ph_from_ps(alpha);
1849+
__m512h beta_vec = _mm512_set1_ph_from_ps(beta);
18411850
__m512i a_i8_vec, b_i8_vec, c_i8_vec, sum_i8_vec;
18421851
__m512h a_f16_low_vec, a_f16_high_vec, b_f16_low_vec, b_f16_high_vec;
18431852
__m512h c_f16_low_vec, c_f16_high_vec, ab_f16_low_vec, ab_f16_high_vec;
@@ -1889,8 +1898,8 @@ SIMSIMD_PUBLIC void simsimd_fma_u8_sapphire(
18891898
simsimd_distance_t alpha, simsimd_distance_t beta, simsimd_u8_t *result) {
18901899

18911900
__mmask64 mask = 0xFFFFFFFFFFFFFFFF;
1892-
__m512h alpha_vec = _mm512_set1_ph((_Float16)alpha);
1893-
__m512h beta_vec = _mm512_set1_ph((_Float16)beta);
1901+
__m512h alpha_vec = _mm512_set1_ph_from_ps(alpha);
1902+
__m512h beta_vec = _mm512_set1_ph_from_ps(beta);
18941903
__m512i a_u8_vec, b_u8_vec, c_u8_vec, sum_u8_vec;
18951904
__m512h a_f16_low_vec, a_f16_high_vec, b_f16_low_vec, b_f16_high_vec;
18961905
__m512h c_f16_low_vec, c_f16_high_vec, ab_f16_low_vec, ab_f16_high_vec;

include/simsimd/sparse.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -527,10 +527,10 @@ SIMSIMD_PUBLIC void simsimd_intersect_u32_ice( //
527527
#if SIMSIMD_TARGET_TURIN
528528
#pragma GCC push_options
529529
#pragma GCC target("avx2", "avx512f", "avx512vl", "bmi2", "lzcnt", "popcnt", "avx512bw", "avx512vbmi2", "avx512bf16", \
530-
"avx512vnni", "avx512vp2intersect")
531-
#pragma clang attribute push( \
532-
__attribute__((target( \
533-
"avx2,avx512f,avx512vl,bmi2,lzcnt,popcnt,avx512bw,avx512vbmi2,avx512bf16,avx512vnni,avx512vp2intersect"))), \
530+
"avx512vnni", "avx512vp2intersect", "avx512dq")
531+
#pragma clang attribute push( \
532+
__attribute__((target( \
533+
"avx2,avx512f,avx512vl,bmi2,lzcnt,popcnt,avx512bw,avx512vbmi2,avx512bf16,avx512vnni,avx512vp2intersect,avx512dq"))), \
534534
apply_to = function)
535535

536536
SIMSIMD_PUBLIC void simsimd_intersect_u16_turin( //

package-lock.json

Lines changed: 10 additions & 17 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)