Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
307 changes: 307 additions & 0 deletions crates/core_simd/benches/mask_count.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,307 @@
//! Comprehensive benchmarks for Mask::count() performance analysis
//!
//! This benchmark suite tests:
//! - Different mask sizes (2, 4, 8, and 16 elements)
//! - Different densities (0%, 25%, 50%, 75%, 100% true)
//! - Comparison with manual iteration baseline
//! - Cache behavior and instruction-level performance

#![feature(portable_simd)]
#![feature(test)]

extern crate test;
use cmp::SimdPartialOrd;
use core_simd::simd::*;
use test::{Bencher, black_box};

// ============================================================================
// Mask Size: 2 elements (i64)
// ============================================================================

/// Benchmarks `count()` on a `mask64x2` in which no element is set.
#[bench]
fn mask2_count_0pct(b: &mut Bencher) {
    let all_clear = mask64x2::splat(false);
    b.iter(|| {
        // `black_box` keeps the optimizer from folding the count of a known mask.
        let input = black_box(all_clear);
        input.count()
    });
}

/// Benchmarks `count()` on a `mask64x2` with one of its two elements set.
#[bench]
fn mask2_count_50pct(b: &mut Bencher) {
    let pattern = [true, false];
    let half_set = mask64x2::from_array(pattern);
    b.iter(|| {
        let input = black_box(half_set);
        input.count()
    });
}

/// Benchmarks `count()` on a `mask64x2` in which every element is set.
#[bench]
fn mask2_count_100pct(b: &mut Bencher) {
    let all_set = mask64x2::splat(true);
    b.iter(|| {
        let input = black_box(all_set);
        input.count()
    });
}

// ============================================================================
// Mask Size: 4 elements (i32)
// ============================================================================

/// Benchmarks `count()` on a `mask32x4` in which no element is set.
#[bench]
fn mask4_count_0pct(b: &mut Bencher) {
    let all_clear = mask32x4::splat(false);
    b.iter(|| {
        let input = black_box(all_clear);
        input.count()
    });
}

/// Benchmarks `count()` on a `mask32x4` with one of its four elements set.
#[bench]
fn mask4_count_25pct(b: &mut Bencher) {
    let pattern = [true, false, false, false];
    let quarter_set = mask32x4::from_array(pattern);
    b.iter(|| {
        let input = black_box(quarter_set);
        input.count()
    });
}

/// Benchmarks `count()` on a `mask32x4` with alternating elements set (two of four).
#[bench]
fn mask4_count_50pct(b: &mut Bencher) {
    let pattern = [true, false, true, false];
    let half_set = mask32x4::from_array(pattern);
    b.iter(|| {
        let input = black_box(half_set);
        input.count()
    });
}

/// Benchmarks `count()` on a `mask32x4` with three of its four elements set.
#[bench]
fn mask4_count_75pct(b: &mut Bencher) {
    let pattern = [true, true, true, false];
    let mostly_set = mask32x4::from_array(pattern);
    b.iter(|| {
        let input = black_box(mostly_set);
        input.count()
    });
}

/// Benchmarks `count()` on a `mask32x4` in which every element is set.
#[bench]
fn mask4_count_100pct(b: &mut Bencher) {
    let all_set = mask32x4::splat(true);
    b.iter(|| {
        let input = black_box(all_set);
        input.count()
    });
}

/// Baseline for `mask4_count_50pct`: tallies the set elements of the same
/// alternating `mask32x4` by testing each element in turn instead of calling `count()`.
#[bench]
fn mask4_count_manual_50pct(b: &mut Bencher) {
    let pattern = [true, false, true, false];
    let half_set = mask32x4::from_array(pattern);
    b.iter(|| {
        let lanes = black_box(half_set);
        let mut set = 0;
        for lane in 0..4 {
            if lanes.test(lane) {
                set += 1;
            }
        }
        // Keep the tally observable so the loop is not optimized away.
        black_box(set)
    });
}

// ============================================================================
// Mask Size: 8 elements (i32)
// ============================================================================

/// Benchmarks `count()` on a `mask32x8` in which no element is set.
#[bench]
fn mask8_count_0pct(b: &mut Bencher) {
    let all_clear = mask32x8::splat(false);
    b.iter(|| {
        let input = black_box(all_clear);
        input.count()
    });
}

/// Benchmarks `count()` on a `mask32x8` with every fourth element set (two of eight).
#[bench]
fn mask8_count_25pct(b: &mut Bencher) {
    let pattern = [
        true, false, false, false,
        true, false, false, false,
    ];
    let quarter_set = mask32x8::from_array(pattern);
    b.iter(|| {
        let input = black_box(quarter_set);
        input.count()
    });
}

/// Benchmarks `count()` on a `mask32x8` with alternating elements set (four of eight).
#[bench]
fn mask8_count_50pct(b: &mut Bencher) {
    let pattern = [
        true, false, true, false,
        true, false, true, false,
    ];
    let half_set = mask32x8::from_array(pattern);
    b.iter(|| {
        let input = black_box(half_set);
        input.count()
    });
}

/// Benchmarks `count()` on a `mask32x8` with six of its eight elements set.
#[bench]
fn mask8_count_75pct(b: &mut Bencher) {
    let pattern = [
        true, true, true, false,
        true, true, true, false,
    ];
    let mostly_set = mask32x8::from_array(pattern);
    b.iter(|| {
        let input = black_box(mostly_set);
        input.count()
    });
}

/// Benchmarks `count()` on a `mask32x8` in which every element is set.
#[bench]
fn mask8_count_100pct(b: &mut Bencher) {
    let all_set = mask32x8::splat(true);
    b.iter(|| {
        let input = black_box(all_set);
        input.count()
    });
}

/// Baseline for `mask8_count_50pct`: tallies the set elements of the same
/// alternating `mask32x8` by testing each element in turn instead of calling `count()`.
#[bench]
fn mask8_count_manual_50pct(b: &mut Bencher) {
    let pattern = [
        true, false, true, false,
        true, false, true, false,
    ];
    let half_set = mask32x8::from_array(pattern);
    b.iter(|| {
        let lanes = black_box(half_set);
        let mut set = 0;
        for lane in 0..8 {
            if lanes.test(lane) {
                set += 1;
            }
        }
        // Keep the tally observable so the loop is not optimized away.
        black_box(set)
    });
}

// ============================================================================
// Mask Size: 16 elements (i32)
// ============================================================================

/// Benchmarks `count()` on a `mask32x16` in which no element is set.
#[bench]
fn mask16_count_0pct(b: &mut Bencher) {
    let all_clear = mask32x16::splat(false);
    b.iter(|| {
        let input = black_box(all_clear);
        input.count()
    });
}

/// Benchmarks `count()` on a `mask32x16` with every fourth element set (four of sixteen).
#[bench]
fn mask16_count_25pct(b: &mut Bencher) {
    let pattern = [
        true, false, false, false,
        true, false, false, false,
        true, false, false, false,
        true, false, false, false,
    ];
    let quarter_set = mask32x16::from_array(pattern);
    b.iter(|| {
        let input = black_box(quarter_set);
        input.count()
    });
}

/// Benchmarks `count()` on a `mask32x16` with alternating elements set (eight of sixteen).
#[bench]
fn mask16_count_50pct(b: &mut Bencher) {
    let pattern = [
        true, false, true, false,
        true, false, true, false,
        true, false, true, false,
        true, false, true, false,
    ];
    let half_set = mask32x16::from_array(pattern);
    b.iter(|| {
        let input = black_box(half_set);
        input.count()
    });
}

/// Benchmarks `count()` on a `mask32x16` with twelve of its sixteen elements set.
#[bench]
fn mask16_count_75pct(b: &mut Bencher) {
    let pattern = [
        true, true, true, false,
        true, true, true, false,
        true, true, true, false,
        true, true, true, false,
    ];
    let mostly_set = mask32x16::from_array(pattern);
    b.iter(|| {
        let input = black_box(mostly_set);
        input.count()
    });
}

/// Benchmarks `count()` on a `mask32x16` in which every element is set.
#[bench]
fn mask16_count_100pct(b: &mut Bencher) {
    let all_set = mask32x16::splat(true);
    b.iter(|| {
        let input = black_box(all_set);
        input.count()
    });
}

/// Baseline for `mask16_count_50pct`: tallies the set elements of the same
/// alternating `mask32x16` by testing each element in turn instead of calling `count()`.
#[bench]
fn mask16_count_manual_50pct(b: &mut Bencher) {
    let pattern = [
        true, false, true, false,
        true, false, true, false,
        true, false, true, false,
        true, false, true, false,
    ];
    let half_set = mask32x16::from_array(pattern);
    b.iter(|| {
        let lanes = black_box(half_set);
        let mut set = 0;
        for lane in 0..16 {
            if lanes.test(lane) {
                set += 1;
            }
        }
        // Keep the tally observable so the loop is not optimized away.
        black_box(set)
    });
}

// ============================================================================
// Real-world scenario: filtering based on comparison
// ============================================================================

/// Benchmarks a compare-then-count sequence: eight `f32` values are compared
/// against a splatted threshold of `5.0` with `simd_gt`, and the resulting mask is counted.
#[bench]
fn real_world_filter_count_f32x8(b: &mut Bencher) {
    let samples = [1.0, 5.5, 3.2, 7.8, 2.1, 9.5, 4.3, 6.7];
    let data = f32x8::from_array(samples);
    let threshold = f32x8::splat(5.0);

    b.iter(|| {
        // Both operands pass through `black_box`, so the comparison is part of the timed work.
        let above = black_box(data).simd_gt(black_box(threshold));
        black_box(above.count())
    });
}

/// Benchmarks a compare-then-count sequence: sixteen `f32` values are compared
/// against a splatted threshold of `5.0` with `simd_gt`, and the resulting mask is counted.
#[bench]
fn real_world_filter_count_f32x16(b: &mut Bencher) {
    let samples = [
        1.0, 5.5, 3.2, 7.8,
        2.1, 9.5, 4.3, 6.7,
        1.5, 5.2, 3.8, 7.1,
        2.9, 9.2, 4.8, 6.1,
    ];
    let data = f32x16::from_array(samples);
    let threshold = f32x16::splat(5.0);

    b.iter(|| {
        // Both operands pass through `black_box`, so the comparison is part of the timed work.
        let above = black_box(data).simd_gt(black_box(threshold));
        black_box(above.count())
    });
}

// ============================================================================
// Stress test: multiple counts in tight loop
// ============================================================================

/// Benchmarks four back-to-back `count()` calls on distinct `mask32x8` values
/// (each with four of eight elements set) and sums the results.
#[bench]
fn stress_multiple_counts_mask8(b: &mut Bencher) {
    let alternating_even = mask32x8::from_array([true, false, true, false, true, false, true, false]);
    let alternating_odd = mask32x8::from_array([false, true, false, true, false, true, false, true]);
    let pairs_leading = mask32x8::from_array([true, true, false, false, true, true, false, false]);
    let pairs_trailing = mask32x8::from_array([false, false, true, true, false, false, true, true]);
    let masks = [alternating_even, alternating_odd, pairs_leading, pairs_trailing];

    b.iter(|| {
        // The array is hidden behind `black_box` as a whole; elements are read by index.
        let set = black_box(&masks);
        let first_pair = set[0].count() + set[1].count();
        let total = first_pair + set[2].count() + set[3].count();
        black_box(total)
    });
}

// ============================================================================
// Cache behavior test: alternating access pattern
// ============================================================================

/// Benchmarks counting two complementary alternating `mask32x8` values in the
/// same iteration and adding the two counts.
#[bench]
fn cache_alternating_access(b: &mut Bencher) {
    let even_lanes = [true, false, true, false, true, false, true, false];
    let odd_lanes = [false, true, false, true, false, true, false, true];
    let mask1 = mask32x8::from_array(even_lanes);
    let mask2 = mask32x8::from_array(odd_lanes);

    b.iter(|| {
        let first = black_box(mask1);
        let second = black_box(mask2);
        let total = first.count() + second.count();
        black_box(total)
    });
}

// ============================================================================
// Test different element types (i64 vs i32)
// ============================================================================

/// Benchmarks `count()` on a `mask64x4` with alternating elements set (two of four).
#[bench]
fn mask4_i64_count_50pct(b: &mut Bencher) {
    let pattern = [true, false, true, false];
    let half_set = mask64x4::from_array(pattern);
    b.iter(|| {
        let input = black_box(half_set);
        input.count()
    });
}

/// Benchmarks `count()` on a `mask64x8` with alternating elements set (four of eight).
#[bench]
fn mask8_i64_count_50pct(b: &mut Bencher) {
    let pattern = [
        true, false, true, false,
        true, false, true, false,
    ];
    let half_set = mask64x8::from_array(pattern);
    b.iter(|| {
        let input = black_box(half_set);
        input.count()
    });
}

// ============================================================================
// Edge cases
// ============================================================================

/// Edge case: `count()` on a `mask32x16` with no element set.
///
/// Uses the same input as `mask16_count_0pct`.
#[bench]
fn edge_case_all_false_mask16(b: &mut Bencher) {
    let none_set = mask32x16::splat(false);
    b.iter(|| {
        let input = black_box(none_set);
        input.count()
    });
}

/// Edge case: `count()` on a `mask32x16` with every element set.
///
/// Uses the same input as `mask16_count_100pct`.
#[bench]
fn edge_case_all_true_mask16(b: &mut Bencher) {
    let every_set = mask32x16::splat(true);
    b.iter(|| {
        let input = black_box(every_set);
        input.count()
    });
}

/// Edge case: `count()` on a `mask32x16` where only the element at index 7 is set.
#[bench]
fn edge_case_single_true_mask16(b: &mut Bencher) {
    // Build the sixteen-element pattern directly: `true` at index 7, `false` elsewhere.
    let pattern: [bool; 16] = std::array::from_fn(|lane| lane == 7);
    let single_set = mask32x16::from_array(pattern);
    b.iter(|| {
        let input = black_box(single_set);
        input.count()
    });
}
35 changes: 35 additions & 0 deletions crates/core_simd/examples/mask_count.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
//! Demonstrates `Mask::count()` to count matching elements.

#![feature(portable_simd)]
use cmp::SimdPartialOrd;
use core_simd::simd::*;

/// Prints how many of eight sample values exceed `5.0`, then collects those
/// values into a `Vec` whose capacity was computed up front with `count()`.
fn main() {
    // Count elements above threshold
    let data = [1.0, 5.0, 3.0, 7.0, 2.0, 9.0, 4.0, 6.0];
    let threshold = f32x8::splat(5.0);
    let above = f32x8::from_array(data).simd_gt(threshold);
    println!("Values above 5.0: {}", above.count());

    // First pass: tally the matches per 8-element chunk so the output
    // buffer can be allocated once with the right capacity.
    let chunks = data.chunks_exact(8);
    let total: usize = chunks
        .clone()
        .map(|chunk| f32x8::from_slice(chunk).simd_gt(threshold).count())
        .sum();

    // Second pass: recompute each chunk's mask and keep the values whose element is set.
    let mut results = Vec::with_capacity(total);
    for chunk in chunks {
        let selected = f32x8::from_slice(chunk).simd_gt(threshold);
        results.extend(
            chunk
                .iter()
                .enumerate()
                .filter(|&(lane, _)| selected.test(lane))
                .map(|(_, &val)| val),
        );
    }

    println!("Filtered: {:?}", results);
}
4 changes: 0 additions & 4 deletions crates/core_simd/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,6 @@
any(target_arch = "powerpc", target_arch = "powerpc64"),
feature(stdarch_powerpc)
)]
#![cfg_attr(
all(target_arch = "x86_64", target_feature = "avx512f"),
feature(stdarch_x86_avx512)
)]
#![warn(missing_docs, clippy::missing_inline_in_public_items)] // basically all items, really
#![deny(
unsafe_op_in_unsafe_fn,
Expand Down
21 changes: 21 additions & 0 deletions crates/core_simd/src/masks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,27 @@ where
Some(min_index.to_usize())
}
}

/// Counts the `true` elements of the mask.
///
/// # Examples
///
/// ```
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
/// # use simd::mask32x4;
/// assert_eq!(mask32x4::splat(false).count(), 0);
/// assert_eq!(mask32x4::splat(true).count(), 4);
///
/// let mask = mask32x4::from_array([true, false, true, true]);
/// assert_eq!(mask.count(), 3);
/// ```
#[inline]
#[must_use]
pub fn count(self) -> usize {
    // The population count of the bitmask form is taken as the number of `true` elements.
    let bits = self.to_bitmask();
    bits.count_ones() as usize
}
}

// vector/array conversion
Expand Down
Loading