Skip to content

Commit cfcddf3

Browse files
committed
Optimize atomic float on NVPTX
1 parent 582a915 commit cfcddf3

File tree

6 files changed

+476
-5
lines changed

6 files changed

+476
-5
lines changed

build.rs

+37-2
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ fn main() {
4747

4848
if version.minor >= 80 {
4949
println!(
50-
r#"cargo:rustc-check-cfg=cfg(target_feature,values("experimental-zacas","fast-serialization","load-store-on-cond","distinct-ops","miscellaneous-extensions-3"))"#
50+
r#"cargo:rustc-check-cfg=cfg(target_feature,values("experimental-zacas","sm_70","fast-serialization","load-store-on-cond","distinct-ops","miscellaneous-extensions-3"))"#
5151
);
5252

5353
// Custom cfgs set by build script. Not public API.
@@ -58,7 +58,7 @@ fn main() {
5858
// TODO: handle multi-line target_feature_fallback
5959
// grep -F 'target_feature_fallback("' build.rs | grep -Ev '^ *//' | sed -E 's/^.*target_feature_fallback\(//; s/",.*$/"/' | LC_ALL=C sort -u | tr '\n' ',' | sed -E 's/,$/\n/'
6060
println!(
61-
r#"cargo:rustc-check-cfg=cfg(portable_atomic_target_feature,values("cmpxchg16b","distinct-ops","experimental-zacas","fast-serialization","load-store-on-cond","lse","lse128","lse2","mclass","miscellaneous-extensions-3","quadword-atomics","rcpc3","v6","zaamo","zabha"))"#
61+
r#"cargo:rustc-check-cfg=cfg(portable_atomic_target_feature,values("cmpxchg16b","distinct-ops","experimental-zacas","fast-serialization","load-store-on-cond","lse","lse128","lse2","mclass","miscellaneous-extensions-3","quadword-atomics","rcpc3","sm_70","v6","zaamo","zabha"))"#
6262
);
6363
}
6464

@@ -175,6 +175,11 @@ fn main() {
175175
println!("cargo:rustc-cfg=portable_atomic_unstable_asm_experimental_arch");
176176
}
177177
}
178+
"nvptx64" => {
179+
if version.nightly && is_allowed_feature("asm_experimental_arch") {
180+
println!("cargo:rustc-cfg=portable_atomic_unstable_asm_experimental_arch");
181+
}
182+
}
178183
_ => {}
179184
}
180185
}
@@ -435,6 +440,36 @@ fn main() {
435440
// nand (nnr{,g}k), select (sel{,g}r), etc.
436441
target_feature_fallback("miscellaneous-extensions-3", arch13_features);
437442
}
443+
"nvptx64" => {
444+
let mut sm_70 = false;
445+
if let Some(rustflags) = env::var_os("CARGO_ENCODED_RUSTFLAGS") {
446+
for mut flag in rustflags.to_string_lossy().split('\x1f') {
447+
flag = strip_prefix(flag, "-C").unwrap_or(flag);
448+
if let Some(flag) = strip_prefix(flag, "target-feature=") {
449+
for s in flag.split(',') {
450+
// TODO: Handle cases where a specific target feature
451+
// implicitly enables another target feature.
452+
match (s.as_bytes().first(), s.get(1..)) {
453+
(Some(b'+'), Some(f)) => {
454+
if let Some(sm) = strip_prefix(f, "sm_") {
455+
if let Ok(sm) = sm.parse::<u32>() {
456+
if sm >= 70 {
457+
sm_70 = true;
458+
}
459+
}
460+
}
461+
}
462+
(Some(b'-'), Some(_f)) => {
463+
// TODO: handle target features disabled with a `-` prefix
464+
}
465+
_ => {}
466+
}
467+
}
468+
}
469+
}
470+
}
471+
target_feature_fallback("sm_70", sm_70);
472+
}
438473
_ => {}
439474
}
440475
}

src/imp/float/int.rs

+15-2
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@ Note that most of `fetch_*` operations of atomic floats are implemented using
99
CAS loops, which can be slower than equivalent operations of atomic integers.
1010
1111
AArch64 with FEAT_LSFE and GPU targets have atomic instructions for float.
12-
Both will use architecture-specific implementations instead of this implementation in the
13-
future: https://github.com/taiki-e/portable-atomic/issues/34 / https://github.com/taiki-e/portable-atomic/pull/45
12+
See nvptx.rs for NVPTX.
13+
AArch64 with FEAT_LSFE will also use architecture-specific implementations instead of this implementation in the
14+
future: https://github.com/taiki-e/portable-atomic/pull/201
1415
*/
1516

1617
// TODO: fetch_{minimum,maximum}* https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2024/p3008r2.html / https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2024/p0493r5.pdf
@@ -203,9 +204,21 @@ macro_rules! atomic_float {
203204
cfg_has_atomic_16! {
204205
atomic_float!(AtomicF16, f16, AtomicU16, u16, 2);
205206
}
207+
#[cfg(not(all(
208+
target_arch = "nvptx64",
209+
any(target_feature = "sm_70", portable_atomic_target_feature = "sm_70"),
210+
not(any(miri, portable_atomic_sanitize_thread)),
211+
portable_atomic_unstable_asm_experimental_arch,
212+
)))]
206213
cfg_has_atomic_32! {
207214
atomic_float!(AtomicF32, f32, AtomicU32, u32, 4);
208215
}
216+
#[cfg(not(all(
217+
target_arch = "nvptx64",
218+
any(target_feature = "sm_70", portable_atomic_target_feature = "sm_70"),
219+
not(any(miri, portable_atomic_sanitize_thread)),
220+
portable_atomic_unstable_asm_experimental_arch,
221+
)))]
209222
cfg_has_atomic_64! {
210223
atomic_float!(AtomicF64, f64, AtomicU64, u64, 8);
211224
}

src/imp/float/mod.rs

+34
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,49 @@ Atomic float implementations
66

77
#![allow(clippy::float_arithmetic)]
88

9+
#[cfg(not(all(
10+
target_arch = "nvptx64",
11+
any(target_feature = "sm_70", portable_atomic_target_feature = "sm_70"),
12+
not(any(miri, portable_atomic_sanitize_thread)),
13+
portable_atomic_unstable_asm_experimental_arch,
14+
not(any(portable_atomic_unstable_f16, portable_atomic_unstable_f128)),
15+
)))]
916
mod int;
1017

18+
#[cfg(all(
19+
target_arch = "nvptx64",
20+
any(target_feature = "sm_70", portable_atomic_target_feature = "sm_70"),
21+
not(any(miri, portable_atomic_sanitize_thread)),
22+
portable_atomic_unstable_asm_experimental_arch,
23+
))]
24+
mod nvptx;
25+
1126
#[cfg(portable_atomic_unstable_f16)]
1227
cfg_has_atomic_16! {
1328
pub(crate) use self::int::AtomicF16;
1429
}
30+
#[cfg(all(
31+
target_arch = "nvptx64",
32+
any(target_feature = "sm_70", portable_atomic_target_feature = "sm_70"),
33+
not(any(miri, portable_atomic_sanitize_thread)),
34+
portable_atomic_unstable_asm_experimental_arch,
35+
))]
36+
pub(crate) use self::nvptx::{AtomicF32, AtomicF64};
37+
#[cfg(not(all(
38+
target_arch = "nvptx64",
39+
any(target_feature = "sm_70", portable_atomic_target_feature = "sm_70"),
40+
not(any(miri, portable_atomic_sanitize_thread)),
41+
portable_atomic_unstable_asm_experimental_arch,
42+
)))]
1543
cfg_has_atomic_32! {
1644
pub(crate) use self::int::AtomicF32;
1745
}
46+
#[cfg(not(all(
47+
target_arch = "nvptx64",
48+
any(target_feature = "sm_70", portable_atomic_target_feature = "sm_70"),
49+
not(any(miri, portable_atomic_sanitize_thread)),
50+
portable_atomic_unstable_asm_experimental_arch,
51+
)))]
1852
cfg_has_atomic_64! {
1953
pub(crate) use self::int::AtomicF64;
2054
}

0 commit comments

Comments
 (0)