Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extend SME2.1 intrinsics to mf8 #375

Merged
merged 3 commits into from
Jan 15, 2025
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 14 additions & 11 deletions main/acle.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ toc: true
---

<!--
SPDX-FileCopyrightText: Copyright 2011-2024 Arm Limited and/or its affiliates <[email protected]>
SPDX-FileCopyrightText: Copyright 2011-2025 Arm Limited and/or its affiliates <[email protected]>
SPDX-FileCopyrightText: Copyright 2022 Google LLC.
CC-BY-SA-4.0 AND Apache-Patent-License
See LICENSE.md file for details
Expand Down Expand Up @@ -435,6 +435,7 @@ Armv8.4-A [[ARMARMv84]](#ARMARMv84). Support is added for the Dot Product intrin
* Added [`__arm_agnostic`](#arm_agnostic) keyword attribute.
* Refined function versioning scope and signature rules to use the default
version scope and signature.
* Added mf8 variants of SME 2.1 intrinsics.

### References

Expand Down Expand Up @@ -12509,7 +12510,7 @@ The intrinsics in this section are defined by the header file
Move and zero ZA tile slice to vector register.

``` c
// And similarly for u8.
// And similarly for u8 and mf8.
svint8_t svreadz_hor_za8_s8(uint64_t tile, uint32_t slice)
__arm_streaming __arm_inout("za");

Expand All @@ -12525,11 +12526,12 @@ Move and zero ZA tile slice to vector register.
svint64_t svreadz_hor_za64_s64(uint64_t tile, uint32_t slice)
__arm_streaming __arm_inout("za");

// And similarly for s16, s32, s64, u8, u16, u32, u64, bf16, f16, f32, f64
// And similarly for s16, s32, s64, u8, u16, u32, u64,
// mf8, bf16, f16, f32, f64
svint8_t svreadz_hor_za128_s8(uint64_t tile, uint32_t slice)
__arm_streaming __arm_inout("za");

// And similarly for u8.
// And similarly for u8 and mf8.
svint8_t svreadz_ver_za8_s8(uint64_t tile, uint32_t slice)
__arm_streaming __arm_inout("za");

Expand All @@ -12545,7 +12547,8 @@ Move and zero ZA tile slice to vector register.
svint64_t svreadz_ver_za64_s64(uint64_t tile, uint32_t slice)
__arm_streaming __arm_inout("za");

// And similarly for s16, s32, s64, u8, u16, u32, u64, bf16, f16, f32, f64
// And similarly for s16, s32, s64, u8, u16, u32, u64,
// mf8, bf16, f16, f32, f64
svint8_t svreadz_ver_za128_s8(uint64_t tile, uint32_t slice)
__arm_streaming __arm_inout("za");
```
Expand All @@ -12555,28 +12558,28 @@ Move and zero ZA tile slice to vector register.
Move and zero multiple ZA tile slices to vector registers.

``` c
// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
// Variants are also available for _za8_u8, _za8_mf8, _za16_s16, _za16_u16,
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
// _za64_s64, _za64_u64 and _za64_f64
svint8x2_t svreadz_hor_za8_s8_vg2(uint64_t tile, uint32_t slice)
__arm_streaming __arm_inout("za");


// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
// Variants are also available for _za8_u8, _za8_mf8, _za16_s16, _za16_u16,
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
// _za64_s64, _za64_u64 and _za64_f64
svint8x4_t svreadz_hor_za8_s8_vg4(uint64_t tile, uint32_t slice)
__arm_streaming __arm_inout("za");


// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
// Variants are also available for _za8_u8, _za8_mf8, _za16_s16, _za16_u16,
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
// _za64_s64, _za64_u64 and _za64_f64
svint8x2_t svreadz_ver_za8_s8_vg2(uint64_t tile, uint32_t slice)
__arm_streaming __arm_inout("za");


// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
// Variants are also available for _za8_u8, _za8_mf8, _za16_s16, _za16_u16,
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
// _za64_s64, _za64_u64 and _za64_f64
svint8x4_t svreadz_ver_za8_s8_vg4(uint64_t tile, uint32_t slice)
Expand All @@ -12588,14 +12591,14 @@ Move and zero multiple ZA tile slices to vector registers
Move and zero multiple ZA single-vector groups to vector registers.

``` c
// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
// Variants are also available for _za8_u8, _za8_mf8, _za16_s16, _za16_u16,
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
// _za64_s64, _za64_u64 and _za64_f64
svint8x2_t svreadz_za8_s8_vg1x2(uint32_t slice)
__arm_streaming __arm_inout("za");


// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
// Variants are also available for _za8_u8, _za8_mf8, _za16_s16, _za16_u16,
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
// _za64_s64, _za64_u64 and _za64_f64
svint8x4_t svreadz_za8_s8_vg1x4(uint32_t slice)
Expand Down
Loading