Skip to content

Commit

Permalink
aarch64: Use SVE ASRD instruction with Neon modes.
Browse files Browse the repository at this point in the history
The SVE ASRD instruction performs an arithmetic shift right by an immediate,
rounding towards zero, which implements signed division by a power of two.

This patch enables the use of ASRD with Neon modes.

For example:

int8_t in[N], out[N];

void
foo (void)
{
  for (int i = 0; i < N; i++)
    out[i] = in[i] / 4;
}

compiles to:

	ldr	q31, [x1, x0]
	cmlt	v30.16b, v31.16b, #0
	and	z30.b, z30.b, 3
	add	v30.16b, v30.16b, v31.16b
	sshr	v30.16b, v30.16b, 2
	str	q30, [x0, x2]
	add	x0, x0, 16
	cmp	x0, 1024

but can just be:

	ldp	q30, q31, [x0], 32
	asrd	z31.b, p7/m, z31.b, #2
	asrd	z30.b, p7/m, z30.b, #2
	stp	q30, q31, [x1], 32
	cmp	x0, x2

This patch also adds the following overload:
	aarch64_ptrue_reg (machine_mode pred_mode, machine_mode data_mode)
Depending on the data mode, the function returns a predicate with the
appropriate bits set.

The patch was bootstrapped and regtested on aarch64-linux-gnu with no regressions.

gcc/ChangeLog:

	* config/aarch64/aarch64.cc (aarch64_ptrue_reg): New overload.
	* config/aarch64/aarch64-protos.h (aarch64_ptrue_reg): Likewise.
	* config/aarch64/aarch64-sve.md: Extended sdiv_pow2<mode>3
	and *sdiv_pow2<mode>3 to support Neon modes.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/sve/sve-asrd.c: New test.

Co-authored-by: Richard Sandiford <[email protected]>
Signed-off-by: Soumya AR <[email protected]>
  • Loading branch information
Soumya AR and rsandifo-arm committed Dec 11, 2024
1 parent 65b7c8d commit e5569a2
Show file tree
Hide file tree
Showing 4 changed files with 115 additions and 12 deletions.
1 change: 1 addition & 0 deletions gcc/config/aarch64/aarch64-protos.h
Original file line number Diff line number Diff line change
Expand Up @@ -1018,6 +1018,7 @@ void aarch64_expand_mov_immediate (rtx, rtx);
rtx aarch64_stack_protect_canary_mem (machine_mode, rtx, aarch64_salt_type);
rtx aarch64_ptrue_reg (machine_mode);
rtx aarch64_ptrue_reg (machine_mode, unsigned int);
rtx aarch64_ptrue_reg (machine_mode, machine_mode);
rtx aarch64_pfalse_reg (machine_mode);
bool aarch64_sve_same_pred_for_ptest_p (rtx *, rtx *);
void aarch64_emit_sve_pred_move (rtx, rtx, rtx);
Expand Down
24 changes: 12 additions & 12 deletions gcc/config/aarch64/aarch64-sve.md
Original file line number Diff line number Diff line change
Expand Up @@ -5009,34 +5009,34 @@

;; Unpredicated ASRD.
(define_expand "sdiv_pow2<mode>3"
[(set (match_operand:SVE_I 0 "register_operand")
(unspec:SVE_I
[(set (match_operand:SVE_VDQ_I 0 "register_operand")
(unspec:SVE_VDQ_I
[(match_dup 3)
(unspec:SVE_I
[(match_operand:SVE_I 1 "register_operand")
(unspec:SVE_VDQ_I
[(match_operand:SVE_VDQ_I 1 "register_operand")
(match_operand 2 "aarch64_simd_rshift_imm")]
UNSPEC_ASRD)]
UNSPEC_PRED_X))]
"TARGET_SVE"
{
operands[3] = aarch64_ptrue_reg (<VPRED>mode);
operands[3] = aarch64_ptrue_reg (<VPRED>mode, <MODE>mode);
}
)

;; Predicated ASRD.
(define_insn "*sdiv_pow2<mode>3"
[(set (match_operand:SVE_I 0 "register_operand")
(unspec:SVE_I
[(set (match_operand:SVE_VDQ_I 0 "register_operand")
(unspec:SVE_VDQ_I
[(match_operand:<VPRED> 1 "register_operand")
(unspec:SVE_I
[(match_operand:SVE_I 2 "register_operand")
(match_operand:SVE_I 3 "aarch64_simd_rshift_imm")]
(unspec:SVE_VDQ_I
[(match_operand:SVE_VDQ_I 2 "register_operand")
(match_operand:SVE_VDQ_I 3 "aarch64_simd_rshift_imm")]
UNSPEC_ASRD)]
UNSPEC_PRED_X))]
"TARGET_SVE"
{@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
[ w , Upl , 0 ; * ] asrd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
[ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;asrd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
[ w , Upl , 0 ; * ] asrd\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, #%3
[ ?&w , Upl , w ; yes ] movprfx\t%Z0, %Z2\;asrd\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, #%3
}
)

Expand Down
16 changes: 16 additions & 0 deletions gcc/config/aarch64/aarch64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3778,6 +3778,22 @@ aarch64_ptrue_reg (machine_mode mode, unsigned int vl)
return gen_lowpart (mode, reg);
}

/* Build the predicate register, of mode PRED_MODE, that governs an
   operation on data of mode DATA_MODE.

   For an SVE DATA_MODE the result has every predicate bit active.  For
   any other DATA_MODE (a scalar or an Advanced SIMD vector) of N bytes,
   only the first N predicate bits are active and the rest are inactive.  */
rtx
aarch64_ptrue_reg (machine_mode pred_mode, machine_mode data_mode)
{
  if (!aarch64_sve_mode_p (data_mode))
    {
      /* Non-SVE modes are expected to have a constant size, hence the
	 use of to_constant ().  */
      auto nbytes = GET_MODE_SIZE (data_mode).to_constant ();
      return aarch64_ptrue_reg (pred_mode, nbytes);
    }

  return aarch64_ptrue_reg (pred_mode);
}

/* Return an all-false predicate register of mode MODE. */

rtx
Expand Down
86 changes: 86 additions & 0 deletions gcc/testsuite/gcc.target/aarch64/sve/sve-asrd.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/* { dg-do compile } */
/* { dg-options "-Ofast --param aarch64-autovec-preference=asimd-only" } */
/* { dg-final { check-function-bodies "**" "" "" } } */

#include <stdint.h>

/* FUNC (TYPE, I) defines a global array M_TYPE_I holding I elements of TYPE
   and a function asrd_TYPE_I that divides every element of that array by 4
   in place.  The generated function names are the ones matched by the
   check-function-bodies patterns later in this file.  */
#define FUNC(TYPE, I) \
TYPE M_##TYPE##_##I[I]; \
void asrd_##TYPE##_##I () \
{ \
for (int i = 0; i < I; i++) \
{ \
M_##TYPE##_##I[i] /= 4; \
} \
}

/*
** asrd_int8_t_8:
** ...
** ptrue (p[0-7]).b, vl8
** ...
** asrd z[0-9]+\.b, \1/m, z[0-9]+\.b, #2
** ...
*/
FUNC(int8_t, 8);

/*
** asrd_int8_t_16:
** ...
** ptrue (p[0-7]).b, vl16
** ...
** asrd z[0-9]+\.b, \1/m, z[0-9]+\.b, #2
** ...
*/
FUNC(int8_t, 16);

/*
** asrd_int16_t_4:
** ...
** ptrue (p[0-7]).b, vl8
** ...
** asrd z[0-9]+\.h, \1/m, z[0-9]+\.h, #2
** ...
*/
FUNC(int16_t, 4);

/*
** asrd_int16_t_8:
** ...
** ptrue (p[0-7]).b, vl16
** ...
** asrd z[0-9]+\.h, \1/m, z[0-9]+\.h, #2
** ...
*/
FUNC(int16_t, 8);

/*
** asrd_int32_t_2:
** ...
** ptrue (p[0-7]).b, vl8
** ...
** asrd z[0-9]+\.s, \1/m, z[0-9]+\.s, #2
** ...
*/
FUNC(int32_t, 2);

/*
** asrd_int32_t_4:
** ...
** ptrue (p[0-7]).b, vl16
** ...
** asrd z[0-9]+\.s, \1/m, z[0-9]+\.s, #2
** ...
*/
FUNC(int32_t, 4);

/*
** asrd_int64_t_2:
** ...
** ptrue (p[0-7]).b, vl16
** ...
** asrd z[0-9]+\.d, \1/m, z[0-9]+\.d, #2
** ...
*/
FUNC(int64_t, 2);

0 comments on commit e5569a2

Please sign in to comment.