Skip to content

Commit

Permalink
[a64] Optimize OPCODE_SPLAT with MOVI/FMOV
Browse files Browse the repository at this point in the history
Moves the `FMOV` constant functions into `a64_util` so they are available to other translation units. Optimize constant-splats with conditional use of `MOVI` and `FMOV`.
  • Loading branch information
Wunkolo committed Jun 14, 2024
1 parent cde211c commit 40d2d33
Show file tree
Hide file tree
Showing 3 changed files with 110 additions and 79 deletions.
69 changes: 1 addition & 68 deletions src/xenia/cpu/backend/a64/a64_emitter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
*/

#include "xenia/cpu/backend/a64/a64_emitter.h"
#include "xenia/cpu/backend/a64/a64_util.h"

#include <cstddef>

Expand Down Expand Up @@ -810,74 +811,6 @@ uintptr_t A64Emitter::GetVConstPtr(VConst id) const {
return GetVConstPtr() + GetVConstOffset(id);
}

// Attempts to convert an fp32 bit-value into an fp8-immediate value for FMOV.
// Returns true and writes `fp8` on success; returns false and leaves `fp8`
// untouched if the value cannot be represented.
//
// C2.2.3 Modified immediate constants in A64 floating-point instructions
//
//   imm8 : abcdefgh
//              V
//   fp32 : aBbbbbbc defgh000 00000000 00000000
//   B = NOT(b)
//
// Representable values are +/-((16 + e:f:g:h) / 16) * 2^n with n in [-3, 4].
// Zero, subnormals, infinities and NaNs all have an unbiased exponent outside
// that range and are therefore rejected.
static bool f32_to_fimm8(uint32_t u32, oaknut::FImm8& fp8) {
  const uint32_t sign = (u32 >> 31) & 1;
  int32_t exp = ((u32 >> 23) & 0xff) - 127;
  int64_t mantissa = u32 & 0x7fffff;

  // Too many mantissa bits: only the top four fraction bits (e:f:g:h) can be
  // encoded, so the remaining 19 low bits must all be zero.
  if (mantissa & 0x7ffff) {
    return false;
  }
  // Too many exp bits: only 3 bits of exponent are available.
  if (exp < -3 || exp > 4) {
    return false;
  }

  // mantissa = (16 + e:f:g:h) / 16.
  // The fraction field is 23 bits wide, so after dropping the low 19 bits the
  // result always fits in 4 bits; no further range check is required.
  mantissa >>= 19;

  // exp = (NOT(b):c:d) - 3
  // Re-bias into [0, 7], then flip the top bit to recover `b` from NOT(b).
  exp = ((exp + 3) & 0b111) ^ 0b100;

  fp8 = oaknut::FImm8(sign, exp, uint8_t(mantissa));
  return true;
}

// Attempts to convert an fp64 bit-value into an fp8-immediate value for FMOV.
// Returns true and writes `fp8` on success; returns false and leaves `fp8`
// untouched if the value cannot be represented.
//
// C2.2.3 Modified immediate constants in A64 floating-point instructions
//
//   imm8 : abcdefgh
//              V
//   fp64 : aBbbbbbb bbcdefgh 00000000 00000000 00000000 00000000 00000000 00000000
//   B = NOT(b)
//
// Representable values are +/-((16 + e:f:g:h) / 16) * 2^n with n in [-3, 4].
// Zero, subnormals, infinities and NaNs all have an unbiased exponent outside
// that range and are therefore rejected.
static bool f64_to_fimm8(uint64_t u64, oaknut::FImm8& fp8) {
  const uint32_t sign = (u64 >> 63) & 1;
  int32_t exp = ((u64 >> 52) & 0x7ff) - 1023;
  int64_t mantissa = u64 & 0xfffffffffffffULL;

  // Too many mantissa bits: only the top four fraction bits (e:f:g:h) can be
  // encoded, so the remaining 48 low bits must all be zero.
  if (mantissa & 0xffffffffffffULL) {
    return false;
  }
  // Too many exp bits: only 3 bits of exponent are available.
  if (exp < -3 || exp > 4) {
    return false;
  }

  // mantissa = (16 + e:f:g:h) / 16.
  // The fraction field is 52 bits wide, so after dropping the low 48 bits the
  // result always fits in 4 bits; no further range check is required.
  mantissa >>= 48;

  // exp = (NOT(b):c:d) - 3
  // Re-bias into [0, 7], then flip the top bit to recover `b` from NOT(b).
  exp = ((exp + 3) & 0b111) ^ 0b100;

  fp8 = oaknut::FImm8(sign, exp, uint8_t(mantissa));
  return true;
}

// Implies possible StashV(0, ...)!
void A64Emitter::LoadConstantV(oaknut::QReg dest, const vec128_t& v) {
if (!v.low && !v.high) {
Expand Down
48 changes: 38 additions & 10 deletions src/xenia/cpu/backend/a64/a64_seq_vector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
*/

#include "xenia/cpu/backend/a64/a64_sequences.h"
#include "xenia/cpu/backend/a64/a64_util.h"

#include <algorithm>
#include <cstring>
Expand Down Expand Up @@ -1026,12 +1027,7 @@ EMITTER_OPCODE_TABLE(OPCODE_EXTRACT, EXTRACT_I8, EXTRACT_I16, EXTRACT_I32);
struct SPLAT_I8 : Sequence<SPLAT_I8, I<OPCODE_SPLAT, V128Op, I8Op>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
if (i.src1.is_constant) {
if (i.src1.constant() <= 0xFF) {
e.MOVI(i.dest.reg().B16(), i.src1.constant());
return;
}
e.MOV(W0, i.src1.constant());
e.DUP(i.dest.reg().B16(), W0);
e.MOVI(i.dest.reg().B16(), i.src1.constant());
} else {
e.DUP(i.dest.reg().B16(), i.src1);
}
Expand All @@ -1040,9 +1036,12 @@ struct SPLAT_I8 : Sequence<SPLAT_I8, I<OPCODE_SPLAT, V128Op, I8Op>> {
struct SPLAT_I16 : Sequence<SPLAT_I16, I<OPCODE_SPLAT, V128Op, I16Op>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
if (i.src1.is_constant) {
if (i.src1.constant() <= 0xFF) {
if ((i.src1.constant() & 0xFF'00) == 0) {
e.MOVI(i.dest.reg().H8(), i.src1.constant());
return;
} else if ((i.src1.constant() & 0x00'FF) == 0) {
e.MOVI(i.dest.reg().H8(), i.src1.constant(), oaknut::util::LSL, 8);
return;
}
e.MOV(W0, i.src1.constant());
e.DUP(i.dest.reg().H8(), W0);
Expand All @@ -1054,9 +1053,22 @@ struct SPLAT_I16 : Sequence<SPLAT_I16, I<OPCODE_SPLAT, V128Op, I16Op>> {
struct SPLAT_I32 : Sequence<SPLAT_I32, I<OPCODE_SPLAT, V128Op, I32Op>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
if (i.src1.is_constant) {
if (i.src1.constant() <= 0xFF) {
oaknut::FImm8 fp8(0);
if (f32_to_fimm8(i.src1.value->constant.u32, fp8)) {
e.FMOV(i.dest.reg().S4(), fp8);
return;
} else if ((i.src1.constant() & 0xFF'FF'FF'00) == 0) {
e.MOVI(i.dest.reg().S4(), i.src1.constant());
return;
} else if ((i.src1.constant() & 0xFF'FF'00'FF) == 0) {
e.MOVI(i.dest.reg().S4(), i.src1.constant(), oaknut::util::LSL, 8);
return;
} else if ((i.src1.constant() & 0xFF'00'FF'FF) == 0) {
e.MOVI(i.dest.reg().S4(), i.src1.constant(), oaknut::util::LSL, 16);
return;
} else if ((i.src1.constant() & 0x00'FF'FF'FF) == 0) {
e.MOVI(i.dest.reg().S4(), i.src1.constant(), oaknut::util::LSL, 24);
return;
}
e.MOV(W0, i.src1.constant());
e.DUP(i.dest.reg().S4(), W0);
Expand All @@ -1068,8 +1080,24 @@ struct SPLAT_I32 : Sequence<SPLAT_I32, I<OPCODE_SPLAT, V128Op, I32Op>> {
struct SPLAT_F32 : Sequence<SPLAT_F32, I<OPCODE_SPLAT, V128Op, F32Op>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
if (i.src1.is_constant) {
if (i.src1.value->constant.i32 <= 0xFF) {
e.MOVI(i.dest.reg().S4(), i.src1.value->constant.i32);
oaknut::FImm8 fp8(0);
if (f32_to_fimm8(i.src1.value->constant.u32, fp8)) {
e.FMOV(i.dest.reg().S4(), fp8);
return;
} else if ((i.src1.value->constant.u32 & 0xFF'FF'FF'00) == 0) {
e.MOVI(i.dest.reg().S4(), i.src1.value->constant.u32);
return;
} else if ((i.src1.value->constant.u32 & 0xFF'FF'00'FF) == 0) {
e.MOVI(i.dest.reg().S4(), i.src1.value->constant.u32, oaknut::util::LSL,
8);
return;
} else if ((i.src1.value->constant.u32 & 0xFF'00'FF'FF) == 0) {
e.MOVI(i.dest.reg().S4(), i.src1.value->constant.u32, oaknut::util::LSL,
16);
return;
} else if ((i.src1.value->constant.u32 & 0x00'FF'FF'FF) == 0) {
e.MOVI(i.dest.reg().S4(), i.src1.value->constant.u32, oaknut::util::LSL,
24);
return;
}
e.MOV(W0, i.src1.value->constant.i32);
Expand Down
72 changes: 71 additions & 1 deletion src/xenia/cpu/backend/a64/a64_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,77 @@
namespace xe {
namespace cpu {
namespace backend {
namespace a64 {} // namespace a64
namespace a64 {

// Attempts to convert an fp32 bit-value into an fp8-immediate value for FMOV.
// Returns true and writes `fp8` on success; returns false and leaves `fp8`
// untouched if the value cannot be represented.
//
// C2.2.3 Modified immediate constants in A64 floating-point instructions
//
//   imm8 : abcdefgh
//              V
//   fp32 : aBbbbbbc defgh000 00000000 00000000
//   B = NOT(b)
//
// Representable values are +/-((16 + e:f:g:h) / 16) * 2^n with n in [-3, 4].
// Zero, subnormals, infinities and NaNs all have an unbiased exponent outside
// that range and are therefore rejected.
constexpr bool f32_to_fimm8(uint32_t u32, oaknut::FImm8& fp8) {
  const uint32_t sign = (u32 >> 31) & 1;
  int32_t exp = ((u32 >> 23) & 0xff) - 127;
  int64_t mantissa = u32 & 0x7fffff;

  // Too many mantissa bits: only the top four fraction bits (e:f:g:h) can be
  // encoded, so the remaining 19 low bits must all be zero.
  if (mantissa & 0x7ffff) {
    return false;
  }
  // Too many exp bits: only 3 bits of exponent are available.
  if (exp < -3 || exp > 4) {
    return false;
  }

  // mantissa = (16 + e:f:g:h) / 16.
  // The fraction field is 23 bits wide, so after dropping the low 19 bits the
  // result always fits in 4 bits; no further range check is required.
  mantissa >>= 19;

  // exp = (NOT(b):c:d) - 3
  // Re-bias into [0, 7], then flip the top bit to recover `b` from NOT(b).
  exp = ((exp + 3) & 0b111) ^ 0b100;

  fp8 = oaknut::FImm8(sign, exp, uint8_t(mantissa));
  return true;
}

// Attempts to convert an fp64 bit-value into an fp8-immediate value for FMOV.
// Returns true and writes `fp8` on success; returns false and leaves `fp8`
// untouched if the value cannot be represented.
//
// C2.2.3 Modified immediate constants in A64 floating-point instructions
//
//   imm8 : abcdefgh
//              V
//   fp64 : aBbbbbbb bbcdefgh 00000000 00000000 00000000 00000000 00000000 00000000
//   B = NOT(b)
//
// Representable values are +/-((16 + e:f:g:h) / 16) * 2^n with n in [-3, 4].
// Zero, subnormals, infinities and NaNs all have an unbiased exponent outside
// that range and are therefore rejected.
constexpr bool f64_to_fimm8(uint64_t u64, oaknut::FImm8& fp8) {
  const uint32_t sign = (u64 >> 63) & 1;
  int32_t exp = ((u64 >> 52) & 0x7ff) - 1023;
  int64_t mantissa = u64 & 0xfffffffffffffULL;

  // Too many mantissa bits: only the top four fraction bits (e:f:g:h) can be
  // encoded, so the remaining 48 low bits must all be zero.
  if (mantissa & 0xffffffffffffULL) {
    return false;
  }
  // Too many exp bits: only 3 bits of exponent are available.
  if (exp < -3 || exp > 4) {
    return false;
  }

  // mantissa = (16 + e:f:g:h) / 16.
  // The fraction field is 52 bits wide, so after dropping the low 48 bits the
  // result always fits in 4 bits; no further range check is required.
  mantissa >>= 48;

  // exp = (NOT(b):c:d) - 3
  // Re-bias into [0, 7], then flip the top bit to recover `b` from NOT(b).
  exp = ((exp + 3) & 0b111) ^ 0b100;

  fp8 = oaknut::FImm8(sign, exp, uint8_t(mantissa));
  return true;
}

} // namespace a64
} // namespace backend
} // namespace cpu
} // namespace xe
Expand Down

0 comments on commit 40d2d33

Please sign in to comment.