From 5000379201b4f3e37ab2b7ba3e757aeca052787b Mon Sep 17 00:00:00 2001 From: Zoltan Herczeg Date: Thu, 16 Jan 2025 10:41:11 +0100 Subject: [PATCH] Implement several compressed instructions for RISCV (#290) --- sljit_src/sljitNativeRISCV_32.c | 15 ++- sljit_src/sljitNativeRISCV_64.c | 103 ++++++++++----- sljit_src/sljitNativeRISCV_common.c | 192 +++++++++++++++++++++------- test_src/sljitTest.c | 50 +++++--- 4 files changed, 262 insertions(+), 98 deletions(-) diff --git a/sljit_src/sljitNativeRISCV_32.c b/sljit_src/sljitNativeRISCV_32.c index 40e9cfc2..23780b98 100644 --- a/sljit_src/sljitNativeRISCV_32.c +++ b/sljit_src/sljitNativeRISCV_32.c @@ -28,17 +28,28 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r { SLJIT_UNUSED_ARG(tmp_r); + if (RISCV_HAS_COMPRESSED(200) && imm <= SIMM16_MAX && imm >= SIMM16_MIN) + return push_inst16(compiler, C_LI | C_RD(dst_r) | CIMM_I(imm)); + if (imm <= SIMM_MAX && imm >= SIMM_MIN) return push_inst(compiler, ADDI | RD(dst_r) | RS1(TMP_ZERO) | IMM_I(imm)); if (imm & 0x800) imm += 0x1000; - FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)(imm & ~0xfff))); + if (RISCV_HAS_COMPRESSED(200) && imm <= 0x1ffff && imm >= -0x20000) + FAIL_IF(push_inst16(compiler, C_LUI | C_RD(dst_r) | ((sljit_u16)(((imm) & 0x1f000) >> 10) | ((imm) & 0x20000) >> 5))); + else + FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)(imm & ~0xfff))); + + imm &= 0xfff; - if ((imm & 0xfff) == 0) + if (imm == 0) return SLJIT_SUCCESS; + if (RISCV_HAS_COMPRESSED(200) && (imm <= 0x1f || imm >= 0xfe0)) + return push_inst16(compiler, C_ADDI | C_RD(dst_r) | CIMM_I(imm)); + return push_inst(compiler, ADDI | RD(dst_r) | RS1(dst_r) | IMM_I(imm)); } diff --git a/sljit_src/sljitNativeRISCV_64.c b/sljit_src/sljitNativeRISCV_64.c index 9847a3a6..9111c2a5 100644 --- a/sljit_src/sljitNativeRISCV_64.c +++ b/sljit_src/sljitNativeRISCV_64.c @@ -24,31 +24,58 @@ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
SUCH DAMAGE. */ -static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r, sljit_sw imm, sljit_s32 tmp_r) +static sljit_s32 load_immediate32(struct sljit_compiler *compiler, sljit_s32 dst_r, sljit_sw imm) { - sljit_sw high; + SLJIT_ASSERT((imm <= 0x7fffffffl && imm > SIMM_MAX) || (imm >= S32_MIN && imm < SIMM_MIN)); - if (imm <= SIMM_MAX && imm >= SIMM_MIN) - return push_inst(compiler, ADDI | RD(dst_r) | RS1(TMP_ZERO) | IMM_I(imm)); + if (imm > S32_MAX) { + SLJIT_ASSERT((imm & 0x800) != 0); + FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)0x80000000u)); + return push_inst(compiler, XORI | RD(dst_r) | RS1(dst_r) | IMM_I(imm)); + } - if (imm <= 0x7fffffffl && imm >= S32_MIN) { - if (imm > S32_MAX) { + if (RISCV_HAS_COMPRESSED(200) && imm <= 0x1ffff && imm >= -0x20000) { + if (imm > 0x1f7ff) { SLJIT_ASSERT((imm & 0x800) != 0); - FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)0x80000000u)); + FAIL_IF(push_inst16(compiler, C_LUI | C_RD(dst_r) | (sljit_u16)0x1000)); return push_inst(compiler, XORI | RD(dst_r) | RS1(dst_r) | IMM_I(imm)); } if ((imm & 0x800) != 0) imm += 0x1000; - FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)(imm & ~0xfff))); - - if ((imm & 0xfff) == 0) - return SLJIT_SUCCESS; + FAIL_IF(push_inst16(compiler, C_LUI | C_RD(dst_r) | ((sljit_u16)(((imm) & 0x1f000) >> 10) | ((imm) & 0x20000) >> 5))); + } else { + if ((imm & 0x800) != 0) + imm += 0x1000; - return push_inst(compiler, ADDI | RD(dst_r) | RS1(dst_r) | IMM_I(imm)); + FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)(imm & ~(sljit_sw)0xfff))); } + imm &= 0xfff; + + if (imm == 0) + return SLJIT_SUCCESS; + + if (RISCV_HAS_COMPRESSED(200) && (imm <= 0x1f || imm >= 0xfe0)) + return push_inst16(compiler, C_ADDI | C_RD(dst_r) | CIMM_I(imm)); + + return push_inst(compiler, ADDI | RD(dst_r) | RS1(dst_r) | IMM_I(imm)); +} + +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r, sljit_sw imm, sljit_s32 tmp_r) +{ + 
sljit_sw high; + + if (RISCV_HAS_COMPRESSED(200) && imm <= SIMM16_MAX && imm >= SIMM16_MIN) + return push_inst16(compiler, C_LI | C_RD(dst_r) | CIMM_I(imm)); + + if (imm <= SIMM_MAX && imm >= SIMM_MIN) + return push_inst(compiler, ADDI | RD(dst_r) | RS1(TMP_ZERO) | IMM_I(imm)); + + if (imm <= 0x7fffffffl && imm >= S32_MIN) + return load_immediate32(compiler, dst_r, imm); + /* Trailing zeroes could be used to produce shifted immediates. */ if (imm <= 0x7ffffffffffl && imm >= -0x80000000000l) { @@ -57,21 +84,12 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r if (imm & 0x800) high = ~high; - if (high > S32_MAX) { - SLJIT_ASSERT((high & 0x800) != 0); - FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)0x80000000u)); - FAIL_IF(push_inst(compiler, XORI | RD(dst_r) | RS1(dst_r) | IMM_I(high))); - } else { - if ((high & 0x800) != 0) - high += 0x1000; - - FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)(high & ~0xfff))); - - if ((high & 0xfff) != 0) - FAIL_IF(push_inst(compiler, ADDI | RD(dst_r) | RS1(dst_r) | IMM_I(high))); - } + FAIL_IF(load_immediate32(compiler, dst_r, high)); - FAIL_IF(push_inst(compiler, SLLI | RD(dst_r) | RS1(dst_r) | IMM_I(12))); + if (RISCV_HAS_COMPRESSED(200)) + FAIL_IF(push_inst16(compiler, C_SLLI | C_RD(dst_r) | (sljit_u16)(12 << 2))); + else + FAIL_IF(push_inst(compiler, SLLI | RD(dst_r) | RS1(dst_r) | IMM_I(12))); if ((imm & 0xfff) != 0) return push_inst(compiler, XORI | RD(dst_r) | RS1(dst_r) | IMM_I(imm)); @@ -99,7 +117,10 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r } if (imm <= SIMM_MAX && imm >= SIMM_MIN) { - FAIL_IF(push_inst(compiler, ADDI | RD(dst_r) | RS1(TMP_ZERO) | IMM_I(imm))); + if (RISCV_HAS_COMPRESSED(200) && imm <= 0x1f && imm >= -0x20) + FAIL_IF(push_inst16(compiler, C_LI | C_RD(dst_r) | CIMM_I(imm))); + else + FAIL_IF(push_inst(compiler, ADDI | RD(dst_r) | RS1(TMP_ZERO) | IMM_I(imm))); imm = 0; } else if (imm > S32_MAX) { 
SLJIT_ASSERT((imm & 0x800) != 0); @@ -110,19 +131,35 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r if ((imm & 0x800) != 0) imm += 0x1000; - FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)(imm & ~0xfff))); + if (RISCV_HAS_COMPRESSED(200) && imm <= 0x1ffff && imm >= -0x20000) + FAIL_IF(push_inst16(compiler, C_LUI | C_RD(dst_r) | ((sljit_u16)(((imm) & 0x1f000) >> 10) | ((imm) & 0x20000) >> 5))); + else + FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)(imm & ~0xfff))); imm &= 0xfff; } - if ((high & 0xfff) != 0) - FAIL_IF(push_inst(compiler, ADDI | RD(tmp_r) | RS1(tmp_r) | IMM_I(high))); + if ((high & 0xfff) != 0) { + SLJIT_ASSERT(high <= 0xfff); + if (RISCV_HAS_COMPRESSED(200) && (high <= 0x1f || high >= 0xfe0)) + FAIL_IF(push_inst16(compiler, C_ADDI | C_RD(tmp_r) | CIMM_I(high))); + else + FAIL_IF(push_inst(compiler, ADDI | RD(tmp_r) | RS1(tmp_r) | IMM_I(high))); + } if (imm & 0x1000) FAIL_IF(push_inst(compiler, XORI | RD(dst_r) | RS1(dst_r) | IMM_I(imm))); - else if (imm != 0) - FAIL_IF(push_inst(compiler, ADDI | RD(dst_r) | RS1(dst_r) | IMM_I(imm))); + else if (imm != 0) { + SLJIT_ASSERT(imm <= 0xfff); + if (RISCV_HAS_COMPRESSED(200) && (imm <= 0x1f || imm >= 0xfe0)) + FAIL_IF(push_inst16(compiler, C_ADDI | C_RD(dst_r) | CIMM_I(imm))); + else + FAIL_IF(push_inst(compiler, ADDI | RD(dst_r) | RS1(dst_r) | IMM_I(imm))); + } - FAIL_IF(push_inst(compiler, SLLI | RD(tmp_r) | RS1(tmp_r) | IMM_I((high & 0x1000) ? 20 : 32))); + if (RISCV_HAS_COMPRESSED(200)) + FAIL_IF(push_inst16(compiler, C_SLLI | C_RD(tmp_r) | (sljit_u16)((high & 0x1000) ? (20 << 2) : (1 << 12)))); + else + FAIL_IF(push_inst(compiler, SLLI | RD(tmp_r) | RS1(tmp_r) | IMM_I((high & 0x1000) ? 
20 : 32))); return push_inst(compiler, XOR | RD(dst_r) | RS1(dst_r) | RS2(tmp_r)); } diff --git a/sljit_src/sljitNativeRISCV_common.c b/sljit_src/sljitNativeRISCV_common.c index de79c3e4..f0015308 100644 --- a/sljit_src/sljitNativeRISCV_common.c +++ b/sljit_src/sljitNativeRISCV_common.c @@ -126,10 +126,10 @@ #if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) #define RISCV_CHECK_COMPRESSED_JUMP(jump, diff, unit) \ - (!((jump)->flags & IS_CALL) && (diff) >= (JUMP_MIN16 / SSIZE_OF(unit)) && (diff) <= (JUMP_MAX16 / SSIZE_OF(unit))) + (!((jump)->flags & IS_CALL) && (diff) >= (JUMP16_MIN / SSIZE_OF(unit)) && (diff) <= (JUMP16_MAX / SSIZE_OF(unit))) #else /* !SLJIT_CONFIG_RISCV_64 */ #define RISCV_CHECK_COMPRESSED_JUMP(jump, diff, unit) \ - ((diff) >= (JUMP_MIN16 / SSIZE_OF(unit)) && (diff) <= (JUMP_MAX16 / SSIZE_OF(unit))) + ((diff) >= (JUMP16_MIN / SSIZE_OF(unit)) && (diff) <= (JUMP16_MAX / SSIZE_OF(unit))) #endif /* SLJIT_CONFIG_RISCV_64 */ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) @@ -189,6 +189,9 @@ static const sljit_u8 vreg_map[SLJIT_NUMBER_OF_VECTOR_REGISTERS + 3] = { #define VRS2(rs2) ((sljit_ins)vreg_map[rs2] << 20) #define IMM_I(imm) ((sljit_ins)(imm) << 20) #define IMM_S(imm) ((((sljit_ins)(imm) & 0xfe0) << 20) | (((sljit_ins)(imm) & 0x1f) << 7)) +#define C_RD(rd) ((sljit_u16)((sljit_u16)reg_map[rd] << 7)) +#define C_RS2(rs2) ((sljit_u16)((sljit_u16)reg_map[rs2] << 2)) +#define CIMM_I(imm) ((sljit_u16)((((imm) & 0x1f) << 2) | (((imm) & 0x20) << 7))) /* Represents funct(i) parts of the instructions.
*/ #define OPC(o) ((sljit_ins)(o)) @@ -218,10 +221,22 @@ static const sljit_u8 vreg_map[SLJIT_NUMBER_OF_VECTOR_REGISTERS + 3] = { #define BLTU (F3(0x6) | OPC(0x63)) #define BGEU (F3(0x7) | OPC(0x63)) /* C_*: compressed */ +#define C_ADD (C_OPC(0x2, 0x4) | (sljit_u16)(1 << 12)) +#define C_ADDI (C_OPC(0x1, 0x0)) +#define C_ADDIW (C_OPC(0x1, 0x1)) +#define C_ADDI16SP (C_OPC(0x1, 0x3) | (sljit_u16)(2 << 7)) +#define C_EBREAK (C_OPC(0x2, 0x4) | (sljit_u16)(1 << 12)) #define C_J (C_OPC(0x1, 0x5)) +#define C_JR (C_OPC(0x2, 0x4)) #if defined SLJIT_CONFIG_RISCV_32 #define C_JAL (C_OPC(0x1, 0x1)) #endif +#define C_JALR (C_OPC(0x2, 0x4) | (sljit_u16)(1 << 12)) +#define C_LI (C_OPC(0x1, 0x2)) +#define C_LUI (C_OPC(0x1, 0x3)) +#define C_MV (C_OPC(0x2, 0x4)) +#define C_NOP (C_OPC(0x1, 0x0)) +#define C_SLLI (C_OPC(0x2, 0x0)) /* CLZ / CTZ: zbb */ #define CLZ (F7(0x30) | F3(0x1) | OPC(0x13)) #define CTZ (F7(0x30) | F12(0x1) | F3(0x1) | OPC(0x13)) @@ -332,12 +347,14 @@ static const sljit_u8 vreg_map[SLJIT_NUMBER_OF_VECTOR_REGISTERS + 3] = { #define SIMM_MAX (0x7ff) #define SIMM_MIN (-0x800) +#define SIMM16_MAX (0x1f) +#define SIMM16_MIN (-0x20) #define BRANCH_MAX (0xfff) #define BRANCH_MIN (-0x1000) #define JUMP_MAX (0xfffff) #define JUMP_MIN (-0x100000) -#define JUMP_MAX16 SIMM_MAX -#define JUMP_MIN16 SIMM_MIN +#define JUMP16_MAX SIMM_MAX +#define JUMP16_MIN SIMM_MIN #if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) #define S32_MAX (0x7ffff7ffl) @@ -346,6 +363,8 @@ static const sljit_u8 vreg_map[SLJIT_NUMBER_OF_VECTOR_REGISTERS + 3] = { #define S52_MAX (0x7ffffffffffffl) #endif /* SLJIT_CONFIG_RISCV_64 */ +#define C_ADDI_W(word) (C_ADDI | (sljit_u16)((word) << 10)) + static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins) { sljit_u16 *ptr = (sljit_u16*)ensure_buf(compiler, sizeof(sljit_ins)); @@ -885,7 +904,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil SLJIT_ASSERT(RISCV_HAS_COMPRESSED(200) || !(jump->flags & 
PATCH_J16)); if (RISCV_HAS_COMPRESSED(200) && (jump->flags & PATCH_J16)) { - SLJIT_ASSERT((sljit_sw)addr >= JUMP_MIN16 && (sljit_sw)addr <= JUMP_MAX16); + SLJIT_ASSERT((sljit_sw)addr >= JUMP16_MIN && (sljit_sw)addr <= JUMP16_MAX); addr = ((addr & 0xb40) << 1) | ((addr & 0xe) << 2) | ((addr & 0x10) << 7) | ((addr & 0x20) >> 3) | ((addr & 0x80) >> 1) | ((addr & 0x400) >> 2); #if defined SLJIT_CONFIG_RISCV_32 ins = ((jump->flags & IS_CALL) ? C_JAL : C_J) | (sljit_ins)addr; @@ -1048,8 +1067,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi if (local_size <= STACK_MAX_DISTANCE) { /* Frequent case. */ - FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(-local_size))); offset = local_size - SSIZE_OF(sw); + local_size = -local_size; + if (RISCV_HAS_COMPRESSED(200) && local_size >= -0x200) { + SLJIT_ASSERT((local_size & 0x200) != 0 && (local_size & 0xf) == 0); + FAIL_IF(push_inst16(compiler, C_ADDI16SP | (sljit_u16)(((local_size & 0x10) << 2) | ((local_size & 0x20) >> 3) | ((local_size & 0x40) >> 1) | ((local_size & 0x180) >> 4) | (1 << 12)))); + } else + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(local_size))); local_size = 0; } else { FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(STACK_MAX_DISTANCE))); @@ -1105,7 +1129,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi while (arg_types > 0) { if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { - FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_S0 - saved_arg_count) | RS1(tmp) | IMM_I(0))); + if (RISCV_HAS_COMPRESSED(200)) + FAIL_IF(push_inst16(compiler, C_MV | C_RD(SLJIT_S0 - saved_arg_count) | C_RS2(tmp))); + else + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_S0 - saved_arg_count) | RS1(tmp) | IMM_I(0))); saved_arg_count++; } tmp++; @@ -1200,6 +1227,11 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler 
*compiler, sljit FAIL_IF(push_inst(compiler, FLD | FRD(i) | RS1(SLJIT_SP) | IMM_I(offset))); } + if (RISCV_HAS_COMPRESSED(200) && local_size <= 0x1f0) { + SLJIT_ASSERT((local_size & 0xf) == 0); + return push_inst16(compiler, C_ADDI16SP | (sljit_u16)(((local_size & 0x10) << 2) | ((local_size & 0x20) >> 3) | ((local_size & 0x40) >> 1) | ((local_size & 0x180) >> 4))); + } + return push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(local_size)); } @@ -1624,9 +1656,16 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl switch (GET_OPCODE(op)) { case SLJIT_MOV: SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); - if (dst != src2) + if (dst == src2) + return SLJIT_SUCCESS; + + if (!RISCV_HAS_COMPRESSED(200)) return push_inst(compiler, ADDI | RD(dst) | RS1(src2) | IMM_I(0)); - return SLJIT_SUCCESS; + + /* Revert the x0 to immediate 0. */ + if (src2 == 0) + return push_inst16(compiler, C_LI | C_RD(dst)); + return push_inst16(compiler, C_MV | C_RD(dst) | C_RS2(src2)); case SLJIT_MOV_U8: SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); @@ -1683,8 +1722,11 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_MOV_S32: SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM)); - if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (RISCV_HAS_COMPRESSED(200) && dst == src2) + return push_inst16(compiler, C_ADDIW | C_RD(dst)); return push_inst(compiler, ADDI | 0x8 | RD(dst) | RS1(src2) | IMM_I(0)); + } SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; #endif /* SLJIT_CONFIG_RISCV_64 */ @@ -1735,19 +1777,26 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl if (flags & SRC2_IMM) { if (is_overflow) { - if (src2 >= 0) - FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(0))); - else + if (src2 >= 0) { + if (RISCV_HAS_COMPRESSED(200)) + 
FAIL_IF(push_inst16(compiler, C_MV | C_RD(EQUAL_FLAG) | C_RS2(src1))); + else + FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(0))); + } else FAIL_IF(push_inst(compiler, XORI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(-1))); } else if (op & SLJIT_SET_Z) FAIL_IF(push_inst(compiler, ADDI | WORD | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(src2))); /* Only the zero flag is needed. */ - if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) - FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(src2))); - } - else { + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) { + SLJIT_ASSERT(src2 != 0); + if (RISCV_HAS_COMPRESSED(200) && dst == src1 && src2 <= SIMM16_MAX && src2 >= SIMM16_MIN) + FAIL_IF(push_inst16(compiler, C_ADDI_W(WORD) | C_RD(dst) | CIMM_I(src2))); + else + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(src2))); + } + } else { if (is_overflow) FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2))); else if (op & SLJIT_SET_Z) @@ -1765,8 +1814,12 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl } /* Only the zero flag is needed. */ - if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) - FAIL_IF(push_inst(compiler, ADD | WORD | RD(dst) | RS1(src1) | RS2(src2))); + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) { + if (RISCV_HAS_COMPRESSED(200) && WORD == 0 && dst == src1 && src2 != 0) + FAIL_IF(push_inst16(compiler, C_ADD | C_RD(dst) | C_RS2(src2))); + else + FAIL_IF(push_inst(compiler, ADD | WORD | RD(dst) | RS1(src1) | RS2(src2))); + } } /* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. 
*/ @@ -1781,8 +1834,12 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return SLJIT_SUCCESS; FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RS1(dst) | RS2(EQUAL_FLAG))); - if (op & SLJIT_SET_Z) - FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(dst) | IMM_I(0))); + if (op & SLJIT_SET_Z) { + if (RISCV_HAS_COMPRESSED(200)) + FAIL_IF(push_inst16(compiler, C_MV | C_RD(EQUAL_FLAG) | C_RS2(dst))); + else + FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(dst) | IMM_I(0))); + } FAIL_IF(push_inst(compiler, SRLI | WORD | RD(TMP_REG1) | RS1(TMP_REG1) | IMM_EXTEND(31))); return push_inst(compiler, XOR | RD(OTHER_FLAG) | RS1(TMP_REG1) | RS2(OTHER_FLAG)); @@ -1790,7 +1847,11 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl carry_src_r = GET_FLAG_TYPE(op) == SLJIT_CARRY; if (flags & SRC2_IMM) { - FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(src2))); + SLJIT_ASSERT(src2 != 0); + if (RISCV_HAS_COMPRESSED(200) && dst == src1 && src2 <= SIMM16_MAX && src2 >= SIMM16_MIN) + FAIL_IF(push_inst16(compiler, C_ADDI_W(WORD) | C_RD(dst) | CIMM_I(src2))); + else + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(src2))); } else { if (carry_src_r != 0) { if (src1 != dst) @@ -1798,12 +1859,18 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl else if (src2 != dst) carry_src_r = (sljit_s32)src2; else { - FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(0))); + if (RISCV_HAS_COMPRESSED(200)) + FAIL_IF(push_inst16(compiler, C_MV | C_RD(EQUAL_FLAG) | C_RS2(src1))); + else + FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(0))); carry_src_r = EQUAL_FLAG; } } - FAIL_IF(push_inst(compiler, ADD | WORD | RD(dst) | RS1(src1) | RS2(src2))); + if (RISCV_HAS_COMPRESSED(200) && WORD == 0 && dst == src1 && src2 != 0) + FAIL_IF(push_inst16(compiler, C_ADD | C_RD(dst) | C_RS2(src2))); + else + 
FAIL_IF(push_inst(compiler, ADD | WORD | RD(dst) | RS1(src1) | RS2(src2))); } /* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. */ @@ -1814,7 +1881,10 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl FAIL_IF(push_inst(compiler, SLTU | RD(EQUAL_FLAG) | RS1(dst) | RS2(carry_src_r))); } - FAIL_IF(push_inst(compiler, ADD | WORD | RD(dst) | RS1(dst) | RS2(OTHER_FLAG))); + if (RISCV_HAS_COMPRESSED(200) && WORD == 0) + FAIL_IF(push_inst16(compiler, C_ADD | C_RD(dst) | C_RS2(OTHER_FLAG))); + else + FAIL_IF(push_inst(compiler, ADD | WORD | RD(dst) | RS1(dst) | RS2(OTHER_FLAG))); if (carry_src_r == 0) return SLJIT_SUCCESS; @@ -1837,8 +1907,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl if (GET_FLAG_TYPE(op) == SLJIT_LESS) { FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RS1(src1) | IMM_I(src2))); is_handled = 1; - } - else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS) { + } else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS) { FAIL_IF(push_inst(compiler, SLTI | RD(OTHER_FLAG) | RS1(src1) | IMM_I(src2))); is_handled = 1; } @@ -1849,7 +1918,11 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl if (flags & SRC2_IMM) { reg = (src1 == TMP_REG1) ? 
TMP_REG2 : TMP_REG1; - FAIL_IF(push_inst(compiler, ADDI | RD(reg) | RS1(TMP_ZERO) | IMM_I(src2))); + if (RISCV_HAS_COMPRESSED(200) && src2 <= SIMM16_MAX && src2 >= SIMM16_MIN) + FAIL_IF(push_inst16(compiler, C_LI | C_RD(reg) | CIMM_I(src2))); + else + FAIL_IF(push_inst(compiler, ADDI | RD(reg) | RS1(TMP_ZERO) | IMM_I(src2))); + src2 = reg; flags &= ~SRC2_IMM; } @@ -1874,10 +1947,14 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl if (flags & SRC2_IMM) { if (op & SLJIT_SET_Z) FAIL_IF(push_inst(compiler, ADDI | WORD | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(-src2))); - if (!(flags & UNUSED_DEST)) - return push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(-src2)); - } - else { + if (!(flags & UNUSED_DEST)) { + src2 = -src2; + if (RISCV_HAS_COMPRESSED(200) && dst == src1 && src2 <= SIMM16_MAX && src2 >= SIMM16_MIN) + return push_inst16(compiler, C_ADDI_W(WORD) | C_RD(dst) | CIMM_I(src2)); + + return push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(src2)); + } + } else { if (op & SLJIT_SET_Z) FAIL_IF(push_inst(compiler, SUB | WORD | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2))); if (!(flags & UNUSED_DEST)) @@ -1891,22 +1968,28 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl if (flags & SRC2_IMM) { if (is_overflow) { - if (src2 >= 0) - FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(0))); - else + if (src2 >= 0) { + if (RISCV_HAS_COMPRESSED(200)) + FAIL_IF(push_inst16(compiler, C_MV | C_RD(EQUAL_FLAG) | C_RS2(src1))); + else + FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(0))); + } else FAIL_IF(push_inst(compiler, XORI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(-1))); - } - else if (op & SLJIT_SET_Z) + } else if (op & SLJIT_SET_Z) FAIL_IF(push_inst(compiler, ADDI | WORD | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(-src2))); if (is_overflow || is_carry) FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RS1(src1) | IMM_I(src2))); /* Only the zero flag is 
needed. */ - if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) - FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(-src2))); - } - else { + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) { + src2 = -src2; + if (RISCV_HAS_COMPRESSED(200) && dst == src1 && src2 <= SIMM16_MAX && src2 >= SIMM16_MIN) + FAIL_IF(push_inst16(compiler, C_ADDI_W(WORD) | C_RD(dst) | CIMM_I(src2))); + else + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(src2))); + } + } else { if (is_overflow) FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2))); else if (op & SLJIT_SET_Z) @@ -1924,8 +2007,12 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return SLJIT_SUCCESS; FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RS1(dst) | RS2(EQUAL_FLAG))); - if (op & SLJIT_SET_Z) - FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(dst) | IMM_I(0))); + if (op & SLJIT_SET_Z) { + if (RISCV_HAS_COMPRESSED(200)) + FAIL_IF(push_inst16(compiler, C_MV | C_RD(EQUAL_FLAG) | C_RS2(dst))); + else + FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(dst) | IMM_I(0))); + } FAIL_IF(push_inst(compiler, SRLI | WORD | RD(TMP_REG1) | RS1(TMP_REG1) | IMM_EXTEND(31))); return push_inst(compiler, XOR | RD(OTHER_FLAG) | RS1(TMP_REG1) | RS2(OTHER_FLAG)); @@ -1942,9 +2029,12 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl if (is_carry) FAIL_IF(push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(src2))); - FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(-src2))); - } - else { + src2 = -src2; + if (RISCV_HAS_COMPRESSED(200) && dst == src1 && src2 <= SIMM16_MAX && src2 >= SIMM16_MIN) + FAIL_IF(push_inst16(compiler, C_ADDI_W(WORD) | C_RD(dst) | CIMM_I(src2))); + else + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(src2))); + } else { if (is_carry) FAIL_IF(push_inst(compiler, SLTU | RD(EQUAL_FLAG) | RS1(src1) | 
RS2(src2))); @@ -1998,6 +2088,8 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_SHL: case SLJIT_MSHL: + if (RISCV_HAS_COMPRESSED(200) && WORD == 0 && dst == src1 && (flags & SRC2_IMM) && !(op & SLJIT_SET_Z) && !(flags & UNUSED_DEST)) + return push_inst16(compiler, C_SLLI | C_RD(dst) | CIMM_I(src2)); EMIT_SHIFT(SLLI, SLL); break; @@ -2218,8 +2310,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile switch (GET_OPCODE(op)) { case SLJIT_BREAKPOINT: + if (RISCV_HAS_COMPRESSED(200)) + return push_inst16(compiler, C_EBREAK); return push_inst(compiler, EBREAK); case SLJIT_NOP: + if (RISCV_HAS_COMPRESSED(200)) + return push_inst16(compiler, C_NOP); return push_inst(compiler, ADDI | RD(TMP_ZERO) | RS1(TMP_ZERO) | IMM_I(0)); case SLJIT_LMUL_UW: FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(SLJIT_R1) | IMM_I(0))); @@ -3136,6 +3232,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw)); src = TMP_REG1; } + + if (RISCV_HAS_COMPRESSED(200)) + return push_inst16(compiler, ((type >= SLJIT_FAST_CALL) ? C_JALR : C_JR) | C_RD(src)); + return push_inst(compiler, JALR | RD((type >= SLJIT_FAST_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | RS1(src) | IMM_I(0)); } diff --git a/test_src/sljitTest.c b/test_src/sljitTest.c index b4262857..da4fb945 100644 --- a/test_src/sljitTest.c +++ b/test_src/sljitTest.c @@ -740,7 +740,7 @@ static void test8(void) /* Test flags (neg, cmp, test).
*/ executable_code code; struct sljit_compiler* compiler = sljit_create_compiler(NULL); - sljit_sw buf[22]; + sljit_sw buf[27]; sljit_s32 i; if (verbose) @@ -748,7 +748,7 @@ static void test8(void) FAILED(!compiler, "cannot create compiler\n"); buf[0] = 100; - for (i = 1; i < 21; i++) + for (i = 1; i < 27; i++) buf[i] = 3; sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 3, 2, 0); @@ -834,6 +834,24 @@ static void test8(void) sljit_emit_op2(compiler, SLJIT_XOR | SLJIT_SET_Z, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 0); /* buf[21] */ sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 21, SLJIT_ZERO); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, (sljit_sw)(~(sljit_uw)0 >> 1)); + sljit_emit_op2(compiler, SLJIT_ADD | SLJIT_SET_OVERFLOW, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, 1); + /* buf[22] */ + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 22, SLJIT_OVERFLOW); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, (sljit_sw)((~(sljit_uw)0 >> 1) - 500)); + sljit_emit_op2u(compiler, SLJIT_ADD | SLJIT_SET_OVERFLOW, SLJIT_R2, 0, SLJIT_IMM, 500); + /* buf[23] */ + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 23, SLJIT_OVERFLOW); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(~(~(sljit_uw)0 >> 1))); + sljit_emit_op2u(compiler, SLJIT_SUB | SLJIT_SET_OVERFLOW, SLJIT_R1, 0, SLJIT_IMM, 1); + /* buf[24] */ + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 24, SLJIT_OVERFLOW); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, 3); + sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_LESS, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, 10); + /* buf[25] */ + sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 25, SLJIT_LESS); + /* buf[26] */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw) * 26, SLJIT_S1, 0); sljit_emit_return_void(compiler); @@ 
-863,6 +881,11 @@ static void test8(void) FAILED(buf[19] != 1, "test8 case 19 failed\n"); FAILED(buf[20] != 0, "test8 case 20 failed\n"); FAILED(buf[21] != 1, "test8 case 21 failed\n"); + FAILED(buf[22] != 1, "test8 case 22 failed\n"); + FAILED(buf[23] != 0, "test8 case 23 failed\n"); + FAILED(buf[24] != 1, "test8 case 24 failed\n"); + FAILED(buf[25] != 1, "test8 case 25 failed\n"); + FAILED(buf[26] != -7, "test8 case 26 failed\n"); sljit_free_code(code.code, NULL); successful_tests++; @@ -2091,26 +2114,15 @@ static void test22(void) /* 64 bit loads. */ executable_code code; struct sljit_compiler* compiler = sljit_create_compiler(NULL); - sljit_sw buf[14]; + sljit_s32 i; + sljit_sw buf[16]; if (verbose) printf("Run test22\n"); FAILED(!compiler, "cannot create compiler\n"); - buf[0] = 7; - buf[1] = 0; - buf[2] = 0; - buf[3] = 0; - buf[4] = 0; - buf[5] = 0; - buf[6] = 0; - buf[7] = 0; - buf[8] = 0; - buf[9] = 0; - buf[10] = 0; - buf[11] = 0; - buf[12] = 0; - buf[13] = 0; + for (i = 0; i < 16; i++) + buf[i] = SLJIT_W(0x1aaaaaaaaaaaaaaa); sljit_emit_enter(compiler, 0, SLJIT_ARGS1V(P), 3, 1, 0); @@ -2128,6 +2140,8 @@ static void test22(void) sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 11 * sizeof(sljit_sw), SLJIT_IMM, SLJIT_W(0x07fff00080010000)); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 12 * sizeof(sljit_sw), SLJIT_IMM, SLJIT_W(0x07fff00080018001)); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 13 * sizeof(sljit_sw), SLJIT_IMM, SLJIT_W(0x07fff00ffff00000)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 14 * sizeof(sljit_sw), SLJIT_IMM, 0x1f8ff); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 15 * sizeof(sljit_sw), SLJIT_IMM, SLJIT_W(0xde12347ffffabc)); sljit_emit_return_void(compiler); @@ -2150,6 +2164,8 @@ static void test22(void) FAILED(buf[11] != SLJIT_W(0x07fff00080010000), "test22 case 12 failed\n"); FAILED(buf[12] != SLJIT_W(0x07fff00080018001), "test22 case 13 failed\n"); FAILED(buf[13] !=
SLJIT_W(0x07fff00ffff00000), "test22 case 14 failed\n"); + FAILED(buf[14] != 0x1f8ff, "test22 case 15 failed\n"); + FAILED(buf[15] != SLJIT_W(0xde12347ffffabc), "test22 case 16 failed\n"); sljit_free_code(code.code, NULL); #endif /* IS_64BIT */