From ba349eecf593bc124f60ce7879c0573ed851d1f6 Mon Sep 17 00:00:00 2001 From: Eduardo Bart Date: Sun, 7 Apr 2024 09:20:00 -0300 Subject: [PATCH] feat: optimize instruction decoding primary switch into jump tables --- src/Makefile | 5 +- src/interpret.cpp | 363 +++++++++++++++++++++++------------------- src/riscv-constants.h | 278 ++++++++++++++++---------------- 3 files changed, 338 insertions(+), 308 deletions(-) diff --git a/src/Makefile b/src/Makefile index c5ec13737..db6a5047d 100644 --- a/src/Makefile +++ b/src/Makefile @@ -210,9 +210,8 @@ ifneq (,$(filter gcc,$(CC))) # but we don't use -O3 because it enables some other flags that are not worth for the interpreter. INTERPRET_CXXFLAGS+=-fgcse-after-reload -fpredictive-commoning -fsplit-paths -ftree-partial-pre endif -# Disable jump tables, because it degrades the instruction decoding performance in the interpret loop, -# since it generates a memory indirection that has a high cost in opcode switches. -INTERPRET_CXXFLAGS+=-fno-jump-tables +# Enable large jump tables to improve performance of instruction decoding in interpret.cpp +INTERPRET_CXXFLAGS+=-fjump-tables --param jump-table-max-growth-ratio-for-speed=4096 endif # Link time optimizations diff --git a/src/interpret.cpp b/src/interpret.cpp index ca1c6a083..cbdde6c3c 100644 --- a/src/interpret.cpp +++ b/src/interpret.cpp @@ -535,8 +535,8 @@ static inline void set_rtc_interrupt(STATE_ACCESS &a, uint64_t mcycle) { /// \brief Obtains the funct3 and opcode fields an instruction. /// \param insn Instruction. -static inline uint32_t insn_get_funct3_00000_opcode(uint32_t insn) { - return insn & 0b111000001111111; +static FORCE_INLINE uint32_t insn_get_funct3_opcode(uint32_t insn) { + return ((insn >> 5) & 0b111'0000000) | (insn & 0b1111111); } /// \brief Obtains the funct3 and trailing 0 bits from an instruction. 
@@ -660,8 +660,8 @@ static inline uint32_t insn_get_rs3(uint32_t insn) { /// \brief Obtains the compressed instruction funct3 and opcode fields an instruction. /// \param insn Instruction. -static inline uint32_t insn_get_c_funct3(uint32_t insn) { - return insn & 0b1110000000000011; +static FORCE_INLINE uint32_t insn_get_c_funct3(uint32_t insn) { + return ((insn >> 11) & 0b111'00) | (insn & 0b11); } /// \brief Obtains the compressed instruction funct6, funct2 and opcode fields an instruction. @@ -3624,12 +3624,20 @@ static FORCE_INLINE execute_status execute_FS(STATE_ACCESS &a, uint64_t &pc, uin template static FORCE_INLINE execute_status execute_FSW(STATE_ACCESS &a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { dump_insn(a, pc, insn, "fsw"); + // If FS is OFF, attempts to read or write the float state will cause an illegal instruction exception. + if (unlikely((a.read_mstatus() & MSTATUS_FS_MASK) == MSTATUS_FS_OFF)) { + return raise_illegal_insn_exception(a, pc, insn); + } return execute_FS(a, pc, mcycle, insn); } template static FORCE_INLINE execute_status execute_FSD(STATE_ACCESS &a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { dump_insn(a, pc, insn, "fsd"); + // If FS is OFF, attempts to read or write the float state will cause an illegal instruction exception. + if (unlikely((a.read_mstatus() & MSTATUS_FS_MASK) == MSTATUS_FS_OFF)) { + return raise_illegal_insn_exception(a, pc, insn); + } return execute_FS(a, pc, mcycle, insn); } @@ -3652,12 +3660,20 @@ static FORCE_INLINE execute_status execute_FL(STATE_ACCESS &a, uint64_t &pc, uin template static FORCE_INLINE execute_status execute_FLW(STATE_ACCESS &a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { dump_insn(a, pc, insn, "flw"); + // If FS is OFF, attempts to read or write the float state will cause an illegal instruction exception. 
+ if (unlikely((a.read_mstatus() & MSTATUS_FS_MASK) == MSTATUS_FS_OFF)) { + return raise_illegal_insn_exception(a, pc, insn); + } return execute_FL(a, pc, mcycle, insn); } template static FORCE_INLINE execute_status execute_FLD(STATE_ACCESS &a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { dump_insn(a, pc, insn, "fld"); + // If FS is OFF, attempts to read or write the float state will cause an illegal instruction exception. + if (unlikely((a.read_mstatus() & MSTATUS_FS_MASK) == MSTATUS_FS_OFF)) { + return raise_illegal_insn_exception(a, pc, insn); + } return execute_FL(a, pc, mcycle, insn); } @@ -3681,6 +3697,10 @@ static FORCE_INLINE execute_status execute_FMADD_D(STATE_ACCESS &a, uint64_t &pc template static FORCE_INLINE execute_status execute_FMADD(STATE_ACCESS &a, uint64_t &pc, uint32_t insn) { + // If FS is OFF, attempts to read or write the float state will cause an illegal instruction exception. + if (unlikely((a.read_mstatus() & MSTATUS_FS_MASK) == MSTATUS_FS_OFF)) { + return raise_illegal_insn_exception(a, pc, insn); + } switch (static_cast(insn_get_funct2_0000000000000000000000000(insn))) { case insn_FM_funct2_0000000000000000000000000::S: return execute_FMADD_S(a, pc, insn); @@ -3711,6 +3731,10 @@ static FORCE_INLINE execute_status execute_FMSUB_D(STATE_ACCESS &a, uint64_t &pc template static FORCE_INLINE execute_status execute_FMSUB(STATE_ACCESS &a, uint64_t &pc, uint32_t insn) { + // If FS is OFF, attempts to read or write the float state will cause an illegal instruction exception. 
+ if (unlikely((a.read_mstatus() & MSTATUS_FS_MASK) == MSTATUS_FS_OFF)) { + return raise_illegal_insn_exception(a, pc, insn); + } switch (static_cast(insn_get_funct2_0000000000000000000000000(insn))) { case insn_FM_funct2_0000000000000000000000000::S: return execute_FMSUB_S(a, pc, insn); @@ -3743,6 +3767,10 @@ static FORCE_INLINE execute_status execute_FNMADD_D(STATE_ACCESS &a, uint64_t &p template static FORCE_INLINE execute_status execute_FNMADD(STATE_ACCESS &a, uint64_t &pc, uint32_t insn) { + // If FS is OFF, attempts to read or write the float state will cause an illegal instruction exception. + if (unlikely((a.read_mstatus() & MSTATUS_FS_MASK) == MSTATUS_FS_OFF)) { + return raise_illegal_insn_exception(a, pc, insn); + } switch (static_cast(insn_get_funct2_0000000000000000000000000(insn))) { case insn_FM_funct2_0000000000000000000000000::S: return execute_FNMADD_S(a, pc, insn); @@ -3773,6 +3801,10 @@ static FORCE_INLINE execute_status execute_FNMSUB_D(STATE_ACCESS &a, uint64_t &p template static FORCE_INLINE execute_status execute_FNMSUB(STATE_ACCESS &a, uint64_t &pc, uint32_t insn) { + // If FS is OFF, attempts to read or write the float state will cause an illegal instruction exception. + if (unlikely((a.read_mstatus() & MSTATUS_FS_MASK) == MSTATUS_FS_OFF)) { + return raise_illegal_insn_exception(a, pc, insn); + } switch (static_cast(insn_get_funct2_0000000000000000000000000(insn))) { case insn_FM_funct2_0000000000000000000000000::S: return execute_FNMSUB_S(a, pc, insn); @@ -4492,6 +4524,10 @@ static FORCE_INLINE execute_status execute_FCVT_FMV_FCLASS(STATE_ACCESS &a, uint template static FORCE_INLINE execute_status execute_FD(STATE_ACCESS &a, uint64_t &pc, uint32_t insn) { + // If FS is OFF, attempts to read or write the float state will cause an illegal instruction exception. 
+ if (unlikely((a.read_mstatus() & MSTATUS_FS_MASK) == MSTATUS_FS_OFF)) { + return raise_illegal_insn_exception(a, pc, insn); + } switch (static_cast(insn_get_funct7(insn))) { case insn_FD_funct7::FADD_S: return execute_FADD_S(a, pc, insn); @@ -4613,6 +4649,11 @@ static FORCE_INLINE execute_status execute_C_ADDI4SPN(STATE_ACCESS &a, uint64_t template static FORCE_INLINE execute_status execute_C_FLD(STATE_ACCESS &a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { dump_insn(a, pc, insn, "c.fld"); + // If FS is OFF, attempts to read or write the float state will cause an illegal instruction + // exception. + if (unlikely((a.read_mstatus() & MSTATUS_FS_MASK) == MSTATUS_FS_OFF)) { + return raise_illegal_insn_exception(a, pc, insn); + } const uint32_t rd = insn_get_CIW_CL_rd_CS_CA_rs2(insn); const uint32_t rs1 = insn_get_CL_CS_CA_CB_rs1(insn); const int32_t imm = insn_get_CL_CS_imm(insn); @@ -4643,6 +4684,11 @@ static FORCE_INLINE execute_status execute_C_LD(STATE_ACCESS &a, uint64_t &pc, u template static FORCE_INLINE execute_status execute_C_FSD(STATE_ACCESS &a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { dump_insn(a, pc, insn, "c.fsd"); + // If FS is OFF, attempts to read or write the float state will cause an illegal instruction + // exception. + if (unlikely((a.read_mstatus() & MSTATUS_FS_MASK) == MSTATUS_FS_OFF)) { + return raise_illegal_insn_exception(a, pc, insn); + } const uint32_t rs1 = insn_get_CL_CS_CA_CB_rs1(insn); const uint32_t rs2 = insn_get_CIW_CL_rd_CS_CA_rs2(insn); const int32_t imm = insn_get_CL_CS_imm(insn); @@ -4983,6 +5029,11 @@ static FORCE_INLINE execute_status execute_C_SLLI(STATE_ACCESS &a, uint64_t &pc, template static FORCE_INLINE execute_status execute_C_FLDSP(STATE_ACCESS &a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { dump_insn(a, pc, insn, "c.fldsp"); + // If FS is OFF, attempts to read or write the float state will cause an illegal instruction + // exception. 
+ if (unlikely((a.read_mstatus() & MSTATUS_FS_MASK) == MSTATUS_FS_OFF)) { + return raise_illegal_insn_exception(a, pc, insn); + } const uint32_t rd = insn_get_rd(insn); const int32_t imm = insn_get_C_FLDSP_LDSP_imm(insn); return execute_C_FL(a, pc, mcycle, rd, 0x2, imm); @@ -5095,6 +5146,11 @@ static FORCE_INLINE execute_status execute_C_Q2_SET0(STATE_ACCESS &a, uint64_t & template static FORCE_INLINE execute_status execute_C_FSDSP(STATE_ACCESS &a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { dump_insn(a, pc, insn, "c.fsdsp"); + // If FS is OFF, attempts to read or write the float state will cause an illegal instruction + // exception. + if (unlikely((a.read_mstatus() & MSTATUS_FS_MASK) == MSTATUS_FS_OFF)) { + return raise_illegal_insn_exception(a, pc, insn); + } const uint32_t rs2 = insn_get_CR_CSS_rs2(insn); const int32_t imm = insn_get_C_FSDSP_SDSP_imm(insn); return execute_C_FS(a, pc, mcycle, rs2, 0x2, imm); @@ -5177,227 +5233,202 @@ static FORCE_INLINE execute_status execute_insn(STATE_ACCESS &a, uint64_t &pc, u return execute_C_SWSP(a, pc, mcycle, insn); case insn_c_funct3::C_SDSP: return execute_C_SDSP(a, pc, mcycle, insn); - default: { - // Here we are sure that the next instruction, at best, can only be a floating point instruction, - // or, at worst, an illegal instruction. - // Since all float instructions try to read the float state, - // we can put the next check before all of them. - // If FS is OFF, attempts to read or write the float state will cause an illegal instruction - // exception. 
- if (unlikely((a.read_mstatus() & MSTATUS_FS_MASK) == MSTATUS_FS_OFF)) { - return raise_illegal_insn_exception(a, pc, insn); - } - switch (c_funct3) { - case insn_c_funct3::C_FLD: - return execute_C_FLD(a, pc, mcycle, insn); - case insn_c_funct3::C_FSD: - return execute_C_FSD(a, pc, mcycle, insn); - case insn_c_funct3::C_FLDSP: - return execute_C_FLDSP(a, pc, mcycle, insn); - case insn_c_funct3::C_FSDSP: - return execute_C_FSDSP(a, pc, mcycle, insn); - default: - return raise_illegal_insn_exception(a, pc, insn); - } - } + case insn_c_funct3::C_FLD: + return execute_C_FLD(a, pc, mcycle, insn); + case insn_c_funct3::C_FSD: + return execute_C_FSD(a, pc, mcycle, insn); + case insn_c_funct3::C_FLDSP: + return execute_C_FLDSP(a, pc, mcycle, insn); + case insn_c_funct3::C_FSDSP: + return execute_C_FSDSP(a, pc, mcycle, insn); + default: + return raise_illegal_insn_exception(a, pc, insn); } } else { - //??D We should probably try doing the first branch on the combined opcode, funct3, and funct7. - // Maybe it reduces the number of levels needed to decode most instructions. 
- auto funct3_00000_opcode = static_cast(insn_get_funct3_00000_opcode(insn)); - switch (funct3_00000_opcode) { - case insn_funct3_00000_opcode::LB: + auto funct3_opcode = static_cast<insn_funct3_opcode>(insn_get_funct3_opcode(insn)); + // This switch will be optimized as a single jump in conjunction with GCC flags + // -fjump-tables --param jump-table-max-growth-ratio-for-speed=4096 + switch (funct3_opcode) { + case insn_funct3_opcode::LB: return execute_LB(a, pc, mcycle, insn); - case insn_funct3_00000_opcode::LH: + case insn_funct3_opcode::LH: return execute_LH(a, pc, mcycle, insn); - case insn_funct3_00000_opcode::LW: + case insn_funct3_opcode::LW: return execute_LW(a, pc, mcycle, insn); - case insn_funct3_00000_opcode::LD: + case insn_funct3_opcode::LD: return execute_LD(a, pc, mcycle, insn); - case insn_funct3_00000_opcode::LBU: + case insn_funct3_opcode::LBU: return execute_LBU(a, pc, mcycle, insn); - case insn_funct3_00000_opcode::LHU: + case insn_funct3_opcode::LHU: return execute_LHU(a, pc, mcycle, insn); - case insn_funct3_00000_opcode::LWU: + case insn_funct3_opcode::LWU: return execute_LWU(a, pc, mcycle, insn); - case insn_funct3_00000_opcode::SB: + case insn_funct3_opcode::SB: return execute_SB(a, pc, mcycle, insn); - case insn_funct3_00000_opcode::SH: + case insn_funct3_opcode::SH: return execute_SH(a, pc, mcycle, insn); - case insn_funct3_00000_opcode::SW: + case insn_funct3_opcode::SW: return execute_SW(a, pc, mcycle, insn); - case insn_funct3_00000_opcode::SD: + case insn_funct3_opcode::SD: return execute_SD(a, pc, mcycle, insn); - case insn_funct3_00000_opcode::FENCE: + case insn_funct3_opcode::FENCE: return execute_FENCE(a, pc, insn); - case insn_funct3_00000_opcode::FENCE_I: + case insn_funct3_opcode::FENCE_I: return execute_FENCE_I(a, pc, insn); - case insn_funct3_00000_opcode::ADDI: + case insn_funct3_opcode::ADDI: return execute_ADDI(a, pc, insn); - case insn_funct3_00000_opcode::SLLI: + case insn_funct3_opcode::SLLI: return execute_SLLI(a, pc, insn); - case
insn_funct3_00000_opcode::SLTI: + case insn_funct3_opcode::SLTI: return execute_SLTI(a, pc, insn); - case insn_funct3_00000_opcode::SLTIU: + case insn_funct3_opcode::SLTIU: return execute_SLTIU(a, pc, insn); - case insn_funct3_00000_opcode::XORI: + case insn_funct3_opcode::XORI: return execute_XORI(a, pc, insn); - case insn_funct3_00000_opcode::ORI: + case insn_funct3_opcode::ORI: return execute_ORI(a, pc, insn); - case insn_funct3_00000_opcode::ANDI: + case insn_funct3_opcode::ANDI: return execute_ANDI(a, pc, insn); - case insn_funct3_00000_opcode::ADDIW: + case insn_funct3_opcode::ADDIW: return execute_ADDIW(a, pc, insn); - case insn_funct3_00000_opcode::SLLIW: + case insn_funct3_opcode::SLLIW: return execute_SLLIW(a, pc, insn); - case insn_funct3_00000_opcode::SLLW: + case insn_funct3_opcode::SLLW: return execute_SLLW(a, pc, insn); - case insn_funct3_00000_opcode::DIVW: + case insn_funct3_opcode::DIVW: return execute_DIVW(a, pc, insn); - case insn_funct3_00000_opcode::REMW: + case insn_funct3_opcode::REMW: return execute_REMW(a, pc, insn); - case insn_funct3_00000_opcode::REMUW: + case insn_funct3_opcode::REMUW: return execute_REMUW(a, pc, insn); - case insn_funct3_00000_opcode::BEQ: + case insn_funct3_opcode::BEQ: return execute_BEQ(a, pc, insn); - case insn_funct3_00000_opcode::BNE: + case insn_funct3_opcode::BNE: return execute_BNE(a, pc, insn); - case insn_funct3_00000_opcode::BLT: + case insn_funct3_opcode::BLT: return execute_BLT(a, pc, insn); - case insn_funct3_00000_opcode::BGE: + case insn_funct3_opcode::BGE: return execute_BGE(a, pc, insn); - case insn_funct3_00000_opcode::BLTU: + case insn_funct3_opcode::BLTU: return execute_BLTU(a, pc, insn); - case insn_funct3_00000_opcode::BGEU: + case insn_funct3_opcode::BGEU: return execute_BGEU(a, pc, insn); - case insn_funct3_00000_opcode::JALR: + case insn_funct3_opcode::JALR: return execute_JALR(a, pc, insn); - case insn_funct3_00000_opcode::CSRRW: + case insn_funct3_opcode::CSRRW: return execute_CSRRW(a, pc, 
mcycle, insn); - case insn_funct3_00000_opcode::CSRRS: + case insn_funct3_opcode::CSRRS: return execute_CSRRS(a, pc, mcycle, insn); - case insn_funct3_00000_opcode::CSRRC: + case insn_funct3_opcode::CSRRC: return execute_CSRRC(a, pc, mcycle, insn); - case insn_funct3_00000_opcode::CSRRWI: + case insn_funct3_opcode::CSRRWI: return execute_CSRRWI(a, pc, mcycle, insn); - case insn_funct3_00000_opcode::CSRRSI: + case insn_funct3_opcode::CSRRSI: return execute_CSRRSI(a, pc, mcycle, insn); - case insn_funct3_00000_opcode::CSRRCI: + case insn_funct3_opcode::CSRRCI: return execute_CSRRCI(a, pc, mcycle, insn); - case insn_funct3_00000_opcode::AUIPC_000: - case insn_funct3_00000_opcode::AUIPC_001: - case insn_funct3_00000_opcode::AUIPC_010: - case insn_funct3_00000_opcode::AUIPC_011: - case insn_funct3_00000_opcode::AUIPC_100: - case insn_funct3_00000_opcode::AUIPC_101: - case insn_funct3_00000_opcode::AUIPC_110: - case insn_funct3_00000_opcode::AUIPC_111: + case insn_funct3_opcode::AUIPC_000: + case insn_funct3_opcode::AUIPC_001: + case insn_funct3_opcode::AUIPC_010: + case insn_funct3_opcode::AUIPC_011: + case insn_funct3_opcode::AUIPC_100: + case insn_funct3_opcode::AUIPC_101: + case insn_funct3_opcode::AUIPC_110: + case insn_funct3_opcode::AUIPC_111: return execute_AUIPC(a, pc, insn); - case insn_funct3_00000_opcode::LUI_000: - case insn_funct3_00000_opcode::LUI_001: - case insn_funct3_00000_opcode::LUI_010: - case insn_funct3_00000_opcode::LUI_011: - case insn_funct3_00000_opcode::LUI_100: - case insn_funct3_00000_opcode::LUI_101: - case insn_funct3_00000_opcode::LUI_110: - case insn_funct3_00000_opcode::LUI_111: + case insn_funct3_opcode::LUI_000: + case insn_funct3_opcode::LUI_001: + case insn_funct3_opcode::LUI_010: + case insn_funct3_opcode::LUI_011: + case insn_funct3_opcode::LUI_100: + case insn_funct3_opcode::LUI_101: + case insn_funct3_opcode::LUI_110: + case insn_funct3_opcode::LUI_111: return execute_LUI(a, pc, insn); - case insn_funct3_00000_opcode::JAL_000: 
- case insn_funct3_00000_opcode::JAL_001: - case insn_funct3_00000_opcode::JAL_010: - case insn_funct3_00000_opcode::JAL_011: - case insn_funct3_00000_opcode::JAL_100: - case insn_funct3_00000_opcode::JAL_101: - case insn_funct3_00000_opcode::JAL_110: - case insn_funct3_00000_opcode::JAL_111: + case insn_funct3_opcode::JAL_000: + case insn_funct3_opcode::JAL_001: + case insn_funct3_opcode::JAL_010: + case insn_funct3_opcode::JAL_011: + case insn_funct3_opcode::JAL_100: + case insn_funct3_opcode::JAL_101: + case insn_funct3_opcode::JAL_110: + case insn_funct3_opcode::JAL_111: return execute_JAL(a, pc, insn); - case insn_funct3_00000_opcode::SRLI_SRAI: + case insn_funct3_opcode::SRLI_SRAI: return execute_SRLI_SRAI(a, pc, insn); - case insn_funct3_00000_opcode::SRLIW_SRAIW: + case insn_funct3_opcode::SRLIW_SRAIW: return execute_SRLIW_SRAIW(a, pc, insn); - case insn_funct3_00000_opcode::AMO_W: + case insn_funct3_opcode::AMO_W: return execute_AMO_W(a, pc, mcycle, insn); - case insn_funct3_00000_opcode::AMO_D: + case insn_funct3_opcode::AMO_D: return execute_AMO_D(a, pc, mcycle, insn); - case insn_funct3_00000_opcode::ADD_MUL_SUB: + case insn_funct3_opcode::ADD_MUL_SUB: return execute_ADD_MUL_SUB(a, pc, insn); - case insn_funct3_00000_opcode::SLL_MULH: + case insn_funct3_opcode::SLL_MULH: return execute_SLL_MULH(a, pc, insn); - case insn_funct3_00000_opcode::SLT_MULHSU: + case insn_funct3_opcode::SLT_MULHSU: return execute_SLT_MULHSU(a, pc, insn); - case insn_funct3_00000_opcode::SLTU_MULHU: + case insn_funct3_opcode::SLTU_MULHU: return execute_SLTU_MULHU(a, pc, insn); - case insn_funct3_00000_opcode::XOR_DIV: + case insn_funct3_opcode::XOR_DIV: return execute_XOR_DIV(a, pc, insn); - case insn_funct3_00000_opcode::SRL_DIVU_SRA: + case insn_funct3_opcode::SRL_DIVU_SRA: return execute_SRL_DIVU_SRA(a, pc, insn); - case insn_funct3_00000_opcode::OR_REM: + case insn_funct3_opcode::OR_REM: return execute_OR_REM(a, pc, insn); - case insn_funct3_00000_opcode::AND_REMU: + case 
insn_funct3_opcode::AND_REMU: return execute_AND_REMU(a, pc, insn); - case insn_funct3_00000_opcode::ADDW_MULW_SUBW: + case insn_funct3_opcode::ADDW_MULW_SUBW: return execute_ADDW_MULW_SUBW(a, pc, insn); - case insn_funct3_00000_opcode::SRLW_DIVUW_SRAW: + case insn_funct3_opcode::SRLW_DIVUW_SRAW: return execute_SRLW_DIVUW_SRAW(a, pc, insn); - case insn_funct3_00000_opcode::PRIVILEGED: + case insn_funct3_opcode::PRIVILEGED: return execute_privileged(a, pc, mcycle, insn); - default: { - // Here we are sure that the next instruction, at best, can only be a floating point instruction, - // or, at worst, an illegal instruction. - // Since all float instructions try to read the float state, - // we can put the next check before all of them. - // If FS is OFF, attempts to read or write the float state will cause an illegal instruction exception. - if (unlikely((a.read_mstatus() & MSTATUS_FS_MASK) == MSTATUS_FS_OFF)) { - return raise_illegal_insn_exception(a, pc, insn); - } - switch (funct3_00000_opcode) { - case insn_funct3_00000_opcode::FSW: - return execute_FSW(a, pc, mcycle, insn); - case insn_funct3_00000_opcode::FSD: - return execute_FSD(a, pc, mcycle, insn); - case insn_funct3_00000_opcode::FLW: - return execute_FLW(a, pc, mcycle, insn); - case insn_funct3_00000_opcode::FLD: - return execute_FLD(a, pc, mcycle, insn); - case insn_funct3_00000_opcode::FMADD_RNE: - case insn_funct3_00000_opcode::FMADD_RTZ: - case insn_funct3_00000_opcode::FMADD_RDN: - case insn_funct3_00000_opcode::FMADD_RUP: - case insn_funct3_00000_opcode::FMADD_RMM: - case insn_funct3_00000_opcode::FMADD_DYN: - return execute_FMADD(a, pc, insn); - case insn_funct3_00000_opcode::FMSUB_RNE: - case insn_funct3_00000_opcode::FMSUB_RTZ: - case insn_funct3_00000_opcode::FMSUB_RDN: - case insn_funct3_00000_opcode::FMSUB_RUP: - case insn_funct3_00000_opcode::FMSUB_RMM: - case insn_funct3_00000_opcode::FMSUB_DYN: - return execute_FMSUB(a, pc, insn); - case insn_funct3_00000_opcode::FNMSUB_RNE: - case 
insn_funct3_00000_opcode::FNMSUB_RTZ: - case insn_funct3_00000_opcode::FNMSUB_RDN: - case insn_funct3_00000_opcode::FNMSUB_RUP: - case insn_funct3_00000_opcode::FNMSUB_RMM: - case insn_funct3_00000_opcode::FNMSUB_DYN: - return execute_FNMSUB(a, pc, insn); - case insn_funct3_00000_opcode::FNMADD_RNE: - case insn_funct3_00000_opcode::FNMADD_RTZ: - case insn_funct3_00000_opcode::FNMADD_RDN: - case insn_funct3_00000_opcode::FNMADD_RUP: - case insn_funct3_00000_opcode::FNMADD_RMM: - case insn_funct3_00000_opcode::FNMADD_DYN: - return execute_FNMADD(a, pc, insn); - case insn_funct3_00000_opcode::FD_000: - case insn_funct3_00000_opcode::FD_001: - case insn_funct3_00000_opcode::FD_010: - case insn_funct3_00000_opcode::FD_011: - case insn_funct3_00000_opcode::FD_100: - case insn_funct3_00000_opcode::FD_111: - return execute_FD(a, pc, insn); - default: - return raise_illegal_insn_exception(a, pc, insn); - } - } + case insn_funct3_opcode::FSW: + return execute_FSW(a, pc, mcycle, insn); + case insn_funct3_opcode::FSD: + return execute_FSD(a, pc, mcycle, insn); + case insn_funct3_opcode::FLW: + return execute_FLW(a, pc, mcycle, insn); + case insn_funct3_opcode::FLD: + return execute_FLD(a, pc, mcycle, insn); + case insn_funct3_opcode::FMADD_RNE: + case insn_funct3_opcode::FMADD_RTZ: + case insn_funct3_opcode::FMADD_RDN: + case insn_funct3_opcode::FMADD_RUP: + case insn_funct3_opcode::FMADD_RMM: + case insn_funct3_opcode::FMADD_DYN: + return execute_FMADD(a, pc, insn); + case insn_funct3_opcode::FMSUB_RNE: + case insn_funct3_opcode::FMSUB_RTZ: + case insn_funct3_opcode::FMSUB_RDN: + case insn_funct3_opcode::FMSUB_RUP: + case insn_funct3_opcode::FMSUB_RMM: + case insn_funct3_opcode::FMSUB_DYN: + return execute_FMSUB(a, pc, insn); + case insn_funct3_opcode::FNMSUB_RNE: + case insn_funct3_opcode::FNMSUB_RTZ: + case insn_funct3_opcode::FNMSUB_RDN: + case insn_funct3_opcode::FNMSUB_RUP: + case insn_funct3_opcode::FNMSUB_RMM: + case insn_funct3_opcode::FNMSUB_DYN: + return 
execute_FNMSUB(a, pc, insn); + case insn_funct3_opcode::FNMADD_RNE: + case insn_funct3_opcode::FNMADD_RTZ: + case insn_funct3_opcode::FNMADD_RDN: + case insn_funct3_opcode::FNMADD_RUP: + case insn_funct3_opcode::FNMADD_RMM: + case insn_funct3_opcode::FNMADD_DYN: + return execute_FNMADD(a, pc, insn); + case insn_funct3_opcode::FD_000: + case insn_funct3_opcode::FD_001: + case insn_funct3_opcode::FD_010: + case insn_funct3_opcode::FD_011: + case insn_funct3_opcode::FD_100: + case insn_funct3_opcode::FD_111: + return execute_FD(a, pc, insn); + default: + return raise_illegal_insn_exception(a, pc, insn); } } } diff --git a/src/riscv-constants.h b/src/riscv-constants.h index f2de29725..d3132df8b 100644 --- a/src/riscv-constants.h +++ b/src/riscv-constants.h @@ -630,38 +630,38 @@ enum class CSR_address : uint32_t { tdata3 = 0x7a3, }; -/// \brief The result of insn & 0b1110000000000011 can be used to identify +/// \brief The result of insn_get_c_funct3(insn) can be used to identify /// most compressed instructions directly enum class insn_c_funct3 : uint32_t { // Quadrant 0 - C_ADDI4SPN = 0b0000000000000000, - C_FLD = 0b0010000000000000, - C_LW = 0b0100000000000000, - C_LD = 0b0110000000000000, - C_FSD = 0b1010000000000000, - C_SW = 0b1100000000000000, - C_SD = 0b1110000000000000, + C_ADDI4SPN = 0b000'00, + C_FLD = 0b001'00, + C_LW = 0b010'00, + C_LD = 0b011'00, + C_FSD = 0b101'00, + C_SW = 0b110'00, + C_SD = 0b111'00, // Quadrant 1 - C_Q1_SET0 = 0b0000000000000001, // C_NOP and C_ADDI - C_ADDIW = 0b0010000000000001, - C_LI = 0b0100000000000001, - C_Q1_SET1 = 0b0110000000000001, // C_ADDI16SP and C_LUI - C_Q1_SET2 = 0b1000000000000001, // C_SRLI64, C_SRAI64, C_ANDI, C_SUB - // C_XOR, C_OR, C_AND, C_SUBW and C_ADDW - C_J = 0b1010000000000001, - C_BEQZ = 0b1100000000000001, - C_BNEZ = 0b1110000000000001, + C_Q1_SET0 = 0b000'01, // C_NOP and C_ADDI + C_ADDIW = 0b001'01, + C_LI = 0b010'01, + C_Q1_SET1 = 0b011'01, // C_ADDI16SP and C_LUI + C_Q1_SET2 = 0b100'01, // C_SRLI64, 
C_SRAI64, C_ANDI, C_SUB + // C_XOR, C_OR, C_AND, C_SUBW and C_ADDW + C_J = 0b101'01, + C_BEQZ = 0b110'01, + C_BNEZ = 0b111'01, // Quadrant 2 - C_SLLI = 0b0000000000000010, - C_FLDSP = 0b0010000000000010, - C_LWSP = 0b0100000000000010, - C_LDSP = 0b0110000000000010, - C_Q2_SET0 = 0b1000000000000010, // C_JR, C_MV, C_EBREAK, C_JALR, C_ADD - C_FSDSP = 0b1010000000000010, - C_SWSP = 0b1100000000000010, - C_SDSP = 0b1110000000000010, + C_SLLI = 0b000'10, + C_FLDSP = 0b001'10, + C_LWSP = 0b010'10, + C_LDSP = 0b011'10, + C_Q2_SET0 = 0b100'10, // C_JR, C_MV, C_EBREAK, C_JALR, C_ADD + C_FSDSP = 0b101'10, + C_SWSP = 0b110'10, + C_SDSP = 0b111'10, }; /// \brief The result of insn & 0b1110110000000011 can be used to identify @@ -683,122 +683,122 @@ enum class insn_CA_funct6_funct2 : uint32_t { C_ADDW = 0b1001110000100001, }; -/// \brief The result of insn & 0b111000001111111 can be used to identify +/// \brief The result of insn_get_funct3_opcode(insn) can be used to identify /// most instructions directly -enum class insn_funct3_00000_opcode : uint32_t { - LB = 0b000000000000011, - LH = 0b001000000000011, - LW = 0b010000000000011, - LD = 0b011000000000011, - LBU = 0b100000000000011, - LHU = 0b101000000000011, - LWU = 0b110000000000011, - SB = 0b000000000100011, - SH = 0b001000000100011, - SW = 0b010000000100011, - SD = 0b011000000100011, - FENCE = 0b000000000001111, - FENCE_I = 0b001000000001111, - ADDI = 0b000000000010011, - SLLI = 0b001000000010011, - SLTI = 0b010000000010011, - SLTIU = 0b011000000010011, - XORI = 0b100000000010011, - ORI = 0b110000000010011, - ANDI = 0b111000000010011, - ADDIW = 0b000000000011011, - SLLIW = 0b001000000011011, - SLLW = 0b001000000111011, - DIVW = 0b100000000111011, - REMW = 0b110000000111011, - REMUW = 0b111000000111011, - BEQ = 0b000000001100011, - BNE = 0b001000001100011, - BLT = 0b100000001100011, - BGE = 0b101000001100011, - BLTU = 0b110000001100011, - BGEU = 0b111000001100011, - JALR = 0b000000001100111, - CSRRW = 0b001000001110011, -
CSRRS = 0b010000001110011, - CSRRC = 0b011000001110011, - CSRRWI = 0b101000001110011, - CSRRSI = 0b110000001110011, - CSRRCI = 0b111000001110011, - AUIPC_000 = 0b000000000010111, - AUIPC_001 = 0b001000000010111, - AUIPC_010 = 0b010000000010111, - AUIPC_011 = 0b011000000010111, - AUIPC_100 = 0b100000000010111, - AUIPC_101 = 0b101000000010111, - AUIPC_110 = 0b110000000010111, - AUIPC_111 = 0b111000000010111, - LUI_000 = 0b000000000110111, - LUI_001 = 0b001000000110111, - LUI_010 = 0b010000000110111, - LUI_011 = 0b011000000110111, - LUI_100 = 0b100000000110111, - LUI_101 = 0b101000000110111, - LUI_110 = 0b110000000110111, - LUI_111 = 0b111000000110111, - JAL_000 = 0b000000001101111, - JAL_001 = 0b001000001101111, - JAL_010 = 0b010000001101111, - JAL_011 = 0b011000001101111, - JAL_100 = 0b100000001101111, - JAL_101 = 0b101000001101111, - JAL_110 = 0b110000001101111, - JAL_111 = 0b111000001101111, - FSW = 0b010000000100111, - FSD = 0b011000000100111, - FLW = 0b010000000000111, - FLD = 0b011000000000111, - FMADD_RNE = 0b000000001000011, - FMADD_RTZ = 0b001000001000011, - FMADD_RDN = 0b010000001000011, - FMADD_RUP = 0b011000001000011, - FMADD_RMM = 0b100000001000011, - FMADD_DYN = 0b111000001000011, - FMSUB_RNE = 0b000000001000111, - FMSUB_RTZ = 0b001000001000111, - FMSUB_RDN = 0b010000001000111, - FMSUB_RUP = 0b011000001000111, - FMSUB_RMM = 0b100000001000111, - FMSUB_DYN = 0b111000001000111, - FNMSUB_RNE = 0b000000001001011, - FNMSUB_RTZ = 0b001000001001011, - FNMSUB_RDN = 0b010000001001011, - FNMSUB_RUP = 0b011000001001011, - FNMSUB_RMM = 0b100000001001011, - FNMSUB_DYN = 0b111000001001011, - FNMADD_RNE = 0b000000001001111, - FNMADD_RTZ = 0b001000001001111, - FNMADD_RDN = 0b010000001001111, - FNMADD_RUP = 0b011000001001111, - FNMADD_RMM = 0b100000001001111, - FNMADD_DYN = 0b111000001001111, +enum class insn_funct3_opcode : uint32_t { + LB = 0b000'0000011, + LH = 0b001'0000011, + LW = 0b010'0000011, + LD = 0b011'0000011, + LBU = 0b100'0000011, + LHU = 0b101'0000011, + 
LWU = 0b110'0000011, + SB = 0b000'0100011, + SH = 0b001'0100011, + SW = 0b010'0100011, + SD = 0b011'0100011, + FENCE = 0b000'0001111, + FENCE_I = 0b001'0001111, + ADDI = 0b000'0010011, + SLLI = 0b001'0010011, + SLTI = 0b010'0010011, + SLTIU = 0b011'0010011, + XORI = 0b100'0010011, + ORI = 0b110'0010011, + ANDI = 0b111'0010011, + ADDIW = 0b000'0011011, + SLLIW = 0b001'0011011, + SLLW = 0b001'0111011, + DIVW = 0b100'0111011, + REMW = 0b110'0111011, + REMUW = 0b111'0111011, + BEQ = 0b000'1100011, + BNE = 0b001'1100011, + BLT = 0b100'1100011, + BGE = 0b101'1100011, + BLTU = 0b110'1100011, + BGEU = 0b111'1100011, + JALR = 0b000'1100111, + CSRRW = 0b001'1110011, + CSRRS = 0b010'1110011, + CSRRC = 0b011'1110011, + CSRRWI = 0b101'1110011, + CSRRSI = 0b110'1110011, + CSRRCI = 0b111'1110011, + AUIPC_000 = 0b000'0010111, + AUIPC_001 = 0b001'0010111, + AUIPC_010 = 0b010'0010111, + AUIPC_011 = 0b011'0010111, + AUIPC_100 = 0b100'0010111, + AUIPC_101 = 0b101'0010111, + AUIPC_110 = 0b110'0010111, + AUIPC_111 = 0b111'0010111, + LUI_000 = 0b000'0110111, + LUI_001 = 0b001'0110111, + LUI_010 = 0b010'0110111, + LUI_011 = 0b011'0110111, + LUI_100 = 0b100'0110111, + LUI_101 = 0b101'0110111, + LUI_110 = 0b110'0110111, + LUI_111 = 0b111'0110111, + JAL_000 = 0b000'1101111, + JAL_001 = 0b001'1101111, + JAL_010 = 0b010'1101111, + JAL_011 = 0b011'1101111, + JAL_100 = 0b100'1101111, + JAL_101 = 0b101'1101111, + JAL_110 = 0b110'1101111, + JAL_111 = 0b111'1101111, + FSW = 0b010'0100111, + FSD = 0b011'0100111, + FLW = 0b010'0000111, + FLD = 0b011'0000111, + FMADD_RNE = 0b000'1000011, + FMADD_RTZ = 0b001'1000011, + FMADD_RDN = 0b010'1000011, + FMADD_RUP = 0b011'1000011, + FMADD_RMM = 0b100'1000011, + FMADD_DYN = 0b111'1000011, + FMSUB_RNE = 0b000'1000111, + FMSUB_RTZ = 0b001'1000111, + FMSUB_RDN = 0b010'1000111, + FMSUB_RUP = 0b011'1000111, + FMSUB_RMM = 0b100'1000111, + FMSUB_DYN = 0b111'1000111, + FNMSUB_RNE = 0b000'1001011, + FNMSUB_RTZ = 0b001'1001011, + FNMSUB_RDN = 0b010'1001011, + FNMSUB_RUP 
= 0b011'1001011, + FNMSUB_RMM = 0b100'1001011, + FNMSUB_DYN = 0b111'1001011, + FNMADD_RNE = 0b000'1001111, + FNMADD_RTZ = 0b001'1001111, + FNMADD_RDN = 0b010'1001111, + FNMADD_RUP = 0b011'1001111, + FNMADD_RMM = 0b100'1001111, + FNMADD_DYN = 0b111'1001111, // some instructions need additional inspection of funct7 (or part thereof) - FD_000 = 0b000000001010011, - FD_001 = 0b001000001010011, - FD_010 = 0b010000001010011, - FD_011 = 0b011000001010011, - FD_100 = 0b100000001010011, - FD_111 = 0b111000001010011, - SRLI_SRAI = 0b101000000010011, - SRLIW_SRAIW = 0b101000000011011, - AMO_W = 0b010000000101111, - AMO_D = 0b011000000101111, - ADD_MUL_SUB = 0b000000000110011, - SLL_MULH = 0b001000000110011, - SLT_MULHSU = 0b010000000110011, - SLTU_MULHU = 0b011000000110011, - XOR_DIV = 0b100000000110011, - SRL_DIVU_SRA = 0b101000000110011, - OR_REM = 0b110000000110011, - AND_REMU = 0b111000000110011, - ADDW_MULW_SUBW = 0b000000000111011, - SRLW_DIVUW_SRAW = 0b101000000111011, - PRIVILEGED = 0b000000001110011, + FD_000 = 0b000'1010011, + FD_001 = 0b001'1010011, + FD_010 = 0b010'1010011, + FD_011 = 0b011'1010011, + FD_100 = 0b100'1010011, + FD_111 = 0b111'1010011, + SRLI_SRAI = 0b101'0010011, + SRLIW_SRAIW = 0b101'0011011, + AMO_W = 0b010'0101111, + AMO_D = 0b011'0101111, + ADD_MUL_SUB = 0b000'0110011, + SLL_MULH = 0b001'0110011, + SLT_MULHSU = 0b010'0110011, + SLTU_MULHU = 0b011'0110011, + XOR_DIV = 0b100'0110011, + SRL_DIVU_SRA = 0b101'0110011, + OR_REM = 0b110'0110011, + AND_REMU = 0b111'0110011, + ADDW_MULW_SUBW = 0b000'0111011, + SRLW_DIVUW_SRAW = 0b101'0111011, + PRIVILEGED = 0b000'1110011, }; /// \brief The result of insn >> 26 (6 most significant bits of funct7) can be