From ac950a5fa3957abad9ea38e370e9337d717e24f9 Mon Sep 17 00:00:00 2001
From: Saurabh Singh
Date: Sat, 18 Nov 2023 13:10:19 -0500
Subject: [PATCH] add support for RV compressed extension

---
Review notes (text between the '---' line and the diffstat is not part of
the commit message):
 - RVC_Aligner: clk_i / rst_i are now declared 'input wire'; the file sets
   `default_nettype none and every other port already names its net type.
 - RVC_Aligner: the half word stored in step 1 is tagged with s_adr_i[31:2].
   Step 2 is only taken when the tag matches the current request, so a change
   of s_adr_i between the two steps can no longer splice a stale half word
   into an unrelated fetch. The step flag is also cleared on aligned fetches.
   Behaviour is unchanged when s_adr_i is held for both steps.
 - RVC_Decoder: case label 5'b000_010 (six digits in a 5 bit literal) is
   written as 5'b00_010.
 - RVC_Decoder: trace string "C.EQEZ" corrected to "C.BEQZ".
 - Hunk size and diffstat updated for RVC_Aligner.v (55 -> 65 lines). The
   blob ids on the 'index' lines are still those of the original commit.

 rtl/core/AtomRV.v | 52 ++++-
 rtl/core/RVC_Aligner.v | 65 +++++
 rtl/core/RVC_Decoder.v | 233 ++++++++++++++++++++++
 rtl/soc/hydrogensoc/HydrogenSoC_Config.vh | 2 +-
 sim/Makefile | 2 +
 sw/bootloader/Makefile | 2 +-
 sw/examples/Makefile | 2 +-
 7 files changed, 349 insertions(+), 9 deletions(-)
 create mode 100644 rtl/core/RVC_Aligner.v
 create mode 100644 rtl/core/RVC_Decoder.v

diff --git a/rtl/core/AtomRV.v b/rtl/core/AtomRV.v
index f9c60796..70b0dac9 100644
--- a/rtl/core/AtomRV.v
+++ b/rtl/core/AtomRV.v
@@ -56,6 +56,47 @@ module AtomRV # (
     input wire timer_int_i
     `endif // EN_EXCEPT
 );
+    wire instr_request_valid = !rst_i; // Always valid (Except on Reset condition)
+
+    `ifdef RV_C
+    wire [31:0] rvc_aligner_fetch_addr_o;
+    wire rvc_aligner_fetch_valid_o;
+
+    wire [31:0] rvc_alignr_data_o;
+    wire rvc_alignr_ack_o;
+    RVC_Aligner rvc_alignr (
+        .clk_i (clk_i),
+        .rst_i (rst_i),
+
+        // Iport IFC
+        .m_adr_o (rvc_aligner_fetch_addr_o),
+        .m_dat_i (iport_data_i),
+        .m_valid_o (rvc_aligner_fetch_valid_o),
+        .m_ack_i (iport_ack_i),
+
+        // Pipeline IFC
+        .s_adr_i (ProgramCounter),
+        .s_dat_o (rvc_alignr_data_o),
+        .s_valid_i (instr_request_valid),
+        .s_ack_o (rvc_alignr_ack_o)
+    );
+
+    wire [31:0] rvc_decdr_instr_o;
+    wire rvc_decdr_is_compressed_o;
+    RVC_Decoder rvc_decdr (
+        .clk_i (clk_i),
+        .instr_i (rvc_alignr_data_o),
+        .ack_i (rvc_alignr_ack_o),
+        .instr_o (rvc_decdr_instr_o),
+        .is_compressed (rvc_decdr_is_compressed_o) // selects PC+2 vs PC+4 in ProgramCounter_next
+    );
+    `endif // RV_C
+
+    assign iport_addr_o = `INLINE_IFDEF(RV_C, rvc_aligner_fetch_addr_o, ProgramCounter);
+    assign iport_valid_o = `INLINE_IFDEF(RV_C, rvc_aligner_fetch_valid_o, instr_request_valid);
+    wire iport_acknowledged = `INLINE_IFDEF(RV_C, rvc_alignr_ack_o, iport_ack_i);
+    wire [31:0] fetched_instr = `INLINE_IFDEF(RV_C, rvc_decdr_instr_o, iport_data_i);
+
     /*
     ///////////// Protocol specification //////////////
     CPU has a generic handshaking protocol interface (GHPI). Handshaking is done via means
@@ -98,7 +139,7 @@ module AtomRV # (
         currently executing instruction happens to be a load-store instruction, but since currrent
         instruction is a jump, there is no memory request made anyways.
     */
-    wire raw_imem_handshake = (iport_valid_o && iport_ack_i);
+    wire raw_imem_handshake = (instr_request_valid && iport_acknowledged);
     wire imem_handshake = raw_imem_handshake && !ignore_imem_handshake;
     wire dmem_handshake = (dport_valid_o && dport_ack_i);
 
@@ -124,7 +165,7 @@ module AtomRV # (
         - Stage2 is stalled, since the instruction in stage1 cant popogate to stage2. Therefore until
         the stage2 is stalled, instruction in stage1 is kept held.
     */
-    wire waiting_for_ibus_response = (!imem_handshake && iport_valid_o);
+    wire waiting_for_ibus_response = (!imem_handshake && instr_request_valid);
     wire stall_stage1 = waiting_for_ibus_response || stall_stage2;
 
     /*
@@ -158,7 +199,7 @@ module AtomRV # (
 
     `ifdef EN_EXCEPT
     // Exception signals
-    wire except_instr_addr_misaligned = |ProgramCounter[1:0];
+    wire except_instr_addr_misaligned = `INLINE_IFDEF(RV_C, ProgramCounter[0], |ProgramCounter[1:0]);
     wire except_load_addr_misaligned = dport_valid_o & !dport_we_o & |dport_addr_o[1:0];
     wire except_store_addr_misaligned = dport_valid_o & dport_we_o & |dport_addr_o[1:0];
 
@@ -171,12 +212,11 @@ module AtomRV # (
     ////////////////////////////////////////////////////////////////////
     // STAGE 1 - FETCH
     ////////////////////////////////////////////////////////////////////
-    assign iport_valid_o = !rst_i; // Always valid (Except on Reset condition)
     /*
         Program Counter
     */
     reg [31:0] ProgramCounter /*verilator public*/;
-    wire [31:0] ProgramCounter_next = ProgramCounter + 32'd4;
+    wire [31:0] ProgramCounter_next = ProgramCounter + `INLINE_IFDEF(RV_C, (rvc_decdr_is_compressed_o ? 32'd2 : 32'd4), 32'd4);
 
     always @(posedge clk_i) begin
         if(rst_i)
@@ -252,7 +292,7 @@ module AtomRV # (
                 InstructionRegister <= `RV_INSTR_NOP;
 
             else if(!stall_stage1)
-                InstructionRegister <= iport_data_i;
+                InstructionRegister <= fetched_instr;
         end
     end
 
diff --git a/rtl/core/RVC_Aligner.v b/rtl/core/RVC_Aligner.v
new file mode 100644
index 00000000..5fefe39a
--- /dev/null
+++ b/rtl/core/RVC_Aligner.v
@@ -0,0 +1,65 @@
+`default_nettype none
+
+module RVC_Aligner(
+    input wire clk_i,
+    input wire rst_i,
+    // From Core (Slave)
+    input wire [31:0] s_adr_i,
+    output wire [31:0] s_dat_o,
+    input wire s_valid_i,
+    output wire s_ack_o,
+
+    // To Memory (Master)
+    output wire [31:0] m_adr_o,
+    input wire [31:0] m_dat_i,
+    output wire m_valid_o,
+    input wire m_ack_i
+);
+
+/*
+    Aligns the fetches to 4 byte boundary
+    if addr is 4 byte aligned -> e.g. 0x0 or 0x4 -> pass through
+    if addr is 2 byte aligned and not 4 byte aligned -> e.g. 0x2:
+        - Step 1:
+            - fetch the previous 4 byte aligned address (0x0)
+            - store its upper half word (becomes the lower half of s_dat_o)
+            - block ack signal to core
+        - Step 2:
+            - fetch next 4 byte aligned address (0x4)
+            - place its lower half word above the stored half word
+            - return ack to core
+
+    The stored half word is tagged with the word address (s_adr_i[31:2]) of
+    the request it belongs to. Step 2 is only taken when the tag matches the
+    current request; if s_adr_i changes between the two steps the stored
+    half word is ignored and the sequence starts over for the new address.
+*/
+wire is_misaligned = s_adr_i[1];
+
+reg was_misaligned;         // step 1 done, lower_half_word holds valid data
+reg [29:0] half_word_tag;   // s_adr_i[31:2] of the request that ran step 1
+reg [15:0] lower_half_word; // m_dat_i[31:16] captured on the last memory ack
+
+// Step 2: same misaligned request for which step 1 has already completed
+wire in_step2 = is_misaligned & was_misaligned & (half_word_tag == s_adr_i[31:2]);
+
+always @(posedge clk_i) begin
+    if(rst_i) begin
+        was_misaligned <= 1'b0;
+        half_word_tag <= 30'd0;
+        lower_half_word <= 16'd0;
+    end
+    else if(m_ack_i) begin
+        was_misaligned <= is_misaligned & !in_step2; // set after step 1, cleared otherwise
+        half_word_tag <= s_adr_i[31:2];
+        lower_half_word <= m_dat_i[31:16];
+    end
+end
+
+assign m_adr_o = is_misaligned ? (in_step2 ? s_adr_i + 32'd2 : s_adr_i - 32'd2) : s_adr_i;
+assign m_valid_o = s_valid_i;
+
+assign s_dat_o = in_step2 ? {m_dat_i[15:0], lower_half_word} : m_dat_i;
+assign s_ack_o = m_ack_i & (!is_misaligned | in_step2); // ack of step 1 is hidden from the core
+
+endmodule
diff --git a/rtl/core/RVC_Decoder.v b/rtl/core/RVC_Decoder.v
new file mode 100644
index 00000000..689e26f6
--- /dev/null
+++ b/rtl/core/RVC_Decoder.v
@@ -0,0 +1,233 @@
+`default_nettype none
+
+module RVC_Decoder(
+    /* verilator lint_off UNUSEDSIGNAL */
+    input wire clk_i,
+    /* verilator lint_on UNUSEDSIGNAL */
+    input wire [31:0] instr_i,
+    input wire ack_i,
+    output wire [31:0] instr_o,
+    output wire is_compressed
+);
+    localparam OPCODE_LOAD = 7'h03;
+    localparam OPCODE_OP_IMM = 7'h13;
+    localparam OPCODE_STORE = 7'h23;
+    localparam OPCODE_OP = 7'h33;
+    localparam OPCODE_LUI = 7'h37;
+    localparam OPCODE_BRANCH = 7'h63;
+    localparam OPCODE_JALR = 7'h67;
+    localparam OPCODE_JAL = 7'h6f;
+
+    wire [1:0] op = instr_i[1:0];
+    // wire [3:0] func4 = instr_i[15:12];
+    wire [2:0] func3 = instr_i[15:13];
+    // wire [5:0] func6 = instr_i[15:10];
+    // wire [4:0] rdrs1 = instr_i[11:7];
+    // wire [4:0] rs2 = instr_i[6:2];
+    // wire [2:0] rd_c = instr_i[4:2];
+    // wire [2:0] rs2_c = instr_i[4:2];
+    // wire [2:0] rs1_c = instr_i[4:2];
+
+    reg [127:0] instr_scope; // just for simulation, will be optimised away in synth
+    `UNUSED_VAR(instr_scope)
+
+    reg [31:0] decompr_instr; // Decompressed instruction
+    reg is_illegal; // 1: {op, func3} matched no case below; instr_i is passed through unchanged
+
+    assign instr_o = is_illegal ? instr_i : decompr_instr;
+    assign is_compressed = ack_i & !is_illegal;
+
+    // always @(posedge clk_i) begin
+    //     if(ack_i) begin
+    //         is_compressed <= !is_illegal;
+    //     end
+    // end
+
+    always @(*) /* COMBINATORIAL */ begin
+        decompr_instr = instr_i;
+        is_illegal = 1'b0;
+        instr_scope = "C.???";
+
+        case({op, func3})
+            // ========== Quadrant-0 ==========
+            5'b00_000: begin
+                // c.addi4spn -> addi rd', x2, imm (NOTE: imm == 0, incl. the all-zero halfword, is not rejected here)
+                decompr_instr = {2'b0, instr_i[10:7], instr_i[12:11], instr_i[5],
+                    instr_i[6], 2'b00, 5'h02, 3'b000, 2'b01, instr_i[4:2], {OPCODE_OP_IMM}};
+                instr_scope = "C.ADDI4SPN";
+            end
+
+            5'b00_010: begin
+                // c.lw -> lw rd', imm(rs1')
+                decompr_instr = {5'b0, instr_i[5], instr_i[12:10], instr_i[6],
+                    2'b00, 2'b01, instr_i[9:7], 3'b010, 2'b01, instr_i[4:2], {OPCODE_LOAD}};
+                instr_scope = "C.LW";
+            end
+
+            5'b00_110: begin
+                // c.sw -> sw rs2', imm(rs1')
+                decompr_instr = {5'b0, instr_i[5], instr_i[12], 2'b01, instr_i[4:2],
+                    2'b01, instr_i[9:7], 3'b010, instr_i[11:10], instr_i[6],
+                    2'b00, {OPCODE_STORE}};
+                instr_scope = "C.SW";
+            end
+
+
+            // ========== Quadrant-1 ==========
+            5'b01_000: begin
+                // c.addi -> addi rd, rd, nzimm
+                // c.nop -> addi x0, x0, nzimm
+                decompr_instr = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2],
+                    instr_i[11:7], 3'b0, instr_i[11:7], {OPCODE_OP_IMM}};
+                instr_scope = (instr_i[11:7] == 5'b0) ? "C.NOP": "C.ADDI";
+            end
+
+            5'b01_001, /* c.jal -> jal x1, imm */
+            5'b01_101: /* c.j -> jal x0, imm */ begin
+                decompr_instr = {instr_i[12], instr_i[8], instr_i[10:9], instr_i[6],
+                    instr_i[7], instr_i[2], instr_i[11], instr_i[5:3],
+                    {9 {instr_i[12]}}, 4'b0, ~instr_i[15], {OPCODE_JAL}};
+                instr_scope = (func3 == 3'b001) ? "C.JAL" : "C.J";
+            end
+
+            5'b01_010: begin
+                // c.li -> addi rd, x0, nzimm
+                decompr_instr = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], 5'b0,
+                    3'b0, instr_i[11:7], {OPCODE_OP_IMM}};
+                instr_scope = "C.LI";
+            end
+
+            5'b01_011: begin
+                if (instr_i[11:7] == 5'h02) begin
+                    // c.addi16sp -> addi x2, x2, nzimm
+                    decompr_instr = {{3 {instr_i[12]}}, instr_i[4:3], instr_i[5], instr_i[2],
+                        instr_i[6], 4'b0, 5'h02, 3'b000, 5'h02, {OPCODE_OP_IMM}};
+                    instr_scope = "C.ADDI16SP";
+                end else begin
+                    // c.lui -> lui rd, imm
+                    decompr_instr = {{15 {instr_i[12]}}, instr_i[6:2], instr_i[11:7], {OPCODE_LUI}};
+                    instr_scope = "C.LUI";
+                end
+            end
+
+            5'b01_100: begin
+                case (instr_i[11:10])
+                    2'b00,
+                    2'b01: begin
+                        // 00: c.srli -> srli rd, rd, shamt
+                        // 01: c.srai -> srai rd, rd, shamt
+                        decompr_instr = {1'b0, instr_i[10], 5'b0, instr_i[6:2], 2'b01, instr_i[9:7],
+                            3'b101, 2'b01, instr_i[9:7], {OPCODE_OP_IMM}};
+                        instr_scope = (instr_i[11:10]==2'b00) ? "C.SRLI" : "C.SRAI";
+                    end
+
+                    2'b10: begin
+                        // c.andi -> andi rd, rd, imm
+                        decompr_instr = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], 2'b01, instr_i[9:7],
+                            3'b111, 2'b01, instr_i[9:7], {OPCODE_OP_IMM}};
+                        instr_scope = "C.ANDI";
+                    end
+
+                    2'b11: begin
+                        case (instr_i[6:5])
+                            2'b00: begin
+                                // c.sub -> sub rd', rd', rs2'
+                                decompr_instr = {2'b01, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7],
+                                    3'b000, 2'b01, instr_i[9:7], {OPCODE_OP}};
+                                instr_scope = "C.SUB";
+                            end
+
+                            2'b01: begin
+                                // c.xor -> xor rd', rd', rs2'
+                                decompr_instr = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b100,
+                                    2'b01, instr_i[9:7], {OPCODE_OP}};
+                                instr_scope = "C.XOR";
+                            end
+
+                            2'b10: begin
+                                // c.or -> or rd', rd', rs2'
+                                decompr_instr = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b110,
+                                    2'b01, instr_i[9:7], {OPCODE_OP}};
+                                instr_scope = "C.OR";
+                            end
+
+                            2'b11: begin
+                                // c.and -> and rd', rd', rs2'
+                                decompr_instr = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b111,
+                                    2'b01, instr_i[9:7], {OPCODE_OP}};
+                                instr_scope = "C.AND";
+                            end
+                        endcase
+                    end
+                endcase
+            end
+
+            5'b01_110,
+            5'b01_111: begin
+                // 0: c.beqz -> beq rs1', x0, imm
+                // 1: c.bnez -> bne rs1', x0, imm
+                decompr_instr = {{4 {instr_i[12]}}, instr_i[6:5], instr_i[2], 5'b0, 2'b01,
+                    instr_i[9:7], 2'b00, instr_i[13], instr_i[11:10], instr_i[4:3],
+                    instr_i[12], {OPCODE_BRANCH}};
+                instr_scope = (func3 == 3'b110) ? "C.BEQZ": "C.BNEZ";
+            end
+
+            // ========== Quadrant-2 ==========
+            5'b10_000: begin
+                // c.slli -> slli rd, rd, shamt
+                // (c.slli hints are translated into a slli hint)
+                decompr_instr = {7'b0, instr_i[6:2], instr_i[11:7], 3'b001, instr_i[11:7], {OPCODE_OP_IMM}};
+                instr_scope = "C.SLLI";
+            end
+
+            5'b10_010: begin
+                // c.lwsp -> lw rd, imm(x2)
+                decompr_instr = {4'b0, instr_i[3:2], instr_i[12], instr_i[6:4], 2'b00, 5'h02,
+                    3'b010, instr_i[11:7], OPCODE_LOAD};
+                instr_scope = "C.LWSP";
+            end
+
+            5'b10_100: begin
+                if (instr_i[12] == 1'b0) begin
+                    if (instr_i[6:2] != 5'b0) begin
+                        // c.mv -> add rd/rs1, x0, rs2
+                        // (c.mv hints are translated into an add hint)
+                        decompr_instr = {7'b0, instr_i[6:2], 5'b0, 3'b0, instr_i[11:7], {OPCODE_OP}};
+                        instr_scope = "C.MV";
+                    end else begin
+                        // c.jr -> jalr x0, rd/rs1, 0
+                        decompr_instr = {12'b0, instr_i[11:7], 3'b0, 5'b0, {OPCODE_JALR}};
+                        instr_scope = "C.JR";
+                    end
+                end else begin
+                    if (instr_i[6:2] != 5'b0) begin
+                        // c.add -> add rd, rd, rs2
+                        // (c.add hints are translated into an add hint)
+                        decompr_instr = {7'b0, instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], {OPCODE_OP}};
+                        instr_scope = "C.ADD";
+                    end else begin
+                        if (instr_i[11:7] == 5'b0) begin
+                            // c.ebreak -> ebreak
+                            decompr_instr = {32'h00_10_00_73};
+                            instr_scope = "C.EBREAK";
+                        end else begin
+                            // c.jalr -> jalr x1, rs1, 0
+                            decompr_instr = {12'b0, instr_i[11:7], 3'b000, 5'b00001, {OPCODE_JALR}};
+                            instr_scope = "C.JALR";
+                        end
+                    end
+                end
+            end
+
+            5'b10_110: begin
+                // c.swsp -> sw rs2, imm(x2)
+                decompr_instr = {4'b0, instr_i[8:7], instr_i[12], instr_i[6:2], 5'h02, 3'b010,
+                    instr_i[11:9], 2'b00, {OPCODE_STORE}};
+                instr_scope = "C.SWSP";
+            end
+
+            default:
+                is_illegal = 1'b1;
+        endcase
+    end
+endmodule
diff --git a/rtl/soc/hydrogensoc/HydrogenSoC_Config.vh b/rtl/soc/hydrogensoc/HydrogenSoC_Config.vh
index 2a05745f..5284bfde 100644
--- a/rtl/soc/hydrogensoc/HydrogenSoC_Config.vh
+++ b/rtl/soc/hydrogensoc/HydrogenSoC_Config.vh
@@ -43,7 +43,7 @@
     compressed instructions. It adds a decoder which decodes the 16 bit instructions to 32 bit
     equivalents and a aligner to word-align Instruction fetches.
 */
-// `define RV_C
+`define RV_C
 
 
 /*
diff --git a/sim/Makefile b/sim/Makefile
index ea4fbfe9..0aef67d1 100755
--- a/sim/Makefile
+++ b/sim/Makefile
@@ -51,6 +51,8 @@ VSRCS += $(RTL_DIR)/core/Alu.v
 VSRCS += $(RTL_DIR)/core/Decode.v
 VSRCS += $(RTL_DIR)/core/RegisterFile.v
 VSRCS += $(RTL_DIR)/core/CSR_Unit.v
+VSRCS += $(RTL_DIR)/core/RVC_Aligner.v
+VSRCS += $(RTL_DIR)/core/RVC_Decoder.v
 
 ####################################################
 
diff --git a/sw/bootloader/Makefile b/sw/bootloader/Makefile
index 8e0b625b..1ef78526 100644
--- a/sw/bootloader/Makefile
+++ b/sw/bootloader/Makefile
@@ -4,7 +4,7 @@ sim ?= false
 
 RVPREFIX:= riscv64-unknown-elf
 
-CFLAGS:= -mabi=ilp32 -march=rv32i -nostartfiles -ffreestanding -Os
+CFLAGS:= -mabi=ilp32 -march=rv32ic -nostartfiles -ffreestanding -Os
 CFLAGS+= -DENABLE_UART
 CFLAGS+= -I $(RVATOM_LIB)/include
 LFLAGS:= -L $(RVATOM_LIB)/ -T $(RVATOM_LIB)/link/link_bootloader.ld -lcatom -Xlinker -Map $(EXEC).map -Wl,--gc-sections
diff --git a/sw/examples/Makefile b/sw/examples/Makefile
index f8de2171..2be28f78 100644
--- a/sw/examples/Makefile
+++ b/sw/examples/Makefile
@@ -82,7 +82,7 @@ endif
 
 RVPREFIX := riscv64-unknown-elf
 CFLAGS += -Wall
-CFLAGS += -mabi=ilp32 -march=rv32i -nostartfiles -ffreestanding -O0 -g
+CFLAGS += -mabi=ilp32 -march=rv32ic -nostartfiles -ffreestanding -O0 -g
 CFLAGS += -I $(RVATOM_LIB)/include -L $(RVATOM_LIB)
 
 LINKERSCRIPT := None