From 4ef422cde279a5d0cad8745199221bad93452e8e Mon Sep 17 00:00:00 2001
From: mbertuletti
Date: Wed, 23 Apr 2025 17:39:59 +0200
Subject: [PATCH 01/15] Add support for burst transactions in variable-latency-interconnect

---
Notes (text in this section is ignored when the patch is applied):
 - Adds the BurstWidth parameter (default 1) and the req_burst_i /
   req_burst_o ports to variable_latency_interconnect.
 - The burst bits are appended as the least-significant field of the
   aggregated request payload, so IniAggDataWidth grows by BurstWidth and
   the disaggregation on the target side unpacks them in the same position.

 .../variable_latency_interconnect.sv | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/rtl/variable_latency_interconnect/variable_latency_interconnect.sv b/rtl/variable_latency_interconnect/variable_latency_interconnect.sv
index 8c6eaae..3ef1d4a 100644
--- a/rtl/variable_latency_interconnect/variable_latency_interconnect.sv
+++ b/rtl/variable_latency_interconnect/variable_latency_interconnect.sv
@@ -26,6 +26,7 @@ module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #(
   parameter int unsigned DataWidth = 32, // Data Word Width
   parameter int unsigned BeWidth = DataWidth/8, // Byte Strobe Width
   parameter int unsigned AddrMemWidth = 12, // Number of Address bits per Target
+  parameter int unsigned BurstWidth = 1, // Burst Signal Width
   parameter bit AxiVldRdy = 1'b1, // Valid/ready signaling
   // Spill registers
   // A bit set at position i indicates a spill register at the i-th crossbar layer.
@@ -50,6 +51,7 @@ module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #(
   input logic [NumIn-1:0] req_wen_i, // Write enable
   input logic [NumIn-1:0][DataWidth-1:0] req_wdata_i, // Write data
   input logic [NumIn-1:0][BeWidth-1:0] req_be_i, // Byte enable
+  input logic [NumIn-1:0][BurstWidth-1:0] req_burst_i, // Burst data
   output logic [NumIn-1:0] resp_valid_o, // Response valid
   input logic [NumIn-1:0] resp_ready_i, // Response ready
   output logic [NumIn-1:0][DataWidth-1:0] resp_rdata_o, // Data response
@@ -61,6 +63,7 @@ module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #(
   output logic [NumOut-1:0] req_wen_o, // Write enable
   output logic [NumOut-1:0][DataWidth-1:0] req_wdata_o, // Write data
   output logic [NumOut-1:0][BeWidth-1:0] req_be_o, // Byte enable
+  output logic [NumOut-1:0][BurstWidth-1:0] req_burst_o, // Burst data
   input logic [NumOut-1:0] resp_valid_i, // Response valid
   output logic [NumOut-1:0] resp_ready_o, // Response ready
   input logic [NumOut-1:0][NumInLog2-1:0] resp_ini_addr_i, // Initiator address
@@ -74,7 +77,7 @@ module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #(

   // localparams and aggregation of address, wen and payload data
   localparam int unsigned NumOutLog2 = $clog2(NumOut);
-  localparam int unsigned IniAggDataWidth = 1 + BeWidth + AddrMemWidth + DataWidth;
+  localparam int unsigned IniAggDataWidth = 1 + BeWidth + AddrMemWidth + DataWidth + BurstWidth;

   /*************
    * Signals *
@@ -97,12 +100,12 @@ module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #(
     end

     // Aggregate data to be routed to targets
-    assign data_agg_in[j] = {req_wen_i[j], req_be_i[j], req_tgt_addr_i[j][ByteOffWidth + NumOutLog2 +: AddrMemWidth], req_wdata_i[j]};
+    assign data_agg_in[j] = {req_wen_i[j], req_be_i[j], req_tgt_addr_i[j][ByteOffWidth + NumOutLog2 +: AddrMemWidth], req_wdata_i[j], req_burst_i[j]};
   end

   // Disaggregate data
   for (genvar k = 0; unsigned'(k) < NumOut; k++) begin : gen_outputs
-    assign {req_wen_o[k], req_be_o[k], req_tgt_addr_o[k], req_wdata_o[k]} = data_agg_out[k];
+    assign {req_wen_o[k], req_be_o[k], req_tgt_addr_o[k], req_wdata_o[k], req_burst_o[k]} = data_agg_out[k];
   end

   /****************
From 1113fd10468cebc078a95d695be95dd499d309cc Mon Sep 17 00:00:00 2001
From: mbertuletti
Date: Fri, 23 May 2025 07:42:30 +0200
Subject: [PATCH 02/15] Add support for wide burst responses in variable-latency-interconnect

---
Notes (text in this section is ignored when the patch is applied):
 - Adds the RspGF and BurstRspWidth parameters; BurstRspWidth defaults to
   (RspGF-1)*DataWidth.
 - NOTE(review): with the default RspGF = 1 that default evaluates to 0, so
   any vector declared [BurstRspWidth-1:0] becomes [-1:0]. Confirm this is
   the intended behaviour for the non-grouped configuration.

 .../variable_latency_interconnect.sv | 107 ++++++++++--------
 1 file changed, 60 insertions(+), 47 deletions(-)

diff --git a/rtl/variable_latency_interconnect/variable_latency_interconnect.sv b/rtl/variable_latency_interconnect/variable_latency_interconnect.sv
index 3ef1d4a..d5d59e4 100644
--- a/rtl/variable_latency_interconnect/variable_latency_interconnect.sv
+++ b/rtl/variable_latency_interconnect/variable_latency_interconnect.sv
@@ -26,7 +26,9 @@ module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #(
   parameter int unsigned DataWidth = 32, // Data Word Width
   parameter int unsigned BeWidth = DataWidth/8, // Byte Strobe Width
   parameter int unsigned AddrMemWidth = 12, // Number of Address bits per Target
+  parameter int unsigned RspGF = 1, // Grouping Factor for the Burst Response
   parameter int unsigned BurstWidth = 1, // Burst Signal Width
+  parameter int unsigned BurstRspWidth = (RspGF-1)*DataWidth, // Burst Response Widening
   parameter bit AxiVldRdy = 1'b1, // Valid/ready signaling
   // Spill registers
   // A bit set at position i indicates a spill register at the i-th crossbar layer.
@@ -45,29 +47,31 @@ module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #(
   input logic clk_i,
   input logic rst_ni,
   // Initiator side
-  input logic [NumIn-1:0] req_valid_i, // Request valid
-  output logic [NumIn-1:0] req_ready_o, // Request ready
-  input logic [NumIn-1:0][AddrWidth-1:0] req_tgt_addr_i, // Target address
-  input logic [NumIn-1:0] req_wen_i, // Write enable
-  input logic [NumIn-1:0][DataWidth-1:0] req_wdata_i, // Write data
-  input logic [NumIn-1:0][BeWidth-1:0] req_be_i, // Byte enable
-  input logic [NumIn-1:0][BurstWidth-1:0] req_burst_i, // Burst data
-  output logic [NumIn-1:0] resp_valid_o, // Response valid
-  input logic [NumIn-1:0] resp_ready_i, // Response ready
-  output logic [NumIn-1:0][DataWidth-1:0] resp_rdata_o, // Data response
+  input  logic [NumIn-1:0]                      req_valid_i,     // Request valid
+  output logic [NumIn-1:0]                      req_ready_o,     // Request ready
+  input  logic [NumIn-1:0][AddrWidth-1:0]       req_tgt_addr_i,  // Target address
+  input  logic [NumIn-1:0]                      req_wen_i,       // Write enable
+  input  logic [NumIn-1:0][DataWidth-1:0]       req_wdata_i,     // Write data
+  input  logic [NumIn-1:0][BeWidth-1:0]         req_be_i,        // Byte enable
+  input  logic [NumIn-1:0][BurstWidth-1:0]      req_burst_i,     // Burst data
+  output logic [NumIn-1:0]                      resp_valid_o,    // Response valid
+  input  logic [NumIn-1:0]                      resp_ready_i,    // Response ready
+  output logic [NumIn-1:0][DataWidth-1:0]       resp_rdata_o,    // Data response
+  output logic [NumIn-1:0][BurstRspWidth-1:0]   resp_burst_o,    // Burst response
   // Target side
-  output logic [NumOut-1:0] req_valid_o, // Request valid
-  input logic [NumOut-1:0] req_ready_i, // Request ready
-  output logic [NumOut-1:0][NumInLog2-1:0] req_ini_addr_o, // Initiator address
-  output logic [NumOut-1:0][AddrMemWidth-1:0] req_tgt_addr_o, // Target address
-  output logic [NumOut-1:0] req_wen_o, // Write enable
-  output logic [NumOut-1:0][DataWidth-1:0] req_wdata_o, // Write data
-  output logic [NumOut-1:0][BeWidth-1:0] req_be_o, // Byte enable
-  output logic [NumOut-1:0][BurstWidth-1:0] req_burst_o, // Burst data
-  input logic [NumOut-1:0] resp_valid_i, // Response valid
-  output logic [NumOut-1:0] resp_ready_o, // Response ready
-  input logic [NumOut-1:0][NumInLog2-1:0] resp_ini_addr_i, // Initiator address
-  input logic [NumOut-1:0][DataWidth-1:0] resp_rdata_i // Data response
+  output logic [NumOut-1:0]                     req_valid_o,     // Request valid
+  input  logic [NumOut-1:0]                     req_ready_i,     // Request ready
+  output logic [NumOut-1:0][NumInLog2-1:0]      req_ini_addr_o,  // Initiator address
+  output logic [NumOut-1:0][AddrMemWidth-1:0]   req_tgt_addr_o,  // Target address
+  output logic [NumOut-1:0]                     req_wen_o,       // Write enable
+  output logic [NumOut-1:0][DataWidth-1:0]      req_wdata_o,     // Write data
+  output logic [NumOut-1:0][BeWidth-1:0]        req_be_o,        // Byte enable
+  output logic [NumOut-1:0][BurstWidth-1:0]     req_burst_o,     // Burst data
+  input  logic [NumOut-1:0]                     resp_valid_i,    // Response valid
+  output logic [NumOut-1:0]                     resp_ready_o,    // Response ready
+  input  logic [NumOut-1:0][NumInLog2-1:0]      resp_ini_addr_i, // Initiator address
+  input  logic [NumOut-1:0][DataWidth-1:0]      resp_rdata_i,    // Data response
+  input  logic [NumOut-1:0][BurstRspWidth-1:0]  resp_burst_i     // Burst response
 );

   /******************
@@ -77,17 +81,34 @@ module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #(

   // localparams and aggregation of address, wen and payload data
   localparam int unsigned NumOutLog2 = $clog2(NumOut);
-  localparam int unsigned IniAggDataWidth = 1 + BeWidth + AddrMemWidth + DataWidth + BurstWidth;
+  localparam int unsigned ReqAggDataWidth = 1 + BeWidth + AddrMemWidth + DataWidth + BurstWidth; // {wen, be, addr, wdata, burst}
+  localparam int unsigned RespAggDataWidth = $bits(resp_rdata_i[0]) + $bits(resp_burst_i[0]); // exact width of {resp_rdata, resp_burst}, so the response payload is never truncated

   /*************
    * Signals *
    *************/

-  logic [NumIn-1:0][IniAggDataWidth-1:0] data_agg_in;
-  logic [NumOut-1:0][IniAggDataWidth-1:0] data_agg_out;
+  logic [NumIn-1:0][ReqAggDataWidth-1:0] req_agg_in;
+  logic [NumOut-1:0][ReqAggDataWidth-1:0] req_agg_out;
+
+  logic [NumIn-1:0][RespAggDataWidth-1:0] resp_agg_out;
+  logic [NumOut-1:0][RespAggDataWidth-1:0] resp_agg_in;
+
   logic [NumIn-1:0][cf_math_pkg::idx_width(NumOut)-1:0] tgt_sel;

   for (genvar j = 0; unsigned'(j) < NumIn; j++) begin : gen_inputs
+    // Aggregate data to be routed to targets
+    assign req_agg_in[j] = {req_wen_i[j], req_be_i[j], req_tgt_addr_i[j][ByteOffWidth + NumOutLog2 +: AddrMemWidth], req_wdata_i[j], req_burst_i[j]};
+    assign {resp_rdata_o[j], resp_burst_o[j]} = resp_agg_out[j];
+  end
+
+  // Disaggregate data
+  for (genvar k = 0; unsigned'(k) < NumOut; k++) begin : gen_outputs
+    assign {req_wen_o[k], req_be_o[k], req_tgt_addr_o[k], req_wdata_o[k], req_burst_o[k]} = req_agg_out[k];
+    assign resp_agg_in[k] = {resp_rdata_i[k], resp_burst_i[k]};
+  end
+
+  for (genvar j = 0; unsigned'(j) < NumIn; j++) begin : gen_target
     // Extract target index
     if (NumIn == 1) begin
       assign tgt_sel[j] = '0;
@@ -98,14 +119,6 @@ module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #(
         assign tgt_sel[j] = req_tgt_addr_i[j][ByteOffWidth +: NumOutLog2];
       end
     end
-
-    // Aggregate data to be routed to targets
-    assign data_agg_in[j] = {req_wen_i[j], req_be_i[j], req_tgt_addr_i[j][ByteOffWidth + NumOutLog2 +: AddrMemWidth], req_wdata_i[j], req_burst_i[j]};
-  end
-
-  // Disaggregate data
-  for (genvar k = 0; unsigned'(k) < NumOut; k++) begin : gen_outputs
-    assign {req_wen_o[k], req_be_o[k], req_tgt_addr_o[k], req_wdata_o[k], req_burst_o[k]} = data_agg_out[k];
   end

   /****************
@@ -117,17 +130,17 @@ module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #(
     assign req_valid_o = req_valid_i;
     assign req_ready_o = req_ready_i;
     assign req_ini_addr_o = '0;
-    assign data_agg_out = data_agg_in;
+    assign req_agg_out = req_agg_in;
     assign resp_valid_o = resp_valid_i;
     assign resp_ready_o = resp_ready_i;
-    assign resp_rdata_o = resp_rdata_i;
+    assign resp_agg_out = resp_agg_in;
   // Tuned logarithmic interconnect architecture, based on rr_arb_tree primitives
   end else if (Topology == tcdm_interconnect_pkg::LIC) begin : gen_lic
     full_duplex_xbar #(
       .NumIn (NumIn ),
       .NumOut (NumOut ),
-      .ReqDataWidth (IniAggDataWidth ),
-      .RespDataWidth (DataWidth ),
+      .ReqDataWidth      (ReqAggDataWidth     ),
+      .RespDataWidth     (RespAggDataWidth    ),
       .AxiVldRdy (AxiVldRdy ),
       .SpillRegisterReq (SpillRegisterReq[0] ),
       .SpillRegisterResp (SpillRegisterResp[0]),
@@ -142,19 +155,19 @@ module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #(
       .req_valid_i (req_valid_i ),
       .req_ready_o (req_ready_o ),
       .req_tgt_addr_i (tgt_sel ),
-      .req_wdata_i (data_agg_in ),
+      .req_wdata_i    (req_agg_in     ),
       .resp_valid_o (resp_valid_o ),
-      .resp_rdata_o (resp_rdata_o ),
+      .resp_rdata_o   (resp_agg_out   ),
       .resp_ready_i (resp_ready_i ),
       // Target side
       .req_valid_o (req_valid_o ),
       .req_ini_addr_o (req_ini_addr_o ),
       .req_ready_i (req_ready_i ),
-      .req_wdata_o (data_agg_out ),
+      .req_wdata_o    (req_agg_out    ),
       .resp_valid_i (resp_valid_i ),
       .resp_ready_o (resp_ready_o ),
       .resp_ini_addr_i(resp_ini_addr_i),
-      .resp_rdata_i (resp_rdata_i )
+      .resp_rdata_i   (resp_agg_in    )
     );
   end

@@ -203,7 +216,7 @@ module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #(
     variable_latency_bfly_net #(
       .NumIn (NumIn ),
       .NumOut (NumOut ),
-      .DataWidth (IniAggDataWidth ),
+      .DataWidth     (ReqAggDataWidth  ),
       .Radix (Radix ),
       .ExtPrio (1'b0 ),
       .SpillRegister (SpillRegisterReq ),
@@ -218,18 +231,18 @@ module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #(
       .valid_i (req_valid_i ),
       .ready_o (req_ready_o ),
       .tgt_addr_i(tgt_sel ),
-      .wdata_i (data_agg_in ),
+      .wdata_i   (req_agg_in     ),
       // Target side
       .valid_o (req_valid_o ),
       .ini_addr_o(req_ini_addr_o ),
       .ready_i (req_ready_i ),
-      .wdata_o (data_agg_out )
+      .wdata_o   (req_agg_out    )
     );

     variable_latency_bfly_net #(
       .NumIn (NumOut ),
       .NumOut (NumIn ),
-      .DataWidth (DataWidth ),
+      .DataWidth     (RespAggDataWidth ),
       .Radix (Radix ),
       .ExtPrio (1'b0 ),
       .SpillRegister (SpillRegisterResp ),
@@ -244,12 +257,12 @@ module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #(
       .valid_i (resp_valid_i ),
       .ready_o (resp_ready_o ),
       .tgt_addr_i(resp_ini_addr_i),
-      .wdata_i (resp_rdata_i ),
+      .wdata_i   (resp_agg_in    ), // aggregated {resp_rdata_i, resp_burst_i} per target
       // Initiator side
       .valid_o (resp_valid_o ),
       .ready_i (resp_ready_i ),
       .ini_addr_o(/* Unused */ ),
-      .wdata_o (resp_rdata_o )
+      .wdata_o   (resp_agg_out   ) // unpacked into resp_rdata_o / resp_burst_o in gen_inputs
     );
   end

From c2e88e3910c56c65e182e1cbbe5ac9088b3205f2 Mon Sep 17 00:00:00 2001
From: mbertuletti
Date: Fri, 23 May 2025 08:17:57 +0200
Subject: [PATCH 03/15] Add burst request and response support

---
 Bender.yml | 5 +
 .../burst_cutter.sv | 187 +++++++++
 .../burst_manager.sv | 363 ++++++++++++++++++
 .../burst_pkg.sv | 42 ++
 .../burst_req_grouper.sv | 196 ++++++++++
 .../burst_rsp_grouper.sv | 81 ++++
 .../variable_latency_interconnect.sv | 65 ++++
 7 files changed, 939 insertions(+)
 create mode 100644 rtl/variable_latency_interconnect/burst_cutter.sv
 create mode 100644 rtl/variable_latency_interconnect/burst_manager.sv
 create mode 100644 rtl/variable_latency_interconnect/burst_pkg.sv
 create mode 100644 rtl/variable_latency_interconnect/burst_req_grouper.sv
 create mode 100644 rtl/variable_latency_interconnect/burst_rsp_grouper.sv

diff --git a/Bender.yml b/Bender.yml
index ba293ff..bd3c8b4 100644
--- a/Bender.yml
+++ b/Bender.yml
@@ -20,9 +20,14 @@ sources:
     - rtl/tcdm_interconnect/addr_dec_resp_mux.sv
     - rtl/tcdm_interconnect/amo_shim.sv
     - rtl/variable_latency_interconnect/addr_decoder.sv
+    - rtl/variable_latency_interconnect/burst_pkg.sv
     # Level 1
     - rtl/tcdm_interconnect/xbar.sv
     - rtl/variable_latency_interconnect/simplex_xbar.sv
+    - rtl/variable_latency_interconnect/burst_cutter.sv
+    - rtl/variable_latency_interconnect/burst_manager.sv
+    - rtl/variable_latency_interconnect/burst_req_grouper.sv
+    - rtl/variable_latency_interconnect/burst_rsp_grouper.sv
     # Level 2
     - rtl/tcdm_interconnect/clos_net.sv
     - rtl/tcdm_interconnect/bfly_net.sv
diff --git a/rtl/variable_latency_interconnect/burst_cutter.sv
b/rtl/variable_latency_interconnect/burst_cutter.sv
new file mode 100644
index 0000000..daca673
--- /dev/null
+++ b/rtl/variable_latency_interconnect/burst_cutter.sv
@@ -0,0 +1,187 @@
+// Copyright 2023 ETH Zurich and University of Bologna.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Author: Diyou Shen ETH Zurich
+// Author: Marco Bertuletti ETH Zurich
+
+/// Burst Cutter:
+/// Divides the burst request from NumIn initiators in multiple bursts when it
+/// crosses the address boundary in the target multi-banked Memory (read bursts only, at most one cut).
+
+module burst_cutter
+  import burst_pkg::burst_t;
+#(
+  parameter int unsigned NumIn = 32,
+  parameter int unsigned NumOut = 64,
+  parameter int unsigned AddrWidth = 32,
+  parameter int unsigned DataWidth = 32,
+  parameter int unsigned BeWidth = DataWidth/8,
+  // Number of Address bits per Target
+  parameter int unsigned AddrMemWidth = 12,
+  // Determines the width of the byte offset in a memory word. Normally this can be left at the default value,
+  // but sometimes it needs to be overridden (e.g., when metadata is supplied to the memory via the wdata signal).
+  parameter int unsigned ByteOffWidth = $clog2(DataWidth-1)-3,
+  // Dependant parameters. DO NOT CHANGE!
+  parameter int unsigned NumInLog2 = NumIn == 1 ? 1 : $clog2(NumIn)
+) (
+  input logic clk_i,
+  input logic rst_ni,
+  // Memory Request In
+  input logic [NumInLog2-1:0] req_ini_addr_i, // Initiator address
+  input logic [AddrWidth-1:0] req_tgt_addr_i, // Target address
+  input logic req_wen_i, // Write enable
+  input logic [NumIn-1:0][DataWidth-1:0] req_wdata_i, // Write data
+  input logic [BeWidth-1:0] req_be_i, // Byte enable
+  input burst_t req_burst_i, // Burst data
+  input logic req_valid_i,
+  output logic req_ready_o,
+  // Memory Request Out
+  output logic [NumInLog2-1:0] req_ini_addr_o, // Initiator address
+  output logic [AddrWidth-1:0] req_tgt_addr_o, // Target address
+  output logic req_wen_o, // Write enable
+  output logic [DataWidth-1:0] req_wdata_o, // Write data
+  output logic [BeWidth-1:0] req_be_o, // Byte enable
+  output burst_t req_burst_o, // Burst data
+  output logic req_valid_o,
+  input logic req_ready_i
+);
+
+  localparam int unsigned BurstLen = NumIn;
+  localparam int unsigned BurstLenWidth = NumInLog2;
+  localparam int unsigned NumBanks = NumOut;
+  localparam int unsigned BankOffsetBits = AddrMemWidth - ByteOffWidth;
+
+  typedef enum logic {
+    Bypass, // normal requests, first cut of burst
+    BurstCut // second cut of burst
+  } burst_cutter_fsm_e;
+
+  // FSM state
+  burst_cutter_fsm_e state_d, state_q;
+  burst_cutter_fsm_e next_state;
+
+  // FSM stored signals
+  logic [NumInLog2-1:0] cut_ini_addr_d, cut_ini_addr_q;
+  logic [AddrWidth-1:0] cut_tgt_addr_d, cut_tgt_addr_q;
+  logic [DataWidth-1:0] cut_wdata_d, cut_wdata_q;
+  burst_t cut_burst_d, cut_burst_q;
+
+  logic [BankOffsetBits-1:0] bank_offset;
+  logic [BurstLenWidth:0] max_blen;
+  logic [BurstLenWidth:0] remaining_len;
+
+  always_ff @(posedge clk_i or negedge rst_ni) begin : burst_cutter_proc
+    if(~rst_ni) begin
+      state_q <= Bypass;
+      cut_burst_q <= '0;
+      cut_ini_addr_q <= '0;
+      cut_tgt_addr_q <= '0;
+      cut_wdata_q <= '0;
+    end else begin
+      state_q <= state_d;
+      cut_ini_addr_q <= cut_ini_addr_d; // initiator index of the stored second cut
+      cut_tgt_addr_q <= cut_tgt_addr_d;
+      cut_wdata_q <= cut_wdata_d;
+      cut_burst_q <= cut_burst_d;
+    end
+  end
+
+  always_comb begin
+    // FSM defaults
+    state_d = state_q;
+    cut_burst_d = cut_burst_q;
+    cut_tgt_addr_d = cut_tgt_addr_q;
+    cut_ini_addr_d = cut_ini_addr_q;
+    cut_wdata_d = cut_wdata_q;
+
+    bank_offset = '0;
+    max_blen = '0;
+    remaining_len = '0;
+
+    next_state = Bypass;
+
+    // Need to cut, use FSM to realize the logic
+    case (state_q)
+      Bypass: begin
+        // Bypass the signals
+        req_ini_addr_o = req_ini_addr_i;
+        req_tgt_addr_o = req_tgt_addr_i;
+        req_wdata_o = req_wdata_i[0];
+        req_wen_o = req_wen_i;
+        req_be_o = req_be_i;
+        req_burst_o = req_burst_i;
+        req_valid_o = req_valid_i;
+        req_ready_o = req_ready_i;
+        // Keep current state by default
+        next_state = state_q;
+
+        // Check if it is valid and being a burst request
+        if (req_burst_i.isburst) begin
+          bank_offset = req_tgt_addr_i[AddrMemWidth-1 : ByteOffWidth];
+          max_blen = NumBanks - bank_offset;
+
+          if (req_wen_i) begin
+            // no support for write burst, tie to 0
+            req_burst_o = '0;
+
+          end else begin
+            if (req_burst_i.blen > max_blen) begin
+              next_state = BurstCut;
+
+              // pause taking in new requests
+              req_ready_o = 1'b0;
+              // Send out the first burst
+              req_burst_o.isburst = 1'b1;
+              req_burst_o.blen = max_blen;
+
+              // store the info for next burst
+              cut_ini_addr_d = req_ini_addr_i + max_blen; // initiator index, not a byte address: no ByteOffWidth scaling
+              cut_tgt_addr_d = req_tgt_addr_i + (max_blen << ByteOffWidth);
+              cut_wdata_d = req_wdata_i[max_blen];
+
+              remaining_len = req_burst_i.blen - max_blen;
+              if (remaining_len > NumBanks) begin
+                $error("Only one cut is supported, reduce the burst length.");
+              end
+
+              cut_burst_d.isburst = 1'b1;
+              cut_burst_d.blen = remaining_len;
+
+            end
+          end
+        end
+        // Keep state until the current one is picked
+        if (req_ready_i) begin
+          state_d = next_state;
+        end
+      end
+
+      BurstCut: begin
+        next_state = state_q;
+        // assign the outputs
+        // send out this part and wait for ready
+        req_ini_addr_o = cut_ini_addr_q; // every output is driven in this state, so no latch is inferred
+        req_tgt_addr_o = cut_tgt_addr_q;
+        req_wdata_o = cut_wdata_q;
+        req_wen_o = '0; // only read burst is supported
+        req_be_o = '0;
+        req_burst_o = cut_burst_q;
+        req_valid_o = 1'b1;
+        req_ready_o = 1'b0;
+
+        // When we get the ready, the second part is out
+        if (req_ready_i) begin
+          next_state = Bypass;
+          req_ready_o = req_ready_i;
+        end
+
+        state_d = next_state;
+      end
+
+      default: state_d = Bypass;
+    endcase
+  end
+
+
+endmodule : burst_cutter
diff --git a/rtl/variable_latency_interconnect/burst_manager.sv b/rtl/variable_latency_interconnect/burst_manager.sv
new file mode 100644
index 0000000..a683fc3
--- /dev/null
+++ b/rtl/variable_latency_interconnect/burst_manager.sv
@@ -0,0 +1,363 @@
+// Copyright 2023 ETH Zurich and University of Bologna.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Author: Diyou Shen ETH Zurich
+// Author: Marco Bertuletti ETH Zurich
+
+/// Burst Req Manager:
+/// Receives a burst request from NumIn initiators and produces a parallel request
+/// to NumIn target banks in a target multi-banked memory with NumOut banks.
+/// Collects a parallel response from NumOut banks in a target multi-banked memory
+/// and groups them according to the RspGF.
+
+module burst_manager
+  import burst_pkg::*;
+#(
+  parameter int unsigned NumIn = 32, // number of initiator ports
+  parameter int unsigned NumOut = 64, // number of destination ports
+  parameter int unsigned AddrWidth = 32,
+  parameter int unsigned DataWidth = 32,
+  parameter int unsigned BeWidth = DataWidth/8,
+  // determines the width of the byte offset in a memory word. normally this can be left at the default value,
+  // but sometimes it needs to be overridden (e.g. when meta-data is supplied to the memory via the wdata signal).
+  parameter int unsigned ByteOffWidth = $clog2(DataWidth-1)-3,
+  // Group Response Extension Grouping Factor for TCDM
+  parameter int unsigned RspGF = 1,
+  // Dependant parameters. DO NOT CHANGE!
+  parameter int unsigned NumInLog2 = (NumIn == 1) ? 1 : $clog2(NumIn),
+  // Burst response type can be overwritten for DataWidth > 32b
+  // This can happen when the DataWidth includes transaction metadata
+  parameter type burst_resp_t = burst_pkg::burst_gresp_t // default grouped-response payload type from burst_pkg
+) (
+  input logic clk_i,
+  input logic rst_ni,
+  /// Xbar side
+  input logic [NumOut-1:0][NumInLog2-1:0] req_ini_addr_i,
+  input logic [NumOut-1:0][AddrWidth-1:0] req_tgt_addr_i,
+  input logic [NumOut-1:0][DataWidth-1:0] req_wdata_i,
+  input logic [NumOut-1:0] req_wen_i,
+  input logic [NumOut-1:0][BeWidth-1:0] req_ben_i,
+  input burst_t [NumOut-1:0] req_burst_i,
+  input logic [NumOut-1:0] req_valid_i,
+  output logic [NumOut-1:0] req_ready_o,
+  //
+  output logic [NumOut-1:0][NumInLog2-1:0] resp_ini_addr_o,
+  output logic [NumOut-1:0][DataWidth-1:0] resp_rdata_o,
+  output burst_resp_t [NumOut-1:0] resp_burst_o,
+  output logic [NumOut-1:0] resp_valid_o,
+  input logic [NumOut-1:0] resp_ready_i,
+  /// Bank side
+  output logic [NumOut-1:0][NumInLog2-1:0] req_ini_addr_o,
+  output logic [NumOut-1:0][AddrWidth-1:0] req_tgt_addr_o,
+  output logic [NumOut-1:0][DataWidth-1:0] req_wdata_o,
+  output logic [NumOut-1:0] req_wen_o,
+  output logic [NumOut-1:0][BeWidth-1:0] req_ben_o,
+  output logic [NumOut-1:0] req_valid_o,
+  input logic [NumOut-1:0] req_ready_i,
+  //
+  input logic [NumOut-1:0][NumInLog2-1:0] resp_ini_addr_i,
+  input logic [NumOut-1:0][DataWidth-1:0] resp_rdata_i,
+  input logic [NumOut-1:0] resp_valid_i,
+  output logic [NumOut-1:0] resp_ready_o
+);
+  /*************************************************************
+   * req_i --+--> arbiter --> fifo --> req generator --> req_o *
+   *          \--------------- bypass ------------------> req_o *
+   * rsp_o <-----          data_grouper           <----- rsp_i *
+   *************************************************************/
+
+  // Include FF module
+  `include "common_cells/registers.svh"
+
+  localparam int unsigned NumOutLog2 = (NumOut > 32'd1) ? unsigned'($clog2(NumOut)) : 32'd1;
+
+  /******************
+   * Burst Identify *
+   ******************/
+
+  typedef struct packed {
+    logic [NumInLog2-1:0] ini_addr;
+    logic [AddrWidth-1:0] tgt_addr;
+    logic [DataWidth-1:0] wdata;
+    logic wen;
+    logic [BeWidth-1:0] ben; // one enable bit per byte lane, same width as req_ben_i
+    burst_t burst;
+  } arb_data_t;
+
+  arb_data_t [NumOut-1:0] prearb_data;
+  logic [NumOut-1:0] prearb_valid, prearb_ready;
+  arb_data_t postarb_data;
+  logic postarb_valid, postarb_ready;
+  logic [NumOutLog2-1:0] postarb_idx;
+  logic [NumOut-1:0] ready_mask;
+  logic [NumOut-1:0] valid_mask;
+
+  always_comb begin
+    prearb_data = '0;
+    prearb_valid = '0;
+    ready_mask = '0;
+    valid_mask = req_valid_i;
+
+    for (int unsigned i = 0; i < NumOut; i++) begin
+      if (req_valid_i[i] && req_burst_i[i].isburst) begin
+        prearb_data[i].ini_addr = req_ini_addr_i[i];
+        prearb_data[i].tgt_addr = req_tgt_addr_i[i];
+        prearb_data[i].wdata = req_wdata_i[i];
+        prearb_data[i].wen = req_wen_i[i];
+        prearb_data[i].ben = req_ben_i[i];
+        prearb_data[i].burst = req_burst_i[i];
+        prearb_valid[i] = 1'b1;
+        valid_mask[i] = 1'b0; // only this port is withheld from the bypass path; other ports keep their valid
+        // Mark retired burst requests
+        if (prearb_ready[i]) begin
+          ready_mask[i] = 1'b1;
+        end
+      end
+    end
+  end
+
+  rr_arb_tree #(
+    .NumIn ( NumOut ),
+    .DataType ( arb_data_t ),
+    .ExtPrio ( 1'b0),
+    .AxiVldRdy ( 1'b1),
+    .LockIn ( 1'b1)
+  ) i_rr_arb_tree (
+    .clk_i ( clk_i ),
+    .rst_ni ( rst_ni ),
+    .flush_i ( 1'b0 ),
+    .rr_i ( '0 ),
+    .req_i ( prearb_valid ),
+    .gnt_o ( prearb_ready ),
+    .data_i ( prearb_data ),
+    .req_o ( postarb_valid ),
+    .gnt_i ( postarb_ready ),
+    .data_o ( postarb_data ),
+    .idx_o ( postarb_idx )
+  );
+
+  typedef struct packed {
+    logic [NumInLog2-1:0] ini_addr;
+    logic [AddrWidth-1:0] tgt_addr;
+    logic [DataWidth-1:0] wdata;
+    logic wen;
+    logic [BeWidth-1:0] ben; // one enable bit per byte lane, same width as req_ben_i
+    burst_t burst;
+    logic [NumOutLog2-1:0] idx;
+  } fifo_data_t;
+
+  fifo_data_t fifo_data, pre_fifo_data;
+  logic fifo_pop, fifo_empty, fifo_full, fifo_push;
+
+  assign postarb_ready = fifo_full ? 1'b0 : 1'b1;
+  assign pre_fifo_data.ini_addr = postarb_data.ini_addr;
+  assign pre_fifo_data.tgt_addr = postarb_data.tgt_addr;
+  assign pre_fifo_data.wdata = postarb_data.wdata;
+  assign pre_fifo_data.wen = postarb_data.wen;
+  assign pre_fifo_data.ben = postarb_data.ben;
+  assign pre_fifo_data.burst = postarb_data.burst;
+  assign pre_fifo_data.idx = postarb_idx;
+
+  // Push when FIFO is not full and data is valid
+  assign fifo_push = postarb_valid & (~fifo_full);
+
+  // Fall through FIFO to store bursts
+  fifo_v3 #(
+    .FALL_THROUGH ( 1'b1 ),
+    .DEPTH ( NumOut ),
+    .dtype ( fifo_data_t )
+  ) i_fall_though_fifo (
+    .clk_i ( clk_i ),
+    .rst_ni ( rst_ni ),
+    .flush_i ( 1'b0 ),
+    .testmode_i ( 1'b0 ),
+    .full_o ( fifo_full ),
+    .empty_o ( fifo_empty ),
+    .usage_o ( /*not used */ ),
+    .data_i ( pre_fifo_data ),
+    .push_i ( fifo_push ),
+    .data_o ( fifo_data ),
+    .pop_i ( fifo_pop )
+  );
+
+  /*********************
+   * Request Generator *
+   *********************/
+
+  typedef enum logic {
+    Idle, // idle until burst request comes
+    DoBurst // generate parallel requests when ready
+  } req_gen_fsm_e;
+
+  // FSM state
+  req_gen_fsm_e state_d, state_q;
+  // FSM stored signals
+  fifo_data_t breq_d, breq_q;
+
+  logic [NumOut-1:0] burst_mask_d, burst_mask_q;
+  // group mask used for response grouping
+  logic [NumOut-1:0] group_mask_d, group_mask_q;
+
+  // indicate if there is pending response to be picked
+  logic pending_rsp;
+
+  `FF(state_q, state_d, Idle, clk_i, rst_ni);
+  `FF(breq_q, breq_d, '0, clk_i, rst_ni);
+  `FF(burst_mask_q, burst_mask_d, '0, clk_i, rst_ni);
+  `FF(group_mask_q, group_mask_d, '0, clk_i, rst_ni);
+
+  // Each element of a burst request must be retired to start request
+  assign req_ready_o = ready_mask | (req_ready_i & ~burst_mask_q);
+
+  always_comb begin : request_generator
+
+    // FSM defaults
+    state_d = state_q;
+    breq_d = breq_q;
+    burst_mask_d = burst_mask_q;
+
+    // comb logic defaults
+    pending_rsp = '0;
+    // Do not take in next burst for now
+    fifo_pop = 1'b0;
+
+    // Bypass all requests by default
+    req_wdata_o = req_wdata_i;
+    req_tgt_addr_o = req_tgt_addr_i;
+    req_ini_addr_o = req_ini_addr_i;
+    req_wen_o = req_wen_i;
+    req_ben_o = req_ben_i;
+
+    // Let valid requests not in burst pass
+    req_valid_o = valid_mask;
+
+    case (state_q)
+
+      // Idle state, ready to take in burst request
+      Idle: begin
+
+        // Clear mask (unlock banks)
+        burst_mask_d = '0;
+        if (~fifo_empty) begin
+          // there is pending burst request
+          // start to handling the burst, mark as not ready
+          // pop next element
+          fifo_pop = 1'b1;
+          // store request
+          breq_d = fifo_data;
+          // a mask with burst length ones
+          burst_mask_d = (1'b1 << breq_d.burst.blen) - 1'b1;
+          // shift the mask to the first bank index addressed by the burst
+          burst_mask_d = burst_mask_d << breq_d.idx;
+          state_d = DoBurst;
+        end
+
+      end
+
+      DoBurst: begin
+
+        // If there is pending responses among the affected banks we wait
+        pending_rsp = |((resp_valid_o & ~resp_ready_i) & burst_mask_q);
+        // Send out requests when 1. required banks are all ready 2. no pending responses
+        if (&(req_ready_i | (~burst_mask_q)) & !pending_rsp) begin
+          for (int unsigned i = 0; i < NumOut; i++) begin
+            if (burst_mask_q[i]) begin
+              req_wdata_o[i] = breq_q.wdata;
+              req_wen_o[i] = breq_q.wen;
+              req_ben_o[i] = breq_q.ben;
+              // overwrite tgt_addr
+              req_tgt_addr_o[i] = i + breq_q.tgt_addr - breq_q.idx;
+              req_ini_addr_o[i] = i + breq_q.ini_addr - breq_q.idx;
+              // Set the valid for burst requests
+              req_valid_o[i] = 1'b1;
+            end
+          end
+          // Switch state
+          state_d = Idle;
+        end
+
+      end
+
+      default: state_d = Idle;
+    endcase
+  end
+
+  /******************
+   * Rsp Handling *
+   ******************/
+
+  if (RspGF == 1) begin : gen_grouper_bypass
+    // Bypass all responses if no grouping
+    assign resp_valid_o = resp_valid_i;
+    assign resp_ready_o = resp_ready_i;
+    assign resp_rdata_o = resp_rdata_i;
+    assign resp_ini_addr_o = resp_ini_addr_i;
+    assign resp_burst_o = '0;
+
+  end else begin : gen_grouper
+
+    // Number of groups we will check for grouping rsp
+    localparam int unsigned NumGroup = RspGF > 0 ? NumOut >> $clog2(RspGF) : NumOut;
+
+    logic [NumOut-1:0][NumInLog2-1:0] grouped_resp_ini_addr;
+    logic [NumOut-1:0][DataWidth-1:0] grouped_resp_rdata;
+    burst_resp_t [NumOut-1:0] grouped_resp_burst;
+    logic [NumOut-1:0] grouped_resp_valid;
+    logic [NumOut-1:0] grouped_resp_ready;
+
+    for (genvar i = 0; i < NumGroup; i ++) begin : gen_data_grouper
+      burst_rsp_grouper #(
+        .NumIn ( NumIn ),
+        .NumOut ( NumOut ),
+        .DataWidth ( DataWidth ),
+        .RspGF ( RspGF ),
+        .burst_resp_t ( burst_resp_t )
+      ) i_burst_rsp_grouper (
+        .clk_i (clk_i ),
+        .rst_ni (rst_ni ),
+        /// Bank side
+        .resp_ini_addr_i (resp_ini_addr_i[i*RspGF+:RspGF] ),
+        .resp_rdata_i (resp_rdata_i[i*RspGF+:RspGF] ),
+        .resp_valid_i (resp_valid_i[i*RspGF+:RspGF] ),
+        .resp_ready_o (grouped_resp_ready[i*RspGF+:RspGF] ),
+        /// Xbar side
+        .resp_ini_addr_o (grouped_resp_ini_addr[i*RspGF+:RspGF] ),
+        .resp_rdata_o (grouped_resp_rdata[i*RspGF+:RspGF] ),
+        .resp_burst_o (grouped_resp_burst[i*RspGF+:RspGF] ),
+        .resp_valid_o (grouped_resp_valid[i*RspGF+:RspGF] ),
+        .resp_ready_i (resp_ready_i[i*RspGF+:RspGF] )
+      );
+    end
+
+    always_comb begin
+      for (int i = 0; i < NumGroup; i ++) begin
+        if (state_q == DoBurst) begin
+          group_mask_d[i*RspGF+:RspGF] = {RspGF{&burst_mask_q[i*RspGF+:RspGF]}};
+        end else if (resp_ready_i[i*RspGF]) begin
+          group_mask_d[i*RspGF+:RspGF] = '0;
+        end else begin
+          group_mask_d[i*RspGF+:RspGF] = group_mask_q[i*RspGF+:RspGF];
+        end
+      end
+    end
+
+    for (genvar i = 0; i < NumOut; i++) begin
+      assign resp_ini_addr_o[i] = group_mask_q[i] ? (i % RspGF == 0 ? grouped_resp_ini_addr[i] : '0) : resp_ini_addr_i[i];
+      assign resp_rdata_o[i] = group_mask_q[i] ? (i % RspGF == 0 ? grouped_resp_rdata[i] : '0) : resp_rdata_i[i];
+      assign resp_burst_o[i] = group_mask_q[i] ? (i % RspGF == 0 ? grouped_resp_burst[i] : '0) : '0;
+      assign resp_valid_o[i] = group_mask_q[i] ? (i % RspGF == 0 ? grouped_resp_valid[i] : '0) : resp_valid_i[i];
+      assign resp_ready_o[i] = group_mask_q[i] ? grouped_resp_ready[RspGF*(i/RspGF)] : resp_ready_i[i];
+    end
+  end
+
+  /******************
+   * Assertions *
+   ******************/
+  if (NumOut == 0)
+    $error("[burst_manager] NumBanks needs to be greater or equal to 1");
+
+  if (NumOut < RspGF)
+    $error("[burst_manager] NumBanks needs to be larger or equal to RspGF");
+
+endmodule : burst_manager
diff --git a/rtl/variable_latency_interconnect/burst_pkg.sv b/rtl/variable_latency_interconnect/burst_pkg.sv
new file mode 100644
index 0000000..08d5b0e
--- /dev/null
+++ b/rtl/variable_latency_interconnect/burst_pkg.sv
@@ -0,0 +1,42 @@
+// Copyright 2024 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Diyou Shen, ETH Zurich
+// Author: Marco Bertuletti ETH Zurich
+
+// Description
+// Include TCDM burst types and grouped response types
+
+package burst_pkg;
+  /********************
+   * BURST PARAMETERS *
+   ********************/
+
+  // Memory read requests are bursted
+  localparam bit UseBurst = `ifdef USE_BURST `USE_BURST `else 0 `endif;
+
+  // Maximum length of the issued burst
+  localparam integer unsigned BurstLen = `ifdef BURSTLEN `BURSTLEN `else 1 `endif;
+  parameter int unsigned BurstLenWidth = BurstLen == 1 ? 1 : $clog2(BurstLen);
+
+  // Number of cuts if a burst crosses the target memory boundary
+  localparam integer unsigned NumCuts = 1;
+
+  typedef struct packed {
+    logic isburst;
+    logic [BurstLenWidth-1:0] blen;
+  } burst_t;
+
+  /********************************
+   * Burst Grouped Rsp PARAMETERS *
+   ********************************/
+
+  // Grouping Factor of response data
+  localparam integer unsigned RspGF = `ifdef GROUP_RSP `GROUP_RSP `else 1 `endif;
+
+  // replace rdata payload with this when the response is grouped
+  localparam int RspBurstMSB = (RspGF > 1) ?
(RspGF - 2) : 0; + typedef logic [RspBurstMSB:0][31:0] burst_gresp_t; + +endpackage : burst_pkg diff --git a/rtl/variable_latency_interconnect/burst_req_grouper.sv b/rtl/variable_latency_interconnect/burst_req_grouper.sv new file mode 100644 index 0000000..aec3b46 --- /dev/null +++ b/rtl/variable_latency_interconnect/burst_req_grouper.sv @@ -0,0 +1,196 @@ +// Copyright 2023 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 +// +// Author: Marco Bertuletti ETH Zurich + +/// Burst Req Grouper: +/// Packs a parallel memory request from NumIn initiators in a burst request. +/// The burst cutter creates multiple burst requests when the burst request crosses +/// the boundary in the target multi-banked memory. + +module burst_req_grouper + import burst_pkg::burst_t; + import burst_pkg::burst_gresp_t; +#( + parameter int unsigned NumIn = 32, + parameter int unsigned NumOut = 32, + parameter int unsigned AddrWidth = 32, + parameter int unsigned DataWidth = 32, + parameter int unsigned BeWidth = DataWidth/8, + // Number of Address bits per Target + parameter int unsigned AddrMemWidth = 12, + // Determines the width of the byte offset in a memory word. Normally this can be left at the default value, + // but sometimes it needs to be overridden (e.g., when metadata is supplied to the memory via the wdata signal). + parameter int unsigned ByteOffWidth = $clog2(DataWidth-1)-3, + // Group Response Extension Grouping Factor for TCDM + parameter int unsigned RspGF = 1, + // Dependant parameters. DO NOT CHANGE! + parameter int unsigned NumInLog2 = NumIn == 1 ? 
1 : $clog2(NumIn) +)( + input logic clk_i, + input logic rst_ni, + // Parallel input request port + input logic [NumIn-1:0][NumInLog2-1:0] req_ini_addr_i, // Initiator address + input logic [NumIn-1:0][AddrWidth-1:0] req_tgt_addr_i, // Target address + input logic [NumIn-1:0][DataWidth-1:0] req_wdata_i, + input logic [NumIn-1:0] req_wen_i, + input logic [NumIn-1:0][BeWidth-1:0] req_be_i, // Byte enable + input logic [NumIn-1:0] req_valid_i, + output logic [NumIn-1:0] req_ready_o, + // Burst output request port + output logic [NumIn-1:0][NumInLog2-1:0] req_ini_addr_o, // Initiator address + output logic [NumIn-1:0][AddrWidth-1:0] req_tgt_addr_o, // Target address + output logic [NumIn-1:0][DataWidth-1:0] req_wdata_o, + output logic [NumIn-1:0] req_wen_o, + output logic [NumIn-1:0][BeWidth-1:0] req_be_o, // Byte enable + output burst_t [NumIn-1:0] req_burst_o, + output logic [NumIn-1:0] req_valid_o, + input logic [NumIn-1:0] req_ready_i, + // Response out + output logic [NumIn-1:0][NumInLog2-1:0] resp_ini_addr_o, + output logic [NumIn-1:0][DataWidth-1:0] resp_rdata_o, + output logic [NumIn-1:0] resp_valid_o, + input logic [NumIn-1:0] resp_ready_i, + // Response in + input logic [NumIn-1:0][NumInLog2-1:0] resp_ini_addr_i, + input logic [NumIn-1:0][DataWidth-1:0] resp_rdata_i, + input burst_gresp_t [NumIn-1:0] resp_burst_i, + input logic [NumIn-1:0] resp_valid_i, + output logic [NumIn-1:0] resp_ready_o +); + + `include "common_cells/registers.svh" + + /*************/ + /* Request */ + /*************/ + + logic [NumIn-1:0][DataWidth-1:0] req_cutter_wdata; + logic [NumInLog2-1:0] req_cutter_ini_addr; + logic [AddrWidth-1:0] req_cutter_tgt_addr; + logic req_cutter_wen; + logic [BeWidth-1:0] req_cutter_be; + burst_t req_cutter_burst; + logic cutter_ready; + + logic [NumInLog2-1:0] req_bursted_ini_addr; + logic [AddrWidth-1:0] req_bursted_tgt_addr; + logic [DataWidth-1:0] req_bursted_wdata; + logic req_bursted_wen; + logic [BeWidth-1:0] req_bursted_be; + burst_t req_bursted_burst; + logic req_bursted_valid; + + 
logic req_read_q, req_read_d; + logic store_burst; + + // Save on-flight burst flag + assign store_burst = |(req_ready_i&req_valid_o); + assign req_read_d = req_bursted_burst.isburst; + `FFL(req_read_q, req_read_d, store_burst, 1'b0); + + always_comb begin + + // Assign input requests to cutter inputs + req_cutter_tgt_addr = req_tgt_addr_i[0]; + req_cutter_wdata = req_wdata_i; + req_cutter_wen = req_wen_i[0]; + req_cutter_be = req_be_i[0]; + req_cutter_burst.isburst = 1'b0; + req_cutter_burst.blen = NumIn; + + // Burst the request + if (&req_valid_i && !req_wen_i[0]) begin + // Send a burst request on the first port + req_cutter_burst.isburst = 1'b1; + req_tgt_addr_o[0] = req_bursted_tgt_addr; + req_wdata_o[0] = req_bursted_wdata; + req_wen_o[0] = req_bursted_wen; + req_be_o[0] = req_bursted_be; + req_burst_o[0] = req_bursted_burst; + req_valid_o[0] = req_bursted_valid; + req_ready_o[0] = cutter_ready; + // Silence other ports + for (int i = 1; i < NumIn; i++) begin + req_tgt_addr_o[i] = '0; + req_wdata_o[i] = '0; + req_wen_o[i] = 1'b0; + req_be_o[i] = '0; + req_burst_o[i] = '0; + req_valid_o[i] = 1'b0; + req_ready_o[i] = cutter_ready; + end + end else begin + // Bypass input + req_ini_addr_o = req_ini_addr_i; + req_tgt_addr_o = req_tgt_addr_i; + req_wdata_o = req_wdata_i; + req_wen_o = req_wen_i; + req_be_o = req_be_i; + req_burst_o = '0; + req_valid_o = req_valid_i; + req_ready_o = req_ready_i; + end + + end + + burst_cutter #( + .NumIn (NumIn ), + .NumOut (NumOut ), + .AddrWidth (AddrWidth ), + .DataWidth (DataWidth ), + .BeWidth (BeWidth ), + .AddrMemWidth (AddrMemWidth ), + .ByteOffWidth (ByteOffWidth ) + ) i_burst_cutter ( + .clk_i (clk_i ), + .rst_ni (rst_ni ), + // Memory Request In + .req_ini_addr_i (req_cutter_ini_addr ), + .req_tgt_addr_i (req_cutter_tgt_addr ), + .req_wen_i (req_cutter_wen ), + .req_wdata_i (req_cutter_wdata ), + .req_be_i (req_cutter_be ), + .req_burst_i (req_cutter_burst ), + .req_valid_i (req_valid_i[0] ), + .req_ready_o 
(cutter_ready ), + // Memory Request Out + .req_ini_addr_o (req_bursted_ini_addr ), + .req_tgt_addr_o (req_bursted_tgt_addr ), + .req_wen_o (req_bursted_wen ), + .req_wdata_o (req_bursted_wdata ), + .req_be_o (req_bursted_be ), + .req_burst_o (req_bursted_burst ), + .req_valid_o (req_bursted_valid ), + .req_ready_i (req_ready_i[0] ) + ); + + /*************/ + /* Response */ + /*************/ + + localparam int unsigned NumGroup = RspGF > 0 ? NumIn >> $clog2(RspGF) : NumIn; + + always_comb begin + for (int i = 0; i < NumIn; i++) begin + automatic int group_idx = i >> $clog2(RspGF); + + if (i < NumGroup*RspGF && req_read_q) begin + // Assign valid and data from grouped responses + resp_ini_addr_o[i] = i%RspGF == 0 ? resp_ini_addr_i[i] : resp_ini_addr_i[i] + i%RspGF; + resp_rdata_o[i] = i%RspGF == 0 ? resp_rdata_i[i] : resp_burst_i[group_idx*RspGF][(i%RspGF)-1]; + resp_valid_o[i] = resp_valid_i[group_idx*RspGF]; + // Assign ready when all grouped responses are retired + resp_ready_o[i] = i%RspGF == 0 ? &resp_ready_i[i+:RspGF] : 1'b0; + end else begin + resp_ini_addr_o[i] = resp_ini_addr_i[i]; + resp_rdata_o[i] = resp_rdata_i[i]; + resp_valid_o[i] = resp_valid_i[i]; + resp_ready_o[i] = resp_ready_i[i]; + end + + end + end + +endmodule : burst_req_grouper diff --git a/rtl/variable_latency_interconnect/burst_rsp_grouper.sv b/rtl/variable_latency_interconnect/burst_rsp_grouper.sv new file mode 100644 index 0000000..871c61f --- /dev/null +++ b/rtl/variable_latency_interconnect/burst_rsp_grouper.sv @@ -0,0 +1,81 @@ +// Copyright 2023 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. 
+// SPDX-License-Identifier: Apache-2.0 +// +// Author: Diyou Shen ETH Zurich +// +// +// Description: +// This module is used to check if the parallel responses can be grouped into +// a single response (by default rsp_o[0]) +// This could reduce the number of traffic on the rsp channel for remote loads + +module burst_rsp_grouper + import burst_pkg::*; +#( + parameter int unsigned NumIn = 32, // number of initiator ports + parameter int unsigned NumOut = 64, // number of destination ports + parameter int unsigned DataWidth = 32, + // Group Response Extension Grouping Factor for TCDM + parameter int unsigned RspGF = 1, + // Dependant parameters. DO NOT CHANGE! + parameter int unsigned NumInLog2 = (NumIn == 1) ? 1 : $clog2(NumIn), + // Burst response type can be overwritten for DataWidth > 32b + // This can happen when the DataWidth includes transaction metadata + parameter type burst_resp_t = tcdm_burst_pkg::burst_gresp_t +) ( + input logic clk_i, + input logic rst_ni, + /// Bank side + input logic [RspGF-1:0][NumInLog2-1:0] resp_ini_addr_i, + input logic [RspGF-1:0][DataWidth-1:0] resp_rdata_i, + input logic [RspGF-1:0] resp_valid_i, + output logic [RspGF-1:0] resp_ready_o, + /// Xbar side + output logic [RspGF-1:0][NumInLog2-1:0] resp_ini_addr_o, + output logic [RspGF-1:0][DataWidth-1:0] resp_rdata_o, + output burst_resp_t [RspGF-1:0] resp_burst_o, + output logic [RspGF-1:0] resp_valid_o, + input logic [RspGF-1:0] resp_ready_i +); + + // Include FF module + `include "common_cells/registers.svh" + + always_comb begin + + // By default silence all valid ports + resp_burst_o = '0; + resp_valid_o = '0; + + // Only send first response data on normal port + resp_ini_addr_o[0] = resp_ini_addr_i[0]; + resp_rdata_o[0] = resp_rdata_i[0]; + resp_ini_addr_o[RspGF-1:1] = '0; + resp_rdata_o[RspGF-1:1] = '0; + + // Assign Bank ready from the grouped response ready + for(int i = 0; i < RspGF; i++) begin + resp_ready_o[i] = resp_ready_i[0]; + end + + // Wait until all responses 
are valid + if (&resp_valid_i) begin + resp_valid_o[0] = 1'b1; + for (int unsigned i = 0; i < RspGF-1; i ++) begin + resp_burst_o[i] = resp_rdata_i[i+1]; + end + end + end + + /****************** + * Assertions * + ******************/ + // Check that the grouping factor is a power of two: n & (n-1) is zero only for powers of two. + if ((RspGF != 1) && ((RspGF & (RspGF - 1)) != 0)) + $error("[data_grouper] Grouping Factor has to be a power of two"); + + if (RspGF <= 1) + $error("[data_grouper] Grouping Factor needs to be larger than 1"); + +endmodule : burst_rsp_grouper diff --git a/rtl/variable_latency_interconnect/variable_latency_interconnect.sv b/rtl/variable_latency_interconnect/variable_latency_interconnect.sv index d5d59e4..8a95f2f 100644 --- a/rtl/variable_latency_interconnect/variable_latency_interconnect.sv +++ b/rtl/variable_latency_interconnect/variable_latency_interconnect.sv @@ -10,6 +10,7 @@ // Author: Michael Schaffner , ETH Zurich // Matheus Cavalcante , ETH Zurich +// Marco Bertuletti , ETH Zurich // Date: 16.01.2020 @@ -18,6 +19,8 @@ // Note that only the full crossbar allows NumIn/NumOut configurations that are not // aligned to a power of 2. +`ifdef USE_BURST + module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #( // Global parameters parameter int unsigned NumIn = 32, // Number of Initiators. Must be aligned with a power of 2 for butterflies. @@ -74,6 +77,59 @@ module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #( input logic [NumOut-1:0][BurstRspWidth-1:0] resp_burst_i // Burst response ); +`else + +module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #( + // Global parameters + parameter int unsigned NumIn = 32, // Number of Initiators. Must be aligned with a power of 2 for butterflies. + parameter int unsigned NumOut = 64, // Number of Targets. Must be aligned with a power of 2 for butterflies. 
+ parameter int unsigned AddrWidth = 32, // Address Width on the Initiator Side + parameter int unsigned DataWidth = 32, // Data Word Width + parameter int unsigned BeWidth = DataWidth/8, // Byte Strobe Width + parameter int unsigned AddrMemWidth = 12, // Number of Address bits per Target + parameter bit AxiVldRdy = 1'b1, // Valid/ready signaling + // Spill registers + // A bit set at position i indicates a spill register at the i-th crossbar layer. + // The layers are counted starting at 0 from the initiator, for the requests, and from the target, for the responses. + parameter logic [63:0] SpillRegisterReq = 64'h0, + parameter logic [63:0] SpillRegisterResp = 64'h0, + parameter bit FallThroughRegister = 1'b0, // Insert a fall-through register, if missing a spill register in that stage + // Determines the width of the byte offset in a memory word. Normally this can be left at the default value, + // but sometimes it needs to be overridden (e.g., when metadata is supplied to the memory via the wdata signal). + parameter int unsigned ByteOffWidth = $clog2(DataWidth-1)-3, + // Topology can be: LIC, BFLY2, BFLY4, CLOS + parameter topo_e Topology = tcdm_interconnect_pkg::LIC, + // Dependant parameters. DO NOT CHANGE! + parameter int unsigned NumInLog2 = NumIn == 1 ? 
1 : $clog2(NumIn) +) ( + input logic clk_i, + input logic rst_ni, + // Initiator side + input logic [NumIn-1:0] req_valid_i, // Request valid + output logic [NumIn-1:0] req_ready_o, // Request ready + input logic [NumIn-1:0][AddrWidth-1:0] req_tgt_addr_i, // Target address + input logic [NumIn-1:0] req_wen_i, // Write enable + input logic [NumIn-1:0][DataWidth-1:0] req_wdata_i, // Write data + input logic [NumIn-1:0][BeWidth-1:0] req_be_i, // Byte enable + output logic [NumIn-1:0] resp_valid_o, // Response valid + input logic [NumIn-1:0] resp_ready_i, // Response ready + output logic [NumIn-1:0][DataWidth-1:0] resp_rdata_o, // Data response + // Target side + output logic [NumOut-1:0] req_valid_o, // Request valid + input logic [NumOut-1:0] req_ready_i, // Request ready + output logic [NumOut-1:0][NumInLog2-1:0] req_ini_addr_o, // Initiator address + output logic [NumOut-1:0][AddrMemWidth-1:0] req_tgt_addr_o, // Target address + output logic [NumOut-1:0] req_wen_o, // Write enable + output logic [NumOut-1:0][DataWidth-1:0] req_wdata_o, // Write data + output logic [NumOut-1:0][BeWidth-1:0] req_be_o, // Byte enable + input logic [NumOut-1:0] resp_valid_i, // Response valid + output logic [NumOut-1:0] resp_ready_o, // Response ready + input logic [NumOut-1:0][NumInLog2-1:0] resp_ini_addr_i, // Initiator address + input logic [NumOut-1:0][DataWidth-1:0] resp_rdata_i // Data response +); + +`endif + /****************** * Parameters * ******************/ @@ -98,14 +154,23 @@ module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #( for (genvar j = 0; unsigned'(j) < NumIn; j++) begin : gen_inputs // Aggregate data to be routed to targets +`ifdef USE_BURST assign req_agg_in[j] = {req_wen_i[j], req_be_i[j], req_tgt_addr_i[j][ByteOffWidth + NumOutLog2 +: AddrMemWidth], req_wdata_i[j], req_burst_i[j]}; +`else + assign req_agg_in[j] = {req_wen_i[j], req_be_i[j], req_tgt_addr_i[j][ByteOffWidth + NumOutLog2 +: AddrMemWidth], req_wdata_i[j]}; +`endif assign 
{resp_rdata_o[j], resp_burst_o[j]} = resp_agg_out[j]; end // Disaggregate data for (genvar k = 0; unsigned'(k) < NumOut; k++) begin : gen_outputs +`ifdef USE_BURST assign {req_wen_o[k], req_be_o[k], req_tgt_addr_o[k], req_wdata_o[k], req_burst_o[k]} = req_agg_out[k]; assign resp_agg_in[k] = {resp_rdata_i[k], resp_burst_i[k]}; +`else + assign {req_wen_o[k], req_be_o[k], req_tgt_addr_o[k], req_wdata_o[k]} = req_agg_out[k]; + assign resp_agg_in[k] = resp_rdata_i[k]; +`endif end for (genvar j = 0; unsigned'(j) < NumIn; j++) begin : gen_target From 6ee2d7f4062e4ec4f0652ad991951ffcbeabc167 Mon Sep 17 00:00:00 2001 From: mbertuletti Date: Tue, 27 May 2025 14:23:26 +0200 Subject: [PATCH 04/15] Add "isburst" signal on the response path An "isburst" signal is needed to redistribute grouped responses at the initiator. --- .../burst_manager.sv | 13 +++--- .../burst_pkg.sv | 5 ++- .../burst_req_grouper.sv | 43 +++++++++++-------- .../burst_rsp_grouper.sv | 5 ++- .../variable_latency_interconnect.sv | 6 ++- 5 files changed, 42 insertions(+), 30 deletions(-) diff --git a/rtl/variable_latency_interconnect/burst_manager.sv b/rtl/variable_latency_interconnect/burst_manager.sv index a683fc3..7652c8f 100644 --- a/rtl/variable_latency_interconnect/burst_manager.sv +++ b/rtl/variable_latency_interconnect/burst_manager.sv @@ -28,7 +28,7 @@ module burst_manager parameter int unsigned NumInLog2 = (NumIn == 1) ? 1 : $clog2(NumIn), // Burst response type can be overwritten for DataWidth > 32b // This can happen when the DataWidth includes transaction metadata - parameter type burst_resp_t = tcdm_burst_pkg::burst_gresp_t + parameter type burst_resp_t = burst_pkg::burst_gresp_t ) ( input logic clk_i, input logic rst_ni, @@ -343,11 +343,12 @@ module burst_manager end for (genvar i = 0; i < NumOut; i++) begin - assign resp_ini_addr_o[i] = group_mask_q[i] ? (i % RspGF == 0 ? grouped_resp_ini_addr[i] : '0) : resp_ini_addr_i[i]; - assign resp_rdata_o[i] = group_mask_q[i] ? (i % RspGF == 0 ? 
grouped_resp_rdata[i] : '0) : resp_rdata_i[i]; - assign resp_burst_o[i] = group_mask_q[i] ? (i % RspGF == 0 ? grouped_resp_burst[i] : '0) : '0; - assign resp_valid_o[i] = group_mask_q[i] ? (i % RspGF == 0 ? grouped_resp_valid[i] : '0) : resp_valid_i[i]; - assign resp_ready_o[i] = group_mask_q[i] ? grouped_resp_ready[RspGF*(i/RspGF)] : resp_ready_i[i]; + assign resp_ini_addr_o[i] = group_mask_q[i] ? (i % RspGF == 0 ? grouped_resp_ini_addr[i] : '0) : resp_ini_addr_i[i]; + assign resp_rdata_o[i] = group_mask_q[i] ? (i % RspGF == 0 ? grouped_resp_rdata[i] : '0) : resp_rdata_i[i]; + assign resp_burst_o[i].gdata = group_mask_q[i] ? (i % RspGF == 0 ? grouped_resp_burst[i].gdata : '0) : '0; + assign resp_burst_o[i].isburst = group_mask_q[i] ? (i % RspGF == 0 ? grouped_resp_burst[i].isburst : 1'b0) : 1'b0; + assign resp_valid_o[i] = group_mask_q[i] ? (i % RspGF == 0 ? grouped_resp_valid[i] : '0) : resp_valid_i[i]; + assign resp_ready_o[i] = group_mask_q[i] ? grouped_resp_ready[RspGF*(i/RspGF)] : resp_ready_i[i]; end end diff --git a/rtl/variable_latency_interconnect/burst_pkg.sv b/rtl/variable_latency_interconnect/burst_pkg.sv index 08d5b0e..5337a6a 100644 --- a/rtl/variable_latency_interconnect/burst_pkg.sv +++ b/rtl/variable_latency_interconnect/burst_pkg.sv @@ -37,6 +37,9 @@ package burst_pkg; // replace rdata payload with this when the response is grouped localparam int RspBurstMSB = (RspGF > 1) ? 
(RspGF - 2) : 0; - typedef logic [RspBurstMSB:0][31:0] burst_gresp_t; + typedef struct packed { + logic isburst; + logic [RspBurstMSB:0][31:0] gdata; + } burst_gresp_t; endpackage : burst_pkg diff --git a/rtl/variable_latency_interconnect/burst_req_grouper.sv b/rtl/variable_latency_interconnect/burst_req_grouper.sv index aec3b46..cdb7174 100644 --- a/rtl/variable_latency_interconnect/burst_req_grouper.sv +++ b/rtl/variable_latency_interconnect/burst_req_grouper.sv @@ -26,7 +26,10 @@ module burst_req_grouper // Group Response Extension Grouping Factor for TCDM parameter int unsigned RspGF = 1, // Dependant parameters. DO NOT CHANGE! - parameter int unsigned NumInLog2 = NumIn == 1 ? 1 : $clog2(NumIn) + parameter int unsigned NumInLog2 = NumIn == 1 ? 1 : $clog2(NumIn), + // Burst response type can be overwritten for DataWidth > 32b + // This can happen when the DataWidth includes transaction metadata + parameter type burst_resp_t = burst_pkg::burst_gresp_t )( input logic clk_i, input logic rst_ni, @@ -55,7 +58,7 @@ module burst_req_grouper // Response in input logic [NumIn-1:0][NumInLog2-1:0] resp_ini_addr_i, input logic [NumIn-1:0][DataWidth-1:0] resp_rdata_i, - input burst_gresp_t [NumIn-1:0] resp_burst_i, + input burst_resp_t [NumIn-1:0] resp_burst_i, input logic [NumIn-1:0] resp_valid_i, output logic [NumIn-1:0] resp_ready_o ); @@ -82,14 +85,6 @@ module burst_req_grouper burst_t req_bursted_burst; logic req_bursted_valid; - logic req_read_q, req_read_d; - logic store_burst; - - // Save on-flight burst flag - assign store_burst = |(req_ready_i&req_valid_o); - assign req_read_d = req_bursted_burst.isburst; - `FFL(req_read_q, req_read_d, store_burst, 1'b0); - always_comb begin // Assign input requests to cutter inputs @@ -174,15 +169,25 @@ module burst_req_grouper always_comb begin for (int i = 0; i < NumIn; i++) begin - automatic int group_idx = i >> $clog2(RspGF); - - if (i < NumGroup*RspGF && req_read_q) begin - // Assign valid and data from grouped responses - 
resp_ini_addr_o[i] = i%RspGF == 0 ? resp_ini_addr_i[i] : resp_ini_addr_i[i] + i%RspGF; - resp_rdata_o[i] = i%RspGF == 0 ? resp_rdata_i[i] : resp_burst_i[group_idx*RspGF][(i%RspGF)-1]; - resp_valid_o[i] = resp_valid_i[group_idx*RspGF]; - // Assign ready when all grouped responses are retired - resp_ready_o[i] = i%RspGF == 0 ? &resp_ready_i[i+:RspGF] : 1'b0; + automatic int grp_idx = i >> $clog2(RspGF); + automatic int grp_off = i % RspGF; + + if (i < NumGroup*RspGF) begin + + if (resp_valid_i[grp_idx*RspGF] && resp_burst_i[grp_idx*RspGF].isburst && !resp_valid_i[i]) begin + // Assign valid and data from grouped responses + resp_ini_addr_o[i] = grp_off == 0 ? resp_ini_addr_i[i] : resp_ini_addr_i[i] + grp_off; + resp_rdata_o[i] = grp_off == 0 ? resp_rdata_i[i] : resp_burst_i[grp_idx*RspGF].gdata[grp_off-1]; + resp_valid_o[i] = resp_valid_i[grp_idx*RspGF]; + // Assign ready when all grouped responses are retired + resp_ready_o[i] = grp_off == 0 ? &resp_ready_i[i+:RspGF] : 1'b0; + end else begin + resp_ini_addr_o[i] = resp_ini_addr_i[i]; + resp_rdata_o[i] = resp_rdata_i[i]; + resp_valid_o[i] = resp_valid_i[i]; + resp_ready_o[i] = resp_ready_i[i]; + end + end else begin resp_ini_addr_o[i] = resp_ini_addr_i[i]; resp_rdata_o[i] = resp_rdata_i[i]; diff --git a/rtl/variable_latency_interconnect/burst_rsp_grouper.sv b/rtl/variable_latency_interconnect/burst_rsp_grouper.sv index 871c61f..325da1e 100644 --- a/rtl/variable_latency_interconnect/burst_rsp_grouper.sv +++ b/rtl/variable_latency_interconnect/burst_rsp_grouper.sv @@ -22,7 +22,7 @@ module burst_rsp_grouper parameter int unsigned NumInLog2 = (NumIn == 1) ? 
1 : $clog2(NumIn), // Burst response type can be overwritten for DataWidth > 32b // This can happen when the DataWidth includes transaction metadata - parameter type burst_resp_t = tcdm_burst_pkg::burst_gresp_t + parameter type burst_resp_t = burst_pkg::burst_gresp_t ) ( input logic clk_i, input logic rst_ni, @@ -62,8 +62,9 @@ module burst_rsp_grouper // Wait until all responses are valid if (&resp_valid_i) begin resp_valid_o[0] = 1'b1; + resp_burst_o[0].isburst = 1'b1; for (int unsigned i = 0; i < RspGF-1; i ++) begin - resp_burst_o[i] = resp_rdata_i[i+1]; + resp_burst_o[0].gdata[i] = resp_rdata_i[i+1]; end end end diff --git a/rtl/variable_latency_interconnect/variable_latency_interconnect.sv b/rtl/variable_latency_interconnect/variable_latency_interconnect.sv index 8a95f2f..71992ca 100644 --- a/rtl/variable_latency_interconnect/variable_latency_interconnect.sv +++ b/rtl/variable_latency_interconnect/variable_latency_interconnect.sv @@ -138,7 +138,7 @@ module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #( localparam int unsigned NumOutLog2 = $clog2(NumOut); localparam int unsigned ReqAggDataWidth = 1 + BeWidth + AddrMemWidth + DataWidth + BurstWidth; - localparam int unsigned RespAggDataWidth = DataWidth + 32; + localparam int unsigned RespAggDataWidth = DataWidth + BurstRspWidth; /************* * Signals * @@ -156,10 +156,12 @@ module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #( // Aggregate data to be routed to targets `ifdef USE_BURST assign req_agg_in[j] = {req_wen_i[j], req_be_i[j], req_tgt_addr_i[j][ByteOffWidth + NumOutLog2 +: AddrMemWidth], req_wdata_i[j], req_burst_i[j]}; + assign {resp_rdata_o[j], resp_burst_o[j]} = resp_agg_out[j]; `else assign req_agg_in[j] = {req_wen_i[j], req_be_i[j], req_tgt_addr_i[j][ByteOffWidth + NumOutLog2 +: AddrMemWidth], req_wdata_i[j]}; + assign resp_rdata_o[j] = resp_agg_out[j]; `endif - assign {resp_rdata_o[j], resp_burst_o[j]} = resp_agg_out[j]; + end // Disaggregate data 
From f90d4c94f623a90c59ad29fa444f68f01461ced7 Mon Sep 17 00:00:00 2001 From: mbertuletti Date: Tue, 27 May 2025 15:08:06 +0200 Subject: [PATCH 05/15] Add burst_variable_latency_interconnect wrapper --- Bender.yml | 1 + .../burst_variable_latency_interconnect.sv | 134 ++++++++++++++++++ .../variable_latency_interconnect.sv | 94 ++---------- 3 files changed, 146 insertions(+), 83 deletions(-) create mode 100644 rtl/variable_latency_interconnect/burst_variable_latency_interconnect.sv diff --git a/Bender.yml b/Bender.yml index bd3c8b4..816e40d 100644 --- a/Bender.yml +++ b/Bender.yml @@ -37,6 +37,7 @@ sources: - rtl/variable_latency_interconnect/variable_latency_bfly_net.sv # Level 4 - rtl/variable_latency_interconnect/variable_latency_interconnect.sv + - rtl/variable_latency_interconnect/burst_variable_latency_interconnect.sv # Low-Latency Interco - rtl/low_latency_interco/FanInPrimitive_Req.sv diff --git a/rtl/variable_latency_interconnect/burst_variable_latency_interconnect.sv b/rtl/variable_latency_interconnect/burst_variable_latency_interconnect.sv new file mode 100644 index 0000000..ecdcb17 --- /dev/null +++ b/rtl/variable_latency_interconnect/burst_variable_latency_interconnect.sv @@ -0,0 +1,134 @@ +// Copyright 2020 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +// Author: Michael Schaffner , ETH Zurich +// Matheus Cavalcante , ETH Zurich +// Marco Bertuletti , ETH Zurich + +// Date: 16.01.2020 + +// Description: Interconnect with support to variable target latencies with different +// network topologies. Currently supported are: full crossbar and radix-2/4 butterflies. +// Note that only the full crossbar allows NumIn/NumOut configurations that are not +// aligned to a power of 2. + +module burst_variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #( + // Global parameters + parameter int unsigned NumIn = 32, // Number of Initiators. Must be aligned with a power of 2 for butterflies. + parameter int unsigned NumOut = 64, // Number of Targets. Must be aligned with a power of 2 for butterflies. + parameter int unsigned AddrWidth = 32, // Address Width on the Initiator Side + parameter int unsigned DataWidth = 32, // Data Word Width + parameter int unsigned BeWidth = DataWidth/8, // Byte Strobe Width + parameter int unsigned AddrMemWidth = 12, // Number of Address bits per Target + parameter int unsigned RspGF = 1, // Grouping Factor for the Burst Response + parameter int unsigned BurstWidth = 1, // Burst Signal Width + parameter int unsigned BurstRspWidth = (RspGF-1)*DataWidth, // Burst Response Widening + parameter bit AxiVldRdy = 1'b1, // Valid/ready signaling + // Spill registers + // A bit set at position i indicates a spill register at the i-th crossbar layer. + // The layers are counted starting at 0 from the initiator, for the requests, and from the target, for the responses. + parameter logic [63:0] SpillRegisterReq = 64'h0, + parameter logic [63:0] SpillRegisterResp = 64'h0, + parameter bit FallThroughRegister = 1'b0, // Insert a fall-through register, if missing a spill register in that stage + // Determines the width of the byte offset in a memory word. 
Normally this can be left at the default value, + // but sometimes it needs to be overridden (e.g., when metadata is supplied to the memory via the wdata signal). + parameter int unsigned ByteOffWidth = $clog2(DataWidth-1)-3, + // Topology can be: LIC, BFLY2, BFLY4, CLOS + parameter topo_e Topology = tcdm_interconnect_pkg::LIC, + // Dependant parameters. DO NOT CHANGE! + parameter int unsigned NumInLog2 = NumIn == 1 ? 1 : $clog2(NumIn) +) ( + input logic clk_i, + input logic rst_ni, + // Initiator side + input logic [NumIn-1:0] req_valid_i, // Request valid + output logic [NumIn-1:0] req_ready_o, // Request ready + input logic [NumIn-1:0][AddrWidth-1:0] req_tgt_addr_i, // Target address + input logic [NumIn-1:0] req_wen_i, // Write enable + input logic [NumIn-1:0][DataWidth-1:0] req_wdata_i, // Write data + input logic [NumIn-1:0][BeWidth-1:0] req_be_i, // Byte enable + input logic [NumIn-1:0][BurstWidth-1:0] req_burst_i, // Burst data + output logic [NumIn-1:0] resp_valid_o, // Response valid + input logic [NumIn-1:0] resp_ready_i, // Response ready + output logic [NumIn-1:0][DataWidth-1:0] resp_rdata_o, // Data response + output logic [NumIn-1:0][BurstRspWidth-1:0] resp_burst_o, // Burst response + // Target side + output logic [NumOut-1:0] req_valid_o, // Request valid + input logic [NumOut-1:0] req_ready_i, // Request ready + output logic [NumOut-1:0][NumInLog2-1:0] req_ini_addr_o, // Initiator address + output logic [NumOut-1:0][AddrMemWidth-1:0] req_tgt_addr_o, // Target address + output logic [NumOut-1:0] req_wen_o, // Write enable + output logic [NumOut-1:0][DataWidth-1:0] req_wdata_o, // Write data + output logic [NumOut-1:0][BeWidth-1:0] req_be_o, // Byte enable + output logic [NumOut-1:0][BurstWidth-1:0] req_burst_o, // Burst data + input logic [NumOut-1:0] resp_valid_i, // Response valid + output logic [NumOut-1:0] resp_ready_o, // Response ready + input logic [NumOut-1:0][NumInLog2-1:0] resp_ini_addr_i, // Initiator address + input logic 
[NumOut-1:0][DataWidth-1:0] resp_rdata_i, // Data response + input logic [NumOut-1:0][BurstRspWidth-1:0] resp_burst_i // Burst response +); + + localparam int unsigned ReqAggDataWidth = DataWidth + BurstWidth; + localparam int unsigned RespAggDataWidth = DataWidth + BurstRspWidth; + + logic [NumIn-1:0][ReqAggDataWidth-1:0] req_agg_data_in, req_agg_data_out; + logic [NumIn-1:0][RespAggDataWidth-1:0] resp_agg_data_out, resp_agg_data_in; + + for (genvar j = 0; unsigned'(j) < NumIn; j++) begin : gen_inputs + assign req_agg_data_in[j] = {req_wdata_i[j], req_burst_i[j]}; + assign {resp_rdata_o[j], resp_burst_o[j]} = resp_agg_data_out[j]; + end + + for (genvar k = 0; unsigned'(k) < NumOut; k++) begin : gen_outputs + assign {req_wdata_o[k], req_burst_o[k]} = req_agg_data_out[k]; + assign resp_agg_data_in[k] = {resp_rdata_i[k], resp_burst_i[k]}; + end + + variable_latency_interconnect #( + .NumIn (NumIn ), + .NumOut (NumOut ), + .AddrWidth (AddrWidth ), + .ReqDataWidth (ReqAggDataWidth ), + .RespDataWidth (RespAggDataWidth ), + .BeWidth (BeWidth ), + .AddrMemWidth (AddrMemWidth ), + .AxiVldRdy (AxiVldRdy ), + .SpillRegisterReq (SpillRegisterReq ), + .SpillRegisterResp (SpillRegisterResp ), + .FallThroughRegister (FallThroughRegister ), + .ByteOffWidth (ByteOffWidth ), + .Topology (Topology ) + ) i_variable_latency_interconnect ( + .clk_i, + .rst_ni, + .req_valid_i (req_valid_i ), + .req_ready_o (req_ready_o ), + .req_tgt_addr_i (req_tgt_addr_i ), + .req_wen_i (req_wen_i ), + .req_wdata_i (req_agg_data_in ), + .req_be_i (req_be_i ), + .resp_valid_o (resp_valid_o ), + .resp_ready_i (resp_ready_i ), + .resp_rdata_o (resp_agg_data_out ), + // Target side + .req_valid_o (req_valid_o ), + .req_ready_i (req_ready_i ), + .req_ini_addr_o (req_ini_addr_o ), + .req_tgt_addr_o (req_tgt_addr_o ), + .req_wen_o (req_wen_o ), + .req_wdata_o (req_agg_data_out ), + .req_be_o (req_be_o ), + .resp_valid_i (resp_valid_i ), + .resp_ready_o (resp_ready_o ), + .resp_ini_addr_i (resp_ini_addr_i ), 
+ .resp_rdata_i (resp_agg_data_in ) + ); + +endmodule : burst_variable_latency_interconnect diff --git a/rtl/variable_latency_interconnect/variable_latency_interconnect.sv b/rtl/variable_latency_interconnect/variable_latency_interconnect.sv index 71992ca..3c869b3 100644 --- a/rtl/variable_latency_interconnect/variable_latency_interconnect.sv +++ b/rtl/variable_latency_interconnect/variable_latency_interconnect.sv @@ -19,73 +19,14 @@ // Note that only the full crossbar allows NumIn/NumOut configurations that are not // aligned to a power of 2. -`ifdef USE_BURST - -module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #( - // Global parameters - parameter int unsigned NumIn = 32, // Number of Initiators. Must be aligned with a power of 2 for butterflies. - parameter int unsigned NumOut = 64, // Number of Targets. Must be aligned with a power of 2 for butterflies. - parameter int unsigned AddrWidth = 32, // Address Width on the Initiator Side - parameter int unsigned DataWidth = 32, // Data Word Width - parameter int unsigned BeWidth = DataWidth/8, // Byte Strobe Width - parameter int unsigned AddrMemWidth = 12, // Number of Address bits per Target - parameter int unsigned RspGF = 1, // Grouping Factor for the Burst Response - parameter int unsigned BurstWidth = 1, // Burst Signal Width - parameter int unsigned BurstRspWidth = (RspGF-1)*DataWidth, // Burst Response Widening - parameter bit AxiVldRdy = 1'b1, // Valid/ready signaling - // Spill registers - // A bit set at position i indicates a spill register at the i-th crossbar layer. - // The layers are counted starting at 0 from the initiator, for the requests, and from the target, for the responses. - parameter logic [63:0] SpillRegisterReq = 64'h0, - parameter logic [63:0] SpillRegisterResp = 64'h0, - parameter bit FallThroughRegister = 1'b0, // Insert a fall-through register, if missing a spill register in that stage - // Determines the width of the byte offset in a memory word. 
Normally this can be left at the default value, - // but sometimes it needs to be overridden (e.g., when metadata is supplied to the memory via the wdata signal). - parameter int unsigned ByteOffWidth = $clog2(DataWidth-1)-3, - // Topology can be: LIC, BFLY2, BFLY4, CLOS - parameter topo_e Topology = tcdm_interconnect_pkg::LIC, - // Dependant parameters. DO NOT CHANGE! - parameter int unsigned NumInLog2 = NumIn == 1 ? 1 : $clog2(NumIn) -) ( - input logic clk_i, - input logic rst_ni, - // Initiator side - input logic [NumIn-1:0] req_valid_i, // Request valid - output logic [NumIn-1:0] req_ready_o, // Request ready - input logic [NumIn-1:0][AddrWidth-1:0] req_tgt_addr_i, // Target address - input logic [NumIn-1:0] req_wen_i, // Write enable - input logic [NumIn-1:0][DataWidth-1:0] req_wdata_i, // Write data - input logic [NumIn-1:0][BeWidth-1:0] req_be_i, // Byte enable - input logic [NumIn-1:0][BurstWidth-1:0] req_burst_i, // Burst data - output logic [NumIn-1:0] resp_valid_o, // Response valid - input logic [NumIn-1:0] resp_ready_i, // Response ready - output logic [NumIn-1:0][DataWidth-1:0] resp_rdata_o, // Data response - output logic [NumIn-1:0][BurstRspWidth-1:0] resp_burst_o, // Burst response - // Target side - output logic [NumOut-1:0] req_valid_o, // Request valid - input logic [NumOut-1:0] req_ready_i, // Request ready - output logic [NumOut-1:0][NumInLog2-1:0] req_ini_addr_o, // Initiator address - output logic [NumOut-1:0][AddrMemWidth-1:0] req_tgt_addr_o, // Target address - output logic [NumOut-1:0] req_wen_o, // Write enable - output logic [NumOut-1:0][DataWidth-1:0] req_wdata_o, // Write data - output logic [NumOut-1:0][BeWidth-1:0] req_be_o, // Byte enable - output logic [NumOut-1:0][BurstWidth-1:0] req_burst_o, // Burst data - input logic [NumOut-1:0] resp_valid_i, // Response valid - output logic [NumOut-1:0] resp_ready_o, // Response ready - input logic [NumOut-1:0][NumInLog2-1:0] resp_ini_addr_i, // Initiator address - input logic 
[NumOut-1:0][DataWidth-1:0] resp_rdata_i, // Data response - input logic [NumOut-1:0][BurstRspWidth-1:0] resp_burst_i // Burst response -); - -`else - module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #( // Global parameters parameter int unsigned NumIn = 32, // Number of Initiators. Must be aligned with a power of 2 for butterflies. parameter int unsigned NumOut = 64, // Number of Targets. Must be aligned with a power of 2 for butterflies. parameter int unsigned AddrWidth = 32, // Address Width on the Initiator Side - parameter int unsigned DataWidth = 32, // Data Word Width - parameter int unsigned BeWidth = DataWidth/8, // Byte Strobe Width + parameter int unsigned ReqDataWidth = 32, // Data Word Width on the Request path + parameter int unsigned RespDataWidth = 32, // Data Word Width on the Response path + parameter int unsigned BeWidth = ReqDataWidth/8, // Byte Strobe Width parameter int unsigned AddrMemWidth = 12, // Number of Address bits per Target parameter bit AxiVldRdy = 1'b1, // Valid/ready signaling // Spill registers @@ -96,7 +37,7 @@ module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #( parameter bit FallThroughRegister = 1'b0, // Insert a fall-through register, if missing a spill register in that stage // Determines the width of the byte offset in a memory word. Normally this can be left at the default value, // but sometimes it needs to be overridden (e.g., when metadata is supplied to the memory via the wdata signal). - parameter int unsigned ByteOffWidth = $clog2(DataWidth-1)-3, + parameter int unsigned ByteOffWidth = $clog2(ReqDataWidth-1)-3, // Topology can be: LIC, BFLY2, BFLY4, CLOS parameter topo_e Topology = tcdm_interconnect_pkg::LIC, // Dependant parameters. DO NOT CHANGE! 
@@ -109,27 +50,25 @@ module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #( output logic [NumIn-1:0] req_ready_o, // Request ready input logic [NumIn-1:0][AddrWidth-1:0] req_tgt_addr_i, // Target address input logic [NumIn-1:0] req_wen_i, // Write enable - input logic [NumIn-1:0][DataWidth-1:0] req_wdata_i, // Write data + input logic [NumIn-1:0][ReqDataWidth-1:0] req_wdata_i, // Write data input logic [NumIn-1:0][BeWidth-1:0] req_be_i, // Byte enable output logic [NumIn-1:0] resp_valid_o, // Response valid input logic [NumIn-1:0] resp_ready_i, // Response ready - output logic [NumIn-1:0][DataWidth-1:0] resp_rdata_o, // Data response + output logic [NumIn-1:0][RespDataWidth-1:0] resp_rdata_o, // Data response // Target side output logic [NumOut-1:0] req_valid_o, // Request valid input logic [NumOut-1:0] req_ready_i, // Request ready output logic [NumOut-1:0][NumInLog2-1:0] req_ini_addr_o, // Initiator address output logic [NumOut-1:0][AddrMemWidth-1:0] req_tgt_addr_o, // Target address output logic [NumOut-1:0] req_wen_o, // Write enable - output logic [NumOut-1:0][DataWidth-1:0] req_wdata_o, // Write data + output logic [NumOut-1:0][ReqDataWidth-1:0] req_wdata_o, // Write data output logic [NumOut-1:0][BeWidth-1:0] req_be_o, // Byte enable input logic [NumOut-1:0] resp_valid_i, // Response valid output logic [NumOut-1:0] resp_ready_o, // Response ready input logic [NumOut-1:0][NumInLog2-1:0] resp_ini_addr_i, // Initiator address - input logic [NumOut-1:0][DataWidth-1:0] resp_rdata_i // Data response + input logic [NumOut-1:0][RespDataWidth-1:0] resp_rdata_i // Data response ); -`endif - /****************** * Parameters * ******************/ @@ -137,8 +76,8 @@ module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #( // localparams and aggregation of address, wen and payload data localparam int unsigned NumOutLog2 = $clog2(NumOut); - localparam int unsigned ReqAggDataWidth = 1 + BeWidth + AddrMemWidth + DataWidth + 
BurstWidth; - localparam int unsigned RespAggDataWidth = DataWidth + BurstRspWidth; + localparam int unsigned ReqAggDataWidth = 1 + BeWidth + AddrMemWidth + ReqDataWidth; + localparam int unsigned RespAggDataWidth = RespDataWidth ; /************* * Signals * @@ -154,25 +93,14 @@ module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #( for (genvar j = 0; unsigned'(j) < NumIn; j++) begin : gen_inputs // Aggregate data to be routed to targets -`ifdef USE_BURST - assign req_agg_in[j] = {req_wen_i[j], req_be_i[j], req_tgt_addr_i[j][ByteOffWidth + NumOutLog2 +: AddrMemWidth], req_wdata_i[j], req_burst_i[j]}; - assign {resp_rdata_o[j], resp_burst_o[j]} = resp_agg_out[j]; -`else assign req_agg_in[j] = {req_wen_i[j], req_be_i[j], req_tgt_addr_i[j][ByteOffWidth + NumOutLog2 +: AddrMemWidth], req_wdata_i[j]}; assign resp_rdata_o[j] = resp_agg_out[j]; -`endif - end - // Disaggregate data for (genvar k = 0; unsigned'(k) < NumOut; k++) begin : gen_outputs -`ifdef USE_BURST - assign {req_wen_o[k], req_be_o[k], req_tgt_addr_o[k], req_wdata_o[k], req_burst_o[k]} = req_agg_out[k]; - assign resp_agg_in[k] = {resp_rdata_i[k], resp_burst_i[k]}; -`else + // Disaggregate data assign {req_wen_o[k], req_be_o[k], req_tgt_addr_o[k], req_wdata_o[k]} = req_agg_out[k]; assign resp_agg_in[k] = resp_rdata_i[k]; -`endif end for (genvar j = 0; unsigned'(j) < NumIn; j++) begin : gen_target From f6568976b38fe1de19d3ed300b6757e87b7b616f Mon Sep 17 00:00:00 2001 From: mbertuletti Date: Wed, 28 May 2025 10:26:08 +0200 Subject: [PATCH 06/15] Add check for consecutive addresses before bursting --- .../burst_req_grouper.sv | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/rtl/variable_latency_interconnect/burst_req_grouper.sv b/rtl/variable_latency_interconnect/burst_req_grouper.sv index cdb7174..9e2d5dc 100644 --- a/rtl/variable_latency_interconnect/burst_req_grouper.sv +++ b/rtl/variable_latency_interconnect/burst_req_grouper.sv @@ -85,6 +85,9 @@ module 
burst_req_grouper burst_t req_bursted_burst; logic req_bursted_valid; + // To verify that the request goes to consecutive addresses + logic consecutive; + always_comb begin // Assign input requests to cutter inputs @@ -95,8 +98,17 @@ module burst_req_grouper req_cutter_burst.isburst = 1'b0; req_cutter_burst.blen = NumIn; + // Check that every pair of adjacent requests targets consecutive addresses. + // The flag is accumulated over all pairs: keeping only the result of the + // last comparison would let non-consecutive requests be bursted. It is + // initialized so that it is also driven when NumIn == 1. + consecutive = 1'b1; + for (int i = 1; i < NumIn; i++) begin + consecutive &= req_valid_i[i] && req_valid_i[i-1] && (req_tgt_addr_i[i][AddrWidth-1:ByteOffWidth] == req_tgt_addr_i[i-1][AddrWidth-1:ByteOffWidth] + 1); + end + // Burst the request - if (&req_valid_i && !req_wen_i[0]) begin + if (&req_valid_i && !req_wen_i[0] && consecutive) begin // Send a burst request on the first port req_cutter_burst.isburst = 1'b1; req_tgt_addr_o[0] = req_bursted_tgt_addr; From 3aaf4db6e5d8ee3bdc3268a51bb7d3e4d6c7d3fe Mon Sep 17 00:00:00 2001 From: mbertuletti Date: Fri, 5 Sep 2025 10:07:57 +0200 Subject: [PATCH 07/15] Grouped response support for outstanding transactions --- Bender.yml | 1 - .../burst_manager.sv | 123 +++++++++--------- .../burst_req_grouper.sv | 60 +++++---- .../burst_rsp_grouper.sv | 82 ------------ 4 files changed, 94 insertions(+), 172 deletions(-) delete mode 100644 rtl/variable_latency_interconnect/burst_rsp_grouper.sv diff --git a/Bender.yml b/Bender.yml index 816e40d..448ca17 100644 --- a/Bender.yml +++ b/Bender.yml @@ -27,7 +27,6 @@ sources: - rtl/variable_latency_interconnect/burst_cutter.sv - rtl/variable_latency_interconnect/burst_manager.sv - rtl/variable_latency_interconnect/burst_req_grouper.sv - - rtl/variable_latency_interconnect/burst_rsp_grouper.sv # Level 2 - rtl/tcdm_interconnect/clos_net.sv - rtl/tcdm_interconnect/bfly_net.sv diff --git a/rtl/variable_latency_interconnect/burst_manager.sv b/rtl/variable_latency_interconnect/burst_manager.sv index 7652c8f..434d677 100644 --- a/rtl/variable_latency_interconnect/burst_manager.sv +++
b/rtl/variable_latency_interconnect/burst_manager.sv @@ -37,7 +37,7 @@ module burst_manager input logic [NumOut-1:0][AddrWidth-1:0] req_tgt_addr_i, input logic [NumOut-1:0][DataWidth-1:0] req_wdata_i, input logic [NumOut-1:0] req_wen_i, - input logic [NumOut-1:0][BeWidth-1:0] req_ben_i, + input logic [NumOut-1:0][BeWidth-1:0] req_be_i, input burst_t [NumOut-1:0] req_burst_i, input logic [NumOut-1:0] req_valid_i, output logic [NumOut-1:0] req_ready_o, @@ -52,7 +52,7 @@ module burst_manager output logic [NumOut-1:0][AddrWidth-1:0] req_tgt_addr_o, output logic [NumOut-1:0][DataWidth-1:0] req_wdata_o, output logic [NumOut-1:0] req_wen_o, - output logic [NumOut-1:0][BeWidth-1:0] req_ben_o, + output logic [NumOut-1:0][BeWidth-1:0] req_be_o, output logic [NumOut-1:0] req_valid_o, input logic [NumOut-1:0] req_ready_i, // @@ -96,8 +96,8 @@ module burst_manager always_comb begin prearb_data = '0; prearb_valid = '0; - ready_mask = '0; valid_mask = req_valid_i; + ready_mask = '0; for (int unsigned i = 0; i < NumOut; i++) begin if (req_valid_i[i] && req_burst_i[i].isburst) begin @@ -105,10 +105,10 @@ module burst_manager prearb_data[i].tgt_addr = req_tgt_addr_i[i]; prearb_data[i].wdata = req_wdata_i[i]; prearb_data[i].wen = req_wen_i[i]; - prearb_data[i].ben = req_ben_i[i]; + prearb_data[i].ben = req_be_i[i]; prearb_data[i].burst = req_burst_i[i]; prearb_valid[i] = 1'b1; - valid_mask = 1'b0; + valid_mask[i] = 1'b0; // Mark retired burst requests if (prearb_ready[i]) begin ready_mask[i] = 1'b1; @@ -190,31 +190,30 @@ module burst_manager DoBurst // generate parallel requests when ready } req_gen_fsm_e; - // FSM state + // FSM state & signals req_gen_fsm_e state_d, state_q; - // FSM stored signals - fifo_data_t breq_d, breq_q; - + fifo_data_t req_d, req_q; + // Indicates which req inputs are involved in a burst logic [NumOut-1:0] burst_mask_d, burst_mask_q; - // group mask used for response grouping + // Indicates which resp inputs are involved in a burst logic [NumOut-1:0] 
group_mask_d, group_mask_q; - - // indicate if there is pending response to be picked + // indicates if there is pending response to be picked logic pending_rsp; + // Store FSM state and signals `FF(state_q, state_d, Idle, clk_i, rst_ni); - `FF(breq_q, breq_d, '0, clk_i, rst_ni); + `FF(req_q, req_d, '0, clk_i, rst_ni); `FF(burst_mask_q, burst_mask_d, '0, clk_i, rst_ni); `FF(group_mask_q, group_mask_d, '0, clk_i, rst_ni); - // Each element of a burst request must be retired to start request + // Block burstlen ports after the port receiving a burst assign req_ready_o = ready_mask | (req_ready_i & ~burst_mask_q); always_comb begin : request_generator // FSM defaults state_d = state_q; - breq_d = breq_q; + req_d = req_q; burst_mask_d = burst_mask_q; // comb logic defaults @@ -227,8 +226,7 @@ module burst_manager req_tgt_addr_o = req_tgt_addr_i; req_ini_addr_o = req_ini_addr_i; req_wen_o = req_wen_i; - req_ben_o = req_ben_i; - + req_be_o = req_be_i; // Let valid requests not in burst pass req_valid_o = valid_mask; @@ -245,11 +243,11 @@ module burst_manager // pop next element fifo_pop = 1'b1; // store request - breq_d = fifo_data; + req_d = fifo_data; // a mask with burst length ones - burst_mask_d = (1'b1 << breq_d.burst.blen) - 1'b1; + burst_mask_d = (1'b1 << req_d.burst.blen) - 1'b1; // shift the mask to the first bank index addressed by the burst - burst_mask_d = burst_mask_d << breq_d.idx; + burst_mask_d = burst_mask_d << req_d.idx; state_d = DoBurst; end @@ -257,18 +255,18 @@ module burst_manager DoBurst: begin - // If there is pending responses among the affected banks we wait + // Check if there is pending responses among the affected banks pending_rsp = |((resp_valid_o & ~resp_ready_i) & burst_mask_q); - // Send out requests when 1. required banks are all ready 2. 
no pending responses - if (&(req_ready_i | (~burst_mask_q)) & !pending_rsp) begin + // If no pending response and all the affected banks are ready send a new request + if (&(req_ready_i | (~burst_mask_q)) && !pending_rsp) begin for (int unsigned i = 0; i < NumOut; i++) begin + // Overwrite the request on affected banks if (burst_mask_q[i]) begin - req_wdata_o[i] = breq_q.wdata; - req_wen_o[i] = breq_q.wen; - req_ben_o[i] = breq_q.ben; - // overwrite tgt_addr - req_tgt_addr_o[i] = i + breq_q.tgt_addr - breq_q.idx; - req_ini_addr_o[i] = i + breq_q.ini_addr - breq_q.idx; + req_wdata_o[i] = req_q.wdata; + req_tgt_addr_o[i] = i + req_q.tgt_addr - req_q.idx; + req_ini_addr_o[i] = i + req_q.ini_addr - req_q.idx; + req_wen_o[i] = req_q.wen; + req_be_o[i] = req_q.ben; // Set the valid for burst requests req_valid_o[i] = 1'b1; end @@ -306,49 +304,46 @@ module burst_manager logic [NumOut-1:0] grouped_resp_valid; logic [NumOut-1:0] grouped_resp_ready; - for (genvar i = 0; i < NumGroup; i ++) begin : gen_data_grouper - burst_rsp_grouper #( - .NumIn ( NumIn ), - .NumOut ( NumOut ), - .DataWidth ( DataWidth ), - .RspGF ( RspGF ), - .burst_resp_t ( burst_resp_t ) - ) i_burst_rsp_grouper ( - .clk_i (clk_i ), - .rst_ni (rst_ni ), - /// Bank side - .resp_ini_addr_i (resp_ini_addr_i[i*RspGF+:RspGF] ), - .resp_rdata_i (resp_rdata_i[i*RspGF+:RspGF] ), - .resp_valid_i (resp_valid_i[i*RspGF+:RspGF] ), - .resp_ready_o (grouped_resp_ready[i*RspGF+:RspGF] ), - /// Xbar side - .resp_ini_addr_o (grouped_resp_ini_addr[i*RspGF+:RspGF] ), - .resp_rdata_o (grouped_resp_rdata[i*RspGF+:RspGF] ), - .resp_burst_o (grouped_resp_burst[i*RspGF+:RspGF] ), - .resp_valid_o (grouped_resp_valid[i*RspGF+:RspGF] ), - .resp_ready_i (resp_ready_i[i*RspGF+:RspGF] ) - ); - end - always_comb begin + // Latch the new ports requested in burst + group_mask_d = group_mask_q; for (int i = 0; i < NumGroup; i ++) begin - if (state_q == DoBurst) begin - group_mask_d[i*RspGF+:RspGF] = {RspGF{&burst_mask_q[i*RspGF+:RspGF]}}; 
- end else if (resp_ready_i[i*RspGF]) begin + if ((state_q == DoBurst) && !pending_rsp) begin + group_mask_d[i*RspGF+:RspGF] = group_mask_q[i*RspGF+:RspGF] | burst_mask_q[i*RspGF+:RspGF]; + end else if (resp_valid_o[i*RspGF] && resp_ready_i[i*RspGF]) begin group_mask_d[i*RspGF+:RspGF] = '0; - end else begin - group_mask_d[i*RspGF+:RspGF] = group_mask_q[i*RspGF+:RspGF]; end end end + // Assign data to grouped response field + always_comb begin + for (int i = 0; i < NumGroup; i++) begin + grouped_resp_burst[i*RspGF].isburst = &resp_valid_i[i*RspGF+:RspGF]; + grouped_resp_valid[i*RspGF] = &resp_valid_i[i*RspGF+:RspGF]; + for (int j = 1; j < RspGF; j++) begin + grouped_resp_burst[i*RspGF].gdata[j-1] = resp_rdata_i[i*RspGF+j]; + grouped_resp_burst[i*RspGF+j].isburst = '0; + grouped_resp_valid[i*RspGF+j] = 1'b0; + end + end + end + + // Assign grouped outputs + // TODO: the code runs through, but there is a violation because the valid_o is sent before all the grouped factors are collected + // This gives an assertion error on the local_response_interconnect, because the response_data changes (we add the gdata), before + // the handshake happens. for (genvar i = 0; i < NumOut; i++) begin - assign resp_ini_addr_o[i] = group_mask_q[i] ? (i % RspGF == 0 ? grouped_resp_ini_addr[i] : '0) : resp_ini_addr_i[i]; - assign resp_rdata_o[i] = group_mask_q[i] ? (i % RspGF == 0 ? grouped_resp_rdata[i] : '0) : resp_rdata_i[i]; - assign resp_burst_o[i].gdata = group_mask_q[i] ? (i % RspGF == 0 ? grouped_resp_burst[i].gdata : '0) : '0; - assign resp_burst_o[i].isburst = group_mask_q[i] ? (i % RspGF == 0 ? grouped_resp_burst[i].isburst : 1'b0) : 1'b0; - assign resp_valid_o[i] = group_mask_q[i] ? (i % RspGF == 0 ? grouped_resp_valid[i] : '0) : resp_valid_i[i]; - assign resp_ready_o[i] = group_mask_q[i] ? grouped_resp_ready[RspGF*(i/RspGF)] : resp_ready_i[i]; + assign grouped_resp_ini_addr[i] = (i % RspGF == 0) ? resp_ini_addr_i[i] : '0; + assign grouped_resp_rdata[i] = (i % RspGF == 0) ? 
resp_rdata_i[i] : '0; + + assign grouped_resp_ready[i] = (resp_valid_o[RspGF*(i/RspGF)] && resp_ready_i[RspGF*(i/RspGF)]); + assign resp_ini_addr_o[i] = group_mask_q[i] ? grouped_resp_ini_addr[i] : resp_ini_addr_i[i]; + assign resp_rdata_o[i] = group_mask_q[i] ? grouped_resp_rdata[i] : resp_rdata_i[i]; + assign resp_burst_o[i].gdata = group_mask_q[i] ? grouped_resp_burst[i].gdata : '0; + assign resp_burst_o[i].isburst = group_mask_q[i] ? grouped_resp_burst[i].isburst : 1'b0; + assign resp_valid_o[i] = group_mask_q[i] ? grouped_resp_valid[i] : resp_valid_i[i]; + assign resp_ready_o[i] = group_mask_q[i] ? grouped_resp_ready[i] : (resp_valid_o[i] && resp_ready_i[i]); end end diff --git a/rtl/variable_latency_interconnect/burst_req_grouper.sv b/rtl/variable_latency_interconnect/burst_req_grouper.sv index 9e2d5dc..6907add 100644 --- a/rtl/variable_latency_interconnect/burst_req_grouper.sv +++ b/rtl/variable_latency_interconnect/burst_req_grouper.sv @@ -180,32 +180,42 @@ module burst_req_grouper localparam int unsigned NumGroup = RspGF > 0 ? NumIn >> $clog2(RspGF) : NumIn; always_comb begin - for (int i = 0; i < NumIn; i++) begin - automatic int grp_idx = i >> $clog2(RspGF); - automatic int grp_off = i % RspGF; - - if (i < NumGroup*RspGF) begin - - if (resp_valid_i[grp_idx*RspGF] && resp_burst_i[grp_idx*RspGF].isburst && !resp_valid_i[i]) begin - // Assign valid and data from grouped responses - resp_ini_addr_o[i] = grp_off == 0 ? resp_ini_addr_i[i] : resp_ini_addr_i[i] + grp_off; - resp_rdata_o[i] = grp_off == 0 ? resp_rdata_i[i] : resp_burst_i[grp_idx*RspGF].gdata[grp_off-1]; - resp_valid_o[i] = resp_valid_i[grp_idx*RspGF]; - // Assign ready when all grouped responses are retired - resp_ready_o[i] = grp_off == 0 ? 
&resp_ready_i[i+:RspGF] : 1'b0; - end else begin - resp_ini_addr_o[i] = resp_ini_addr_i[i]; - resp_rdata_o[i] = resp_rdata_i[i]; - resp_valid_o[i] = resp_valid_i[i]; - resp_ready_o[i] = resp_ready_i[i]; - end - end else begin - resp_ini_addr_o[i] = resp_ini_addr_i[i]; - resp_rdata_o[i] = resp_rdata_i[i]; - resp_valid_o[i] = resp_valid_i[i]; - resp_ready_o[i] = resp_ready_i[i]; - end + // Default assignment + resp_ini_addr_o = resp_ini_addr_i; + resp_rdata_o = resp_rdata_i; + resp_valid_o = resp_valid_i; + resp_ready_o = resp_ready_i; + + for (int ii = 0; ii < NumGroup; ii++) begin + + if (resp_valid_i[ii*RspGF] && resp_burst_i[ii*RspGF].isburst) begin + + // If any of the other inputs is valid give them priority + if (|resp_valid_o[(ii*RspGF+1)+:(RspGF-1)]) begin + resp_ini_addr_o[ii*RspGF] = '0; + resp_rdata_o[ii*RspGF] = '0; + resp_valid_o[ii*RspGF] = 1'b0; + resp_ready_o[ii*RspGF] = 1'b0; + + end else begin + // Assign values from port ii*RspGF + resp_ini_addr_o[ii*RspGF] = resp_ini_addr_i[ii*RspGF]; + resp_rdata_o[ii*RspGF] = resp_rdata_i[ii*RspGF]; + resp_rdata_o[ii*RspGF][DataWidth-1:DataWidth-6] = resp_rdata_i[ii*RspGF][DataWidth-1:DataWidth-6]; + resp_valid_o[ii*RspGF] = resp_valid_i[ii*RspGF]; + // Send ready back only when all the ports are ready + resp_ready_o[ii*RspGF] = &resp_ready_i[ii*RspGF+:RspGF]; + for (int jj = 1; jj < RspGF; jj++) begin + resp_ini_addr_o[ii*RspGF+jj] = resp_ini_addr_i[ii*RspGF] + jj; + resp_rdata_o[ii*RspGF+jj] = resp_burst_i[ii*RspGF].gdata[jj-1]; + resp_rdata_o[ii*RspGF+jj][DataWidth-1:DataWidth-6] = resp_rdata_i[ii*RspGF][DataWidth-1:DataWidth-6]; + resp_valid_o[ii*RspGF+jj] = resp_valid_i[ii*RspGF]; + resp_ready_o[ii*RspGF+jj] = 1'b0; + end + end + + end end end diff --git a/rtl/variable_latency_interconnect/burst_rsp_grouper.sv b/rtl/variable_latency_interconnect/burst_rsp_grouper.sv deleted file mode 100644 index 325da1e..0000000 --- a/rtl/variable_latency_interconnect/burst_rsp_grouper.sv +++ /dev/null @@ -1,82 +0,0 @@ 
-// Copyright 2023 ETH Zurich and University of Bologna. -// Licensed under the Apache License, Version 2.0, see LICENSE for details. -// SPDX-License-Identifier: Apache-2.0 -// -// Author: Diyou Shen ETH Zurich -// -// -// Description: -// This module is used to check if the parallel responses can be grouped into -// a single response (by default rsp_o[0]) -// This could reduce the number of traffic on the rsp channel for remote loads - -module burst_rsp_grouper - import burst_pkg::*; -#( - parameter int unsigned NumIn = 32, // number of initiator ports - parameter int unsigned NumOut = 64, // number of destination ports - parameter int unsigned DataWidth = 32, - // Group Response Extension Grouping Factor for TCDM - parameter int unsigned RspGF = 1, - // Dependant parameters. DO NOT CHANGE! - parameter int unsigned NumInLog2 = (NumIn == 1) ? 1 : $clog2(NumIn), - // Burst response type can be overwritten for DataWidth > 32b - // This can happen when the DataWidth includes transaction metadata - parameter type burst_resp_t = burst_pkg::burst_gresp_t -) ( - input logic clk_i, - input logic rst_ni, - /// Bank side - input logic [RspGF-1:0][NumInLog2-1:0] resp_ini_addr_i, - input logic [RspGF-1:0][DataWidth-1:0] resp_rdata_i, - input logic [RspGF-1:0] resp_valid_i, - output logic [RspGF-1:0] resp_ready_o, - /// Xbar side - output logic [RspGF-1:0][NumInLog2-1:0] resp_ini_addr_o, - output logic [RspGF-1:0][DataWidth-1:0] resp_rdata_o, - output burst_resp_t [RspGF-1:0] resp_burst_o, - output logic [RspGF-1:0] resp_valid_o, - input logic [RspGF-1:0] resp_ready_i -); - - // Include FF module - `include "common_cells/registers.svh" - - always_comb begin - - // By default silence all valid ports - resp_burst_o = '0; - resp_valid_o = '0; - - // Only send first response data on normal port - resp_ini_addr_o[0] = resp_ini_addr_i[0]; - resp_rdata_o[0] = resp_rdata_i[0]; - resp_ini_addr_o[RspGF-1:1] = '0; - resp_rdata_o[RspGF-1:1] = '0; - - // Assign Bank ready from the grouped 
response ready - for(int i = 0; i < RspGF; i++) begin - resp_ready_o[i] = resp_ready_i[0]; - end - - // Wait until all responses are valid - if (&resp_valid_i) begin - resp_valid_o[0] = 1'b1; - resp_burst_o[0].isburst = 1'b1; - for (int unsigned i = 0; i < RspGF-1; i ++) begin - resp_burst_o[0].gdata[i] = resp_rdata_i[i+1]; - end - end - end - - /****************** - * Assertions * - ******************/ - // Check number of cuts. - if ((RspGF != 1) && ((RspGF % 2) != 0)) - $error("[data_grouper] Grouping Factor has to be a power of two"); - - if (RspGF <= 1) - $error("[data_grouper] Grouping Factor needs to be larger than 1"); - -endmodule : burst_rsp_grouper From f4708a6e6e5306469f6e0defd0549032a87908fc Mon Sep 17 00:00:00 2001 From: mbertuletti Date: Tue, 16 Sep 2025 16:46:23 +0200 Subject: [PATCH 08/15] Propagate correct id on grouped response --- rtl/variable_latency_interconnect/burst_req_grouper.sv | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/rtl/variable_latency_interconnect/burst_req_grouper.sv b/rtl/variable_latency_interconnect/burst_req_grouper.sv index 6907add..db5d7fd 100644 --- a/rtl/variable_latency_interconnect/burst_req_grouper.sv +++ b/rtl/variable_latency_interconnect/burst_req_grouper.sv @@ -202,14 +202,16 @@ module burst_req_grouper // Assign values from port ii*RspGF resp_ini_addr_o[ii*RspGF] = resp_ini_addr_i[ii*RspGF]; resp_rdata_o[ii*RspGF] = resp_rdata_i[ii*RspGF]; - resp_rdata_o[ii*RspGF][DataWidth-1:DataWidth-6] = resp_rdata_i[ii*RspGF][DataWidth-1:DataWidth-6]; resp_valid_o[ii*RspGF] = resp_valid_i[ii*RspGF]; // Send ready back only when all the ports are ready resp_ready_o[ii*RspGF] = &resp_ready_i[ii*RspGF+:RspGF]; for (int jj = 1; jj < RspGF; jj++) begin resp_ini_addr_o[ii*RspGF+jj] = resp_ini_addr_i[ii*RspGF] + jj; - resp_rdata_o[ii*RspGF+jj] = resp_burst_i[ii*RspGF].gdata[jj-1]; - resp_rdata_o[ii*RspGF+jj][DataWidth-1:DataWidth-6] = resp_rdata_i[ii*RspGF][DataWidth-1:DataWidth-6]; + // TODO: This is 
necessary to assign all the response fields by + // default to the value of the (ii*RspGF)'th port. It assumes + // that the actual data payload is in the LSBs. + resp_rdata_o[ii*RspGF+jj] = (DataWidth > 32) ? {resp_rdata_i[ii*RspGF][DataWidth-1:32], resp_burst_i[ii*RspGF].gdata[jj-1]} : + resp_burst_i[ii*RspGF].gdata[jj-1]; resp_valid_o[ii*RspGF+jj] = resp_valid_i[ii*RspGF]; resp_ready_o[ii*RspGF+jj] = 1'b0; end From e6e284f761cc169ad5b957b9b9870969f41a7375 Mon Sep 17 00:00:00 2001 From: mbertuletti Date: Tue, 23 Sep 2025 11:38:45 +0200 Subject: [PATCH 09/15] Fix handshake in burst manager --- .../burst_manager.sv | 148 +++++++++--------- 1 file changed, 73 insertions(+), 75 deletions(-) diff --git a/rtl/variable_latency_interconnect/burst_manager.sv b/rtl/variable_latency_interconnect/burst_manager.sv index 434d677..71bcbe6 100644 --- a/rtl/variable_latency_interconnect/burst_manager.sv +++ b/rtl/variable_latency_interconnect/burst_manager.sv @@ -90,15 +90,14 @@ module burst_manager arb_data_t postarb_data; logic postarb_valid, postarb_ready; logic [NumOutLog2-1:0] postarb_idx; - logic [NumOut-1:0] ready_mask; - logic [NumOut-1:0] valid_mask; + logic [NumOut-1:0] ready_mask; + logic [NumOut-1:0] valid_mask; + always_comb begin prearb_data = '0; prearb_valid = '0; valid_mask = req_valid_i; - ready_mask = '0; - for (int unsigned i = 0; i < NumOut; i++) begin if (req_valid_i[i] && req_burst_i[i].isburst) begin prearb_data[i].ini_addr = req_ini_addr_i[i]; @@ -109,14 +108,13 @@ module burst_manager prearb_data[i].burst = req_burst_i[i]; prearb_valid[i] = 1'b1; valid_mask[i] = 1'b0; - // Mark retired burst requests - if (prearb_ready[i]) begin - ready_mask[i] = 1'b1; - end end end end + // Send ready for retired bursts + assign ready_mask = prearb_valid & prearb_ready; + rr_arb_tree #( .NumIn ( NumOut ), .DataType ( arb_data_t ), @@ -165,7 +163,7 @@ module burst_manager // Fall though FIFO to store bursts fifo_v3 #( .FALL_THROUGH ( 1'b1 ), - .DEPTH ( NumOut ), + .DEPTH 
( NumOut ), .dtype ( fifo_data_t ) ) i_fall_though_fifo ( .clk_i ( clk_i ), @@ -197,8 +195,8 @@ module burst_manager logic [NumOut-1:0] burst_mask_d, burst_mask_q; // Indicates which resp inputs are involved in a burst logic [NumOut-1:0] group_mask_d, group_mask_q; - // indicates if there is pending response to be picked - logic pending_rsp; + // indicates if there is pending req/resp to be picked + logic pending_req, pending_rsp, allready; // Store FSM state and signals `FF(state_q, state_d, Idle, clk_i, rst_ni); @@ -206,75 +204,69 @@ module burst_manager `FF(burst_mask_q, burst_mask_d, '0, clk_i, rst_ni); `FF(group_mask_q, group_mask_d, '0, clk_i, rst_ni); - // Block burstlen ports after the port receiving a burst - assign req_ready_o = ready_mask | (req_ready_i & ~burst_mask_q); + // a mask with burst length ones + assign burst_mask_d = ((1'b1 << fifo_data.burst.blen) - 1'b1) << fifo_data.idx; always_comb begin : request_generator // FSM defaults - state_d = state_q; - req_d = req_q; - burst_mask_d = burst_mask_q; + state_d = state_q; + req_d = req_q; - // comb logic defaults - pending_rsp = '0; // Do not take in next burst for now fifo_pop = 1'b0; // Bypass all requests by default - req_wdata_o = req_wdata_i; + req_wdata_o = req_wdata_i; req_tgt_addr_o = req_tgt_addr_i; req_ini_addr_o = req_ini_addr_i; - req_wen_o = req_wen_i; - req_be_o = req_be_i; - // Let valid requests not in burst pass - req_valid_o = valid_mask; + req_wen_o = req_wen_i; + req_be_o = req_be_i; case (state_q) // Idle state, ready to take in burst request Idle: begin - // Clear mask (unlock banks) - burst_mask_d = '0; - if (~fifo_empty) begin - // there is pending burst request - // start to handling the burst, mark as not ready - // pop next element + // Let valid requests not in burst pass + req_valid_o = valid_mask; + req_ready_o = (valid_mask & req_ready_i) | ready_mask; + + // Check if there is a request on the affected banks + pending_req = |(req_valid_o & burst_mask_d); + // Check if 
there is a response on the affected banks + pending_rsp = |(resp_valid_o & burst_mask_d); + + // Start pending burst + if (!fifo_empty && !pending_req && !pending_rsp) begin fifo_pop = 1'b1; - // store request - req_d = fifo_data; - // a mask with burst length ones - burst_mask_d = (1'b1 << req_d.burst.blen) - 1'b1; - // shift the mask to the first bank index addressed by the burst - burst_mask_d = burst_mask_d << req_d.idx; - state_d = DoBurst; + req_d = fifo_data; + state_d = DoBurst; end end DoBurst: begin - // Check if there is pending responses among the affected banks - pending_rsp = |((resp_valid_o & ~resp_ready_i) & burst_mask_q); - // If no pending response and all the affected banks are ready send a new request - if (&(req_ready_i | (~burst_mask_q)) && !pending_rsp) begin - for (int unsigned i = 0; i < NumOut; i++) begin - // Overwrite the request on affected banks - if (burst_mask_q[i]) begin - req_wdata_o[i] = req_q.wdata; - req_tgt_addr_o[i] = i + req_q.tgt_addr - req_q.idx; - req_ini_addr_o[i] = i + req_q.ini_addr - req_q.idx; - req_wen_o[i] = req_q.wen; - req_be_o[i] = req_q.ben; - // Set the valid for burst requests - req_valid_o[i] = 1'b1; - end + // Let valid requests not in burst pass + req_valid_o = valid_mask & ~burst_mask_q; + req_ready_o = ((valid_mask & req_ready_i) & ~burst_mask_q) | ready_mask; + + for (int unsigned i = 0; i < NumOut; i++) begin + // Overwrite the request on affected banks + if (burst_mask_q[i]) begin + req_wdata_o[i] = req_q.wdata; + req_tgt_addr_o[i] = i + req_q.tgt_addr - req_q.idx; + req_ini_addr_o[i] = i + req_q.ini_addr - req_q.idx; + req_wen_o[i] = req_q.wen; + req_be_o[i] = req_q.ben; + // Set the valid for burst requests + req_valid_o[i] = 1'b1; end - // Switch state - state_d = Idle; end + state_d = Idle; + end default: state_d = Idle; @@ -306,44 +298,50 @@ module burst_manager always_comb begin // Latch the new ports requested in burst - group_mask_d = group_mask_q; for (int i = 0; i < NumGroup; i ++) begin - if 
((state_q == DoBurst) && !pending_rsp) begin - group_mask_d[i*RspGF+:RspGF] = group_mask_q[i*RspGF+:RspGF] | burst_mask_q[i*RspGF+:RspGF]; - end else if (resp_valid_o[i*RspGF] && resp_ready_i[i*RspGF]) begin + // If ready cancel the reservation + if (resp_valid_o[i*RspGF] && resp_ready_i[i*RspGF]) begin group_mask_d[i*RspGF+:RspGF] = '0; + end else begin + group_mask_d[i*RspGF+:RspGF] = group_mask_q[i*RspGF+:RspGF]; + end + // If new burst mark the affected banks + if (state_q == DoBurst) begin + group_mask_d[i*RspGF+:RspGF] = group_mask_d[i*RspGF+:RspGF] | burst_mask_q[i*RspGF+:RspGF]; end end end - // Assign data to grouped response field + // Assign input data to grouped response always_comb begin for (int i = 0; i < NumGroup; i++) begin - grouped_resp_burst[i*RspGF].isburst = &resp_valid_i[i*RspGF+:RspGF]; - grouped_resp_valid[i*RspGF] = &resp_valid_i[i*RspGF+:RspGF]; + grouped_resp_ini_addr[i*RspGF] = resp_ini_addr_i[i*RspGF]; + grouped_resp_rdata[i*RspGF] = resp_rdata_i[i*RspGF]; + grouped_resp_burst[i*RspGF].isburst = &resp_valid_i[i*RspGF+:RspGF]; + grouped_resp_valid[i*RspGF] = &resp_valid_i[i*RspGF+:RspGF]; + grouped_resp_ready[i*RspGF] = resp_valid_o[i*RspGF] && resp_ready_i[i*RspGF]; + for (int j = 1; j < RspGF; j++) begin + grouped_resp_ini_addr[i*RspGF+j] = '0; + grouped_resp_rdata[i*RspGF+j] = '0; grouped_resp_burst[i*RspGF].gdata[j-1] = resp_rdata_i[i*RspGF+j]; - grouped_resp_burst[i*RspGF+j].isburst = '0; + grouped_resp_burst[i*RspGF+j].isburst = 1'b0; grouped_resp_valid[i*RspGF+j] = 1'b0; + // grouped response is ready if the i*RspGF'th output handshakes + grouped_resp_ready[i*RspGF+j] = resp_valid_o[i*RspGF] && resp_ready_i[i*RspGF]; end + end end - // Assign grouped outputs - // TODO: the code runs through, but there is a violation because the valid_o is sent before all the grouped factors are collected - // This gives an assertion error on the local_response_interconnect, because the response_data changes (we add the gdata), before - // the 
handshake happens. + // Assign outputs for (genvar i = 0; i < NumOut; i++) begin - assign grouped_resp_ini_addr[i] = (i % RspGF == 0) ? resp_ini_addr_i[i] : '0; - assign grouped_resp_rdata[i] = (i % RspGF == 0) ? resp_rdata_i[i] : '0; - - assign grouped_resp_ready[i] = (resp_valid_o[RspGF*(i/RspGF)] && resp_ready_i[RspGF*(i/RspGF)]); - assign resp_ini_addr_o[i] = group_mask_q[i] ? grouped_resp_ini_addr[i] : resp_ini_addr_i[i]; - assign resp_rdata_o[i] = group_mask_q[i] ? grouped_resp_rdata[i] : resp_rdata_i[i]; - assign resp_burst_o[i].gdata = group_mask_q[i] ? grouped_resp_burst[i].gdata : '0; - assign resp_burst_o[i].isburst = group_mask_q[i] ? grouped_resp_burst[i].isburst : 1'b0; - assign resp_valid_o[i] = group_mask_q[i] ? grouped_resp_valid[i] : resp_valid_i[i]; - assign resp_ready_o[i] = group_mask_q[i] ? grouped_resp_ready[i] : (resp_valid_o[i] && resp_ready_i[i]); + assign resp_ini_addr_o[i] = group_mask_q[i] ? grouped_resp_ini_addr[i] : resp_ini_addr_i[i]; + assign resp_rdata_o[i] = group_mask_q[i] ? grouped_resp_rdata[i] : resp_rdata_i[i]; + assign resp_burst_o[i].gdata = group_mask_q[i] ? grouped_resp_burst[i].gdata : '0; + assign resp_burst_o[i].isburst = group_mask_q[i] ? grouped_resp_burst[i].isburst : 1'b0; + assign resp_valid_o[i] = group_mask_q[i] ? grouped_resp_valid[i] : resp_valid_i[i]; + assign resp_ready_o[i] = group_mask_q[i] ? 
grouped_resp_ready[i] : (resp_valid_o[i] && resp_ready_i[i]); end end From 278fe27384ef590011e1477e4ef1e3132d5c9d35 Mon Sep 17 00:00:00 2001 From: mbertuletti Date: Wed, 24 Sep 2025 15:54:37 +0200 Subject: [PATCH 10/15] Fix response collection when grouping-factor is 1 --- .../burst_req_grouper.sv | 35 ++++++++++++------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/rtl/variable_latency_interconnect/burst_req_grouper.sv b/rtl/variable_latency_interconnect/burst_req_grouper.sv index db5d7fd..8eec580 100644 --- a/rtl/variable_latency_interconnect/burst_req_grouper.sv +++ b/rtl/variable_latency_interconnect/burst_req_grouper.sv @@ -177,27 +177,37 @@ module burst_req_grouper /* Response */ /*************/ - localparam int unsigned NumGroup = RspGF > 0 ? NumIn >> $clog2(RspGF) : NumIn; - always_comb begin + localparam int unsigned NumGroup = RspGF > 1 ? NumIn >> $clog2(RspGF) : NumIn; + + if (RspGF == 1) begin: gen_default_assignment // Default assignment - resp_ini_addr_o = resp_ini_addr_i; - resp_rdata_o = resp_rdata_i; - resp_valid_o = resp_valid_i; - resp_ready_o = resp_ready_i; + assign resp_ini_addr_o = resp_ini_addr_i; + assign resp_rdata_o = resp_rdata_i; + assign resp_valid_o = resp_valid_i; + assign resp_ready_o = resp_ready_i; - for (int ii = 0; ii < NumGroup; ii++) begin + end else begin: gen_grouped_resp_assignment - if (resp_valid_i[ii*RspGF] && resp_burst_i[ii*RspGF].isburst) begin + always_comb begin + // Default assignment + resp_ini_addr_o = resp_ini_addr_i; + resp_rdata_o = resp_rdata_i; + resp_valid_o = resp_valid_i; + resp_ready_o = resp_ready_i; - // If any of the other inputs is valid give them priority + for (int ii = 0; ii < NumGroup; ii++) begin + if (resp_valid_i[ii*RspGF] && resp_burst_i[ii*RspGF].isburst) begin + // If the response is grouped only one every RspGF input will be + // valid. If any of the other inputs is valid give them priority. 
+ // Otherwise assign to the other ports the response from the + // (ii*RspGF)'th port and signal them valid. if (|resp_valid_o[(ii*RspGF+1)+:(RspGF-1)]) begin resp_ini_addr_o[ii*RspGF] = '0; resp_rdata_o[ii*RspGF] = '0; resp_valid_o[ii*RspGF] = 1'b0; resp_ready_o[ii*RspGF] = 1'b0; - end else begin // Assign values from port ii*RspGF resp_ini_addr_o[ii*RspGF] = resp_ini_addr_i[ii*RspGF]; @@ -216,10 +226,11 @@ module burst_req_grouper resp_ready_o[ii*RspGF+jj] = 1'b0; end end - end - + end end + end + endmodule : burst_req_grouper From 8071742966adff18adb9edf3082f8a9020cee1c8 Mon Sep 17 00:00:00 2001 From: mbertuletti Date: Wed, 24 Sep 2025 15:55:07 +0200 Subject: [PATCH 11/15] Fix remainder computation in cutter --- .../burst_cutter.sv | 105 ++++++++---------- 1 file changed, 45 insertions(+), 60 deletions(-) diff --git a/rtl/variable_latency_interconnect/burst_cutter.sv b/rtl/variable_latency_interconnect/burst_cutter.sv index daca673..1fff8f8 100644 --- a/rtl/variable_latency_interconnect/burst_cutter.sv +++ b/rtl/variable_latency_interconnect/burst_cutter.sv @@ -47,6 +47,8 @@ module burst_cutter input logic req_ready_i ); + `include "common_cells/registers.svh" + localparam int unsigned BurstLen = NumIn; localparam int unsigned BurstLenWidth = NumInLog2; localparam int unsigned NumBanks = NumOut; @@ -57,6 +59,15 @@ module burst_cutter BurstCut // second cut of burst } burst_cutter_fsm_e; + // Keep everything same width + logic [31:0] bank_offset; + logic [31:0] max_blen; + logic [31:0] remaining_len; + assign bank_offset = {{(32-BankOffsetBits){1'b0}}, req_tgt_addr_i[AddrMemWidth-1 : ByteOffWidth]}; + assign max_blen = NumBanks - bank_offset; + assign remaining_len = {{(32-BurstLenWidth){1'b0}}, req_burst_i.blen} > max_blen ? 
+ {{(32-BurstLenWidth){1'b0}}, req_burst_i.blen} - max_blen : '0; + // FSM state burst_cutter_fsm_e state_d, state_q; burst_cutter_fsm_e next_state; @@ -67,43 +78,27 @@ module burst_cutter logic [DataWidth-1:0] cut_wdata_d, cut_wdata_q; burst_t cut_burst_d, cut_burst_q; - logic [BankOffsetBits-1:0] bank_offset; - logic [BurstLenWidth:0] max_blen; - logic [BurstLenWidth:0] remaining_len; - - always_ff @(posedge clk_i or negedge rst_ni) begin : burst_cutter_proc - if(~rst_ni) begin - state_q <= Bypass; - cut_burst_q <= '0; - cut_ini_addr_q <= '0; - cut_tgt_addr_q <= '0; - cut_wdata_q <= '0; - end else begin - state_q <= state_d; - cut_ini_addr_q <= cut_tgt_addr_d; - cut_tgt_addr_q <= cut_tgt_addr_d; - cut_wdata_q <= cut_wdata_d; - cut_burst_q <= cut_burst_d; - end - end + // Store FSM state and signals + `FF(state_q, state_d, Bypass, clk_i, rst_ni); + `FF(cut_burst_q, cut_burst_d, '0, clk_i, rst_ni); + `FF(cut_ini_addr_q, cut_ini_addr_d, '0, clk_i, rst_ni); + `FF(cut_tgt_addr_q, cut_tgt_addr_d, '0, clk_i, rst_ni); + `FF(cut_wdata_q, cut_wdata_d, '0, clk_i, rst_ni); always_comb begin - // FSM defaults - state_d = state_q; - cut_burst_d = cut_burst_q; - cut_tgt_addr_d = cut_tgt_addr_q; - cut_ini_addr_d = cut_ini_addr_q; - cut_wdata_d = cut_wdata_q; - - bank_offset = '0; - max_blen = '0; - remaining_len = '0; - next_state = Bypass; + // FSM defaults + state_d = state_q; + cut_burst_d = cut_burst_q; + cut_tgt_addr_d = cut_tgt_addr_q; + cut_ini_addr_d = cut_ini_addr_q; + cut_wdata_d = cut_wdata_q; // Need to cut, use FSM to realize the logic case (state_q) + Bypass: begin + // Bypass the signals req_ini_addr_o = req_ini_addr_i; req_tgt_addr_o = req_tgt_addr_i; @@ -113,70 +108,60 @@ module burst_cutter req_burst_o = req_burst_i; req_valid_o = req_valid_i; req_ready_o = req_ready_i; - // Keep current state by default - next_state = state_q; // Check if it is valid and being a burst request if (req_burst_i.isburst) begin - bank_offset = req_tgt_addr_i[AddrMemWidth-1 : 
ByteOffWidth]; - max_blen = NumBanks - bank_offset; + // No support for write burst, tie to 0 if (req_wen_i) begin - // no support for write burst, tie to 0 req_burst_o = '0; end else begin - if (req_burst_i.blen > max_blen) begin - next_state = BurstCut; - + // Cut burst when it is longer than the max length + if (remaining_len > 0) begin + if (remaining_len > NumBanks) begin + $error("Only one cut is supported, reduce the burst length."); + end // pause taking in new requests req_ready_o = 1'b0; // Send out the first burst req_burst_o.isburst = 1'b1; req_burst_o.blen = max_blen; - // store the info for next burst cut_ini_addr_d = req_ini_addr_i + (max_blen << ByteOffWidth); cut_tgt_addr_d = req_tgt_addr_i + (max_blen << ByteOffWidth); cut_wdata_d = req_wdata_i[max_blen]; - - remaining_len = req_burst_i.blen - max_blen; - if (remaining_len > NumBanks) begin - $error("Only one cut is supported, reduce the burst length."); - end - cut_burst_d.isburst = 1'b1; - cut_burst_d.blen = remaining_len; - + cut_burst_d.blen = remaining_len[BurstLenWidth-1:0]; + // Keep state until the current one is picked + if (req_ready_i) begin + state_d = BurstCut; + end end + end end - // Keep state until the current one is picked - if (req_ready_i) begin - state_d = next_state; - end + end BurstCut: begin - next_state = state_q; // assign the outputs // send out this part and wait for ready req_tgt_addr_o = cut_ini_addr_q; req_tgt_addr_o = cut_tgt_addr_q; - req_wdata_o = cut_wdata_q; - req_wen_o = '0; // only read burst is supported - req_be_o = '0; - req_burst_o = cut_burst_q; - req_valid_o = 1'b1; - req_ready_o = 1'b0; + req_wdata_o = cut_wdata_q; + req_wen_o = '0; // only read burst is supported + req_be_o = '0; + req_burst_o = cut_burst_q; + req_valid_o = 1'b1; + req_ready_o = 1'b0; // When we get the ready, the second part is out if (req_ready_i) begin - next_state = Bypass; req_ready_o = req_ready_i; + state_d = Bypass; end - state_d = next_state; end default: state_d = Bypass; 
From b87dd18c0b4e99ab34689ef3afadf1ee700d16c4 Mon Sep 17 00:00:00 2001 From: mbertuletti Date: Tue, 7 Oct 2025 17:37:48 +0200 Subject: [PATCH 12/15] Fix naming of signals --- rtl/variable_latency_interconnect/burst_manager.sv | 4 ++-- rtl/variable_latency_interconnect/burst_req_grouper.sv | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/rtl/variable_latency_interconnect/burst_manager.sv b/rtl/variable_latency_interconnect/burst_manager.sv index 71bcbe6..577291a 100644 --- a/rtl/variable_latency_interconnect/burst_manager.sv +++ b/rtl/variable_latency_interconnect/burst_manager.sv @@ -81,7 +81,7 @@ module burst_manager logic [AddrWidth-1:0] tgt_addr; logic [DataWidth-1:0] wdata; logic wen; - logic [BeWidth] ben; + logic [BeWidth-1:0] ben; burst_t burst; } arb_data_t; @@ -140,7 +140,7 @@ module burst_manager logic [AddrWidth-1:0] tgt_addr; logic [DataWidth-1:0] wdata; logic wen; - logic [BeWidth] ben; + logic [BeWidth-1:0] ben; burst_t burst; logic [NumOutLog2-1:0] idx; } fifo_data_t; diff --git a/rtl/variable_latency_interconnect/burst_req_grouper.sv b/rtl/variable_latency_interconnect/burst_req_grouper.sv index 8eec580..5e759d5 100644 --- a/rtl/variable_latency_interconnect/burst_req_grouper.sv +++ b/rtl/variable_latency_interconnect/burst_req_grouper.sv @@ -38,7 +38,7 @@ module burst_req_grouper input logic [NumIn-1:0][AddrWidth-1:0] req_tgt_addr_i, // Target address input logic [NumIn-1:0][DataWidth-1:0] req_wdata_i, input logic [NumIn-1:0] req_wen_i, - input logic [NumIn-1:0][BeWidth] req_be_i, + input logic [NumIn-1:0][BeWidth-1:0] req_be_i, input logic [NumIn-1:0] req_valid_i, output logic [NumIn-1:0] req_ready_o, // Burst output request port @@ -46,7 +46,7 @@ module burst_req_grouper output logic [NumIn-1:0][AddrWidth-1:0] req_tgt_addr_o, // Target address output logic [NumIn-1:0][DataWidth-1:0] req_wdata_o, output logic [NumIn-1:0] req_wen_o, - output logic [NumIn-1:0][BeWidth] req_be_o, + output logic [NumIn-1:0][BeWidth-1:0] req_be_o, 
output burst_t [NumIn-1:0] req_burst_o, output logic [NumIn-1:0] req_valid_o, input logic [NumIn-1:0] req_ready_i, From e903c495646ae5c5d2aa65921a1a30e99d0cd56c Mon Sep 17 00:00:00 2001 From: mbertuletti Date: Thu, 9 Oct 2025 08:58:22 +0200 Subject: [PATCH 13/15] Fix for number of inputs different from number of outputs --- .../burst_variable_latency_interconnect.sv | 7 +++++-- .../variable_latency_interconnect.sv | 10 +++------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/rtl/variable_latency_interconnect/burst_variable_latency_interconnect.sv b/rtl/variable_latency_interconnect/burst_variable_latency_interconnect.sv index ecdcb17..41a7d3e 100644 --- a/rtl/variable_latency_interconnect/burst_variable_latency_interconnect.sv +++ b/rtl/variable_latency_interconnect/burst_variable_latency_interconnect.sv @@ -78,8 +78,11 @@ module burst_variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; localparam int unsigned ReqAggDataWidth = DataWidth + BurstWidth; localparam int unsigned RespAggDataWidth = DataWidth + BurstRspWidth; - logic [NumIn-1:0][ReqAggDataWidth-1:0] req_agg_data_in, req_agg_data_out; - logic [NumIn-1:0][RespAggDataWidth-1:0] resp_agg_data_out, resp_agg_data_in; + logic [NumIn-1:0][ReqAggDataWidth-1:0] req_agg_data_in; + logic [NumOut-1:0][ReqAggDataWidth-1:0] req_agg_data_out; + + logic [NumIn-1:0][RespAggDataWidth-1:0] resp_agg_data_out; + logic [NumOut-1:0][RespAggDataWidth-1:0] resp_agg_data_in; for (genvar j = 0; unsigned'(j) < NumIn; j++) begin : gen_inputs assign req_agg_data_in[j] = {req_wdata_i[j], req_burst_i[j]}; diff --git a/rtl/variable_latency_interconnect/variable_latency_interconnect.sv b/rtl/variable_latency_interconnect/variable_latency_interconnect.sv index 3c869b3..8387e48 100644 --- a/rtl/variable_latency_interconnect/variable_latency_interconnect.sv +++ b/rtl/variable_latency_interconnect/variable_latency_interconnect.sv @@ -105,14 +105,10 @@ module variable_latency_interconnect import 
tcdm_interconnect_pkg::topo_e; #( for (genvar j = 0; unsigned'(j) < NumIn; j++) begin : gen_target // Extract target index - if (NumIn == 1) begin - assign tgt_sel[j] = '0; + if (NumOut == 1) begin + assign tgt_sel[j] = 0; end else begin - if (NumOut == 1) begin - assign tgt_sel[j] = 0; - end else begin - assign tgt_sel[j] = req_tgt_addr_i[j][ByteOffWidth +: NumOutLog2]; - end + assign tgt_sel[j] = req_tgt_addr_i[j][ByteOffWidth +: NumOutLog2]; end end From 8529a1a5767048632bc30dca9bda674a89b4ed35 Mon Sep 17 00:00:00 2001 From: mbertuletti Date: Thu, 16 Oct 2025 14:11:17 +0200 Subject: [PATCH 14/15] Add grouped request --- .../burst_cutter.sv | 2 + .../burst_manager.sv | 105 ++++++++++----- .../burst_pkg.sv | 9 +- .../burst_req_grouper.sv | 125 ++++++++++++------ .../burst_variable_latency_interconnect.sv | 3 +- 5 files changed, 164 insertions(+), 80 deletions(-) diff --git a/rtl/variable_latency_interconnect/burst_cutter.sv b/rtl/variable_latency_interconnect/burst_cutter.sv index 1fff8f8..749de08 100644 --- a/rtl/variable_latency_interconnect/burst_cutter.sv +++ b/rtl/variable_latency_interconnect/burst_cutter.sv @@ -127,12 +127,14 @@ module burst_cutter // Send out the first burst req_burst_o.isburst = 1'b1; req_burst_o.blen = max_blen; + req_burst_o.gdata = '0; // store the info for next burst cut_ini_addr_d = req_ini_addr_i + (max_blen << ByteOffWidth); cut_tgt_addr_d = req_tgt_addr_i + (max_blen << ByteOffWidth); cut_wdata_d = req_wdata_i[max_blen]; cut_burst_d.isburst = 1'b1; cut_burst_d.blen = remaining_len[BurstLenWidth-1:0]; + cut_burst_d.gdata = '0; // Keep state until the current one is picked if (req_ready_i) begin state_d = BurstCut; diff --git a/rtl/variable_latency_interconnect/burst_manager.sv b/rtl/variable_latency_interconnect/burst_manager.sv index 577291a..e0bce26 100644 --- a/rtl/variable_latency_interconnect/burst_manager.sv +++ b/rtl/variable_latency_interconnect/burst_manager.sv @@ -22,10 +22,13 @@ module burst_manager // determines the 
width of the byte offset in a memory word. normally this can be left at the default vaule, // but sometimes it needs to be overridden (e.g. when meta-data is supplied to the memory via the wdata signal). parameter int unsigned ByteOffWidth = $clog2(DataWidth-1)-3, + // Group Request Extension Grouping Factor for TCDM + parameter int unsigned ReqGF = 1, // Group Response Extension Grouping Factor for TCDM parameter int unsigned RspGF = 1, // Dependant parameters. DO NOT CHANGE! - parameter int unsigned NumInLog2 = (NumIn == 1) ? 1 : $clog2(NumIn), + parameter int unsigned NumInLog2 = (NumIn > 32'd1) ? unsigned'($clog2(NumIn)) : 32'd1, + parameter int unsigned NumOutLog2 = (NumOut > 32'd1) ? unsigned'($clog2(NumOut)) : 32'd1, // Burst response type can be overwritten for DataWidth > 32b // This can happen when the DataWidth includes transaction metadata parameter type burst_resp_t = burst_pkg::burst_gresp_t @@ -70,11 +73,50 @@ module burst_manager // Include FF module `include "common_cells/registers.svh" - localparam int unsigned NumOutLog2 = (NumOut > 32'd1) ? unsigned'($clog2(NumOut)) : 32'd1; + /*************** + * Burst WRITE * + ***************/ + + localparam int unsigned NumGroupReq = ReqGF > 0 ? 
NumOut >> $clog2(ReqGF) : NumOut; + logic [NumOut-1:0][NumInLog2-1:0] req_ini_addr; + logic [NumOut-1:0][AddrWidth-1:0] req_tgt_addr; + logic [NumOut-1:0][DataWidth-1:0] req_wdata; + logic [NumOut-1:0] req_wen; + logic [NumOut-1:0][BeWidth-1:0] req_be; + burst_t [NumOut-1:0] req_burst; + logic [NumOut-1:0] req_valid; + logic [NumOut-1:0] req_ready; + + // Write request ungrouper + always_comb begin + req_ini_addr = req_ini_addr_i; + req_tgt_addr = req_tgt_addr_i; + req_wdata = req_wdata_i; + req_wen = req_wen_i; + req_be = req_be_i; + req_burst = req_burst_i; + req_valid = req_valid_i; + // Redistribute grouped write requests + for (int i = 0; i < NumGroupReq; i++) begin + for (int j = 0; j < ReqGF; j++) begin + if (req_burst[i*ReqGF].isburst && req_wen_i[i*ReqGF] && ReqGF > 1) begin + req_ini_addr[i*ReqGF+j] = req_ini_addr_i[i*ReqGF] + j; + req_tgt_addr[i*ReqGF+j] = req_tgt_addr_i[i*ReqGF] + j; + req_wen[i*ReqGF+j] = req_wen_i[i*ReqGF]; + req_be[i*ReqGF+j] = req_be_i[i*ReqGF]; + req_burst[i*ReqGF+j] = '0; + req_valid[i*ReqGF+j] = req_valid_i[i*ReqGF]; + if (j > 0) begin + req_wdata[i*ReqGF+j] = req_burst_i[i*ReqGF].gdata[j]; + end + end + end + end + end - /****************** - * Burst Identify * - ******************/ + /************** + * Burst READ * + **************/ typedef struct packed { logic [NumInLog2-1:0] ini_addr; @@ -93,21 +135,20 @@ module burst_manager logic [NumOut-1:0] ready_mask; logic [NumOut-1:0] valid_mask; - always_comb begin prearb_data = '0; prearb_valid = '0; - valid_mask = req_valid_i; + valid_mask = req_valid; for (int unsigned i = 0; i < NumOut; i++) begin - if (req_valid_i[i] && req_burst_i[i].isburst) begin - prearb_data[i].ini_addr = req_ini_addr_i[i]; - prearb_data[i].tgt_addr = req_tgt_addr_i[i]; - prearb_data[i].wdata = req_wdata_i[i]; - prearb_data[i].wen = req_wen_i[i]; - prearb_data[i].ben = req_be_i[i]; - prearb_data[i].burst = req_burst_i[i]; - prearb_valid[i] = 1'b1; - valid_mask[i] = 1'b0; + if (req_valid[i] && 
req_burst[i].isburst) begin + prearb_data[i].ini_addr = req_ini_addr[i]; + prearb_data[i].tgt_addr = req_tgt_addr[i]; + prearb_data[i].wdata = req_wdata[i]; + prearb_data[i].wen = req_wen[i]; + prearb_data[i].ben = req_be[i]; + prearb_data[i].burst = req_burst[i]; + prearb_valid[i] = 1'b1; + valid_mask[i] = 1'b0; end end end @@ -116,11 +157,11 @@ module burst_manager assign ready_mask = prearb_valid & prearb_ready; rr_arb_tree #( - .NumIn ( NumOut ), - .DataType ( arb_data_t ), - .ExtPrio ( 1'b0), - .AxiVldRdy ( 1'b1), - .LockIn ( 1'b1) + .NumIn ( NumOut ), + .DataType ( arb_data_t ), + .ExtPrio ( 1'b0 ), + .AxiVldRdy ( 1'b1 ), + .LockIn ( 1'b1 ) ) i_rr_arb_tree ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), @@ -217,11 +258,11 @@ module burst_manager fifo_pop = 1'b0; // Bypass all requests by default - req_wdata_o = req_wdata_i; - req_tgt_addr_o = req_tgt_addr_i; - req_ini_addr_o = req_ini_addr_i; - req_wen_o = req_wen_i; - req_be_o = req_be_i; + req_wdata_o = req_wdata; + req_ini_addr_o = req_ini_addr; + req_tgt_addr_o = req_tgt_addr; + req_wen_o = req_wen; + req_be_o = req_be; case (state_q) @@ -273,9 +314,9 @@ module burst_manager endcase end - /****************** - * Rsp Handling * - ******************/ + /*********************** + * Response Handling * + ***********************/ if (RspGF == 1) begin : gen_grouper_bypass // Bypass all responses if no grouping @@ -288,7 +329,7 @@ module burst_manager end else begin : gen_grouper // Number of groups we will check for grouping rsp - localparam int unsigned NumGroup = RspGF > 0 ? NumOut >> $clog2(RspGF) : NumOut; + localparam int unsigned NumGroupRsp = RspGF > 0 ? 
NumOut >> $clog2(RspGF) : NumOut; logic [NumOut-1:0][NumInLog2-1:0] grouped_resp_ini_addr; logic [NumOut-1:0][DataWidth-1:0] grouped_resp_rdata; @@ -298,7 +339,7 @@ module burst_manager always_comb begin // Latch the new ports requested in burst - for (int i = 0; i < NumGroup; i ++) begin + for (int i = 0; i < NumGroupRsp; i ++) begin // If ready cancel the reservation if (resp_valid_o[i*RspGF] && resp_ready_i[i*RspGF]) begin group_mask_d[i*RspGF+:RspGF] = '0; @@ -314,7 +355,7 @@ module burst_manager // Assign input data to grouped response always_comb begin - for (int i = 0; i < NumGroup; i++) begin + for (int i = 0; i < NumGroupRsp; i++) begin grouped_resp_ini_addr[i*RspGF] = resp_ini_addr_i[i*RspGF]; grouped_resp_rdata[i*RspGF] = resp_rdata_i[i*RspGF]; grouped_resp_burst[i*RspGF].isburst = &resp_valid_i[i*RspGF+:RspGF]; diff --git a/rtl/variable_latency_interconnect/burst_pkg.sv b/rtl/variable_latency_interconnect/burst_pkg.sv index 5337a6a..ee875e0 100644 --- a/rtl/variable_latency_interconnect/burst_pkg.sv +++ b/rtl/variable_latency_interconnect/burst_pkg.sv @@ -20,12 +20,17 @@ package burst_pkg; localparam integer unsigned BurstLen = `ifdef BURSTLEN `BURSTLEN `else 1 `endif; parameter int unsigned BurstLenWidth = BurstLen == 1 ? 1 : $clog2(BurstLen); + // Grouped request in bursted writes + localparam integer unsigned ReqGF = `ifdef GROUP_REQ `GROUP_REQ `else 1 `endif; + localparam int ReqBurstMSB = (ReqGF > 1) ? (ReqGF - 2) : 0; + // Number of cuts if a burst crosses the target memory boundary localparam integer unsigned NumCuts = 1; typedef struct packed { logic isburst; logic [BurstLenWidth-1:0] blen; + logic [ReqBurstMSB:0][31:0] gdata; } burst_t; /******************************** @@ -34,9 +39,9 @@ package burst_pkg; // Grouping Factor of response data localparam integer unsigned RspGF = `ifdef GROUP_RSP `GROUP_RSP `else 1 `endif; - - // replace rdata payload with this when the response is grouped localparam int RspBurstMSB = (RspGF > 1) ? 
(RspGF - 2) : 0; + + // Add this to rdata payload when the response is grouped typedef struct packed { logic isburst; logic [RspBurstMSB:0][31:0] gdata; diff --git a/rtl/variable_latency_interconnect/burst_req_grouper.sv b/rtl/variable_latency_interconnect/burst_req_grouper.sv index 5e759d5..2bcb8fe 100644 --- a/rtl/variable_latency_interconnect/burst_req_grouper.sv +++ b/rtl/variable_latency_interconnect/burst_req_grouper.sv @@ -23,6 +23,8 @@ module burst_req_grouper // Determines the width of the byte offset in a memory word. Normally this can be left at the default value, // but sometimes it needs to be overridden (e.g., when metadata is supplied to the memory via the wdata signal). parameter int unsigned ByteOffWidth = $clog2(DataWidth-1)-3, + // Group Request Extension Grouping Factor for TCDM + parameter int unsigned ReqGF = 1, // Group Response Extension Grouping Factor for TCDM parameter int unsigned RspGF = 1, // Dependant parameters. DO NOT CHANGE! @@ -64,6 +66,8 @@ module burst_req_grouper ); `include "common_cells/registers.svh" + localparam int unsigned NumGroupReq = ReqGF > 1 ? NumIn >> $clog2(ReqGF) : NumIn; + localparam int unsigned NumGroupRsp = RspGF > 1 ? 
NumIn >> $clog2(RspGF) : NumIn; /*************/ /* Request */ @@ -86,58 +90,94 @@ module burst_req_grouper logic req_bursted_valid; // To verify that the request goes to consecutive addresses - logic consecutive; + logic [NumIn-2:0] consecutive; + logic consecutive_read, consecutive_write; always_comb begin - // Assign input requests to cutter inputs - req_cutter_tgt_addr = req_tgt_addr_i[0]; - req_cutter_wdata = req_wdata_i; - req_cutter_wen = req_wen_i[0]; - req_cutter_be = req_be_i[0]; - req_cutter_burst.isburst = 1'b0; - req_cutter_burst.blen = NumIn; + // Bypass input + req_ini_addr_o = req_ini_addr_i; + req_tgt_addr_o = req_tgt_addr_i; + req_wdata_o = req_wdata_i; + req_wen_o = req_wen_i; + req_be_o = req_be_i; + req_burst_o = '0; + req_valid_o = req_valid_i; + req_ready_o = req_ready_i; // Check if request goes to consecutive addresses - for (int i = 1; i < NumIn; i++) begin - if (req_valid_i[i] && req_valid_i[i-1]) begin - consecutive = (req_tgt_addr_i[i][AddrWidth-1:ByteOffWidth] == req_tgt_addr_i[i-1][AddrWidth-1:ByteOffWidth] + 1); - end else begin - consecutive = 1'b0; + for (int i = 0; i < NumIn-1; i++) begin + consecutive[i] = (req_tgt_addr_i[i+1][AddrWidth-1:ByteOffWidth] + - req_tgt_addr_i[i][AddrWidth-1:ByteOffWidth]) == AddrWidth'(1); + end + + /* WRITE */ + + // Assign grouped requests + if (ReqGF > 1) begin + for (int i = 0; i < NumGroupReq; i++) begin + consecutive_write = &consecutive[i*ReqGF+:(ReqGF-1)] && &req_wen_i[i*ReqGF+:ReqGF]; + if (&req_valid_i[i*ReqGF+:ReqGF] && consecutive_write) begin + req_ini_addr_o[i*ReqGF] = req_ini_addr_i[i*ReqGF]; + req_tgt_addr_o[i*ReqGF] = req_tgt_addr_i[i*ReqGF]; + req_wdata_o[i*ReqGF] = req_wdata_i[i*ReqGF]; + req_wen_o[i*ReqGF] = req_wen_i[i*ReqGF]; + req_be_o[i*ReqGF] = req_be_i[i*ReqGF]; + req_burst_o[i*ReqGF].isburst = 1'b1; + req_burst_o[i*ReqGF].blen = '0; + req_valid_o[i*ReqGF] = req_valid_i[i*ReqGF]; + req_ready_o[i*ReqGF] = req_valid_o[i*ReqGF] && req_ready_i[i*ReqGF]; + for (int j = 1; j < 
ReqGF; j++) begin + req_ini_addr_o[i*ReqGF+j] = '0; + req_tgt_addr_o[i*ReqGF+j] = '0; + req_wdata_o[i*ReqGF+j] = '0; + req_wen_o[i*ReqGF+j] = 1'b0; + req_be_o[i*ReqGF+j] = '0; + req_burst_o[i*ReqGF+j] = '0; + req_valid_o[i*ReqGF+j] = 1'b0; + req_ready_o[i*ReqGF+j] = req_valid_o[i*ReqGF] && req_ready_i[i*ReqGF]; + // Pack the wdata of input i*ReqGF+j into the gdata of the i*ReqGF'th output + req_burst_o[i*ReqGF].gdata = req_wdata_i[i*ReqGF+j]; + end + end end end - // Burst the request - if (&req_valid_i && !req_wen_i[0] && consecutive) begin - // Send a burst request on the first port + /* READ */ + + // Assign input requests to cutter inputs + req_cutter_tgt_addr = req_tgt_addr_i[0]; + req_cutter_wdata = req_wdata_i; + req_cutter_wen = req_wen_i[0]; + req_cutter_be = req_be_i[0]; + req_cutter_burst.isburst = 1'b0; + req_cutter_burst.blen = NumIn; + req_cutter_burst.gdata = '0; + + consecutive_read = &consecutive && (~|req_wen_i); + + // Burst the read request + if (&req_valid_i && consecutive_read) begin req_cutter_burst.isburst = 1'b1; - req_tgt_addr_o[0] = req_bursted_tgt_addr; - req_wdata_o[0] = req_bursted_wdata; - req_wen_o[0] = req_bursted_wen; - req_be_o[0] = req_bursted_be; - req_burst_o[0] = req_bursted_burst; - req_valid_o[0] = req_bursted_valid; - req_ready_o[0] = cutter_ready; + req_ini_addr_o[0] = req_bursted_ini_addr; + req_tgt_addr_o[0] = req_bursted_tgt_addr; + req_wdata_o[0] = req_bursted_wdata; + req_wen_o[0] = req_bursted_wen; + req_be_o[0] = req_bursted_be; + req_burst_o[0] = req_bursted_burst; + req_valid_o[0] = req_bursted_valid; + req_ready_o[0] = cutter_ready; // Silence other ports for (int i = 1; i < NumIn; i++) begin - req_tgt_addr_o[i] = '0; - req_wdata_o[i] = '0; - req_wen_o[i] = 1'b0; - req_be_o[i] = '0; - req_burst_o[i] = '0; - req_valid_o[i] = 1'b0; - req_ready_o[i] = cutter_ready; + req_ini_addr_o[i] = '0; + req_tgt_addr_o[i] = '0; + req_wdata_o[i] = '0; + req_wen_o[i] = 1'b0; + req_be_o[i] = '0; + req_burst_o[i] = '0; + req_valid_o[i] = 1'b0; + 
req_ready_o[i] = cutter_ready; end - end else begin - // Bypass input - req_ini_addr_o = req_ini_addr_i; - req_tgt_addr_o = req_tgt_addr_i; - req_wdata_o = req_wdata_i; - req_wen_o = req_wen_i; - req_be_o = req_be_i; - req_burst_o = '0; - req_valid_o = req_valid_i; - req_ready_o = req_ready_i; end end @@ -177,9 +217,6 @@ module burst_req_grouper /* Response */ /*************/ - - localparam int unsigned NumGroup = RspGF > 1 ? NumIn >> $clog2(RspGF) : NumIn; - if (RspGF == 1) begin: gen_default_assignment // Default assignment @@ -197,7 +234,7 @@ module burst_req_grouper resp_valid_o = resp_valid_i; resp_ready_o = resp_ready_i; - for (int ii = 0; ii < NumGroup; ii++) begin + for (int ii = 0; ii < NumGroupRsp; ii++) begin if (resp_valid_i[ii*RspGF] && resp_burst_i[ii*RspGF].isburst) begin // If the response is grouped only one every RspGF input will be // valid. If any of the other inputs is valid give them priority. diff --git a/rtl/variable_latency_interconnect/burst_variable_latency_interconnect.sv b/rtl/variable_latency_interconnect/burst_variable_latency_interconnect.sv index 41a7d3e..ba7707f 100644 --- a/rtl/variable_latency_interconnect/burst_variable_latency_interconnect.sv +++ b/rtl/variable_latency_interconnect/burst_variable_latency_interconnect.sv @@ -27,9 +27,8 @@ module burst_variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; parameter int unsigned DataWidth = 32, // Data Word Width parameter int unsigned BeWidth = DataWidth/8, // Byte Strobe Width parameter int unsigned AddrMemWidth = 12, // Number of Address bits per Target - parameter int unsigned RspGF = 1, // Grouping Factor for the Burst Response parameter int unsigned BurstWidth = 1, // Burst Signal Width - parameter int unsigned BurstRspWidth = (RspGF-1)*DataWidth, // Burst Response Widening + parameter int unsigned BurstRspWidth = 1, // Burst Response Widening parameter bit AxiVldRdy = 1'b1, // Valid/ready signaling // Spill registers // A bit set at position i indicates a 
spill register at the i-th crossbar layer. From 89d030e5750ec5b9786535e222bfdd58b88f4b3b Mon Sep 17 00:00:00 2001 From: mbertuletti Date: Tue, 28 Oct 2025 16:30:59 +0100 Subject: [PATCH 15/15] Correct burst parametrization --- rtl/variable_latency_interconnect/burst_pkg.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rtl/variable_latency_interconnect/burst_pkg.sv b/rtl/variable_latency_interconnect/burst_pkg.sv index ee875e0..3e5450f 100644 --- a/rtl/variable_latency_interconnect/burst_pkg.sv +++ b/rtl/variable_latency_interconnect/burst_pkg.sv @@ -18,7 +18,7 @@ package burst_pkg; // Maximum length of the issued burst localparam integer unsigned BurstLen = `ifdef BURSTLEN `BURSTLEN `else 1 `endif; - parameter int unsigned BurstLenWidth = BurstLen == 1 ? 1 : $clog2(BurstLen); + parameter int unsigned BurstLenWidth = BurstLen == 1 ? 0 : $clog2(BurstLen)+1; // Grouped request in bursted writes localparam integer unsigned ReqGF = `ifdef GROUP_REQ `GROUP_REQ `else 1 `endif;