L1.5: make the number of hardware threads configurable.

Adds CONFIG_L15_NUM_THREADS (environment variable / sims option, default 2),
emits `L15_NUM_THREADS and `L15_THREADID_WIDTH from it, and replaces the
hand-unrolled thread 0 / thread 1 logic in the MSHR and the pipeline with
loops over `L15_NUM_THREADS.

Review fixes folded into this revision:
  * l15_mshr: the reset replication used PACKET_HOME_ID_WIDTH without the
    macro backtick.
  * l15_pipeline: the thread-id outputs of the match loops are given a
    default of 0 before each loop, so the combinational blocks drive them on
    every path (no inferred latch) and keep the old "no match -> thread 0"
    result.
  * l15.h: `L15_THREADID_WIDTH is ceil(log2(N)) with a minimum of one bit,
    computed with integer arithmetic instead of a truncated float log.

diff --git a/piton/design/chip/tile/l15/rtl/l15_mshr.v.pyv b/piton/design/chip/tile/l15/rtl/l15_mshr.v.pyv
index bd340d26c..46a3a7dcf 100644
--- a/piton/design/chip/tile/l15/rtl/l15_mshr.v.pyv
+++ b/piton/design/chip/tile/l15/rtl/l15_mshr.v.pyv
@@ -158,30 +158,30 @@ reg [`L15_PADDR_HI:0] tmp_st_address [`L15_NUM_THREADS-1:0];
 reg [`L15_PADDR_HI:0] tmp_ld_address [`L15_NUM_THREADS-1:0];
 reg [2-1:0] tmp_st_way [`L15_NUM_THREADS-1:0];
 reg [`L15_MESI_TRANS_STATE_WIDTH-1:0] tmp_st_state [`L15_NUM_THREADS-1:0];
+integer i=0;
+
 always @ *
 begin
-    <%
-        t = '''
-    tmp_vals[THREADID] = 0;
-    tmp_vals[THREADID][`L15_MSHR_ID_IFILL] = ifill_val[THREADID];
-    tmp_vals[THREADID][`L15_MSHR_ID_LD] = ld_val[THREADID];
-    tmp_vals[THREADID][`L15_MSHR_ID_ST] = st_val[THREADID];
-
-    tmp_st_address[THREADID] = st_address[THREADID];
-    tmp_ld_address[THREADID] = ld_address[THREADID];
-    tmp_st_way[THREADID] = st_way[THREADID];
-    tmp_st_state[THREADID] =st_state[THREADID];
-        '''
-
-        for i in range (2):
-            tt = t.replace('THREADID', repr(i))
-            print(tt)
-    %>
-    mshr_pipe_vals_s1 = {tmp_vals[1], tmp_vals[0]};
-    mshr_pipe_ld_address = {tmp_ld_address[1], tmp_ld_address[0]};
-    mshr_pipe_st_address = {tmp_st_address[1], tmp_st_address[0]};
-    mshr_pipe_st_way_s1 = {tmp_st_way[1], tmp_st_way[0]};
-    mshr_pipe_st_state_s1 = {tmp_st_state[1], tmp_st_state[0]};
+
+    for(i = 0; i < `L15_NUM_THREADS; i = i+1)
+    begin
+        tmp_vals[i] = 0;
+        tmp_vals[i][`L15_MSHR_ID_IFILL] = ifill_val[i];
+        tmp_vals[i][`L15_MSHR_ID_LD] = ld_val[i];
+        tmp_vals[i][`L15_MSHR_ID_ST] = st_val[i];
+
+        tmp_st_address[i] = st_address[i];
+        tmp_ld_address[i] = ld_address[i];
+        tmp_st_way[i] = st_way[i];
+        tmp_st_state[i] = st_state[i];
+
+        mshr_pipe_vals_s1[(`L15_NUM_MSHRID_PER_THREAD*(i+1))-1 -: `L15_NUM_MSHRID_PER_THREAD] = tmp_vals[i];
+        mshr_pipe_ld_address[(`L15_PADDR_WIDTH*(i+1))-1 -: `L15_PADDR_WIDTH] = tmp_ld_address[i];
+        mshr_pipe_st_address[(`L15_PADDR_WIDTH*(i+1))-1 -: `L15_PADDR_WIDTH] = tmp_st_address[i];
+        mshr_pipe_st_way_s1[(2*(i+1))-1 -: 2] = tmp_st_way[i];
+        mshr_pipe_st_state_s1[(`L15_MESI_TRANS_STATE_WIDTH*(i+1))-1 -: `L15_MESI_TRANS_STATE_WIDTH] = tmp_st_state[i];
+    end
+
 
     // S1 read
     mshr_pipe_readres_homeid_s1[`PACKET_HOME_ID_WIDTH-1:0] = 0;
@@ -361,10 +361,11 @@ always @ (posedge clk)
 begin
     if (!rst_n)
     begin
-        st_homeid[0] <= 0;
-        st_homeid[1] <= 0;
-        ld_homeid[0] <= 0;
-        ld_homeid[1] <= 0;
+        for(i = 0; i < `L15_NUM_THREADS; i = i+1)
+        begin
+            st_homeid[i] <= {`PACKET_HOME_ID_WIDTH{1'b0}};
+            ld_homeid[i] <= {`PACKET_HOME_ID_WIDTH{1'b0}};
+        end
     end
     else
     begin
diff --git a/piton/design/chip/tile/l15/rtl/l15_pipeline.v.pyv b/piton/design/chip/tile/l15/rtl/l15_pipeline.v.pyv
index 547417467..1fb1d9d44 100644
--- a/piton/design/chip/tile/l15/rtl/l15_pipeline.v.pyv
+++ b/piton/design/chip/tile/l15/rtl/l15_pipeline.v.pyv
@@ -393,6 +393,8 @@ reg [`L15_MESI_TRANS_STATE_WIDTH-1:0] mshr_st_state_array [`L15_THREAD_ARRAY_MAS
 reg [`L15_PADDR_MASK] mshr_st_address_array [`L15_THREAD_ARRAY_MASK];
 reg [`L15_PADDR_MASK] mshr_ld_address_array [`L15_THREAD_ARRAY_MASK];
 reg [`L15_WAY_MASK] mshr_st_way_array [`L15_THREAD_ARRAY_MASK];
+integer i=0;
+
 always @ *
 begin
     pipe_mshr_readreq_mshrid_s1 = noc2decoder_l15_mshrid;
@@ -402,18 +404,16 @@ begin
     // predecode_mshr_read_address_s1 = mshr_pipe_address_s1;
     predecode_mshr_read_homeid_s1 = mshr_pipe_readres_homeid_s1;
 
-    // mshr_val_array
-    mshr_val_array[0] = mshr_pipe_vals_s1[`L15_NUM_MSHRID_PER_THREAD*1 - 1 -: `L15_NUM_MSHRID_PER_THREAD];
-    mshr_st_state_array[0] = mshr_pipe_st_state_s1[`L15_MESI_TRANS_STATE_WIDTH*1 - 1 -: `L15_MESI_TRANS_STATE_WIDTH];
-    mshr_st_address_array[0] = mshr_pipe_st_address[`L15_PADDR_WIDTH*1 - 1 -: `L15_PADDR_WIDTH];
-    mshr_ld_address_array[0] = mshr_pipe_ld_address[`L15_PADDR_WIDTH*1 - 1 -: `L15_PADDR_WIDTH];
-    mshr_st_way_array[0] = mshr_pipe_st_way_s1[2*1 - 1 -: 2];
-
-    mshr_val_array[1] = mshr_pipe_vals_s1[`L15_NUM_MSHRID_PER_THREAD*2 - 1 -: `L15_NUM_MSHRID_PER_THREAD];
-    mshr_st_state_array[1] = mshr_pipe_st_state_s1[`L15_MESI_TRANS_STATE_WIDTH*2 - 1 -: `L15_MESI_TRANS_STATE_WIDTH];
-    mshr_st_address_array[1] = mshr_pipe_st_address[`L15_PADDR_WIDTH*2 - 1 -: `L15_PADDR_WIDTH];
-    mshr_ld_address_array[1] = mshr_pipe_ld_address[`L15_PADDR_WIDTH*2 - 1 -: `L15_PADDR_WIDTH];
-    mshr_st_way_array[1] = mshr_pipe_st_way_s1[2*2 - 1 -: 2];
+    // mshr_val_array
+    for(i = 0; i < `L15_NUM_THREADS; i = i+1)
+    begin
+        mshr_val_array[i] = mshr_pipe_vals_s1[`L15_NUM_MSHRID_PER_THREAD*(i+1) - 1 -: `L15_NUM_MSHRID_PER_THREAD];
+        mshr_st_state_array[i] = mshr_pipe_st_state_s1[`L15_MESI_TRANS_STATE_WIDTH*(i+1) - 1 -: `L15_MESI_TRANS_STATE_WIDTH];
+        mshr_st_address_array[i] = mshr_pipe_st_address[`L15_PADDR_WIDTH*(i+1) - 1 -: `L15_PADDR_WIDTH];
+        mshr_ld_address_array[i] = mshr_pipe_ld_address[`L15_PADDR_WIDTH*(i+1) - 1 -: `L15_PADDR_WIDTH];
+        mshr_st_way_array[i] = mshr_pipe_st_way_s1[`L15_WAY_WIDTH*(i+1) - 1 -: `L15_WAY_WIDTH];
+    end
+
 end
 
 // match pcx address to special accesses
@@ -434,10 +434,11 @@ begin
 end
 
 // decode requests to predecode signals
-reg predecode_tagcheck_matched_t0ld_s1;
-reg predecode_tagcheck_matched_t0st_s1;
-reg predecode_tagcheck_matched_t1ld_s1;
-reg predecode_tagcheck_matched_t1st_s1;
+reg predecode_tagcheck_matched_trd_ld_s1 [`L15_THREAD_ARRAY_MASK]; // each element belongs to one thread
+reg predecode_tagcheck_matched_trd_st_s1 [`L15_THREAD_ARRAY_MASK];
+reg predecode_tagcheck_matched_lds_s1;
+reg predecode_tagcheck_matched_sts_s1;
+
 reg predecode_int_vec_dis_s1;
 reg predecode_tagcheck_matched_s1;
 reg [19:4] predecode_partial_tag_s1;
@@ -833,22 +834,27 @@ begin
     // TAG CHECKING
     predecode_partial_tag_s1[19:4] = pcxdecoder_l15_address[19:4];
     // compare partial tag to save energy & timing
-    predecode_tagcheck_matched_t0ld_s1 = mshr_val_array[0][`L15_MSHR_ID_LD]
-                                            && (predecode_partial_tag_s1[19:4] == mshr_ld_address_array[0][19:4]);
-    predecode_tagcheck_matched_t1ld_s1 = mshr_val_array[1][`L15_MSHR_ID_LD]
-                                            && (predecode_partial_tag_s1[19:4] == mshr_ld_address_array[1][19:4]);
-    predecode_tagcheck_matched_t0st_s1 = mshr_val_array[0][`L15_MSHR_ID_ST]
-                                            && (pcxdecoder_l15_address[39:4] == mshr_st_address_array[0][39:4]);
-    predecode_tagcheck_matched_t1st_s1 = mshr_val_array[1][`L15_MSHR_ID_ST]
-                                            && (pcxdecoder_l15_address[39:4] == mshr_st_address_array[1][39:4]);
-
-    predecode_tagcheck_matched_s1 = predecode_tagcheck_matched_t0ld_s1 || predecode_tagcheck_matched_t1ld_s1
-                                    || predecode_tagcheck_matched_t0st_s1 || predecode_tagcheck_matched_t1st_s1;
-
-
+    predecode_tagcheck_matched_lds_s1 = 0;
+    predecode_tagcheck_matched_sts_s1 = 0;
+    predecode_hit_stbuf_threadid_s1 = 0; // default when no store matches; keeps this block latch-free
+
+    for(i = 0; i < `L15_NUM_THREADS; i = i+1)
+    begin
+        predecode_tagcheck_matched_trd_ld_s1[i] = mshr_val_array[i][`L15_MSHR_ID_LD]
+                                            && (predecode_partial_tag_s1[19:4] == mshr_ld_address_array[i][19:4]);
+        predecode_tagcheck_matched_trd_st_s1[i] = mshr_val_array[i][`L15_MSHR_ID_ST]
+                                            && (pcxdecoder_l15_address[39:4] == mshr_st_address_array[i][39:4]);
+
+        predecode_tagcheck_matched_lds_s1 = predecode_tagcheck_matched_trd_ld_s1[i] | predecode_tagcheck_matched_lds_s1;
+        predecode_tagcheck_matched_sts_s1 = predecode_tagcheck_matched_trd_st_s1[i] | predecode_tagcheck_matched_sts_s1;
+
+        if(predecode_tagcheck_matched_trd_st_s1[i] == 1)
+            predecode_hit_stbuf_threadid_s1 = i;
+    end
+
+    predecode_tagcheck_matched_s1 = predecode_tagcheck_matched_lds_s1 | predecode_tagcheck_matched_sts_s1;
     // misc
-    predecode_hit_stbuf_s1 = predecode_tagcheck_matched_t0st_s1 || predecode_tagcheck_matched_t1st_s1;
-    predecode_hit_stbuf_threadid_s1 = predecode_tagcheck_matched_t1st_s1 ? 1'b1 : 1'b0;
+    predecode_hit_stbuf_s1 = predecode_tagcheck_matched_sts_s1;
     // note: only work with 2 threads for now; need to change the algo of mshr if need to increase the num of threads
 end
 
@@ -3302,31 +3308,39 @@ reg [`L15_UNPARAM_1_0] stbuf_way_s3;
 // wmt todo: move calculation to s2
 // STORE BUFFER STUFF
 always @ * begin
-`ifdef PITON_ASIC_RTL
-    stbuf_compare_address_match_s3[0] = mshr_st_address_array[0][10:4] == cache_index_s3;
-`else
-    stbuf_compare_address_match_s3[0] = mshr_st_address_array[0][39:4] == address_s3[39:4];
-`endif
-    stbuf_compare_match_s3[0] = mshr_val_array[0][`L15_MSHR_ID_ST]
-                                    && (mshr_st_state_array[0] == `L15_MESI_TRANSITION_STATE_SM)
-                                    && (stbuf_compare_address_match_s3[0] == 1'b1);
-    stbuf_compare_lru_match_s3[0] = stbuf_compare_match_s3[0] && (mshr_st_way_array[0] == lru_way_s3);
-
-`ifdef PITON_ASIC_RTL
-    stbuf_compare_address_match_s3[1] = mshr_st_address_array[1][10:4] == cache_index_s3;
-`else
-    stbuf_compare_address_match_s3[1] = mshr_st_address_array[1][39:4] == address_s3[39:4];
-`endif
-    stbuf_compare_match_s3[1] = mshr_val_array[1][`L15_MSHR_ID_ST]
-                                    && (mshr_st_state_array[1] == `L15_MESI_TRANSITION_STATE_SM)
-                                    && (stbuf_compare_address_match_s3[1] == 1'b1);
-    stbuf_compare_lru_match_s3[1] = stbuf_compare_match_s3[1] && (mshr_st_way_array[1] == lru_way_s3);
+
+    stbuf_compare_match_val_s3 = 0;
+    stbuf_compare_lru_match_val_s3 = 0;
+    stbuf_compare_threadid_s3 = 0;     // default when no entry matches; also indexes stbuf_way_s3 below
+    stbuf_compare_lru_threadid_s3 = 0; // default when no entry matches the LRU way
+    for(i = 0; i < `L15_NUM_THREADS; i = i+1)
+    begin
+        `ifdef PITON_ASIC_RTL
+            stbuf_compare_address_match_s3[i] = mshr_st_address_array[i][10:4] == cache_index_s3;
+        `else
+            stbuf_compare_address_match_s3[i] = mshr_st_address_array[i][39:4] == address_s3[39:4];
+        `endif
+
+        stbuf_compare_match_s3[i] = mshr_val_array[i][`L15_MSHR_ID_ST]
+                                    && (mshr_st_state_array[i] == `L15_MESI_TRANSITION_STATE_SM)
+                                    && (stbuf_compare_address_match_s3[i] == 1'b1);
 
-    stbuf_compare_threadid_s3 = stbuf_compare_match_s3[1] ? 1'b1 : 1'b0;
-    stbuf_compare_lru_threadid_s3 = stbuf_compare_lru_match_s3[1] ? 1'b1 : 1'b0;
-    stbuf_compare_match_val_s3 = stbuf_compare_match_s3[0] || stbuf_compare_match_s3[1];
-    stbuf_compare_lru_match_val_s3 = stbuf_compare_lru_match_s3[0] || stbuf_compare_lru_match_s3[1];
+        stbuf_compare_lru_match_s3[i] = stbuf_compare_match_s3[i] && (mshr_st_way_array[i] == lru_way_s3);
+        if (stbuf_compare_match_s3[i] == 1)
+        begin
+            stbuf_compare_threadid_s3 = i;
+        end
+
+        if (stbuf_compare_lru_match_s3[i] == 1)
+        begin
+            stbuf_compare_lru_threadid_s3 = i;
+        end
+
+        stbuf_compare_match_val_s3 = stbuf_compare_match_s3[i] | stbuf_compare_match_val_s3;
+        stbuf_compare_lru_match_val_s3 = stbuf_compare_lru_match_s3[i] | stbuf_compare_lru_match_val_s3;
+    end
 
+
     stbuf_way_s3 = mshr_st_way_array[stbuf_compare_threadid_s3];
     // stbuf_way_wmt_data_s3 = wmt_data_s3[stbuf_way_s3];
     // stbuf_way_to_l1_s3 = stbuf_way_wmt_data_s3[`L15_UNPARAM_1_0];
@@ -3338,8 +3352,8 @@ always @ * begin
 
     // expanding some signals
     tagcheck_way_mask_s3[`L15_UNPARAM_3_0] = tagcheck_way_s3 == 2'd0 ? 4'b0001 :
-                                            2'd1 ? 4'b0010 :
-                                            2'd2 ? 4'b0100 :
+                                            tagcheck_way_s3 == 2'd1 ? 4'b0010 :
+                                            tagcheck_way_s3 == 2'd2 ? 4'b0100 :
                                                                 4'b1000 ;
 
     tagcheck_state_me_s3 = tagcheck_state_s3 == `L15_MESI_STATE_M || tagcheck_state_s3 == `L15_MESI_STATE_E;
diff --git a/piton/design/chip/tile/l15/rtl/noc2decoder.v b/piton/design/chip/tile/l15/rtl/noc2decoder.v
index 48bc78719..4a2e73b24 100644
--- a/piton/design/chip/tile/l15/rtl/noc2decoder.v
+++ b/piton/design/chip/tile/l15/rtl/noc2decoder.v
@@ -103,8 +103,13 @@ begin
     noc2decoder_l15_csm_mshrid = noc2_mshrid[`L15_CSM_NUM_TICKETS_LOG2-1:0];
     // the threadid is encoded in the mshrid sent to L2, is the next L15_THREADID_WIDTH bits after the first L15_MSHR_ID_WIDTH bits
     noc2decoder_l15_threadid = noc2_mshrid[`L15_MSHR_ID_WIDTH+`L15_THREADID_WIDTH -1 -: `L15_THREADID_WIDTH];
-    noc2decoder_l15_hmc_fill = noc2_mshrid[`MSG_MSHRID_WIDTH-1];
-
+
+    `ifdef NO_RTL_CSM
+        noc2decoder_l15_hmc_fill = 1'b0; //noc2_mshrid[`MSG_MSHRID_WIDTH-1];
+    `else
+        noc2decoder_l15_hmc_fill = noc2_mshrid[`MSG_MSHRID_WIDTH-1];
+    `endif
+
     noc2decoder_l15_l2miss = noc2_data[`MSG_L2_MISS];
     noc2decoder_l15_icache_type = noc2_data[`MSG_CACHE_TYPE];
     noc2decoder_l15_f4b = 0;
diff --git a/piton/design/include/l15.h.pyv b/piton/design/include/l15.h.pyv
index 3e8f62a38..c99153606 100644
--- a/piton/design/include/l15.h.pyv
+++ b/piton/design/include/l15.h.pyv
@@ -85,6 +85,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
     print("`define L15_WMT_ALIAS_WIDTH %d" % int(math.log(l15_set_count/l1d_set_count, 2)))
     print("`define L15_CACHELINE_WIDTH %d" % (L15_LINE_SIZE*8))
+
+    print("`define L15_NUM_THREADS %d" % CONFIG_L15_NUM_THREADS)
+    print("`define L15_THREADID_WIDTH %d" % max(1, (CONFIG_L15_NUM_THREADS - 1).bit_length()))  # ceil(log2(N)), never 0 bits
 %>
 
 `define L15_MESI_STATE_WIDTH 2
@@ -209,10 +212,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 `define L15_MSHR_ID_LD 2'd2
 `define L15_MSHR_ID_ST 2'd3
 
-`define L15_NUM_THREADS 2
-`define L15_THREADID_WIDTH 1
-`define L15_THREADID_MASK 0:0
-`define L15_THREAD_ARRAY_MASK 1:0
+// if NO_RTL_CSM is defined L15_NUM_THREADS could go up to 64, otherwise 32.
+`define L15_THREADID_MASK `L15_THREADID_WIDTH-1:0
+`define L15_THREAD_ARRAY_MASK `L15_NUM_THREADS-1:0
 `define L15_NUM_MSHRID_PER_THREAD 4
 
 // pipeline OPs
diff --git a/piton/tools/bin/pyhplib.py b/piton/tools/bin/pyhplib.py
index 2cdffcaab..aa4172f0a 100644
--- a/piton/tools/bin/pyhplib.py
+++ b/piton/tools/bin/pyhplib.py
@@ -72,6 +72,7 @@
 # cache configurations
 CONFIG_L15_SIZE = int(os.environ.get('CONFIG_L15_SIZE', '8192'))
 CONFIG_L15_ASSOCIATIVITY = int(os.environ.get('CONFIG_L15_ASSOCIATIVITY', '4'))
+CONFIG_L15_NUM_THREADS = int(os.environ.get('CONFIG_L15_NUM_THREADS', '2'))
 CONFIG_L1D_SIZE = int(os.environ.get('CONFIG_L1D_SIZE', '8192'))
 CONFIG_L1D_ASSOCIATIVITY = int(os.environ.get('CONFIG_L1D_ASSOCIATIVITY', '4'))
 CONFIG_L1I_SIZE = int(os.environ.get('CONFIG_L1I_SIZE', '16384'))
diff --git a/piton/tools/src/sims/sims,2.0 b/piton/tools/src/sims/sims,2.0
index d2905648f..d3ab39331 100755
--- a/piton/tools/src/sims/sims,2.0
+++ b/piton/tools/src/sims/sims,2.0
@@ -2637,6 +2637,7 @@ sub parse_args
                      "config_l1d_associativity",
                      "config_l15_size",
                      "config_l15_associativity",
+                     "config_l15_num_threads",
                      "config_l2_size",
                      "config_l2_associativity",
                      );