Skip to content

Commit

Permalink
L15 MSHR fixed
Browse files Browse the repository at this point in the history
  • Loading branch information
abbasBSC committed Aug 29, 2023
1 parent b32615b commit 6aaec1b
Show file tree
Hide file tree
Showing 10 changed files with 180 additions and 102 deletions.
23 changes: 12 additions & 11 deletions piton/ariane_setup.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/bin/bash
# Modified by Barcelona Supercomputing Center on March 3rd, 2022
# Copyright 2018 ETH Zurich and University of Bologna.
# Copyright and related rights are licensed under the Solderpad Hardware
# License, Version 0.51 (the "License"); you may not use this file except in
Expand Down Expand Up @@ -51,8 +52,11 @@ echo "make sure that you source this script in a bash shell in the root folder o

if [ -z "$BASH" ] || [ ${0: -4} != "bash" ]
then
echo "not in bash ($0), aborting"
return
#echo "not in bash ($0), aborting"
# Commented out, as it is not really an issue and it may fail when running the CI/CD
echo "not in bash ($0)"
#return

fi

SCRIPTNAME=ariane_setup.sh
Expand All @@ -76,18 +80,15 @@ export ARIANE_ROOT=${PITON_ROOT}/piton/design/chip/tile/ariane/
export CXX=g++ CC=gcc
# customize this to a fast local disk

if [ "$RISCV" == "" ]
then
export RISCV=$HOME/scratch/riscv_install
fi
export VERILATOR_ROOT=$ARIANE_ROOT/tmp/verilator-4.014/

export RISCV=~/scratch/`whoami`/riscv_install
export VERILATOR_ROOT=~/scratch/`whoami`/verilator_4_104/
# setup paths
export PATH=$RISCV/bin:$VERILATOR_ROOT/bin:$PATH
export LIBRARY_PATH=$RISCV/lib
export LD_LIBRARY_PATH=$RISCV/lib
export C_INCLUDE_PATH=$RISCV/include:$VERILATOR_ROOT/include
export CPLUS_INCLUDE_PATH=$RISCV/include:$VERILATOR_ROOT/include
# Prepend the RISC-V / Verilator directories to the existing search paths.
# ${VAR:+:$VAR} appends the previous value (with its ':' separator) only when
# it is non-empty. A bare "dir:$VAR" with VAR unset would leave a trailing ':',
# i.e. an empty path element, which the dynamic loader and GCC both interpret
# as "search the current working directory".
export LD_LIBRARY_PATH=$RISCV/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
export C_INCLUDE_PATH=$RISCV/include:$VERILATOR_ROOT/include${C_INCLUDE_PATH:+:$C_INCLUDE_PATH}
export CPLUS_INCLUDE_PATH=$RISCV/include:$VERILATOR_ROOT/include${CPLUS_INCLUDE_PATH:+:$CPLUS_INCLUDE_PATH}
export MODELSIM_HOME=$HOME/scratch/questa_install/questasim

# source OpenPiton setup script
# note: customize this script to reflect your tool setup
Expand Down
53 changes: 27 additions & 26 deletions piton/design/chip/tile/l15/rtl/l15_mshr.v.pyv
Original file line number Diff line number Diff line change
Expand Up @@ -158,30 +158,30 @@ reg [`L15_PADDR_HI:0] tmp_st_address [`L15_NUM_THREADS-1:0];
reg [`L15_PADDR_HI:0] tmp_ld_address [`L15_NUM_THREADS-1:0];
reg [2-1:0] tmp_st_way [`L15_NUM_THREADS-1:0];
reg [`L15_MESI_TRANS_STATE_WIDTH-1:0] tmp_st_state [`L15_NUM_THREADS-1:0];
integer i=0;

always @ *
begin
<%
t = '''
tmp_vals[THREADID] = 0;
tmp_vals[THREADID][`L15_MSHR_ID_IFILL] = ifill_val[THREADID];
tmp_vals[THREADID][`L15_MSHR_ID_LD] = ld_val[THREADID];
tmp_vals[THREADID][`L15_MSHR_ID_ST] = st_val[THREADID];

tmp_st_address[THREADID] = st_address[THREADID];
tmp_ld_address[THREADID] = ld_address[THREADID];
tmp_st_way[THREADID] = st_way[THREADID];
tmp_st_state[THREADID] =st_state[THREADID];
'''

for i in range (2):
tt = t.replace('THREADID', repr(i))
print(tt)
%>
mshr_pipe_vals_s1 = {tmp_vals[1], tmp_vals[0]};
mshr_pipe_ld_address = {tmp_ld_address[1], tmp_ld_address[0]};
mshr_pipe_st_address = {tmp_st_address[1], tmp_st_address[0]};
mshr_pipe_st_way_s1 = {tmp_st_way[1], tmp_st_way[0]};
mshr_pipe_st_state_s1 = {tmp_st_state[1], tmp_st_state[0]};

for(i = 0; i < `L15_NUM_THREADS; i = i+1)
begin
tmp_vals[i] = 0;
tmp_vals[i][`L15_MSHR_ID_IFILL] = ifill_val[i];
tmp_vals[i][`L15_MSHR_ID_LD] = ld_val[i];
tmp_vals[i][`L15_MSHR_ID_ST] = st_val[i];

tmp_st_address[i] = st_address[i];
tmp_ld_address[i] = ld_address[i];
tmp_st_way[i] = st_way[i];
tmp_st_state[i] = st_state[i];

mshr_pipe_vals_s1[(`L15_NUM_MSHRID_PER_THREAD*(i+1))-1 -: `L15_NUM_MSHRID_PER_THREAD] = tmp_vals[i];
mshr_pipe_ld_address[(`L15_PADDR_WIDTH*(i+1))-1 -: `L15_PADDR_WIDTH] = tmp_ld_address[i];
mshr_pipe_st_address[(`L15_PADDR_WIDTH*(i+1))-1 -: `L15_PADDR_WIDTH] = tmp_st_address[i];
mshr_pipe_st_way_s1[(2*(i+1))-1 -: 2] = tmp_st_way[i];
mshr_pipe_st_state_s1[(`L15_MESI_TRANS_STATE_WIDTH*(i+1))-1 -: `L15_MESI_TRANS_STATE_WIDTH] = tmp_st_state[i];
end


// S1 read
mshr_pipe_readres_homeid_s1[`PACKET_HOME_ID_WIDTH-1:0] = 0;
Expand Down Expand Up @@ -361,10 +361,11 @@ always @ (posedge clk)
begin
if (!rst_n)
begin
st_homeid[0] <= 0;
st_homeid[1] <= 0;
ld_homeid[0] <= 0;
ld_homeid[1] <= 0;
// Clear the per-thread store/load home-id registers while rst_n is low.
// A plain 0 is used rather than the SystemVerilog-only '0 fill literal so the
// generated .v file still parses under a Verilog-2001 front end; the value
// assigned is the same (all bits zero).
for(i = 0; i < `L15_NUM_THREADS; i = i+1)
begin
st_homeid[i] <= 0;
ld_homeid[i] <= 0;
end
end
else
begin
Expand Down
139 changes: 82 additions & 57 deletions piton/design/chip/tile/l15/rtl/l15_pipeline.v.pyv
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,8 @@ reg [`L15_MESI_TRANS_STATE_WIDTH-1:0] mshr_st_state_array [`L15_THREAD_ARRAY_MAS
reg [`L15_PADDR_MASK] mshr_st_address_array [`L15_THREAD_ARRAY_MASK];
reg [`L15_PADDR_MASK] mshr_ld_address_array [`L15_THREAD_ARRAY_MASK];
reg [`L15_WAY_MASK] mshr_st_way_array [`L15_THREAD_ARRAY_MASK];
integer i=0;

always @ *
begin
pipe_mshr_readreq_mshrid_s1 = noc2decoder_l15_mshrid;
Expand All @@ -403,17 +405,27 @@ begin
predecode_mshr_read_homeid_s1 = mshr_pipe_readres_homeid_s1;

// mshr_val_array
mshr_val_array[0] = mshr_pipe_vals_s1[`L15_NUM_MSHRID_PER_THREAD*1 - 1 -: `L15_NUM_MSHRID_PER_THREAD];
mshr_st_state_array[0] = mshr_pipe_st_state_s1[`L15_MESI_TRANS_STATE_WIDTH*1 - 1 -: `L15_MESI_TRANS_STATE_WIDTH];
mshr_st_address_array[0] = mshr_pipe_st_address[`L15_PADDR_WIDTH*1 - 1 -: `L15_PADDR_WIDTH];
mshr_ld_address_array[0] = mshr_pipe_ld_address[`L15_PADDR_WIDTH*1 - 1 -: `L15_PADDR_WIDTH];
mshr_st_way_array[0] = mshr_pipe_st_way_s1[2*1 - 1 -: 2];

mshr_val_array[1] = mshr_pipe_vals_s1[`L15_NUM_MSHRID_PER_THREAD*2 - 1 -: `L15_NUM_MSHRID_PER_THREAD];
mshr_st_state_array[1] = mshr_pipe_st_state_s1[`L15_MESI_TRANS_STATE_WIDTH*2 - 1 -: `L15_MESI_TRANS_STATE_WIDTH];
mshr_st_address_array[1] = mshr_pipe_st_address[`L15_PADDR_WIDTH*2 - 1 -: `L15_PADDR_WIDTH];
mshr_ld_address_array[1] = mshr_pipe_ld_address[`L15_PADDR_WIDTH*2 - 1 -: `L15_PADDR_WIDTH];
mshr_st_way_array[1] = mshr_pipe_st_way_s1[2*2 - 1 -: 2];
//mshr_val_array[0] = mshr_pipe_vals_s1[`L15_NUM_MSHRID_PER_THREAD*1 - 1 -: `L15_NUM_MSHRID_PER_THREAD];
//mshr_st_state_array[0] = mshr_pipe_st_state_s1[`L15_MESI_TRANS_STATE_WIDTH*1 - 1 -: `L15_MESI_TRANS_STATE_WIDTH];
//mshr_st_address_array[0] = mshr_pipe_st_address[`L15_PADDR_WIDTH*1 - 1 -: `L15_PADDR_WIDTH];
//mshr_ld_address_array[0] = mshr_pipe_ld_address[`L15_PADDR_WIDTH*1 - 1 -: `L15_PADDR_WIDTH];
//mshr_st_way_array[0] = mshr_pipe_st_way_s1[2*1 - 1 -: 2];
//
//mshr_val_array[1] = mshr_pipe_vals_s1[`L15_NUM_MSHRID_PER_THREAD*2 - 1 -: `L15_NUM_MSHRID_PER_THREAD];
//mshr_st_state_array[1] = mshr_pipe_st_state_s1[`L15_MESI_TRANS_STATE_WIDTH*2 - 1 -: `L15_MESI_TRANS_STATE_WIDTH];
//mshr_st_address_array[1] = mshr_pipe_st_address[`L15_PADDR_WIDTH*2 - 1 -: `L15_PADDR_WIDTH];
//mshr_ld_address_array[1] = mshr_pipe_ld_address[`L15_PADDR_WIDTH*2 - 1 -: `L15_PADDR_WIDTH];
//mshr_st_way_array[1] = mshr_pipe_st_way_s1[2*2 - 1 -: 2];

for(i = 0; i < `L15_NUM_THREADS; i = i+1)
begin
mshr_val_array[i] = mshr_pipe_vals_s1[`L15_NUM_MSHRID_PER_THREAD*(i+1) - 1 -: `L15_NUM_MSHRID_PER_THREAD];
mshr_st_state_array[i] = mshr_pipe_st_state_s1[`L15_MESI_TRANS_STATE_WIDTH*(i+1) - 1 -: `L15_MESI_TRANS_STATE_WIDTH];
mshr_st_address_array[i] = mshr_pipe_st_address[`L15_PADDR_WIDTH*(i+1) - 1 -: `L15_PADDR_WIDTH];
mshr_ld_address_array[i] = mshr_pipe_ld_address[`L15_PADDR_WIDTH*(i+1) - 1 -: `L15_PADDR_WIDTH];
mshr_st_way_array[i] = mshr_pipe_st_way_s1[`L15_WAY_WIDTH*(i+1) - 1 -: `L15_WAY_WIDTH];
end

end

// match pcx address to special accesses
Expand All @@ -434,10 +446,15 @@ begin
end

// decode requests to predecode signals
reg predecode_tagcheck_matched_t0ld_s1;
reg predecode_tagcheck_matched_t0st_s1;
reg predecode_tagcheck_matched_t1ld_s1;
reg predecode_tagcheck_matched_t1st_s1;
reg predecode_tagcheck_matched_trd_ld_s1 [`L15_THREAD_ARRAY_MASK]; // each element belongs to one thread
reg predecode_tagcheck_matched_trd_st_s1 [`L15_THREAD_ARRAY_MASK];
reg predecode_tagcheck_matched_lds_s1;
reg predecode_tagcheck_matched_sts_s1;
//reg predecode_tagcheck_matched_t0ld_s1;
//reg predecode_tagcheck_matched_t0st_s1;
//reg predecode_tagcheck_matched_t1ld_s1;
//reg predecode_tagcheck_matched_t1st_s1;

reg predecode_int_vec_dis_s1;
reg predecode_tagcheck_matched_s1;
reg [19:4] predecode_partial_tag_s1;
Expand Down Expand Up @@ -833,22 +850,28 @@ begin

// TAG CHECKING
predecode_partial_tag_s1[19:4] = pcxdecoder_l15_address[19:4]; // compare partial tag to save energy & timing
predecode_tagcheck_matched_t0ld_s1 = mshr_val_array[0][`L15_MSHR_ID_LD]
&& (predecode_partial_tag_s1[19:4] == mshr_ld_address_array[0][19:4]);
predecode_tagcheck_matched_t1ld_s1 = mshr_val_array[1][`L15_MSHR_ID_LD]
&& (predecode_partial_tag_s1[19:4] == mshr_ld_address_array[1][19:4]);
predecode_tagcheck_matched_t0st_s1 = mshr_val_array[0][`L15_MSHR_ID_ST]
&& (pcxdecoder_l15_address[39:4] == mshr_st_address_array[0][39:4]);
predecode_tagcheck_matched_t1st_s1 = mshr_val_array[1][`L15_MSHR_ID_ST]
&& (pcxdecoder_l15_address[39:4] == mshr_st_address_array[1][39:4]);

predecode_tagcheck_matched_s1 = predecode_tagcheck_matched_t0ld_s1 || predecode_tagcheck_matched_t1ld_s1
|| predecode_tagcheck_matched_t0st_s1 || predecode_tagcheck_matched_t1st_s1;


// Per-thread MSHR tag check, OR-reduced over all threads.
// Defaults first: every signal written conditionally inside the loop must have
// a value on every path through this combinational block.
predecode_tagcheck_matched_lds_s1 = 0;
predecode_tagcheck_matched_sts_s1 = 0;
// Default the hit thread id to 0 so that no latch is inferred when no store
// entry matches. This reproduces the previous 2-thread behaviour
// (t1 match ? 1 : 0).
// NOTE(review): assumes there is no earlier default in this always block.
predecode_hit_stbuf_threadid_s1 = 0;

for(i = 0; i < `L15_NUM_THREADS; i = i+1)
begin
// load entry: valid and partial-tag (bits 19:4) match
predecode_tagcheck_matched_trd_ld_s1[i] = mshr_val_array[i][`L15_MSHR_ID_LD]
&& (predecode_partial_tag_s1[19:4] == mshr_ld_address_array[i][19:4]);
// store entry: valid and address bits 39:4 match
predecode_tagcheck_matched_trd_st_s1[i] = mshr_val_array[i][`L15_MSHR_ID_ST]
&& (pcxdecoder_l15_address[39:4] == mshr_st_address_array[i][39:4]);

predecode_tagcheck_matched_lds_s1 = predecode_tagcheck_matched_trd_ld_s1[i] | predecode_tagcheck_matched_lds_s1;
predecode_tagcheck_matched_sts_s1 = predecode_tagcheck_matched_trd_st_s1[i] | predecode_tagcheck_matched_sts_s1;

// the last (highest-numbered) matching thread wins
if(predecode_tagcheck_matched_trd_st_s1[i] == 1)
predecode_hit_stbuf_threadid_s1 = i;

end

predecode_tagcheck_matched_s1 = predecode_tagcheck_matched_lds_s1 | predecode_tagcheck_matched_sts_s1;
// misc
predecode_hit_stbuf_s1 = predecode_tagcheck_matched_t0st_s1 || predecode_tagcheck_matched_t1st_s1;
predecode_hit_stbuf_threadid_s1 = predecode_tagcheck_matched_t1st_s1 ? 1'b1 : 1'b0;
predecode_hit_stbuf_s1 = predecode_tagcheck_matched_sts_s1;
//predecode_hit_stbuf_threadid_s1 = predecode_tagcheck_matched_t1st_s1 ? 1'b1 : 1'b0;
// note: only work with 2 threads for now; need to change the algo of mshr if need to increase the num of threads
end

Expand Down Expand Up @@ -3302,31 +3325,33 @@ reg [`L15_UNPARAM_1_0] stbuf_way_s3; // wmt todo: move calculation to s2
// STORE BUFFER STUFF
always @ *
begin
`ifdef PITON_ASIC_RTL
stbuf_compare_address_match_s3[0] = mshr_st_address_array[0][10:4] == cache_index_s3;
`else
stbuf_compare_address_match_s3[0] = mshr_st_address_array[0][39:4] == address_s3[39:4];
`endif
stbuf_compare_match_s3[0] = mshr_val_array[0][`L15_MSHR_ID_ST]
&& (mshr_st_state_array[0] == `L15_MESI_TRANSITION_STATE_SM)
&& (stbuf_compare_address_match_s3[0] == 1'b1);
stbuf_compare_lru_match_s3[0] = stbuf_compare_match_s3[0] && (mshr_st_way_array[0] == lru_way_s3);

`ifdef PITON_ASIC_RTL
stbuf_compare_address_match_s3[1] = mshr_st_address_array[1][10:4] == cache_index_s3;
`else
stbuf_compare_address_match_s3[1] = mshr_st_address_array[1][39:4] == address_s3[39:4];
`endif
stbuf_compare_match_s3[1] = mshr_val_array[1][`L15_MSHR_ID_ST]
&& (mshr_st_state_array[1] == `L15_MESI_TRANSITION_STATE_SM)
&& (stbuf_compare_address_match_s3[1] == 1'b1);
stbuf_compare_lru_match_s3[1] = stbuf_compare_match_s3[1] && (mshr_st_way_array[1] == lru_way_s3);

stbuf_compare_threadid_s3 = stbuf_compare_match_s3[1] ? 1'b1 : 1'b0;
stbuf_compare_lru_threadid_s3 = stbuf_compare_lru_match_s3[1] ? 1'b1 : 1'b0;
stbuf_compare_match_val_s3 = stbuf_compare_match_s3[0] || stbuf_compare_match_s3[1];
stbuf_compare_lru_match_val_s3 = stbuf_compare_lru_match_s3[0] || stbuf_compare_lru_match_s3[1];


// Compare the S3 access against every thread's store-buffer (MSHR store) entry
// and OR-reduce the per-thread results.
stbuf_compare_match_val_s3 = 0;
stbuf_compare_lru_match_val_s3 = 0;
// Default both thread ids to 0 so they are assigned on every path (no inferred
// latch) and so the mshr_st_way_array[stbuf_compare_threadid_s3] lookup that
// follows uses a defined index when nothing matches. This reproduces the
// previous 2-thread behaviour (match[1] ? 1 : 0).
stbuf_compare_threadid_s3 = 0;
stbuf_compare_lru_threadid_s3 = 0;
for(i = 0; i < `L15_NUM_THREADS; i = i+1)
begin
`ifdef PITON_ASIC_RTL
// ASIC build: compare address bits 10:4 against the cache index
stbuf_compare_address_match_s3[i] = mshr_st_address_array[i][10:4] == cache_index_s3;
`else
// otherwise compare address bits 39:4
stbuf_compare_address_match_s3[i] = mshr_st_address_array[i][39:4] == address_s3[39:4];
`endif

// a match needs a valid store entry in the SM transition state at this address
stbuf_compare_match_s3[i] = mshr_val_array[i][`L15_MSHR_ID_ST]
&& (mshr_st_state_array[i] == `L15_MESI_TRANSITION_STATE_SM)
&& (stbuf_compare_address_match_s3[i] == 1'b1);

stbuf_compare_lru_match_s3[i] = stbuf_compare_match_s3[i] && (mshr_st_way_array[i] == lru_way_s3);

// the last (highest-numbered) matching thread wins
if (stbuf_compare_match_s3[i] == 1)
stbuf_compare_threadid_s3 = i;

if (stbuf_compare_lru_match_s3[i] == 1)
stbuf_compare_lru_threadid_s3 = i;

stbuf_compare_match_val_s3 = stbuf_compare_match_s3[i] | stbuf_compare_match_val_s3;
stbuf_compare_lru_match_val_s3 = stbuf_compare_lru_match_s3[i] | stbuf_compare_lru_match_val_s3;
end

stbuf_way_s3 = mshr_st_way_array[stbuf_compare_threadid_s3];
// stbuf_way_wmt_data_s3 = wmt_data_s3[stbuf_way_s3];
// stbuf_way_to_l1_s3 = stbuf_way_wmt_data_s3[`L15_UNPARAM_1_0];
Expand All @@ -3338,8 +3363,8 @@ always @ *
begin
// expanding some signals
tagcheck_way_mask_s3[`L15_UNPARAM_3_0] = tagcheck_way_s3 == 2'd0 ? 4'b0001 :
2'd1 ? 4'b0010 :
2'd2 ? 4'b0100 :
tagcheck_way_s3 == 2'd1 ? 4'b0010 :
tagcheck_way_s3 == 2'd2 ? 4'b0100 :
4'b1000 ;

tagcheck_state_me_s3 = tagcheck_state_s3 == `L15_MESI_STATE_M || tagcheck_state_s3 == `L15_MESI_STATE_E;
Expand Down
9 changes: 7 additions & 2 deletions piton/design/chip/tile/l15/rtl/noc2decoder.v
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,13 @@ begin
noc2decoder_l15_csm_mshrid = noc2_mshrid[`L15_CSM_NUM_TICKETS_LOG2-1:0];
// the threadid is encoded in the mshrid sent to L2, is the next L15_THREADID_WIDTH bits after the first L15_MSHR_ID_WIDTH bits
noc2decoder_l15_threadid = noc2_mshrid[`L15_MSHR_ID_WIDTH+`L15_THREADID_WIDTH -1 -: `L15_THREADID_WIDTH];
noc2decoder_l15_hmc_fill = noc2_mshrid[`MSG_MSHRID_WIDTH-1];


`ifdef NO_RTL_CSM
noc2decoder_l15_hmc_fill = 1'b0; //noc2_mshrid[`MSG_MSHRID_WIDTH-1];
`else
noc2decoder_l15_hmc_fill = noc2_mshrid[`MSG_MSHRID_WIDTH-1];
`endif

noc2decoder_l15_l2miss = noc2_data[`MSG_L2_MISS];
noc2decoder_l15_icache_type = noc2_data[`MSG_CACHE_TYPE];
noc2decoder_l15_f4b = 0;
Expand Down
14 changes: 10 additions & 4 deletions piton/design/include/l15.h.pyv
Original file line number Diff line number Diff line change
Expand Up @@ -209,10 +209,16 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
`define L15_MSHR_ID_LD 2'd2
`define L15_MSHR_ID_ST 2'd3

`define L15_NUM_THREADS 2
`define L15_THREADID_WIDTH 1
`define L15_THREADID_MASK 0:0
`define L15_THREAD_ARRAY_MASK 1:0
// if NO_RTL_CSM is defined L15_NUM_THREADS could go up to 64, otherwise 32.
<%
import math
import pyhplib
from pyhplib import *
# Emit the thread-count macros from the L15_NUM_THREADS value provided by pyhplib.
# The id width is the number of bits needed to encode ids 0..N-1, computed with
# integer arithmetic:
#   - no floating-point log, so no rounding surprises;
#   - rounds up for thread counts that are not a power of two;
#   - clamped to 1 so a single-thread build does not produce a zero-width
#     field (which would expand L15_THREADID_MASK to "-1:0").
# For power-of-two counts >= 2 this yields the same value as int(log2(N)).
print("`define L15_NUM_THREADS %d" % L15_NUM_THREADS)
print("`define L15_THREADID_WIDTH %d" % max(1, (L15_NUM_THREADS - 1).bit_length()))
%>
`define L15_THREADID_MASK `L15_THREADID_WIDTH-1:0
`define L15_THREAD_ARRAY_MASK `L15_NUM_THREADS-1:0
`define L15_NUM_MSHRID_PER_THREAD 4

// pipeline OPs
Expand Down
4 changes: 4 additions & 0 deletions piton/tools/bin/pyhplib.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
MAX_X = 8;
MAX_Y = 8;



PITON_X_TILES = int(os.environ.get('PITON_X_TILES', '-1'))
#print "//x_tiles:", num_tiles

Expand Down Expand Up @@ -87,6 +89,8 @@
L15_LINE_SIZE = 16
L2_LINE_SIZE = 64

L15_NUM_THREADS = int(os.environ.get('L15_NUM_THREADS', '2'))

#########################################################
# BRAM configurations
#########################################################
Expand Down
4 changes: 3 additions & 1 deletion piton/tools/src/sims/manycore.config
Original file line number Diff line number Diff line change
Expand Up @@ -107,12 +107,14 @@
#ifdef FLIST_ORAM
-flist=$DV_ROOT/design/chip/tinyoram/rtl/Flist.oram
-config_rtl=ORAM_ON"
-config_rtl=NO_RTL_CSM
-sim_run_args=+oram"
#endif

// No scan chains
-config_rtl=NO_SCAN

-config_rtl=NO_RTL_CSM

-config_l1i_size=16384 // default
-config_l1i_associativity=4 // default

Expand Down
6 changes: 6 additions & 0 deletions piton/tools/src/sims/sims,2.0
Original file line number Diff line number Diff line change
Expand Up @@ -2651,6 +2651,12 @@ sub parse_args
}
}

# Parse the optional l15_num_threads option (taken as a string value) into %opt.
GetOptions (\%opt,"l15_num_threads=s",
);
# When a true (non-empty, non-zero) value was given, export it through the
# environment as L15_NUM_THREADS; when absent, the variable is left untouched.
if ($opt{l15_num_threads}) {
$ENV{L15_NUM_THREADS} = $opt{l15_num_threads};
}

$ENV{PROTOSYN_RUNTIME_DESIGN_PATH} = $ENV{DV_ROOT} . "/verif/env/manycore";
$ENV{PROTOSYN_RUNTIME_BOARD} = "";

Expand Down
Loading

0 comments on commit 6aaec1b

Please sign in to comment.