Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

L15 MSHR fixed #134

Open
wants to merge 2 commits into
base: openpiton-dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 27 additions & 26 deletions piton/design/chip/tile/l15/rtl/l15_mshr.v.pyv
Original file line number Diff line number Diff line change
Expand Up @@ -158,30 +158,30 @@ reg [`L15_PADDR_HI:0] tmp_st_address [`L15_NUM_THREADS-1:0];
reg [`L15_PADDR_HI:0] tmp_ld_address [`L15_NUM_THREADS-1:0];
reg [2-1:0] tmp_st_way [`L15_NUM_THREADS-1:0];
reg [`L15_MESI_TRANS_STATE_WIDTH-1:0] tmp_st_state [`L15_NUM_THREADS-1:0];
integer i=0;

always @ *
begin
<%
t = '''
tmp_vals[THREADID] = 0;
tmp_vals[THREADID][`L15_MSHR_ID_IFILL] = ifill_val[THREADID];
tmp_vals[THREADID][`L15_MSHR_ID_LD] = ld_val[THREADID];
tmp_vals[THREADID][`L15_MSHR_ID_ST] = st_val[THREADID];

tmp_st_address[THREADID] = st_address[THREADID];
tmp_ld_address[THREADID] = ld_address[THREADID];
tmp_st_way[THREADID] = st_way[THREADID];
tmp_st_state[THREADID] =st_state[THREADID];
'''

for i in range (2):
tt = t.replace('THREADID', repr(i))
print(tt)
%>
mshr_pipe_vals_s1 = {tmp_vals[1], tmp_vals[0]};
mshr_pipe_ld_address = {tmp_ld_address[1], tmp_ld_address[0]};
mshr_pipe_st_address = {tmp_st_address[1], tmp_st_address[0]};
mshr_pipe_st_way_s1 = {tmp_st_way[1], tmp_st_way[0]};
mshr_pipe_st_state_s1 = {tmp_st_state[1], tmp_st_state[0]};

// For every thread: gather its valid bits and store/load bookkeeping into the
// tmp_* arrays, then pack each value into the flattened mshr_pipe_* vectors.
// Thread i occupies the i-th slice of each vector, counting up from bit 0.
for(i = 0; i < `L15_NUM_THREADS; i = i+1)
begin
// clear the whole entry first, then set only the IFILL/LD/ST positions
tmp_vals[i] = 0;
tmp_vals[i][`L15_MSHR_ID_IFILL] = ifill_val[i];
tmp_vals[i][`L15_MSHR_ID_LD] = ld_val[i];
tmp_vals[i][`L15_MSHR_ID_ST] = st_val[i];

tmp_st_address[i] = st_address[i];
tmp_ld_address[i] = ld_address[i];
tmp_st_way[i] = st_way[i];
tmp_st_state[i] = st_state[i];

// indexed part-select: slice i covers bits [W*(i+1)-1 : W*i] for slice width W
mshr_pipe_vals_s1[(`L15_NUM_MSHRID_PER_THREAD*(i+1))-1 -: `L15_NUM_MSHRID_PER_THREAD] = tmp_vals[i];
mshr_pipe_ld_address[(`L15_PADDR_WIDTH*(i+1))-1 -: `L15_PADDR_WIDTH] = tmp_ld_address[i];
mshr_pipe_st_address[(`L15_PADDR_WIDTH*(i+1))-1 -: `L15_PADDR_WIDTH] = tmp_st_address[i];
// the way slice width is the literal 2, matching the [2-1:0] declaration of tmp_st_way
mshr_pipe_st_way_s1[(2*(i+1))-1 -: 2] = tmp_st_way[i];
mshr_pipe_st_state_s1[(`L15_MESI_TRANS_STATE_WIDTH*(i+1))-1 -: `L15_MESI_TRANS_STATE_WIDTH] = tmp_st_state[i];
end


// S1 read
mshr_pipe_readres_homeid_s1[`PACKET_HOME_ID_WIDTH-1:0] = 0;
Expand Down Expand Up @@ -361,10 +361,11 @@ always @ (posedge clk)
begin
if (!rst_n)
begin
st_homeid[0] <= 0;
st_homeid[1] <= 0;
ld_homeid[0] <= 0;
ld_homeid[1] <= 0;
// Reset branch: clear the store and load home-id entry of every thread.
for(i = 0; i < `L15_NUM_THREADS; i = i+1)
begin
    // PACKET_HOME_ID_WIDTH is a preprocessor macro (it is referenced with a
    // backtick elsewhere in this file), so the replication count needs the
    // backtick too; as a bare identifier it is undeclared.
    // NOTE(review): assumes each homeid entry is `PACKET_HOME_ID_WIDTH bits wide - confirm against the declaration.
    st_homeid[i] <= {`PACKET_HOME_ID_WIDTH{1'b0}};
    ld_homeid[i] <= {`PACKET_HOME_ID_WIDTH{1'b0}};
end
end
else
begin
Expand Down
123 changes: 67 additions & 56 deletions piton/design/chip/tile/l15/rtl/l15_pipeline.v.pyv
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,8 @@ reg [`L15_MESI_TRANS_STATE_WIDTH-1:0] mshr_st_state_array [`L15_THREAD_ARRAY_MAS
reg [`L15_PADDR_MASK] mshr_st_address_array [`L15_THREAD_ARRAY_MASK];
reg [`L15_PADDR_MASK] mshr_ld_address_array [`L15_THREAD_ARRAY_MASK];
reg [`L15_WAY_MASK] mshr_st_way_array [`L15_THREAD_ARRAY_MASK];
integer i=0;

always @ *
begin
pipe_mshr_readreq_mshrid_s1 = noc2decoder_l15_mshrid;
Expand All @@ -402,18 +404,16 @@ begin
// predecode_mshr_read_address_s1 = mshr_pipe_address_s1;
predecode_mshr_read_homeid_s1 = mshr_pipe_readres_homeid_s1;

// mshr_val_array
mshr_val_array[0] = mshr_pipe_vals_s1[`L15_NUM_MSHRID_PER_THREAD*1 - 1 -: `L15_NUM_MSHRID_PER_THREAD];
mshr_st_state_array[0] = mshr_pipe_st_state_s1[`L15_MESI_TRANS_STATE_WIDTH*1 - 1 -: `L15_MESI_TRANS_STATE_WIDTH];
mshr_st_address_array[0] = mshr_pipe_st_address[`L15_PADDR_WIDTH*1 - 1 -: `L15_PADDR_WIDTH];
mshr_ld_address_array[0] = mshr_pipe_ld_address[`L15_PADDR_WIDTH*1 - 1 -: `L15_PADDR_WIDTH];
mshr_st_way_array[0] = mshr_pipe_st_way_s1[2*1 - 1 -: 2];

mshr_val_array[1] = mshr_pipe_vals_s1[`L15_NUM_MSHRID_PER_THREAD*2 - 1 -: `L15_NUM_MSHRID_PER_THREAD];
mshr_st_state_array[1] = mshr_pipe_st_state_s1[`L15_MESI_TRANS_STATE_WIDTH*2 - 1 -: `L15_MESI_TRANS_STATE_WIDTH];
mshr_st_address_array[1] = mshr_pipe_st_address[`L15_PADDR_WIDTH*2 - 1 -: `L15_PADDR_WIDTH];
mshr_ld_address_array[1] = mshr_pipe_ld_address[`L15_PADDR_WIDTH*2 - 1 -: `L15_PADDR_WIDTH];
mshr_st_way_array[1] = mshr_pipe_st_way_s1[2*2 - 1 -: 2];
// mshr_val_array
// Unpack the flattened mshr_pipe_* vectors into per-thread arrays.
// Thread i reads slice i of each vector, i.e. bits [W*(i+1)-1 : W*i] for slice width W.
for(i = 0; i < `L15_NUM_THREADS; i = i+1)
begin
mshr_val_array[i] = mshr_pipe_vals_s1[`L15_NUM_MSHRID_PER_THREAD*(i+1) - 1 -: `L15_NUM_MSHRID_PER_THREAD];
mshr_st_state_array[i] = mshr_pipe_st_state_s1[`L15_MESI_TRANS_STATE_WIDTH*(i+1) - 1 -: `L15_MESI_TRANS_STATE_WIDTH];
mshr_st_address_array[i] = mshr_pipe_st_address[`L15_PADDR_WIDTH*(i+1) - 1 -: `L15_PADDR_WIDTH];
mshr_ld_address_array[i] = mshr_pipe_ld_address[`L15_PADDR_WIDTH*(i+1) - 1 -: `L15_PADDR_WIDTH];
mshr_st_way_array[i] = mshr_pipe_st_way_s1[`L15_WAY_WIDTH*(i+1) - 1 -: `L15_WAY_WIDTH];
end

end

// match pcx address to special accesses
Expand All @@ -434,10 +434,11 @@ begin
end

// decode requests to predecode signals
reg predecode_tagcheck_matched_t0ld_s1;
reg predecode_tagcheck_matched_t0st_s1;
reg predecode_tagcheck_matched_t1ld_s1;
reg predecode_tagcheck_matched_t1st_s1;
reg predecode_tagcheck_matched_trd_ld_s1 [`L15_THREAD_ARRAY_MASK]; // each element belongs to one thread
reg predecode_tagcheck_matched_trd_st_s1 [`L15_THREAD_ARRAY_MASK];
reg predecode_tagcheck_matched_lds_s1;
reg predecode_tagcheck_matched_sts_s1;

reg predecode_int_vec_dis_s1;
reg predecode_tagcheck_matched_s1;
reg [19:4] predecode_partial_tag_s1;
Expand Down Expand Up @@ -833,22 +834,26 @@ begin

// TAG CHECKING
predecode_partial_tag_s1[19:4] = pcxdecoder_l15_address[19:4]; // compare partial tag to save energy & timing
predecode_tagcheck_matched_t0ld_s1 = mshr_val_array[0][`L15_MSHR_ID_LD]
&& (predecode_partial_tag_s1[19:4] == mshr_ld_address_array[0][19:4]);
predecode_tagcheck_matched_t1ld_s1 = mshr_val_array[1][`L15_MSHR_ID_LD]
&& (predecode_partial_tag_s1[19:4] == mshr_ld_address_array[1][19:4]);
predecode_tagcheck_matched_t0st_s1 = mshr_val_array[0][`L15_MSHR_ID_ST]
&& (pcxdecoder_l15_address[39:4] == mshr_st_address_array[0][39:4]);
predecode_tagcheck_matched_t1st_s1 = mshr_val_array[1][`L15_MSHR_ID_ST]
&& (pcxdecoder_l15_address[39:4] == mshr_st_address_array[1][39:4]);

predecode_tagcheck_matched_s1 = predecode_tagcheck_matched_t0ld_s1 || predecode_tagcheck_matched_t1ld_s1
|| predecode_tagcheck_matched_t0st_s1 || predecode_tagcheck_matched_t1st_s1;


// Per-thread tag check against the outstanding load/store MSHR entries.
// The *_lds/*_sts signals are OR-reductions over all threads.
predecode_tagcheck_matched_lds_s1 = 0;
predecode_tagcheck_matched_sts_s1 = 0;
// Default for the hit thread id: inside the loop it is assigned only when a
// store entry matches, so without a default the signal would hold its previous
// value (an inferred latch) whenever no thread matches.
// NOTE(review): the start of this always block is outside this view - confirm
// that it is combinational and that no earlier default already exists.
predecode_hit_stbuf_threadid_s1 = 0;

for(i = 0; i < `L15_NUM_THREADS; i = i+1)
begin
    // loads compare only the partial tag [19:4]; stores compare bits [39:4]
    predecode_tagcheck_matched_trd_ld_s1[i] = mshr_val_array[i][`L15_MSHR_ID_LD]
        && (predecode_partial_tag_s1[19:4] == mshr_ld_address_array[i][19:4]);
    predecode_tagcheck_matched_trd_st_s1[i] = mshr_val_array[i][`L15_MSHR_ID_ST]
        && (pcxdecoder_l15_address[39:4] == mshr_st_address_array[i][39:4]);

    predecode_tagcheck_matched_lds_s1 = predecode_tagcheck_matched_trd_ld_s1[i] | predecode_tagcheck_matched_lds_s1;
    predecode_tagcheck_matched_sts_s1 = predecode_tagcheck_matched_trd_st_s1[i] | predecode_tagcheck_matched_sts_s1;

    // later iterations overwrite earlier ones, so the highest-numbered
    // matching thread wins when several store entries match
    if(predecode_tagcheck_matched_trd_st_s1[i] == 1)
        predecode_hit_stbuf_threadid_s1 = i;
end

predecode_tagcheck_matched_s1 = predecode_tagcheck_matched_lds_s1 | predecode_tagcheck_matched_sts_s1;
// misc
predecode_hit_stbuf_s1 = predecode_tagcheck_matched_t0st_s1 || predecode_tagcheck_matched_t1st_s1;
predecode_hit_stbuf_threadid_s1 = predecode_tagcheck_matched_t1st_s1 ? 1'b1 : 1'b0;
predecode_hit_stbuf_s1 = predecode_tagcheck_matched_sts_s1;
// note: only works with 2 threads for now; the MSHR algorithm needs to change before the number of threads can be increased
end

Expand Down Expand Up @@ -3302,31 +3307,37 @@ reg [`L15_UNPARAM_1_0] stbuf_way_s3; // wmt todo: move calculation to s2
// STORE BUFFER STUFF
always @ *
begin
`ifdef PITON_ASIC_RTL
stbuf_compare_address_match_s3[0] = mshr_st_address_array[0][10:4] == cache_index_s3;
`else
stbuf_compare_address_match_s3[0] = mshr_st_address_array[0][39:4] == address_s3[39:4];
`endif
stbuf_compare_match_s3[0] = mshr_val_array[0][`L15_MSHR_ID_ST]
&& (mshr_st_state_array[0] == `L15_MESI_TRANSITION_STATE_SM)
&& (stbuf_compare_address_match_s3[0] == 1'b1);
stbuf_compare_lru_match_s3[0] = stbuf_compare_match_s3[0] && (mshr_st_way_array[0] == lru_way_s3);

`ifdef PITON_ASIC_RTL
stbuf_compare_address_match_s3[1] = mshr_st_address_array[1][10:4] == cache_index_s3;
`else
stbuf_compare_address_match_s3[1] = mshr_st_address_array[1][39:4] == address_s3[39:4];
`endif
stbuf_compare_match_s3[1] = mshr_val_array[1][`L15_MSHR_ID_ST]
&& (mshr_st_state_array[1] == `L15_MESI_TRANSITION_STATE_SM)
&& (stbuf_compare_address_match_s3[1] == 1'b1);
stbuf_compare_lru_match_s3[1] = stbuf_compare_match_s3[1] && (mshr_st_way_array[1] == lru_way_s3);

// Defaults for the values accumulated by the per-thread compare loop below.
stbuf_compare_match_val_s3 = 0;
stbuf_compare_lru_match_val_s3 = 0;
// The thread-id outputs are assigned in the loop only when a thread matches.
// Default them to 0 so this combinational block does not infer latches and
// mshr_st_way_array is never indexed with a stale thread id.
stbuf_compare_threadid_s3 = 0;
stbuf_compare_lru_threadid_s3 = 0;
for(i = 0; i < `L15_NUM_THREADS; i = i+1)
begin
`ifdef PITON_ASIC_RTL
stbuf_compare_address_match_s3[i] = mshr_st_address_array[i][10:4] == cache_index_s3;
`else
stbuf_compare_address_match_s3[i] = mshr_st_address_array[i][39:4] == address_s3[39:4];
`endif

stbuf_compare_match_s3[i] = mshr_val_array[i][`L15_MSHR_ID_ST]
&& (mshr_st_state_array[i] == `L15_MESI_TRANSITION_STATE_SM)
&& (stbuf_compare_address_match_s3[i] == 1'b1);

stbuf_compare_threadid_s3 = stbuf_compare_match_s3[1] ? 1'b1 : 1'b0;
stbuf_compare_lru_threadid_s3 = stbuf_compare_lru_match_s3[1] ? 1'b1 : 1'b0;
stbuf_compare_match_val_s3 = stbuf_compare_match_s3[0] || stbuf_compare_match_s3[1];
stbuf_compare_lru_match_val_s3 = stbuf_compare_lru_match_s3[0] || stbuf_compare_lru_match_s3[1];
stbuf_compare_lru_match_s3[i] = stbuf_compare_match_s3[i] && (mshr_st_way_array[i] == lru_way_s3);

if (stbuf_compare_match_s3[i] == 1)
begin
stbuf_compare_threadid_s3 = i;
end

if (stbuf_compare_lru_match_s3[i] == 1)
begin
stbuf_compare_lru_threadid_s3 = i;
end

stbuf_compare_match_val_s3 = stbuf_compare_match_s3[i] | stbuf_compare_match_val_s3;
stbuf_compare_lru_match_val_s3 = stbuf_compare_lru_match_s3[i] | stbuf_compare_lru_match_val_s3;
end

stbuf_way_s3 = mshr_st_way_array[stbuf_compare_threadid_s3];
// stbuf_way_wmt_data_s3 = wmt_data_s3[stbuf_way_s3];
// stbuf_way_to_l1_s3 = stbuf_way_wmt_data_s3[`L15_UNPARAM_1_0];
Expand All @@ -3338,8 +3349,8 @@ always @ *
begin
// expanding some signals
tagcheck_way_mask_s3[`L15_UNPARAM_3_0] = tagcheck_way_s3 == 2'd0 ? 4'b0001 :
2'd1 ? 4'b0010 :
2'd2 ? 4'b0100 :
tagcheck_way_s3 == 2'd1 ? 4'b0010 :
tagcheck_way_s3 == 2'd2 ? 4'b0100 :
4'b1000 ;

tagcheck_state_me_s3 = tagcheck_state_s3 == `L15_MESI_STATE_M || tagcheck_state_s3 == `L15_MESI_STATE_E;
Expand Down
9 changes: 7 additions & 2 deletions piton/design/chip/tile/l15/rtl/noc2decoder.v
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,13 @@ begin
noc2decoder_l15_csm_mshrid = noc2_mshrid[`L15_CSM_NUM_TICKETS_LOG2-1:0];
// the threadid is encoded in the mshrid sent to L2; it occupies the next L15_THREADID_WIDTH bits after the first L15_MSHR_ID_WIDTH bits
noc2decoder_l15_threadid = noc2_mshrid[`L15_MSHR_ID_WIDTH+`L15_THREADID_WIDTH -1 -: `L15_THREADID_WIDTH];
noc2decoder_l15_hmc_fill = noc2_mshrid[`MSG_MSHRID_WIDTH-1];


`ifdef NO_RTL_CSM
noc2decoder_l15_hmc_fill = 1'b0; //noc2_mshrid[`MSG_MSHRID_WIDTH-1];
`else
noc2decoder_l15_hmc_fill = noc2_mshrid[`MSG_MSHRID_WIDTH-1];
`endif

noc2decoder_l15_l2miss = noc2_data[`MSG_L2_MISS];
noc2decoder_l15_icache_type = noc2_data[`MSG_CACHE_TYPE];
noc2decoder_l15_f4b = 0;
Expand Down
10 changes: 6 additions & 4 deletions piton/design/include/l15.h.pyv
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
print("`define L15_WMT_ALIAS_WIDTH %d" % int(math.log(l15_set_count/l1d_set_count, 2)))

print("`define L15_CACHELINE_WIDTH %d" % (L15_LINE_SIZE*8))

print("`define L15_NUM_THREADS %d" % CONFIG_L15_NUM_THREADS)
# Thread-id width = ceil(log2(number of threads)), computed with integer
# arithmetic so a non-power-of-two thread count is not rounded down, and
# clamped to at least 1 so a single-thread configuration still produces a
# valid [0:0] range for L15_THREADID_MASK.
print("`define L15_THREADID_WIDTH %d" % max(1, (CONFIG_L15_NUM_THREADS - 1).bit_length()))
%>

`define L15_MESI_STATE_WIDTH 2
Expand Down Expand Up @@ -209,10 +212,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
`define L15_MSHR_ID_LD 2'd2
`define L15_MSHR_ID_ST 2'd3

`define L15_NUM_THREADS 2
`define L15_THREADID_WIDTH 1
`define L15_THREADID_MASK 0:0
`define L15_THREAD_ARRAY_MASK 1:0
// if NO_RTL_CSM is defined, L15_NUM_THREADS can go up to 64; otherwise up to 32.
`define L15_THREADID_MASK `L15_THREADID_WIDTH-1:0
`define L15_THREAD_ARRAY_MASK `L15_NUM_THREADS-1:0
`define L15_NUM_MSHRID_PER_THREAD 4

// pipeline OPs
Expand Down
1 change: 1 addition & 0 deletions piton/tools/bin/pyhplib.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
# cache configurations
CONFIG_L15_SIZE = int(os.environ.get('CONFIG_L15_SIZE', '8192'))
CONFIG_L15_ASSOCIATIVITY = int(os.environ.get('CONFIG_L15_ASSOCIATIVITY', '4'))
CONFIG_L15_NUM_THREADS = int(os.environ.get('CONFIG_L15_NUM_THREADS', '2'))
CONFIG_L1D_SIZE = int(os.environ.get('CONFIG_L1D_SIZE', '8192'))
CONFIG_L1D_ASSOCIATIVITY = int(os.environ.get('CONFIG_L1D_ASSOCIATIVITY', '4'))
CONFIG_L1I_SIZE = int(os.environ.get('CONFIG_L1I_SIZE', '16384'))
Expand Down
1 change: 1 addition & 0 deletions piton/tools/src/sims/sims,2.0
Original file line number Diff line number Diff line change
Expand Up @@ -2637,6 +2637,7 @@ sub parse_args
"config_l1d_associativity",
"config_l15_size",
"config_l15_associativity",
"config_l15_num_threads",
"config_l2_size",
"config_l2_associativity",
);
Expand Down