diff --git a/inc/NoCL.h b/inc/NoCL.h index 9c349ab..cc231a8 100644 --- a/inc/NoCL.h +++ b/inc/NoCL.h @@ -416,12 +416,22 @@ template __attribute__ ((noinline)) // Get potential scalarisable instructions printStat("ScalarisableInstrs: ", STAT_SIMT_SCALARISABLE_INSTRS); #endif + #if SIMTEnableSVStoreBuffer + // Store buffer stats + printStat("SBLoadHit: ", STAT_SIMT_SB_LOAD_HIT); + printStat("SBLoadMiss: ", STAT_SIMT_SB_LOAD_MISS); + #endif #endif #if SIMTEnableCapRegFileScalarisation // Get number of vector registers used printStat("MaxCapVecRegs: ", STAT_SIMT_MAX_CAP_VEC_REGS); printStat("TotalCapVecRegs: ", STAT_SIMT_TOTAL_CAP_VEC_REGS); + #if SIMTEnableSVStoreBuffer + // Store buffer stats + printStat("SBCapLoadHit: ", STAT_SIMT_SB_CAP_LOAD_HIT); + printStat("SBCapLoadMiss: ", STAT_SIMT_SB_CAP_LOAD_MISS); + #endif #endif // Get number of DRAM accesses diff --git a/pebbles b/pebbles index 62ce11f..764d2cd 160000 --- a/pebbles +++ b/pebbles @@ -1 +1 @@ -Subproject commit 62ce11f5c894bc32861e0e87f02b01e4ed962167 +Subproject commit 764d2cd1a1e260b3b3ec4d036fc838838ff1c042 diff --git a/scripts/sweep.py b/scripts/sweep.py index e5ee1c1..8ceceda 100755 --- a/scripts/sweep.py +++ b/scripts/sweep.py @@ -35,8 +35,8 @@ def printUsage(): ("SIMTEnableSVStoreBuffer", "1") ] config["DynRegSpill"] = config["RegFileScalarisation"] + [ - ("SIMTRegFileSize", "1024") - , ("SIMTCapRegFileSize", "1024") + ("SIMTRegFileSize", "512") + , ("SIMTCapRegFileSize", "512") , ("SIMTUseSharedVecScratchpad", "1") ] config["DynHalfRF"] = [ @@ -52,17 +52,20 @@ def printUsage(): # Combinations of configs that are of interest configCombos = [ ["Clang"] + , ["Clang", "StoreBuffer"] + , ["Clang", "ScalarUnit"] , ["CHERI", "RegFileScalarisation"] , ["CHERI", "DynRegSpill"] , ["CHERI", "StoreBuffer"] , ["CHERI", "ScalarUnit"] - , ["Clang", "DynRegSpill", "StoreBuffer", "ScalarUnit"] - , ["CHERI", "DynRegSpill", "StoreBuffer", "ScalarUnit"] + # ["Clang", "DynRegSpill", "StoreBuffer", "ScalarUnit"] + #, ["CHERI", "DynRegSpill", "StoreBuffer", "ScalarUnit"] ] # Config combos of interest when benchmarking only benchCombos = [ - ["GCC", "DynHalfRF"] + ["GCC"] + , ["GCC", "DynHalfRF"] , ["GCC", "StaticHalfRF"] ] @@ -132,7 +135,7 @@ def applySettings(combo): os.chdir(repoDir + "/test") os.system("rm -f bench.log") # Benchmark each combination in simulation - for combo in (configCombos + benchCombos): + for combo in (benchCombos + configCombos): name = "Baseline" if combo == [] else "+".join(combo) print("Config: " + name) clean() @@ -140,14 +143,15 @@ def applySettings(combo): # Synthesise os.chdir(repoDir + "/src") os.system("make > /dev/null") - os.chdir(repoDir + "/de10-pro-e") + os.chdir(repoDir + "/de10-pro") # Default to revD FPGA os.system("make one > /dev/null") # Run benchmarks os.chdir(repoDir + "/test") os.system("echo >> bench.log") os.system("echo ====== " + name + " ====== >> bench.log") os.system("echo >> bench.log") - os.system("./test.sh --fpga-e --stats --apps-only >> bench.log") + # Default to revD FPGA + os.system("./test.sh --fpga-d --stats --apps-only >> bench.log") else: printUsage() sys.exit(-1) diff --git a/src/Core/SIMT.hs b/src/Core/SIMT.hs index 1ecbddb..200be91 100644 --- a/src/Core/SIMT.hs +++ b/src/Core/SIMT.hs @@ -32,6 +32,7 @@ import Pebbles.Pipeline.SIMT.Management import Pebbles.Pipeline.Interface import Pebbles.Memory.Interface import Pebbles.Memory.CapSerDes +import Pebbles.Memory.CoalescingUnit import Pebbles.Memory.DRAM.Interface import Pebbles.Instructions.RV32_I import Pebbles.Instructions.RV32_M @@ -149,9 +150,11 @@ makeSIMTCore :: -- ^ Memory responses -> DRAMStatSigs -- ^ For DRAM stat counters + -> CoalUnitPerfStats + -- ^ For coalescing unit stats -> Module (Stream SIMTResp) -- ^ SIMT management responses -makeSIMTCore config mgmtReqs memReqs memResps dramStatSigs = mdo +makeSIMTCore config mgmtReqs memReqs memResps dramStatSigs coalStats = mdo -- Scalar unit enabled? let enScalarUnit = SIMTEnableScalarUnit == 1 @@ -382,6 +385,7 @@ makeSIMTCore config mgmtReqs memReqs memResps dramStatSigs = mdo , simtScalarResumeReqs = toStream scalarResumeQueue , simtDRAMStatSigs = dramStatSigs , simtMemReqs = fromList memReqSinks + , simtCoalStats = coalStats } return pipelineOuts.simtMgmtResps diff --git a/src/Main.hs b/src/Main.hs index dff009d..7bd76ad 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -180,10 +180,12 @@ makeSIMTDomain (clk, rst) = -- SIMT core simtMgmtResps <- - makeSIMTAccelerator simtMgmtReqs memReqs memResps dramStatSigs + makeSIMTAccelerator simtMgmtReqs memReqs memResps + dramStatSigs coalPerfStats -- SIMT memory subsystem - (memReqs, memResps, dramReqs1) <- makeSIMTMemSubsystem dramResps1 + (memReqs, memResps, dramReqs1, coalPerfStats) <- + makeSIMTMemSubsystem dramResps1 -- DRAM bus ((dramResps0, dramResps1), dramReqs) <- @@ -239,7 +241,8 @@ makeSIMTMemSubsystem :: , Vec SIMTLanes (Option MemReq) , Option (ScalarVal 33) ) , Source (SIMTPipelineInstrInfo, Vec SIMTLanes (Option MemResp)) - , Stream (DRAMReq ()) ) + , Stream (DRAMReq ()) + , CoalUnitPerfStats ) makeSIMTMemSubsystem dramResps = mdo -- Memory request queue memReqsQueue :: Queue (SIMTPipelineInstrInfo, @@ -273,7 +276,7 @@ makeSIMTMemSubsystem dramResps = mdo , isSRAMAccess = isBankedSRAMAccess , canBuffer = isStackAccess } - (memResps, sramReqs, dramReqs) <- + (memResps, sramReqs, dramReqs, coalPerfStats) <- makeSIMTCoalescingUnit coalUnitOpts memReqs1 dramResps sramResps @@ -301,7 +304,7 @@ makeSIMTMemSubsystem dramResps = mdo then error "SRAM base address not suitably aligned" else return () - return (toSink memReqsQueue, memResps1, dramReqs) + return (toSink memReqsQueue, memResps1, dramReqs, coalPerfStats) where -- SRAM-related addresses diff --git a/test/test.sh b/test/test.sh index f1d7227..894b69e 100755 --- a/test/test.sh +++ b/test/test.sh @@ -276,6 +276,10 @@ checkApp() { local SCALAR_SUSPS=$(getStat "ScalarSusps" "sum") local SCALAR_ABORTS=$(getStat "ScalarAborts" "sum") local DRAM_ACCS=$(getStat "DRAMAccs" "sum") + local SB_LOAD_HIT=$(getStat "SBLoadHit" "sum") + local SB_LOAD_MISS=$(getStat "SBLoadMiss" "sum") + local SB_CAP_LOAD_HIT=$(getStat "SBCapLoadHit" "sum") + local SB_CAP_LOAD_MISS=$(getStat "SBCapLoadMiss" "sum") local IPC=$(python3 -c "print('%.2f' % (float(${INSTRS}) / ${CYCLES}))") local OPTIONAL_STATS="" if [ "$VEC_REGS" != "" ]; then @@ -308,6 +312,18 @@ checkApp() { if [ "$SCALAR_ABORTS" != "" ]; then OPTIONAL_STATS="$OPTIONAL_STATS,ScalarAborts=$SCALAR_ABORTS" fi + if [ "$SB_LOAD_HIT" != "" ]; then + OPTIONAL_STATS="$OPTIONAL_STATS,SBLoadHit=$SB_LOAD_HIT" + fi + if [ "$SB_LOAD_MISS" != "" ]; then + OPTIONAL_STATS="$OPTIONAL_STATS,SBLoadMiss=$SB_LOAD_MISS" + fi + if [ "$SB_CAP_LOAD_HIT" != "" ]; then + OPTIONAL_STATS="$OPTIONAL_STATS,SBCapLoadHit=$SB_CAP_LOAD_HIT" + fi + if [ "$SB_CAP_LOAD_MISS" != "" ]; then + OPTIONAL_STATS="$OPTIONAL_STATS,SBCapLoadMiss=$SB_CAP_LOAD_MISS" + fi if [ "$EmitStats" != "" ]; then test "$OK" != "" assert $? "" " [IPC=$IPC,Instrs=$INSTRS,Cycles=$CYCLES,DRAMAccs=$DRAM_ACCS$OPTIONAL_STATS]"