Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add L1 Benchmarking #1

Open
wants to merge 24 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 71 additions & 40 deletions benchmarking/L1-benchmarking.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,12 +1,34 @@
#!/usr/bin/python

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Point to Python3 or 2


import sys
abejgonzalez marked this conversation as resolved.
Show resolved Hide resolved
import argparse
import re

"""
Lines formatting:
Cycle: decimal_int SnoopAddr: hexadecimal_int SnoopBlockAddr: hexadecimal_int
Cycle: decimal_int SnoopRespAddr: hexadecimal_int
Cycle: decimal_int PrefetchAddr: hexadecimal_int
Cycle: decimal_int PrefetchRespAddr: hexadecimal_int
"""

snoop_regex = re.compile("^Cycle:\s*(\d+)\s*SnoopAddr:\s*([\da-f]+)\s*SnoopBlock:\s*([\da-f]+)\s*")

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A couple minor changes:

  • \s* can probably be reduced to + since you are just doing spaces. If you have 1 space then I would just simplify and do .
  • IIRC re.match already starts the search at the start of the line so you don't need the ^.
  • I think you don't need to match the spaces at the end (you need to verify this). In any case, I think the better option would be to do .*.

# Each pattern captures (1) the decimal cycle count and (2) the lowercase-hex
# address. The cycle group is (\d+) in every pattern: a repeated group such as
# (\d)* keeps only its final repetition (the last digit), and (\d+)* can match
# with the group unset, which would make int(match.group(1)) fail.
resp_regex = re.compile(r"^Cycle:\s*(\d+)\s*SnoopRespAddr:\s*([\da-f]+)\s*")
prefetch_regex = re.compile(r"^Cycle:\s*(\d+)\s*PrefetchAddr:\s*([\da-f]+)\s*")
prefetch_resp_regex = re.compile(r"^Cycle:\s*(\d+)\s*PrefetchRespAddr:\s*([\da-f]+)\s*")


def main():
with open(sys.argv[1]) as f:
prefetch_lines = f.readlines()
parser = argparse.ArgumentParser(description="Specify input files")
parser.add_argument('prefetch_file', type=str, help='input file for the prefetch config')
parser.add_argument('no_prefetch_file', type=str, help='input file for the non-prefetch config')
args = parser.parse_args()

with open(sys.argv[2]) as f:
with open(args.prefetch_file) as f:
prefetch_lines = f.readlines()
with open(args.no_prefetch_file) as f:
no_prefetch_lines = f.readlines()

misses_prevented = 0
prefetch_queue={}
prefetches_sent=[]
Expand All @@ -18,6 +40,9 @@ def main():
with_prefetch_hits = with_prefetch['hits']
with_prefetch_misses = with_prefetch['misses']

print("Misses without prefetcher: " + str(len(no_prefetch_misses)))
print("Misses with prefetcher: " + str(len(with_prefetch_misses)))

prefetch_hits_only = list(with_prefetch_hits)
no_prefetch_misses_only = list(no_prefetch_misses)

Expand All @@ -30,49 +55,53 @@ def main():

useful_prefetches=[] #prefetches that actually prevent a miss
num_prefetch_resps = 0
num_unique_prefetch_resps = 0
delta_sum = 0
num_prefetches_accessed = 0

for line in prefetch_lines:
if "Prefetch Addr" in line:
pref = line.split()
prefetches_sent.append(pref[4]) #add new prefetch address
elif "Prefetch Resp" in line:
pref_resp = line.split()
pref_resp_addr = pref_resp[5]
pref_resp_cycles = int(pref_resp[1])
pref = prefetch_regex.match(line)
pref_resp = prefetch_resp_regex.match(line)
snoop = snoop_regex.match(line)
if pref:
prefetches_sent.append(int(pref.group(2), 16)) #add new prefetch address
elif pref_resp:
pref_resp_addr = int(pref_resp.group(2), 16)
pref_resp_cycles = int(pref_resp.group(1))
if pref_resp_addr in prefetches_sent:
if (pref_resp_addr not in prefetch_queue):
num_unique_prefetch_resps += 1
prefetch_queue[pref_resp_addr] = pref_resp_cycles #only interested in most recent response timing
num_prefetch_resps += 1
elif "Snoop" in line:
snoop = line.split()
addr = snoop[4]
cycles = int(snoop[1])
elif snoop:
addr = int(snoop.group(3), 16) #get block address
cycles = int(snoop.group(1))
if (addr in prefetch_queue):
delta_sum += (cycles - prefetch_queue[addr])
num_prefetches_accessed += 1
if ((addr in no_prefetch_misses_only) and (addr in prefetch_hits_only)):
no_prefetch_misses_only.remove(addr) # make sure miss isn't counted twice
prefetch_hits_only.remove(addr)
misses_prevented += 1
useful_prefetches.append(addr)
useful_prefetches.append(snoop.group(3))

#Accuracy Calculations
num_no_resp_prefetches=len(prefetches_sent)-num_prefetch_resps
num_unused_prefetches=num_prefetch_resps-len(useful_prefetches)
useless_prefetches = num_no_resp_prefetches + num_unused_prefetches

print("misses prevented: " + str(misses_prevented))

coverage = (misses_prevented + 0.0) / (misses_prevented + len(with_prefetch_misses)) * 100
coverage = float(misses_prevented) / (misses_prevented + len(with_prefetch_misses)) * 100
print("coverage: " + str(coverage) + "%")

accuracy = (misses_prevented + 0.0) / (useless_prefetches + misses_prevented) * 100
print("accuracy: " + str(accuracy) + "%")

timeliness = (delta_sum + 0.0) / num_prefetches_accessed
print("timeliness: " + str(timeliness) + " cycles")

# Split into acknowledged and sent prefetches
accuracy_all = float(misses_prevented) / (useless_prefetches + misses_prevented) * 100
print("accuracy: " + str(accuracy_all) + "%")
#TODO: must not count duplicates
accuracy_resp = float(misses_prevented) / (num_unique_prefetch_resps-len(useful_prefetches) + misses_prevented) * 100
print("accuracy (acknowledged): " + str(accuracy_resp) + "%")
if (num_prefetches_accessed != 0):
timeliness = (delta_sum + 0.0) / num_prefetches_accessed

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
timeliness = (delta_sum + 0.0) / num_prefetches_accessed
timeliness = float(delta_sum) / num_prefetches_accessed

print("timeliness: " + str(timeliness) + " cycles")


def classify_accesses(lines):
Expand All @@ -81,26 +110,28 @@ def classify_accesses(lines):
accesses = {"hits": [], "misses": []}
last_resp_cycle = 0
for line in lines:
if 'Snoop' in line:
snoop = line.split()
snoop_cycles = snoop[1]
addr = snoop[4]
snoops[addr] = snoop_cycles
resp = resp_regex.match(line)
snoop = snoop_regex.match(line)
if snoop:
snoop_cycles = int(snoop.group(1))
addr = snoop.group(2)
snoop_block = int(snoop.group(3), 16)
#use absolute addr in case of backlogged accesses to same block
snoops[addr] = (snoop_cycles, snoop_block)
all_addr.append(addr)
elif 'Resp' in line:
elif resp:
#check against snoops
resp = line.split()
resp_cycles = resp[1]
resp_addr = resp[4]
resp_cycles = int(resp.group(1))
resp_addr = resp.group(2)
if (resp_addr in snoops):
if (((int(resp_cycles) - int(snoops[resp_addr])) >= 5) and (int(resp_cycles) - int(last_resp_cycle) > 3)):
accesses["misses"].append(resp_addr) #add snoop addr to misses
if ((resp_cycles - snoops[resp_addr][0] >= 5) and (resp_cycles - last_resp_cycle > 3)):
accesses["misses"].append(snoops[resp_addr][1]) #add snoop block addr to misses
else:
accesses["hits"].append(resp_addr)
accesses["hits"].append(snoops[resp_addr][1])
snoops.pop(resp_addr)
last_resp_cycle = resp[1]
last_resp_cycle = int(resp.group(1))
return accesses



main()
if __name__ == "__main__":
main()
12 changes: 9 additions & 3 deletions benchmarking/README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
# Prefetcher Benchmarking

abejgonzalez marked this conversation as resolved.
Show resolved Hide resolved
This benchmarking test suite tests for prefetcher coverage, accuracy and timeliness.
This benchmarking script computes prefetcher coverage, accuracy and timeliness.

To run the L1 prefetching benchmark tests on a single-core Saturn config, run
__Coverage__ is the percentage of misses prevented by prefetching. It is calculated as misses prevented divided by the sum of misses prevented and the misses that still occur with the prefetch config. Misses prevented is counted by checking, for each prefetched address, whether that address was a miss in the non-prefetch config and a hit in the prefetch config.

__Accuracy__ is the percentage of prefetches that prevent misses. It is calculated as misses prevented divided by the sum of useless prefetches and misses prevented. For regular accuracy, a useless prefetch is either a prefetch that doesn't get acknowledged or a prefetch that doesn't prevent a miss. For acknowledged accuracy, a useless prefetch is a unique prefetched address that doesn't turn the address it's prefetched for from a miss into a hit.

__Timeliness__ is a measure of how far a prefetch occurs before the memory address is accessed. Here, timeliness is the average number of cycles between when a prefetch was last responded to and when that address is accessed.

To run the L1 prefetching benchmark, run
```
source benchmarkingL1.sh
source benchmarkingL1.sh [prefetch config] [non-prefetch config]

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
source benchmarkingL1.sh [prefetch config] [non-prefetch config]
./benchmarking/L1-benchmarking.sh [prefetch config] [non-prefetch config]

```
34 changes: 22 additions & 12 deletions benchmarking/benchmarkingL1.sh
Original file line number Diff line number Diff line change
@@ -1,15 +1,25 @@
#!/bin/bash
# Run L1 prefetcher benchmark tests
# TODO: Add parameterization for other cores
# Run L1 prefetcher benchmark test

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a small description of what $1 and $2 are supposed to be pointing to.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Additionally, this is running the test on vvadd. If we are going to add this, I think the benchmark should also be abstracted out.


cd ../../..
source env.sh
cd sims/vcs
make CONFIG=Prefetch2SaturnConfig
make run-binary CONFIG=Prefetch2SaturnConfig BINARY=$RISCV/riscv64-unknown-elf/share/riscv-tests/benchmarks/vvadd.riscv
cp output/chipyard.TestHarness.Prefetch2SaturnConfig/vvadd.out ../../generators/bar-prefetchers/benchmarking/prefetchL1-vvadd.out
make CONFIG=PassthroughPrefetchSaturnConfig
make run-binary CONFIG=PassthroughPrefetchSaturnConfig BINARY=$RISCV/riscv64-unknown-elf/share/riscv-tests/benchmarks/vvadd.riscv
cp output/chipyard.TestHarness.PassthroughPrefetchSaturnConfig/vvadd.out ../../generators/bar-prefetchers/benchmarking/no-prefetchL1-vvadd.out
cd ../../generators/bar-prefetchers/benchmarking
# Borrowed from build-toolchains.sh
# On macOS, use GNU readlink from 'coreutils' package in Homebrew/MacPorts
if [ "$(uname -s)" = "Darwin" ] ; then
READLINK=greadlink
else
READLINK=readlink
fi

# If BASH_SOURCE is undefined, we may be running under zsh, in that case
# provide a zsh-compatible alternative
DIR="$(dirname "$($READLINK -f "${BASH_SOURCE[0]:-${(%):-%x}}")")"
CHIPYARD_DIR="$(dirname $(dirname $(dirname "$DIR")))"

# $1: Chipyard CONFIG name of the design with the prefetcher enabled.
# $2: Chipyard CONFIG name of the comparable design without a prefetcher.
# Abort (return when sourced, exit when executed) if a directory change
# fails, so the make/cp steps never run from an unexpected location.
cd "$CHIPYARD_DIR/sims/vcs" || return 1 2>/dev/null || exit 1
make CONFIG="$1"
make run-binary CONFIG="$1" BINARY="$RISCV/riscv64-unknown-elf/share/riscv-tests/benchmarks/vvadd.riscv"
cp "output/chipyard.TestHarness.$1/vvadd.out" ../../generators/bar-prefetchers/benchmarking/prefetchL1-vvadd.out
make CONFIG="$2"
make run-binary CONFIG="$2" BINARY="$RISCV/riscv64-unknown-elf/share/riscv-tests/benchmarks/vvadd.riscv"
cp "output/chipyard.TestHarness.$2/vvadd.out" ../../generators/bar-prefetchers/benchmarking/no-prefetchL1-vvadd.out
cd "$CHIPYARD_DIR/generators/bar-prefetchers/benchmarking" || return 1 2>/dev/null || exit 1
python L1-benchmarking.py "prefetchL1-vvadd.out" "no-prefetchL1-vvadd.out"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this file should be committed to this repo, as Prefetch2SaturnConfig and PassthroughPrefetchSaturnConfig aren't defined for most people.
Really, it's just wrapping
python3 L1-benchmarking.py <path-to-prefetch.out> <path-to-no-prefetch.out>.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think an option is to give as the arguments two configs that are compared against one another. Then it is up to the script caller to give two configs that are roughly equiv.

4 changes: 2 additions & 2 deletions src/main/scala/Configs.scala
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,6 @@ class WithTLICachePrefetcher(p: CanInstantiatePrefetcher = SingleNextLinePrefetc
)
})

class WithHellaCachePrefetcher(hartIds: Seq[Int], p: CanInstantiatePrefetcher = MultiNextLinePrefetcherParams(handleVA=true)) extends Config((site, here, up) => {
case BuildHellaCache => HellaCachePrefetchWrapperFactory.apply(hartIds, p, up(BuildHellaCache))
class WithHellaCachePrefetcher(hartIds: Seq[Int], p: CanInstantiatePrefetcher = MultiNextLinePrefetcherParams(handleVA=true), printStats: Boolean = false) extends Config((site, here, up) => {
case BuildHellaCache => HellaCachePrefetchWrapperFactory.apply(hartIds, p, printStats, up(BuildHellaCache))
})
48 changes: 26 additions & 22 deletions src/main/scala/HellaCachePrefetcher.scala
Original file line number Diff line number Diff line change
Expand Up @@ -11,29 +11,29 @@ import freechips.rocketchip.subsystem.{CacheBlockBytes}
import freechips.rocketchip.diplomacy.{LazyModule}

object HellaCachePrefetchWrapperFactory {
def apply(hartIds: Seq[Int], prefetcher: CanInstantiatePrefetcher, base: BaseTile => Parameters => HellaCache) = (tile: BaseTile) => (p: Parameters) => {
def apply(hartIds: Seq[Int], prefetcher: CanInstantiatePrefetcher, printPrefetchingStats: Boolean, base: BaseTile => Parameters => HellaCache) = (tile: BaseTile) => (p: Parameters) => {
if (hartIds.contains(tile.staticIdForMetadataUseOnly)) {
new HellaCachePrefetchWrapper(tile.staticIdForMetadataUseOnly, prefetcher, base(tile))(p)
new HellaCachePrefetchWrapper(tile.staticIdForMetadataUseOnly, prefetcher, printPrefetchingStats, base(tile))(p)
} else {
base(tile)(p)
}
}
}

class HellaCachePrefetchWrapper(staticIdForMetadataUseOnly: Int, prefetcher: CanInstantiatePrefetcher, inner: Parameters => HellaCache)(implicit p: Parameters) extends HellaCache(staticIdForMetadataUseOnly)(p) {
class HellaCachePrefetchWrapper(staticIdForMetadataUseOnly: Int, prefetcher: CanInstantiatePrefetcher, printPrefetchingStats: Boolean, inner: Parameters => HellaCache)(implicit p: Parameters) extends HellaCache(staticIdForMetadataUseOnly)(p) {
val cache = LazyModule(inner(p))
override val node = cache.node
override val hartIdSinkNodeOpt = cache.hartIdSinkNodeOpt
override val mmioAddressPrefixSinkNodeOpt = cache.mmioAddressPrefixSinkNodeOpt
override lazy val module = new HellaCachePrefetchWrapperModule(prefetcher, this)
override lazy val module = new HellaCachePrefetchWrapperModule(prefetcher, printPrefetchingStats, this)
def getOMSRAMs() = cache.getOMSRAMs()
}

class HellaCachePrefetchWrapperModule(pP: CanInstantiatePrefetcher, outer: HellaCachePrefetchWrapper) extends HellaCacheModule(outer) with MemoryOpConstants{
class HellaCachePrefetchWrapperModule(pP: CanInstantiatePrefetcher, printPrefetchingStats: Boolean, outer: HellaCachePrefetchWrapper) extends HellaCacheModule(outer) with MemoryOpConstants{
outer.cache.module.io <> io
val cache = outer.cache.module

val cycle_counter = RegInit(0.U(32.W))
val cycle_counter = RegInit(0.U(64.W))
cycle_counter := cycle_counter + 1.U

abejgonzalez marked this conversation as resolved.
Show resolved Hide resolved
// Intercept and no-op prefetch requests generated by the core
Expand Down Expand Up @@ -87,28 +87,32 @@ class HellaCachePrefetchWrapperModule(pP: CanInstantiatePrefetcher, outer: Hella
cache.io.cpu.req.bits.phys := false.B
cache.io.cpu.req.bits.no_alloc := false.B
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of using a statically assigned boolean to control printing, this should be enabled by a plusArg.

  val enable_print_stats = PlusArg("prefetcher_print_stats", width=1, default=0)(0)
  when (enable_print_stats) { 
    // your print statements
  }

Then when running the sim just set EXTRA_SIM_FLAGS=+prefetcher_print_stats=1

cache.io.cpu.req.bits.no_xcpt := false.B
when (cache.io.cpu.req.fire()) {
in_flight := true.B
//print prefetch
val last_prefetch_addr = req.bits.block_address
printf(p"Cycle: ${Decimal(cycle_counter)}\tPrefetch Addr: ${Hexadecimal(req.bits.block_address)}\n")
when (cache.io.cpu.req.fire()) { in_flight := true.B }
if (printPrefetchingStats) {
when (cache.io.cpu.req.fire()) {
//print prefetch
val last_prefetch_addr = req.bits.block_address

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be in the if or outside of it?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like you can just delete this?

printf(p"Cycle: ${Decimal(cycle_counter)}\tPrefetchAddr: ${Hexadecimal(req.bits.block_address)}\n")
}
}
}

//print snoop
when (prefetcher.io.snoop.valid) {
val last_snoop_addr = prefetcher.io.snoop.bits.address
printf(p"Cycle: ${Decimal(cycle_counter)}\tSnoop Addr: ${Hexadecimal(prefetcher.io.snoop.bits.address)}\n")
}
if (printPrefetchingStats) {
when (prefetcher.io.snoop.valid) {
val last_snoop_addr = prefetcher.io.snoop.bits.address

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as comment above.

printf(p"Cycle: ${Decimal(cycle_counter)}\tSnoopAddr: ${Hexadecimal(prefetcher.io.snoop.bits.address)}\tSnoopBlock: ${Hexadecimal(prefetcher.io.snoop.bits.block_address)}\n")
}

//print response
when (cache.io.cpu.resp.valid && !isPrefetch(cache.io.cpu.resp.bits.cmd)) {
printf(p"Cycle: ${Decimal(cycle_counter)}\tResp Addr: ${Hexadecimal(cache.io.cpu.resp.bits.addr)}\n")
}
//print response
when (cache.io.cpu.resp.valid && !isPrefetch(cache.io.cpu.resp.bits.cmd)) {
printf(p"Cycle: ${Decimal(cycle_counter)}\tSnoopRespAddr: ${Hexadecimal(cache.io.cpu.resp.bits.addr)}\n")
}

//print prefetch response
when (cache.io.cpu.resp.valid && isPrefetch(cache.io.cpu.resp.bits.cmd)) {
printf(p"Cycle: ${Decimal(cycle_counter)}\tPrefetch Resp Addr: ${Hexadecimal(cache.io.cpu.resp.bits.addr)}\n")
//print prefetch response
when (cache.io.cpu.resp.valid && isPrefetch(cache.io.cpu.resp.bits.cmd)) {
printf(p"Cycle: ${Decimal(cycle_counter)}\tPrefetchRespAddr: ${Hexadecimal(cache.io.cpu.resp.bits.addr)}\n")
}
}

val prefetch_fire = cache.io.cpu.req.fire() && isPrefetch(cache.io.cpu.req.bits.cmd)
Expand Down