-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add L1 Benchmarking #1
base: master
Are you sure you want to change the base?
Changes from 21 commits
55365ed
284dabe
afa397a
60aef70
a1338a5
7e529e2
16d82c6
a2402d9
d500223
8220621
67b14f1
e5e981c
7715229
c8b36e4
fb3174d
0dc6bf0
94d31cc
0707243
db320de
65307dd
5115d59
3d90bde
ddfcafd
1db88dc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,3 +3,5 @@ target/ | |
/.bsp/ | ||
/.idea/ | ||
/test_run_dir/ | ||
benchmarking/ | ||
/.out/ |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,137 @@ | ||||||
#!/usr/bin/python | ||||||
|
||||||
import sys | ||||||
abejgonzalez marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
import argparse | ||||||
import re | ||||||
|
||||||
""" | ||||||
Lines formatting: | ||||||
Cycle: decimal_int SnoopAddr: hexadecimal_int SnoopBlock: hexadecimal_int | ||||||
Cycle: decimal_int SnoopRespAddr: hexadecimal_int | ||||||
Cycle: decimal_int PrefetchAddr: hexadecimal_int | ||||||
Cycle: decimal_int PrefetchRespAddr: hexadecimal_int | ||||||
""" | ||||||
|
||||||
# Demand-access ("snoop") line: group 1 is the decimal cycle count, group 2 the
# hexadecimal access address, group 3 the hexadecimal block address.
# A raw string keeps \s and \d as regex escapes rather than (invalid) string escapes.
snoop_regex = re.compile(r"^Cycle:\s*(\d+)\s*SnoopAddr:\s*([\da-f]+)\s*SnoopBlock:\s*([\da-f]+)\s*")
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A couple minor changes:
|
||||||
# Response / prefetch lines: group 1 is the decimal cycle count and group 2 the
# hexadecimal address. The cycle group is always (\d+) so that it captures the
# whole number and can never be None on a successful match.
resp_regex = re.compile(r"^Cycle:\s*(\d+)\s*SnoopRespAddr:\s*([\da-f]+)\s*")
prefetch_regex = re.compile(r"^Cycle:\s*(\d+)\s*PrefetchAddr:\s*([\da-f]+)\s*")
prefetch_resp_regex = re.compile(r"^Cycle:\s*(\d+)\s*PrefetchRespAddr:\s*([\da-f]+)\s*")
|
||||||
|
||||||
def _percentage(numerator, denominator):
    """Return ``numerator / denominator`` scaled to a percentage.

    A zero denominator yields 0.0 instead of raising ZeroDivisionError, so
    logs without any misses or prefetches still produce a report.
    """
    if denominator == 0:
        return 0.0
    return float(numerator) / denominator * 100


def main():
    """Print prefetcher coverage, accuracy and timeliness for two simulation logs.

    Command-line arguments:
        prefetch_file: log of the run with the prefetcher.
        no_prefetch_file: log of the run without the prefetcher.

    Both logs are classified into hits and misses with classify_accesses().
    The prefetch log is then replayed: a miss counts as prevented when an
    access hits a block that has a recorded prefetch response, was a miss
    only without the prefetcher, and is a hit only with it.
    """
    # Function-scope import so the block does not depend on the file's import list.
    from collections import Counter

    parser = argparse.ArgumentParser(description="Specify input files")
    parser.add_argument('prefetch_file', type=str, help='input file for the prefetch config')
    parser.add_argument('no_prefetch_file', type=str, help='input file for the non-prefetch config')
    args = parser.parse_args()

    with open(args.prefetch_file) as f:
        prefetch_lines = f.readlines()
    with open(args.no_prefetch_file) as f:
        no_prefetch_lines = f.readlines()

    no_prefetch = classify_accesses(no_prefetch_lines)
    with_prefetch = classify_accesses(prefetch_lines)
    no_prefetch_misses = no_prefetch['misses']
    with_prefetch_misses = with_prefetch['misses']

    print("Misses without prefetcher: " + str(len(no_prefetch_misses)))
    print("Misses with prefetcher: " + str(len(with_prefetch_misses)))

    # Multiset differences (duplicates cancel one-for-one): hits that only
    # exist with the prefetcher, and misses that only exist without it.
    prefetch_hits_only = Counter(with_prefetch['hits']) - Counter(no_prefetch['hits'])
    no_prefetch_misses_only = Counter(no_prefetch_misses) - Counter(with_prefetch_misses)

    prefetches_sent = set()      # addresses of every prefetch request seen so far
    num_prefetches_sent = 0      # requests are counted with duplicates
    prefetch_queue = {}          # prefetched address -> cycle of its latest response
    misses_prevented = 0
    num_prefetch_resps = 0
    num_unique_prefetch_resps = 0
    delta_sum = 0
    num_prefetches_accessed = 0

    # NOTE(review): the nesting of the branches below was reconstructed from a
    # listing that had lost its indentation -- confirm against the original.
    for line in prefetch_lines:
        pref = prefetch_regex.match(line)
        pref_resp = prefetch_resp_regex.match(line)
        snoop = snoop_regex.match(line)
        if pref:
            prefetches_sent.add(int(pref.group(2), 16))
            num_prefetches_sent += 1
        elif pref_resp:
            pref_resp_addr = int(pref_resp.group(2), 16)
            pref_resp_cycles = int(pref_resp.group(1))
            if pref_resp_addr in prefetches_sent:
                if pref_resp_addr not in prefetch_queue:
                    num_unique_prefetch_resps += 1
                # only interested in the most recent response timing
                prefetch_queue[pref_resp_addr] = pref_resp_cycles
                num_prefetch_resps += 1
        elif snoop:
            addr = int(snoop.group(3), 16)  # block address
            cycles = int(snoop.group(1))
            if addr in prefetch_queue:
                delta_sum += cycles - prefetch_queue[addr]
                num_prefetches_accessed += 1
                if no_prefetch_misses_only[addr] > 0 and prefetch_hits_only[addr] > 0:
                    # consume one occurrence of each so a miss isn't counted twice
                    no_prefetch_misses_only[addr] -= 1
                    prefetch_hits_only[addr] -= 1
                    misses_prevented += 1

    # Accuracy calculations. Each prevented miss corresponds to exactly one
    # useful prefetch, so misses_prevented doubles as the useful-prefetch count.
    num_no_resp_prefetches = num_prefetches_sent - num_prefetch_resps
    num_unused_prefetches = num_prefetch_resps - misses_prevented
    useless_prefetches = num_no_resp_prefetches + num_unused_prefetches

    print("misses prevented: " + str(misses_prevented))
    coverage = _percentage(misses_prevented, misses_prevented + len(with_prefetch_misses))
    print("coverage: " + str(coverage) + "%")
    # Split into acknowledged and sent prefetches
    accuracy_all = _percentage(misses_prevented, useless_prefetches + misses_prevented)
    print("accuracy: " + str(accuracy_all) + "%")
    # TODO: must not count duplicates
    # (unique responses - useful prefetches) + misses prevented == unique responses
    accuracy_resp = _percentage(misses_prevented, num_unique_prefetch_resps)
    print("accuracy (acknowledged): " + str(accuracy_resp) + "%")
    if num_prefetches_accessed != 0:
        timeliness = float(delta_sum) / num_prefetches_accessed
        # printed only when defined: no prefetched block was ever accessed otherwise
        print("timeliness: " + str(timeliness) + " cycles")
|
||||||
|
||||||
def classify_accesses(lines, min_miss_latency=5, min_resp_gap=3):
    """Classify every answered access in a simulation log as a hit or a miss.

    Lines matching ``snoop_regex`` record an outstanding access; lines
    matching ``resp_regex`` answer the outstanding access with the same
    address. An answered access is a miss when the response arrives at least
    ``min_miss_latency`` cycles after the access and more than
    ``min_resp_gap`` cycles after the previous response; otherwise it is a hit.

    Args:
        lines: iterable of log lines.
        min_miss_latency: minimum access-to-response latency, in cycles, for a miss.
        min_resp_gap: gap to the previous response, in cycles, that must be
            exceeded for a miss.

    Returns:
        A dict ``{"hits": [...], "misses": [...]}`` of integer block addresses,
        in response order and with duplicates preserved.
    """
    # Keyed by the absolute address (hex string) rather than the block address,
    # in case of backlogged accesses to the same block.
    outstanding = {}  # address -> (access cycle, block address)
    accesses = {"hits": [], "misses": []}
    last_resp_cycle = 0
    for line in lines:
        snoop = snoop_regex.match(line)
        if snoop:
            outstanding[snoop.group(2)] = (int(snoop.group(1)), int(snoop.group(3), 16))
            continue
        resp = resp_regex.match(line)
        if not resp:
            continue
        resp_cycles = int(resp.group(1))
        # A response without a recorded access is ignored for classification.
        issued = outstanding.pop(resp.group(2), None)
        if issued is not None:
            snoop_cycles, snoop_block = issued
            slow = resp_cycles - snoop_cycles >= min_miss_latency
            spaced = resp_cycles - last_resp_cycle > min_resp_gap
            accesses["misses" if slow and spaced else "hits"].append(snoop_block)
        # NOTE(review): updated for every response line; the source listing had
        # lost its indentation, so confirm this is not meant for matched responses only.
        last_resp_cycle = resp_cycles
    return accesses
|
||||||
|
||||||
# Run the benchmark report only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,14 @@ | ||||||
# Prefetcher Benchmarking | ||||||
|
||||||
abejgonzalez marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
This benchmarking script computes prefetcher coverage, accuracy and timeliness. | ||||||
|
||||||
__Coverage__ is the percentage of misses prevented by prefetching. It is calculated here by dividing the number of misses prevented by the sum of the misses that remain with the prefetch config and the misses prevented. A miss counts as prevented when an accessed address has been prefetched, was a miss in the non-prefetch config, and is a hit in the prefetch config. | ||||||
|
||||||
__Accuracy__ is the percentage of prefetches that prevent misses. It is calculated here by dividing the number of misses prevented by the sum of useless prefetches and misses prevented. For regular accuracy, a useless prefetch is either a prefetch that is never acknowledged or an acknowledged prefetch that doesn't prevent a miss. For acknowledged accuracy, a useless prefetch is a unique acknowledged prefetch address whose prefetch doesn't turn an access to that address from a miss into a hit. | ||||||
|
||||||
__Timeliness__ is a measure of how far a prefetch occurs before the memory address is accessed. Here, timeliness is the average number of cycles between when a prefetch was last responded to and when that address is accessed. | ||||||
|
||||||
To run the L1 prefetching benchmark, run | ||||||
``` | ||||||
source benchmarkingL1.sh [prefetch config] [non-prefetch config] | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
``` |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
#!/bin/bash | ||
# Run L1 prefetcher benchmark test | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add a small description of what $1 and $2 are supposed to be pointing to. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Additionally, this is running the test on |
||
|
||
# Usage: source benchmarkingL1.sh [prefetch config] [non-prefetch config]
#   $1 - CONFIG name of the design built with the prefetcher
#   $2 - CONFIG name of the design built without the prefetcher
# Each config is built and run on the vvadd benchmark in sims/vcs; the two
# simulation logs are then compared by L1-benchmarking.py.
if [ "$#" -ne 2 ]; then
    echo "usage: source benchmarkingL1.sh [prefetch config] [non-prefetch config]" >&2
    # 'return' works when sourced; fall back to 'exit' when run as a script
    return 1 2>/dev/null || exit 1
fi

# Borrowed from build-toolchains.sh
# On macOS, use GNU readlink from 'coreutils' package in Homebrew/MacPorts
if [ "$(uname -s)" = "Darwin" ] ; then
    READLINK=greadlink
else
    READLINK=readlink
fi

# If BASH_SOURCE is undefined, we may be running under zsh, in that case
# provide a zsh-compatible alternative
DIR="$(dirname "$($READLINK -f "${BASH_SOURCE[0]:-${(%):-%x}}")")"
CHIPYARD_DIR="$(dirname "$(dirname "$(dirname "$DIR")")")"
BENCHMARK_DIR="$CHIPYARD_DIR/generators/bar-prefetchers/benchmarking"
VVADD_BINARY="$RISCV/riscv64-unknown-elf/share/riscv-tests/benchmarks/vvadd.riscv"

cd "$CHIPYARD_DIR/sims/vcs" || { return 1 2>/dev/null || exit 1; }

# Build and run the config with the prefetcher, then keep its log
make CONFIG="$1"
make run-binary CONFIG="$1" BINARY="$VVADD_BINARY"
cp "output/chipyard.TestHarness.$1/vvadd.out" "$BENCHMARK_DIR/prefetchL1-vvadd.out"

# Build and run the config without the prefetcher, then keep its log
make CONFIG="$2"
make run-binary CONFIG="$2" BINARY="$VVADD_BINARY"
cp "output/chipyard.TestHarness.$2/vvadd.out" "$BENCHMARK_DIR/no-prefetchL1-vvadd.out"

cd "$BENCHMARK_DIR" || { return 1 2>/dev/null || exit 1; }
python L1-benchmarking.py "prefetchL1-vvadd.out" "no-prefetchL1-vvadd.out"
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think this file should be committed to this repo, as There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think an option is to give as the arguments two configs that are compared against one another. Then it is up to the script caller to give two configs that are roughly equiv. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,28 +11,31 @@ import freechips.rocketchip.subsystem.{CacheBlockBytes} | |
import freechips.rocketchip.diplomacy.{LazyModule} | ||
|
||
object HellaCachePrefetchWrapperFactory { | ||
def apply(hartIds: Seq[Int], prefetcher: CanInstantiatePrefetcher, base: BaseTile => Parameters => HellaCache) = (tile: BaseTile) => (p: Parameters) => { | ||
def apply(hartIds: Seq[Int], prefetcher: CanInstantiatePrefetcher, printPrefetchingStats: Boolean, base: BaseTile => Parameters => HellaCache) = (tile: BaseTile) => (p: Parameters) => { | ||
if (hartIds.contains(tile.staticIdForMetadataUseOnly)) { | ||
new HellaCachePrefetchWrapper(tile.staticIdForMetadataUseOnly, prefetcher, base(tile))(p) | ||
new HellaCachePrefetchWrapper(tile.staticIdForMetadataUseOnly, prefetcher, printPrefetchingStats, base(tile))(p) | ||
} else { | ||
base(tile)(p) | ||
} | ||
} | ||
} | ||
|
||
class HellaCachePrefetchWrapper(staticIdForMetadataUseOnly: Int, prefetcher: CanInstantiatePrefetcher, inner: Parameters => HellaCache)(implicit p: Parameters) extends HellaCache(staticIdForMetadataUseOnly)(p) { | ||
class HellaCachePrefetchWrapper(staticIdForMetadataUseOnly: Int, prefetcher: CanInstantiatePrefetcher, printPrefetchingStats: Boolean, inner: Parameters => HellaCache)(implicit p: Parameters) extends HellaCache(staticIdForMetadataUseOnly)(p) { | ||
val cache = LazyModule(inner(p)) | ||
override val node = cache.node | ||
override val hartIdSinkNodeOpt = cache.hartIdSinkNodeOpt | ||
override val mmioAddressPrefixSinkNodeOpt = cache.mmioAddressPrefixSinkNodeOpt | ||
override lazy val module = new HellaCachePrefetchWrapperModule(prefetcher, this) | ||
override lazy val module = new HellaCachePrefetchWrapperModule(prefetcher, printPrefetchingStats, this) | ||
def getOMSRAMs() = cache.getOMSRAMs() | ||
} | ||
|
||
class HellaCachePrefetchWrapperModule(pP: CanInstantiatePrefetcher, outer: HellaCachePrefetchWrapper) extends HellaCacheModule(outer) with MemoryOpConstants{ | ||
class HellaCachePrefetchWrapperModule(pP: CanInstantiatePrefetcher, printPrefetchingStats: Boolean, outer: HellaCachePrefetchWrapper) extends HellaCacheModule(outer) with MemoryOpConstants{ | ||
outer.cache.module.io <> io | ||
val cache = outer.cache.module | ||
|
||
val cycle_counter = RegInit(0.U(64.W)) | ||
cycle_counter := cycle_counter + 1.U | ||
|
||
abejgonzalez marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// Intercept and no-op prefetch requests generated by the core | ||
val core_prefetch = io.cpu.req.valid && isPrefetch(io.cpu.req.bits.cmd) | ||
when (io.cpu.req.valid && isPrefetch(io.cpu.req.bits.cmd)) { | ||
|
@@ -85,6 +88,31 @@ class HellaCachePrefetchWrapperModule(pP: CanInstantiatePrefetcher, outer: Hella | |
cache.io.cpu.req.bits.no_alloc := false.B | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Instead of using a statically assigned boolean to control printing, this should be enabled by a
Then when running the sim just set |
||
cache.io.cpu.req.bits.no_xcpt := false.B | ||
when (cache.io.cpu.req.fire()) { in_flight := true.B } | ||
if (printPrefetchingStats) { | ||
when (cache.io.cpu.req.fire()) { | ||
//print prefetch | ||
val last_prefetch_addr = req.bits.block_address | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this be in the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Seems like you can just delete this? |
||
printf(p"Cycle: ${Decimal(cycle_counter)}\tPrefetchAddr: ${Hexadecimal(req.bits.block_address)}\n") | ||
} | ||
} | ||
} | ||
|
||
//print snoop | ||
if (printPrefetchingStats) { | ||
when (prefetcher.io.snoop.valid) { | ||
val last_snoop_addr = prefetcher.io.snoop.bits.address | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as comment above. |
||
printf(p"Cycle: ${Decimal(cycle_counter)}\tSnoopAddr: ${Hexadecimal(prefetcher.io.snoop.bits.address)}\tSnoopBlock: ${Hexadecimal(prefetcher.io.snoop.bits.block_address)}\n") | ||
} | ||
|
||
//print response | ||
when (cache.io.cpu.resp.valid && !isPrefetch(cache.io.cpu.resp.bits.cmd)) { | ||
printf(p"Cycle: ${Decimal(cycle_counter)}\tSnoopRespAddr: ${Hexadecimal(cache.io.cpu.resp.bits.addr)}\n") | ||
} | ||
|
||
//print prefetch response | ||
when (cache.io.cpu.resp.valid && isPrefetch(cache.io.cpu.resp.bits.cmd)) { | ||
printf(p"Cycle: ${Decimal(cycle_counter)}\tPrefetchRespAddr: ${Hexadecimal(cache.io.cpu.resp.bits.addr)}\n") | ||
} | ||
} | ||
|
||
val prefetch_fire = cache.io.cpu.req.fire() && isPrefetch(cache.io.cpu.req.bits.cmd) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: Point to Python3 or 2