Add L1 Benchmarking #1

Open
wants to merge 24 commits into master
2 changes: 2 additions & 0 deletions .gitignore
@@ -3,3 +3,5 @@ target/
/.bsp/
/.idea/
/test_run_dir/
benchmarking/
/.out/
137 changes: 137 additions & 0 deletions benchmarking/L1-benchmarking.py
@@ -0,0 +1,137 @@
#!/usr/bin/python3

import sys
import argparse
import re

"""
Lines formatting:
Cycle: decimal_int SnoopAddr: hexadecimal_int SnoopBlockAddr: hexadecimal_int
Cycle: decimal_int SnoopRespAddr: hexadecimal_int
Cycle: decimal_int PrefetchAddr: hexadecimal_int
Cycle: decimal_int PrefetchRespAddr: hexadecimal_int
"""

snoop_regex = re.compile(r"Cycle: +(\d+)\sSnoopAddr: ([\da-f]+)\sSnoopBlock: ([\da-f]+).*")
resp_regex = re.compile(r"Cycle: +(\d+)\sSnoopRespAddr: ([\da-f]+).*")
prefetch_regex = re.compile(r"Cycle: +(\d+)\sPrefetchAddr: ([\da-f]+).*")
prefetch_resp_regex = re.compile(r"Cycle: +(\d+)\sPrefetchRespAddr: ([\da-f]+).*")
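
# Example log lines matching the printf statements in HellaCachePrefetcher.scala
# (field layout only; the cycle counts and addresses below are made up):
#   Cycle:    1024  SnoopAddr: 80001040  SnoopBlock: 2000041
#   Cycle:    1030  SnoopRespAddr: 80001040
#   Cycle:    1032  PrefetchAddr: 2000041
#   Cycle:    1045  PrefetchRespAddr: 80001040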


def main():
parser = argparse.ArgumentParser(description="Specify input files")
parser.add_argument('prefetch_file', type=str, help='input file for the prefetch config')
parser.add_argument('no_prefetch_file', type=str, help='input file for the non-prefetch config')
args = parser.parse_args()

with open(args.prefetch_file) as f:
prefetch_lines = f.readlines()
with open(args.no_prefetch_file) as f:
no_prefetch_lines = f.readlines()

misses_prevented = 0
prefetch_queue={}
prefetches_sent=[]

no_prefetch = classify_accesses(no_prefetch_lines)
no_prefetch_misses = no_prefetch['misses']
no_prefetch_hits = no_prefetch['hits']
with_prefetch = classify_accesses(prefetch_lines)
with_prefetch_hits = with_prefetch['hits']
with_prefetch_misses = with_prefetch['misses']

print("Misses without prefetcher: " + str(len(no_prefetch_misses)))
print("Misses with prefetcher: " + str(len(with_prefetch_misses)))

prefetch_hits_only = list(with_prefetch_hits)
no_prefetch_misses_only = list(no_prefetch_misses)

for addr in no_prefetch_hits:
if addr in prefetch_hits_only:
prefetch_hits_only.remove(addr) #get only new hits, blind to duplicates
for addr in with_prefetch_misses:
if addr in no_prefetch_misses_only:
no_prefetch_misses_only.remove(addr)

useful_prefetches=[] #prefetches that actually prevent a miss
num_prefetch_resps = 0
num_unique_prefetch_resps = 0
delta_sum = 0
num_prefetches_accessed = 0

for line in prefetch_lines:
pref = prefetch_regex.match(line)
pref_resp = prefetch_resp_regex.match(line)
snoop = snoop_regex.match(line)
if pref:
prefetches_sent.append(int(pref.group(2), 16)) #add new prefetch address
elif pref_resp:
pref_resp_addr = int(pref_resp.group(2), 16)
pref_resp_cycles = int(pref_resp.group(1))
if pref_resp_addr in prefetches_sent:
if (pref_resp_addr not in prefetch_queue):
num_unique_prefetch_resps += 1
prefetch_queue[pref_resp_addr] = pref_resp_cycles #only interested in most recent response timing
num_prefetch_resps += 1
elif snoop:
addr = int(snoop.group(3), 16) #get block address
cycles = int(snoop.group(1))
if (addr in prefetch_queue):
delta_sum += (cycles - prefetch_queue[addr])
num_prefetches_accessed += 1
if ((addr in no_prefetch_misses_only) and (addr in prefetch_hits_only)):
no_prefetch_misses_only.remove(addr) # make sure miss isn't counted twice
prefetch_hits_only.remove(addr)
misses_prevented += 1
useful_prefetches.append(snoop.group(3))

#Accuracy Calculations
num_no_resp_prefetches=len(prefetches_sent)-num_prefetch_resps
num_unused_prefetches=num_prefetch_resps-len(useful_prefetches)
useless_prefetches = num_no_resp_prefetches + num_unused_prefetches

print("misses prevented: " + str(misses_prevented))
coverage = float(misses_prevented) / (misses_prevented + len(with_prefetch_misses)) * 100
print("coverage: " + str(coverage) + "%")
# Split into acknowledged and sent prefetches
accuracy_all = float(misses_prevented) / (useless_prefetches + misses_prevented) * 100
print("accuracy: " + str(accuracy_all) + "%")
#TODO: must not count duplicates
accuracy_resp = float(misses_prevented) / (num_unique_prefetch_resps-len(useful_prefetches) + misses_prevented) * 100
print("accuracy (acknowledged): " + str(accuracy_resp) + "%")
if (num_prefetches_accessed != 0):
timeliness = float(delta_sum) / num_prefetches_accessed
print("timeliness: " + str(timeliness) + " cycles")


def classify_accesses(lines):
snoops = {}
all_addr = []
accesses = {"hits": [], "misses": []}
last_resp_cycle = 0
for line in lines:
resp = resp_regex.match(line)
snoop = snoop_regex.match(line)
if snoop:
snoop_cycles = int(snoop.group(1))
addr = snoop.group(2)
snoop_block = int(snoop.group(3), 16)
#use absolute addr in case of backlogged accesses to same block
snoops[addr] = (snoop_cycles, snoop_block)
all_addr.append(addr)
elif resp:
#check against snoops
resp_cycles = int(resp.group(1))
resp_addr = resp.group(2)
if (resp_addr in snoops):
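                # Classification heuristic (hard-coded thresholds): count this access as a
                # miss only if its response arrived at least 5 cycles after the snoop and
                # more than 3 cycles after the previous response; otherwise count it as a hit.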
if ((resp_cycles - snoops[resp_addr][0] >= 5) and (resp_cycles - last_resp_cycle > 3)):
accesses["misses"].append(snoops[resp_addr][1]) #add snoop block addr to misses
else:
accesses["hits"].append(snoops[resp_addr][1])
snoops.pop(resp_addr)
last_resp_cycle = int(resp.group(1))
return accesses


if __name__ == "__main__":
main()
14 changes: 14 additions & 0 deletions benchmarking/README.md
@@ -0,0 +1,14 @@
# Prefetcher Benchmarking

This benchmarking script computes prefetcher coverage, accuracy and timeliness.

__Coverage__ is the percentage of misses prevented by prefetching. It is calculated here as misses prevented divided by (misses with the prefetch config plus misses prevented). Misses prevented is determined by checking, for each prefetched address, whether that address was a miss in the non-prefetch config and a hit in the prefetch config.

__Accuracy__ is the percentage of prefetches that prevent misses. It is calculated here as misses prevented divided by (useless prefetches plus misses prevented). For regular accuracy, a useless prefetch is one that never receives a response or that does not prevent a miss. For acknowledged accuracy, a useless prefetch is a unique prefetched address whose response does not turn the corresponding access from a miss into a hit.

__Timeliness__ measures how far in advance of the demand access a prefetch completes. Here, timeliness is the average number of cycles between the last response to a prefetch and the access to that address.
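
As a rough sketch of the arithmetic (mirroring `L1-benchmarking.py`; the counts below are made-up placeholders):
```
# Illustrative only: the real script derives these counts from the two
# simulation logs; the numbers here are placeholders.
misses_prevented     = 40   # miss without the prefetcher, hit with it
misses_with_prefetch = 60   # misses remaining in the prefetch config
useless_prefetches   = 25   # no response, or responded but prevented no miss
prefetch_access_gaps = [12, 30, 7]   # access cycle minus last prefetch-response cycle

coverage   = misses_prevented / (misses_prevented + misses_with_prefetch) * 100  # 40.0%
accuracy   = misses_prevented / (misses_prevented + useless_prefetches) * 100    # ~61.5%
timeliness = sum(prefetch_access_gaps) / len(prefetch_access_gaps)               # ~16.3 cycles
```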

To run the L1 prefetching benchmark, run
```
./benchmarkingL1.sh [prefetch config] [non-prefetch config] [path to binary]
```
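
The script runs the binary under both configs in `sims/vcs` with `+prefetcher_print_stats=1` and then compares the two output logs. For example, with placeholder config names (substitute the Chipyard configs you actually built and any RISC-V test binary, such as vvadd):
```
./benchmarkingL1.sh MyPrefetcherRocketConfig MyBaselineRocketConfig /path/to/vvadd.riscv
```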
30 changes: 30 additions & 0 deletions benchmarking/benchmarkingL1.sh
@@ -0,0 +1,30 @@
#!/bin/bash


# ---------------------------------------------------------------------------------------
# Run L1 prefetcher benchmark test

Review comment: Add a small description of what $1 and $2 are supposed to be pointing to.

Review comment: Additionally, this is running the test on vvadd. If we are going to add this, I think the benchmark should also be abstracted out.

# Usage: ./benchmarkingL1.sh [prefetcher config] [non-prefetcher config] [path to binary]
# ---------------------------------------------------------------------------------------


# Borrowed from build-toolchains.sh
# On macOS, use GNU readlink from 'coreutils' package in Homebrew/MacPorts
if [ "$(uname -s)" = "Darwin" ] ; then
READLINK=greadlink
else
READLINK=readlink
fi

# If BASH_SOURCE is undefined, we may be running under zsh, in that case
# provide a zsh-compatible alternative
DIR="$(dirname "$($READLINK -f "${BASH_SOURCE[0]:-${(%):-%x}}")")"
CHIPYARD_DIR="$(dirname $(dirname $(dirname "$DIR")))"

OUT_FILE="$(basename "$3")"
OUT_FILE2=${OUT_FILE%.riscv}

cd $CHIPYARD_DIR/sims/vcs
make run-binary CONFIG=$1 BINARY=$3 EXTRA_SIM_FLAGS=+prefetcher_print_stats=1
make run-binary CONFIG=$2 BINARY=$3 EXTRA_SIM_FLAGS=+prefetcher_print_stats=1
cd $CHIPYARD_DIR/generators/bar-prefetchers/benchmarking
python3 L1-benchmarking.py "$CHIPYARD_DIR/sims/vcs/output/chipyard.TestHarness.$1/${OUT_FILE2}.out" "$CHIPYARD_DIR/sims/vcs/output/chipyard.TestHarness.$2/${OUT_FILE2}.out"
29 changes: 29 additions & 0 deletions src/main/scala/HellaCachePrefetcher.scala
@@ -3,6 +3,7 @@ package prefetchers
import chisel3._
import chisel3.util._
import chisel3.experimental.{IO}
import freechips.rocketchip.util.{PlusArg}
import freechips.rocketchip.config.{Config, Field, Parameters}
import freechips.rocketchip.rocket.{HellaCache, HellaCacheModule, HellaCacheArbiter, SimpleHellaCacheIF, HellaCacheIO}
import freechips.rocketchip.rocket.constants.{MemoryOpConstants}
@@ -33,6 +34,11 @@ class HellaCachePrefetchWrapperModule(pP: CanInstantiatePrefetcher, outer: Hella
outer.cache.module.io <> io
val cache = outer.cache.module

val cycle_counter = RegInit(0.U(64.W))
cycle_counter := cycle_counter + 1.U

val enable_print_stats = PlusArg("prefetcher_print_stats", width=1, default=0)(0)

// Intercept and no-op prefetch requests generated by the core
val core_prefetch = io.cpu.req.valid && isPrefetch(io.cpu.req.bits.cmd)
when (io.cpu.req.valid && isPrefetch(io.cpu.req.bits.cmd)) {
@@ -85,6 +91,29 @@ class HellaCachePrefetchWrapperModule(pP: CanInstantiatePrefetcher, outer: Hella
cache.io.cpu.req.bits.no_alloc := false.B
Review comment: Instead of using a statically assigned boolean to control printing, this should be enabled by a plusArg.

  val enable_print_stats = PlusArg("prefetcher_print_stats", width=1, default=0)(0)
  when (enable_print_stats) {
    // your print statements
  }

Then when running the sim just set EXTRA_SIM_FLAGS=+prefetcher_print_stats=1

cache.io.cpu.req.bits.no_xcpt := false.B
when (cache.io.cpu.req.fire()) { in_flight := true.B }
when (enable_print_stats && cache.io.cpu.req.fire()) {
//print prefetch
val last_prefetch_addr = req.bits.block_address
printf(p"Cycle: ${Decimal(cycle_counter)}\tPrefetchAddr: ${Hexadecimal(req.bits.block_address)}\n")
}
}

//print snoop
when (enable_print_stats) {
when (prefetcher.io.snoop.valid) {
val last_snoop_addr = prefetcher.io.snoop.bits.address

Review comment: Same as comment above.

printf(p"Cycle: ${Decimal(cycle_counter)}\tSnoopAddr: ${Hexadecimal(prefetcher.io.snoop.bits.address)}\tSnoopBlock: ${Hexadecimal(prefetcher.io.snoop.bits.block_address)}\n")
}

//print response
when (cache.io.cpu.resp.valid && !isPrefetch(cache.io.cpu.resp.bits.cmd)) {
printf(p"Cycle: ${Decimal(cycle_counter)}\tSnoopRespAddr: ${Hexadecimal(cache.io.cpu.resp.bits.addr)}\n")
}

//print prefetch response
when (cache.io.cpu.resp.valid && isPrefetch(cache.io.cpu.resp.bits.cmd)) {
printf(p"Cycle: ${Decimal(cycle_counter)}\tPrefetchRespAddr: ${Hexadecimal(cache.io.cpu.resp.bits.addr)}\n")
}
}

val prefetch_fire = cache.io.cpu.req.fire() && isPrefetch(cache.io.cpu.req.bits.cmd)