summaryrefslogtreecommitdiff
path: root/src/mem/cache/prefetch/base.cc
diff options
context:
space:
mode:
authorMitch Hayenga ext:(%2C%20Amin%20Farmahini%20%3Caminfar%40gmail.com%3E) <mitch.hayenga+gem5@gmail.com>2014-01-29 23:21:25 -0600
committerMitch Hayenga ext:(%2C%20Amin%20Farmahini%20%3Caminfar%40gmail.com%3E) <mitch.hayenga+gem5@gmail.com>2014-01-29 23:21:25 -0600
commit95735e10e7ea85320ee39c15a4132eece8417af4 (patch)
tree370863ea1bb2413937c03218e0b59aecc7a76fbe /src/mem/cache/prefetch/base.cc
parent32cc2ea8b9173863adeaa03f4d7ee1635acfdef7 (diff)
downloadgem5-95735e10e7ea85320ee39c15a4132eece8417af4.tar.xz
mem: prefetcher: add options, support for unaligned addresses
This patch extends the classic prefetcher to work on non-block aligned addresses. Because the existing prefetchers in gem5 mask off the lower address bits of cache accesses, many predictable strides fail to be detected. For example, if a load were to stride by 48 bytes, with 64 byte cachelines, the current stride based prefetcher would see an access pattern of 0, 64, 64, 128, 192, ... and thus would not detect a constant stride pattern. This patch fixes this by training the prefetcher on access and not masking off the lower address bits. It also adds the following configuration options: 1) Training/prefetching only on cache misses, 2) Training/prefetching only on data accesses, 3) Optionally tagging prefetches with a PC address. #3 allows prefetchers to train off of prefetch requests in systems with multiple cache levels and PC-based prefetchers present at multiple levels. It also effectively allows a pipelining of prefetch requests (like in POWER4) across multiple levels of cache hierarchy. Improves performance on my gem5 configuration by 4.3% for SPECINT and 4.7% for SPECFP (geomean).
Diffstat (limited to 'src/mem/cache/prefetch/base.cc')
-rw-r--r--src/mem/cache/prefetch/base.cc18
1 files changed, 16 insertions, 2 deletions
diff --git a/src/mem/cache/prefetch/base.cc b/src/mem/cache/prefetch/base.cc
index c440978e6..d5cddc88e 100644
--- a/src/mem/cache/prefetch/base.cc
+++ b/src/mem/cache/prefetch/base.cc
@@ -60,7 +60,9 @@ BasePrefetcher::BasePrefetcher(const Params *p)
: ClockedObject(p), size(p->size), latency(p->latency), degree(p->degree),
useMasterId(p->use_master_id), pageStop(!p->cross_pages),
serialSquash(p->serial_squash), onlyData(p->data_accesses_only),
- system(p->sys), masterId(system->getMasterId(name()))
+ onMissOnly(p->on_miss_only), onReadOnly(p->on_read_only),
+ onPrefetch(p->on_prefetch), system(p->sys),
+ masterId(system->getMasterId(name()))
{
}
@@ -185,7 +187,14 @@ BasePrefetcher::getPacket()
Tick
BasePrefetcher::notify(PacketPtr &pkt, Tick tick)
{
- if (!pkt->req->isUncacheable() && !(pkt->req->isInstFetch() && onlyData)) {
+ // Don't consult the prefetcher if any of the following conditions are true:
+ // 1) The request is uncacheable
+ // 2) The request is a fetch, but we are only prefetching data
+ // 3) The request is a cache hit, but we are only training on misses
+ // 4) The request is a write, but we are only training on reads
+ if (!pkt->req->isUncacheable() && !(pkt->req->isInstFetch() && onlyData) &&
+ !(onMissOnly && inCache(pkt->getAddr(), true)) &&
+ !(onReadOnly && !pkt->isRead())) {
// Calculate the blk address
Addr blk_addr = pkt->getAddr() & ~(Addr)(blkSize-1);
bool is_secure = pkt->isSecure();
@@ -262,6 +271,11 @@ BasePrefetcher::notify(PacketPtr &pkt, Tick tick)
prefetch->req->setThreadContext(pkt->req->contextId(),
pkt->req->threadId());
+ // Tag prefetch requests with corresponding PC to train lower
+ // cache-level prefetchers
+ if (onPrefetch && pkt->req->hasPC())
+ prefetch->req->setPC(pkt->req->getPC());
+
// We just remove the head if we are full
if (pf.size() == size) {
pfRemovedFull++;