From 95735e10e7ea85320ee39c15a4132eece8417af4 Mon Sep 17 00:00:00 2001 From: "Mitch Hayenga ext:(%2C%20Amin%20Farmahini%20%3Caminfar%40gmail.com%3E)" Date: Wed, 29 Jan 2014 23:21:25 -0600 Subject: mem: prefetcher: add options, support for unaligned addresses This patch extends the classic prefetcher to work on non-block aligned addresses. Because the existing prefetchers in gem5 mask off the lower address bits of cache accesses, many predictable strides fail to be detected. For example, if a load were to stride by 48 bytes, with 64 byte cachelines, the current stride based prefetcher would see an access pattern of 0, 64, 64, 128, 192.... Thus not detecting a constant stride pattern. This patch fixes this, by training the prefetcher on accesses and not masking off the lower address bits. It also adds the following configuration options: 1) Training/prefetching only on cache misses, 2) Training/prefetching only on data accesses, 3) Optionally tagging prefetches with a PC address. #3 allows prefetchers to train off of prefetch requests in systems with multiple cache levels and PC-based prefetchers present at multiple levels. It also effectively allows a pipelining of prefetch requests (like in POWER4) across multiple levels of cache hierarchy. Improves performance on my gem5 configuration by 4.3% for SPECINT and 4.7% for SPECFP (geomean). 
--- src/mem/cache/cache_impl.hh | 11 +++++++++++ src/mem/cache/prefetch/Prefetcher.py | 6 ++++++ src/mem/cache/prefetch/base.cc | 18 ++++++++++++++++-- src/mem/cache/prefetch/base.hh | 20 +++++++++++++++----- src/mem/cache/prefetch/stride.cc | 18 +++++++++--------- src/mem/request.hh | 7 +++++++ 6 files changed, 64 insertions(+), 16 deletions(-) diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index b26473336..76fb697c2 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -556,6 +556,17 @@ Cache::recvTimingReq(PacketPtr pkt) // move it ahead of mshrs that are ready // mshrQueue.moveToFront(mshr); } + + // We should call the prefetcher regardless of whether the request is + // satisfied or not, and regardless of whether the request is in the + // MSHR or not. The request could be a ReadReq hit, but still not + // satisfied (potentially because of a prior write to the same + // cache line). So, even when not satisfied, if there is an MSHR + // already allocated for this, we need to let the prefetcher know + // about the request + if (prefetcher) { + next_pf_time = prefetcher->notify(pkt, time); + } } else { // no MSHR assert(pkt->req->masterId() < system->maxMasters()); diff --git a/src/mem/cache/prefetch/Prefetcher.py b/src/mem/cache/prefetch/Prefetcher.py index af67f40b6..7d7aeed32 100644 --- a/src/mem/cache/prefetch/Prefetcher.py +++ b/src/mem/cache/prefetch/Prefetcher.py @@ -59,6 +59,12 @@ class BasePrefetcher(ClockedObject): "Use the master id to separate calculations of prefetches") data_accesses_only = Param.Bool(False, "Only prefetch on data not on instruction accesses") + on_miss_only = Param.Bool(False, + "Only prefetch on miss (as opposed to always)") + on_read_only = Param.Bool(False, + "Only prefetch on read requests (write requests ignored)") + on_prefetch = Param.Bool(True, + "Let lower cache prefetcher train on prefetch requests") sys = Param.System(Parent.any, "System this device belongs to") class 
GHBPrefetcher(BasePrefetcher): diff --git a/src/mem/cache/prefetch/base.cc b/src/mem/cache/prefetch/base.cc index c440978e6..d5cddc88e 100644 --- a/src/mem/cache/prefetch/base.cc +++ b/src/mem/cache/prefetch/base.cc @@ -60,7 +60,9 @@ BasePrefetcher::BasePrefetcher(const Params *p) : ClockedObject(p), size(p->size), latency(p->latency), degree(p->degree), useMasterId(p->use_master_id), pageStop(!p->cross_pages), serialSquash(p->serial_squash), onlyData(p->data_accesses_only), - system(p->sys), masterId(system->getMasterId(name())) + onMissOnly(p->on_miss_only), onReadOnly(p->on_read_only), + onPrefetch(p->on_prefetch), system(p->sys), + masterId(system->getMasterId(name())) { } @@ -185,7 +187,14 @@ BasePrefetcher::getPacket() Tick BasePrefetcher::notify(PacketPtr &pkt, Tick tick) { - if (!pkt->req->isUncacheable() && !(pkt->req->isInstFetch() && onlyData)) { + // Don't consult the prefetcher if any of the following conditions are true + // 1) The request is uncacheable + // 2) The request is a fetch, but we are only prefetching data + // 3) The request is a cache hit, but we are only training on misses + // 4) The request is a write, but we are only training on reads + if (!pkt->req->isUncacheable() && !(pkt->req->isInstFetch() && onlyData) && + !(onMissOnly && inCache(pkt->getAddr(), true)) && + !(onReadOnly && !pkt->isRead())) { // Calculate the blk address Addr blk_addr = pkt->getAddr() & ~(Addr)(blkSize-1); bool is_secure = pkt->isSecure(); @@ -262,6 +271,11 @@ BasePrefetcher::notify(PacketPtr &pkt, Tick tick) prefetch->req->setThreadContext(pkt->req->contextId(), pkt->req->threadId()); + // Tag prefetch requests with corresponding PC to train lower + // cache-level prefetchers + if (onPrefetch && pkt->req->hasPC()) + prefetch->req->setPC(pkt->req->getPC()); + // We just remove the head if we are full if (pf.size() == size) { pfRemovedFull++; diff --git a/src/mem/cache/prefetch/base.hh b/src/mem/cache/prefetch/base.hh index 953852c38..fc0dd0b36 100644 --- 
a/src/mem/cache/prefetch/base.hh +++ b/src/mem/cache/prefetch/base.hh @@ -89,18 +89,28 @@ class BasePrefetcher : public ClockedObject const Cycles latency; /** The number of prefetches to issue */ - unsigned degree; + const unsigned degree; /** If patterns should be found per context id */ - bool useMasterId; + const bool useMasterId; /** Do we prefetch across page boundaries. */ - bool pageStop; + const bool pageStop; /** Do we remove prefetches with later times than a new miss.*/ - bool serialSquash; + const bool serialSquash; /** Do we prefetch on only data reads, or on inst reads as well. */ - bool onlyData; + const bool onlyData; + + /** Do we trigger/train prefetch on cache misses only, or all accesses. */ + const bool onMissOnly; + + /** Do we trigger/train prefetch on reads only, or all accesses. */ + const bool onReadOnly; + + /** Do we tag prefetches with PC addresses, allowing lower PC-based + prefetchers to prefetch on prefetch requests */ + const bool onPrefetch; /** System we belong to */ System* system; diff --git a/src/mem/cache/prefetch/stride.cc b/src/mem/cache/prefetch/stride.cc index fd8b20fcc..a7abf4809 100644 --- a/src/mem/cache/prefetch/stride.cc +++ b/src/mem/cache/prefetch/stride.cc @@ -59,7 +59,7 @@ StridePrefetcher::calculatePrefetch(PacketPtr &pkt, std::list &addresses, return; } - Addr blk_addr = pkt->getAddr() & ~(Addr)(blkSize-1); + Addr data_addr = pkt->getAddr(); bool is_secure = pkt->isSecure(); MasterID master_id = useMasterId ? 
pkt->req->masterId() : 0; Addr pc = pkt->req->getPC(); @@ -77,7 +77,7 @@ StridePrefetcher::calculatePrefetch(PacketPtr &pkt, std::list &addresses, if (iter != tab.end()) { // Hit in table - int new_stride = blk_addr - (*iter)->missAddr; + int new_stride = data_addr - (*iter)->missAddr; bool stride_match = (new_stride == (*iter)->stride); if (stride_match && new_stride != 0) { @@ -89,20 +89,20 @@ StridePrefetcher::calculatePrefetch(PacketPtr &pkt, std::list &addresses, (*iter)->confidence = 0; } - DPRINTF(HWPrefetch, "hit: PC %x blk_addr %x (%s) stride %d (%s), " - "conf %d\n", pc, blk_addr, is_secure ? "s" : "ns", new_stride, + DPRINTF(HWPrefetch, "hit: PC %x data_addr %x (%s) stride %d (%s), " + "conf %d\n", pc, data_addr, is_secure ? "s" : "ns", new_stride, stride_match ? "match" : "change", (*iter)->confidence); - (*iter)->missAddr = blk_addr; + (*iter)->missAddr = data_addr; (*iter)->isSecure = is_secure; if ((*iter)->confidence <= 0) return; for (int d = 1; d <= degree; d++) { - Addr new_addr = blk_addr + d * new_stride; - if (pageStop && !samePage(blk_addr, new_addr)) { + Addr new_addr = data_addr + d * new_stride; + if (pageStop && !samePage(data_addr, new_addr)) { // Spanned the page, so now stop pfSpanPage += degree - d + 1; return; @@ -117,7 +117,7 @@ StridePrefetcher::calculatePrefetch(PacketPtr &pkt, std::list &addresses, // Miss in table // Find lowest confidence and replace - DPRINTF(HWPrefetch, "miss: PC %x blk_addr %x (%s)\n", pc, blk_addr, + DPRINTF(HWPrefetch, "miss: PC %x data_addr %x (%s)\n", pc, data_addr, is_secure ? 
"s" : "ns"); if (tab.size() >= 256) { //set default table size is 256 @@ -139,7 +139,7 @@ StridePrefetcher::calculatePrefetch(PacketPtr &pkt, std::list &addresses, StrideEntry *new_entry = new StrideEntry; new_entry->instAddr = pc; - new_entry->missAddr = blk_addr; + new_entry->missAddr = data_addr; new_entry->isSecure = is_secure; new_entry->stride = 0; new_entry->confidence = 0; diff --git a/src/mem/request.hh b/src/mem/request.hh index 28d378628..e84a77272 100644 --- a/src/mem/request.hh +++ b/src/mem/request.hh @@ -576,6 +576,13 @@ class Request return _threadId; } + void + setPC(Addr pc) + { + privateFlags.set(VALID_PC); + _pc = pc; + } + bool hasPC() const { -- cgit v1.2.3