From 89a7fb03934b3e38c7d8b2c4818794b3ec874fdf Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Mon, 16 Feb 2009 08:56:40 -0800 Subject: Fixes to get prefetching working again. Apparently we broke it with the cache rewrite and never noticed. Thanks to Bao Yungang for a significant part of these changes (and for inspiring me to work on the rest). Some other overdue cleanup on the prefetch code too. --- src/mem/cache/BaseCache.py | 12 ++- src/mem/cache/base.hh | 15 ++-- src/mem/cache/blk.hh | 2 +- src/mem/cache/builder.cc | 92 +++++------------------ src/mem/cache/cache.hh | 12 ++- src/mem/cache/cache_impl.hh | 91 ++++++++++++++++------- src/mem/cache/mshr.cc | 34 ++++++--- src/mem/cache/mshr.hh | 19 +++-- src/mem/cache/prefetch/base.cc | 156 ++++++++++++++++++++++----------------- src/mem/cache/prefetch/base.hh | 24 +++++- src/mem/cache/prefetch/ghb.cc | 35 ++++----- src/mem/cache/prefetch/ghb.hh | 6 +- src/mem/cache/prefetch/stride.cc | 119 ++++++++++++++++++----------- src/mem/cache/prefetch/stride.hh | 30 ++++---- src/mem/cache/prefetch/tagged.cc | 19 ++--- 15 files changed, 366 insertions(+), 300 deletions(-) (limited to 'src/mem/cache') diff --git a/src/mem/cache/BaseCache.py b/src/mem/cache/BaseCache.py index 936107b7f..bef1b45d2 100644 --- a/src/mem/cache/BaseCache.py +++ b/src/mem/cache/BaseCache.py @@ -52,12 +52,10 @@ class BaseCache(MemObject): two_queue = Param.Bool(False, "whether the lifo should have two queue replacement") write_buffers = Param.Int(8, "number of write buffers") - prefetch_miss = Param.Bool(False, - "wheter you are using the hardware prefetcher from Miss stream") - prefetch_access = Param.Bool(False, - "wheter you are using the hardware prefetcher from Access stream") + prefetch_on_access = Param.Bool(False, + "notify the hardware prefetcher on every access (not just misses)") prefetcher_size = Param.Int(100, - "Number of entries in the harware prefetch queue") + "Number of entries in the hardware prefetch queue") prefetch_past_page = Param.Bool(False, "Allow prefetches to cross virtual page boundaries") prefetch_serial_squash = Param.Bool(False, @@ -69,9 +67,9 @@ class BaseCache(MemObject): prefetch_policy = Param.Prefetch('none', "Type of prefetcher to use") prefetch_cache_check_push = Param.Bool(True, - "Check if in cash on push or pop of prefetch queue") + "Check if in cache on push or pop of prefetch queue") prefetch_use_cpu_id = Param.Bool(True, - "Use the CPU ID to seperate calculations of prefetches") + "Use the CPU ID to separate calculations of prefetches") prefetch_data_accesses_only = Param.Bool(False, "Only prefetch on data not on instruction accesses") cpu_side = Port("Port on side closer to CPU") diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh index 601e9bd48..fe191fb1c 100644 --- a/src/mem/cache/base.hh +++ b/src/mem/cache/base.hh @@ -445,12 +445,6 @@ class BaseCache : public MemObject } } - Tick nextMSHRReadyTime() - { - return std::min(mshrQueue.nextMSHRReadyTime(), - writeBuffer.nextMSHRReadyTime()); - } - /** * Request the master bus for the given cause and time. * @param cause The reason for the request. @@ -467,10 +461,11 @@ class BaseCache : public MemObject */ void deassertMemSideBusRequest(RequestCause cause) { - // obsolete!! - assert(false); - // memSidePort->deassertBusRequest(cause); - // checkDrain(); + // Obsolete... we no longer signal bus requests explicitly so + // we can't deassert them. Leaving this in as a no-op since + // the prefetcher calls it to indicate that it no longer wants + // to request a prefetch, and someday that might be + // interesting again. } virtual unsigned int drain(Event *de); diff --git a/src/mem/cache/blk.hh b/src/mem/cache/blk.hh index bdf323d87..fe65672d6 100644 --- a/src/mem/cache/blk.hh +++ b/src/mem/cache/blk.hh @@ -205,7 +205,7 @@ class CacheBlk * be touched. * @return True if the block was a hardware prefetch, unaccesed. */ - bool isPrefetch() const + bool wasPrefetched() const { return (status & BlkHWPrefetched) != 0; } diff --git a/src/mem/cache/builder.cc b/src/mem/cache/builder.cc index 27daa4025..599353b88 100644 --- a/src/mem/cache/builder.cc +++ b/src/mem/cache/builder.cc @@ -38,7 +38,6 @@ // Must be included first to determine which caches we want #include "enums/Prefetch.hh" #include "mem/config/cache.hh" -#include "mem/config/prefetch.hh" #include "mem/cache/base.hh" #include "mem/cache/cache.hh" #include "mem/bus.hh" @@ -58,38 +57,32 @@ #endif //Prefetcher Headers -#if defined(USE_GHB) #include "mem/cache/prefetch/ghb.hh" -#endif -#if defined(USE_TAGGED) #include "mem/cache/prefetch/tagged.hh" -#endif -#if defined(USE_STRIDED) #include "mem/cache/prefetch/stride.hh" -#endif using namespace std; using namespace TheISA; -#define BUILD_CACHE(TAGS, tags) \ - do { \ - BasePrefetcher *pf; \ - if (prefetch_policy == Enums::tagged) { \ - BUILD_TAGGED_PREFETCHER(TAGS); \ - } \ - else if (prefetch_policy == Enums::stride) { \ - BUILD_STRIDED_PREFETCHER(TAGS); \ - } \ - else if (prefetch_policy == Enums::ghb) { \ - BUILD_GHB_PREFETCHER(TAGS); \ - } \ - else { \ - BUILD_NULL_PREFETCHER(TAGS); \ - } \ - Cache *retval = \ - new Cache(this, tags, pf); \ - return retval; \ +#define BUILD_CACHE(TAGS, tags) \ + do { \ + BasePrefetcher *pf; \ + if (prefetch_policy == Enums::tagged) { \ + pf = new TaggedPrefetcher(this); \ + } \ + else if (prefetch_policy == Enums::stride) { \ + pf = new StridePrefetcher(this); \ + } \ + else if (prefetch_policy == Enums::ghb) { \ + pf = new GHBPrefetcher(this); \ + } \ + else { \ + pf = NULL; \ + } \ + Cache *retval = \ + new Cache(this, tags, pf); \ + return retval; \ } while (0) #define BUILD_CACHE_PANIC(x) do { \ @@ -135,37 +128,6 @@ using namespace TheISA; } \ } while (0) -#define BUILD_COHERENCE(b) do { \ - } while (0) - -#if defined(USE_TAGGED) -#define BUILD_TAGGED_PREFETCHER(t) \ - pf = new TaggedPrefetcher(this) -#else -#define BUILD_TAGGED_PREFETCHER(t) BUILD_CACHE_PANIC("Tagged Prefetcher") -#endif - -#if defined(USE_STRIDED) -#define BUILD_STRIDED_PREFETCHER(t) \ - pf = new StridePrefetcher(this) -#else -#define BUILD_STRIDED_PREFETCHER(t) BUILD_CACHE_PANIC("Stride Prefetcher") -#endif - -#if defined(USE_GHB) -#define BUILD_GHB_PREFETCHER(t) \ - pf = new GHBPrefetcher(this) -#else -#define BUILD_GHB_PREFETCHER(t) BUILD_CACHE_PANIC("GHB Prefetcher") -#endif - -#if defined(USE_TAGGED) -#define BUILD_NULL_PREFETCHER(t) \ - pf = new TaggedPrefetcher(this) -#else -#define BUILD_NULL_PREFETCHER(t) BUILD_CACHE_PANIC("NULL Prefetcher (uses Tagged)") -#endif - BaseCache * BaseCacheParams::create() { @@ -174,24 +136,6 @@ BaseCacheParams::create() subblock_size = block_size; } - //Warnings about prefetcher policy - if (prefetch_policy == Enums::none) { - if (prefetch_miss || prefetch_access) - panic("With no prefetcher, you shouldn't prefetch from" - " either miss or access stream\n"); - } - - if (prefetch_policy == Enums::tagged || prefetch_policy == Enums::stride || - prefetch_policy == Enums::ghb) { - - if (!prefetch_miss && !prefetch_access) - warn("With this prefetcher you should chose a prefetch" - " stream (miss or access)\nNo Prefetching will occur\n"); - - if (prefetch_miss && prefetch_access) - panic("Can't do prefetches from both miss and access stream"); - } - #if defined(USE_CACHE_IIC) // Build IIC params IIC::Params iic_params; diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index 4db5230f8..4570b067b 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -64,8 +64,6 @@ class Cache : public BaseCache /** A typedef for a list of BlkType pointers. */ typedef typename TagStore::BlkList BlkList; - bool prefetchAccess; - protected: class CpuSidePort : public CachePort @@ -141,7 +139,10 @@ class Cache : public BaseCache */ const bool doFastWrites; - const bool prefetchMiss; + /** + * Notify the prefetcher on every access, not just misses. + */ + const bool prefetchOnAccess; /** * Does all the processing necessary to perform the provided request. @@ -320,6 +321,11 @@ class Cache : public BaseCache bool inMissQueue(Addr addr) { return (mshrQueue.findMatch(addr) != 0); } + + /** + * Find next request ready time from among possible sources. + */ + Tick nextMSHRReadyTime(); }; #endif // __CACHE_HH__ diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index c4a19ad5c..618e00569 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -53,11 +53,10 @@ template Cache::Cache(const Params *p, TagStore *tags, BasePrefetcher *pf) : BaseCache(p), - prefetchAccess(p->prefetch_access), tags(tags), prefetcher(pf), doFastWrites(true), - prefetchMiss(p->prefetch_miss) + prefetchOnAccess(p->prefetch_on_access) { tempBlock = new BlkType(); tempBlock->data = new uint8_t[blkSize]; @@ -72,7 +71,8 @@ Cache::Cache(const Params *p, TagStore *tags, BasePrefetcher *pf) memSidePort->setOtherPort(cpuSidePort); tags->setCache(this); - prefetcher->setCache(this); + if (prefetcher) + prefetcher->setCache(this); } template @@ -81,7 +81,8 @@ Cache::regStats() { BaseCache::regStats(); tags->regStats(name()); - prefetcher->regStats(name()); + if (prefetcher) + prefetcher->regStats(name()); } template @@ -271,29 +272,11 @@ Cache::access(PacketPtr pkt, BlkType *&blk, blk = tags->accessBlock(pkt->getAddr(), lat); - if (prefetchAccess) { - //We are determining prefetches on access stream, call prefetcher - prefetcher->handleMiss(pkt, curTick); - } - DPRINTF(Cache, "%s %x %s\n", pkt->cmdString(), pkt->getAddr(), (blk) ? "hit" : "miss"); if (blk != NULL) { - if (blk->isPrefetch()) { - //Signal that this was a hit under prefetch (no need for - //use prefetch (only can get here if true) - DPRINTF(HWPrefetch, "Hit a block that was prefetched\n"); - blk->status &= ~BlkHWPrefetched; - if (prefetchMiss) { - //If we are using the miss stream, signal the - //prefetcher otherwise the access stream would have - //already signaled this hit - prefetcher->handleMiss(pkt, curTick); - } - } - if (pkt->needsExclusive() ? blk->isWritable() : blk->isReadable()) { // OK to satisfy access hits[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++; @@ -448,6 +431,9 @@ Cache::timingAccess(PacketPtr pkt) } #endif + // track time of availability of next prefetch, if any + Tick next_pf_time = 0; + bool needsResponse = pkt->needsResponse(); if (satisfied) { @@ -457,10 +443,14 @@ Cache::timingAccess(PacketPtr pkt) } else { delete pkt; } + + if (prefetcher && (prefetchOnAccess || (blk && blk->wasPrefetched()))) { + if (blk) + blk->status &= ~BlkHWPrefetched; + next_pf_time = prefetcher->notify(pkt, time); + } } else { // miss - if (prefetchMiss) - prefetcher->handleMiss(pkt, time); Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1)); MSHR *mshr = mshrQueue.findMatch(blk_addr); @@ -512,9 +502,16 @@ Cache::timingAccess(PacketPtr pkt) allocateMissBuffer(pkt, time, true); } + + if (prefetcher) { + next_pf_time = prefetcher->notify(pkt, time); + } } } + if (next_pf_time != 0) + requestMemSideBus(Request_PF, std::max(time, next_pf_time)); + // copy writebacks to write buffer while (!writebacks.empty()) { PacketPtr wbPkt = writebacks.front(); @@ -663,6 +660,17 @@ Cache::atomicAccess(PacketPtr pkt) } } + // Note that we don't invoke the prefetcher at all in atomic mode. + // It's not clear how to do it properly, particularly for + // prefetchers that aggressively generate prefetch candidates and + // rely on bandwidth contention to throttle them; these will tend + // to pollute the cache in atomic mode since there is no bandwidth + // contention. If we ever do want to enable prefetching in atomic + // mode, though, this is the place to do it... see timingAccess() + // for an example (though we'd want to issue the prefetch(es) + // immediately rather than calling requestMemSideBus() as we do + // there). + // Handle writebacks if needed while (!writebacks.empty()){ PacketPtr wbPkt = writebacks.front(); @@ -787,7 +795,8 @@ Cache::handleResponse(PacketPtr pkt) while (mshr->hasTargets()) { MSHR::Target *target = mshr->getTarget(); - if (target->isCpuSide()) { + switch (target->source) { + case MSHR::Target::FromCPU: Tick completion_time; if (is_fill) { satisfyCpuSideRequest(target->pkt, blk); @@ -825,13 +834,27 @@ Cache::handleResponse(PacketPtr pkt) target->pkt->cmd = MemCmd::ReadRespWithInvalidate; } cpuSidePort->respond(target->pkt, completion_time); - } else { + break; + + case MSHR::Target::FromPrefetcher: + assert(target->pkt->cmd == MemCmd::HardPFReq); + if (blk) + blk->status |= BlkHWPrefetched; + delete target->pkt->req; + delete target->pkt; + break; + + case MSHR::Target::FromSnoop: // I don't believe that a snoop can be in an error state assert(!is_error); // response to snoop request DPRINTF(Cache, "processing deferred snoop...\n"); handleSnoop(target->pkt, blk, true, true, mshr->pendingInvalidate || pkt->isInvalidate()); + break; + + default: + panic("Illegal target->source enum %d\n", target->source); } mshr->popTarget(); @@ -1333,6 +1356,22 @@ Cache::getTimingPacket() } +template +Tick +Cache::nextMSHRReadyTime() +{ + Tick nextReady = std::min(mshrQueue.nextMSHRReadyTime(), + writeBuffer.nextMSHRReadyTime()); + + if (prefetcher) { + nextReady = std::min(nextReady, + prefetcher->nextPrefetchReadyTime()); + } + + return nextReady; +} + + /////////////// // // CpuSidePort diff --git a/src/mem/cache/mshr.cc b/src/mem/cache/mshr.cc index 04b2b8d77..9ec9c090c 100644 --- a/src/mem/cache/mshr.cc +++ b/src/mem/cache/mshr.cc @@ -64,9 +64,9 @@ MSHR::TargetList::TargetList() inline void MSHR::TargetList::add(PacketPtr pkt, Tick readyTime, - Counter order, bool cpuSide, bool markPending) + Counter order, Target::Source source, bool markPending) { - if (cpuSide) { + if (source != Target::FromSnoop) { if (pkt->needsExclusive()) { needsExclusive = true; } @@ -84,7 +84,7 @@ MSHR::TargetList::add(PacketPtr pkt, Tick readyTime, } } - push_back(Target(pkt, readyTime, order, cpuSide, markPending)); + push_back(Target(pkt, readyTime, order, source, markPending)); } @@ -141,7 +141,14 @@ print(std::ostream &os, int verbosity, const std::string &prefix) const { ConstIterator end_i = end(); for (ConstIterator i = begin(); i != end_i; ++i) { - ccprintf(os, "%s%s: ", prefix, i->isCpuSide() ? "cpu" : "mem"); + const char *s; + switch (i->source) { + case Target::FromCPU: s = "FromCPU"; + case Target::FromSnoop: s = "FromSnoop"; + case Target::FromPrefetcher: s = "FromPrefetcher"; + default: s = ""; + } + ccprintf(os, "%s%s: ", prefix, s); i->pkt->print(os, verbosity, ""); } } @@ -162,10 +169,12 @@ MSHR::allocate(Addr _addr, int _size, PacketPtr target, downstreamPending = false; threadNum = 0; ntargets = 1; - // Don't know of a case where we would allocate a new MSHR for a - // snoop (mem-side request), so set cpuSide to true here. assert(targets->isReset()); - targets->add(target, whenReady, _order, true, true); + // Don't know of a case where we would allocate a new MSHR for a + // snoop (mem-side request), so set source according to request here + Target::Source source = (target->cmd == MemCmd::HardPFReq) ? + Target::FromPrefetcher : Target::FromCPU; + targets->add(target, whenReady, _order, source, true); assert(deferredTargets->isReset()); pendingInvalidate = false; pendingShared = false; @@ -230,17 +239,22 @@ MSHR::allocateTarget(PacketPtr pkt, Tick whenReady, Counter _order) // comes back (but before this target is processed) // - the outstanding request is for a non-exclusive block and this // target requires an exclusive block + + // assume we'd never issue a prefetch when we've got an + // outstanding miss + assert(pkt->cmd != MemCmd::HardPFReq); + if (inService && (!deferredTargets->empty() || pendingInvalidate || (!targets->needsExclusive && pkt->needsExclusive()))) { // need to put on deferred list - deferredTargets->add(pkt, whenReady, _order, true, true); + deferredTargets->add(pkt, whenReady, _order, Target::FromCPU, true); } else { // No request outstanding, or still OK to append to // outstanding request: append to regular target list. Only // mark pending if current request hasn't been issued yet // (isn't in service). - targets->add(pkt, whenReady, _order, true, !inService); + targets->add(pkt, whenReady, _order, Target::FromCPU, !inService); } ++ntargets; @@ -291,7 +305,7 @@ MSHR::handleSnoop(PacketPtr pkt, Counter _order) // actual target device (typ. PhysicalMemory) will delete the // packet on reception, so we need to save a copy here PacketPtr cp_pkt = new Packet(pkt, true); - targets->add(cp_pkt, curTick, _order, false, + targets->add(cp_pkt, curTick, _order, Target::FromSnoop, downstreamPending && targets->needsExclusive); ++ntargets; diff --git a/src/mem/cache/mshr.hh b/src/mem/cache/mshr.hh index 2ff1c2489..bed7012b0 100644 --- a/src/mem/cache/mshr.hh +++ b/src/mem/cache/mshr.hh @@ -55,20 +55,25 @@ class MSHR : public Packet::SenderState, public Printable class Target { public: + + enum Source { + FromCPU, + FromSnoop, + FromPrefetcher + }; + Tick recvTime; //!< Time when request was received (for stats) Tick readyTime; //!< Time when request is ready to be serviced Counter order; //!< Global order (for memory consistency mgmt) PacketPtr pkt; //!< Pending request packet. - bool cpuSide; //!< Did request come from cpu side or mem side? + Source source; //!< Did request come from cpu, memory, or prefetcher? bool markedPending; //!< Did we mark upstream MSHR //!< as downstreamPending? - bool isCpuSide() const { return cpuSide; } - Target(PacketPtr _pkt, Tick _readyTime, Counter _order, - bool _cpuSide, bool _markedPending) + Source _source, bool _markedPending) : recvTime(curTick), readyTime(_readyTime), order(_order), - pkt(_pkt), cpuSide(_cpuSide), markedPending(_markedPending) + pkt(_pkt), source(_source), markedPending(_markedPending) {} }; @@ -85,7 +90,7 @@ class MSHR : public Packet::SenderState, public Printable void resetFlags() { needsExclusive = hasUpgrade = false; } bool isReset() { return !needsExclusive && !hasUpgrade; } void add(PacketPtr pkt, Tick readyTime, Counter order, - bool cpuSide, bool markPending); + Target::Source source, bool markPending); void replaceUpgrades(); void clearDownstreamPending(); bool checkFunctional(PacketPtr pkt); @@ -238,7 +243,7 @@ public: if (getNumTargets() != 1) return false; Target *tgt = getTarget(); - return tgt->isCpuSide() && !tgt->pkt->needsResponse(); + return tgt->source == Target::FromCPU && !tgt->pkt->needsResponse(); } bool promoteDeferredTargets(); diff --git a/src/mem/cache/prefetch/base.cc b/src/mem/cache/prefetch/base.cc index a7e6cfdfc..365ce6727 100644 --- a/src/mem/cache/prefetch/base.cc +++ b/src/mem/cache/prefetch/base.cc @@ -33,6 +33,7 @@ * Hardware Prefetcher Definition. */ +#include "arch/isa_traits.hh" #include "base/trace.hh" #include "mem/cache/base.hh" #include "mem/cache/prefetch/base.hh" @@ -43,7 +44,7 @@ BasePrefetcher::BasePrefetcher(const BaseCacheParams *p) : size(p->prefetcher_size), pageStop(!p->prefetch_past_page), serialSquash(p->prefetch_serial_squash), cacheCheckPush(p->prefetch_cache_check_push), - only_data(p->prefetch_data_accesses_only) + onlyData(p->prefetch_data_accesses_only) { } @@ -52,6 +53,7 @@ BasePrefetcher::setCache(BaseCache *_cache) { cache = _cache; blkSize = cache->getBlockSize(); + _name = cache->name() + "-pf"; } void @@ -99,7 +101,8 @@ BasePrefetcher::regStats(const std::string &name) pfSquashed .name(name + ".prefetcher.num_hwpf_squashed_from_miss") - .desc("number of hwpf that got squashed due to a miss aborting calculation time") + .desc("number of hwpf that got squashed due to a miss " + "aborting calculation time") ; } @@ -126,60 +129,79 @@ BasePrefetcher::inMissQueue(Addr addr) PacketPtr BasePrefetcher::getPacket() { - DPRINTF(HWPrefetch, "%s:Requesting a hw_pf to issue\n", cache->name()); + DPRINTF(HWPrefetch, "Requesting a hw_pf to issue\n"); if (pf.empty()) { - DPRINTF(HWPrefetch, "%s:No HW_PF found\n", cache->name()); + DPRINTF(HWPrefetch, "No HW_PF found\n"); return NULL; } PacketPtr pkt; - bool keepTrying = false; + bool keep_trying = false; do { pkt = *pf.begin(); pf.pop_front(); if (!cacheCheckPush) { - keepTrying = cache->inCache(pkt->getAddr()); + keep_trying = cache->inCache(pkt->getAddr()); } + + if (keep_trying) { + DPRINTF(HWPrefetch, "addr 0x%x in cache, skipping\n", + pkt->getAddr()); + delete pkt->req; + delete pkt; + } + if (pf.empty()) { cache->deassertMemSideBusRequest(BaseCache::Request_PF); - if (keepTrying) return NULL; //None left, all were in cache + if (keep_trying) { + return NULL; // None left, all were in cache + } } - } while (keepTrying); + } while (keep_trying); pfIssued++; + assert(pkt != NULL); + DPRINTF(HWPrefetch, "returning 0x%x\n", pkt->getAddr()); return pkt; } -void -BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time) + +Tick +BasePrefetcher::notify(PacketPtr &pkt, Tick time) { - if (!pkt->req->isUncacheable() && !(pkt->req->isInstRead() && only_data)) - { - //Calculate the blk address - Addr blkAddr = pkt->getAddr() & ~(Addr)(blkSize-1); + if (!pkt->req->isUncacheable() && !(pkt->req->isInstRead() && onlyData)) { + // Calculate the blk address + Addr blk_addr = pkt->getAddr() & ~(Addr)(blkSize-1); - //Check if miss is in pfq, if so remove it - std::list::iterator iter = inPrefetch(blkAddr); + // Check if miss is in pfq, if so remove it + std::list::iterator iter = inPrefetch(blk_addr); if (iter != pf.end()) { - DPRINTF(HWPrefetch, "%s:Saw a miss to a queued prefetch, removing it\n", cache->name()); + DPRINTF(HWPrefetch, "Saw a miss to a queued prefetch addr: " + "0x%x, removing it\n", blk_addr); pfRemovedMSHR++; + delete (*iter)->req; + delete (*iter); pf.erase(iter); if (pf.empty()) cache->deassertMemSideBusRequest(BaseCache::Request_PF); } - //Remove anything in queue with delay older than time - //since everything is inserted in time order, start from end - //and work until pf.empty() or time is earlier - //This is done to emulate Aborting the previous work on a new miss - //Needed for serial calculators like GHB + // Remove anything in queue with delay older than time + // since everything is inserted in time order, start from end + // and work until pf.empty() or time is earlier + // This is done to emulate Aborting the previous work on a new miss + // Needed for serial calculators like GHB if (serialSquash) { iter = pf.end(); iter--; while (!pf.empty() && ((*iter)->time >= time)) { pfSquashed++; - pf.pop_back(); + DPRINTF(HWPrefetch, "Squashing old prefetch addr: 0x%x\n", + (*iter)->getAddr()); + delete (*iter)->req; + delete (*iter); + pf.erase(iter); iter--; } if (pf.empty()) @@ -191,74 +213,70 @@ BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time) std::list delays; calculatePrefetch(pkt, addresses, delays); - std::list::iterator addr = addresses.begin(); - std::list::iterator delay = delays.begin(); - while (addr != addresses.end()) - { - DPRINTF(HWPrefetch, "%s:Found a pf canidate, inserting into prefetch queue\n", cache->name()); - //temp calc this here... + std::list::iterator addrIter = addresses.begin(); + std::list::iterator delayIter = delays.begin(); + for (; addrIter != addresses.end(); ++addrIter, ++delayIter) { + Addr addr = *addrIter; + pfIdentified++; - //create a prefetch memreq - Request * prefetchReq = new Request(*addr, blkSize, 0); - PacketPtr prefetch; - prefetch = new Packet(prefetchReq, MemCmd::HardPFReq, -1); - prefetch->allocate(); - prefetch->req->setThreadContext(pkt->req->contextId(), - pkt->req->threadId()); - prefetch->time = time + (*delay); //@todo ADD LATENCY HERE - //... initialize + DPRINTF(HWPrefetch, "Found a pf candidate addr: 0x%x, " + "inserting into prefetch queue with delay %d time %d\n", + addr, *delayIter, time); - //Check if it is already in the cache - if (cacheCheckPush) { - if (cache->inCache(prefetch->getAddr())) { - addr++; - delay++; - continue; - } + // Check if it is already in the cache + if (cacheCheckPush && cache->inCache(addr)) { + DPRINTF(HWPrefetch, "Prefetch addr already in cache\n"); + continue; } - //Check if it is already in the miss_queue - if (cache->inMissQueue(prefetch->getAddr())) { - addr++; - delay++; + // Check if it is already in the miss_queue + if (cache->inMissQueue(addr)) { + DPRINTF(HWPrefetch, "Prefetch addr already in miss queue\n"); continue; } - //Check if it is already in the pf buffer - if (inPrefetch(prefetch->getAddr()) != pf.end()) { + // Check if it is already in the pf buffer + if (inPrefetch(addr) != pf.end()) { pfBufferHit++; - addr++; - delay++; + DPRINTF(HWPrefetch, "Prefetch addr already in pf buffer\n"); continue; } - //We just remove the head if we are full - if (pf.size() == size) - { - DPRINTF(HWPrefetch, "%s:Inserting into prefetch queue, it was full removing oldest\n", cache->name()); + // create a prefetch memreq + Request *prefetchReq = new Request(*addrIter, blkSize, 0); + PacketPtr prefetch = + new Packet(prefetchReq, MemCmd::HardPFReq, Packet::Broadcast); + prefetch->allocate(); + prefetch->req->setThreadContext(pkt->req->contextId(), + pkt->req->threadId()); + + prefetch->time = time + (*delayIter); // @todo ADD LATENCY HERE + + // We just remove the head if we are full + if (pf.size() == size) { pfRemovedFull++; + PacketPtr old_pkt = *pf.begin(); + DPRINTF(HWPrefetch, "Prefetch queue full, " + "removing oldest 0x%x\n", old_pkt->getAddr()); + delete old_pkt->req; + delete old_pkt; pf.pop_front(); } pf.push_back(prefetch); - - //Make sure to request the bus, with proper delay - cache->requestMemSideBus(BaseCache::Request_PF, prefetch->time); - - //Increment through the list - addr++; - delay++; } } + + return pf.empty() ? 0 : pf.front()->time; } std::list::iterator BasePrefetcher::inPrefetch(Addr address) { - //Guaranteed to only be one match, we always check before inserting + // Guaranteed to only be one match, we always check before inserting std::list::iterator iter; - for (iter=pf.begin(); iter != pf.end(); iter++) { + for (iter = pf.begin(); iter != pf.end(); iter++) { if (((*iter)->getAddr() & ~(Addr)(blkSize-1)) == address) { return iter; } @@ -266,4 +284,8 @@ BasePrefetcher::inPrefetch(Addr address) return pf.end(); } - +bool +BasePrefetcher::samePage(Addr a, Addr b) +{ + return roundDown(a, TheISA::VMPageSize) == roundDown(b, TheISA::VMPageSize); +} diff --git a/src/mem/cache/prefetch/base.hh b/src/mem/cache/prefetch/base.hh index 1515d8a93..92040e899 100644 --- a/src/mem/cache/prefetch/base.hh +++ b/src/mem/cache/prefetch/base.hh @@ -73,7 +73,9 @@ class BasePrefetcher bool cacheCheckPush; /** Do we prefetch on only data reads, or on inst reads as well. */ - bool only_data; + bool onlyData; + + std::string _name; public: @@ -90,13 +92,21 @@ class BasePrefetcher void regStats(const std::string &name); public: + BasePrefetcher(const BaseCacheParams *p); virtual ~BasePrefetcher() {} + const std::string name() const { return _name; } + void setCache(BaseCache *_cache); - void handleMiss(PacketPtr &pkt, Tick time); + /** + * Notify prefetcher of cache access (may be any access or just + * misses, depending on cache parameters.) + * @retval Time of next prefetch availability, or 0 if none. + */ + Tick notify(PacketPtr &pkt, Tick time); bool inCache(Addr addr); @@ -109,11 +119,21 @@ class BasePrefetcher return !pf.empty(); } + Tick nextPrefetchReadyTime() + { + return pf.empty() ? MaxTick : pf.front()->time; + } + virtual void calculatePrefetch(PacketPtr &pkt, std::list &addresses, std::list &delays) = 0; std::list::iterator inPrefetch(Addr address); + + /** + * Utility function: are addresses a and b on the same VM page? + */ + bool samePage(Addr a, Addr b); }; diff --git a/src/mem/cache/prefetch/ghb.cc b/src/mem/cache/prefetch/ghb.cc index c8b87e99d..c27165248 100644 --- a/src/mem/cache/prefetch/ghb.cc +++ b/src/mem/cache/prefetch/ghb.cc @@ -41,32 +41,25 @@ void GHBPrefetcher::calculatePrefetch(PacketPtr &pkt, std::list &addresses, std::list &delays) { - Addr blkAddr = pkt->getAddr() & ~(Addr)(this->blkSize-1); - int contextId = pkt->req->contextId(); - if (!useContextId) contextId = 0; + Addr blk_addr = pkt->getAddr() & ~(Addr)(blkSize-1); + int ctx_id = useContextId ? pkt->req->contextId() : 0; + assert(ctx_id < Max_Contexts); + int new_stride = blk_addr - lastMissAddr[ctx_id]; + int old_stride = lastMissAddr[ctx_id] - secondLastMissAddr[ctx_id]; - int new_stride = blkAddr - last_miss_addr[contextId]; - int old_stride = last_miss_addr[contextId] - - second_last_miss_addr[contextId]; - - second_last_miss_addr[contextId] = last_miss_addr[contextId]; - last_miss_addr[contextId] = blkAddr; + secondLastMissAddr[ctx_id] = lastMissAddr[ctx_id]; + lastMissAddr[ctx_id] = blk_addr; if (new_stride == old_stride) { - for (int d=1; d <= degree; d++) { - Addr newAddr = blkAddr + d * new_stride; - if (this->pageStop && - (blkAddr & ~(TheISA::VMPageSize - 1)) != - (newAddr & ~(TheISA::VMPageSize - 1))) - { - //Spanned the page, so now stop - this->pfSpanPage += degree - d + 1; + for (int d = 1; d <= degree; d++) { + Addr new_addr = blk_addr + d * new_stride; + if (pageStop && !samePage(blk_addr, new_addr)) { + // Spanned the page, so now stop + pfSpanPage += degree - d + 1; return; - } - else - { - addresses.push_back(newAddr); + } else { + addresses.push_back(new_addr); delays.push_back(latency); } } diff --git a/src/mem/cache/prefetch/ghb.hh b/src/mem/cache/prefetch/ghb.hh index 156a74afa..c85221a39 100644 --- a/src/mem/cache/prefetch/ghb.hh +++ b/src/mem/cache/prefetch/ghb.hh @@ -42,8 +42,10 @@ class GHBPrefetcher : public BasePrefetcher { protected: - Addr second_last_miss_addr[64/*MAX_CPUS*/]; - Addr last_miss_addr[64/*MAX_CPUS*/]; + static const int Max_Contexts = 64; + + Addr secondLastMissAddr[Max_Contexts]; + Addr lastMissAddr[Max_Contexts]; Tick latency; int degree; diff --git a/src/mem/cache/prefetch/stride.cc b/src/mem/cache/prefetch/stride.cc index ad5846daa..cfd2469fa 100644 --- a/src/mem/cache/prefetch/stride.cc +++ b/src/mem/cache/prefetch/stride.cc @@ -34,59 +34,92 @@ * Stride Prefetcher template instantiations. */ +#include "base/trace.hh" #include "mem/cache/prefetch/stride.hh" void StridePrefetcher::calculatePrefetch(PacketPtr &pkt, std::list &addresses, std::list &delays) { -// Addr blkAddr = pkt->paddr & ~(Addr)(this->blkSize-1); - int contextId = pkt->req->contextId(); - if (!useContextId) contextId = 0; + if (!pkt->req->hasPC()) { + DPRINTF(HWPrefetch, "ignoring request with no PC"); + return; + } - /* Scan Table for IAddr Match */ -/* std::list::iterator iter; - for (iter=table[contextId].begin(); - iter !=table[contextId].end(); - iter++) { - if ((*iter)->IAddr == pkt->pc) break; - } + Addr blk_addr = pkt->getAddr() & ~(Addr)(blkSize-1); + int ctx_id = useContextId ? pkt->req->contextId() : 0; + Addr pc = pkt->req->getPC(); + assert(ctx_id < Max_Contexts); + std::list &tab = table[ctx_id]; - if (iter != table[contextId].end()) { - //Hit in table + /* Scan Table for instAddr Match */ + std::list::iterator iter; + for (iter = tab.begin(); iter != tab.end(); iter++) { + if ((*iter)->instAddr == pc) + break; + } - int newStride = blkAddr - (*iter)->MAddr; - if (newStride == (*iter)->stride) { - (*iter)->confidence++; - } - else { - (*iter)->stride = newStride; - (*iter)->confidence--; - } + if (iter != tab.end()) { + // Hit in table - (*iter)->MAddr = blkAddr; + int new_stride = blk_addr - (*iter)->missAddr; + bool stride_match = (new_stride == (*iter)->stride); - for (int d=1; d <= degree; d++) { - Addr newAddr = blkAddr + d * newStride; - if (this->pageStop && - (blkAddr & ~(TheISA::VMPageSize - 1)) != - (newAddr & ~(TheISA::VMPageSize - 1))) - { - //Spanned the page, so now stop - this->pfSpanPage += degree - d + 1; - return; - } - else - { - addresses.push_back(newAddr); - delays.push_back(latency); - } - } - } - else { - //Miss in table - //Find lowest confidence and replace + if (stride_match && new_stride != 0) { + if ((*iter)->confidence < Max_Conf) + (*iter)->confidence++; + } else { + (*iter)->stride = new_stride; + if ((*iter)->confidence > Min_Conf) + (*iter)->confidence = 0; + } - } -*/ + DPRINTF(HWPrefetch, "hit: PC %x blk_addr %x stride %d (%s), conf %d\n", + pc, blk_addr, new_stride, stride_match ? "match" : "change", + (*iter)->confidence); + + (*iter)->missAddr = blk_addr; + + if ((*iter)->confidence <= 0) + return; + + for (int d = 1; d <= degree; d++) { + Addr new_addr = blk_addr + d * new_stride; + if (pageStop && !samePage(blk_addr, new_addr)) { + // Spanned the page, so now stop + pfSpanPage += degree - d + 1; + return; + } else { + DPRINTF(HWPrefetch, " queuing prefetch to %x @ %d\n", + new_addr, latency); + addresses.push_back(new_addr); + delays.push_back(latency); + } + } + } else { + // Miss in table + // Find lowest confidence and replace + + DPRINTF(HWPrefetch, "miss: PC %x blk_addr %x\n", pc, blk_addr); + + if (tab.size() >= 256) { //set default table size is 256 + std::list::iterator min_pos = tab.begin(); + int min_conf = (*min_pos)->confidence; + for (iter = min_pos, ++iter; iter != tab.end(); ++iter) { + if ((*iter)->confidence < min_conf){ + min_pos = iter; + min_conf = (*iter)->confidence; + } + } + DPRINTF(HWPrefetch, " replacing PC %x\n", (*min_pos)->instAddr); + tab.erase(min_pos); + } + + StrideEntry *new_entry = new StrideEntry; + new_entry->instAddr = pc; + new_entry->missAddr = blk_addr; + new_entry->stride = 0; + new_entry->confidence = 0; + tab.push_back(new_entry); + } } diff --git a/src/mem/cache/prefetch/stride.hh b/src/mem/cache/prefetch/stride.hh index 4738fd9bc..6ccd32b91 100644 --- a/src/mem/cache/prefetch/stride.hh +++ b/src/mem/cache/prefetch/stride.hh @@ -36,36 +36,36 @@ #ifndef __MEM_CACHE_PREFETCH_STRIDE_PREFETCHER_HH__ #define __MEM_CACHE_PREFETCH_STRIDE_PREFETCHER_HH__ +#include #include "mem/cache/prefetch/base.hh" class StridePrefetcher : public BasePrefetcher { protected: - class strideEntry + static const int Max_Contexts = 64; + + // These constants need to be changed with the type of the + // 'confidence' field below. + static const int Max_Conf = INT_MAX; + static const int Min_Conf = INT_MIN; + + class StrideEntry { public: - Addr IAddr; - Addr MAddr; + Addr instAddr; + Addr missAddr; int stride; - int64_t confidence; - -/* bool operator < (strideEntry a,strideEntry b) - { - if (a.confidence == b.confidence) { - return true; //?????? - } - else return a.confidence < b.confidence; - }*/ + int confidence; }; - Addr* lastMissAddr[64/*MAX_CPUS*/]; - std::list table[64/*MAX_CPUS*/]; + Addr *lastMissAddr[Max_Contexts]; + + std::list table[Max_Contexts]; Tick latency; int degree; bool useContextId; - public: StridePrefetcher(const BaseCacheParams *p) diff --git a/src/mem/cache/prefetch/tagged.cc b/src/mem/cache/prefetch/tagged.cc index 6afe1c6c2..a6c2403ba 100644 --- a/src/mem/cache/prefetch/tagged.cc +++ b/src/mem/cache/prefetch/tagged.cc @@ -47,20 +47,15 @@ TaggedPrefetcher:: calculatePrefetch(PacketPtr &pkt, std::list &addresses, std::list &delays) { - Addr blkAddr = pkt->getAddr() & ~(Addr)(this->blkSize-1); + Addr blkAddr = pkt->getAddr() & ~(Addr)(blkSize-1); - for (int d=1; d <= degree; d++) { - Addr newAddr = blkAddr + d*(this->blkSize); - if (this->pageStop && - (blkAddr & ~(TheISA::VMPageSize - 1)) != - (newAddr & ~(TheISA::VMPageSize - 1))) - { - //Spanned the page, so now stop - this->pfSpanPage += degree - d + 1; + for (int d = 1; d <= degree; d++) { + Addr newAddr = blkAddr + d*(blkSize); + if (pageStop && !samePage(blkAddr, newAddr)) { + // Spanned the page, so now stop + pfSpanPage += degree - d + 1; return; - } - else - { + } else { addresses.push_back(newAddr); delays.push_back(latency); } -- cgit v1.2.3