summary | refs | log | tree | commit | diff
path: root/src/mem/cache/prefetch
diff options
context:
space:
mode:
author: Steve Reinhardt <steve.reinhardt@amd.com> 2009-02-16 08:56:40 -0800
committer: Steve Reinhardt <steve.reinhardt@amd.com> 2009-02-16 08:56:40 -0800
commit: 89a7fb03934b3e38c7d8b2c4818794b3ec874fdf (patch)
tree: 53a9b0877112908b1f6c3e5cad256a9b63a5de16 /src/mem/cache/prefetch
parent: 6923282fb5a9ba6af14d19be094839eefe1c34be (diff)
download: gem5-89a7fb03934b3e38c7d8b2c4818794b3ec874fdf.tar.xz
Fixes to get prefetching working again.
Apparently we broke it with the cache rewrite and never noticed. Thanks to Bao Yungang <baoyungang@gmail.com> for a significant part of these changes (and for inspiring me to work on the rest). Some other overdue cleanup on the prefetch code too.
Diffstat (limited to 'src/mem/cache/prefetch')
-rw-r--r-- src/mem/cache/prefetch/base.cc 156
-rw-r--r-- src/mem/cache/prefetch/base.hh 24
-rw-r--r-- src/mem/cache/prefetch/ghb.cc 35
-rw-r--r-- src/mem/cache/prefetch/ghb.hh 6
-rw-r--r-- src/mem/cache/prefetch/stride.cc 119
-rw-r--r-- src/mem/cache/prefetch/stride.hh 30
-rw-r--r-- src/mem/cache/prefetch/tagged.cc 19
7 files changed, 227 insertions, 162 deletions
diff --git a/src/mem/cache/prefetch/base.cc b/src/mem/cache/prefetch/base.cc
index a7e6cfdfc..365ce6727 100644
--- a/src/mem/cache/prefetch/base.cc
+++ b/src/mem/cache/prefetch/base.cc
@@ -33,6 +33,7 @@
* Hardware Prefetcher Definition.
*/
+#include "arch/isa_traits.hh"
#include "base/trace.hh"
#include "mem/cache/base.hh"
#include "mem/cache/prefetch/base.hh"
@@ -43,7 +44,7 @@ BasePrefetcher::BasePrefetcher(const BaseCacheParams *p)
: size(p->prefetcher_size), pageStop(!p->prefetch_past_page),
serialSquash(p->prefetch_serial_squash),
cacheCheckPush(p->prefetch_cache_check_push),
- only_data(p->prefetch_data_accesses_only)
+ onlyData(p->prefetch_data_accesses_only)
{
}
@@ -52,6 +53,7 @@ BasePrefetcher::setCache(BaseCache *_cache)
{
cache = _cache;
blkSize = cache->getBlockSize();
+ _name = cache->name() + "-pf";
}
void
@@ -99,7 +101,8 @@ BasePrefetcher::regStats(const std::string &name)
pfSquashed
.name(name + ".prefetcher.num_hwpf_squashed_from_miss")
- .desc("number of hwpf that got squashed due to a miss aborting calculation time")
+ .desc("number of hwpf that got squashed due to a miss "
+ "aborting calculation time")
;
}
@@ -126,60 +129,79 @@ BasePrefetcher::inMissQueue(Addr addr)
PacketPtr
BasePrefetcher::getPacket()
{
- DPRINTF(HWPrefetch, "%s:Requesting a hw_pf to issue\n", cache->name());
+ DPRINTF(HWPrefetch, "Requesting a hw_pf to issue\n");
if (pf.empty()) {
- DPRINTF(HWPrefetch, "%s:No HW_PF found\n", cache->name());
+ DPRINTF(HWPrefetch, "No HW_PF found\n");
return NULL;
}
PacketPtr pkt;
- bool keepTrying = false;
+ bool keep_trying = false;
do {
pkt = *pf.begin();
pf.pop_front();
if (!cacheCheckPush) {
- keepTrying = cache->inCache(pkt->getAddr());
+ keep_trying = cache->inCache(pkt->getAddr());
}
+
+ if (keep_trying) {
+ DPRINTF(HWPrefetch, "addr 0x%x in cache, skipping\n",
+ pkt->getAddr());
+ delete pkt->req;
+ delete pkt;
+ }
+
if (pf.empty()) {
cache->deassertMemSideBusRequest(BaseCache::Request_PF);
- if (keepTrying) return NULL; //None left, all were in cache
+ if (keep_trying) {
+ return NULL; // None left, all were in cache
+ }
}
- } while (keepTrying);
+ } while (keep_trying);
pfIssued++;
+ assert(pkt != NULL);
+ DPRINTF(HWPrefetch, "returning 0x%x\n", pkt->getAddr());
return pkt;
}
-void
-BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time)
+
+Tick
+BasePrefetcher::notify(PacketPtr &pkt, Tick time)
{
- if (!pkt->req->isUncacheable() && !(pkt->req->isInstRead() && only_data))
- {
- //Calculate the blk address
- Addr blkAddr = pkt->getAddr() & ~(Addr)(blkSize-1);
+ if (!pkt->req->isUncacheable() && !(pkt->req->isInstRead() && onlyData)) {
+ // Calculate the blk address
+ Addr blk_addr = pkt->getAddr() & ~(Addr)(blkSize-1);
- //Check if miss is in pfq, if so remove it
- std::list<PacketPtr>::iterator iter = inPrefetch(blkAddr);
+ // Check if miss is in pfq, if so remove it
+ std::list<PacketPtr>::iterator iter = inPrefetch(blk_addr);
if (iter != pf.end()) {
- DPRINTF(HWPrefetch, "%s:Saw a miss to a queued prefetch, removing it\n", cache->name());
+ DPRINTF(HWPrefetch, "Saw a miss to a queued prefetch addr: "
+ "0x%x, removing it\n", blk_addr);
pfRemovedMSHR++;
+ delete (*iter)->req;
+ delete (*iter);
pf.erase(iter);
if (pf.empty())
cache->deassertMemSideBusRequest(BaseCache::Request_PF);
}
- //Remove anything in queue with delay older than time
- //since everything is inserted in time order, start from end
- //and work until pf.empty() or time is earlier
- //This is done to emulate Aborting the previous work on a new miss
- //Needed for serial calculators like GHB
+ // Remove anything in queue with delay older than time
+ // since everything is inserted in time order, start from end
+ // and work until pf.empty() or time is earlier
+ // This is done to emulate Aborting the previous work on a new miss
+ // Needed for serial calculators like GHB
if (serialSquash) {
iter = pf.end();
iter--;
while (!pf.empty() && ((*iter)->time >= time)) {
pfSquashed++;
- pf.pop_back();
+ DPRINTF(HWPrefetch, "Squashing old prefetch addr: 0x%x\n",
+ (*iter)->getAddr());
+ delete (*iter)->req;
+ delete (*iter);
+ pf.erase(iter);
iter--;
}
if (pf.empty())
@@ -191,74 +213,70 @@ BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time)
std::list<Tick> delays;
calculatePrefetch(pkt, addresses, delays);
- std::list<Addr>::iterator addr = addresses.begin();
- std::list<Tick>::iterator delay = delays.begin();
- while (addr != addresses.end())
- {
- DPRINTF(HWPrefetch, "%s:Found a pf canidate, inserting into prefetch queue\n", cache->name());
- //temp calc this here...
+ std::list<Addr>::iterator addrIter = addresses.begin();
+ std::list<Tick>::iterator delayIter = delays.begin();
+ for (; addrIter != addresses.end(); ++addrIter, ++delayIter) {
+ Addr addr = *addrIter;
+
pfIdentified++;
- //create a prefetch memreq
- Request * prefetchReq = new Request(*addr, blkSize, 0);
- PacketPtr prefetch;
- prefetch = new Packet(prefetchReq, MemCmd::HardPFReq, -1);
- prefetch->allocate();
- prefetch->req->setThreadContext(pkt->req->contextId(),
- pkt->req->threadId());
- prefetch->time = time + (*delay); //@todo ADD LATENCY HERE
- //... initialize
+ DPRINTF(HWPrefetch, "Found a pf candidate addr: 0x%x, "
+ "inserting into prefetch queue with delay %d time %d\n",
+ addr, *delayIter, time);
- //Check if it is already in the cache
- if (cacheCheckPush) {
- if (cache->inCache(prefetch->getAddr())) {
- addr++;
- delay++;
- continue;
- }
+ // Check if it is already in the cache
+ if (cacheCheckPush && cache->inCache(addr)) {
+ DPRINTF(HWPrefetch, "Prefetch addr already in cache\n");
+ continue;
}
- //Check if it is already in the miss_queue
- if (cache->inMissQueue(prefetch->getAddr())) {
- addr++;
- delay++;
+ // Check if it is already in the miss_queue
+ if (cache->inMissQueue(addr)) {
+ DPRINTF(HWPrefetch, "Prefetch addr already in miss queue\n");
continue;
}
- //Check if it is already in the pf buffer
- if (inPrefetch(prefetch->getAddr()) != pf.end()) {
+ // Check if it is already in the pf buffer
+ if (inPrefetch(addr) != pf.end()) {
pfBufferHit++;
- addr++;
- delay++;
+ DPRINTF(HWPrefetch, "Prefetch addr already in pf buffer\n");
continue;
}
- //We just remove the head if we are full
- if (pf.size() == size)
- {
- DPRINTF(HWPrefetch, "%s:Inserting into prefetch queue, it was full removing oldest\n", cache->name());
+ // create a prefetch memreq
+ Request *prefetchReq = new Request(*addrIter, blkSize, 0);
+ PacketPtr prefetch =
+ new Packet(prefetchReq, MemCmd::HardPFReq, Packet::Broadcast);
+ prefetch->allocate();
+ prefetch->req->setThreadContext(pkt->req->contextId(),
+ pkt->req->threadId());
+
+ prefetch->time = time + (*delayIter); // @todo ADD LATENCY HERE
+
+ // We just remove the head if we are full
+ if (pf.size() == size) {
pfRemovedFull++;
+ PacketPtr old_pkt = *pf.begin();
+ DPRINTF(HWPrefetch, "Prefetch queue full, "
+ "removing oldest 0x%x\n", old_pkt->getAddr());
+ delete old_pkt->req;
+ delete old_pkt;
pf.pop_front();
}
pf.push_back(prefetch);
-
- //Make sure to request the bus, with proper delay
- cache->requestMemSideBus(BaseCache::Request_PF, prefetch->time);
-
- //Increment through the list
- addr++;
- delay++;
}
}
+
+ return pf.empty() ? 0 : pf.front()->time;
}
std::list<PacketPtr>::iterator
BasePrefetcher::inPrefetch(Addr address)
{
- //Guaranteed to only be one match, we always check before inserting
+ // Guaranteed to only be one match, we always check before inserting
std::list<PacketPtr>::iterator iter;
- for (iter=pf.begin(); iter != pf.end(); iter++) {
+ for (iter = pf.begin(); iter != pf.end(); iter++) {
if (((*iter)->getAddr() & ~(Addr)(blkSize-1)) == address) {
return iter;
}
@@ -266,4 +284,8 @@ BasePrefetcher::inPrefetch(Addr address)
return pf.end();
}
-
+bool
+BasePrefetcher::samePage(Addr a, Addr b)
+{
+ return roundDown(a, TheISA::VMPageSize) == roundDown(b, TheISA::VMPageSize);
+}
diff --git a/src/mem/cache/prefetch/base.hh b/src/mem/cache/prefetch/base.hh
index 1515d8a93..92040e899 100644
--- a/src/mem/cache/prefetch/base.hh
+++ b/src/mem/cache/prefetch/base.hh
@@ -73,7 +73,9 @@ class BasePrefetcher
bool cacheCheckPush;
/** Do we prefetch on only data reads, or on inst reads as well. */
- bool only_data;
+ bool onlyData;
+
+ std::string _name;
public:
@@ -90,13 +92,21 @@ class BasePrefetcher
void regStats(const std::string &name);
public:
+
BasePrefetcher(const BaseCacheParams *p);
virtual ~BasePrefetcher() {}
+ const std::string name() const { return _name; }
+
void setCache(BaseCache *_cache);
- void handleMiss(PacketPtr &pkt, Tick time);
+ /**
+ * Notify prefetcher of cache access (may be any access or just
+ * misses, depending on cache parameters.)
+ * @retval Time of next prefetch availability, or 0 if none.
+ */
+ Tick notify(PacketPtr &pkt, Tick time);
bool inCache(Addr addr);
@@ -109,11 +119,21 @@ class BasePrefetcher
return !pf.empty();
}
+ Tick nextPrefetchReadyTime()
+ {
+ return pf.empty() ? MaxTick : pf.front()->time;
+ }
+
virtual void calculatePrefetch(PacketPtr &pkt,
std::list<Addr> &addresses,
std::list<Tick> &delays) = 0;
std::list<PacketPtr>::iterator inPrefetch(Addr address);
+
+ /**
+ * Utility function: are addresses a and b on the same VM page?
+ */
+ bool samePage(Addr a, Addr b);
};
diff --git a/src/mem/cache/prefetch/ghb.cc b/src/mem/cache/prefetch/ghb.cc
index c8b87e99d..c27165248 100644
--- a/src/mem/cache/prefetch/ghb.cc
+++ b/src/mem/cache/prefetch/ghb.cc
@@ -41,32 +41,25 @@ void
GHBPrefetcher::calculatePrefetch(PacketPtr &pkt, std::list<Addr> &addresses,
std::list<Tick> &delays)
{
- Addr blkAddr = pkt->getAddr() & ~(Addr)(this->blkSize-1);
- int contextId = pkt->req->contextId();
- if (!useContextId) contextId = 0;
+ Addr blk_addr = pkt->getAddr() & ~(Addr)(blkSize-1);
+ int ctx_id = useContextId ? pkt->req->contextId() : 0;
+ assert(ctx_id < Max_Contexts);
+ int new_stride = blk_addr - lastMissAddr[ctx_id];
+ int old_stride = lastMissAddr[ctx_id] - secondLastMissAddr[ctx_id];
- int new_stride = blkAddr - last_miss_addr[contextId];
- int old_stride = last_miss_addr[contextId] -
- second_last_miss_addr[contextId];
-
- second_last_miss_addr[contextId] = last_miss_addr[contextId];
- last_miss_addr[contextId] = blkAddr;
+ secondLastMissAddr[ctx_id] = lastMissAddr[ctx_id];
+ lastMissAddr[ctx_id] = blk_addr;
if (new_stride == old_stride) {
- for (int d=1; d <= degree; d++) {
- Addr newAddr = blkAddr + d * new_stride;
- if (this->pageStop &&
- (blkAddr & ~(TheISA::VMPageSize - 1)) !=
- (newAddr & ~(TheISA::VMPageSize - 1)))
- {
- //Spanned the page, so now stop
- this->pfSpanPage += degree - d + 1;
+ for (int d = 1; d <= degree; d++) {
+ Addr new_addr = blk_addr + d * new_stride;
+ if (pageStop && !samePage(blk_addr, new_addr)) {
+ // Spanned the page, so now stop
+ pfSpanPage += degree - d + 1;
return;
- }
- else
- {
- addresses.push_back(newAddr);
+ } else {
+ addresses.push_back(new_addr);
delays.push_back(latency);
}
}
diff --git a/src/mem/cache/prefetch/ghb.hh b/src/mem/cache/prefetch/ghb.hh
index 156a74afa..c85221a39 100644
--- a/src/mem/cache/prefetch/ghb.hh
+++ b/src/mem/cache/prefetch/ghb.hh
@@ -42,8 +42,10 @@ class GHBPrefetcher : public BasePrefetcher
{
protected:
- Addr second_last_miss_addr[64/*MAX_CPUS*/];
- Addr last_miss_addr[64/*MAX_CPUS*/];
+ static const int Max_Contexts = 64;
+
+ Addr secondLastMissAddr[Max_Contexts];
+ Addr lastMissAddr[Max_Contexts];
Tick latency;
int degree;
diff --git a/src/mem/cache/prefetch/stride.cc b/src/mem/cache/prefetch/stride.cc
index ad5846daa..cfd2469fa 100644
--- a/src/mem/cache/prefetch/stride.cc
+++ b/src/mem/cache/prefetch/stride.cc
@@ -34,59 +34,92 @@
* Stride Prefetcher template instantiations.
*/
+#include "base/trace.hh"
#include "mem/cache/prefetch/stride.hh"
void
StridePrefetcher::calculatePrefetch(PacketPtr &pkt, std::list<Addr> &addresses,
std::list<Tick> &delays)
{
-// Addr blkAddr = pkt->paddr & ~(Addr)(this->blkSize-1);
- int contextId = pkt->req->contextId();
- if (!useContextId) contextId = 0;
+ if (!pkt->req->hasPC()) {
+ DPRINTF(HWPrefetch, "ignoring request with no PC");
+ return;
+ }
- /* Scan Table for IAddr Match */
-/* std::list<strideEntry*>::iterator iter;
- for (iter=table[contextId].begin();
- iter !=table[contextId].end();
- iter++) {
- if ((*iter)->IAddr == pkt->pc) break;
- }
+ Addr blk_addr = pkt->getAddr() & ~(Addr)(blkSize-1);
+ int ctx_id = useContextId ? pkt->req->contextId() : 0;
+ Addr pc = pkt->req->getPC();
+ assert(ctx_id < Max_Contexts);
+ std::list<StrideEntry*> &tab = table[ctx_id];
- if (iter != table[contextId].end()) {
- //Hit in table
+ /* Scan Table for instAddr Match */
+ std::list<StrideEntry*>::iterator iter;
+ for (iter = tab.begin(); iter != tab.end(); iter++) {
+ if ((*iter)->instAddr == pc)
+ break;
+ }
- int newStride = blkAddr - (*iter)->MAddr;
- if (newStride == (*iter)->stride) {
- (*iter)->confidence++;
- }
- else {
- (*iter)->stride = newStride;
- (*iter)->confidence--;
- }
+ if (iter != tab.end()) {
+ // Hit in table
- (*iter)->MAddr = blkAddr;
+ int new_stride = blk_addr - (*iter)->missAddr;
+ bool stride_match = (new_stride == (*iter)->stride);
- for (int d=1; d <= degree; d++) {
- Addr newAddr = blkAddr + d * newStride;
- if (this->pageStop &&
- (blkAddr & ~(TheISA::VMPageSize - 1)) !=
- (newAddr & ~(TheISA::VMPageSize - 1)))
- {
- //Spanned the page, so now stop
- this->pfSpanPage += degree - d + 1;
- return;
- }
- else
- {
- addresses.push_back(newAddr);
- delays.push_back(latency);
- }
- }
- }
- else {
- //Miss in table
- //Find lowest confidence and replace
+ if (stride_match && new_stride != 0) {
+ if ((*iter)->confidence < Max_Conf)
+ (*iter)->confidence++;
+ } else {
+ (*iter)->stride = new_stride;
+ if ((*iter)->confidence > Min_Conf)
+ (*iter)->confidence = 0;
+ }
- }
-*/
+ DPRINTF(HWPrefetch, "hit: PC %x blk_addr %x stride %d (%s), conf %d\n",
+ pc, blk_addr, new_stride, stride_match ? "match" : "change",
+ (*iter)->confidence);
+
+ (*iter)->missAddr = blk_addr;
+
+ if ((*iter)->confidence <= 0)
+ return;
+
+ for (int d = 1; d <= degree; d++) {
+ Addr new_addr = blk_addr + d * new_stride;
+ if (pageStop && !samePage(blk_addr, new_addr)) {
+ // Spanned the page, so now stop
+ pfSpanPage += degree - d + 1;
+ return;
+ } else {
+ DPRINTF(HWPrefetch, " queuing prefetch to %x @ %d\n",
+ new_addr, latency);
+ addresses.push_back(new_addr);
+ delays.push_back(latency);
+ }
+ }
+ } else {
+ // Miss in table
+ // Find lowest confidence and replace
+
+ DPRINTF(HWPrefetch, "miss: PC %x blk_addr %x\n", pc, blk_addr);
+
+ if (tab.size() >= 256) { //set default table size is 256
+ std::list<StrideEntry*>::iterator min_pos = tab.begin();
+ int min_conf = (*min_pos)->confidence;
+ for (iter = min_pos, ++iter; iter != tab.end(); ++iter) {
+ if ((*iter)->confidence < min_conf){
+ min_pos = iter;
+ min_conf = (*iter)->confidence;
+ }
+ }
+ DPRINTF(HWPrefetch, " replacing PC %x\n", (*min_pos)->instAddr);
+ tab.erase(min_pos);
+ }
+
+ StrideEntry *new_entry = new StrideEntry;
+ new_entry->instAddr = pc;
+ new_entry->missAddr = blk_addr;
+ new_entry->stride = 0;
+ new_entry->confidence = 0;
+ tab.push_back(new_entry);
+ }
}
diff --git a/src/mem/cache/prefetch/stride.hh b/src/mem/cache/prefetch/stride.hh
index 4738fd9bc..6ccd32b91 100644
--- a/src/mem/cache/prefetch/stride.hh
+++ b/src/mem/cache/prefetch/stride.hh
@@ -36,36 +36,36 @@
#ifndef __MEM_CACHE_PREFETCH_STRIDE_PREFETCHER_HH__
#define __MEM_CACHE_PREFETCH_STRIDE_PREFETCHER_HH__
+#include <limits.h>
#include "mem/cache/prefetch/base.hh"
class StridePrefetcher : public BasePrefetcher
{
protected:
- class strideEntry
+ static const int Max_Contexts = 64;
+
+ // These constants need to be changed with the type of the
+ // 'confidence' field below.
+ static const int Max_Conf = INT_MAX;
+ static const int Min_Conf = INT_MIN;
+
+ class StrideEntry
{
public:
- Addr IAddr;
- Addr MAddr;
+ Addr instAddr;
+ Addr missAddr;
int stride;
- int64_t confidence;
-
-/* bool operator < (strideEntry a,strideEntry b)
- {
- if (a.confidence == b.confidence) {
- return true; //??????
- }
- else return a.confidence < b.confidence;
- }*/
+ int confidence;
};
- Addr* lastMissAddr[64/*MAX_CPUS*/];
- std::list<strideEntry*> table[64/*MAX_CPUS*/];
+ Addr *lastMissAddr[Max_Contexts];
+
+ std::list<StrideEntry*> table[Max_Contexts];
Tick latency;
int degree;
bool useContextId;
-
public:
StridePrefetcher(const BaseCacheParams *p)
diff --git a/src/mem/cache/prefetch/tagged.cc b/src/mem/cache/prefetch/tagged.cc
index 6afe1c6c2..a6c2403ba 100644
--- a/src/mem/cache/prefetch/tagged.cc
+++ b/src/mem/cache/prefetch/tagged.cc
@@ -47,20 +47,15 @@ TaggedPrefetcher::
calculatePrefetch(PacketPtr &pkt, std::list<Addr> &addresses,
std::list<Tick> &delays)
{
- Addr blkAddr = pkt->getAddr() & ~(Addr)(this->blkSize-1);
+ Addr blkAddr = pkt->getAddr() & ~(Addr)(blkSize-1);
- for (int d=1; d <= degree; d++) {
- Addr newAddr = blkAddr + d*(this->blkSize);
- if (this->pageStop &&
- (blkAddr & ~(TheISA::VMPageSize - 1)) !=
- (newAddr & ~(TheISA::VMPageSize - 1)))
- {
- //Spanned the page, so now stop
- this->pfSpanPage += degree - d + 1;
+ for (int d = 1; d <= degree; d++) {
+ Addr newAddr = blkAddr + d*(blkSize);
+ if (pageStop && !samePage(blkAddr, newAddr)) {
+ // Spanned the page, so now stop
+ pfSpanPage += degree - d + 1;
return;
- }
- else
- {
+ } else {
addresses.push_back(newAddr);
delays.push_back(latency);
}