diff options
-rw-r--r-- | src/mem/cache/base.cc | 4 | ||||
-rw-r--r-- | src/mem/cache/base.hh | 35 | ||||
-rw-r--r-- | src/mem/cache/cache_impl.hh | 103 | ||||
-rw-r--r-- | src/mem/cache/tags/base.cc | 2 | ||||
-rw-r--r-- | src/mem/cache/tags/base.hh | 5 | ||||
-rw-r--r-- | src/mem/cache/tags/base_set_assoc.cc | 3 | ||||
-rw-r--r-- | src/mem/cache/tags/base_set_assoc.hh | 12 | ||||
-rw-r--r-- | src/mem/cache/tags/fa_lru.cc | 4 | ||||
-rw-r--r-- | src/mem/cache/tags/fa_lru.hh | 9 |
9 files changed, 111 insertions, 66 deletions
diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc index d89517b9c..78e2ca9ab 100644 --- a/src/mem/cache/base.cc +++ b/src/mem/cache/base.cc @@ -72,7 +72,9 @@ BaseCache::BaseCache(const Params *p) writeBuffer("write buffer", p->write_buffers, p->mshrs+1000, 0, MSHRQueue_WriteBuffer), blkSize(p->system->cacheLineSize()), - hitLatency(p->hit_latency), + lookupLatency(p->hit_latency), + forwardLatency(p->hit_latency), + fillLatency(p->response_latency), responseLatency(p->response_latency), numTarget(p->tgts_per_mshr), forwardSnoops(p->forward_snoops), diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh index 0be6b7944..beb818961 100644 --- a/src/mem/cache/base.hh +++ b/src/mem/cache/base.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2013 ARM Limited + * Copyright (c) 2012-2013, 2015 ARM Limited * All rights reserved. * * The license below extends only to copyright in the software and shall @@ -202,6 +202,17 @@ class BaseCache : public MemObject /** Write/writeback buffer */ MSHRQueue writeBuffer; + /** + * Allocate a buffer, passing the time indicating when schedule an + * event to the queued port to go and ask the MSHR and write queue + * if they have packets to send. + * + * allocateBufferInternal() function is called in: + * - MSHR allocateWriteBuffer (unchached write forwarded to WriteBuffer); + * - MSHR allocateMissBuffer (cacheable miss in MSHR queue); + * - MSHR allocateUncachedReadBuffer (unchached read allocated in MSHR + * queue) + */ MSHR *allocateBufferInternal(MSHRQueue *mq, Addr addr, int size, PacketPtr pkt, Tick time, bool requestBus) { @@ -251,15 +262,25 @@ class BaseCache : public MemObject const unsigned blkSize; /** - * The latency of a hit in this device. + * The latency of tag lookup of a cache. It occurs when there is + * an access to the cache. */ - const Cycles hitLatency; + const Cycles lookupLatency; + + /** + * This is the forward latency of the cache. 
It occurs when there + * is a cache miss and a request is forwarded downstream, in + * particular an outbound miss. + */ + const Cycles forwardLatency; + + /** The latency to fill a cache block */ + const Cycles fillLatency; /** - * The latency of sending reponse to its upper level cache/core on a - * linefill. In most contemporary processors, the return path on a cache - * miss is much quicker that the hit latency. The responseLatency parameter - * tries to capture this latency. + * The latency of sending a response to its upper level cache/core on + * a linefill. The responseLatency parameter captures this + * latency. */ const Cycles responseLatency; diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index c671deb68..2fb0baaa4 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2014 ARM Limited + * Copyright (c) 2010-2015 ARM Limited * All rights reserved. * * The license below extends only to copyright in the software and shall @@ -314,11 +314,14 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk, if (pkt->req->isUncacheable()) { uncacheableFlush(pkt); blk = NULL; - lat = hitLatency; + // lookupLatency is the latency in case the request is uncacheable. + lat = lookupLatency; return false; } int id = pkt->req->hasContextId() ? pkt->req->contextId() : -1; + // Here lat is the value passed as parameter to accessBlock() function + // that can modify its value.
blk = tags->accessBlock(pkt->getAddr(), pkt->isSecure(), lat, id); DPRINTF(Cache, "%s%s %x (%s) %s %s\n", pkt->cmdString(), @@ -392,7 +395,6 @@ Cache<TagStore>::recvTimingSnoopResp(PacketPtr pkt) { DPRINTF(Cache, "%s for %s address %x size %d\n", __func__, pkt->cmdString(), pkt->getAddr(), pkt->getSize()); - Tick time = clockEdge(hitLatency); assert(pkt->isResponse()); @@ -418,7 +420,10 @@ Cache<TagStore>::recvTimingSnoopResp(PacketPtr pkt) delete rec; // @todo someone should pay for this pkt->firstWordDelay = pkt->lastWordDelay = 0; - memSidePort->schedTimingSnoopResp(pkt, time); + // forwardLatency is set here because there is a response from an + // upper level cache. + memSidePort->schedTimingSnoopResp(pkt, clockEdge(forwardLatency)); + } template<class TagStore> @@ -449,9 +454,6 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt) delete pendingDelete[x]; pendingDelete.clear(); - // we charge hitLatency for doing just about anything here - Tick time = clockEdge(hitLatency); - assert(pkt->isRequest()); // Just forward the packet if caches are disabled. @@ -527,21 +529,34 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt) // prefetching (cache loading) uncacheable data is nonsensical pkt->makeTimingResponse(); std::memset(pkt->getPtr<uint8_t>(), 0xFF, pkt->getSize()); - cpuSidePort->schedTimingResp(pkt, clockEdge(hitLatency)); + // We use lookupLatency here because the request is uncacheable + cpuSidePort->schedTimingResp(pkt, clockEdge(lookupLatency)); return true; } else if (pkt->isWrite() && !pkt->isRead()) { - allocateWriteBuffer(pkt, time, true); + // We use forwardLatency here because there is an uncached + // memory write, forwarded to WriteBuffer. It specifies the + // latency to allocate an internal buffer and to schedule an + // event to the queued port. 
+ allocateWriteBuffer(pkt, clockEdge(forwardLatency), true); } else { - allocateUncachedReadBuffer(pkt, time, true); + // We use forwardLatency here because there is an uncached + // memory read, allocated to MSHR queue (it requires the same + // time of forwarding to WriteBuffer, in our assumption). It + // specifies the latency to allocate an internal buffer and to + // schedule an event to the queued port. + allocateUncachedReadBuffer(pkt, clockEdge(forwardLatency), true); } assert(pkt->needsResponse()); // else we should delete it here?? return true; } - Cycles lat = hitLatency; + // We use lookupLatency here because it is used to specify the latency + // to access. + Cycles lat = lookupLatency; BlkType *blk = NULL; PacketList writebacks; - + // Note that lat is passed by reference here. The function access() calls + // accessBlock() which can modify lat value. bool satisfied = access(pkt, blk, lat, writebacks); // track time of availability of next prefetch, if any @@ -565,6 +580,13 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt) pkt->makeTimingResponse(); // @todo: Make someone pay for this pkt->firstWordDelay = pkt->lastWordDelay = 0; + + // In this case we are considering lat neglecting + // responseLatency, modelling hit latency just as + // lookupLatency. We pass lat by reference to access(), + // which calls accessBlock() function. If it is a hit, + // accessBlock() can modify lat to override the + // lookupLatency value. cpuSidePort->schedTimingResp(pkt, clockEdge(lat)); } else { /// @todo nominally we should just delete the packet here, @@ -638,7 +660,12 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt) if (mshr->threadNum != 0/*pkt->req->threadId()*/) { mshr->threadNum = -1; } - mshr->allocateTarget(pkt, time, order++); + // We use forwardLatency here because it is the same + // considering new targets. We have multiple requests for the + // same address here.
It specifies the latency to allocate an + // internal buffer and to schedule an event to the queued + // port. + mshr->allocateTarget(pkt, clockEdge(forwardLatency), order++); if (mshr->getNumTargets() == numTarget) { noTargetMSHR = mshr; setBlocked(Blocked_NoTargets); @@ -669,7 +696,11 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt) // no-write-allocate or bypass accesses this will have to // be changed. if (pkt->cmd == MemCmd::Writeback) { - allocateWriteBuffer(pkt, time, true); + // We use forwardLatency here because there is an + // uncached memory write, forwarded to WriteBuffer. It + // specifies the latency to allocate an internal buffer and to + // schedule an event to the queued port. + allocateWriteBuffer(pkt, clockEdge(forwardLatency), true); } else { if (blk && blk->isValid()) { // If we have a write miss to a valid block, we @@ -691,8 +722,13 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt) assert(!blk->isWritable()); blk->status &= ~BlkReadable; } - - allocateMissBuffer(pkt, time, true); + // Here we are using forwardLatency, modelling the latency of + // a miss (outbound) just as forwardLatency, neglecting the + // lookupLatency component. In this case this latency value + // specifies the latency to allocate an internal buffer and to + // schedule an event to the queued port, when a cacheable miss + // is forwarded to MSHR queue. + allocateMissBuffer(pkt, clockEdge(forwardLatency), true); } if (prefetcher) { @@ -702,14 +738,17 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt) } } } - + // Here we consider just forward latency. if (next_pf_time != MaxTick) - requestMemSideBus(Request_PF, std::max(time, next_pf_time)); - + requestMemSideBus(Request_PF, std::max(clockEdge(forwardLatency), + next_pf_time)); // copy writebacks to write buffer while (!writebacks.empty()) { PacketPtr wbPkt = writebacks.front(); - allocateWriteBuffer(wbPkt, time, true); + // We use forwardLatency here because we are copying writebacks + // to write buffer.
It specifies the latency to allocate an internal + // buffer and to schedule an event to the queued port. + allocateWriteBuffer(wbPkt, clockEdge(forwardLatency), true); writebacks.pop_front(); } @@ -778,8 +817,8 @@ template<class TagStore> Tick Cache<TagStore>::recvAtomic(PacketPtr pkt) { - Cycles lat = hitLatency; - + // We are in atomic mode so we pay just for lookupLatency here. + Cycles lat = lookupLatency; // @TODO: make this a parameter bool last_level_cache = false; @@ -996,7 +1035,6 @@ Cache<TagStore>::recvTimingResp(PacketPtr pkt) { assert(pkt->isResponse()); - Tick time = clockEdge(hitLatency); MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState); bool is_error = pkt->isError(); @@ -1221,13 +1259,18 @@ Cache<TagStore>::recvTimingResp(PacketPtr pkt) // copy writebacks to write buffer while (!writebacks.empty()) { PacketPtr wbPkt = writebacks.front(); - allocateWriteBuffer(wbPkt, time, true); + allocateWriteBuffer(wbPkt, clockEdge(forwardLatency), true); writebacks.pop_front(); } // if we used temp block, clear it out if (blk == tempBlock) { if (blk->isDirty()) { - allocateWriteBuffer(writebackBlk(blk), time, true); + // We use forwardLatency here because we are copying + // writebacks to write buffer. It specifies the latency to + // allocate an internal buffer and to schedule an event to the + // queued port. + allocateWriteBuffer(writebackBlk(blk), clockEdge(forwardLatency), + true); } blk->invalidate(); } @@ -1467,8 +1510,8 @@ Cache<TagStore>::handleFill(PacketPtr pkt, BlkType *blk, assert(pkt->hasData()); std::memcpy(blk->data, pkt->getConstPtr<uint8_t>(), blkSize); } - - blk->whenReady = clockEdge() + responseLatency * clockPeriod() + + // We pay for fillLatency here. 
+ blk->whenReady = clockEdge() + fillLatency * clockPeriod() + pkt->lastWordDelay; return blk; @@ -1521,7 +1564,8 @@ doTimingSupplyResponse(PacketPtr req_pkt, const uint8_t *blk_data, } DPRINTF(Cache, "%s created response: %s address %x size %d\n", __func__, pkt->cmdString(), pkt->getAddr(), pkt->getSize()); - memSidePort->schedTimingSnoopResp(pkt, clockEdge(hitLatency)); + // We model a snoop just considering forwardLatency + memSidePort->schedTimingSnoopResp(pkt, clockEdge(forwardLatency)); } template<class TagStore> @@ -1794,7 +1838,8 @@ Cache<TagStore>::recvAtomicSnoop(PacketPtr pkt) BlkType *blk = tags->findBlock(pkt->getAddr(), pkt->isSecure()); handleSnoop(pkt, blk, false, false, false); - return hitLatency * clockPeriod(); + // We consider forwardLatency here because a snoop occurs in atomic mode + return forwardLatency * clockPeriod(); } diff --git a/src/mem/cache/tags/base.cc b/src/mem/cache/tags/base.cc index 47a43fb7e..8d2322e51 100644 --- a/src/mem/cache/tags/base.cc +++ b/src/mem/cache/tags/base.cc @@ -55,7 +55,7 @@ using namespace std; BaseTags::BaseTags(const Params *p) : ClockedObject(p), blkSize(p->block_size), size(p->size), - hitLatency(p->hit_latency), cache(nullptr), warmupBound(0), + accessLatency(p->hit_latency), cache(nullptr), warmupBound(0), warmedUp(false), numBlocks(0) { } diff --git a/src/mem/cache/tags/base.hh b/src/mem/cache/tags/base.hh index 9e1fb1972..03b6cfed8 100644 --- a/src/mem/cache/tags/base.hh +++ b/src/mem/cache/tags/base.hh @@ -68,9 +68,8 @@ class BaseTags : public ClockedObject const unsigned blkSize; /** The size of the cache. */ const unsigned size; - /** The hit latency of the cache. */ - const Cycles hitLatency; - + /** The access latency of the cache. */ + const Cycles accessLatency; /** Pointer to the parent cache. 
*/ BaseCache *cache; diff --git a/src/mem/cache/tags/base_set_assoc.cc b/src/mem/cache/tags/base_set_assoc.cc index bb0c20141..3c8371edb 100644 --- a/src/mem/cache/tags/base_set_assoc.cc +++ b/src/mem/cache/tags/base_set_assoc.cc @@ -68,9 +68,6 @@ BaseSetAssoc::BaseSetAssoc(const Params *p) if (assoc <= 0) { fatal("associativity must be greater than zero"); } - if (hitLatency <= 0) { - fatal("access latency must be greater than zero"); - } blkMask = blkSize - 1; setShift = floorLog2(blkSize); diff --git a/src/mem/cache/tags/base_set_assoc.hh b/src/mem/cache/tags/base_set_assoc.hh index ac575d2ff..0107aafaf 100644 --- a/src/mem/cache/tags/base_set_assoc.hh +++ b/src/mem/cache/tags/base_set_assoc.hh @@ -178,7 +178,7 @@ public: Addr tag = extractTag(addr); int set = extractSet(addr); BlkType *blk = sets[set].findBlk(tag, is_secure); - lat = hitLatency; + lat = accessLatency;; // Access all tags in parallel, hence one in each way. The data side // either accesses all blocks in parallel, or one block sequentially on @@ -195,7 +195,7 @@ public: if (blk != NULL) { if (blk->whenReady > curTick() && cache->ticksToCycles(blk->whenReady - curTick()) - > hitLatency) { + > accessLatency) { lat = cache->ticksToCycles(blk->whenReady - curTick()); } blk->refCount += 1; @@ -343,14 +343,6 @@ public: } /** - * Return the hit latency. - * @return the hit latency. 
- */ - Cycles getHitLatency() const - { - return hitLatency; - } - /** *iterated through all blocks and clear all locks *Needed to clear all lock tracking at once */ diff --git a/src/mem/cache/tags/fa_lru.cc b/src/mem/cache/tags/fa_lru.cc index 6a63da673..ffe2cbf25 100644 --- a/src/mem/cache/tags/fa_lru.cc +++ b/src/mem/cache/tags/fa_lru.cc @@ -60,8 +60,6 @@ FALRU::FALRU(const Params *p) if (!isPowerOf2(blkSize)) fatal("cache block size (in bytes) `%d' must be a power of two", blkSize); - if (!(hitLatency > 0)) - fatal("Access latency in cycles must be at least one cycle"); if (!isPowerOf2(size)) fatal("Cache Size must be power of 2 for now"); @@ -202,7 +200,7 @@ FALRU::accessBlock(Addr addr, bool is_secure, Cycles &lat, int context_src, *inCache = tmp_in_cache; } - lat = hitLatency; + lat = accessLatency; //assert(check()); return blk; } diff --git a/src/mem/cache/tags/fa_lru.hh b/src/mem/cache/tags/fa_lru.hh index ef13b2c79..07a31c154 100644 --- a/src/mem/cache/tags/fa_lru.hh +++ b/src/mem/cache/tags/fa_lru.hh @@ -210,15 +210,6 @@ public: void insertBlock(PacketPtr pkt, BlkType *blk); /** - * Return the hit latency of this cache. - * @return The hit latency. - */ - Cycles getHitLatency() const - { - return hitLatency; - } - - /** * Return the block size of this cache. * @return The block size. */ |