9 files changed, 111 insertions, 66 deletions
diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc
index d89517b9c..78e2ca9ab 100644
--- a/src/mem/cache/base.cc
+++ b/src/mem/cache/base.cc
@@ -72,7 +72,9 @@ BaseCache::BaseCache(const Params *p)
       writeBuffer("write buffer", p->write_buffers, p->mshrs+1000, 0,
                   MSHRQueue_WriteBuffer),
       blkSize(p->system->cacheLineSize()),
-      hitLatency(p->hit_latency),
+      lookupLatency(p->hit_latency),
+      forwardLatency(p->hit_latency),
+      fillLatency(p->response_latency),
       responseLatency(p->response_latency),
       numTarget(p->tgts_per_mshr),
       forwardSnoops(p->forward_snoops),
diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh
index 0be6b7944..beb818961 100644
--- a/src/mem/cache/base.hh
+++ b/src/mem/cache/base.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2013 ARM Limited
+ * Copyright (c) 2012-2013, 2015 ARM Limited
  * All rights reserved.
  *
  * The license below extends only to copyright in the software and shall
@@ -202,6 +202,17 @@ class BaseCache : public MemObject
     /** Write/writeback buffer */
     MSHRQueue writeBuffer;
 
+    /**
+     * Allocate a buffer, passing the time indicating when to schedule
+     * an event to the queued port to go and ask the MSHR and write
+     * queue if they have packets to send.
+     *
+     * allocateBufferInternal() function is called in:
+     * - MSHR allocateWriteBuffer (uncached write forwarded to WriteBuffer);
+     * - MSHR allocateMissBuffer (cacheable miss in MSHR queue);
+     * - MSHR allocateUncachedReadBuffer (uncached read allocated in MSHR
+     *   queue)
+     */
     MSHR *allocateBufferInternal(MSHRQueue *mq, Addr addr, int size,
                                  PacketPtr pkt, Tick time, bool requestBus)
     {
@@ -251,15 +262,25 @@ class BaseCache : public MemObject
     const unsigned blkSize;
 
     /**
-     * The latency of a hit in this device.
+     * The latency of tag lookup of a cache. It occurs when there is
+     * an access to the cache.
      */
-    const Cycles hitLatency;
+    const Cycles lookupLatency;
+
+    /**
+     * This is the forward latency of the cache. It occurs when there
+     * is a cache miss and a request is forwarded downstream, in
+     * particular an outbound miss.
+     */
+    const Cycles forwardLatency;
+
+    /** The latency to fill a cache block */
+    const Cycles fillLatency;
 
     /**
-     * The latency of sending reponse to its upper level cache/core on a
-     * linefill. In most contemporary processors, the return path on a cache
-     * miss is much quicker that the hit latency. The responseLatency parameter
-     * tries to capture this latency.
+     * The latency of sending a response to its upper level cache/core
+     * on a linefill. The responseLatency parameter captures this
+     * latency.
      */
     const Cycles responseLatency;
 
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index c671deb68..2fb0baaa4 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010-2014 ARM Limited
+ * Copyright (c) 2010-2015 ARM Limited
  * All rights reserved.
  *
  * The license below extends only to copyright in the software and shall
@@ -314,11 +314,14 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk,
     if (pkt->req->isUncacheable()) {
         uncacheableFlush(pkt);
         blk = NULL;
-        lat = hitLatency;
+        // lookupLatency is the latency in case the request is uncacheable.
+        lat = lookupLatency;
         return false;
     }
 
     int id = pkt->req->hasContextId() ? pkt->req->contextId() : -1;
+    // Here lat is the value passed as parameter to accessBlock() function
+    // that can modify its value.
     blk = tags->accessBlock(pkt->getAddr(), pkt->isSecure(), lat, id);
 
     DPRINTF(Cache, "%s%s %x (%s) %s %s\n", pkt->cmdString(),
@@ -392,7 +395,6 @@ Cache<TagStore>::recvTimingSnoopResp(PacketPtr pkt)
 {
     DPRINTF(Cache, "%s for %s address %x size %d\n", __func__,
             pkt->cmdString(), pkt->getAddr(), pkt->getSize());
-    Tick time = clockEdge(hitLatency);
 
     assert(pkt->isResponse());
 
@@ -418,7 +420,10 @@ Cache<TagStore>::recvTimingSnoopResp(PacketPtr pkt)
     delete rec;
     // @todo someone should pay for this
     pkt->firstWordDelay = pkt->lastWordDelay = 0;
-    memSidePort->schedTimingSnoopResp(pkt, time);
+    // forwardLatency is set here because there is a response from an
+    // upper level cache.
+    memSidePort->schedTimingSnoopResp(pkt, clockEdge(forwardLatency));
+
 }
 
 template<class TagStore>
@@ -449,9 +454,6 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
         delete pendingDelete[x];
     pendingDelete.clear();
 
-    // we charge hitLatency for doing just about anything here
-    Tick time = clockEdge(hitLatency);
-
     assert(pkt->isRequest());
 
     // Just forward the packet if caches are disabled.
@@ -527,21 +529,34 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
             // prefetching (cache loading) uncacheable data is nonsensical
             pkt->makeTimingResponse();
             std::memset(pkt->getPtr<uint8_t>(), 0xFF, pkt->getSize());
-            cpuSidePort->schedTimingResp(pkt, clockEdge(hitLatency));
+            // We use lookupLatency here because the request is uncacheable
+            cpuSidePort->schedTimingResp(pkt, clockEdge(lookupLatency));
             return true;
         } else if (pkt->isWrite() && !pkt->isRead()) {
-            allocateWriteBuffer(pkt, time, true);
+            // We use forwardLatency here because there is an uncached
+            // memory write, forwarded to WriteBuffer. It specifies the
+            // latency to allocate an internal buffer and to schedule an
+            // event to the queued port.
+            allocateWriteBuffer(pkt, clockEdge(forwardLatency), true);
         } else {
-            allocateUncachedReadBuffer(pkt, time, true);
+            // We use forwardLatency here because there is an uncached
+            // memory read, allocated in the MSHR queue (we assume it takes
+            // the same time as forwarding to the WriteBuffer). It
+            // specifies the latency to allocate an internal buffer and to
+            // schedule an event to the queued port.
+            allocateUncachedReadBuffer(pkt, clockEdge(forwardLatency), true);
         }
         assert(pkt->needsResponse()); // else we should delete it here??
         return true;
     }
 
-    Cycles lat = hitLatency;
+    // We use lookupLatency here because it is used to specify the latency
+    // to access.
+    Cycles lat = lookupLatency;
     BlkType *blk = NULL;
     PacketList writebacks;
-
+    // Note that lat is passed by reference here. The function access() calls
+    // accessBlock() which can modify lat value.
     bool satisfied = access(pkt, blk, lat, writebacks);
 
     // track time of availability of next prefetch, if any
@@ -565,6 +580,13 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
             pkt->makeTimingResponse();
             // @todo: Make someone pay for this
             pkt->firstWordDelay = pkt->lastWordDelay = 0;
+
+            // In this case we are considering lat while neglecting
+            // responseLatency, modelling hit latency just as
+            // lookupLatency. We pass lat by reference to access(),
+            // which calls the accessBlock() function. If it is a hit,
+            // accessBlock() can modify lat to override the
+            // lookupLatency value.
             cpuSidePort->schedTimingResp(pkt, clockEdge(lat));
         } else {
             /// @todo nominally we should just delete the packet here,
@@ -638,7 +660,12 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
                 if (mshr->threadNum != 0/*pkt->req->threadId()*/) {
                     mshr->threadNum = -1;
                 }
-                mshr->allocateTarget(pkt, time, order++);
+                // We use forwardLatency here because the cost is the same
+                // when adding new targets. We have multiple requests for the
+                // same address here. It specifies the latency to allocate an
+                // internal buffer and to schedule an event to the queued
+                // port.
+                mshr->allocateTarget(pkt, clockEdge(forwardLatency), order++);
                 if (mshr->getNumTargets() == numTarget) {
                     noTargetMSHR = mshr;
                     setBlocked(Blocked_NoTargets);
@@ -669,7 +696,11 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
             // no-write-allocate or bypass accesses this will have to
             // be changed.
             if (pkt->cmd == MemCmd::Writeback) {
-                allocateWriteBuffer(pkt, time, true);
+                // We use forwardLatency here because there is a
+                // writeback, forwarded to WriteBuffer. It specifies
+                // the latency to allocate an internal buffer and to
+                // schedule an event to the queued port.
+                allocateWriteBuffer(pkt, clockEdge(forwardLatency), true);
             } else {
                 if (blk && blk->isValid()) {
                     // If we have a write miss to a valid block, we
@@ -691,8 +722,13 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
                     assert(!blk->isWritable());
                     blk->status &= ~BlkReadable;
                 }
-
-                allocateMissBuffer(pkt, time, true);
+                // Here we are using forwardLatency, modelling the latency of
+                // a miss (outbound) just as forwardLatency, neglecting the
+                // lookupLatency component. In this case this latency value
+                // specifies the latency to allocate an internal buffer and to
+                // schedule an event to the queued port, when a cacheable miss
+                // is forwarded to MSHR queue.
+                allocateMissBuffer(pkt, clockEdge(forwardLatency), true);
             }
 
             if (prefetcher) {
@@ -702,14 +738,17 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
             }
         }
     }
-
+    // Here we consider just forward latency.
     if (next_pf_time != MaxTick)
-        requestMemSideBus(Request_PF, std::max(time, next_pf_time));
-
+        requestMemSideBus(Request_PF, std::max(clockEdge(forwardLatency),
+                                                next_pf_time));
     // copy writebacks to write buffer
     while (!writebacks.empty()) {
         PacketPtr wbPkt = writebacks.front();
-        allocateWriteBuffer(wbPkt, time, true);
+        // We use forwardLatency here because we are copying writebacks
+        // to write buffer. It specifies the latency to allocate an internal
+        // buffer and to schedule an event to the queued port.
+        allocateWriteBuffer(wbPkt, clockEdge(forwardLatency), true);
         writebacks.pop_front();
     }
 
@@ -778,8 +817,8 @@ template<class TagStore>
 Tick
 Cache<TagStore>::recvAtomic(PacketPtr pkt)
 {
-    Cycles lat = hitLatency;
-
+    // We are in atomic mode so we pay just for lookupLatency here.
+    Cycles lat = lookupLatency;
     // @TODO: make this a parameter
     bool last_level_cache = false;
 
@@ -996,7 +1035,6 @@ Cache<TagStore>::recvTimingResp(PacketPtr pkt)
 {
     assert(pkt->isResponse());
 
-    Tick time = clockEdge(hitLatency);
     MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
     bool is_error = pkt->isError();
 
@@ -1221,13 +1259,18 @@ Cache<TagStore>::recvTimingResp(PacketPtr pkt)
     // copy writebacks to write buffer
     while (!writebacks.empty()) {
         PacketPtr wbPkt = writebacks.front();
-        allocateWriteBuffer(wbPkt, time, true);
+        allocateWriteBuffer(wbPkt, clockEdge(forwardLatency), true);
         writebacks.pop_front();
     }
     // if we used temp block, clear it out
     if (blk == tempBlock) {
         if (blk->isDirty()) {
-            allocateWriteBuffer(writebackBlk(blk), time, true);
+            // We use forwardLatency here because we are copying
+            // writebacks to write buffer. It specifies the latency to
+            // allocate an internal buffer and to schedule an event to the
+            // queued port.
+            allocateWriteBuffer(writebackBlk(blk), clockEdge(forwardLatency),
+                                 true);
         }
         blk->invalidate();
     }
@@ -1467,8 +1510,8 @@ Cache<TagStore>::handleFill(PacketPtr pkt, BlkType *blk,
         assert(pkt->hasData());
         std::memcpy(blk->data, pkt->getConstPtr<uint8_t>(), blkSize);
     }
-
-    blk->whenReady = clockEdge() + responseLatency * clockPeriod() +
+    // We pay for fillLatency here.
+    blk->whenReady = clockEdge() + fillLatency * clockPeriod() +
         pkt->lastWordDelay;
 
     return blk;
@@ -1521,7 +1564,8 @@ doTimingSupplyResponse(PacketPtr req_pkt, const uint8_t *blk_data,
     }
     DPRINTF(Cache, "%s created response: %s address %x size %d\n",
             __func__, pkt->cmdString(), pkt->getAddr(), pkt->getSize());
-    memSidePort->schedTimingSnoopResp(pkt, clockEdge(hitLatency));
+    // We model a snoop just considering forwardLatency
+    memSidePort->schedTimingSnoopResp(pkt, clockEdge(forwardLatency));
 }
 
 template<class TagStore>
@@ -1794,7 +1838,8 @@ Cache<TagStore>::recvAtomicSnoop(PacketPtr pkt)
 
     BlkType *blk = tags->findBlock(pkt->getAddr(), pkt->isSecure());
     handleSnoop(pkt, blk, false, false, false);
-    return hitLatency * clockPeriod();
+    // We consider forwardLatency here because a snoop occurs in atomic mode
+    return forwardLatency * clockPeriod();
 }
 
 
diff --git a/src/mem/cache/tags/base.cc b/src/mem/cache/tags/base.cc
index 47a43fb7e..8d2322e51 100644
--- a/src/mem/cache/tags/base.cc
+++ b/src/mem/cache/tags/base.cc
@@ -55,7 +55,7 @@ using namespace std;
 
 BaseTags::BaseTags(const Params *p)
     : ClockedObject(p), blkSize(p->block_size), size(p->size),
-      hitLatency(p->hit_latency), cache(nullptr), warmupBound(0),
+      accessLatency(p->hit_latency), cache(nullptr), warmupBound(0),
       warmedUp(false), numBlocks(0)
 {
 }
diff --git a/src/mem/cache/tags/base.hh b/src/mem/cache/tags/base.hh
index 9e1fb1972..03b6cfed8 100644
--- a/src/mem/cache/tags/base.hh
+++ b/src/mem/cache/tags/base.hh
@@ -68,9 +68,8 @@ class BaseTags : public ClockedObject
     const unsigned blkSize;
     /** The size of the cache. */
     const unsigned size;
-    /** The hit latency of the cache. */
-    const Cycles hitLatency;
-
+    /** The access latency of the cache. */
+    const Cycles accessLatency;
     /** Pointer to the parent cache. */
     BaseCache *cache;
 
diff --git a/src/mem/cache/tags/base_set_assoc.cc b/src/mem/cache/tags/base_set_assoc.cc
index bb0c20141..3c8371edb 100644
--- a/src/mem/cache/tags/base_set_assoc.cc
+++ b/src/mem/cache/tags/base_set_assoc.cc
@@ -68,9 +68,6 @@ BaseSetAssoc::BaseSetAssoc(const Params *p)
     if (assoc <= 0) {
         fatal("associativity must be greater than zero");
     }
-    if (hitLatency <= 0) {
-        fatal("access latency must be greater than zero");
-    }
 
     blkMask = blkSize - 1;
     setShift = floorLog2(blkSize);
diff --git a/src/mem/cache/tags/base_set_assoc.hh b/src/mem/cache/tags/base_set_assoc.hh
index ac575d2ff..0107aafaf 100644
--- a/src/mem/cache/tags/base_set_assoc.hh
+++ b/src/mem/cache/tags/base_set_assoc.hh
@@ -178,7 +178,7 @@ public:
         Addr tag = extractTag(addr);
         int set = extractSet(addr);
         BlkType *blk = sets[set].findBlk(tag, is_secure);
-        lat = hitLatency;
+        lat = accessLatency;;
 
         // Access all tags in parallel, hence one in each way.  The data side
         // either accesses all blocks in parallel, or one block sequentially on
@@ -195,7 +195,7 @@ public:
         if (blk != NULL) {
             if (blk->whenReady > curTick()
                 && cache->ticksToCycles(blk->whenReady - curTick())
-                > hitLatency) {
+                > accessLatency) {
                 lat = cache->ticksToCycles(blk->whenReady - curTick());
             }
             blk->refCount += 1;
@@ -343,14 +343,6 @@ public:
     }
 
     /**
-     * Return the hit latency.
-     * @return the hit latency.
-     */
-    Cycles getHitLatency() const
-    {
-        return hitLatency;
-    }
-    /**
      *iterated through all blocks and clear all locks
      *Needed to clear all lock tracking at once
      */
diff --git a/src/mem/cache/tags/fa_lru.cc b/src/mem/cache/tags/fa_lru.cc
index 6a63da673..ffe2cbf25 100644
--- a/src/mem/cache/tags/fa_lru.cc
+++ b/src/mem/cache/tags/fa_lru.cc
@@ -60,8 +60,6 @@ FALRU::FALRU(const Params *p)
     if (!isPowerOf2(blkSize))
         fatal("cache block size (in bytes) `%d' must be a power of two",
               blkSize);
-    if (!(hitLatency > 0))
-        fatal("Access latency in cycles must be at least one cycle");
     if (!isPowerOf2(size))
         fatal("Cache Size must be power of 2 for now");
 
@@ -202,7 +200,7 @@ FALRU::accessBlock(Addr addr, bool is_secure, Cycles &lat, int context_src,
         *inCache = tmp_in_cache;
     }
 
-    lat = hitLatency;
+    lat = accessLatency;
     //assert(check());
     return blk;
 }
diff --git a/src/mem/cache/tags/fa_lru.hh b/src/mem/cache/tags/fa_lru.hh
index ef13b2c79..07a31c154 100644
--- a/src/mem/cache/tags/fa_lru.hh
+++ b/src/mem/cache/tags/fa_lru.hh
@@ -210,15 +210,6 @@ public:
     void insertBlock(PacketPtr pkt, BlkType *blk);
 
     /**
-     * Return the hit latency of this cache.
-     * @return The hit latency.
-     */
-    Cycles getHitLatency() const
-    {
-        return hitLatency;
-    }
-
-    /**
      * Return the block size of this cache.
      * @return The block size.
      */