From c6e0d8f54f1ce90933f95a7a3a875fed53b8ee3e Mon Sep 17 00:00:00 2001
From: "Daniel R. Carvalho" <odanrc@yahoo.com.br>
Date: Thu, 18 Oct 2018 15:31:51 +0200
Subject: mem-cache: Move access latency calculation to Cache

Access latency was not being calculated properly, as it was
always assuming that for hits reads take as long as writes,
and that parallel accesses would produce the same latency
for read and write misses.

By moving the calculation to the Cache we can use the write/
read information, reduce latency variables duplication and
remove Cache dependency from Tags.

The tag lookup latency is still calculated by the Tags.

Change-Id: I71bc68fb5c3515b372c3bf002d61b6f048a45540
Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/13697
Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
Maintainer: Nikos Nikoleris <nikos.nikoleris@arm.com>
---
 src/mem/cache/base.cc                | 48 +++++++++++++++++++++++++++---------
 src/mem/cache/base.hh                | 16 ++++++++++++
 src/mem/cache/tags/Tags.py           |  4 ---
 src/mem/cache/tags/base.cc           |  6 +----
 src/mem/cache/tags/base.hh           | 18 +++++++++-----
 src/mem/cache/tags/base_set_assoc.hh | 27 +++++++-------------
 src/mem/cache/tags/fa_lru.cc         | 18 ++++----------
 src/mem/cache/tags/fa_lru.hh         |  5 ++--
 src/mem/cache/tags/sector_tags.cc    | 18 +++-----------
 src/mem/cache/tags/sector_tags.hh    |  4 +--
 10 files changed, 89 insertions(+), 75 deletions(-)

diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc
index 4ca8152e9..980aa91f8 100644
--- a/src/mem/cache/base.cc
+++ b/src/mem/cache/base.cc
@@ -95,6 +95,7 @@ BaseCache::BaseCache(const BaseCacheParams *p, unsigned blk_size)
       forwardLatency(p->tag_latency),
       fillLatency(p->data_latency),
       responseLatency(p->response_latency),
+      sequentialAccess(p->sequential_access),
       numTarget(p->tgts_per_mshr),
       forwardSnoops(true),
       clusivity(p->clusivity),
@@ -225,8 +226,8 @@ BaseCache::handleTimingReqHit(PacketPtr pkt, CacheBlk *blk, Tick request_time)
         // In this case we are considering request_time that takes
         // into account the delay of the xbar, if any, and just
         // lat, neglecting responseLatency, modelling hit latency
-        // just as lookupLatency or or the value of lat overriden
-        // by access(), that calls accessBlock() function.
+        // just as the value of lat overriden by access(), which calls
+        // the calculateAccessLatency() function.
         cpuSidePort.schedTimingResp(pkt, request_time, true);
     } else {
         DPRINTF(Cache, "%s satisfied %s, no response needed\n", __func__,
@@ -342,15 +343,13 @@ BaseCache::recvTimingReq(PacketPtr pkt)
     // the delay provided by the crossbar
     Tick forward_time = clockEdge(forwardLatency) + pkt->headerDelay;
 
-    // We use lookupLatency here because it is used to specify the latency
-    // to access.
-    Cycles lat = lookupLatency;
+    Cycles lat;
     CacheBlk *blk = nullptr;
     bool satisfied = false;
     {
         PacketList writebacks;
         // Note that lat is passed by reference here. The function
-        // access() calls accessBlock() which can modify lat value.
+        // access() will set the lat value.
         satisfied = access(pkt, blk, lat, writebacks);
 
         // copy writebacks to write buffer here to ensure they logically
@@ -360,8 +359,7 @@ BaseCache::recvTimingReq(PacketPtr pkt)
 
     // Here we charge the headerDelay that takes into account the latencies
     // of the bus, if the packet comes from it.
-    // The latency charged it is just lat that is the value of lookupLatency
-    // modified by access() function, or if not just lookupLatency.
+    // The latency charged is just the value set by the access() function.
     // In case of a hit we are neglecting response latency.
     // In case of a miss we are neglecting forward latency.
     Tick request_time = clockEdge(lat) + pkt->headerDelay;
@@ -886,6 +884,31 @@ BaseCache::satisfyRequest(PacketPtr pkt, CacheBlk *blk, bool, bool)
 // Access path: requests coming in from the CPU side
 //
 /////////////////////////////////////////////////////
+Cycles
+BaseCache::calculateAccessLatency(const CacheBlk* blk,
+                                  const Cycles lookup_lat) const
+{
+    Cycles lat(lookup_lat);
+
+    if (blk != nullptr) {
+        // First access tags, then data
+        if (sequentialAccess) {
+            lat += dataLatency;
+        // Latency is dictated by the slowest of tag and data latencies
+        } else {
+            lat = std::max(lookup_lat, dataLatency);
+        }
+
+        // Check if the block to be accessed is available. If not, apply the
+        // access latency on top of block->whenReady.
+        if (blk->whenReady > curTick() &&
+            ticksToCycles(blk->whenReady - curTick()) > lat) {
+            lat += ticksToCycles(blk->whenReady - curTick());
+        }
+    }
+
+    return lat;
+}
 
 bool
 BaseCache::access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat,
@@ -898,9 +921,12 @@ BaseCache::access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat,
                   "Should never see a write in a read-only cache %s\n",
                   name());
 
-    // Here lat is the value passed as parameter to accessBlock() function
-    // that can modify its value.
-    blk = tags->accessBlock(pkt->getAddr(), pkt->isSecure(), lat);
+    // Access block in the tags
+    Cycles tag_latency(0);
+    blk = tags->accessBlock(pkt->getAddr(), pkt->isSecure(), tag_latency);
+
+    // Calculate access latency
+    lat = calculateAccessLatency(blk, tag_latency);
 
     DPRINTF(Cache, "%s for %s %s\n", __func__, pkt->print(),
             blk ? "hit " + blk->print() : "miss");
diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh
index ad5ff3bc4..240bf216f 100644
--- a/src/mem/cache/base.hh
+++ b/src/mem/cache/base.hh
@@ -418,6 +418,17 @@ class BaseCache : public MemObject
      */
     Addr regenerateBlkAddr(CacheBlk* blk);
 
+    /**
+     * Calculate access latency in ticks given a tag lookup latency, and
+     * whether access was a hit or miss.
+     *
+     * @param blk The cache block that was accessed.
+     * @param lookup_lat Latency of the respective tag lookup.
+     * @return The number of ticks that pass due to a block access.
+     */
+    Cycles calculateAccessLatency(const CacheBlk* blk,
+                                  const Cycles lookup_lat) const;
+
     /**
      * Does all the processing necessary to perform the provided request.
      * @param pkt The memory request to perform.
@@ -805,6 +816,11 @@ class BaseCache : public MemObject
      */
     const Cycles responseLatency;
 
+    /**
+     * Whether tags and data are accessed sequentially.
+     */
+    const bool sequentialAccess;
+
     /** The number of targets for each MSHR. */
     const int numTarget;
 
diff --git a/src/mem/cache/tags/Tags.py b/src/mem/cache/tags/Tags.py
index 8e302898c..b34779eef 100644
--- a/src/mem/cache/tags/Tags.py
+++ b/src/mem/cache/tags/Tags.py
@@ -54,10 +54,6 @@ class BaseTags(ClockedObject):
     tag_latency = Param.Cycles(Parent.tag_latency,
                                "The tag lookup latency for this cache")
 
-    # Get the RAM access latency from the parent (cache)
-    data_latency = Param.Cycles(Parent.data_latency,
-                               "The data access latency for this cache")
-
     # Get the warmup percentage from the parent (cache)
     warmup_percentage = Param.Percent(Parent.warmup_percentage,
         "Percentage of tags to be touched to warm up the cache")
diff --git a/src/mem/cache/tags/base.cc b/src/mem/cache/tags/base.cc
index c6a9a8295..5fbbdc194 100644
--- a/src/mem/cache/tags/base.cc
+++ b/src/mem/cache/tags/base.cc
@@ -61,11 +61,7 @@
 
 BaseTags::BaseTags(const Params *p)
     : ClockedObject(p), blkSize(p->block_size), blkMask(blkSize - 1),
-      size(p->size),
-      lookupLatency(p->tag_latency),
-      accessLatency(p->sequential_access ?
-                    p->tag_latency + p->data_latency :
-                    std::max(p->tag_latency, p->data_latency)),
+      size(p->size), lookupLatency(p->tag_latency),
       cache(nullptr), indexingPolicy(p->indexing_policy),
       warmupBound((p->warmup_percentage/100.0) * (p->size / p->block_size)),
       warmedUp(false), numBlocks(p->size / p->block_size),
diff --git a/src/mem/cache/tags/base.hh b/src/mem/cache/tags/base.hh
index a7a35ffbb..273abf5dc 100644
--- a/src/mem/cache/tags/base.hh
+++ b/src/mem/cache/tags/base.hh
@@ -79,12 +79,7 @@ class BaseTags : public ClockedObject
     const unsigned size;
     /** The tag lookup latency of the cache. */
     const Cycles lookupLatency;
-    /**
-     * The total access latency of the cache. This latency
-     * is different depending on the cache access mode
-     * (parallel or sequential)
-     */
-    const Cycles accessLatency;
+
     /** Pointer to the parent cache. */
     BaseCache *cache;
 
@@ -293,6 +288,17 @@ class BaseTags : public ClockedObject
     virtual CacheBlk* findVictim(Addr addr, const bool is_secure,
                                  std::vector<CacheBlk*>& evict_blks) const = 0;
 
+    /**
+     * Access block and update replacement data. May not succeed, in which case
+     * nullptr is returned. This has all the implications of a cache access and
+     * should only be used as such. Returns the tag lookup latency as a side
+     * effect.
+     *
+     * @param addr The address to find.
+     * @param is_secure True if the target memory space is secure.
+     * @param lat The latency of the tag lookup.
+     * @return Pointer to the cache block if found.
+     */
     virtual CacheBlk* accessBlock(Addr addr, bool is_secure, Cycles &lat) = 0;
 
     /**
diff --git a/src/mem/cache/tags/base_set_assoc.hh b/src/mem/cache/tags/base_set_assoc.hh
index 58aceb087..bc98afa5e 100644
--- a/src/mem/cache/tags/base_set_assoc.hh
+++ b/src/mem/cache/tags/base_set_assoc.hh
@@ -115,12 +115,13 @@ class BaseSetAssoc : public BaseTags
 
     /**
      * Access block and update replacement data. May not succeed, in which case
-     * nullptr is returned. This has all the implications of a cache
-     * access and should only be used as such. Returns the access latency as a
-     * side effect.
+     * nullptr is returned. This has all the implications of a cache access and
+     * should only be used as such. Returns the tag lookup latency as a side
+     * effect.
+     *
      * @param addr The address to find.
      * @param is_secure True if the target memory space is secure.
-     * @param lat The access latency.
+     * @param lat The latency of the tag lookup.
      * @return Pointer to the cache block if found.
      */
     CacheBlk* accessBlock(Addr addr, bool is_secure, Cycles &lat) override
@@ -139,28 +140,18 @@ class BaseSetAssoc : public BaseTags
             dataAccesses += allocAssoc;
         }
 
+        // If a cache hit
         if (blk != nullptr) {
-            // If a cache hit
-            lat = accessLatency;
-            // Check if the block to be accessed is available. If not,
-            // apply the accessLatency on top of block->whenReady.
-            if (blk->whenReady > curTick() &&
-                cache->ticksToCycles(blk->whenReady - curTick()) >
-                accessLatency) {
-                lat = cache->ticksToCycles(blk->whenReady - curTick()) +
-                accessLatency;
-            }
-
             // Update number of references to accessed block
             blk->refCount++;
 
             // Update replacement data of accessed block
             replacementPolicy->touch(blk->replacementData);
-        } else {
-            // If a cache miss
-            lat = lookupLatency;
         }
 
+        // The tag lookup latency is the same for a hit or a miss
+        lat = lookupLatency;
+
         return blk;
     }
 
diff --git a/src/mem/cache/tags/fa_lru.cc b/src/mem/cache/tags/fa_lru.cc
index 2c92a940f..964846871 100644
--- a/src/mem/cache/tags/fa_lru.cc
+++ b/src/mem/cache/tags/fa_lru.cc
@@ -153,30 +153,22 @@ FALRU::accessBlock(Addr addr, bool is_secure, Cycles &lat,
     CachesMask mask = 0;
     FALRUBlk* blk = static_cast<FALRUBlk*>(findBlock(addr, is_secure));
 
+    // If a cache hit
     if (blk && blk->isValid()) {
-        // If a cache hit
-        lat = accessLatency;
-        // Check if the block to be accessed is available. If not,
-        // apply the accessLatency on top of block->whenReady.
-        if (blk->whenReady > curTick() &&
-            cache->ticksToCycles(blk->whenReady - curTick()) >
-            accessLatency) {
-            lat = cache->ticksToCycles(blk->whenReady - curTick()) +
-            accessLatency;
-        }
         mask = blk->inCachesMask;
 
         moveToHead(blk);
-    } else {
-        // If a cache miss
-        lat = lookupLatency;
     }
+
     if (in_caches_mask) {
         *in_caches_mask = mask;
     }
 
     cacheTracking.recordAccess(blk);
 
+    // The tag lookup latency is the same for a hit or a miss
+    lat = lookupLatency;
+
     return blk;
 }
 
diff --git a/src/mem/cache/tags/fa_lru.hh b/src/mem/cache/tags/fa_lru.hh
index 6ea4c8d6a..1de6de400 100644
--- a/src/mem/cache/tags/fa_lru.hh
+++ b/src/mem/cache/tags/fa_lru.hh
@@ -180,10 +180,11 @@ class FALRU : public BaseTags
      * Access block and update replacement data.  May not succeed, in which
      * case nullptr pointer is returned.  This has all the implications of a
      * cache access and should only be used as such.
-     * Returns the access latency and inCachesMask flags as a side effect.
+     * Returns tag lookup latency and the inCachesMask flags as a side effect.
+     *
      * @param addr The address to look for.
      * @param is_secure True if the target memory space is secure.
-     * @param lat The latency of the access.
+     * @param lat The latency of the tag lookup.
      * @param in_cache_mask Mask indicating the caches in which the blk fits.
      * @return Pointer to the cache block.
      */
diff --git a/src/mem/cache/tags/sector_tags.cc b/src/mem/cache/tags/sector_tags.cc
index 24751c97d..02649cc40 100644
--- a/src/mem/cache/tags/sector_tags.cc
+++ b/src/mem/cache/tags/sector_tags.cc
@@ -149,18 +149,8 @@ SectorTags::accessBlock(Addr addr, bool is_secure, Cycles &lat)
         dataAccesses += allocAssoc*numBlocksPerSector;
     }
 
+    // If a cache hit
     if (blk != nullptr) {
-        // If a cache hit
-        lat = accessLatency;
-        // Check if the block to be accessed is available. If not,
-        // apply the accessLatency on top of block->whenReady.
-        if (blk->whenReady > curTick() &&
-            cache->ticksToCycles(blk->whenReady - curTick()) >
-            accessLatency) {
-            lat = cache->ticksToCycles(blk->whenReady - curTick()) +
-            accessLatency;
-        }
-
         // Update number of references to accessed block
         blk->refCount++;
 
@@ -171,11 +161,11 @@ SectorTags::accessBlock(Addr addr, bool is_secure, Cycles &lat)
         // Update replacement data of accessed block, which is shared with
         // the whole sector it belongs to
         replacementPolicy->touch(sector_blk->replacementData);
-    } else {
-        // If a cache miss
-        lat = lookupLatency;
     }
 
+    // The tag lookup latency is the same for a hit or a miss
+    lat = lookupLatency;
+
     return blk;
 }
 
diff --git a/src/mem/cache/tags/sector_tags.hh b/src/mem/cache/tags/sector_tags.hh
index 1149ba1e0..f9d47f3c4 100644
--- a/src/mem/cache/tags/sector_tags.hh
+++ b/src/mem/cache/tags/sector_tags.hh
@@ -118,12 +118,12 @@ class SectorTags : public BaseTags
     /**
      * Access block and update replacement data. May not succeed, in which
      * case nullptr is returned. This has all the implications of a cache
-     * access and should only be used as such. Returns the access latency
+     * access and should only be used as such. Returns the tag lookup latency
      * as a side effect.
      *
      * @param addr The address to find.
      * @param is_secure True if the target memory space is secure.
-     * @param lat The access latency.
+     * @param lat The latency of the tag lookup.
      * @return Pointer to the cache block if found.
      */
     CacheBlk* accessBlock(Addr addr, bool is_secure, Cycles &lat) override;
-- 
cgit v1.2.3