author     Andreas Hansson <andreas.hansson@arm.com>  2015-09-25 07:13:54 -0400
committer  Andreas Hansson <andreas.hansson@arm.com>  2015-09-25 07:13:54 -0400
commit     462c288a757bc4fd50324ebe2f1fd697d53838ab (patch)
tree       02cc70a4d0682136b458ee59d5c9dad0dfdec00a /src/mem/cache/cache.cc
parent     3bd78a141ec60ae65d910286c3d99ae7030ba703 (diff)
mem: Make the coherent crossbar account for timing snoops
This patch introduces the concept of a snoop latency. Given the requirement to snoop and forward packets in zero time (due to the coherency mechanism), the latency is accounted for later. On a snoop, we establish the latency, and later add it to the header delay of the packet. To allow multiple caches to contribute to the snoop latency, we use a separate variable in the packet, and then take the maximum before adding it to the header delay.
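As a rough illustration of that scheme (a standalone sketch, not gem5 code: the Packet struct and the snoopCache/crossbarFinishSnoop helpers below are simplified stand-ins for the real classes), each snooped cache only ever raises the packet's snoop delay to its own cost, and the crossbar folds the resulting maximum into the header delay once the snoop has fanned out:

// Minimal sketch of the snoop-delay accounting pattern (not gem5 code).
#include <algorithm>
#include <cstdint>
#include <iostream>

struct Packet {
    uint32_t headerDelay = 0; // delay already accumulated along the path
    uint32_t snoopDelay = 0;  // worst-case snoop cost recorded so far
};

// Each snooped cache raises snoopDelay to at least its own cost rather
// than overwriting what neighbouring caches have already recorded.
void snoopCache(Packet &pkt, uint32_t cache_snoop_cost)
{
    pkt.snoopDelay = std::max(pkt.snoopDelay, cache_snoop_cost);
}

// Once the crossbar has fanned the snoop out to every cache, it folds
// the resulting maximum into the header delay and clears snoopDelay.
void crossbarFinishSnoop(Packet &pkt)
{
    pkt.headerDelay += pkt.snoopDelay;
    pkt.snoopDelay = 0;
}

int main()
{
    Packet pkt;
    const uint32_t snoop_costs[] = {500, 1000, 750}; // ticks, per cache
    for (uint32_t cost : snoop_costs)
        snoopCache(pkt, cost);
    crossbarFinishSnoop(pkt);
    std::cout << "headerDelay = " << pkt.headerDelay << " ticks\n"; // 1000
    return 0;
}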
Diffstat (limited to 'src/mem/cache/cache.cc')
-rw-r--r--  src/mem/cache/cache.cc | 36
1 file changed, 29 insertions(+), 7 deletions(-)
diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc
index 3060c3e77..19d8dc800 100644
--- a/src/mem/cache/cache.cc
+++ b/src/mem/cache/cache.cc
@@ -1730,7 +1730,7 @@ Cache::doTimingSupplyResponse(PacketPtr req_pkt, const uint8_t *blk_data,
     memSidePort->schedTimingSnoopResp(pkt, forward_time, true);
 }
 
-void
+uint32_t
 Cache::handleSnoop(PacketPtr pkt, CacheBlk *blk, bool is_timing,
                    bool is_deferred, bool pending_inval)
 {
@@ -1748,6 +1748,8 @@ Cache::handleSnoop(PacketPtr pkt, CacheBlk *blk, bool is_timing,
     bool invalidate = pkt->isInvalidate();
     bool M5_VAR_USED needs_exclusive = pkt->needsExclusive();
 
+    uint32_t snoop_delay = 0;
+
     if (forwardSnoops) {
         // first propagate snoop upward to see if anyone above us wants to
         // handle it. save & restore packet src since it will get
@@ -1765,6 +1767,12 @@ Cache::handleSnoop(PacketPtr pkt, CacheBlk *blk, bool is_timing,
             // time
             snoopPkt.headerDelay = snoopPkt.payloadDelay = 0;
             cpuSidePort->sendTimingSnoopReq(&snoopPkt);
+
+            // add the header delay (including crossbar and snoop
+            // delays) of the upward snoop to the snoop delay for this
+            // cache
+            snoop_delay += snoopPkt.headerDelay;
+
             if (snoopPkt.memInhibitAsserted()) {
                 // cache-to-cache response from some upper cache
                 assert(!alreadyResponded);
@@ -1796,7 +1804,7 @@ Cache::handleSnoop(PacketPtr pkt, CacheBlk *blk, bool is_timing,
     if (!blk || !blk->isValid()) {
         DPRINTF(Cache, "%s snoop miss for %s addr %#llx size %d\n",
                 __func__, pkt->cmdString(), pkt->getAddr(), pkt->getSize());
-        return;
+        return snoop_delay;
     } else {
         DPRINTF(Cache, "%s snoop hit for %s for addr %#llx size %d, "
                 "old state is %s\n", __func__, pkt->cmdString(),
@@ -1824,7 +1832,7 @@ Cache::handleSnoop(PacketPtr pkt, CacheBlk *blk, bool is_timing,
         DPRINTF(Cache, "Found addr %#llx in upper level cache for snoop %s from"
                 " lower cache\n", pkt->getAddr(), pkt->cmdString());
         pkt->setBlockCached();
-        return;
+        return snoop_delay;
     }
 
     if (!pkt->req->isUncacheable() && pkt->isRead() && !invalidate) {
@@ -1884,6 +1892,8 @@ Cache::handleSnoop(PacketPtr pkt, CacheBlk *blk, bool is_timing,
     }
 
     DPRINTF(Cache, "new state is %s\n", blk->print());
+
+    return snoop_delay;
 }
@@ -1907,6 +1917,14 @@ Cache::recvTimingSnoopReq(PacketPtr pkt)
     Addr blk_addr = blockAlign(pkt->getAddr());
     MSHR *mshr = mshrQueue.findMatch(blk_addr, is_secure);
 
+    // Update the latency cost of the snoop so that the crossbar can
+    // account for it. Do not overwrite what other neighbouring caches
+    // have already done, rather take the maximum. The update is
+    // tentative, for cases where we return before an upward snoop
+    // happens below.
+    pkt->snoopDelay = std::max<uint32_t>(pkt->snoopDelay,
+                                         lookupLatency * clockPeriod());
+
     // Inform request(Prefetch, CleanEvict or Writeback) from below of
     // MSHR hit, set setBlockCached.
     if (mshr && pkt->mustCheckAbove()) {
@@ -2004,7 +2022,12 @@ Cache::recvTimingSnoopReq(PacketPtr pkt)
     // We could be more selective and return here if the
     // request is non-exclusive or if the writeback is
    // exclusive.
-    handleSnoop(pkt, blk, true, false, false);
+    uint32_t snoop_delay = handleSnoop(pkt, blk, true, false, false);
+
+    // Override what we did when we first saw the snoop, as we now
+    // also have the cost of the upwards snoops to account for
+    pkt->snoopDelay = std::max<uint32_t>(pkt->snoopDelay, snoop_delay +
+                                         lookupLatency * clockPeriod());
 }
 
 bool
@@ -2030,9 +2053,8 @@ Cache::recvAtomicSnoop(PacketPtr pkt)
     }
 
     CacheBlk *blk = tags->findBlock(pkt->getAddr(), pkt->isSecure());
-    handleSnoop(pkt, blk, false, false, false);
-    // We consider forwardLatency here because a snoop occurs in atomic mode
-    return forwardLatency * clockPeriod();
+    uint32_t snoop_delay = handleSnoop(pkt, blk, false, false, false);
+    return snoop_delay + lookupLatency * clockPeriod();
 }