mem: Add clean evicts to improve snoop filter tracking

This patch adds eviction notices to the caches, to provide accurate tracking of cache blocks in snoop filters. We add the CleanEvict message to the memory hierarchy and use both CleanEvicts and Writebacks with BLOCK_CACHED flags to propagate notice of clean and dirty evictions respectively, down the memory hierarchy. Note that the BLOCK_CACHED flag indicates whether there exist any copies of the evicted block in the caches above the evicting cache. The purpose of the CleanEvict message is to notify snoop filters of silent evictions in the relevant caches. The CleanEvict message behaves much like a Writeback. CleanEvict is a write and a request but unlike a Writeback, CleanEvict does not have data and does not need exclusive access to the block. The cache generates the CleanEvict message on a fill resulting in eviction of a clean block. Before travelling downwards CleanEvict requests generate zero-time snoop requests to check if the same block is cached in upper levels of the memory hierarchy. If the block exists, the cache discards the CleanEvict message. The snoops check the tags, writeback queue and the MSHRs of upper level caches in a manner similar to snoops generated from HardPFReqs. Currently CleanEvicts keep travelling towards main memory unless they encounter the block corresponding to their address or reach main memory (since we have no well defined point of serialisation). Main memory simply discards CleanEvict messages. We have modified the behavior of Writebacks, such that they generate snoops to check for the presence of blocks in upper level caches. It is possible in our current implementation for a lower level cache to be writing back a block while a shared copy of the same block exists in the upper level cache. If the snoops find the same block in upper level caches, we set the BLOCK_CACHED flag in the Writeback message. We have also added logic to account for interaction of other message types with CleanEvicts waiting in the writeback queue. 
A simple example is of a response arriving at a cache removing any CleanEvicts to the same address from the cache's writeback queue.
author: Ali Jafri <ali.jafri@arm.com> 2015-07-03 10:14:37 -0400
committer: Ali Jafri <ali.jafri@arm.com> 2015-07-03 10:14:37 -0400
commit: a262908acc0a641700a03fcea89c48133f0467cd (patch)
tree: 86fa711fe3b1d3459d0f0088a3cfffa5d14a6dad /src/mem/cache/cache_impl.hh
parent: aa5bbe81f6fd18d824608d48a5adf74ea2c5b51d (diff)
download: gem5-a262908acc0a641700a03fcea89c48133f0467cd.tar.xz
1 files changed, 313 insertions, 121 deletions
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 9c5070ffa..117596d9b 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -334,6 +334,36 @@ Cache::access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat,
             pkt->getAddr(), pkt->getSize(), pkt->isSecure() ? "s" : "ns",
             blk ? "hit " + blk->print() : "miss");
 
+
+    if (pkt->evictingBlock()) {
+        // We check for presence of block in above caches before issuing
+        // Writeback or CleanEvict to write buffer. Therefore the only
+        // possible cases can be of a CleanEvict packet coming from above
+        // encountering a Writeback generated in this cache and
+        // waiting in the write buffer. Cases of upper level peer caches
+        // generating CleanEvict and Writeback or simply CleanEvict and
+        // CleanEvict almost simultaneously will be caught by snoops sent out
+        // by crossbar.
+        std::vector<MSHR *> outgoing;
+        if (writeBuffer.findMatches(pkt->getAddr(), pkt->isSecure(),
+                                   outgoing)) {
+            assert(outgoing.size() == 1);
+            PacketPtr wbPkt = outgoing[0]->getTarget()->pkt;
+            assert(pkt->cmd == MemCmd::CleanEvict &&
+                   wbPkt->cmd == MemCmd::Writeback);
+            // As the CleanEvict is coming from above, it would have snooped
+            // into other peer caches of the same level while traversing the
+            // crossbar. If a copy of the block had been found, the CleanEvict
+            // would have been deleted in the crossbar. Now that the
+            // CleanEvict is here we can be sure none of the other upper level
+            // caches connected to this cache have the block, so we can clear
+            // the BLOCK_CACHED flag in the Writeback if set and discard the
+            // CleanEvict by returning true.
+            wbPkt->clearBlockCached();
+            return true;
+        }
+    }
+
     // Writeback handling is special case.  We can write the block into
     // the cache without having a writeable copy (or any copy at all).
     if (pkt->cmd == MemCmd::Writeback) {
@@ -363,6 +393,19 @@ Cache::access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat,
         DPRINTF(Cache, "%s new state is %s\n", __func__, blk->print());
         incHitCount(pkt);
         return true;
+    } else if (pkt->cmd == MemCmd::CleanEvict) {
+        if (blk != NULL) {
+            // Found the block in the tags, need to stop CleanEvict from
+            // propagating further down the hierarchy. Returning true will
+            // treat the CleanEvict like a satisfied write request and delete
+            // it.
+            return true;
+        }
+        // We didn't find the block here, propagate the CleanEvict further
+        // down the memory hierarchy. Returning false will treat the CleanEvict
+        // like a Writeback which could not find a replaceable block so has to
+        // go to next level.
+        return false;
     } else if ((blk != NULL) &&
                (pkt->needsExclusive() ? blk->isWritable()
                                       : blk->isReadable())) {
@@ -395,6 +438,41 @@ class ForwardResponseRecord : public Packet::SenderState
 };
 
 void
+Cache::doWritebacks(PacketList& writebacks, Tick forward_time)
+{
+    while (!writebacks.empty()) {
+        PacketPtr wbPkt = writebacks.front();
+        // We use forwardLatency here because we are copying writebacks to
+        // write buffer.  Call isCachedAbove for both Writebacks and
+        // CleanEvicts. If isCachedAbove returns true we set BLOCK_CACHED flag
+        // in Writebacks and discard CleanEvicts.
+        if (isCachedAbove(wbPkt)) {
+            if (wbPkt->cmd == MemCmd::CleanEvict) {
+                // Delete CleanEvict because cached copies exist above. The
+                // packet destructor will delete the request object because
+                // this is a non-snoop request packet which does not require a
+                // response.
+                delete wbPkt;
+            } else {
+                // Set BLOCK_CACHED flag in Writeback and send below, so that
+                // the Writeback does not reset the bit corresponding to this
+                // address in the snoop filter below.
+                wbPkt->setBlockCached();
+                allocateWriteBuffer(wbPkt, forward_time, true);
+            }
+        } else {
+            // If the block is not cached above, send packet below. Both
+            // CleanEvict and Writeback with BLOCK_CACHED flag cleared will
+            // reset the bit corresponding to this address in the snoop filter
+            // below.
+            allocateWriteBuffer(wbPkt, forward_time, true);
+        }
+        writebacks.pop_front();
+    }
+}
+
+
+void
 Cache::recvTimingSnoopResp(PacketPtr pkt)
 {
     DPRINTF(Cache, "%s for %s addr %#llx size %d\n", __func__,
@@ -510,7 +588,7 @@ Cache::recvTimingReq(PacketPtr pkt)
 
         /// @todo nominally we should just delete the packet here,
         /// however, until 4-phase stuff we can't because sending
-        /// cache is still relying on it
+        /// cache is still relying on it.
         pendingDelete.push_back(pkt);
 
         // no need to take any action in this particular cache as the
@@ -537,13 +615,7 @@ Cache::recvTimingReq(PacketPtr pkt)
 
         // copy writebacks to write buffer here to ensure they logically
         // proceed anything happening below
-        while (!writebacks.empty()) {
-            PacketPtr wbPkt = writebacks.front();
-            // We use forwardLatency here because we are copying
-            // writebacks to write buffer.
-            allocateWriteBuffer(wbPkt, forward_time, true);
-            writebacks.pop_front();
-        }
+        doWritebacks(writebacks, forward_time);
     }
 
     // Here we charge the headerDelay that takes into account the latencies
@@ -591,8 +663,10 @@ Cache::recvTimingReq(PacketPtr pkt)
             cpuSidePort->schedTimingResp(pkt, request_time);
         } else {
             /// @todo nominally we should just delete the packet here,
-            /// however, until 4-phase stuff we can't because sending
-            /// cache is still relying on it
+            /// however, until 4-phase stuff we can't because sending cache is
+            /// still relying on it. If the block is found in access(),
+            /// CleanEvict and Writeback messages will be deleted here as
+            /// well.
             pendingDelete.push_back(pkt);
         }
     } else {
@@ -660,31 +734,38 @@ Cache::recvTimingReq(PacketPtr pkt)
 
             // Coalesce unless it was a software prefetch (see above).
             if (pkt) {
-                DPRINTF(Cache, "%s coalescing MSHR for %s addr %#llx size %d\n",
-                        __func__, pkt->cmdString(), pkt->getAddr(),
-                        pkt->getSize());
-
-                assert(pkt->req->masterId() < system->maxMasters());
-                mshr_hits[pkt->cmdToIndex()][pkt->req->masterId()]++;
-                if (mshr->threadNum != 0/*pkt->req->threadId()*/) {
-                    mshr->threadNum = -1;
-                }
-                // We use forward_time here because it is the same
-                // considering new targets. We have multiple requests for the
-                // same address here. It specifies the latency to allocate an
-                // internal buffer and to schedule an event to the queued
-                // port and also takes into account the additional delay of
-                // the xbar.
-                mshr->allocateTarget(pkt, forward_time, order++);
-                if (mshr->getNumTargets() == numTarget) {
-                    noTargetMSHR = mshr;
-                    setBlocked(Blocked_NoTargets);
-                    // need to be careful with this... if this mshr isn't
-                    // ready yet (i.e. time > curTick()), we don't want to
-                    // move it ahead of mshrs that are ready
-                    // mshrQueue.moveToFront(mshr);
+                assert(pkt->cmd != MemCmd::Writeback);
+                // CleanEvicts corresponding to blocks which have outstanding
+                // requests in MSHRs can be deleted here.
+                if (pkt->cmd == MemCmd::CleanEvict) {
+                    pendingDelete.push_back(pkt);
+                } else {
+                    DPRINTF(Cache, "%s coalescing MSHR for %s addr %#llx size %d\n",
+                            __func__, pkt->cmdString(), pkt->getAddr(),
+                            pkt->getSize());
+
+                    assert(pkt->req->masterId() < system->maxMasters());
+                    mshr_hits[pkt->cmdToIndex()][pkt->req->masterId()]++;
+                    if (mshr->threadNum != 0/*pkt->req->threadId()*/) {
+                        mshr->threadNum = -1;
+                    }
+                    // We use forward_time here because it is the same
+                    // considering new targets. We have multiple
+                    // requests for the same address here. It
+                    // specifies the latency to allocate an internal
+                    // buffer and to schedule an event to the queued
+                    // port and also takes into account the additional
+                    // delay of the xbar.
+                    mshr->allocateTarget(pkt, forward_time, order++);
+                    if (mshr->getNumTargets() == numTarget) {
+                        noTargetMSHR = mshr;
+                        setBlocked(Blocked_NoTargets);
+                        // need to be careful with this... if this mshr isn't
+                        // ready yet (i.e. time > curTick()), we don't want to
+                        // move it ahead of mshrs that are ready
+                        // mshrQueue.moveToFront(mshr);
+                    }
                 }
-
                 // We should call the prefetcher reguardless if the request is
                 // satisfied or not, reguardless if the request is in the MSHR or
                 // not.  The request could be a ReadReq hit, but still not
@@ -707,7 +788,7 @@ Cache::recvTimingReq(PacketPtr pkt)
                 mshr_misses[pkt->cmdToIndex()][pkt->req->masterId()]++;
             }
 
-            if (pkt->cmd == MemCmd::Writeback ||
+            if (pkt->evictingBlock() ||
                 (pkt->req->isUncacheable() && pkt->isWrite())) {
                 // We use forward_time here because there is an
                 // uncached memory write, forwarded to WriteBuffer. It
@@ -782,7 +863,8 @@ Cache::getBusPacket(PacketPtr cpu_pkt, CacheBlk *blk,
     }
 
     if (!blkValid &&
-        (cpu_pkt->cmd == MemCmd::Writeback || cpu_pkt->isUpgrade())) {
+        (cpu_pkt->isUpgrade() ||
+         cpu_pkt->evictingBlock())) {
         // Writebacks that weren't allocated in access() and upgrades
         // from upper-level caches that missed completely just go
         // through.
@@ -834,8 +916,9 @@ Cache::getBusPacket(PacketPtr cpu_pkt, CacheBlk *blk,
     assert(pkt->getAddr() == blockAlign(pkt->getAddr()));
 
     pkt->allocate();
-    DPRINTF(Cache, "%s created %s addr %#llx size %d\n",
-            __func__, pkt->cmdString(), pkt->getAddr(), pkt->getSize());
+    DPRINTF(Cache, "%s created %s from %s for  addr %#llx size %d\n",
+            __func__, pkt->cmdString(), cpu_pkt->cmdString(), pkt->getAddr(),
+            pkt->getSize());
     return pkt;
 }
 
@@ -1302,19 +1385,28 @@ Cache::recvTimingResp(PacketPtr pkt)
     pkt->headerDelay = pkt->payloadDelay = 0;
 
     // copy writebacks to write buffer
-    while (!writebacks.empty()) {
-        PacketPtr wbPkt = writebacks.front();
-        allocateWriteBuffer(wbPkt, clockEdge(forwardLatency), true);
-        writebacks.pop_front();
-    }
-    // if we used temp block, clear it out
-    if (blk == tempBlock) {
+    doWritebacks(writebacks, forward_time);
+
+    // if we used temp block, check to see if its valid and then clear it out
+    if (blk == tempBlock && tempBlock->isValid()) {
+        // We use forwardLatency here because we are copying
+        // Writebacks/CleanEvicts to write buffer. It specifies the latency to
+        // allocate an internal buffer and to schedule an event to the
+        // queued port.
         if (blk->isDirty()) {
-            // We use forwardLatency here because we are copying
-            // writebacks to write buffer. It specifies the latency to
-            // allocate an internal buffer and to schedule an event to the
-            // queued port.
-            allocateWriteBuffer(writebackBlk(blk), forward_time, true);
+            PacketPtr wbPkt = writebackBlk(blk);
+            allocateWriteBuffer(wbPkt, forward_time, true);
+            // Set BLOCK_CACHED flag if cached above.
+            if (isCachedAbove(wbPkt))
+                wbPkt->setBlockCached();
+        } else {
+            PacketPtr wcPkt = cleanEvictBlk(blk);
+            // Check to see if block is cached above. If not allocate
+            // write buffer
+            if (isCachedAbove(wcPkt))
+                delete wcPkt;
+            else
+                allocateWriteBuffer(wcPkt, forward_time, true);
         }
         blk->invalidate();
     }
@@ -1352,6 +1444,30 @@ Cache::writebackBlk(CacheBlk *blk)
     return writeback;
 }
 
+PacketPtr
+Cache::cleanEvictBlk(CacheBlk *blk)
+{
+    assert(blk && blk->isValid() && !blk->isDirty());
+    // Creating a zero sized write, a message to the snoop filter
+    Request *req =
+        new Request(tags->regenerateBlkAddr(blk->tag, blk->set), blkSize, 0,
+                    Request::wbMasterId);
+    if (blk->isSecure())
+        req->setFlags(Request::SECURE);
+
+    req->taskId(blk->task_id);
+    blk->task_id = ContextSwitchTaskId::Unknown;
+    blk->tickInserted = curTick();
+
+    PacketPtr pkt = new Packet(req, MemCmd::CleanEvict);
+    pkt->allocate();
+    DPRINTF(Cache, "%s%s %x Create CleanEvict\n", pkt->cmdString(),
+            pkt->req->isInstFetch() ? " (ifetch)" : "",
+            pkt->getAddr());
+
+    return pkt;
+}
+
 void
 Cache::memWriteback()
 {
@@ -1434,9 +1550,13 @@ Cache::allocateBlock(Addr addr, bool is_secure, PacketList &writebacks)
                     addr, is_secure ? "s" : "ns",
                     blk->isDirty() ? "writeback" : "clean");
 
+            // Will send up Writeback/CleanEvict snoops via isCachedAbove
+            // when pushing this writeback list into the write buffer.
             if (blk->isDirty()) {
                 // Save writeback packet for handling by caller
                 writebacks.push_back(writebackBlk(blk));
+            } else {
+                writebacks.push_back(cleanEvictBlk(blk));
             }
         }
     }
@@ -1460,6 +1580,12 @@ Cache::handleFill(PacketPtr pkt, CacheBlk *blk, PacketList &writebacks)
     CacheBlk::State old_state = blk ? blk->status : 0;
 #endif
 
+    // When handling a fill, discard any CleanEvicts for the
+    // same address in write buffer.
+    Addr M5_VAR_USED blk_addr = blockAlign(pkt->getAddr());
+    std::vector<MSHR *> M5_VAR_USED wbs;
+    assert (!writeBuffer.findMatches(blk_addr, is_secure, wbs));
+
     if (blk == NULL) {
         // better have read new data...
         assert(pkt->hasData());
@@ -1633,9 +1759,9 @@ Cache::handleSnoop(PacketPtr pkt, CacheBlk *blk, bool is_timing,
             if (snoopPkt.sharedAsserted()) {
                 pkt->assertShared();
             }
-            // If this request is a prefetch or clean evict and an
-            // upper level signals block present, make sure to
-            // propagate the block presence to the requester.
+            // If this request is a prefetch or clean evict and an upper level
+            // signals block present, make sure to propagate the block
+            // presence to the requester.
             if (snoopPkt.isBlockCached()) {
                 pkt->setBlockCached();
             }
@@ -1674,9 +1800,9 @@ Cache::handleSnoop(PacketPtr pkt, CacheBlk *blk, bool is_timing,
     // MemCmd::HardPFReq is only observed by upstream caches.  After missing
     // above and in it's own cache, a new MemCmd::ReadReq is created that
     // downstream caches observe.
-    if (pkt->cmd == MemCmd::HardPFReq) {
-        DPRINTF(Cache, "Squashing prefetch from lower cache %#x\n",
-                pkt->getAddr());
+    if (pkt->mustCheckAbove()) {
+        DPRINTF(Cache, "Found addr %#llx in upper level cache for snoop %s from"
+                " lower cache\n", pkt->getAddr(), pkt->cmdString());
         pkt->setBlockCached();
         return;
     }
@@ -1754,7 +1880,7 @@ Cache::recvTimingSnoopReq(PacketPtr pkt)
     assert(!system->bypassCaches());
 
     // no need to snoop writebacks or requests that are not in range
-    if (pkt->cmd == MemCmd::Writeback || !inRange(pkt->getAddr())) {
+    if (!inRange(pkt->getAddr())) {
         return;
     }
 
@@ -1764,11 +1890,12 @@ Cache::recvTimingSnoopReq(PacketPtr pkt)
     Addr blk_addr = blockAlign(pkt->getAddr());
     MSHR *mshr = mshrQueue.findMatch(blk_addr, is_secure);
 
-    // Squash any prefetch requests from below on MSHR hits
-    if (mshr && pkt->cmd == MemCmd::HardPFReq) {
-        DPRINTF(Cache, "Setting block present to squash prefetch from"
+    // Inform request(Prefetch, CleanEvict or Writeback) from below of
+    // MSHR hit, set setBlockCached.
+    if (mshr && pkt->mustCheckAbove()) {
+        DPRINTF(Cache, "Setting block cached for %s from"
                 "lower cache on mshr hit %#x\n",
-                pkt->getAddr());
+                pkt->cmdString(), pkt->getAddr());
         pkt->setBlockCached();
         return;
     }
@@ -1795,28 +1922,60 @@ Cache::recvTimingSnoopReq(PacketPtr pkt)
         // We should only ever find a single match
         assert(writebacks.size() == 1);
         MSHR *wb_entry = writebacks[0];
+        // Expect to see only Writebacks and/or CleanEvicts here, both of
+        // which should not be generated for uncacheable data.
         assert(!wb_entry->isUncacheable());
+        // There should only be a single request responsible for generating
+        // Writebacks/CleanEvicts.
         assert(wb_entry->getNumTargets() == 1);
         PacketPtr wb_pkt = wb_entry->getTarget()->pkt;
-        assert(wb_pkt->cmd == MemCmd::Writeback);
+        assert(wb_pkt->evictingBlock());
+
+        if (pkt->evictingBlock()) {
+            // if the block is found in the write queue, set the BLOCK_CACHED
+            // flag for Writeback/CleanEvict snoop. On return the snoop will
+            // propagate the BLOCK_CACHED flag in Writeback packets and prevent
+            // any CleanEvicts from travelling down the memory hierarchy.
+            pkt->setBlockCached();
+            DPRINTF(Cache, "Squashing %s from lower cache on writequeue hit"
+                    " %#x\n", pkt->cmdString(), pkt->getAddr());
+            return;
+        }
 
-        assert(!pkt->memInhibitAsserted());
-        pkt->assertMemInhibit();
-        if (!pkt->needsExclusive()) {
-            pkt->assertShared();
-            // the writeback is no longer the exclusive copy in the system
-            wb_pkt->clearSupplyExclusive();
+        if (wb_pkt->cmd == MemCmd::Writeback) {
+            assert(!pkt->memInhibitAsserted());
+            pkt->assertMemInhibit();
+            if (!pkt->needsExclusive()) {
+                pkt->assertShared();
+                // the writeback is no longer the exclusive copy in
+                // the system
+                wb_pkt->clearSupplyExclusive();
+            } else {
+                // if we're not asserting the shared line, we need to
+                // invalidate our copy.  we'll do that below as long as
+                // the packet's invalidate flag is set...
+                assert(pkt->isInvalidate());
+            }
+            doTimingSupplyResponse(pkt, wb_pkt->getConstPtr<uint8_t>(),
+                                   false, false);
         } else {
-            // if we're not asserting the shared line, we need to
-            // invalidate our copy.  we'll do that below as long as
-            // the packet's invalidate flag is set...
-            assert(pkt->isInvalidate());
+            assert(wb_pkt->cmd == MemCmd::CleanEvict);
+            // The cache technically holds the block until the
+            // corresponding CleanEvict message reaches the crossbar
+            // below. Therefore when a snoop encounters a CleanEvict
+            // message we must set assertShared (just like when it
+            // encounters a Writeback) to avoid the snoop filter
+            // prematurely clearing the holder bit in the crossbar
+            // below
+            if (!pkt->needsExclusive())
+                pkt->assertShared();
+            else
+                assert(pkt->isInvalidate());
         }
-        doTimingSupplyResponse(pkt, wb_pkt->getConstPtr<uint8_t>(),
-                               false, false);
 
         if (pkt->isInvalidate()) {
             // Invalidation trumps our writeback... discard here
+            // Note: markInService will remove entry from writeback buffer.
             markInService(wb_entry, false);
             delete wb_pkt;
         }
@@ -1844,8 +2003,11 @@ Cache::recvAtomicSnoop(PacketPtr pkt)
     // Snoops shouldn't happen when bypassing caches
     assert(!system->bypassCaches());
 
-    // no need to snoop writebacks or requests that are not in range
-    if (pkt->cmd == MemCmd::Writeback || !inRange(pkt->getAddr())) {
+    // no need to snoop writebacks or requests that are not in range. In
+    // atomic we have no Writebacks/CleanEvicts queued and no prefetches,
+    // hence there is no need to snoop upwards and determine if they are
+    // present above.
+    if (pkt->evictingBlock() || !inRange(pkt->getAddr())) {
         return 0;
     }
 
@@ -1938,6 +2100,29 @@ Cache::getNextMSHR()
     return NULL;
 }
 
+bool
+Cache::isCachedAbove(const PacketPtr pkt) const
+{
+    if (isTopLevel)
+        return false;
+    // Mirroring the flow of HardPFReqs, the cache sends CleanEvict and
+    // Writeback snoops into upper level caches to check for copies of the
+    // same block. Using the BLOCK_CACHED flag with the Writeback/CleanEvict
+    // packet, the cache can inform the crossbar below of presence or absence
+    // of the block.
+
+    Packet snoop_pkt(pkt, true, false);
+    snoop_pkt.setExpressSnoop();
+    // Assert that packet is either Writeback or CleanEvict and not a prefetch
+    // request because prefetch requests need an MSHR and may generate a snoop
+    // response.
+    assert(pkt->evictingBlock());
+    snoop_pkt.senderState = NULL;
+    cpuSidePort->sendTimingSnoopReq(&snoop_pkt);
+    // Writeback/CleanEvict snoops do not generate a separate snoop response.
+    assert(!(snoop_pkt.memInhibitAsserted()));
+    return snoop_pkt.isBlockCached();
+}
 
 PacketPtr
 Cache::getTimingPacket()
@@ -1955,62 +2140,69 @@ Cache::getTimingPacket()
     DPRINTF(CachePort, "%s %s for addr %#llx size %d\n", __func__,
             tgt_pkt->cmdString(), tgt_pkt->getAddr(), tgt_pkt->getSize());
 
-    if (mshr->isForwardNoResponse()) {
-        // no response expected, just forward packet as it is
-        assert(tags->findBlock(mshr->blkAddr, mshr->isSecure) == NULL);
-        pkt = tgt_pkt;
-    } else {
-        CacheBlk *blk = tags->findBlock(mshr->blkAddr, mshr->isSecure);
-
-        if (tgt_pkt->cmd == MemCmd::HardPFReq && forwardSnoops) {
-            // We need to check the caches above us to verify that
-            // they don't have a copy of this block in the dirty state
-            // at the moment. Without this check we could get a stale
-            // copy from memory that might get used in place of the
-            // dirty one.
-            Packet snoop_pkt(tgt_pkt, true, false);
-            snoop_pkt.setExpressSnoop();
-            snoop_pkt.senderState = mshr;
-            cpuSidePort->sendTimingSnoopReq(&snoop_pkt);
-
-            // Check to see if the prefetch was squashed by an upper cache (to
-            // prevent us from grabbing the line) or if a Check to see if a
-            // writeback arrived between the time the prefetch was placed in
-            // the MSHRs and when it was selected to be sent or if the
-            // prefetch was squashed by an upper cache.
-
-            // It is important to check msmInhibitAsserted before
-            // prefetchSquashed. If another cache has asserted MEM_INGIBIT, it
-            // will be sending a response which will arrive at the MSHR
-            // allocated ofr this request. Checking the prefetchSquash first
-            // may result in the MSHR being prematurely deallocated.
-
-            if (snoop_pkt.memInhibitAsserted()) {
-                // If we are getting a non-shared response it is dirty
-                bool pending_dirty_resp = !snoop_pkt.sharedAsserted();
-                markInService(mshr, pending_dirty_resp);
-                DPRINTF(Cache, "Upward snoop of prefetch for addr"
-                        " %#x (%s) hit\n",
-                        tgt_pkt->getAddr(), tgt_pkt->isSecure()? "s": "ns");
-                return NULL;
-            }
+    CacheBlk *blk = tags->findBlock(mshr->blkAddr, mshr->isSecure);
+
+    if (tgt_pkt->cmd == MemCmd::HardPFReq && forwardSnoops) {
+        // We need to check the caches above us to verify that
+        // they don't have a copy of this block in the dirty state
+        // at the moment. Without this check we could get a stale
+        // copy from memory that might get used in place of the
+        // dirty one.
+        Packet snoop_pkt(tgt_pkt, true, false);
+        snoop_pkt.setExpressSnoop();
+        snoop_pkt.senderState = mshr;
+        cpuSidePort->sendTimingSnoopReq(&snoop_pkt);
+
+        // Check to see if the prefetch was squashed by an upper cache (to
+        // prevent us from grabbing the line) or if a Check to see if a
+        // writeback arrived between the time the prefetch was placed in
+        // the MSHRs and when it was selected to be sent or if the
+        // prefetch was squashed by an upper cache.
+
+        // It is important to check memInhibitAsserted before
+        // prefetchSquashed. If another cache has asserted MEM_INHIBIT, it
+        // will be sending a response which will arrive at the MSHR
+        // allocated for this request. Checking the prefetchSquash first
+        // may result in the MSHR being prematurely deallocated.
+
+        if (snoop_pkt.memInhibitAsserted()) {
+            // If we are getting a non-shared response it is dirty
+            bool pending_dirty_resp = !snoop_pkt.sharedAsserted();
+            markInService(mshr, pending_dirty_resp);
+            DPRINTF(Cache, "Upward snoop of prefetch for addr"
+                    " %#x (%s) hit\n",
+                    tgt_pkt->getAddr(), tgt_pkt->isSecure()? "s": "ns");
+            return NULL;
+        }
 
-            if (snoop_pkt.isBlockCached() || blk != NULL) {
-                DPRINTF(Cache, "Block present, prefetch squashed by cache.  "
-                               "Deallocating mshr target %#x.\n",
-                        mshr->blkAddr);
+        if (snoop_pkt.isBlockCached() || blk != NULL) {
+            DPRINTF(Cache, "Block present, prefetch squashed by cache.  "
+                    "Deallocating mshr target %#x.\n",
+                    mshr->blkAddr);
 
-                // Deallocate the mshr target
+            // Deallocate the mshr target
+            if (tgt_pkt->cmd != MemCmd::Writeback) {
                 if (mshr->queue->forceDeallocateTarget(mshr)) {
                     // Clear block if this deallocation resulted freed an
                     // mshr when all had previously been utilized
                     clearBlocked((BlockedCause)(mshr->queue->index));
                 }
                 return NULL;
+            } else {
+                // If this is a Writeback, and the snoops indicate that the blk
+                // is cached above, set the BLOCK_CACHED flag in the Writeback
+                // packet, so that it does not reset the bits corresponding to
+                // this block in the snoop filter below.
+                tgt_pkt->setBlockCached();
             }
-
         }
+    }
 
+    if (mshr->isForwardNoResponse()) {
+        // no response expected, just forward packet as it is
+        assert(tags->findBlock(mshr->blkAddr, mshr->isSecure) == NULL);
+        pkt = tgt_pkt;
+    } else {
         pkt = getBusPacket(tgt_pkt, blk, mshr->needsExclusive());
 
         mshr->isForward = (pkt == NULL);
author	Ali Jafri <ali.jafri@arm.com>	2015-07-03 10:14:37 -0400
committer	Ali Jafri <ali.jafri@arm.com>	2015-07-03 10:14:37 -0400
commit	a262908acc0a641700a03fcea89c48133f0467cd (patch)
tree	86fa711fe3b1d3459d0f0088a3cfffa5d14a6dad /src/mem/cache/cache_impl.hh
parent	aa5bbe81f6fd18d824608d48a5adf74ea2c5b51d (diff)
download	gem5-a262908acc0a641700a03fcea89c48133f0467cd.tar.xz