author | Curtis Dunham <Curtis.Dunham@arm.com> | 2014-05-13 12:20:49 -0500
---|---|---
committer | Curtis Dunham <Curtis.Dunham@arm.com> | 2014-05-13 12:20:49 -0500
commit | 5d029463eef3cfafa507dacadccd33402f82029a (patch) |
tree | d2549fad3b8d6e647b0f9cf9a06887346cb65902 | /src/mem/cache
parent | e3b19cb294c98466a431950888045c6b5d24b675 (diff) |
download | gem5-5d029463eef3cfafa507dacadccd33402f82029a.tar.xz |
cpu, mem: Make software prefetches non-blocking
Previously, they were treated so much like loads that they could stall
at the head of the ROB. Now they are always treated like L1 hits.
If they actually miss, a new request is created at the L1 and tracked
in the MSHRs there if necessary (i.e. if it doesn't coalesce with
an existing outstanding load).
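The idea in one place, as a standalone sketch: the `Packet`, `Cache`, `handleMiss`, and `handleFill` names below are simplified stand-ins invented for illustration, not gem5's actual classes; the real change lives in `Cache<TagStore>::recvTimingReq` and `recvTimingResp` as shown in the diff below.

```cpp
// Standalone sketch (simplified types, not gem5's Packet/Request/Cache API):
// ack a software prefetch immediately with dummy data, then track the miss
// with a cache-owned copy of the request.
#include <cstdint>
#include <cstring>
#include <iostream>
#include <memory>
#include <unordered_map>
#include <vector>

struct Packet {                  // stand-in for gem5's Packet/Request pair
    uint64_t addr;
    std::vector<uint8_t> data;
    bool isSWPrefetch;
    bool isResponse;
};

struct Cache {
    // outstanding misses keyed by block address (stand-in for the MSHR queue)
    std::unordered_map<uint64_t, std::unique_ptr<Packet>> mshrs;

    // would normally schedule a timing response on the CPU-side port
    void respondToCpu(Packet &pkt) {
        pkt.isResponse = true;
        std::cout << "response for 0x" << std::hex << pkt.addr << std::dec << '\n';
    }

    void handleMiss(Packet &pkt) {
        if (pkt.isSWPrefetch) {
            // Ack right away with dummy data so the core can retire the
            // prefetch instead of stalling at the head of the ROB.
            std::memset(pkt.data.data(), 0xFF, pkt.data.size());
            respondToCpu(pkt);

            // Only create a new request if it does not coalesce with an
            // outstanding miss to the same block.
            if (mshrs.count(pkt.addr) == 0) {
                auto pf = std::make_unique<Packet>(pkt);  // cache-owned copy
                pf->isResponse = false;
                mshrs.emplace(pkt.addr, std::move(pf));
            }
            return;  // the core's original packet is done at this point
        }
        // an ordinary load miss would allocate or join an MSHR here
    }

    // When the fill returns, the cloned prefetch request is simply dropped;
    // no second response goes back to the core.
    void handleFill(uint64_t addr) { mshrs.erase(addr); }
};

int main() {
    Cache c;
    Packet pf{0x1000, std::vector<uint8_t>(64), /*isSWPrefetch=*/true, false};
    c.handleMiss(pf);      // core gets its ack immediately
    c.handleFill(0x1000);  // fill arrives later; nothing else is sent upstream
}
```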
Diffstat (limited to 'src/mem/cache')
-rw-r--r-- | src/mem/cache/blk.hh | 2 |
-rw-r--r-- | src/mem/cache/cache.hh | 2 |
-rw-r--r-- | src/mem/cache/cache_impl.hh | 121 |
3 files changed, 96 insertions, 29 deletions
diff --git a/src/mem/cache/blk.hh b/src/mem/cache/blk.hh
index d38281e48..626b4818d 100644
--- a/src/mem/cache/blk.hh
+++ b/src/mem/cache/blk.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2013 ARM Limited
+ * Copyright (c) 2012-2014 ARM Limited
  * All rights reserved.
  *
  * The license below extends only to copyright in the software and shall
diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index 3d7fc8fe3..0ee1e353a 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2013 ARM Limited
+ * Copyright (c) 2012-2014 ARM Limited
  * All rights reserved.
  *
  * The license below extends only to copyright in the software and shall
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 34dacdf9f..91cb5a4e3 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -473,8 +473,15 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
         // @todo: someone should pay for this
         pkt->busFirstWordDelay = pkt->busLastWordDelay = 0;
 
-        // writes go in write buffer, reads use MSHR
-        if (pkt->isWrite() && !pkt->isRead()) {
+        // writes go in write buffer, reads use MSHR,
+        // prefetches are acknowledged (responded to) and dropped
+        if (pkt->cmd.isPrefetch()) {
+            // prefetching (cache loading) uncacheable data is nonsensical
+            pkt->makeTimingResponse();
+            std::memset(pkt->getPtr<uint8_t>(), 0xFF, pkt->getSize());
+            cpuSidePort->schedTimingResp(pkt, clockEdge(hitLatency));
+            return true;
+        } else if (pkt->isWrite() && !pkt->isRead()) {
             allocateWriteBuffer(pkt, time, true);
         } else {
             allocateUncachedReadBuffer(pkt, time, true);
@@ -521,7 +528,10 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
         if (prefetcher && (prefetchOnAccess || (blk && blk->wasPrefetched()))) {
             if (blk)
                 blk->status &= ~BlkHWPrefetched;
-            next_pf_time = prefetcher->notify(pkt, time);
+
+            // Don't notify on SWPrefetch
+            if (!pkt->cmd.isSWPrefetch())
+                next_pf_time = prefetcher->notify(pkt, time);
         }
 
         if (needsResponse) {
@@ -544,36 +554,80 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
         Addr blk_addr = blockAlign(pkt->getAddr());
         MSHR *mshr = mshrQueue.findMatch(blk_addr, pkt->isSecure());
 
+        // Software prefetch handling:
+        // To keep the core from waiting on data it won't look at
+        // anyway, send back a response with dummy data. Miss handling
+        // will continue asynchronously. Unfortunately, the core will
+        // insist upon freeing original Packet/Request, so we have to
+        // create a new pair with a different lifecycle. Note that this
+        // processing happens before any MSHR munging on the behalf of
+        // this request because this new Request will be the one stored
+        // into the MSHRs, not the original.
+        if (pkt->cmd.isSWPrefetch() && isTopLevel) {
+            assert(needsResponse);
+            assert(pkt->req->hasPaddr());
+
+            // There's no reason to add a prefetch as an additional target
+            // to an existing MSHR. If an outstanding request is already
+            // in progress, there is nothing for the prefetch to do.
+            // If this is the case, we don't even create a request at all.
+            PacketPtr pf = mshr ? NULL : new Packet(pkt);
+
+            if (pf) {
+                pf->req = new Request(pkt->req->getPaddr(),
+                                      pkt->req->getSize(),
+                                      pkt->req->getFlags(),
+                                      pkt->req->masterId());
+                // The core will clean up prior senderState; we need our own.
+                pf->senderState = NULL;
+            }
+
+            pkt->makeTimingResponse();
+            // for debugging, set all the bits in the response data
+            // (also keeps valgrind from complaining when debugging settings
+            //  print out instruction results)
+            std::memset(pkt->getPtr<uint8_t>(), 0xFF, pkt->getSize());
+            cpuSidePort->schedTimingResp(pkt, clockEdge(lat));
+
+            pkt = pf;
+        }
+
         if (mshr) {
             /// MSHR hit
             /// @note writebacks will be checked in getNextMSHR()
             /// for any conflicting requests to the same block
 
             //@todo remove hw_pf here
-            assert(pkt->req->masterId() < system->maxMasters());
-            mshr_hits[pkt->cmdToIndex()][pkt->req->masterId()]++;
-            if (mshr->threadNum != 0/*pkt->req->threadId()*/) {
-                mshr->threadNum = -1;
-            }
-            mshr->allocateTarget(pkt, time, order++);
-            if (mshr->getNumTargets() == numTarget) {
-                noTargetMSHR = mshr;
-                setBlocked(Blocked_NoTargets);
-                // need to be careful with this... if this mshr isn't
-                // ready yet (i.e. time > curTick()_, we don't want to
-                // move it ahead of mshrs that are ready
-                // mshrQueue.moveToFront(mshr);
-            }
-            // We should call the prefetcher reguardless if the request is
-            // satisfied or not, reguardless if the request is in the MSHR or
-            // not. The request could be a ReadReq hit, but still not
-            // satisfied (potentially because of a prior write to the same
-            // cache line. So, even when not satisfied, tehre is an MSHR
-            // already allocated for this, we need to let the prefetcher know
-            // about the request
-            if (prefetcher) {
-                next_pf_time = prefetcher->notify(pkt, time);
+            // Coalesce unless it was a software prefetch (see above).
+            if (pkt) {
+                assert(pkt->req->masterId() < system->maxMasters());
+                mshr_hits[pkt->cmdToIndex()][pkt->req->masterId()]++;
+                if (mshr->threadNum != 0/*pkt->req->threadId()*/) {
+                    mshr->threadNum = -1;
+                }
+                mshr->allocateTarget(pkt, time, order++);
+                if (mshr->getNumTargets() == numTarget) {
+                    noTargetMSHR = mshr;
+                    setBlocked(Blocked_NoTargets);
+                    // need to be careful with this... if this mshr isn't
+                    // ready yet (i.e. time > curTick()), we don't want to
+                    // move it ahead of mshrs that are ready
+                    // mshrQueue.moveToFront(mshr);
+                }
+
+                // We should call the prefetcher reguardless if the request is
+                // satisfied or not, reguardless if the request is in the MSHR or
+                // not. The request could be a ReadReq hit, but still not
+                // satisfied (potentially because of a prior write to the same
+                // cache line. So, even when not satisfied, tehre is an MSHR
+                // already allocated for this, we need to let the prefetcher know
+                // about the request
+                if (prefetcher) {
+                    // Don't notify on SWPrefetch
+                    if (!pkt->cmd.isSWPrefetch())
+                        next_pf_time = prefetcher->notify(pkt, time);
+                }
            }
         } else {
             // no MSHR
@@ -609,7 +663,9 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
             }
 
             if (prefetcher) {
-                next_pf_time = prefetcher->notify(pkt, time);
+                // Don't notify on SWPrefetch
+                if (!pkt->cmd.isSWPrefetch())
+                    next_pf_time = prefetcher->notify(pkt, time);
             }
         }
     }
@@ -963,6 +1019,17 @@ Cache<TagStore>::recvTimingResp(PacketPtr pkt)
         switch (target->source) {
           case MSHR::Target::FromCPU:
             Tick completion_time;
+
+            // Software prefetch handling for cache closest to core
+            if (target->pkt->cmd.isSWPrefetch() && isTopLevel) {
+                // a software prefetch would have already been ack'd immediately
+                // with dummy data so the core would be able to retire it.
+                // this request completes right here, so we deallocate it.
+                delete target->pkt->req;
+                delete target->pkt;
+                break; // skip response
+            }
+
             if (is_fill) {
                 satisfyCpuSideRequest(target->pkt, blk, true,
                                       mshr->hasPostDowngrade());