From 884807a68ad7e4f390660b3becfe4ee094334e95 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sun, 15 Jul 2007 20:11:06 -0700
Subject: Fix up a bunch of multilevel coherence issues. Atomic mode seems to
 work.  Timing is closer but not there yet.

--HG--
extra : convert_revision : 0dea5c3d4b973d009e9d4a4c21b9cad15961d56f
---
 configs/example/memtest.py  |  2 +-
 src/cpu/memtest/memtest.cc  |  4 +-
 src/cpu/o3/lsq_impl.hh      |  7 ++--
 src/mem/bus.cc              |  5 ++-
 src/mem/cache/cache_impl.hh | 94 ++++++++++++++++++++++++++++++++++++++++-----
 src/mem/packet.hh           | 12 ++++--
 6 files changed, 105 insertions(+), 19 deletions(-)
diff --git a/configs/example/memtest.py b/configs/example/memtest.py
index af100c9a9..5bb874e85 100644
--- a/configs/example/memtest.py
+++ b/configs/example/memtest.py
@@ -144,7 +144,7 @@ system = System(funcmem = PhysicalMemory(),
 def make_level(spec, prototypes, attach_obj, attach_port):
      fanout = spec[0]
      parent = attach_obj # use attach obj as config parent too
-     if fanout > 1 or options.force_bus:
+     if len(spec) > 1 and (fanout > 1 or options.force_bus):
           new_bus = Bus(clock="500MHz", width=16)
           new_bus.port = getattr(attach_obj, attach_port)
           parent.cpu_side_bus = new_bus
diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc
index db3ca282a..f5c8bb93b 100644
--- a/src/cpu/memtest/memtest.cc
+++ b/src/cpu/memtest/memtest.cc
@@ -64,7 +64,9 @@ MemTest::CpuPort::recvTiming(PacketPtr pkt)
 Tick
 MemTest::CpuPort::recvAtomic(PacketPtr pkt)
 {
-    panic("MemTest doesn't expect recvAtomic callback!");
+    // must be snoop upcall
+    assert(pkt->isRequest());
+    assert(pkt->getDest() == Packet::Broadcast);
     return curTick;
 }
 
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
index b4a6a02da..10c0afd38 100644
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -84,9 +84,10 @@ LSQ<Impl>::DcachePort::recvTiming(PacketPtr pkt)
         lsq->thread[pkt->req->getThreadNum()].completeDataAccess(pkt);
     }
     else {
-    //else it is a coherence request, maybe you need to do something
-        warn("Recieved a coherence request (Invalidate?), 03CPU doesn't"
-             "update LSQ for these\n");
+        // must be a snoop
+
+        // @TODO someday may need to process invalidations in LSQ here
+        // to provide stronger consistency model
     }
     return true;
 }
diff --git a/src/mem/bus.cc b/src/mem/bus.cc
index 24a0c6f02..e70558bd6 100644
--- a/src/mem/bus.cc
+++ b/src/mem/bus.cc
@@ -183,8 +183,9 @@ Bus::recvTiming(PacketPtr pkt)
 
     // If the bus is busy, or other devices are in line ahead of the current
     // one, put this device on the retry list.
-    if (tickNextIdle > curTick ||
-        (retryList.size() && (!inRetry || pktPort != retryList.front())))
+    if (!pkt->isExpressSnoop() &&
+        (tickNextIdle > curTick ||
+         (retryList.size() && (!inRetry || pktPort != retryList.front()))))
     {
         addToRetryList(pktPort);
         DPRINTF(Bus, "recvTiming: Bus is busy, returning false\n");
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index b159df84a..59571dd6f 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -165,11 +165,25 @@ Cache<TagStore>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk)
             blk->trackLoadLocked(pkt);
         }
         pkt->setDataFromBlock(blk->data, blkSize);
+        if (pkt->getSize() == blkSize) {
+            // special handling for coherent block requests from
+            // upper-level caches
+            if (pkt->needsExclusive()) {
+                // on ReadExReq we give up our copy
+                tags->invalidateBlk(blk);
+            } else {
+                // on ReadReq we create shareable copies here and in
+                // the requester
+                pkt->assertShared();
+                blk->status &= ~BlkWritable;
+            }
+        }
     } else {
         // Not a read or write... must be an upgrade.  it's OK
         // to just ack those as long as we have an exclusive
         // copy at this level.
         assert(pkt->cmd == MemCmd::UpgradeReq);
+        tags->invalidateBlk(blk);
     }
 }
 
@@ -269,6 +283,18 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk, int &lat)
             hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
             satisfied = true;
             satisfyCpuSideRequest(pkt, blk);
+        } else if (pkt->cmd == MemCmd::Writeback) {
+            // special case: writeback to read-only block (e.g., from
+            // L1 into L2).  since we're really just passing ownership
+            // from one cache to another, we can update this cache to
+            // be the owner without making the block writeable
+            assert(!blk->isWritable() /* && !blk->isDirty() */);
+            assert(blkSize == pkt->getSize());
+            std::memcpy(blk->data, pkt->getPtr<uint8_t>(), blkSize);
+            blk->status |= BlkDirty;
+            satisfied = true;
+            // nothing else to do; writeback doesn't expect response
+            assert(!pkt->needsResponse());
         } else {
             // permission violation... nothing to do here, leave unsatisfied
             // for statistics purposes this counts like a complete miss
@@ -363,9 +389,10 @@ Cache<TagStore>::timingAccess(PacketPtr pkt)
     bool needsResponse = pkt->needsResponse();
 
     if (satisfied) {
-        assert(needsResponse);
-        pkt->makeTimingResponse();
-        cpuSidePort->respond(pkt, curTick+lat);
+        if (needsResponse) {
+            pkt->makeTimingResponse();
+            cpuSidePort->respond(pkt, curTick+lat);
+        }
     } else {
         // miss
         if (prefetchMiss)
@@ -456,10 +483,30 @@ Cache<TagStore>::atomicAccess(PacketPtr pkt)
 {
     int lat = hitLatency;
 
+    // @TODO: make this a parameter
+    bool last_level_cache = false;
+
     if (pkt->memInhibitAsserted()) {
-        DPRINTF(Cache, "mem inhibited on 0x%x: not responding\n",
-                pkt->getAddr());
         assert(!pkt->req->isUncacheable());
+        // have to invalidate ourselves and any lower caches even if
+        // upper cache will be responding
+        if (pkt->isInvalidate()) {
+            BlkType *blk = tags->findBlock(pkt->getAddr());
+            if (blk && blk->isValid()) {
+                tags->invalidateBlk(blk);
+                DPRINTF(Cache, "rcvd mem-inhibited %s on 0x%x: invalidating\n",
+                        pkt->cmdString(), pkt->getAddr());
+            }
+            if (!last_level_cache) {
+                DPRINTF(Cache, "forwarding mem-inhibited %s on 0x%x\n",
+                        pkt->cmdString(), pkt->getAddr());
+                lat += memSidePort->sendAtomic(pkt);
+            }
+        } else {
+            DPRINTF(Cache, "rcvd mem-inhibited %s on 0x%x: not responding\n",
+                    pkt->cmdString(), pkt->getAddr());
+        }
+
         return lat;
     }
 
@@ -791,9 +838,7 @@ Cache<TagStore>::handleFill(PacketPtr pkt, BlkType *blk,
         assert(pkt->isRead() || blk->isValid());
     }
 
-    if (pkt->needsExclusive()) {
-        blk->status = BlkValid | BlkWritable | BlkDirty;
-    } else if (!pkt->sharedAsserted()) {
+    if (pkt->needsExclusive() || !pkt->sharedAsserted()) {
         blk->status = BlkValid | BlkWritable;
     } else {
         blk->status = BlkValid;
@@ -839,6 +884,37 @@ void
 Cache<TagStore>::handleSnoop(PacketPtr pkt, BlkType *blk,
                              bool is_timing, bool is_deferred)
 {
+    assert(pkt->isRequest());
+
+    // first propagate snoop upward to see if anyone above us wants to
+    // handle it.  save & restore packet src since it will get
+    // rewritten to be relative to cpu-side bus (if any)
+    bool alreadySupplied = pkt->memInhibitAsserted();
+    bool upperSupply = false;
+    if (is_timing) {
+        Packet *snoopPkt = new Packet(pkt, true);  // clear flags
+        snoopPkt->setExpressSnoop();
+        cpuSidePort->sendTiming(snoopPkt);
+        if (snoopPkt->memInhibitAsserted()) {
+            // cache-to-cache response from some upper cache
+            assert(!alreadySupplied);
+            pkt->assertMemInhibit();
+        }
+        if (snoopPkt->sharedAsserted()) {
+            pkt->assertShared();
+        }
+        delete snoopPkt;
+    } else {
+        int origSrc = pkt->getSrc();
+        cpuSidePort->sendAtomic(pkt);
+        if (!alreadySupplied && pkt->memInhibitAsserted()) {
+            // cache-to-cache response from some upper cache:
+            // forward response to original requester
+            assert(pkt->isResponse());
+        }
+        pkt->setSrc(origSrc);
+    }
+
     if (!blk || !blk->isValid()) {
         return;
     }
@@ -846,7 +922,7 @@ Cache<TagStore>::handleSnoop(PacketPtr pkt, BlkType *blk,
     // we may end up modifying both the block state and the packet (if
     // we respond in atomic mode), so just figure out what to do now
     // and then do it later
-    bool supply = blk->isDirty() && pkt->isRead();
+    bool supply = blk->isDirty() && pkt->isRead() && !upperSupply;
     bool invalidate = pkt->isInvalidate();
 
     if (pkt->isRead() && !pkt->isInvalidate()) {
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index c90842dee..036bd3fd7 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -252,9 +252,11 @@ class Packet : public FastAlloc
     bool destValid;
 
     enum Flag {
-        // Snoop flags
+        // Snoop response flags
         MemInhibit,
         Shared,
+        // Special control flags
+        ExpressSnoop,
         NUM_PACKET_FLAGS
     };
 
@@ -317,6 +319,10 @@ class Packet : public FastAlloc
     bool memInhibitAsserted()   { return flags[MemInhibit]; }
     bool sharedAsserted()       { return flags[Shared]; }
 
+    // Special control flags
+    void setExpressSnoop()      { flags[ExpressSnoop] = true; }
+    bool isExpressSnoop()       { return flags[ExpressSnoop]; }
+
     // Network error conditions... encapsulate them as methods since
     // their encoding keeps changing (from result field to command
     // field, etc.)
@@ -372,7 +378,7 @@ class Packet : public FastAlloc
      * that, as we can't guarantee that the new packet's lifetime is
      * less than that of the original packet.  In this case the new
      * packet should allocate its own data. */
-    Packet(Packet *origPkt)
+    Packet(Packet *origPkt, bool clearFlags = false)
         :  cmd(origPkt->cmd), req(origPkt->req),
            data(origPkt->staticData ? origPkt->data : NULL),
            staticData(origPkt->staticData),
@@ -381,7 +387,7 @@ class Packet : public FastAlloc
            src(origPkt->src), dest(origPkt->dest),
            addrSizeValid(origPkt->addrSizeValid),
            srcValid(origPkt->srcValid), destValid(origPkt->destValid),
-           flags(origPkt->flags),
+           flags(clearFlags ? 0 : origPkt->flags),
            time(curTick), senderState(origPkt->senderState)
     {
     }
-- 
cgit v1.2.3