From 792d5b9e5ee40e58b922ae32e5a6ee9aa9586cbc Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Fri, 18 May 2007 22:35:04 -0700
Subject: First set of changes for reorganized cache coherence support.
 Compiles but doesn't work... committing just so I can merge (stupid bk!).

src/mem/bridge.cc:
    Get rid of SNOOP_COMMIT.
src/mem/bus.cc:
src/mem/packet.hh:
    Get rid of SNOOP_COMMIT & two-pass snoop.
    First bits of EXPRESS_SNOOP support.
src/mem/cache/base_cache.cc:
src/mem/cache/base_cache.hh:
src/mem/cache/cache.hh:
src/mem/cache/cache_impl.hh:
src/mem/cache/miss/blocking_buffer.cc:
src/mem/cache/miss/miss_queue.cc:
src/mem/cache/prefetch/base_prefetcher.cc:
    Big reorg of ports and port-related functions & events.
src/mem/cache/cache.cc:
src/mem/cache/cache_builder.cc:
src/mem/cache/coherence/SConscript:
    Get rid of UniCoherence object.

--HG--
extra : convert_revision : 7672434fa3115c9b1c94686f497e57e90413b7c3
---
 src/mem/bridge.cc                         |   6 -
 src/mem/bus.cc                            |  43 ++--
 src/mem/cache/base_cache.cc               | 315 +++++++++--------------------
 src/mem/cache/base_cache.hh               | 321 +++++-------------------------
 src/mem/cache/cache.cc                    |   6 -
 src/mem/cache/cache.hh                    |  52 +++--
 src/mem/cache/cache_builder.cc            |  10 +-
 src/mem/cache/cache_impl.hh               | 278 ++++++++++++++++++++------
 src/mem/cache/coherence/SConscript        |   1 -
 src/mem/cache/coherence/uni_coherence.cc  | 135 -------------
 src/mem/cache/coherence/uni_coherence.hh  | 146 --------------
 src/mem/cache/miss/blocking_buffer.cc     |  14 +-
 src/mem/cache/miss/miss_queue.cc          |  16 +-
 src/mem/cache/prefetch/base_prefetcher.cc |   8 +-
 src/mem/packet.hh                         |   2 +-
 15 files changed, 435 insertions(+), 918 deletions(-)
 delete mode 100644 src/mem/cache/coherence/uni_coherence.cc
 delete mode 100644 src/mem/cache/coherence/uni_coherence.hh

diff --git a/src/mem/bridge.cc b/src/mem/bridge.cc
index f525ccb48..5460c88dd 100644
--- a/src/mem/bridge.cc
+++ b/src/mem/bridge.cc
@@ -112,10 +112,6 @@ Bridge::BridgePort::reqQueueFull()
 bool
 Bridge::BridgePort::recvTiming(PacketPtr pkt)
 {
-    if (!(pkt->flags & SNOOP_COMMIT))
-        return true;
-
-
     DPRINTF(BusBridge, "recvTiming: src %d dest %d addr 0x%x\n",
                 pkt->getSrc(), pkt->getDest(), pkt->getAddr());
 
@@ -255,8 +251,6 @@ Bridge::BridgePort::trySend()
 
     PacketPtr pkt = buf->pkt;
 
-    pkt->flags &= ~SNOOP_COMMIT; //CLear it if it was set
-
     if (pkt->cmd == MemCmd::WriteInvalidateReq && fixPartialWrite &&
             pkt->result != Packet::Nacked && pkt->getOffset(pbs) &&
             pkt->getSize() != pbs) {
diff --git a/src/mem/bus.cc b/src/mem/bus.cc
index 95d4e2873..895123f8b 100644
--- a/src/mem/bus.cc
+++ b/src/mem/bus.cc
@@ -182,8 +182,10 @@ Bus::recvTiming(PacketPtr pkt)
 
     // If the bus is busy, or other devices are in line ahead of the current
     // one, put this device on the retry list.
-    if (tickNextIdle > curTick ||
-            (retryList.size() && (!inRetry || pktPort != retryList.front()))) {
+    // Express snoops bypass both the busy-bus and the retry-list checks.
+    if (!(pkt->flags & EXPRESS_SNOOP) &&
+        (tickNextIdle > curTick ||
+         (retryList.size() && (!inRetry || pktPort != retryList.front())))) {
         addToRetryList(pktPort);
         DPRINTF(Bus, "recvTiming: Bus is busy, returning false\n");
         return false;
@@ -195,31 +197,18 @@ Bus::recvTiming(PacketPtr pkt)
     // access has been handled twice.
     if (dest == Packet::Broadcast) {
         port = findPort(pkt->getAddr(), pkt->getSrc());
-        pkt->flags &= ~SNOOP_COMMIT;
-        if (timingSnoop(pkt, port ? port : interfaces[pkt->getSrc()])) {
-            bool success;
-
-            pkt->flags |= SNOOP_COMMIT;
-            success = timingSnoop(pkt, port ? port : interfaces[pkt->getSrc()]);
-            assert(success);
-
-            if (pkt->flags & SATISFIED) {
-                //Cache-Cache transfer occuring
-                if (inRetry) {
-                    retryList.front()->onRetryList(false);
-                    retryList.pop_front();
-                    inRetry = false;
-                }
-                occupyBus(pkt);
-                DPRINTF(Bus, "recvTiming: Packet sucessfully sent\n");
-                return true;
+        timingSnoop(pkt, port ? port : interfaces[pkt->getSrc()]);
+
+        if (pkt->flags & SATISFIED) {
+            //Cache-Cache transfer occuring
+            if (inRetry) {
+                retryList.front()->onRetryList(false);
+                retryList.pop_front();
+                inRetry = false;
             }
-        } else {
-            //Snoop didn't succeed
-            DPRINTF(Bus, "Adding1 a retry to RETRY list %d\n",
-                    pktPort->getId());
-            addToRetryList(pktPort);
-            return false;
+            occupyBus(pkt);
+            DPRINTF(Bus, "recvTiming: Packet sucessfully sent\n");
+            return true;
         }
     } else {
         assert(dest >= 0 && dest < maxId);
@@ -426,7 +415,6 @@ Bus::recvAtomic(PacketPtr pkt)
     DPRINTF(Bus, "recvAtomic: packet src %d dest %d addr 0x%x cmd %s\n",
             pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString());
     assert(pkt->getDest() == Packet::Broadcast);
-    pkt->flags |= SNOOP_COMMIT;
 
     // Assume one bus cycle in order to get through.  This may have
     // some clock skew issues yet again...
@@ -451,7 +439,6 @@ Bus::recvFunctional(PacketPtr pkt)
     DPRINTF(Bus, "recvFunctional: packet src %d dest %d addr 0x%x cmd %s\n",
             pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString());
     assert(pkt->getDest() == Packet::Broadcast);
-    pkt->flags |= SNOOP_COMMIT;
 
     Port* port = findPort(pkt->getAddr(), pkt->getSrc());
     functionalSnoop(pkt, port ? port : interfaces[pkt->getSrc()]);
diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc
index 3ed4b84d1..b699271f7 100644
--- a/src/mem/cache/base_cache.cc
+++ b/src/mem/cache/base_cache.cc
@@ -40,29 +40,38 @@
 
 using namespace std;
 
-BaseCache::CachePort::CachePort(const std::string &_name, BaseCache *_cache,
-                                bool _isCpuSide)
-    : Port(_name, _cache), cache(_cache), isCpuSide(_isCpuSide)
+BaseCache::CachePort::CachePort(const std::string &_name, BaseCache *_cache)
+    : Port(_name, _cache), cache(_cache), otherPort(NULL), requestCauses(0)
 {
     blocked = false;
     waitingOnRetry = false;
-    //Start ports at null if more than one is created we should panic
-    //cpuSidePort = NULL;
-    //memSidePort = NULL;
 }
 
 
+BaseCache::BaseCache(const std::string &name, Params &params)
+    : MemObject(name),
+      blocked(0), blockedSnoop(0),
+      blkSize(params.blkSize),
+      missCount(params.maxMisses), drainEvent(NULL)
+{
+}
+
+
+
 void
 BaseCache::CachePort::recvStatusChange(Port::Status status)
 {
-    cache->recvStatusChange(status, isCpuSide);
+    if (status == Port::RangeChange) {
+        otherPort->sendStatusChange(Port::RangeChange);
+    }
 }
 
 void
 BaseCache::CachePort::getDeviceAddressRanges(AddrRangeList &resp,
                                        AddrRangeList &snoop)
 {
-    cache->getAddressRanges(resp, snoop, isCpuSide);
+    AddrRangeList dummy;
+    otherPort->getPeerAddressRanges(resp, dummy);
 }
 
 int
@@ -115,92 +124,99 @@ BaseCache::CachePort::checkAndSendFunctional(PacketPtr pkt)
         sendFunctional(pkt);
 }
 
+
 void
-BaseCache::CachePort::recvRetry()
+BaseCache::CachePort::respond(PacketPtr pkt, Tick time)
 {
-    PacketPtr pkt;
-    assert(waitingOnRetry);
-    if (!drainList.empty()) {
-        DPRINTF(CachePort, "%s attempting to send a retry for response (%i waiting)\n"
-                , name(), drainList.size());
-        //We have some responses to drain first
-        pkt = drainList.front();
-        drainList.pop_front();
-        if (sendTiming(pkt)) {
-            DPRINTF(CachePort, "%s sucessful in sending a retry for"
-                    "response (%i still waiting)\n", name(), drainList.size());
-            if (!drainList.empty() ||
-                !isCpuSide && cache->doMasterRequest() ||
-                isCpuSide && cache->doSlaveRequest()) {
-
-                DPRINTF(CachePort, "%s has more responses/requests\n", name());
-                new BaseCache::RequestEvent(this, curTick + 1);
-            }
-            waitingOnRetry = false;
-        }
-        else {
-            drainList.push_front(pkt);
+    assert(time >= curTick);
+    if (pkt->needsResponse()) {
+        if (transmitList.empty()) {
+            assert(!responseEvent->scheduled());
+            responseEvent->schedule(time);
+            transmitList.push_back(std::pair<Tick,PacketPtr>(time,pkt));
+            return;
         }
-        // Check if we're done draining once this list is empty
-        if (drainList.empty())
-            cache->checkDrain();
-    }
-    else if (!isCpuSide)
-    {
-        DPRINTF(CachePort, "%s attempting to send a retry for MSHR\n", name());
-        if (!cache->doMasterRequest()) {
-            //This can happen if I am the owner of a block and see an upgrade
-            //while the block was in my WB Buffers.  I just remove the
-            //wb and de-assert the masterRequest
-            waitingOnRetry = false;
+
+        // something is on the list and this belongs at the end
+        if (time >= transmitList.back().first) {
+            transmitList.push_back(std::pair<Tick,PacketPtr>(time,pkt));
             return;
         }
-        pkt = cache->getPacket();
-        MSHR* mshr = (MSHR*) pkt->senderState;
-        //Copy the packet, it may be modified/destroyed elsewhere
-        PacketPtr copyPkt = new Packet(*pkt);
-        copyPkt->dataStatic<uint8_t>(pkt->getPtr<uint8_t>());
-        mshr->pkt = copyPkt;
-
-        bool success = sendTiming(pkt);
-        DPRINTF(Cache, "Address %x was %s in sending the timing request\n",
-                pkt->getAddr(), success ? "succesful" : "unsuccesful");
-
-        waitingOnRetry = !success;
-        if (waitingOnRetry) {
-            DPRINTF(CachePort, "%s now waiting on a retry\n", name());
+        // Something is on the list and this belongs somewhere else
+        std::list<std::pair<Tick,PacketPtr> >::iterator i =
+            transmitList.begin();
+        std::list<std::pair<Tick,PacketPtr> >::iterator end =
+            transmitList.end();
+        bool done = false;
+
+        while (i != end && !done) {
+            if (time < i->first) {
+                if (i == transmitList.begin()) {
+                    //Inserting at begining, reschedule
+                    responseEvent->reschedule(time);
+                }
+                transmitList.insert(i,std::pair<Tick,PacketPtr>(time,pkt));
+                done = true;
+            }
+            i++;
+        }
+    }
+    else {
+        assert(0);
+        // this code was on the cpuSidePort only... do we still need it?
+        if (pkt->cmd != MemCmd::UpgradeReq)
+        {
+            delete pkt->req;
+            delete pkt;
         }
+    }
+}
 
-        cache->sendResult(pkt, mshr, success);
+bool
+BaseCache::CachePort::drainResponse()
+{
+    DPRINTF(CachePort,
+            "%s attempting to send a retry for response (%i waiting)\n",
+            name(), drainList.size());
+    //We have some responses to drain first
+    PacketPtr pkt = drainList.front();
+    if (sendTiming(pkt)) {
+        drainList.pop_front();
+        DPRINTF(CachePort, "%s sucessful in sending a retry for"
+                "response (%i still waiting)\n", name(), drainList.size());
+        if (!drainList.empty() || isBusRequested()) {
 
-        if (success && cache->doMasterRequest())
-        {
-            DPRINTF(CachePort, "%s has more requests\n", name());
-            //Still more to issue, rerequest in 1 cycle
-            new BaseCache::RequestEvent(this, curTick + 1);
+            DPRINTF(CachePort, "%s has more responses/requests\n", name());
+            return false;
         }
+    } else {
+        waitingOnRetry = true;
+        DPRINTF(CachePort, "%s now waiting on a retry\n", name());
     }
-    else
-    {
-        assert(cache->doSlaveRequest());
-        //pkt = cache->getCoherencePacket();
-        //We save the packet, no reordering on CSHRS
-        pkt = cache->getCoherencePacket();
-        MSHR* cshr = (MSHR*)pkt->senderState;
-        bool success = sendTiming(pkt);
-        cache->sendCoherenceResult(pkt, cshr, success);
-        waitingOnRetry = !success;
-        if (success && cache->doSlaveRequest())
-        {
-            DPRINTF(CachePort, "%s has more requests\n", name());
-            //Still more to issue, rerequest in 1 cycle
-            new BaseCache::RequestEvent(this, curTick + 1);
+    return true;
+}
+
+
+bool
+BaseCache::CachePort::recvRetryCommon()
+{
+    assert(waitingOnRetry);
+    waitingOnRetry = false;
+    if (!drainList.empty()) {
+        if (!drainResponse()) {
+            // more responses to drain... re-request bus
+            scheduleRequestEvent(curTick + 1);
         }
+        // Check if we're done draining once this list is empty
+        if (drainList.empty()) {
+            cache->checkDrain();
+        }
+        return true;
     }
-    if (waitingOnRetry) DPRINTF(CachePort, "%s STILL Waiting on retry\n", name());
-    else DPRINTF(CachePort, "%s no longer waiting on retry\n", name());
-    return;
+    return false;
 }
+
+
 void
 BaseCache::CachePort::setBlocked()
 {
@@ -225,143 +241,6 @@ BaseCache::CachePort::clearBlocked()
     }
 }
 
-BaseCache::RequestEvent::RequestEvent(CachePort *_cachePort, Tick when)
-    : Event(&mainEventQueue, CPU_Tick_Pri), cachePort(_cachePort)
-{
-    this->setFlags(AutoDelete);
-    schedule(when);
-}
-
-void
-BaseCache::RequestEvent::process()
-{
-    if (cachePort->waitingOnRetry) return;
-    //We have some responses to drain first
-    if (!cachePort->drainList.empty()) {
-        DPRINTF(CachePort, "%s trying to drain a response\n", cachePort->name());
-        if (cachePort->sendTiming(cachePort->drainList.front())) {
-            DPRINTF(CachePort, "%s drains a response succesfully\n", cachePort->name());
-            cachePort->drainList.pop_front();
-            if (!cachePort->drainList.empty() ||
-                !cachePort->isCpuSide && cachePort->cache->doMasterRequest() ||
-                cachePort->isCpuSide && cachePort->cache->doSlaveRequest()) {
-
-                DPRINTF(CachePort, "%s still has outstanding bus reqs\n", cachePort->name());
-                this->schedule(curTick + 1);
-            }
-        }
-        else {
-            cachePort->waitingOnRetry = true;
-            DPRINTF(CachePort, "%s now waiting on a retry\n", cachePort->name());
-        }
-    }
-    else if (!cachePort->isCpuSide)
-    {            //MSHR
-        DPRINTF(CachePort, "%s trying to send a MSHR request\n", cachePort->name());
-        if (!cachePort->cache->doMasterRequest()) {
-            //This can happen if I am the owner of a block and see an upgrade
-            //while the block was in my WB Buffers.  I just remove the
-            //wb and de-assert the masterRequest
-            return;
-        }
-
-        PacketPtr pkt = cachePort->cache->getPacket();
-        MSHR* mshr = (MSHR*) pkt->senderState;
-        //Copy the packet, it may be modified/destroyed elsewhere
-        PacketPtr copyPkt = new Packet(*pkt);
-        copyPkt->dataStatic<uint8_t>(pkt->getPtr<uint8_t>());
-        mshr->pkt = copyPkt;
-
-        bool success = cachePort->sendTiming(pkt);
-        DPRINTF(Cache, "Address %x was %s in sending the timing request\n",
-                pkt->getAddr(), success ? "succesful" : "unsuccesful");
-
-        cachePort->waitingOnRetry = !success;
-        if (cachePort->waitingOnRetry) {
-            DPRINTF(CachePort, "%s now waiting on a retry\n", cachePort->name());
-        }
-
-        cachePort->cache->sendResult(pkt, mshr, success);
-        if (success && cachePort->cache->doMasterRequest())
-        {
-            DPRINTF(CachePort, "%s still more MSHR requests to send\n",
-                    cachePort->name());
-            //Still more to issue, rerequest in 1 cycle
-            this->schedule(curTick+1);
-        }
-    }
-    else
-    {
-        //CSHR
-        assert(cachePort->cache->doSlaveRequest());
-        PacketPtr pkt = cachePort->cache->getCoherencePacket();
-        MSHR* cshr = (MSHR*) pkt->senderState;
-        bool success = cachePort->sendTiming(pkt);
-        cachePort->cache->sendCoherenceResult(pkt, cshr, success);
-        cachePort->waitingOnRetry = !success;
-        if (cachePort->waitingOnRetry)
-            DPRINTF(CachePort, "%s now waiting on a retry\n", cachePort->name());
-        if (success && cachePort->cache->doSlaveRequest())
-        {
-            DPRINTF(CachePort, "%s still more CSHR requests to send\n",
-                    cachePort->name());
-            //Still more to issue, rerequest in 1 cycle
-            this->schedule(curTick+1);
-        }
-    }
-}
-
-const char *
-BaseCache::RequestEvent::description()
-{
-    return "Cache request event";
-}
-
-BaseCache::ResponseEvent::ResponseEvent(CachePort *_cachePort)
-    : Event(&mainEventQueue, CPU_Tick_Pri), cachePort(_cachePort)
-{
-}
-
-void
-BaseCache::ResponseEvent::process()
-{
-    assert(cachePort->transmitList.size());
-    assert(cachePort->transmitList.front().first <= curTick);
-    PacketPtr pkt = cachePort->transmitList.front().second;
-    cachePort->transmitList.pop_front();
-    if (!cachePort->transmitList.empty()) {
-        Tick time = cachePort->transmitList.front().first;
-        schedule(time <= curTick ? curTick+1 : time);
-    }
-
-    if (pkt->flags & NACKED_LINE)
-        pkt->result = Packet::Nacked;
-    else
-        pkt->result = Packet::Success;
-    pkt->makeTimingResponse();
-    DPRINTF(CachePort, "%s attempting to send a response\n", cachePort->name());
-    if (!cachePort->drainList.empty() || cachePort->waitingOnRetry) {
-        //Already have a list, just append
-        cachePort->drainList.push_back(pkt);
-        DPRINTF(CachePort, "%s appending response onto drain list\n", cachePort->name());
-    }
-    else if (!cachePort->sendTiming(pkt)) {
-        //It failed, save it to list of drain events
-        DPRINTF(CachePort, "%s now waiting for a retry\n", cachePort->name());
-        cachePort->drainList.push_back(pkt);
-        cachePort->waitingOnRetry = true;
-    }
-
-    // Check if we're done draining once this list is empty
-    if (cachePort->drainList.empty() && cachePort->transmitList.empty())
-        cachePort->cache->checkDrain();
-}
-
-const char *
-BaseCache::ResponseEvent::description()
-{
-    return "Cache response event";
-}
 
 void
 BaseCache::init()
diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh
index e45e36fa0..2d63945d9 100644
--- a/src/mem/cache/base_cache.hh
+++ b/src/mem/cache/base_cache.hh
@@ -26,6 +26,8 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Authors: Erik Hallnor
+ *          Steve Reinhardt
+ *          Ron Dreslinski
  */
 
 /**
@@ -83,7 +85,10 @@ class BaseCache : public MemObject
         BaseCache *cache;
 
       protected:
-        CachePort(const std::string &_name, BaseCache *_cache, bool _isCpuSide);
+        Event *responseEvent;
+
+        CachePort(const std::string &_name, BaseCache *_cache);
+
         virtual void recvStatusChange(Status status);
 
         virtual void getDeviceAddressRanges(AddrRangeList &resp,
@@ -91,9 +96,11 @@ class BaseCache : public MemObject
 
         virtual int deviceBlockSize();
 
-        virtual void recvRetry();
+        bool recvRetryCommon();
 
       public:
+        void setOtherPort(CachePort *_otherPort) { otherPort = _otherPort; }
+
         void setBlocked();
 
         void clearBlocked();
@@ -104,65 +111,52 @@ class BaseCache : public MemObject
 
         bool canDrain() { return drainList.empty() && transmitList.empty(); }
 
+        bool drainResponse();
+
+        CachePort *otherPort;
+
         bool blocked;
 
         bool mustSendRetry;
 
-        bool isCpuSide;
-
         bool waitingOnRetry;
 
+        /**
+         * Bit vector for the outstanding requests for the master interface.
+         */
+        uint8_t requestCauses;
+
         std::list<PacketPtr> drainList;
 
         std::list<std::pair<Tick,PacketPtr> > transmitList;
-    };
 
-    struct RequestEvent : public Event
-    {
-        CachePort *cachePort;
+        bool isBusRequested() { return requestCauses != 0; }
 
-        RequestEvent(CachePort *_cachePort, Tick when);
-        void process();
-        const char *description();
-    };
+        // These need to be virtual since the Event objects depend on
+        // cache template parameters.
+        virtual void scheduleRequestEvent(Tick t) = 0;
 
-    struct ResponseEvent : public Event
-    {
-        CachePort *cachePort;
+        void requestBus(RequestCause cause, Tick time)
+        {
+            if (!isBusRequested() && !waitingOnRetry) {
+                scheduleRequestEvent(time);
+            }
+            requestCauses |= (1 << cause);
+        }
+
+        void deassertBusRequest(RequestCause cause)
+        {
+            requestCauses &= ~(1 << cause);
+        }
 
-        ResponseEvent(CachePort *_cachePort);
-        void process();
-        const char *description();
+        void respond(PacketPtr pkt, Tick time);
     };
 
   public: //Made public so coherence can get at it.
     CachePort *cpuSidePort;
     CachePort *memSidePort;
 
-    ResponseEvent *sendEvent;
-    ResponseEvent *memSendEvent;
-
   private:
-    void recvStatusChange(Port::Status status, bool isCpuSide)
-    {
-        if (status == Port::RangeChange){
-            if (!isCpuSide) {
-                cpuSidePort->sendStatusChange(Port::RangeChange);
-            }
-            else {
-                memSidePort->sendStatusChange(Port::RangeChange);
-            }
-        }
-    }
-
-    virtual PacketPtr getPacket() = 0;
-
-    virtual PacketPtr getCoherencePacket() = 0;
-
-    virtual void sendResult(PacketPtr &pkt, MSHR* mshr, bool success) = 0;
-
-    virtual void sendCoherenceResult(PacketPtr &pkt, MSHR* mshr, bool success) = 0;
-
     /**
      * Bit vector of the blocking reasons for the access path.
      * @sa #BlockedCause
@@ -175,16 +169,6 @@ class BaseCache : public MemObject
      */
     uint8_t blockedSnoop;
 
-    /**
-     * Bit vector for the outstanding requests for the master interface.
-     */
-    uint8_t masterRequests;
-
-    /**
-     * Bit vector for the outstanding requests for the slave interface.
-     */
-    uint8_t slaveRequests;
-
   protected:
 
     /** Stores time the cache blocked for statistics. */
@@ -309,20 +293,10 @@ class BaseCache : public MemObject
      * of this cache.
      * @param params The parameter object for this BaseCache.
      */
-    BaseCache(const std::string &name, Params &params)
-        : MemObject(name), blocked(0), blockedSnoop(0), masterRequests(0),
-          slaveRequests(0), blkSize(params.blkSize),
-          missCount(params.maxMisses), drainEvent(NULL)
-    {
-        //Start ports at null if more than one is created we should panic
-        cpuSidePort = NULL;
-        memSidePort = NULL;
-    }
+    BaseCache(const std::string &name, Params &params);
 
     ~BaseCache()
     {
-        delete sendEvent;
-        delete memSendEvent;
     }
 
     virtual void init();
@@ -422,12 +396,12 @@ class BaseCache : public MemObject
     }
 
     /**
-     * True if the master bus should be requested.
+     * True if the memory-side bus should be requested.
      * @return True if there are outstanding requests for the master bus.
      */
-    bool doMasterRequest()
+    bool isMemSideBusRequested()
     {
-        return masterRequests != 0;
+        return memSidePort->isBusRequested();
     }
 
     /**
@@ -435,59 +409,18 @@ class BaseCache : public MemObject
      * @param cause The reason for the request.
      * @param time The time to make the request.
      */
-    void setMasterRequest(RequestCause cause, Tick time)
+    void requestMemSideBus(RequestCause cause, Tick time)
     {
-        if (!doMasterRequest() && !memSidePort->waitingOnRetry)
-        {
-            new RequestEvent(memSidePort, time);
-        }
-        uint8_t flag = 1<<cause;
-        masterRequests |= flag;
+        memSidePort->requestBus(cause, time);
     }
 
     /**
      * Clear the master bus request for the given cause.
      * @param cause The request reason to clear.
      */
-    void clearMasterRequest(RequestCause cause)
+    void deassertMemSideBusRequest(RequestCause cause)
     {
-        uint8_t flag = 1<<cause;
-        masterRequests &= ~flag;
-        checkDrain();
-    }
-
-    /**
-     * Return true if the slave bus should be requested.
-     * @return True if there are outstanding requests for the slave bus.
-     */
-    bool doSlaveRequest()
-    {
-        return slaveRequests != 0;
-    }
-
-    /**
-     * Request the slave bus for the given reason and time.
-     * @param cause The reason for the request.
-     * @param time The time to make the request.
-     */
-    void setSlaveRequest(RequestCause cause, Tick time)
-    {
-        if (!doSlaveRequest() && !cpuSidePort->waitingOnRetry)
-        {
-            new RequestEvent(cpuSidePort, time);
-        }
-        uint8_t flag = 1<<cause;
-        slaveRequests |= flag;
-    }
-
-    /**
-     * Clear the slave bus request for the given reason.
-     * @param cause The request reason to clear.
-     */
-    void clearSlaveRequest(RequestCause cause)
-    {
-        uint8_t flag = 1<<cause;
-        slaveRequests &= ~flag;
+        memSidePort->deassertBusRequest(cause);
         checkDrain();
     }
 
@@ -498,111 +431,7 @@ class BaseCache : public MemObject
      */
     void respond(PacketPtr pkt, Tick time)
     {
-        assert(time >= curTick);
-        if (pkt->needsResponse()) {
-/*            CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt);
-            reqCpu->schedule(time);
-*/
-            if (cpuSidePort->transmitList.empty()) {
-                assert(!sendEvent->scheduled());
-                sendEvent->schedule(time);
-                cpuSidePort->transmitList.push_back(std::pair<Tick,PacketPtr>
-                                                    (time,pkt));
-                return;
-            }
-
-            // something is on the list and this belongs at the end
-            if (time >= cpuSidePort->transmitList.back().first) {
-                cpuSidePort->transmitList.push_back(std::pair<Tick,PacketPtr>
-                                                    (time,pkt));
-                return;
-            }
-            // Something is on the list and this belongs somewhere else
-            std::list<std::pair<Tick,PacketPtr> >::iterator i =
-                cpuSidePort->transmitList.begin();
-            std::list<std::pair<Tick,PacketPtr> >::iterator end =
-                cpuSidePort->transmitList.end();
-            bool done = false;
-
-            while (i != end && !done) {
-                if (time < i->first) {
-                    if (i == cpuSidePort->transmitList.begin()) {
-                        //Inserting at begining, reschedule
-                        sendEvent->reschedule(time);
-                    }
-                    cpuSidePort->transmitList.insert(i,std::pair<Tick,PacketPtr>
-                                                     (time,pkt));
-                    done = true;
-                }
-                i++;
-            }
-        }
-        else {
-            if (pkt->cmd != MemCmd::UpgradeReq)
-            {
-                delete pkt->req;
-                delete pkt;
-            }
-        }
-    }
-
-    /**
-     * Send a reponse to the slave interface and calculate miss latency.
-     * @param pkt The request to respond to.
-     * @param time The time the response is ready.
-     */
-    void respondToMiss(PacketPtr pkt, Tick time)
-    {
-        assert(time >= curTick);
-        if (!pkt->req->isUncacheable()) {
-            missLatency[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] +=
-                time - pkt->time;
-        }
-        if (pkt->needsResponse()) {
-/*            CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt);
-            reqCpu->schedule(time);
-*/
-            if (cpuSidePort->transmitList.empty()) {
-                assert(!sendEvent->scheduled());
-                sendEvent->schedule(time);
-                cpuSidePort->transmitList.push_back(std::pair<Tick,PacketPtr>
-                                                    (time,pkt));
-                return;
-            }
-
-            // something is on the list and this belongs at the end
-            if (time >= cpuSidePort->transmitList.back().first) {
-                cpuSidePort->transmitList.push_back(std::pair<Tick,PacketPtr>
-                                                    (time,pkt));
-                return;
-            }
-            // Something is on the list and this belongs somewhere else
-            std::list<std::pair<Tick,PacketPtr> >::iterator i =
-                cpuSidePort->transmitList.begin();
-            std::list<std::pair<Tick,PacketPtr> >::iterator end =
-                cpuSidePort->transmitList.end();
-            bool done = false;
-
-            while (i != end && !done) {
-                if (time < i->first) {
-                    if (i == cpuSidePort->transmitList.begin()) {
-                        //Inserting at begining, reschedule
-                        sendEvent->reschedule(time);
-                    }
-                    cpuSidePort->transmitList.insert(i,std::pair<Tick,PacketPtr>
-                                                     (time,pkt));
-                    done = true;
-                }
-                i++;
-            }
-        }
-        else {
-            if (pkt->cmd != MemCmd::UpgradeReq)
-            {
-                delete pkt->req;
-                delete pkt;
-            }
-        }
+        cpuSidePort->respond(pkt, time);
     }
 
     /**
@@ -611,65 +440,7 @@ class BaseCache : public MemObject
      */
     void respondToSnoop(PacketPtr pkt, Tick time)
     {
-        assert(time >= curTick);
-        assert (pkt->needsResponse());
-/*        CacheEvent *reqMem = new CacheEvent(memSidePort, pkt);
-        reqMem->schedule(time);
-*/
-        if (memSidePort->transmitList.empty()) {
-            assert(!memSendEvent->scheduled());
-            memSendEvent->schedule(time);
-            memSidePort->transmitList.push_back(std::pair<Tick,PacketPtr>
-                                                (time,pkt));
-            return;
-        }
-
-        // something is on the list and this belongs at the end
-        if (time >= memSidePort->transmitList.back().first) {
-            memSidePort->transmitList.push_back(std::pair<Tick,PacketPtr>
-                                                (time,pkt));
-            return;
-        }
-        // Something is on the list and this belongs somewhere else
-        std::list<std::pair<Tick,PacketPtr> >::iterator i =
-            memSidePort->transmitList.begin();
-        std::list<std::pair<Tick,PacketPtr> >::iterator end =
-            memSidePort->transmitList.end();
-        bool done = false;
-
-        while (i != end && !done) {
-            if (time < i->first) {
-                if (i == memSidePort->transmitList.begin()) {
-                    //Inserting at begining, reschedule
-                    memSendEvent->reschedule(time);
-                }
-                memSidePort->transmitList.insert(i,std::pair<Tick,PacketPtr>(time,pkt));
-                done = true;
-            }
-            i++;
-        }
-    }
-
-    /**
-     * Notification from master interface that a address range changed. Nothing
-     * to do for a cache.
-     */
-    void rangeChange() {}
-
-    void getAddressRanges(AddrRangeList &resp, AddrRangeList &snoop, bool isCpuSide)
-    {
-        if (isCpuSide)
-        {
-            AddrRangeList dummy;
-            memSidePort->getPeerAddressRanges(resp, dummy);
-        }
-        else
-        {
-            //This is where snoops get updated
-            AddrRangeList dummy;
-            cpuSidePort->getPeerAddressRanges(dummy, snoop);
-            return;
-        }
+        memSidePort->respond(pkt, time);
     }
 
     virtual unsigned int drain(Event *de);
@@ -686,7 +457,7 @@ class BaseCache : public MemObject
 
     bool canDrain()
     {
-        if (doMasterRequest() || doSlaveRequest()) {
+        if (isMemSideBusRequested()) {
             return false;
         } else if (memSidePort && !memSidePort->canDrain()) {
             return false;
diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc
index cb4e7f62e..2b4e7b9c8 100644
--- a/src/mem/cache/cache.cc
+++ b/src/mem/cache/cache.cc
@@ -61,7 +61,6 @@
 #include "mem/cache/miss/miss_queue.hh"
 #include "mem/cache/miss/blocking_buffer.hh"
 
-#include "mem/cache/coherence/uni_coherence.hh"
 #include "mem/cache/coherence/simple_coherence.hh"
 
 #include "mem/cache/cache_impl.hh"
@@ -72,27 +71,22 @@
 
 #if defined(USE_CACHE_FALRU)
 template class Cache<FALRU, SimpleCoherence>;
-template class Cache<FALRU, UniCoherence>;
 #endif
 
 #if defined(USE_CACHE_IIC)
 template class Cache<IIC, SimpleCoherence>;
-template class Cache<IIC, UniCoherence>;
 #endif
 
 #if defined(USE_CACHE_LRU)
 template class Cache<LRU, SimpleCoherence>;
-template class Cache<LRU, UniCoherence>;
 #endif
 
 #if defined(USE_CACHE_SPLIT)
 template class Cache<Split, SimpleCoherence>;
-template class Cache<Split, UniCoherence>;
 #endif
 
 #if defined(USE_CACHE_SPLIT_LIFO)
 template class Cache<SplitLIFO, SimpleCoherence>;
-template class Cache<SplitLIFO, UniCoherence>;
 #endif
 
 #endif //DOXYGEN_SHOULD_SKIP_THIS
diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index 3e45c85d2..75fb50f4e 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -28,6 +28,7 @@
  * Authors: Erik Hallnor
  *          Dave Greene
  *          Steve Reinhardt
+ *          Ron Dreslinski
  */
 
 /**
@@ -46,6 +47,8 @@
 #include "mem/cache/cache_blk.hh"
 #include "mem/cache/miss/miss_buffer.hh"
 
+#include "sim/eventq.hh"
+
 //Forward decleration
 class MSHR;
 class BasePrefetcher;
@@ -83,11 +86,26 @@ class Cache : public BaseCache
             return static_cast<Cache<TagStore,Coherence> *>(cache);
         }
 
+        void processRequestEvent();
+        void processResponseEvent();
+
         virtual bool recvTiming(PacketPtr pkt);
 
+        virtual void recvRetry();
+
         virtual Tick recvAtomic(PacketPtr pkt);
 
         virtual void recvFunctional(PacketPtr pkt);
+
+        typedef EventWrapper<CpuSidePort, &CpuSidePort::processResponseEvent>
+                ResponseEvent;
+
+        typedef EventWrapper<CpuSidePort, &CpuSidePort::processRequestEvent>
+                RequestEvent;
+
+        virtual void scheduleRequestEvent(Tick t) {
+            new RequestEvent(this, t);
+        }
     };
 
     class MemSidePort : public CachePort
@@ -103,11 +121,26 @@ class Cache : public BaseCache
             return static_cast<Cache<TagStore,Coherence> *>(cache);
         }
 
+        void processRequestEvent();
+        void processResponseEvent();
+
         virtual bool recvTiming(PacketPtr pkt);
 
+        virtual void recvRetry();
+
         virtual Tick recvAtomic(PacketPtr pkt);
 
         virtual void recvFunctional(PacketPtr pkt);
+
+        typedef EventWrapper<MemSidePort, &MemSidePort::processResponseEvent>
+                ResponseEvent;
+
+        typedef EventWrapper<MemSidePort, &MemSidePort::processRequestEvent>
+                RequestEvent;
+
+        virtual void scheduleRequestEvent(Tick t) {
+            new RequestEvent(this, t);
+        }
     };
 
     /** Tag and data Storage */
@@ -339,8 +372,6 @@ class Cache : public BaseCache
     virtual Port *getPort(const std::string &if_name, int idx = -1);
     virtual void deletePortRefs(Port *p);
 
-    virtual void recvStatusChange(Port::Status status, bool isCpuSide);
-
     void regStats();
 
     /**
@@ -354,21 +385,14 @@ class Cache : public BaseCache
      * Selects a request to send on the bus.
      * @return The memory request to service.
      */
-    virtual PacketPtr getPacket();
+    PacketPtr getPacket();
 
     /**
      * Was the request was sent successfully?
      * @param pkt The request.
      * @param success True if the request was sent successfully.
      */
-    virtual void sendResult(PacketPtr &pkt, MSHR* mshr, bool success);
-
-    /**
-     * Was the CSHR request was sent successfully?
-     * @param pkt The request.
-     * @param success True if the request was sent successfully.
-     */
-    virtual void sendCoherenceResult(PacketPtr &pkt, MSHR* cshr, bool success);
+    void sendResult(PacketPtr &pkt, MSHR* mshr, bool success);
 
     /**
      * Handles a response (cache line fill/write ack) from the bus.
@@ -376,12 +400,6 @@ class Cache : public BaseCache
      */
     void handleResponse(PacketPtr &pkt);
 
-    /**
-     * Selects a coherence message to forward to lower levels of the hierarchy.
-     * @return The coherence message to forward.
-     */
-    virtual PacketPtr getCoherencePacket();
-
     /**
      * Snoops bus transactions to maintain coherence.
      * @param pkt The current bus transaction.
diff --git a/src/mem/cache/cache_builder.cc b/src/mem/cache/cache_builder.cc
index e887f711e..bc1a8a775 100644
--- a/src/mem/cache/cache_builder.cc
+++ b/src/mem/cache/cache_builder.cc
@@ -75,7 +75,6 @@
 #include "mem/cache/miss/blocking_buffer.hh"
 
 // Coherence Templates
-#include "mem/cache/coherence/uni_coherence.hh"
 #include "mem/cache/coherence/simple_coherence.hh"
 
 //Prefetcher Headers
@@ -302,13 +301,8 @@ END_INIT_SIM_OBJECT_PARAMS(BaseCache)
     } while (0)
 
 #define BUILD_COHERENCE(b) do {						\
-        if (protocol == NULL) {						\
-            UniCoherence *coh = new UniCoherence();			\
-            BUILD_CACHES(UniCoherence);				\
-        } else {							\
-            SimpleCoherence *coh = new SimpleCoherence(protocol);	\
-            BUILD_CACHES(SimpleCoherence);				\
-        }								\
+        SimpleCoherence *coh = new SimpleCoherence(protocol);           \
+        BUILD_CACHES(SimpleCoherence);                                  \
     } while (0)
 
 #if defined(USE_TAGGED)
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 9b094c1e3..6b9eac865 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -28,6 +28,8 @@
  * Authors: Erik Hallnor
  *          Dave Greene
  *          Nathan Binkert
+ *          Steve Reinhardt
+ *          Ron Dreslinski
  */
 
 /**
@@ -57,18 +59,8 @@
 bool SIGNAL_NACK_HACK;
 
 template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::
-recvStatusChange(Port::Status status, bool isCpuSide)
-{
-
-}
-
-
-template<class TagStore, class Coherence>
-Cache<TagStore,Coherence>::
-Cache(const std::string &_name,
-      Cache<TagStore,Coherence>::Params &params)
+Cache<TagStore,Coherence>::Cache(const std::string &_name,
+                                 Cache<TagStore,Coherence>::Params &params)
     : BaseCache(_name, params.baseParams),
       prefetchAccess(params.prefetchAccess),
       tags(params.tags), missQueue(params.missQueue),
@@ -84,6 +76,11 @@ Cache(const std::string &_name,
       adaptiveCompression(params.adaptiveCompression),
       writebackCompressed(params.writebackCompressed)
 {
+    cpuSidePort = new CpuSidePort(_name + "-cpu_side_port", this);
+    memSidePort = new MemSidePort(_name + "-mem_side_port", this);
+    cpuSidePort->setOtherPort(memSidePort);
+    memSidePort->setOtherPort(cpuSidePort);
+
     tags->setCache(this);
     missQueue->setCache(this);
     missQueue->setPrefetcher(prefetcher);
@@ -406,7 +403,11 @@ Cache<TagStore,Coherence>::handleFill(BlkType *blk, MSHR * mshr,
 //            mshr->pkt = pkt;
             break;
         }
-        respondToMiss(target, completion_time);
+        if (!target->req->isUncacheable()) {
+            missLatency[target->cmdToIndex()][0/*pkt->req->getThreadNum()*/] +=
+                completion_time - target->time;
+        }
+        respond(target, completion_time);
         mshr->popTarget();
     }
 
@@ -688,7 +689,7 @@ Cache<TagStore,Coherence>::getPacket()
         }
     }
 
-    assert(!doMasterRequest() || missQueue->havePending());
+    assert(!isMemSideBusRequested() || missQueue->havePending());
     assert(!pkt || pkt->time <= curTick);
     SIGNAL_NACK_HACK = false;
     return pkt;
@@ -727,7 +728,6 @@ Cache<TagStore,Coherence>::sendResult(PacketPtr &pkt, MSHR* mshr,
         pkt->flags &= ~NACKED_LINE;
         SIGNAL_NACK_HACK = false;
         pkt->flags &= ~SATISFIED;
-        pkt->flags &= ~SNOOP_COMMIT;
 
 //Rmove copy from mshr
         delete mshr->pkt;
@@ -783,22 +783,6 @@ Cache<TagStore,Coherence>::handleResponse(PacketPtr &pkt)
     }
 }
 
-template<class TagStore, class Coherence>
-PacketPtr
-Cache<TagStore,Coherence>::getCoherencePacket()
-{
-    return coherence->getPacket();
-}
-
-template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::sendCoherenceResult(PacketPtr &pkt,
-                                                         MSHR *cshr,
-                                                         bool success)
-{
-    coherence->sendResult(pkt, cshr, success);
-}
-
 
 template<class TagStore, class Coherence>
 void
@@ -1146,27 +1130,15 @@ template<class TagStore, class Coherence>
 Port *
 Cache<TagStore,Coherence>::getPort(const std::string &if_name, int idx)
 {
-    if (if_name == "" || if_name == "cpu_side")
-    {
-        if (cpuSidePort == NULL) {
-            cpuSidePort = new CpuSidePort(name() + "-cpu_side_port", this);
-            sendEvent = new ResponseEvent(cpuSidePort);
-        }
+    if (if_name == "" || if_name == "cpu_side") {
         return cpuSidePort;
-    }
-    else if (if_name == "functional")
-    {
-        return new CpuSidePort(name() + "-cpu_side_funcport", this);
-    }
-    else if (if_name == "mem_side")
-    {
-        if (memSidePort != NULL)
-            panic("Already have a mem side for this cache\n");
-        memSidePort = new MemSidePort(name() + "-mem_side_port", this);
-        memSendEvent = new ResponseEvent(memSidePort);
+    } else if (if_name == "mem_side") {
         return memSidePort;
+    } else if (if_name == "functional") {
+        return new CpuSidePort(name() + "-cpu_side_funcport", this);
+    } else {
+        panic("Port name %s unrecognized\n", if_name);
     }
-    else panic("Port name %s unrecognized\n", if_name);
 }
 
 template<class TagStore, class Coherence>
@@ -1213,6 +1185,68 @@ Cache<TagStore,Coherence>::CpuSidePort::recvTiming(PacketPtr pkt)
     return true;
 }
 
+
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::CpuSidePort::recvRetry()
+{
+    recvRetryCommon();
+}
+
+
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::CpuSidePort::processRequestEvent()
+{
+    if (waitingOnRetry)
+        return;
+    //We have some responses to drain first
+    if (!drainList.empty()) {
+        if (!drainResponse()) {
+            // more responses to drain... re-request bus
+            scheduleRequestEvent(curTick + 1);
+        }
+    }
+}
+
+
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::CpuSidePort::processResponseEvent()
+{
+    assert(transmitList.size());
+    assert(transmitList.front().first <= curTick);
+    PacketPtr pkt = transmitList.front().second;
+    transmitList.pop_front();
+    if (!transmitList.empty()) {
+        Tick time = transmitList.front().first;
+        responseEvent->schedule(time <= curTick ? curTick+1 : time);
+    }
+
+    if (pkt->flags & NACKED_LINE)
+        pkt->result = Packet::Nacked;
+    else
+        pkt->result = Packet::Success;
+    pkt->makeTimingResponse();
+    DPRINTF(CachePort, "%s attempting to send a response\n", name());
+    if (!drainList.empty() || waitingOnRetry) {
+        //Already have a list, just append
+        drainList.push_back(pkt);
+        DPRINTF(CachePort, "%s appending response onto drain list\n", name());
+    }
+    else if (!sendTiming(pkt)) {
+        //It failed, save it to list of drain events
+        DPRINTF(CachePort, "%s now waiting for a retry\n", name());
+        drainList.push_back(pkt);
+        waitingOnRetry = true;
+    }
+
+    // Check if we're done draining once this list is empty
+    if (drainList.empty() && transmitList.empty())
+        myCache()->checkDrain();
+}
+
+
 template<class TagStore, class Coherence>
 Tick
 Cache<TagStore,Coherence>::CpuSidePort::recvAtomic(PacketPtr pkt)
@@ -1249,23 +1283,149 @@ Cache<TagStore,Coherence>::MemSidePort::recvTiming(PacketPtr pkt)
     if (pkt->result == Packet::Nacked)
         panic("Need to implement cache resending nacked packets!\n");
 
-    if (pkt->isRequest() && blocked)
-    {
+    if (pkt->isRequest() && blocked) {
         DPRINTF(Cache,"Scheduling a retry while blocked\n");
         mustSendRetry = true;
         return false;
     }
 
-    if (pkt->isResponse())
+    if (pkt->isResponse()) {
         myCache()->handleResponse(pkt);
-    else {
-        //Check if we should do the snoop
-        if (pkt->flags & SNOOP_COMMIT)
-            myCache()->snoop(pkt);
+    } else {
+        myCache()->snoop(pkt);
     }
     return true;
 }
 
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::MemSidePort::recvRetry()
+{
+    if (recvRetryCommon()) {
+        return;
+    }
+
+    DPRINTF(CachePort, "%s attempting to send a retry for MSHR\n", name());
+    if (!cache->isMemSideBusRequested()) {
+        //This can happen if I am the owner of a block and see an upgrade
+        //while the block was in my WB Buffers.  I just remove the
+        //wb and de-assert the mem-side bus request
+        waitingOnRetry = false;
+        return;
+    }
+    PacketPtr pkt = myCache()->getPacket();
+    MSHR* mshr = (MSHR*) pkt->senderState;
+    //Copy the packet, it may be modified/destroyed elsewhere
+    PacketPtr copyPkt = new Packet(*pkt);
+    copyPkt->dataStatic<uint8_t>(pkt->getPtr<uint8_t>());
+    mshr->pkt = copyPkt;
+
+    bool success = sendTiming(pkt);
+    DPRINTF(Cache, "Address %x was %s in sending the timing request\n",
+            pkt->getAddr(), success ? "succesful" : "unsuccesful");
+
+    waitingOnRetry = !success;
+    if (waitingOnRetry) {
+        DPRINTF(CachePort, "%s now waiting on a retry\n", name());
+    }
+
+    myCache()->sendResult(pkt, mshr, success);
+
+    if (success && cache->isMemSideBusRequested())
+    {
+        DPRINTF(CachePort, "%s has more requests\n", name());
+        //Still more to issue, re-request the bus on the next tick
+        new RequestEvent(this, curTick + 1);
+    }
+}
+
+
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::MemSidePort::processRequestEvent()
+{
+    if (waitingOnRetry)
+        return;
+    //We have some responses to drain first
+    if (!drainList.empty()) {
+        if (!drainResponse()) {
+            // more responses to drain... re-request bus
+            scheduleRequestEvent(curTick + 1);
+        }
+        return;
+    }
+
+    DPRINTF(CachePort, "%s trying to send a MSHR request\n", name());
+    if (!isBusRequested()) {
+        //This can happen if I am the owner of a block and see an upgrade
+        //while the block was in my WB Buffers.  I just remove the
+        //wb and de-assert the mem-side bus request
+        return;
+    }
+
+    PacketPtr pkt = myCache()->getPacket();
+    MSHR* mshr = (MSHR*) pkt->senderState;
+    //Copy the packet, it may be modified/destroyed elsewhere
+    PacketPtr copyPkt = new Packet(*pkt);
+    copyPkt->dataStatic<uint8_t>(pkt->getPtr<uint8_t>());
+    mshr->pkt = copyPkt;
+
+    bool success = sendTiming(pkt);
+    DPRINTF(Cache, "Address %x was %s in sending the timing request\n",
+            pkt->getAddr(), success ? "succesful" : "unsuccesful");
+
+    waitingOnRetry = !success;
+    if (waitingOnRetry) {
+        DPRINTF(CachePort, "%s now waiting on a retry\n", name());
+    }
+
+    myCache()->sendResult(pkt, mshr, success);
+    if (success && isBusRequested())
+    {
+        DPRINTF(CachePort, "%s still more MSHR requests to send\n", name());
+        //Still more to issue, re-request the bus on the next tick
+        scheduleRequestEvent(curTick+1);
+    }
+}
+
+
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::MemSidePort::processResponseEvent()
+{
+    assert(transmitList.size());
+    assert(transmitList.front().first <= curTick);
+    PacketPtr pkt = transmitList.front().second;
+    transmitList.pop_front();
+    if (!transmitList.empty()) {
+        Tick time = transmitList.front().first;
+        responseEvent->schedule(time <= curTick ? curTick+1 : time);
+    }
+
+    if (pkt->flags & NACKED_LINE)
+        pkt->result = Packet::Nacked;
+    else
+        pkt->result = Packet::Success;
+    pkt->makeTimingResponse();
+    DPRINTF(CachePort, "%s attempting to send a response\n", name());
+    if (!drainList.empty() || waitingOnRetry) {
+        //Already have a list, just append
+        drainList.push_back(pkt);
+        DPRINTF(CachePort, "%s appending response onto drain list\n", name());
+    }
+    else if (!sendTiming(pkt)) {
+        //It failed, save it to list of drain events
+        DPRINTF(CachePort, "%s now waiting for a retry\n", name());
+        drainList.push_back(pkt);
+        waitingOnRetry = true;
+    }
+
+    // Check if we're done draining once this list is empty
+    if (drainList.empty() && transmitList.empty())
+        myCache()->checkDrain();
+}
+
+
 template<class TagStore, class Coherence>
 Tick
 Cache<TagStore,Coherence>::MemSidePort::recvAtomic(PacketPtr pkt)
@@ -1292,15 +1452,17 @@ template<class TagStore, class Coherence>
 Cache<TagStore,Coherence>::
 CpuSidePort::CpuSidePort(const std::string &_name,
                          Cache<TagStore,Coherence> *_cache)
-    : BaseCache::CachePort(_name, _cache, true)
+    : BaseCache::CachePort(_name, _cache)
 {
+    responseEvent = new ResponseEvent(this);
 }
 
 template<class TagStore, class Coherence>
 Cache<TagStore,Coherence>::
 MemSidePort::MemSidePort(const std::string &_name,
                          Cache<TagStore,Coherence> *_cache)
-    : BaseCache::CachePort(_name, _cache, false)
+    : BaseCache::CachePort(_name, _cache)
 {
+    responseEvent = new ResponseEvent(this);
 }
 
diff --git a/src/mem/cache/coherence/SConscript b/src/mem/cache/coherence/SConscript
index 03a2d85d7..7b94f73e1 100644
--- a/src/mem/cache/coherence/SConscript
+++ b/src/mem/cache/coherence/SConscript
@@ -31,5 +31,4 @@
 Import('*')
 
 Source('coherence_protocol.cc')
-Source('uni_coherence.cc')
 
diff --git a/src/mem/cache/coherence/uni_coherence.cc b/src/mem/cache/coherence/uni_coherence.cc
deleted file mode 100644
index 6061c89c3..000000000
--- a/src/mem/cache/coherence/uni_coherence.cc
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Copyright (c) 2003-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Erik Hallnor
- */
-
-#include "mem/cache/coherence/uni_coherence.hh"
-#include "mem/cache/base_cache.hh"
-
-#include "base/trace.hh"
-
-using namespace std;
-
-UniCoherence::UniCoherence()
-    : cshrs(50)
-{
-}
-
-PacketPtr
-UniCoherence::getPacket()
-{
-    PacketPtr pkt = cshrs.getReq();
-    return pkt;
-}
-
-void
-UniCoherence::sendResult(PacketPtr &pkt, MSHR* cshr, bool success)
-{
-    if (success)
-    {
-        bool unblock = cshrs.isFull();
-//        cshrs.markInService(cshr);
-        delete pkt->req;
-        cshrs.deallocate(cshr);
-        if (!cshrs.havePending()) {
-            cache->clearSlaveRequest(Request_Coherence);
-        }
-        if (unblock) {
-            //since CSHRs are always used as buffers, should always get rid of one
-            assert(!cshrs.isFull());
-            cache->clearBlocked(Blocked_Coherence);
-        }
-    }
-}
-
-
-/**
- * @todo add support for returning slave requests, not doing them here.
- */
-bool
-UniCoherence::handleBusRequest(PacketPtr &pkt, CacheBlk *blk, MSHR *mshr,
-                               CacheBlk::State &new_state)
-{
-    new_state = 0;
-    if (pkt->isInvalidate()) {
-            DPRINTF(Cache, "snoop inval on blk %x (blk ptr %x)\n",
-                    pkt->getAddr(), blk);
-    }
-    else if (blk) {
-        new_state = blk->status;
-        if (pkt->isRead()) {
-            DPRINTF(Cache, "Uni-coherence snoops a read that hit in itself"
-                    ". Should satisfy the packet\n");
-            return true; //Satisfy Reads if we can
-        }
-    }
-    return false;
-}
-
-bool
-UniCoherence::propogateInvalidate(PacketPtr pkt, bool isTiming)
-{
-    if (pkt->isInvalidate()) {
-/*  Temp Fix for now, forward all invalidates up as functional accesses */
-        if (isTiming) {
-            // Forward to other caches
-            Request* req = new Request(pkt->req->getPaddr(), pkt->getSize(), 0);
-            PacketPtr tmp = new Packet(req, MemCmd::InvalidateReq, -1);
-            cshrs.allocate(tmp);
-            cache->setSlaveRequest(Request_Coherence, curTick);
-            if (cshrs.isFull())
-                cache->setBlockedForSnoop(Blocked_Coherence);
-        }
-        else {
-            PacketPtr tmp = new Packet(pkt->req, MemCmd::InvalidateReq, -1);
-            cache->cpuSidePort->sendAtomic(tmp);
-            delete tmp;
-        }
-/**/
-/*            PacketPtr tmp = new Packet(pkt->req, MemCmd::InvalidateReq, -1);
-            cache->cpuSidePort->sendFunctional(tmp);
-            delete tmp;
-*/
-    }
-    if (pkt->isRead()) {
-        /*For now we will see if someone above us has the data by
-          doing a functional access on reads.  Fix this later */
-            PacketPtr tmp = new Packet(pkt->req, MemCmd::ReadReq, -1);
-            tmp->allocate();
-            cache->cpuSidePort->sendFunctional(tmp);
-            bool hit = (tmp->result == Packet::Success);
-            if (hit) {
-                memcpy(pkt->getPtr<uint8_t>(), tmp->getPtr<uint8_t>(),
-                       pkt->getSize());
-                DPRINTF(Cache, "Uni-coherence snoops a read that hit in L1\n");
-            }
-            delete tmp;
-            return hit;
-    }
-    return false;
-}
diff --git a/src/mem/cache/coherence/uni_coherence.hh b/src/mem/cache/coherence/uni_coherence.hh
deleted file mode 100644
index 9efb4e192..000000000
--- a/src/mem/cache/coherence/uni_coherence.hh
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * Copyright (c) 2003-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Erik Hallnor
- */
-
-#ifndef __UNI_COHERENCE_HH__
-#define __UNI_COHERENCE_HH__
-
-#include "base/trace.hh"
-#include "base/misc.hh"
-#include "mem/cache/cache_blk.hh"
-#include "mem/cache/miss/mshr_queue.hh"
-#include "mem/packet.hh"
-
-class BaseCache;
-
-class UniCoherence
-{
-  protected:
-    /** Buffers to hold forwarded invalidates. */
-    MSHRQueue cshrs;
-    /** Pointer to the parent cache. */
-    BaseCache *cache;
-
-  public:
-    /**
-     * Construct and initialize this coherence policy.
-     */
-    UniCoherence();
-
-    /**
-     * Set the pointer to the parent cache.
-     * @param _cache The parent cache.
-     */
-    void setCache(BaseCache *_cache)
-    {
-        cache = _cache;
-    }
-
-    /**
-     * Register statistics.
-     * @param name The name to prepend to stat descriptions.
-     */
-    void regStats(const std::string &name)
-    {
-    }
-
-    /**
-     * Return Read.
-     * @param cmd The request's command.
-     * @param state The current state of the cache block.
-     * @return The proper bus command, as determined by the protocol.
-     * @todo Make changes so writebacks don't get here.
-     */
-    MemCmd getBusCmd(MemCmd cmd, CacheBlk::State state)
-    {
-        if (cmd == MemCmd::HardPFReq && state)
-            warn("Trying to issue a prefetch to a block we already have\n");
-        if (cmd == MemCmd::Writeback)
-            return MemCmd::Writeback;
-        return MemCmd::ReadReq;
-    }
-
-    /**
-     * Just return readable and writeable.
-     * @param pkt The bus response.
-     * @param current The current block state.
-     * @return The new state.
-     */
-    CacheBlk::State getNewState(PacketPtr &pkt, CacheBlk::State current)
-    {
-        if (pkt->senderState) //Blocking Buffers don't get mshrs
-        {
-            if (((MSHR *)(pkt->senderState))->originalCmd == MemCmd::HardPFReq) {
-                DPRINTF(HWPrefetch, "Marking a hardware prefetch as such in the state\n");
-                return BlkHWPrefetched | BlkValid | BlkWritable;
-            }
-            else {
-                return BlkValid | BlkWritable;
-            }
-        }
-        //@todo What about prefetching with blocking buffers
-        else
-            return BlkValid | BlkWritable;
-    }
-
-    /**
-     * Return outstanding invalidate to forward.
-     * @return The next invalidate to forward to lower levels of cache.
-     */
-    PacketPtr getPacket();
-
-    /**
-     * Was the CSHR request was sent successfully?
-     * @param pkt The request.
-     * @param success True if the request was sent successfully.
-     */
-    void sendResult(PacketPtr &pkt, MSHR* cshr, bool success);
-
-    /**
-     * Handle snooped bus requests.
-     * @param pkt The snooped bus request.
-     * @param blk The cache block corresponding to the request, if any.
-     * @param mshr The MSHR corresponding to the request, if any.
-     * @param new_state The new coherence state of the block.
-     * @return True if the request should be satisfied locally.
-     */
-    bool handleBusRequest(PacketPtr &pkt, CacheBlk *blk, MSHR *mshr,
-                          CacheBlk::State &new_state);
-
-    /**
-     * Return true if this coherence policy can handle fast cache writes.
-     */
-    bool allowFastWrites() { return true; }
-
-    bool hasProtocol() { return false; }
-
-    bool propogateInvalidate(PacketPtr pkt, bool isTiming);
-};
-
-#endif //__UNI_COHERENCE_HH__
diff --git a/src/mem/cache/miss/blocking_buffer.cc b/src/mem/cache/miss/blocking_buffer.cc
index e8ff26880..281328c2e 100644
--- a/src/mem/cache/miss/blocking_buffer.cc
+++ b/src/mem/cache/miss/blocking_buffer.cc
@@ -64,7 +64,7 @@ BlockingBuffer::handleMiss(PacketPtr &pkt, int blk_size, Tick time)
         std::memcpy(wb.pkt->getPtr<uint8_t>(), pkt->getPtr<uint8_t>(), blk_size);
 
         cache->setBlocked(Blocked_NoWBBuffers);
-        cache->setMasterRequest(Request_WB, time);
+        cache->requestMemSideBus(Request_WB, time);
         return;
     }
 
@@ -77,7 +77,7 @@ BlockingBuffer::handleMiss(PacketPtr &pkt, int blk_size, Tick time)
         miss.pkt->flags |= CACHE_LINE_FILL;
     }
     cache->setBlocked(Blocked_NoMSHRs);
-    cache->setMasterRequest(Request_MSHR, time);
+    cache->requestMemSideBus(Request_MSHR, time);
 }
 
 PacketPtr
@@ -111,7 +111,7 @@ BlockingBuffer::markInService(PacketPtr &pkt, MSHR* mshr)
         // Forwarding a write/ writeback, don't need to change
         // the command
         assert(mshr == &wb);
-        cache->clearMasterRequest(Request_WB);
+        cache->deassertMemSideBusRequest(Request_WB);
         if (!pkt->needsResponse()) {
             assert(wb.getNumTargets() == 0);
             wb.deallocate();
@@ -121,7 +121,7 @@ BlockingBuffer::markInService(PacketPtr &pkt, MSHR* mshr)
         }
     } else {
         assert(mshr == &miss);
-        cache->clearMasterRequest(Request_MSHR);
+        cache->deassertMemSideBusRequest(Request_MSHR);
         if (!pkt->needsResponse()) {
             assert(miss.getNumTargets() == 0);
             miss.deallocate();
@@ -178,7 +178,7 @@ BlockingBuffer::squash(int threadNum)
         if (!miss.inService) {
             miss.deallocate();
             cache->clearBlocked(Blocked_NoMSHRs);
-            cache->clearMasterRequest(Request_MSHR);
+            cache->deassertMemSideBusRequest(Request_MSHR);
         }
     }
 }
@@ -203,7 +203,7 @@ BlockingBuffer::doWriteback(Addr addr,
     writebacks[0/*pkt->req->getThreadNum()*/]++;
 
     wb.allocateAsBuffer(pkt);
-    cache->setMasterRequest(Request_WB, curTick);
+    cache->requestMemSideBus(Request_WB, curTick);
     cache->setBlocked(Blocked_NoWBBuffers);
 }
 
@@ -221,7 +221,7 @@ BlockingBuffer::doWriteback(PacketPtr &pkt)
     std::memcpy(wb.pkt->getPtr<uint8_t>(), pkt->getPtr<uint8_t>(), pkt->getSize());
 
     cache->setBlocked(Blocked_NoWBBuffers);
-    cache->setMasterRequest(Request_WB, curTick);
+    cache->requestMemSideBus(Request_WB, curTick);
 }
 
 
diff --git a/src/mem/cache/miss/miss_queue.cc b/src/mem/cache/miss/miss_queue.cc
index 24ca9cfa2..67036ed02 100644
--- a/src/mem/cache/miss/miss_queue.cc
+++ b/src/mem/cache/miss/miss_queue.cc
@@ -348,7 +348,7 @@ MissQueue::allocateMiss(PacketPtr &pkt, int size, Tick time)
     }
     if (pkt->cmd != MemCmd::HardPFReq) {
         //If we need to request the bus (not on HW prefetch), do so
-        cache->setMasterRequest(Request_MSHR, time);
+        cache->requestMemSideBus(Request_MSHR, time);
     }
     return mshr;
 }
@@ -376,7 +376,7 @@ MissQueue::allocateWrite(PacketPtr &pkt, int size, Tick time)
         cache->setBlocked(Blocked_NoWBBuffers);
     }
 
-    cache->setMasterRequest(Request_WB, time);
+    cache->requestMemSideBus(Request_WB, time);
 
     return mshr;
 }
@@ -450,7 +450,7 @@ MissQueue::fetchBlock(Addr addr, int blk_size, Tick time,
     if (mq.isFull()) {
         cache->setBlocked(Blocked_NoMSHRs);
     }
-    cache->setMasterRequest(Request_MSHR, time);
+    cache->requestMemSideBus(Request_MSHR, time);
     return mshr;
 }
 
@@ -534,7 +534,7 @@ MissQueue::markInService(PacketPtr &pkt, MSHR* mshr)
         unblock = wb.isFull();
         wb.markInService(mshr);
         if (!wb.havePending()){
-            cache->clearMasterRequest(Request_WB);
+            cache->deassertMemSideBusRequest(Request_WB);
         }
         if (unblock) {
             // Do we really unblock?
@@ -545,7 +545,7 @@ MissQueue::markInService(PacketPtr &pkt, MSHR* mshr)
         unblock = mq.isFull();
         mq.markInService(mshr);
         if (!mq.havePending()){
-            cache->clearMasterRequest(Request_MSHR);
+            cache->deassertMemSideBusRequest(Request_MSHR);
         }
         if (mshr->originalCmd == MemCmd::HardPFReq) {
             DPRINTF(HWPrefetch, "%s:Marking a HW_PF in service\n",
@@ -553,7 +553,7 @@ MissQueue::markInService(PacketPtr &pkt, MSHR* mshr)
             //Also clear pending if need be
             if (!prefetcher->havePending())
             {
-                cache->clearMasterRequest(Request_PF);
+                cache->deassertMemSideBusRequest(Request_PF);
             }
         }
         if (unblock) {
@@ -602,7 +602,7 @@ MissQueue::handleResponse(PacketPtr &pkt, Tick time)
             mshr->pkt->req = mshr->getTarget()->req;
             mq.markPending(mshr, cmd);
             mshr->order = order++;
-            cache->setMasterRequest(Request_MSHR, time);
+            cache->requestMemSideBus(Request_MSHR, time);
         }
         else {
             unblock = mq.isFull();
@@ -683,7 +683,7 @@ MissQueue::squash(int threadNum)
     }
     mq.squash(threadNum);
     if (!mq.havePending()) {
-        cache->clearMasterRequest(Request_MSHR);
+        cache->deassertMemSideBusRequest(Request_MSHR);
     }
     if (unblock && !mq.isFull()) {
         cache->clearBlocked(cause);
diff --git a/src/mem/cache/prefetch/base_prefetcher.cc b/src/mem/cache/prefetch/base_prefetcher.cc
index 44daf75e1..966f7d005 100644
--- a/src/mem/cache/prefetch/base_prefetcher.cc
+++ b/src/mem/cache/prefetch/base_prefetcher.cc
@@ -141,7 +141,7 @@ BasePrefetcher::getPacket()
             keepTrying = cache->inCache(pkt->getAddr());
         }
         if (pf.empty()) {
-            cache->clearMasterRequest(Request_PF);
+            cache->deassertMemSideBusRequest(Request_PF);
             if (keepTrying) return NULL; //None left, all were in cache
         }
     } while (keepTrying);
@@ -165,7 +165,7 @@ BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time)
             pfRemovedMSHR++;
             pf.erase(iter);
             if (pf.empty())
-                cache->clearMasterRequest(Request_PF);
+                cache->deassertMemSideBusRequest(Request_PF);
         }
 
         //Remove anything in queue with delay older than time
@@ -182,7 +182,7 @@ BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time)
                 iter--;
             }
             if (pf.empty())
-                cache->clearMasterRequest(Request_PF);
+                cache->deassertMemSideBusRequest(Request_PF);
         }
 
 
@@ -244,7 +244,7 @@ BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time)
             prefetch->flags |= CACHE_LINE_FILL;
 
             //Make sure to request the bus, with proper delay
-            cache->setMasterRequest(Request_PF, prefetch->time);
+            cache->requestMemSideBus(Request_PF, prefetch->time);
 
             //Increment through the list
             addr++;
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index dc23e9f6d..577f99116 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -61,8 +61,8 @@ typedef std::list<PacketPtr> PacketList;
 #define CACHE_LINE_FILL (1 << 3)
 #define COMPRESSED      (1 << 4)
 #define NO_ALLOCATE     (1 << 5)
-#define SNOOP_COMMIT    (1 << 6)
 
+#define EXPRESS_SNOOP   (1 << 7)
 
 class MemCmd
 {
-- 
cgit v1.2.3


From 9048c695a0ecde709a074259bad9ad1cda57a303 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Tue, 22 May 2007 06:29:48 -0700
Subject: Another pass of minor changes in preparation for new protocol.

src/mem/cache/cache_impl.hh:
src/mem/cache/coherence/simple_coherence.hh:
    Get rid of old invalidate propagation logic in preparation
    for new multilevel snoop protocol.
src/mem/cache/coherence/coherence_protocol.cc:
    L2 cache now has protocol, so protocol must handle ReadExReq
    coming in from the CPU side.
src/mem/cache/miss/mshr_queue.cc:
    Assertion is failing, so let's take it out for now.
src/mem/packet.cc:
src/mem/packet.hh:
    Add WritebackAck command.
    Reorganize enum to put responses next to corresponding requests.
    Get rid of unused WriteReqNoAck.

--HG--
extra : convert_revision : 24c519846d161978123f9aa029ae358a41546c73
---
 src/mem/cache/cache_impl.hh                   | 17 ++---------------
 src/mem/cache/coherence/coherence_protocol.cc |  3 +++
 src/mem/cache/coherence/simple_coherence.hh   |  6 ------
 src/mem/cache/miss/mshr_queue.cc              |  1 -
 src/mem/packet.cc                             | 11 ++++++-----
 src/mem/packet.hh                             |  4 ++--
 6 files changed, 13 insertions(+), 29 deletions(-)

diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 6b9eac865..56352c110 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -794,14 +794,7 @@ Cache<TagStore,Coherence>::snoop(PacketPtr &pkt)
         return;
     }
 
-    //Send a timing (true) invalidate up if the protocol calls for it
-    if (coherence->propogateInvalidate(pkt, true)) {
-        //Temp hack, we had a functional read hit in the L1, mark as success
-        pkt->flags |= SATISFIED;
-        pkt->result = Packet::Success;
-        respondToSnoop(pkt, curTick + hitLatency);
-        return;
-    }
+    ///// PROPAGATE SNOOP UPWARD HERE
 
     Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
     BlkType *blk = tags->findBlock(pkt->getAddr());
@@ -1097,13 +1090,7 @@ template<class TagStore, class Coherence>
 Tick
 Cache<TagStore,Coherence>::snoopProbe(PacketPtr &pkt)
 {
-    //Send a atomic (false) invalidate up if the protocol calls for it
-    if (coherence->propogateInvalidate(pkt, false)) {
-        //Temp hack, we had a functional read hit in the L1, mark as success
-        pkt->flags |= SATISFIED;
-        pkt->result = Packet::Success;
-        return hitLatency;
-    }
+    ///// PROPAGATE SNOOP UPWARD HERE
 
     Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
     BlkType *blk = tags->findBlock(pkt->getAddr());
diff --git a/src/mem/cache/coherence/coherence_protocol.cc b/src/mem/cache/coherence/coherence_protocol.cc
index 33a8a4e63..bc8de0d26 100644
--- a/src/mem/cache/coherence/coherence_protocol.cc
+++ b/src/mem/cache/coherence/coherence_protocol.cc
@@ -295,11 +295,14 @@ CoherenceProtocol::CoherenceProtocol(const string &name,
     tt[Invalid][MC::ReadReq].onRequest(MC::ReadReq);
     // we only support write allocate right now
     tt[Invalid][MC::WriteReq].onRequest(MC::ReadExReq);
+    tt[Invalid][MC::ReadExReq].onRequest(MC::ReadExReq);
     tt[Invalid][MC::SwapReq].onRequest(MC::ReadExReq);
     tt[Shared][MC::WriteReq].onRequest(writeToSharedCmd);
+    tt[Shared][MC::ReadExReq].onRequest(MC::ReadExReq);
     tt[Shared][MC::SwapReq].onRequest(writeToSharedCmd);
     if (hasOwned) {
         tt[Owned][MC::WriteReq].onRequest(writeToSharedCmd);
+        tt[Owned][MC::ReadExReq].onRequest(MC::ReadExReq);
         tt[Owned][MC::SwapReq].onRequest(writeToSharedCmd);
     }
 
diff --git a/src/mem/cache/coherence/simple_coherence.hh b/src/mem/cache/coherence/simple_coherence.hh
index 1c89c703a..095260ca4 100644
--- a/src/mem/cache/coherence/simple_coherence.hh
+++ b/src/mem/cache/coherence/simple_coherence.hh
@@ -161,12 +161,6 @@ class SimpleCoherence
     bool allowFastWrites() { return false; }
 
     bool hasProtocol() { return true; }
-
-    bool propogateInvalidate(PacketPtr pkt, bool isTiming)
-    {
-        //For now we do nothing, asssumes simple coherence is top level of cache
-        return false;
-    }
 };
 
 #endif //__SIMPLE_COHERENCE_HH__
diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc
index add11dfe7..e9aa89bf8 100644
--- a/src/mem/cache/miss/mshr_queue.cc
+++ b/src/mem/cache/miss/mshr_queue.cc
@@ -119,7 +119,6 @@ MSHRQueue::allocate(PacketPtr &pkt, int size)
     if (!pkt->needsResponse()) {
         mshr->allocateAsBuffer(pkt);
     } else {
-        assert(size !=0);
         mshr->allocate(pkt->cmd, aligned_addr, size, pkt);
         allocatedTargets += 1;
     }
diff --git a/src/mem/packet.cc b/src/mem/packet.cc
index 2463a19ba..8c69def37 100644
--- a/src/mem/packet.cc
+++ b/src/mem/packet.cc
@@ -56,17 +56,18 @@ MemCmd::commandInfo[] =
     { 0, InvalidCmd, "InvalidCmd" },
     /* ReadReq */
     { SET3(IsRead, IsRequest, NeedsResponse), ReadResp, "ReadReq" },
+    /* ReadResp */
+    { SET3(IsRead, IsResponse, HasData), InvalidCmd, "ReadResp" },
     /* WriteReq */
     { SET4(IsWrite, IsRequest, NeedsResponse, HasData),
             WriteResp, "WriteReq" },
-    /* WriteReqNoAck */
-    { SET3(IsWrite, IsRequest, HasData), InvalidCmd, "WriteReqNoAck" },
-    /* ReadResp */
-    { SET3(IsRead, IsResponse, HasData), InvalidCmd, "ReadResp" },
     /* WriteResp */
     { SET2(IsWrite, IsResponse), InvalidCmd, "WriteResp" },
     /* Writeback */
-    { SET3(IsWrite, IsRequest, HasData), InvalidCmd, "Writeback" },
+    { SET4(IsWrite, IsRequest, HasData, NeedsResponse),
+            WritebackAck, "Writeback" },
+    /* WritebackAck */
+    { SET2(IsWrite, IsResponse), InvalidCmd, "WritebackAck" },
     /* SoftPFReq */
     { SET4(IsRead, IsRequest, IsSWPrefetch, NeedsResponse),
             SoftPFResp, "SoftPFReq" },
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index 577f99116..413ffa26b 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -73,11 +73,11 @@ class MemCmd
     {
         InvalidCmd,
         ReadReq,
-        WriteReq,
-        WriteReqNoAck,
         ReadResp,
+        WriteReq,
         WriteResp,
         Writeback,
+        WritebackAck,
         SoftPFReq,
         HardPFReq,
         SoftPFResp,
-- 
cgit v1.2.3


From 41dde5f6fdf195b8d51d70119737c5e3f7391f78 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Tue, 22 May 2007 06:32:24 -0700
Subject: memtest.hh: Fix description string. Minor whitespace cleanup.

src/cpu/memtest/memtest.hh:
    Fix description string.
    Minor whitespace cleanup.

--HG--
extra : convert_revision : 0c7213d088da46de9713ca6beabc30523ccb1c8c
---
 src/cpu/memtest/memtest.hh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/cpu/memtest/memtest.hh b/src/cpu/memtest/memtest.hh
index 264309fd7..84e16b98a 100644
--- a/src/cpu/memtest/memtest.hh
+++ b/src/cpu/memtest/memtest.hh
@@ -85,13 +85,13 @@ class MemTest : public MemObject
         TickEvent(MemTest *c)
             : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c) {}
         void process() {cpu->tick();}
-        virtual const char *description() { return "tick event"; }
+        virtual const char *description() { return "MemTest tick"; }
     };
 
     TickEvent tickEvent;
+
     class CpuPort : public Port
     {
-
         MemTest *memtest;
 
       public:
-- 
cgit v1.2.3


From da46364b1878339841e9cda5a62ee104409b6535 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Tue, 22 May 2007 07:30:55 -0700
Subject: Fix getDeviceAddressRanges() to get snooping right.

--HG--
extra : convert_revision : 2aeab25ef955ab9db7b968786faff227239fbbe4
---
 src/mem/cache/base_cache.cc |  8 --------
 src/mem/cache/base_cache.hh |  3 ---
 src/mem/cache/cache.hh      |  6 ++++++
 src/mem/cache/cache_impl.hh | 24 ++++++++++++++++++++++++
 4 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc
index d75d35ebb..a47c19e60 100644
--- a/src/mem/cache/base_cache.cc
+++ b/src/mem/cache/base_cache.cc
@@ -57,7 +57,6 @@ BaseCache::BaseCache(const std::string &name, Params &params)
 }
 
 
-
 void
 BaseCache::CachePort::recvStatusChange(Port::Status status)
 {
@@ -66,13 +65,6 @@ BaseCache::CachePort::recvStatusChange(Port::Status status)
     }
 }
 
-void
-BaseCache::CachePort::getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
-{
-    AddrRangeList dummy;
-    otherPort->getPeerAddressRanges(resp, dummy);
-}
-
 int
 BaseCache::CachePort::deviceBlockSize()
 {
diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh
index f7107a86a..a27ac1788 100644
--- a/src/mem/cache/base_cache.hh
+++ b/src/mem/cache/base_cache.hh
@@ -91,9 +91,6 @@ class BaseCache : public MemObject
 
         virtual void recvStatusChange(Status status);
 
-        virtual void getDeviceAddressRanges(AddrRangeList &resp,
-                                            bool &snoop);
-
         virtual int deviceBlockSize();
 
         bool recvRetryCommon();
diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index 75fb50f4e..e14b2efe8 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -89,6 +89,9 @@ class Cache : public BaseCache
         void processRequestEvent();
         void processResponseEvent();
 
+        virtual void getDeviceAddressRanges(AddrRangeList &resp,
+                                            bool &snoop);
+
         virtual bool recvTiming(PacketPtr pkt);
 
         virtual void recvRetry();
@@ -124,6 +127,9 @@ class Cache : public BaseCache
         void processRequestEvent();
         void processResponseEvent();
 
+        virtual void getDeviceAddressRanges(AddrRangeList &resp,
+                                            bool &snoop);
+
         virtual bool recvTiming(PacketPtr pkt);
 
         virtual void recvRetry();
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 56352c110..a7f96603e 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -1139,6 +1139,18 @@ Cache<TagStore,Coherence>::deletePortRefs(Port *p)
 }
 
 
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::CpuSidePort::
+getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
+{
+    // CPU side port doesn't snoop; it's a target only.
+    bool dummy;
+    otherPort->getPeerAddressRanges(resp, dummy);
+    snoop = false;
+}
+
+
 template<class TagStore, class Coherence>
 bool
 Cache<TagStore,Coherence>::CpuSidePort::recvTiming(PacketPtr pkt)
@@ -1260,6 +1272,18 @@ Cache<TagStore,Coherence>::CpuSidePort::recvFunctional(PacketPtr pkt)
 }
 
 
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::MemSidePort::
+getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
+{
+    // Memory-side port always snoops.
+    bool dummy;
+    otherPort->getPeerAddressRanges(resp, dummy);
+    snoop = true;
+}
+
+
 template<class TagStore, class Coherence>
 bool
 Cache<TagStore,Coherence>::MemSidePort::recvTiming(PacketPtr pkt)
-- 
cgit v1.2.3


From 35cf19d441ed15d054d00674ec67ab5bc769f6d7 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sun, 17 Jun 2007 17:27:53 -0700
Subject: More major reorg of cache.  Seems to work for atomic mode now, timing
 mode still broken.

configs/example/memtest.py:
    Revamp options.
src/cpu/memtest/memtest.cc:
    No need for memory initialization.
    No need to make atomic response... memory system should do that now.
src/cpu/memtest/memtest.hh:
    MemTest really doesn't want to snoop.
src/mem/bridge.cc:
    checkFunctional() cleanup.
src/mem/bus.cc:
src/mem/bus.hh:
src/mem/cache/base_cache.cc:
src/mem/cache/base_cache.hh:
src/mem/cache/cache.cc:
src/mem/cache/cache.hh:
src/mem/cache/cache_blk.hh:
src/mem/cache/cache_builder.cc:
src/mem/cache/cache_impl.hh:
src/mem/cache/coherence/coherence_protocol.cc:
src/mem/cache/coherence/coherence_protocol.hh:
src/mem/cache/coherence/simple_coherence.hh:
src/mem/cache/miss/SConscript:
src/mem/cache/miss/mshr.cc:
src/mem/cache/miss/mshr.hh:
src/mem/cache/miss/mshr_queue.cc:
src/mem/cache/miss/mshr_queue.hh:
src/mem/cache/prefetch/base_prefetcher.cc:
src/mem/cache/tags/fa_lru.cc:
src/mem/cache/tags/fa_lru.hh:
src/mem/cache/tags/iic.cc:
src/mem/cache/tags/iic.hh:
src/mem/cache/tags/lru.cc:
src/mem/cache/tags/lru.hh:
src/mem/cache/tags/split.cc:
src/mem/cache/tags/split.hh:
src/mem/cache/tags/split_lifo.cc:
src/mem/cache/tags/split_lifo.hh:
src/mem/cache/tags/split_lru.cc:
src/mem/cache/tags/split_lru.hh:
src/mem/packet.cc:
src/mem/packet.hh:
src/mem/physical.cc:
src/mem/physical.hh:
src/mem/tport.cc:
    More major reorg.  Seems to work for atomic mode now,
    timing mode still broken.

--HG--
extra : convert_revision : 7e70dfc4a752393b911880ff028271433855ae87
---
 configs/example/memtest.py                    |   92 +-
 src/cpu/memtest/memtest.cc                    |   28 +-
 src/cpu/memtest/memtest.hh                    |    2 +-
 src/mem/bridge.cc                             |   11 +-
 src/mem/bus.cc                                |   87 +-
 src/mem/bus.hh                                |    3 -
 src/mem/cache/base_cache.cc                   |  413 ++++--
 src/mem/cache/base_cache.hh                   |  230 +--
 src/mem/cache/cache.cc                        |    3 -
 src/mem/cache/cache.hh                        |  271 ++--
 src/mem/cache/cache_blk.hh                    |   25 +-
 src/mem/cache/cache_builder.cc                |   27 +-
 src/mem/cache/cache_impl.hh                   | 1976 +++++++++++--------------
 src/mem/cache/coherence/coherence_protocol.cc |   40 +-
 src/mem/cache/coherence/coherence_protocol.hh |    4 +-
 src/mem/cache/coherence/simple_coherence.hh   |   15 +-
 src/mem/cache/miss/SConscript                 |    3 -
 src/mem/cache/miss/blocking_buffer.cc         |  245 ---
 src/mem/cache/miss/blocking_buffer.hh         |  209 ---
 src/mem/cache/miss/miss_buffer.cc             |   62 -
 src/mem/cache/miss/miss_buffer.hh             |  223 ---
 src/mem/cache/miss/miss_queue.cc              |  752 ----------
 src/mem/cache/miss/miss_queue.hh              |  327 ----
 src/mem/cache/miss/mshr.cc                    |   78 +-
 src/mem/cache/miss/mshr.hh                    |   77 +-
 src/mem/cache/miss/mshr_queue.cc              |   90 +-
 src/mem/cache/miss/mshr_queue.hh              |  137 +-
 src/mem/cache/prefetch/base_prefetcher.cc     |    1 -
 src/mem/cache/tags/fa_lru.cc                  |    5 +-
 src/mem/cache/tags/fa_lru.hh                  |    7 +-
 src/mem/cache/tags/iic.cc                     |   32 +-
 src/mem/cache/tags/iic.hh                     |   27 +-
 src/mem/cache/tags/lru.cc                     |    5 +-
 src/mem/cache/tags/lru.hh                     |   15 +-
 src/mem/cache/tags/split.cc                   |   25 +-
 src/mem/cache/tags/split.hh                   |    7 +-
 src/mem/cache/tags/split_lifo.cc              |    7 +-
 src/mem/cache/tags/split_lifo.hh              |   15 +-
 src/mem/cache/tags/split_lru.cc               |    5 +-
 src/mem/cache/tags/split_lru.hh               |   15 +-
 src/mem/packet.cc                             |   99 +-
 src/mem/packet.hh                             |  194 ++-
 src/mem/physical.cc                           |  208 ++-
 src/mem/physical.hh                           |   12 +-
 src/mem/tport.cc                              |   13 +-
 45 files changed, 2041 insertions(+), 4081 deletions(-)
 delete mode 100644 src/mem/cache/miss/blocking_buffer.cc
 delete mode 100644 src/mem/cache/miss/blocking_buffer.hh
 delete mode 100644 src/mem/cache/miss/miss_buffer.cc
 delete mode 100644 src/mem/cache/miss/miss_buffer.hh
 delete mode 100644 src/mem/cache/miss/miss_queue.cc
 delete mode 100644 src/mem/cache/miss/miss_queue.hh

diff --git a/configs/example/memtest.py b/configs/example/memtest.py
index 9fd943aaa..9027a9866 100644
--- a/configs/example/memtest.py
+++ b/configs/example/memtest.py
@@ -33,14 +33,32 @@ m5.AddToPath('../common')
 
 parser = optparse.OptionParser()
 
-parser.add_option("--caches", action="store_true")
-parser.add_option("-t", "--timing", action="store_true")
-parser.add_option("-m", "--maxtick", type="int")
-parser.add_option("-l", "--maxloads", default = "1000000000000", type="int")
-parser.add_option("-n", "--numtesters", default = "8", type="int")
-parser.add_option("-p", "--protocol",
-                  default="moesi",
-                  help="The coherence protocol to use for the L1'a (i.e. MOESI, MOSI)")
+parser.add_option("-c", "--cache-levels", type="int", default=2,
+                  metavar="LEVELS",
+                  help="Number of cache levels [default: %default]")
+parser.add_option("-a", "--atomic", action="store_true",
+                  help="Use atomic (non-timing) mode")
+parser.add_option("-b", "--blocking", action="store_true",
+                  help="Use blocking caches")
+parser.add_option("-l", "--maxloads", default="1G", metavar="N",
+                  help="Stop after N loads [default: %default]")
+parser.add_option("-m", "--maxtick", type="int", default=m5.MaxTick,
+                  metavar="T",
+                  help="Stop after T ticks")
+parser.add_option("-n", "--numtesters", type="int", default=8,
+                  metavar="N",
+                  help="Number of tester pseudo-CPUs [default: %default]")
+parser.add_option("-p", "--protocol", default="moesi",
+                  help="Coherence protocol [default: %default]")
+
+parser.add_option("-f", "--functional", type="int", default=0,
+                  metavar="PCT",
+                  help="Target percentage of functional accesses "
+                  "[default: %default]")
+parser.add_option("-u", "--uncacheable", type="int", default=0,
+                  metavar="PCT",
+                  help="Target percentage of uncacheable accesses "
+                  "[default: %default]")
 
 (options, args) = parser.parse_args()
 
@@ -48,14 +66,29 @@ if args:
      print "Error: script doesn't take any positional arguments"
      sys.exit(1)
 
+# Should generalize this someday... would be cool to have a loop that
+# just iterates, adding a level of caching each time.
+#if options.cache_levels != 2 and options.cache_levels != 0:
+#     print "Error: number of cache levels must be 0 or 2"
+#     sys.exit(1)
+
+if options.blocking:
+     num_l1_mshrs = 1
+     num_l2_mshrs = 1
+else:
+     num_l1_mshrs = 12
+     num_l2_mshrs = 92
+
+block_size = 64
+
 # --------------------
 # Base L1 Cache
 # ====================
 
 class L1(BaseCache):
     latency = '1ns'
-    block_size = 64
-    mshrs = 12
+    block_size = block_size
+    mshrs = num_l1_mshrs
     tgts_per_mshr = 8
     protocol = CoherenceProtocol(protocol=options.protocol)
 
@@ -64,29 +97,31 @@ class L1(BaseCache):
 # ----------------------
 
 class L2(BaseCache):
-    block_size = 64
+    block_size = block_size
     latency = '10ns'
-    mshrs = 92
+    mshrs = num_l2_mshrs
     tgts_per_mshr = 16
     write_buffers = 8
+    protocol = CoherenceProtocol(protocol=options.protocol)
 
-#MAX CORES IS 8 with the false sharing method
-if options.numtesters > 8:
-     print "Error: NUmber of testers limited to 8 because of false sharing"
-     sys,exit(1)
+if options.numtesters > block_size:
+     print "Error: Number of testers limited to %s because of false sharing" \
+           % (block_size)
+     sys.exit(1)
 
-cpus = [ MemTest(atomic=not options.timing, max_loads=options.maxloads,
-                 percent_functional=50, percent_uncacheable=10,
+cpus = [ MemTest(atomic=options.atomic, max_loads=options.maxloads,
+                 percent_functional=options.functional,
+                 percent_uncacheable=options.uncacheable,
                  progress_interval=1000)
          for i in xrange(options.numtesters) ]
 
 # system simulated
 system = System(cpu = cpus, funcmem = PhysicalMemory(),
-                physmem = PhysicalMemory(latency = "50ps"),
+                physmem = PhysicalMemory(latency = "100ns"),
                 membus = Bus(clock="500MHz", width=16))
 
 # l2cache & bus
-if options.caches:
+if options.cache_levels == 2:
     system.toL2Bus = Bus(clock="500MHz", width=16)
     system.l2c = L2(size='64kB', assoc=8)
     system.l2c.cpu_side = system.toL2Bus.port
@@ -96,10 +131,14 @@ if options.caches:
 
 # add L1 caches
 for cpu in cpus:
-    if options.caches:
+    if options.cache_levels == 2:
          cpu.l1c = L1(size = '32kB', assoc = 4)
          cpu.test = cpu.l1c.cpu_side
          cpu.l1c.mem_side = system.toL2Bus.port
+    elif options.cache_levels == 1:
+         cpu.l1c = L1(size = '32kB', assoc = 4)
+         cpu.test = cpu.l1c.cpu_side
+         cpu.l1c.mem_side = system.membus.port
     else:
          cpu.test = system.membus.port
     system.funcmem.port = cpu.functional
@@ -113,10 +152,10 @@ system.physmem.port = system.membus.port
 # -----------------------
 
 root = Root( system = system )
-if options.timing:
-    root.system.mem_mode = 'timing'
-else:
+if options.atomic:
     root.system.mem_mode = 'atomic'
+else:
+    root.system.mem_mode = 'timing'
 
 # Not much point in this being higher than the L1 latency
 m5.ticks.setGlobalFrequency('1ns')
@@ -125,9 +164,6 @@ m5.ticks.setGlobalFrequency('1ns')
 m5.instantiate(root)
 
 # simulate until program terminates
-if options.maxtick:
-    exit_event = m5.simulate(options.maxtick)
-else:
-    exit_event = m5.simulate(10000000000000)
+exit_event = m5.simulate(options.maxtick)
 
 print 'Exiting @ tick', m5.curTick(), 'because', exit_event.getCause()
diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc
index 607cf1066..5d89f1b82 100644
--- a/src/cpu/memtest/memtest.cc
+++ b/src/cpu/memtest/memtest.cc
@@ -102,7 +102,6 @@ void
 MemTest::sendPkt(PacketPtr pkt) {
     if (atomic) {
         cachePort.sendAtomic(pkt);
-        pkt->makeAtomicResponse();
         completeRequest(pkt);
     }
     else if (!cachePort.sendTiming(pkt)) {
@@ -165,8 +164,6 @@ MemTest::MemTest(const string &name,
     tickEvent.schedule(0);
 
     id = TESTER_ALLOCATOR++;
-    if (TESTER_ALLOCATOR > 8)
-        panic("False sharing memtester only allows up to 8 testers");
 
     accessRetry = false;
 }
@@ -190,14 +187,8 @@ MemTest::init()
     blockAddrMask = blockSize - 1;
     traceBlockAddr = blockAddr(traceBlockAddr);
 
-    // set up intial memory contents here
-
-    cachePort.memsetBlob(baseAddr1, 1, size);
-    funcPort.memsetBlob(baseAddr1, 1, size);
-    cachePort.memsetBlob(baseAddr2, 2, size);
-    funcPort.memsetBlob(baseAddr2, 2, size);
-    cachePort.memsetBlob(uncacheAddr, 3, size);
-    funcPort.memsetBlob(uncacheAddr, 3, size);
+    // initial memory contents for both physical memory and functional
+    // memory should be 0; no need to initialize them.
 }
 
 static void
@@ -230,15 +221,10 @@ MemTest::completeRequest(PacketPtr pkt)
       case MemCmd::ReadResp:
 
         if (memcmp(pkt_data, data, pkt->getSize()) != 0) {
-            cerr << name() << ": on read of 0x" << hex << req->getPaddr()
-                 << " (0x" << hex << blockAddr(req->getPaddr()) << ")"
-                 << "@ cycle " << dec << curTick
-                 << ", cache returns 0x";
-            printData(cerr, pkt_data, pkt->getSize());
-            cerr << ", expected 0x";
-            printData(cerr, data, pkt->getSize());
-            cerr << endl;
-            fatal("");
+            panic("%s: read of %x (blk %x) @ cycle %d "
+                  "returns %x, expected %x\n", name(),
+                  req->getPaddr(), blockAddr(req->getPaddr()), curTick,
+                  *pkt_data, *data);
         }
 
         numReads++;
@@ -267,7 +253,7 @@ MemTest::completeRequest(PacketPtr pkt)
         break;
 */
       default:
-        panic("invalid command");
+        panic("invalid command %s (%d)", pkt->cmdString(), pkt->cmd.toInt());
     }
 
     if (blockAddr(req->getPaddr()) == traceBlockAddr) {
diff --git a/src/cpu/memtest/memtest.hh b/src/cpu/memtest/memtest.hh
index a6b08d61c..565fafb77 100644
--- a/src/cpu/memtest/memtest.hh
+++ b/src/cpu/memtest/memtest.hh
@@ -116,7 +116,7 @@ class MemTest : public MemObject
 
         virtual void getDeviceAddressRanges(AddrRangeList &resp,
                                             bool &snoop)
-        { resp.clear(); snoop = true; }
+        { resp.clear(); snoop = false; }
     };
 
     CpuPort cachePort;
diff --git a/src/mem/bridge.cc b/src/mem/bridge.cc
index eebf91a85..fb4574844 100644
--- a/src/mem/bridge.cc
+++ b/src/mem/bridge.cc
@@ -339,17 +339,14 @@ void
 Bridge::BridgePort::recvFunctional(PacketPtr pkt)
 {
     std::list<PacketBuffer*>::iterator i;
-    bool pktContinue = true;
 
     for (i = sendQueue.begin();  i != sendQueue.end(); ++i) {
-        if (pkt->intersect((*i)->pkt)) {
-            pktContinue &= fixPacket(pkt, (*i)->pkt);
-        }
+        if (pkt->checkFunctional((*i)->pkt))
+            return;
     }
 
-    if (pktContinue) {
-        otherPort->sendFunctional(pkt);
-    }
+    // fall through if pkt still not satisfied
+    otherPort->sendFunctional(pkt);
 }
 
 /** Function called by the port when the bus is receiving a status change.*/
diff --git a/src/mem/bus.cc b/src/mem/bus.cc
index ec33bd4c5..bde90c83f 100644
--- a/src/mem/bus.cc
+++ b/src/mem/bus.cc
@@ -33,7 +33,7 @@
  * Definition of a bus object.
  */
 
-
+#include <algorithm>
 #include <limits>
 
 #include "base/misc.hh"
@@ -182,8 +182,7 @@ Bus::recvTiming(PacketPtr pkt)
 
     // If the bus is busy, or other devices are in line ahead of the current
     // one, put this device on the retry list.
-    if (!(pkt->flags & EXPRESS_SNOOP) &&
-        tickNextIdle > curTick ||
+    if (tickNextIdle > curTick ||
         (retryList.size() && (!inRetry || pktPort != retryList.front())))
     {
         addToRetryList(pktPort);
@@ -199,7 +198,7 @@ Bus::recvTiming(PacketPtr pkt)
         port = findPort(pkt->getAddr(), pkt->getSrc());
         timingSnoop(pkt, port ? port : interfaces[pkt->getSrc()]);
 
-        if (pkt->flags & SATISFIED) {
+        if (pkt->memInhibitAsserted()) {
             //Cache-Cache transfer occuring
             if (inRetry) {
                 retryList.front()->onRetryList(false);
@@ -321,27 +320,6 @@ Bus::findPort(Addr addr, int id)
     return interfaces[dest_id];
 }
 
-Tick
-Bus::atomicSnoop(PacketPtr pkt, Port *responder)
-{
-    Tick response_time = 0;
-
-    for (SnoopIter s_iter = snoopPorts.begin();
-         s_iter != snoopPorts.end();
-         s_iter++) {
-        BusPort *p = *s_iter;
-        if (p != responder && p->getId() != pkt->getSrc()) {
-            Tick response = p->sendAtomic(pkt);
-            if (response) {
-                assert(!response_time);  //Multiple responders
-                response_time = response;
-            }
-        }
-    }
-
-    return response_time;
-}
-
 void
 Bus::functionalSnoop(PacketPtr pkt, Port *responder)
 {
@@ -390,19 +368,56 @@ Bus::recvAtomic(PacketPtr pkt)
             pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString());
     assert(pkt->getDest() == Packet::Broadcast);
 
-    // Assume one bus cycle in order to get through.  This may have
-    // some clock skew issues yet again...
-    pkt->finishTime = curTick + clock;
+    // Variables for recording original command and snoop response (if
+    // any)... if a snooper responds, we will need to restore
+    // original command so that additional snoops can take place
+    // properly
+    MemCmd orig_cmd = pkt->cmd;
+    Packet::Result response_result = Packet::Unknown;
+    MemCmd response_cmd = MemCmd::InvalidCmd;
 
-    Port *port = findPort(pkt->getAddr(), pkt->getSrc());
-    Tick snoopTime = atomicSnoop(pkt, port ? port : interfaces[pkt->getSrc()]);
+    Port *target_port = findPort(pkt->getAddr(), pkt->getSrc());
 
-    if (snoopTime)
-        return snoopTime;  //Snoop satisfies it
-    else if (port)
-        return port->sendAtomic(pkt);
-    else
-        return 0;
+    SnoopIter s_end = snoopPorts.end();
+    for (SnoopIter s_iter = snoopPorts.begin(); s_iter != s_end; s_iter++) {
+        BusPort *p = *s_iter;
+        // same port should not have both target addresses and snooping
+        assert(p != target_port);
+        if (p->getId() != pkt->getSrc()) {
+            p->sendAtomic(pkt);
+            if (pkt->result != Packet::Unknown) {
+                // response from snoop agent
+                assert(pkt->cmd != orig_cmd);
+                assert(pkt->memInhibitAsserted());
+                assert(pkt->isResponse());
+                // should only happen once
+                assert(response_result == Packet::Unknown);
+                assert(response_cmd == MemCmd::InvalidCmd);
+                // save response state
+                response_result = pkt->result;
+                response_cmd = pkt->cmd;
+                // restore original packet state for remaining snoopers
+                pkt->cmd = orig_cmd;
+                pkt->result = Packet::Unknown;
+            }
+        }
+    }
+
+    Tick response_time = target_port->sendAtomic(pkt);
+
+    // if we got a response from a snooper, restore it here
+    if (response_result != Packet::Unknown) {
+        assert(response_cmd != MemCmd::InvalidCmd);
+        // no one else should have responded
+        assert(pkt->result == Packet::Unknown);
+        assert(pkt->cmd == orig_cmd);
+        pkt->cmd = response_cmd;
+        pkt->result = response_result;
+    }
+
+    // why do we have this packet field and the return value both???
+    pkt->finishTime = std::max(response_time, curTick + clock);
+    return pkt->finishTime;
 }
 
 /** Function called by the port when the bus is receiving a Functional
diff --git a/src/mem/bus.hh b/src/mem/bus.hh
index 5dd98c07e..33619bf45 100644
--- a/src/mem/bus.hh
+++ b/src/mem/bus.hh
@@ -182,9 +182,6 @@ class Bus : public MemObject
      */
     Port *findPort(Addr addr, int id);
 
-    /** Snoop all relevant ports atomicly. */
-    Tick atomicSnoop(PacketPtr pkt, Port* responder);
-
     /** Snoop all relevant ports functionally. */
     void functionalSnoop(PacketPtr pkt, Port *responder);
 
diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc
index a47c19e60..c7006550b 100644
--- a/src/mem/cache/base_cache.cc
+++ b/src/mem/cache/base_cache.cc
@@ -41,18 +41,23 @@
 using namespace std;
 
 BaseCache::CachePort::CachePort(const std::string &_name, BaseCache *_cache)
-    : Port(_name, _cache), cache(_cache), otherPort(NULL)
+    : SimpleTimingPort(_name, _cache), cache(_cache), otherPort(NULL),
+      blocked(false), waitingOnRetry(false), mustSendRetry(false),
+      requestCauses(0)
 {
-    blocked = false;
-    waitingOnRetry = false;
 }
 
 
 BaseCache::BaseCache(const std::string &name, Params &params)
     : MemObject(name),
-      blocked(0), blockedSnoop(0),
+      mshrQueue(params.numMSHRs, 4),
+      writeBuffer(params.numWriteBuffers, params.numMSHRs+1000),
       blkSize(params.blkSize),
-      missCount(params.maxMisses), drainEvent(NULL)
+      numTarget(params.numTargets),
+      blocked(0),
+      noTargetMSHR(NULL),
+      missCount(params.maxMisses),
+      drainEvent(NULL)
 {
 }
 
@@ -71,139 +76,21 @@ BaseCache::CachePort::deviceBlockSize()
     return cache->getBlockSize();
 }
 
-bool
-BaseCache::CachePort::checkFunctional(PacketPtr pkt)
-{
-    //Check storage here first
-    list<PacketPtr>::iterator i = drainList.begin();
-    list<PacketPtr>::iterator iend = drainList.end();
-    bool notDone = true;
-    while (i != iend && notDone) {
-        PacketPtr target = *i;
-        // If the target contains data, and it overlaps the
-        // probed request, need to update data
-        if (target->intersect(pkt)) {
-            DPRINTF(Cache, "Functional %s access to blk_addr %x intersects a drain\n",
-                    pkt->cmdString(), pkt->getAddr() & ~(cache->getBlockSize() - 1));
-            notDone = fixPacket(pkt, target);
-        }
-        i++;
-    }
-    //Also check the response not yet ready to be on the list
-    std::list<std::pair<Tick,PacketPtr> >::iterator j = transmitList.begin();
-    std::list<std::pair<Tick,PacketPtr> >::iterator jend = transmitList.end();
-
-    while (j != jend && notDone) {
-        PacketPtr target = j->second;
-        // If the target contains data, and it overlaps the
-        // probed request, need to update data
-        if (target->intersect(pkt)) {
-            DPRINTF(Cache, "Functional %s access to blk_addr %x intersects a response\n",
-                    pkt->cmdString(), pkt->getAddr() & ~(cache->getBlockSize() - 1));
-            notDone = fixDelayedResponsePacket(pkt, target);
-        }
-        j++;
-    }
-    return notDone;
-}
 
 void
 BaseCache::CachePort::checkAndSendFunctional(PacketPtr pkt)
 {
-    bool notDone = checkFunctional(pkt);
-    if (notDone)
+    checkFunctional(pkt);
+    if (pkt->result != Packet::Success)
         sendFunctional(pkt);
 }
 
 
-void
-BaseCache::CachePort::respond(PacketPtr pkt, Tick time)
-{
-    assert(time >= curTick);
-    if (pkt->needsResponse()) {
-        if (transmitList.empty()) {
-            assert(!responseEvent->scheduled());
-            responseEvent->schedule(time);
-            transmitList.push_back(std::pair<Tick,PacketPtr>(time,pkt));
-            return;
-        }
-
-        // something is on the list and this belongs at the end
-        if (time >= transmitList.back().first) {
-            transmitList.push_back(std::pair<Tick,PacketPtr>(time,pkt));
-            return;
-        }
-        // Something is on the list and this belongs somewhere else
-        std::list<std::pair<Tick,PacketPtr> >::iterator i =
-            transmitList.begin();
-        std::list<std::pair<Tick,PacketPtr> >::iterator end =
-            transmitList.end();
-        bool done = false;
-
-        while (i != end && !done) {
-            if (time < i->first) {
-                if (i == transmitList.begin()) {
-                    //Inserting at begining, reschedule
-                    responseEvent->reschedule(time);
-                }
-                transmitList.insert(i,std::pair<Tick,PacketPtr>(time,pkt));
-                done = true;
-            }
-            i++;
-        }
-    }
-    else {
-        assert(0);
-        // this code was on the cpuSidePort only... do we still need it?
-        if (pkt->cmd != MemCmd::UpgradeReq)
-        {
-            delete pkt->req;
-            delete pkt;
-        }
-    }
-}
-
-bool
-BaseCache::CachePort::drainResponse()
-{
-    DPRINTF(CachePort,
-            "%s attempting to send a retry for response (%i waiting)\n",
-            name(), drainList.size());
-    //We have some responses to drain first
-    PacketPtr pkt = drainList.front();
-    if (sendTiming(pkt)) {
-        drainList.pop_front();
-        DPRINTF(CachePort, "%s sucessful in sending a retry for"
-                "response (%i still waiting)\n", name(), drainList.size());
-        if (!drainList.empty() || isBusRequested()) {
-
-            DPRINTF(CachePort, "%s has more responses/requests\n", name());
-            return false;
-        }
-    } else {
-        waitingOnRetry = true;
-        DPRINTF(CachePort, "%s now waiting on a retry\n", name());
-    }
-    return true;
-}
-
-
 bool
 BaseCache::CachePort::recvRetryCommon()
 {
     assert(waitingOnRetry);
     waitingOnRetry = false;
-    if (!drainList.empty()) {
-        if (!drainResponse()) {
-            // more responses to drain... re-request bus
-            scheduleRequestEvent(curTick + 1);
-        }
-        // Check if we're done draining once this list is empty
-        if (drainList.empty()) {
-            cache->checkDrain();
-        }
-        return true;
-    }
     return false;
 }
 
@@ -451,17 +338,289 @@ BaseCache::regStats()
         .desc("number of cache copies performed")
         ;
 
+    writebacks
+        .init(maxThreadsPerCPU)
+        .name(name() + ".writebacks")
+        .desc("number of writebacks")
+        .flags(total)
+        ;
+
+    // MSHR statistics
+    // MSHR hit statistics
+    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
+        MemCmd cmd(access_idx);
+        const string &cstr = cmd.toString();
+
+        mshr_hits[access_idx]
+            .init(maxThreadsPerCPU)
+            .name(name() + "." + cstr + "_mshr_hits")
+            .desc("number of " + cstr + " MSHR hits")
+            .flags(total | nozero | nonan)
+            ;
+    }
+
+    demandMshrHits
+        .name(name() + ".demand_mshr_hits")
+        .desc("number of demand (read+write) MSHR hits")
+        .flags(total)
+        ;
+    demandMshrHits = mshr_hits[MemCmd::ReadReq] + mshr_hits[MemCmd::WriteReq];
+
+    overallMshrHits
+        .name(name() + ".overall_mshr_hits")
+        .desc("number of overall MSHR hits")
+        .flags(total)
+        ;
+    overallMshrHits = demandMshrHits + mshr_hits[MemCmd::SoftPFReq] +
+        mshr_hits[MemCmd::HardPFReq];
+
+    // MSHR miss statistics
+    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
+        MemCmd cmd(access_idx);
+        const string &cstr = cmd.toString();
+
+        mshr_misses[access_idx]
+            .init(maxThreadsPerCPU)
+            .name(name() + "." + cstr + "_mshr_misses")
+            .desc("number of " + cstr + " MSHR misses")
+            .flags(total | nozero | nonan)
+            ;
+    }
+
+    demandMshrMisses
+        .name(name() + ".demand_mshr_misses")
+        .desc("number of demand (read+write) MSHR misses")
+        .flags(total)
+        ;
+    demandMshrMisses = mshr_misses[MemCmd::ReadReq] + mshr_misses[MemCmd::WriteReq];
+
+    overallMshrMisses
+        .name(name() + ".overall_mshr_misses")
+        .desc("number of overall MSHR misses")
+        .flags(total)
+        ;
+    overallMshrMisses = demandMshrMisses + mshr_misses[MemCmd::SoftPFReq] +
+        mshr_misses[MemCmd::HardPFReq];
+
+    // MSHR miss latency statistics
+    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
+        MemCmd cmd(access_idx);
+        const string &cstr = cmd.toString();
+
+        mshr_miss_latency[access_idx]
+            .init(maxThreadsPerCPU)
+            .name(name() + "." + cstr + "_mshr_miss_latency")
+            .desc("number of " + cstr + " MSHR miss cycles")
+            .flags(total | nozero | nonan)
+            ;
+    }
+
+    demandMshrMissLatency
+        .name(name() + ".demand_mshr_miss_latency")
+        .desc("number of demand (read+write) MSHR miss cycles")
+        .flags(total)
+        ;
+    demandMshrMissLatency = mshr_miss_latency[MemCmd::ReadReq]
+        + mshr_miss_latency[MemCmd::WriteReq];
+
+    overallMshrMissLatency
+        .name(name() + ".overall_mshr_miss_latency")
+        .desc("number of overall MSHR miss cycles")
+        .flags(total)
+        ;
+    overallMshrMissLatency = demandMshrMissLatency +
+        mshr_miss_latency[MemCmd::SoftPFReq] + mshr_miss_latency[MemCmd::HardPFReq];
+
+    // MSHR uncacheable statistics
+    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
+        MemCmd cmd(access_idx);
+        const string &cstr = cmd.toString();
+
+        mshr_uncacheable[access_idx]
+            .init(maxThreadsPerCPU)
+            .name(name() + "." + cstr + "_mshr_uncacheable")
+            .desc("number of " + cstr + " MSHR uncacheable")
+            .flags(total | nozero | nonan)
+            ;
+    }
+
+    overallMshrUncacheable
+        .name(name() + ".overall_mshr_uncacheable_misses")
+        .desc("number of overall MSHR uncacheable misses")
+        .flags(total)
+        ;
+    overallMshrUncacheable = mshr_uncacheable[MemCmd::ReadReq]
+        + mshr_uncacheable[MemCmd::WriteReq] + mshr_uncacheable[MemCmd::SoftPFReq]
+        + mshr_uncacheable[MemCmd::HardPFReq];
+
+    // MSHR uncacheable latency statistics
+    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
+        MemCmd cmd(access_idx);
+        const string &cstr = cmd.toString();
+
+        mshr_uncacheable_lat[access_idx]
+            .init(maxThreadsPerCPU)
+            .name(name() + "." + cstr + "_mshr_uncacheable_latency")
+            .desc("number of " + cstr + " MSHR uncacheable cycles")
+            .flags(total | nozero | nonan)
+            ;
+    }
+
+    overallMshrUncacheableLatency
+        .name(name() + ".overall_mshr_uncacheable_latency")
+        .desc("number of overall MSHR uncacheable cycles")
+        .flags(total)
+        ;
+    overallMshrUncacheableLatency = mshr_uncacheable_lat[MemCmd::ReadReq]
+        + mshr_uncacheable_lat[MemCmd::WriteReq]
+        + mshr_uncacheable_lat[MemCmd::SoftPFReq]
+        + mshr_uncacheable_lat[MemCmd::HardPFReq];
+
+#if 0
+    // MSHR access formulas
+    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
+        MemCmd cmd(access_idx);
+        const string &cstr = cmd.toString();
+
+        mshrAccesses[access_idx]
+            .name(name() + "." + cstr + "_mshr_accesses")
+            .desc("number of " + cstr + " mshr accesses(hits+misses)")
+            .flags(total | nozero | nonan)
+            ;
+        mshrAccesses[access_idx] =
+            mshr_hits[access_idx] + mshr_misses[access_idx]
+            + mshr_uncacheable[access_idx];
+    }
+
+    demandMshrAccesses
+        .name(name() + ".demand_mshr_accesses")
+        .desc("number of demand (read+write) mshr accesses")
+        .flags(total | nozero | nonan)
+        ;
+    demandMshrAccesses = demandMshrHits + demandMshrMisses;
+
+    overallMshrAccesses
+        .name(name() + ".overall_mshr_accesses")
+        .desc("number of overall (read+write) mshr accesses")
+        .flags(total | nozero | nonan)
+        ;
+    overallMshrAccesses = overallMshrHits + overallMshrMisses
+        + overallMshrUncacheable;
+#endif
+
+    // MSHR miss rate formulas
+    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
+        MemCmd cmd(access_idx);
+        const string &cstr = cmd.toString();
+
+        mshrMissRate[access_idx]
+            .name(name() + "." + cstr + "_mshr_miss_rate")
+            .desc("mshr miss rate for " + cstr + " accesses")
+            .flags(total | nozero | nonan)
+            ;
+
+        mshrMissRate[access_idx] =
+            mshr_misses[access_idx] / accesses[access_idx];
+    }
+
+    demandMshrMissRate
+        .name(name() + ".demand_mshr_miss_rate")
+        .desc("mshr miss rate for demand accesses")
+        .flags(total)
+        ;
+    demandMshrMissRate = demandMshrMisses / demandAccesses;
+
+    overallMshrMissRate
+        .name(name() + ".overall_mshr_miss_rate")
+        .desc("mshr miss rate for overall accesses")
+        .flags(total)
+        ;
+    overallMshrMissRate = overallMshrMisses / overallAccesses;
+
+    // mshrMiss latency formulas
+    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
+        MemCmd cmd(access_idx);
+        const string &cstr = cmd.toString();
+
+        avgMshrMissLatency[access_idx]
+            .name(name() + "." + cstr + "_avg_mshr_miss_latency")
+            .desc("average " + cstr + " mshr miss latency")
+            .flags(total | nozero | nonan)
+            ;
+
+        avgMshrMissLatency[access_idx] =
+            mshr_miss_latency[access_idx] / mshr_misses[access_idx];
+    }
+
+    demandAvgMshrMissLatency
+        .name(name() + ".demand_avg_mshr_miss_latency")
+        .desc("average overall mshr miss latency")
+        .flags(total)
+        ;
+    demandAvgMshrMissLatency = demandMshrMissLatency / demandMshrMisses;
+
+    overallAvgMshrMissLatency
+        .name(name() + ".overall_avg_mshr_miss_latency")
+        .desc("average overall mshr miss latency")
+        .flags(total)
+        ;
+    overallAvgMshrMissLatency = overallMshrMissLatency / overallMshrMisses;
+
+    // mshrUncacheable latency formulas
+    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
+        MemCmd cmd(access_idx);
+        const string &cstr = cmd.toString();
+
+        avgMshrUncacheableLatency[access_idx]
+            .name(name() + "." + cstr + "_avg_mshr_uncacheable_latency")
+            .desc("average " + cstr + " mshr uncacheable latency")
+            .flags(total | nozero | nonan)
+            ;
+
+        avgMshrUncacheableLatency[access_idx] =
+            mshr_uncacheable_lat[access_idx] / mshr_uncacheable[access_idx];
+    }
+
+    overallAvgMshrUncacheableLatency
+        .name(name() + ".overall_avg_mshr_uncacheable_latency")
+        .desc("average overall mshr uncacheable latency")
+        .flags(total)
+        ;
+    overallAvgMshrUncacheableLatency = overallMshrUncacheableLatency / overallMshrUncacheable;
+
+    mshr_cap_events
+        .init(maxThreadsPerCPU)
+        .name(name() + ".mshr_cap_events")
+        .desc("number of times MSHR cap was activated")
+        .flags(total)
+        ;
+
+    //software prefetching stats
+    soft_prefetch_mshr_full
+        .init(maxThreadsPerCPU)
+        .name(name() + ".soft_prefetch_mshr_full")
+        .desc("number of mshr full events for SW prefetching instrutions")
+        .flags(total)
+        ;
+
+    mshr_no_allocate_misses
+        .name(name() +".no_allocate_misses")
+        .desc("Number of misses that were no-allocate")
+        ;
+
 }
 
 unsigned int
 BaseCache::drain(Event *de)
 {
+    int count = memSidePort->drain(de) + cpuSidePort->drain(de);
+
     // Set status
-    if (!canDrain()) {
+    if (count != 0) {
         drainEvent = de;
 
         changeState(SimObject::Draining);
-        return 1;
+        return count;
     }
 
     changeState(SimObject::Drained);
diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh
index a27ac1788..5969b4b3f 100644
--- a/src/mem/cache/base_cache.hh
+++ b/src/mem/cache/base_cache.hh
@@ -46,11 +46,13 @@
 #include "base/misc.hh"
 #include "base/statistics.hh"
 #include "base/trace.hh"
+#include "mem/cache/miss/mshr_queue.hh"
 #include "mem/mem_object.hh"
 #include "mem/packet.hh"
-#include "mem/port.hh"
+#include "mem/tport.hh"
 #include "mem/request.hh"
 #include "sim/eventq.hh"
+#include "sim/sim_exit.hh"
 
 /**
  * Reasons for Caches to be Blocked.
@@ -79,7 +81,7 @@ class MSHR;
  */
 class BaseCache : public MemObject
 {
-    class CachePort : public Port
+    class CachePort : public SimpleTimingPort
     {
       public:
         BaseCache *cache;
@@ -102,77 +104,76 @@ class BaseCache : public MemObject
 
         void clearBlocked();
 
-        bool checkFunctional(PacketPtr pkt);
-
         void checkAndSendFunctional(PacketPtr pkt);
 
-        bool canDrain() { return drainList.empty() && transmitList.empty(); }
-
-        bool drainResponse();
-
         CachePort *otherPort;
 
         bool blocked;
 
-        bool mustSendRetry;
-
         bool waitingOnRetry;
 
+        bool mustSendRetry;
+
         /**
          * Bit vector for the outstanding requests for the master interface.
          */
         uint8_t requestCauses;
 
-        std::list<PacketPtr> drainList;
-
-        std::list<std::pair<Tick,PacketPtr> > transmitList;
-
         bool isBusRequested() { return requestCauses != 0; }
 
-        // These need to be virtual since the Event objects depend on
-        // cache template parameters.
-        virtual void scheduleRequestEvent(Tick t) = 0;
-
         void requestBus(RequestCause cause, Tick time)
         {
+            DPRINTF(Cache, "Asserting bus request for cause %d\n", cause);
             if (!isBusRequested() && !waitingOnRetry) {
-                scheduleRequestEvent(time);
+                assert(!sendEvent->scheduled());
+                sendEvent->schedule(time);
             }
             requestCauses |= (1 << cause);
         }
 
         void deassertBusRequest(RequestCause cause)
         {
+            DPRINTF(Cache, "Deasserting bus request for cause %d\n", cause);
             requestCauses &= ~(1 << cause);
         }
 
-        void respond(PacketPtr pkt, Tick time);
+        void respond(PacketPtr pkt, Tick time) {
+            schedSendTiming(pkt, time);
+        }
     };
 
   public: //Made public so coherence can get at it.
     CachePort *cpuSidePort;
     CachePort *memSidePort;
 
-  private:
+  protected:
+
+    /** Miss status registers */
+    MSHRQueue mshrQueue;
+
+    /** Write/writeback buffer */
+    MSHRQueue writeBuffer;
+
+    /** Block size of this cache */
+    const int blkSize;
+
+    /** The number of targets for each MSHR. */
+    const int numTarget;
+
+    /** Increasing order number assigned to each incoming request. */
+    uint64_t order;
+
     /**
      * Bit vector of the blocking reasons for the access path.
      * @sa #BlockedCause
      */
     uint8_t blocked;
 
-    /**
-     * Bit vector for the blocking reasons for the snoop path.
-     * @sa #BlockedCause
-     */
-    uint8_t blockedSnoop;
-
-  protected:
-
     /** Stores time the cache blocked for statistics. */
     Tick blockedCycle;
 
-    /** Block size of this cache */
-    const int blkSize;
+    /** Pointer to the MSHR that has no targets. */
+    MSHR *noTargetMSHR;
 
     /** The number of misses to trigger an exit event. */
     Counter missCount;
@@ -246,6 +247,73 @@ class BaseCache : public MemObject
     /** The number of cache copies performed. */
     Stats::Scalar<> cacheCopies;
 
+    /** Number of blocks written back per thread. */
+    Stats::Vector<> writebacks;
+
+    /** Number of misses that hit in the MSHRs per command and thread. */
+    Stats::Vector<> mshr_hits[MemCmd::NUM_MEM_CMDS];
+    /** Demand misses that hit in the MSHRs. */
+    Stats::Formula demandMshrHits;
+    /** Total number of misses that hit in the MSHRs. */
+    Stats::Formula overallMshrHits;
+
+    /** Number of misses that miss in the MSHRs, per command and thread. */
+    Stats::Vector<> mshr_misses[MemCmd::NUM_MEM_CMDS];
+    /** Demand misses that miss in the MSHRs. */
+    Stats::Formula demandMshrMisses;
+    /** Total number of misses that miss in the MSHRs. */
+    Stats::Formula overallMshrMisses;
+
+    /** Number of uncacheable MSHR accesses, per command and thread. */
+    Stats::Vector<> mshr_uncacheable[MemCmd::NUM_MEM_CMDS];
+    /** Total number of uncacheable MSHR misses. */
+    Stats::Formula overallMshrUncacheable;
+
+    /** Total cycle latency of each MSHR miss, per command and thread. */
+    Stats::Vector<> mshr_miss_latency[MemCmd::NUM_MEM_CMDS];
+    /** Total cycle latency of demand MSHR misses. */
+    Stats::Formula demandMshrMissLatency;
+    /** Total cycle latency of overall MSHR misses. */
+    Stats::Formula overallMshrMissLatency;
+
+    /** Total cycles of MSHR uncacheable latency, per command and thread. */
+    Stats::Vector<> mshr_uncacheable_lat[MemCmd::NUM_MEM_CMDS];
+    /** Total cycle latency of overall MSHR uncacheable accesses. */
+    Stats::Formula overallMshrUncacheableLatency;
+
+    /** The total number of MSHR accesses per command and thread. */
+    Stats::Formula mshrAccesses[MemCmd::NUM_MEM_CMDS];
+    /** The total number of demand MSHR accesses. */
+    Stats::Formula demandMshrAccesses;
+    /** The total number of MSHR accesses. */
+    Stats::Formula overallMshrAccesses;
+
+    /** The miss rate in the MSHRs per command and thread. */
+    Stats::Formula mshrMissRate[MemCmd::NUM_MEM_CMDS];
+    /** The demand miss rate in the MSHRs. */
+    Stats::Formula demandMshrMissRate;
+    /** The overall miss rate in the MSHRs. */
+    Stats::Formula overallMshrMissRate;
+
+    /** The average latency of an MSHR miss, per command and thread. */
+    Stats::Formula avgMshrMissLatency[MemCmd::NUM_MEM_CMDS];
+    /** The average latency of a demand MSHR miss. */
+    Stats::Formula demandAvgMshrMissLatency;
+    /** The average overall latency of an MSHR miss. */
+    Stats::Formula overallAvgMshrMissLatency;
+
+    /** The average MSHR uncacheable latency, per command and thread. */
+    Stats::Formula avgMshrUncacheableLatency[MemCmd::NUM_MEM_CMDS];
+    /** The average overall MSHR uncacheable latency. */
+    Stats::Formula overallAvgMshrUncacheableLatency;
+
+    /** The number of times a thread hit its MSHR cap. */
+    Stats::Vector<> mshr_cap_events;
+    /** The number of times software prefetches caused the MSHR to block. */
+    Stats::Vector<> soft_prefetch_mshr_full;
+
+    Stats::Scalar<> mshr_no_allocate_misses;
+
     /**
      * @}
      */
@@ -260,12 +328,13 @@ class BaseCache : public MemObject
     class Params
     {
       public:
-        /** List of address ranges of this cache. */
-        std::vector<Range<Addr> > addrRange;
         /** The hit latency for this cache. */
         int hitLatency;
         /** The block size of this cache. */
         int blkSize;
+        int numMSHRs;
+        int numTargets;
+        int numWriteBuffers;
         /**
          * The maximum number of misses this cache should handle before
          * ending the simulation.
@@ -275,10 +344,12 @@ class BaseCache : public MemObject
         /**
          * Construct an instance of this parameter class.
          */
-        Params(std::vector<Range<Addr> > addr_range,
-               int hit_latency, int _blkSize, Counter max_misses)
-            : addrRange(addr_range), hitLatency(hit_latency), blkSize(_blkSize),
-              maxMisses(max_misses)
+        Params(int _hitLatency, int _blkSize,
+               int _numMSHRs, int _numTargets, int _numWriteBuffers,
+               Counter _maxMisses)
+            : hitLatency(_hitLatency), blkSize(_blkSize),
+              numMSHRs(_numMSHRs), numTargets(_numTargets),
+              numWriteBuffers(_numWriteBuffers), maxMisses(_maxMisses)
         {
         }
     };
@@ -307,6 +378,10 @@ class BaseCache : public MemObject
         return blkSize;
     }
 
+
+    Addr blockAlign(Addr addr) const { return (addr & ~(blkSize - 1)); }
+
+
     /**
      * Returns true if the cache is blocked for accesses.
      */
@@ -315,14 +390,6 @@ class BaseCache : public MemObject
         return blocked != 0;
     }
 
-    /**
-     * Returns true if the cache is blocked for snoops.
-     */
-    bool isBlockedForSnoop()
-    {
-        return blockedSnoop != 0;
-    }
-
     /**
      * Marks the access path of the cache as blocked for the given cause. This
      * also sets the blocked flag in the slave interface.
@@ -345,23 +412,6 @@ class BaseCache : public MemObject
         }
     }
 
-    /**
-     * Marks the snoop path of the cache as blocked for the given cause. This
-     * also sets the blocked flag in the master interface.
-     * @param cause The reason to block the snoop path.
-     */
-    void setBlockedForSnoop(BlockedCause cause)
-    {
-        uint8_t flag = 1 << cause;
-        uint8_t old_state = blockedSnoop;
-        if (!(blockedSnoop & flag)) {
-            //Wasn't already blocked for this cause
-            blockedSnoop |= flag;
-            if (!old_state)
-                memSidePort->setBlocked();
-        }
-    }
-
     /**
      * Marks the cache as unblocked for the given cause. This also clears the
      * blocked flags in the appropriate interfaces.
@@ -383,13 +433,6 @@ class BaseCache : public MemObject
                 cpuSidePort->clearBlocked();
             }
         }
-        if (blockedSnoop & flag)
-        {
-            blockedSnoop &= ~flag;
-            if (!isBlockedForSnoop()) {
-                memSidePort->clearBlocked();
-            }
-        }
     }
 
     /**
@@ -418,55 +461,26 @@ class BaseCache : public MemObject
     void deassertMemSideBusRequest(RequestCause cause)
     {
         memSidePort->deassertBusRequest(cause);
-        checkDrain();
+        // checkDrain();
     }
 
-    /**
-     * Send a response to the slave interface.
-     * @param pkt The request being responded to.
-     * @param time The time the response is ready.
-     */
-    void respond(PacketPtr pkt, Tick time)
-    {
-        cpuSidePort->respond(pkt, time);
-    }
+    virtual unsigned int drain(Event *de);
 
-    /**
-     * Suppliess the data if cache to cache transfers are enabled.
-     * @param pkt The bus transaction to fulfill.
-     */
-    void respondToSnoop(PacketPtr pkt, Tick time)
-    {
-        memSidePort->respond(pkt, time);
-    }
+    virtual bool inCache(Addr addr) = 0;
 
-    virtual unsigned int drain(Event *de);
+    virtual bool inMissQueue(Addr addr) = 0;
 
-    void checkDrain()
+    void incMissCount(PacketPtr pkt)
     {
-        if (drainEvent && canDrain()) {
-            drainEvent->process();
-            changeState(SimObject::Drained);
-            // Clear the drain event
-            drainEvent = NULL;
-        }
-    }
+        misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
 
-    bool canDrain()
-    {
-        if (isMemSideBusRequested()) {
-            return false;
-        } else if (memSidePort && !memSidePort->canDrain()) {
-            return false;
-        } else if (cpuSidePort && !cpuSidePort->canDrain()) {
-            return false;
+        if (missCount) {
+            --missCount;
+            if (missCount == 0)
+                exitSimLoop("A cache reached the maximum miss count");
         }
-        return true;
     }
 
-    virtual bool inCache(Addr addr) = 0;
-
-    virtual bool inMissQueue(Addr addr) = 0;
 };
 
 #endif //__BASE_CACHE_HH__
diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc
index 2b4e7b9c8..96f9a2e11 100644
--- a/src/mem/cache/cache.cc
+++ b/src/mem/cache/cache.cc
@@ -58,9 +58,6 @@
 #include "mem/cache/tags/split_lifo.hh"
 #endif
 
-#include "mem/cache/miss/miss_queue.hh"
-#include "mem/cache/miss/blocking_buffer.hh"
-
 #include "mem/cache/coherence/simple_coherence.hh"
 
 #include "mem/cache/cache_impl.hh"
diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index e14b2efe8..16d15cf86 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -45,12 +45,11 @@
 
 #include "mem/cache/base_cache.hh"
 #include "mem/cache/cache_blk.hh"
-#include "mem/cache/miss/miss_buffer.hh"
+#include "mem/cache/miss/mshr.hh"
 
 #include "sim/eventq.hh"
 
 //Forward decleration
-class MSHR;
 class BasePrefetcher;
 
 /**
@@ -86,29 +85,14 @@ class Cache : public BaseCache
             return static_cast<Cache<TagStore,Coherence> *>(cache);
         }
 
-        void processRequestEvent();
-        void processResponseEvent();
-
         virtual void getDeviceAddressRanges(AddrRangeList &resp,
                                             bool &snoop);
 
         virtual bool recvTiming(PacketPtr pkt);
 
-        virtual void recvRetry();
-
         virtual Tick recvAtomic(PacketPtr pkt);
 
         virtual void recvFunctional(PacketPtr pkt);
-
-        typedef EventWrapper<CpuSidePort, &CpuSidePort::processResponseEvent>
-                ResponseEvent;
-
-        typedef EventWrapper<CpuSidePort, &CpuSidePort::processRequestEvent>
-                RequestEvent;
-
-        virtual void scheduleRequestEvent(Tick t) {
-            new RequestEvent(this, t);
-        }
     };
 
     class MemSidePort : public CachePort
@@ -124,8 +108,9 @@ class Cache : public BaseCache
             return static_cast<Cache<TagStore,Coherence> *>(cache);
         }
 
-        void processRequestEvent();
-        void processResponseEvent();
+        void sendPacket();
+
+        void processSendEvent();
 
         virtual void getDeviceAddressRanges(AddrRangeList &resp,
                                             bool &snoop);
@@ -138,21 +123,13 @@ class Cache : public BaseCache
 
         virtual void recvFunctional(PacketPtr pkt);
 
-        typedef EventWrapper<MemSidePort, &MemSidePort::processResponseEvent>
-                ResponseEvent;
-
-        typedef EventWrapper<MemSidePort, &MemSidePort::processRequestEvent>
-                RequestEvent;
-
-        virtual void scheduleRequestEvent(Tick t) {
-            new RequestEvent(this, t);
-        }
+        typedef EventWrapper<MemSidePort, &MemSidePort::processSendEvent>
+                SendEvent;
     };
 
     /** Tag and data Storage */
     TagStore *tags;
-    /** Miss and Writeback handler */
-    MissBuffer *missQueue;
+
     /** Coherence protocol. */
     Coherence *coherence;
 
@@ -176,23 +153,6 @@ class Cache : public BaseCache
      */
     int hitLatency;
 
-     /**
-      * A permanent mem req to always be used to cause invalidations.
-      * Used to append to target list, to cause an invalidation.
-      */
-    PacketPtr invalidatePkt;
-    Request *invalidateReq;
-
-    /**
-     * Policy class for performing compression.
-     */
-    CompressionAlgorithm *compressionAlg;
-
-    /**
-     * The block size of this cache. Set to value in the Tags object.
-     */
-    const int16_t blkSize;
-
     /**
      * Can this cache should allocate a block on a line-sized write miss.
      */
@@ -200,50 +160,6 @@ class Cache : public BaseCache
 
     const bool prefetchMiss;
 
-    /**
-     * Can the data can be stored in a compressed form.
-     */
-    const bool storeCompressed;
-
-    /**
-     * Do we need to compress blocks on writebacks (i.e. because
-     * writeback bus is compressed but storage is not)?
-     */
-    const bool compressOnWriteback;
-
-    /**
-     * The latency of a compression operation.
-     */
-    const int16_t compLatency;
-
-    /**
-     * Should we use an adaptive compression scheme.
-     */
-    const bool adaptiveCompression;
-
-    /**
-     * Do writebacks need to be compressed (i.e. because writeback bus
-     * is compressed), whether or not they're already compressed for
-     * storage.
-     */
-    const bool writebackCompressed;
-
-    /**
-     * Compare the internal block data to the fast access block data.
-     * @param blk The cache block to check.
-     * @return True if the data is the same.
-     */
-    bool verifyData(BlkType *blk);
-
-    /**
-     * Update the internal data of the block. The data to write is assumed to
-     * be in the fast access data.
-     * @param blk The block with the data to update.
-     * @param writebacks A list to store any generated writebacks.
-     * @param compress_block True if we should compress this block
-     */
-    void updateData(BlkType *blk, PacketList &writebacks, bool compress_block);
-
     /**
      * Handle a replacement for the given request.
      * @param blk A pointer to the block, usually NULL
@@ -251,7 +167,7 @@ class Cache : public BaseCache
      * @param new_state The new state of the block.
      * @param writebacks A list to store any generated writebacks.
      */
-    BlkType* doReplacement(BlkType *blk, PacketPtr &pkt,
+    BlkType* doReplacement(BlkType *blk, PacketPtr pkt,
                            CacheBlk::State new_state, PacketList &writebacks);
 
     /**
@@ -263,59 +179,38 @@ class Cache : public BaseCache
      * @return Pointer to the cache block touched by the request. NULL if it
      * was a miss.
      */
-    BlkType* handleAccess(PacketPtr &pkt, int & lat,
-                          PacketList & writebacks, bool update = true);
-
+    bool access(PacketPtr pkt, BlkType *blk, int & lat);
 
     /**
      *Handle doing the Compare and Swap function for SPARC.
      */
-    void cmpAndSwap(BlkType *blk, PacketPtr &pkt);
-
-    /**
-     * Populates a cache block and handles all outstanding requests for the
-     * satisfied fill request. This version takes an MSHR pointer and uses its
-     * request to fill the cache block, while repsonding to its targets.
-     * @param blk The cache block if it already exists.
-     * @param mshr The MSHR that contains the fill data and targets to satisfy.
-     * @param new_state The state of the new cache block.
-     * @param writebacks List for any writebacks that need to be performed.
-     * @return Pointer to the new cache block.
-     */
-    BlkType* handleFill(BlkType *blk, MSHR * mshr, CacheBlk::State new_state,
-                        PacketList & writebacks, PacketPtr pkt);
+    void cmpAndSwap(BlkType *blk, PacketPtr pkt);
 
     /**
      * Populates a cache block and handles all outstanding requests for the
      * satisfied fill request. This version takes two memory requests. One
      * contains the fill data, the other is an optional target to satisfy.
      * Used for Cache::probe.
-     * @param blk The cache block if it already exists.
      * @param pkt The memory request with the fill data.
-     * @param new_state The state of the new cache block.
+     * @param blk The cache block if it already exists.
      * @param writebacks List for any writebacks that need to be performed.
-     * @param target The memory request to perform after the fill.
      * @return Pointer to the new cache block.
      */
-    BlkType* handleFill(BlkType *blk, PacketPtr &pkt,
-                        CacheBlk::State new_state,
-                        PacketList & writebacks, PacketPtr target = NULL);
+    BlkType *handleFill(PacketPtr pkt, BlkType *blk,
+                        PacketList &writebacks);
 
-    /**
-     * Sets the blk to the new state and handles the given request.
-     * @param blk The cache block being snooped.
-     * @param new_state The new coherence state for the block.
-     * @param pkt The request to satisfy
-     */
-    void handleSnoop(BlkType *blk, CacheBlk::State new_state,
-                     PacketPtr &pkt);
+    bool satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk);
+    bool satisfyTarget(MSHR::Target *target, BlkType *blk);
+    void satisfyMSHR(MSHR *mshr, PacketPtr pkt, BlkType *blk);
+
+    void doTimingSupplyResponse(PacketPtr req_pkt, uint8_t *blk_data);
 
     /**
      * Sets the blk to the new state.
      * @param blk The cache block being snooped.
      * @param new_state The new coherence state for the block.
      */
-    void handleSnoop(BlkType *blk, CacheBlk::State new_state);
+    void handleSnoop(PacketPtr ptk, BlkType *blk, bool is_timing);
 
     /**
      * Create a writeback request for the given block.
@@ -330,44 +225,24 @@ class Cache : public BaseCache
     {
       public:
         TagStore *tags;
-        MissBuffer *missQueue;
         Coherence *coherence;
         BaseCache::Params baseParams;
         BasePrefetcher*prefetcher;
         bool prefetchAccess;
-        int hitLatency;
-        CompressionAlgorithm *compressionAlg;
-        const int16_t blkSize;
         const bool doFastWrites;
         const bool prefetchMiss;
-        const bool storeCompressed;
-        const bool compressOnWriteback;
-        const int16_t compLatency;
-        const bool adaptiveCompression;
-        const bool writebackCompressed;
 
-        Params(TagStore *_tags, MissBuffer *mq, Coherence *coh,
+        Params(TagStore *_tags, Coherence *coh,
                BaseCache::Params params,
                BasePrefetcher *_prefetcher,
                bool prefetch_access, int hit_latency,
                bool do_fast_writes,
-               bool store_compressed, bool adaptive_compression,
-               bool writeback_compressed,
-               CompressionAlgorithm *_compressionAlg, int comp_latency,
                bool prefetch_miss)
-            : tags(_tags), missQueue(mq), coherence(coh),
+            : tags(_tags), coherence(coh),
               baseParams(params),
               prefetcher(_prefetcher), prefetchAccess(prefetch_access),
-              hitLatency(hit_latency),
-              compressionAlg(_compressionAlg),
-              blkSize(_tags->getBlockSize()),
               doFastWrites(do_fast_writes),
-              prefetchMiss(prefetch_miss),
-              storeCompressed(store_compressed),
-              compressOnWriteback(!store_compressed && writeback_compressed),
-              compLatency(comp_latency),
-              adaptiveCompression(adaptive_compression),
-              writebackCompressed(writeback_compressed)
+              prefetchMiss(prefetch_miss)
         {
         }
     };
@@ -385,85 +260,105 @@ class Cache : public BaseCache
      * @param pkt The request to perform.
      * @return The result of the access.
      */
-    bool access(PacketPtr &pkt);
+    bool timingAccess(PacketPtr pkt);
 
     /**
-     * Selects a request to send on the bus.
-     * @return The memory request to service.
+     * Performs the access specified by the request.
+     * @param pkt The request to perform.
+     * @return The result of the access.
      */
-    PacketPtr getPacket();
+    Tick atomicAccess(PacketPtr pkt);
 
     /**
-     * Was the request was sent successfully?
-     * @param pkt The request.
-     * @param success True if the request was sent successfully.
+     * Performs the access specified by the request.
+     * @param pkt The request to perform.
+     * @param otherSidePort Port on the cache's other side (TODO confirm).
      */
-    void sendResult(PacketPtr &pkt, MSHR* mshr, bool success);
+    void functionalAccess(PacketPtr pkt, CachePort *otherSidePort);
 
     /**
      * Handles a response (cache line fill/write ack) from the bus.
      * @param pkt The request being responded to.
      */
-    void handleResponse(PacketPtr &pkt);
+    void handleResponse(PacketPtr pkt);
 
     /**
      * Snoops bus transactions to maintain coherence.
      * @param pkt The current bus transaction.
      */
-    void snoop(PacketPtr &pkt);
+    void snoopTiming(PacketPtr pkt);
 
-    void snoopResponse(PacketPtr &pkt);
+    /**
+     * Snoop for the provided request in the cache and return the estimated
+     * time of completion.
+     * @param pkt The memory request to snoop
+     * @return The estimated completion time.
+     */
+    Tick snoopAtomic(PacketPtr pkt);
 
     /**
      * Squash all requests associated with specified thread.
      * intended for use by I-cache.
      * @param threadNum The thread to squash.
      */
-    void squash(int threadNum)
-    {
-        missQueue->squash(threadNum);
-    }
+    void squash(int threadNum);
 
     /**
-     * Return the number of outstanding misses in a Cache.
-     * Default returns 0.
-     *
-     * @retval unsigned The number of missing still outstanding.
+     * Allocate a new MSHR or write buffer to handle a miss.
+     * @param pkt The access that missed.
+     * @param time The time to continue processing the miss.
+     * @param isFill Whether to fetch & allocate a block or just forward pkt.
+     * @param requestBus Whether to also request the memory-side bus at time.
      */
-    unsigned outstandingMisses() const
-    {
-        return missQueue->getMisses();
-    }
+    MSHR *allocateBuffer(PacketPtr pkt, Tick time, bool isFill,
+                         bool requestBus);
 
     /**
-     * Perform the access specified in the request and return the estimated
-     * time of completion. This function can either update the hierarchy state
-     * or just perform the access wherever the data is found depending on the
-     * state of the update flag.
-     * @param pkt The memory request to satisfy
-     * @param update If true, update the hierarchy, otherwise just perform the
-     * request.
-     * @return The estimated completion time.
+     * Selects an outstanding request to service.
+     * @return The request to service, NULL if none found.
      */
-    Tick probe(PacketPtr &pkt, bool update, CachePort * otherSidePort);
+    MSHR *getNextMSHR();
+    PacketPtr getPacket();
 
     /**
-     * Snoop for the provided request in the cache and return the estimated
-     * time of completion.
-     * @todo Can a snoop probe not change state?
-     * @param pkt The memory request to satisfy
-     * @param update If true, update the hierarchy, otherwise just perform the
-     * request.
-     * @return The estimated completion time.
+     * Marks a request as in service (sent on the bus). This can have side
+     * effects, since storage for no-response commands is deallocated once
+     * they are successfully sent.
+     * @param mshr The MSHR whose request was sent on the bus.
      */
-    Tick snoopProbe(PacketPtr &pkt);
+    void markInService(MSHR *mshr);
+
+    /**
+     * Collect statistics and free resources of a satisfied request.
+     * @param pkt The request that has been satisfied.
+     * @param time The time when the request is satisfied.
+     */
+    void handleResponse(PacketPtr pkt, Tick time);
+
+    /**
+     * Perform the given writeback request.
+     * @param pkt The writeback request.
+     */
+    void doWriteback(PacketPtr pkt);
+
+    /**
+     * Return whether there are any outstanding misses.
+     */
+    bool outstandingMisses() const
+    {
+        return mshrQueue.allocated != 0;
+    }
+
+    CacheBlk *findBlock(Addr addr) {
+        return tags->findBlock(addr);
+    }
 
     bool inCache(Addr addr) {
         return (tags->findBlock(addr) != 0);
     }
 
     bool inMissQueue(Addr addr) {
-        return (missQueue->findMSHR(addr) != 0);
+        return (mshrQueue.findMatch(addr) != 0);
     }
 };
 
diff --git a/src/mem/cache/cache_blk.hh b/src/mem/cache/cache_blk.hh
index fa00a0f5a..d2aba9480 100644
--- a/src/mem/cache/cache_blk.hh
+++ b/src/mem/cache/cache_blk.hh
@@ -39,6 +39,7 @@
 
 #include "sim/core.hh"		// for Tick
 #include "arch/isa_traits.hh"	// for Addr
+#include "mem/packet.hh"
 #include "mem/request.hh"
 
 /**
@@ -51,8 +52,6 @@ enum CacheBlkStatusBits {
     BlkWritable =	0x02,
     /** dirty (modified) */
     BlkDirty =		0x04,
-    /** compressed */
-    BlkCompressed =	0x08,
     /** block was referenced */
     BlkReferenced =	0x10,
     /** block was a hardware prefetch yet unaccessed*/
@@ -174,20 +173,11 @@ class CacheBlk
      * Check to see if a block has been written.
      * @return True if the block is dirty.
      */
-    bool isModified() const
+    bool isDirty() const
     {
         return (status & BlkDirty) != 0;
     }
 
-    /**
-     * Check to see if this block contains compressed data.
-     * @return True iF the block's data is compressed.
-     */
-    bool isCompressed() const
-    {
-        return (status & BlkCompressed) != 0;
-    }
-
     /**
      * Check if this block has been referenced.
      * @return True if the block has been referenced.
@@ -213,10 +203,10 @@ class CacheBlk
      * redundant records on the list, but that's OK, as they'll all
      * get blown away at the next store.
      */
-    void trackLoadLocked(Request *req)
+    void trackLoadLocked(PacketPtr pkt)
     {
-        assert(req->isLocked());
-        lockList.push_front(Lock(req));
+        assert(pkt->isLocked());
+        lockList.push_front(Lock(pkt->req));
     }
 
     /**
@@ -230,9 +220,10 @@ class CacheBlk
      * @return True if write should proceed, false otherwise.  Returns
      * false only in the case of a failed store conditional.
      */
-    bool checkWrite(Request *req)
+    bool checkWrite(PacketPtr pkt)
     {
-        if (req->isLocked()) {
+        Request *req = pkt->req;
+        if (pkt->isLocked()) {
             // it's a store conditional... have to check for matching
             // load locked.
             bool success = false;
diff --git a/src/mem/cache/cache_builder.cc b/src/mem/cache/cache_builder.cc
index bc1a8a775..307c851a2 100644
--- a/src/mem/cache/cache_builder.cc
+++ b/src/mem/cache/cache_builder.cc
@@ -70,10 +70,6 @@
 #include "base/compression/null_compression.hh"
 #include "base/compression/lzss_compression.hh"
 
-// MissQueue Templates
-#include "mem/cache/miss/miss_queue.hh"
-#include "mem/cache/miss/blocking_buffer.hh"
-
 // Coherence Templates
 #include "mem/cache/coherence/simple_coherence.hh"
 
@@ -207,13 +203,9 @@ END_INIT_SIM_OBJECT_PARAMS(BaseCache)
         else {                                                          \
             BUILD_NULL_PREFETCHER(TAGS);                                \
         }                                                               \
-        Cache<TAGS, c>::Params params(tags, mq, coh, base_params,       \
+        Cache<TAGS, c>::Params params(tags, coh, base_params,       \
                                       pf, prefetch_access, latency, \
                                       true,                             \
-                                      store_compressed,                 \
-                                      adaptive_compression,             \
-                                      compressed_bus,                   \
-                                      compAlg, compression_latency,     \
                                       prefetch_miss);                   \
         Cache<TAGS, c> *retval =                                        \
             new Cache<TAGS, c>(getInstanceName(), params);              \
@@ -301,8 +293,6 @@ END_INIT_SIM_OBJECT_PARAMS(BaseCache)
     } while (0)
 
 #define BUILD_COHERENCE(b) do {						\
-        SimpleCoherence *coh = new SimpleCoherence(protocol);           \
-        BUILD_CACHES(SimpleCoherence);                                  \
     } while (0)
 
 #if defined(USE_TAGGED)
@@ -369,8 +359,9 @@ CREATE_SIM_OBJECT(BaseCache)
     }
 
     // Build BaseCache param object
-    BaseCache::Params base_params(addr_range, latency,
-                                  block_size, max_miss_count);
+    BaseCache::Params base_params(latency, block_size,
+                                  mshrs, tgts_per_mshr, write_buffers,
+                                  max_miss_count);
 
     //Warnings about prefetcher policy
     if (pf_policy == "none" && (prefetch_miss || prefetch_access)) {
@@ -408,14 +399,8 @@ CREATE_SIM_OBJECT(BaseCache)
     const void *repl = NULL;
 #endif
 
-    if (mshrs == 1 /*|| out_bus->doEvents() == false*/) {
-        BlockingBuffer *mq = new BlockingBuffer(true);
-        BUILD_COHERENCE(BlockingBuffer);
-    } else {
-        MissQueue *mq = new MissQueue(mshrs, tgts_per_mshr, write_buffers,
-                                      true, prefetch_miss);
-        BUILD_COHERENCE(MissQueue);
-    }
+    SimpleCoherence *coh = new SimpleCoherence(protocol);
+    BUILD_CACHES(SimpleCoherence);
     return NULL;
 }
 
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index a7f96603e..0f66e613c 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -37,17 +37,8 @@
  * Cache definitions.
  */
 
-#include <assert.h>
-#include <math.h>
-
-#include <cassert>
-#include <iostream>
-#include <cstring>
-#include <string>
-
 #include "sim/host.hh"
 #include "base/misc.hh"
-#include "cpu/smt.hh"
 
 #include "mem/cache/cache.hh"
 #include "mem/cache/cache_blk.hh"
@@ -56,25 +47,16 @@
 
 #include "sim/sim_exit.hh" // for SimExitEvent
 
-bool SIGNAL_NACK_HACK;
 
 template<class TagStore, class Coherence>
 Cache<TagStore,Coherence>::Cache(const std::string &_name,
                                  Cache<TagStore,Coherence>::Params &params)
     : BaseCache(_name, params.baseParams),
       prefetchAccess(params.prefetchAccess),
-      tags(params.tags), missQueue(params.missQueue),
+      tags(params.tags),
       coherence(params.coherence), prefetcher(params.prefetcher),
-      hitLatency(params.hitLatency),
-      compressionAlg(params.compressionAlg),
-      blkSize(params.blkSize),
       doFastWrites(params.doFastWrites),
-      prefetchMiss(params.prefetchMiss),
-      storeCompressed(params.storeCompressed),
-      compressOnWriteback(params.compressOnWriteback),
-      compLatency(params.compLatency),
-      adaptiveCompression(params.adaptiveCompression),
-      writebackCompressed(params.writebackCompressed)
+      prefetchMiss(params.prefetchMiss)
 {
     cpuSidePort = new CpuSidePort(_name + "-cpu_side_port", this);
     memSidePort = new MemSidePort(_name + "-mem_side_port", this);
@@ -82,12 +64,8 @@ Cache<TagStore,Coherence>::Cache(const std::string &_name,
     memSidePort->setOtherPort(cpuSidePort);
 
     tags->setCache(this);
-    missQueue->setCache(this);
-    missQueue->setPrefetcher(prefetcher);
     coherence->setCache(this);
     prefetcher->setCache(this);
-    invalidateReq = new Request((Addr) NULL, blkSize, 0);
-    invalidatePkt = new Packet(invalidateReq, MemCmd::InvalidateReq, 0);
 }
 
 template<class TagStore, class Coherence>
@@ -96,51 +74,221 @@ Cache<TagStore,Coherence>::regStats()
 {
     BaseCache::regStats();
     tags->regStats(name());
-    missQueue->regStats(name());
     coherence->regStats(name());
     prefetcher->regStats(name());
 }
 
 template<class TagStore, class Coherence>
-typename Cache<TagStore,Coherence>::BlkType*
-Cache<TagStore,Coherence>::handleAccess(PacketPtr &pkt, int & lat,
-                                        PacketList & writebacks, bool update)
+Port *
+Cache<TagStore,Coherence>::getPort(const std::string &if_name, int idx)
+{
+    if (if_name == "" || if_name == "cpu_side") {
+        return cpuSidePort;
+    } else if (if_name == "mem_side") {
+        return memSidePort;
+    } else if (if_name == "functional") {
+        return new CpuSidePort(name() + "-cpu_side_funcport", this);
+    } else {
+        panic("Port name %s unrecognized\n", if_name);
+    }
+}
+
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::deletePortRefs(Port *p)
+{
+    if (cpuSidePort == p || memSidePort == p)
+        panic("Can only delete functional ports\n");
+
+    delete p;
+}
+
+
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::cmpAndSwap(BlkType *blk, PacketPtr pkt)
 {
-    // Set the block offset here
+    uint64_t overwrite_val;
+    bool overwrite_mem;
+    uint64_t condition_val64;
+    uint32_t condition_val32;
+
     int offset = tags->extractBlkOffset(pkt->getAddr());
+    uint8_t *blk_data = blk->data + offset;
+
+    assert(sizeof(uint64_t) >= pkt->getSize());
+
+    overwrite_mem = true;
+    // keep a copy of our possible write value, and copy what is at the
+    // memory address into the packet
+    pkt->writeData((uint8_t *)&overwrite_val);
+    pkt->setData(blk_data);
+
+    if (pkt->req->isCondSwap()) {
+        if (pkt->getSize() == sizeof(uint64_t)) {
+            condition_val64 = pkt->req->getExtraData();
+            overwrite_mem = !std::memcmp(&condition_val64, blk_data,
+                                         sizeof(uint64_t));
+        } else if (pkt->getSize() == sizeof(uint32_t)) {
+            condition_val32 = (uint32_t)pkt->req->getExtraData();
+            overwrite_mem = !std::memcmp(&condition_val32, blk_data,
+                                         sizeof(uint32_t));
+        } else
+            panic("Invalid size for conditional read/write\n");
+    }
+
+    if (overwrite_mem)
+        std::memcpy(blk_data, &overwrite_val, pkt->getSize());
+}
+
+
+/////////////////////////////////////////////////////
+//
+// MSHR helper functions
+//
+/////////////////////////////////////////////////////
+
+
+template<class TagStore, class Coherence>
+MSHR *
+Cache<TagStore,Coherence>::allocateBuffer(PacketPtr pkt, Tick time,
+                                          bool isFill, bool requestBus)
+{
+    int  size = isFill ? blkSize : pkt->getSize();
+    Addr addr = isFill ? tags->blkAlign(pkt->getAddr()) : pkt->getAddr();
+
+    MSHR *mshr = NULL;
+
+    if (pkt->isWrite()) {
+        /**
+         * @todo Add write merging here.
+         */
+        mshr = writeBuffer.allocate(addr, size, pkt, isFill);
+        mshr->order = order++;
 
-    BlkType *blk = NULL;
-    if (update) {
-        blk = tags->findBlock(pkt->getAddr(), lat);
+        if (writeBuffer.isFull()) {
+            setBlocked(Blocked_NoWBBuffers);
+        }
+
+        if (requestBus) {
+            requestMemSideBus(Request_WB, time);
+        }
     } else {
-        blk = tags->findBlock(pkt->getAddr());
-        lat = 0;
+        mshr = mshrQueue.allocate(addr, size, pkt, isFill);
+        mshr->order = order++;
+        if (mshrQueue.isFull()) {
+            setBlocked(Blocked_NoMSHRs);
+        }
+        if (requestBus) {
+            requestMemSideBus(Request_MSHR, time);
+        }
     }
-    if (blk != NULL) {
 
-        if (!update) {
+    assert(mshr != NULL);
+    return mshr;
+}
 
-            if (pkt->isWrite()){
-                assert(offset < blkSize);
-                assert(pkt->getSize() <= blkSize);
-                assert(offset+pkt->getSize() <= blkSize);
-                std::memcpy(blk->data + offset, pkt->getPtr<uint8_t>(),
-                       pkt->getSize());
-            } else if (pkt->isReadWrite()) {
-                cmpAndSwap(blk, pkt);
-            } else if (!(pkt->flags & SATISFIED)) {
-                pkt->flags |= SATISFIED;
-                pkt->result = Packet::Success;
-                assert(offset < blkSize);
-                assert(pkt->getSize() <= blkSize);
-                assert(offset + pkt->getSize() <=blkSize);
-                std::memcpy(pkt->getPtr<uint8_t>(), blk->data + offset,
-                       pkt->getSize());
+
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::markInService(MSHR *mshr)
+{
+    bool unblock = false;
+    BlockedCause cause = NUM_BLOCKED_CAUSES;
+
+    /**
+     * The MSHR's queue pointer (mshr->queue) selects which queue to
+     * update: the write buffer or the MSHR queue.
+     */
+    if (mshr->queue == &writeBuffer) {
+        // Forwarding a write/ writeback, don't need to change
+        // the command
+        unblock = writeBuffer.isFull();
+        writeBuffer.markInService(mshr);
+        if (!writeBuffer.havePending()){
+            deassertMemSideBusRequest(Request_WB);
+        }
+        if (unblock) {
+            // Do we really unblock?
+            unblock = !writeBuffer.isFull();
+            cause = Blocked_NoWBBuffers;
+        }
+    } else {
+        assert(mshr->queue == &mshrQueue);
+        unblock = mshrQueue.isFull();
+        mshrQueue.markInService(mshr);
+        if (!mshrQueue.havePending()){
+            deassertMemSideBusRequest(Request_MSHR);
+        }
+#if 0
+        if (mshr->originalCmd == MemCmd::HardPFReq) {
+            DPRINTF(HWPrefetch, "%s:Marking a HW_PF in service\n",
+                    name());
+            //Also clear pending if need be
+            if (!prefetcher->havePending())
+            {
+                deassertMemSideBusRequest(Request_PF);
             }
-            return blk;
         }
+#endif
+        if (unblock) {
+            unblock = !mshrQueue.isFull();
+            cause = Blocked_NoMSHRs;
+        }
+    }
+    if (unblock) {
+        clearBlocked(cause);
+    }
+}
+
+
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::squash(int threadNum)
+{
+    bool unblock = false;
+    BlockedCause cause = NUM_BLOCKED_CAUSES;
 
-        // Hit
+    if (noTargetMSHR && noTargetMSHR->threadNum == threadNum) {
+        noTargetMSHR = NULL;
+        unblock = true;
+        cause = Blocked_NoTargets;
+    }
+    if (mshrQueue.isFull()) {
+        unblock = true;
+        cause = Blocked_NoMSHRs;
+    }
+    mshrQueue.squash(threadNum);
+    if (!mshrQueue.havePending()) {
+        deassertMemSideBusRequest(Request_MSHR);
+    }
+    if (unblock && !mshrQueue.isFull()) {
+        clearBlocked(cause);
+    }
+}
+
+/////////////////////////////////////////////////////
+//
+// Access path: requests coming in from the CPU side
+//
+/////////////////////////////////////////////////////
+
+template<class TagStore, class Coherence>
+bool
+Cache<TagStore,Coherence>::access(PacketPtr pkt, BlkType *blk, int &lat)
+{
+    bool satisfied = false;  // assume the worst
+
+    if (prefetchAccess) {
+        //We are determining prefetches on access stream, call prefetcher
+        prefetcher->handleMiss(pkt, curTick);
+    }
+
+    DPRINTF(Cache, "%s %x %s\n", pkt->cmdString(), pkt->getAddr(),
+            (blk) ? "hit" : "miss");
+
+    if (blk != NULL) {
+        // HIT
         if (blk->isPrefetch()) {
             //Signal that this was a hit under prefetch (no need for
             //use prefetch (only can get here if true)
@@ -154,639 +302,620 @@ Cache<TagStore,Coherence>::handleAccess(PacketPtr &pkt, int & lat,
             }
         }
 
-        if ((pkt->isReadWrite() && blk->isWritable()) ||
-            (pkt->isWrite() && blk->isWritable()) ||
-            (pkt->isRead() && blk->isValid())) {
-
-            // We are satisfying the request
-            pkt->flags |= SATISFIED;
-
-            if (blk->isCompressed()) {
-                // If the data is compressed, need to increase the latency
-                lat += (compLatency/4);
-            }
-
-            bool write_data = false;
-
-            assert(verifyData(blk));
-
-            assert(offset < blkSize);
-            assert(pkt->getSize() <= blkSize);
-            assert(offset+pkt->getSize() <= blkSize);
+        if (pkt->needsExclusive() ? blk->isWritable() : blk->isValid()) {
+            // OK to satisfy access
+            hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
+            satisfied = true;
 
-            if (pkt->isWrite()) {
-                if (blk->checkWrite(pkt->req)) {
-                    write_data = true;
+            if (pkt->cmd == MemCmd::SwapReq) {
+                cmpAndSwap(blk, pkt);
+            } else if (pkt->isWrite()) {
+                if (blk->checkWrite(pkt)) {
                     blk->status |= BlkDirty;
-                    std::memcpy(blk->data + offset, pkt->getPtr<uint8_t>(),
-                           pkt->getSize());
+                    pkt->writeDataToBlock(blk->data, blkSize);
                 }
-            } else if (pkt->isReadWrite()) {
-                cmpAndSwap(blk, pkt);
             } else {
                 assert(pkt->isRead());
-                if (pkt->req->isLocked()) {
-                    blk->trackLoadLocked(pkt->req);
+                if (pkt->isLocked()) {
+                    blk->trackLoadLocked(pkt);
                 }
-                std::memcpy(pkt->getPtr<uint8_t>(), blk->data + offset,
-                            pkt->getSize());
-            }
-
-            if (write_data ||
-                (adaptiveCompression && blk->isCompressed()))
-            {
-                // If we wrote data, need to update the internal block
-                // data.
-                updateData(blk, writebacks,
-                           !(adaptiveCompression &&
-                             blk->isReferenced()));
+                pkt->setDataFromBlock(blk->data, blkSize);
             }
         } else {
-            // permission violation, treat it as a miss
-            blk = NULL;
+            // permission violation... nothing to do here, leave unsatisfied
+            // for statistics purposes this counts like a complete miss
+            incMissCount(pkt);
         }
     } else {
         // complete miss (no matching block)
-        if (pkt->req->isLocked() && pkt->isWrite()) {
+        incMissCount(pkt);
+
+        if (pkt->isLocked() && pkt->isWrite()) {
             // miss on store conditional... just give up now
             pkt->req->setExtraData(0);
-            pkt->flags |= SATISFIED;
+            satisfied = true;
         }
     }
 
-    return blk;
+    return satisfied;
 }
 
-template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::cmpAndSwap(BlkType *blk, PacketPtr &pkt){
-            uint64_t overwrite_val;
-            bool overwrite_mem;
-            uint64_t condition_val64;
-            uint32_t condition_val32;
-
-            int offset = tags->extractBlkOffset(pkt->getAddr());
-
-            assert(sizeof(uint64_t) >= pkt->getSize());
-
-            overwrite_mem = true;
-            // keep a copy of our possible write value, and copy what is at the
-            // memory address into the packet
-            std::memcpy(&overwrite_val, pkt->getPtr<uint8_t>(), pkt->getSize());
-            std::memcpy(pkt->getPtr<uint8_t>(), blk->data + offset,
-                        pkt->getSize());
-
-            if (pkt->req->isCondSwap()) {
-                if (pkt->getSize() == sizeof(uint64_t)) {
-                    condition_val64 = pkt->req->getExtraData();
-                    overwrite_mem = !std::memcmp(&condition_val64, blk->data + offset,
-                                                 sizeof(uint64_t));
-                } else if (pkt->getSize() == sizeof(uint32_t)) {
-                    condition_val32 = (uint32_t)pkt->req->getExtraData();
-                    overwrite_mem = !std::memcmp(&condition_val32, blk->data + offset,
-                                                 sizeof(uint32_t));
-                } else
-                    panic("Invalid size for conditional read/write\n");
-            }
-
-            if (overwrite_mem)
-                std::memcpy(blk->data + offset,
-                            &overwrite_val, pkt->getSize());
-
-}
 
 template<class TagStore, class Coherence>
-typename Cache<TagStore,Coherence>::BlkType*
-Cache<TagStore,Coherence>::handleFill(BlkType *blk, PacketPtr &pkt,
-                                      CacheBlk::State new_state,
-                                      PacketList & writebacks,
-                                      PacketPtr target)
+bool
+Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
 {
-#ifndef NDEBUG
-    BlkType *tmp_blk = tags->findBlock(pkt->getAddr());
-    assert(tmp_blk == blk);
-#endif
-    blk = doReplacement(blk, pkt, new_state, writebacks);
+//@todo Add back in MemDebug Calls
+//    MemDebug::cacheAccess(pkt);
 
+    // we charge hitLatency for doing just about anything here
+    Tick time =  curTick + hitLatency;
 
-    if (pkt->isRead()) {
-        std::memcpy(blk->data, pkt->getPtr<uint8_t>(), blkSize);
+    if (pkt->req->isUncacheable()) {
+        allocateBuffer(pkt, time, false, true);
+        assert(pkt->needsResponse()); // else we should delete it here??
+        return true;
     }
 
-        blk->whenReady = pkt->finishTime;
+    PacketList writebacks;
+    int lat = hitLatency;
+    BlkType *blk = tags->findBlock(pkt->getAddr(), lat);
+    bool satisfied = false;
+
+    Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
 
-    // Respond to target, if any
-    if (target) {
+    MSHR *mshr = mshrQueue.findMatch(blk_addr);
 
-        target->flags |= SATISFIED;
+    if (!mshr) {
+        // no outstanding access to this block, look up in cache
+        // (otherwise if we allow reads while there's an outstanding
+        // write miss, the read could return stale data out of the
+        // cache block... a more aggressive system could detect the
+        // overlap (if any) and forward data out of the MSHRs, but we
+        // don't do that yet)
+        satisfied = access(pkt, blk, lat);
+    }
 
-        if (target->cmd == MemCmd::InvalidateReq) {
-            tags->invalidateBlk(blk);
-            blk = NULL;
+#if 0
+    // If this is a block size write/hint (WH64) allocate the block here
+    // if the coherence protocol allows it.
+    /** @todo make the fast write alloc (wh64) work with coherence. */
+    /** @todo Do we want to do fast writes for writebacks as well? */
+    if (!blk && pkt->getSize() >= blkSize && coherence->allowFastWrites() &&
+        (pkt->cmd == MemCmd::WriteReq
+         || pkt->cmd == MemCmd::WriteInvalidateReq) ) {
+        // no outstanding misses, can do this
+        MSHR *outstanding_miss = mshrQueue.findMatch(pkt->getAddr());
+        if (pkt->cmd == MemCmd::WriteInvalidateReq || !outstanding_miss) {
+            if (outstanding_miss) {
+                warn("WriteInv doing a fastallocate"
+                     "with an outstanding miss to the same address\n");
+            }
+            blk = handleFill(NULL, pkt, BlkValid | BlkWritable,
+                                   writebacks);
+            ++fastWrites;
         }
+    }
+#endif
 
-        if (blk && ((target->isWrite() || target->isReadWrite()) ?
-                    blk->isWritable() : blk->isValid())) {
-            assert(target->isWrite() || target->isReadWrite() || target->isRead());
-            assert(target->getOffset(blkSize) + target->getSize() <= blkSize);
-            if (target->isWrite()) {
-                if (blk->checkWrite(pkt->req)) {
-                    blk->status |= BlkDirty;
-                    std::memcpy(blk->data + target->getOffset(blkSize),
-                           target->getPtr<uint8_t>(), target->getSize());
-                }
-            } else if (target->isReadWrite()) {
-                cmpAndSwap(blk, target);
-            } else {
-                if (pkt->req->isLocked()) {
-                    blk->trackLoadLocked(pkt->req);
-                }
-                std::memcpy(target->getPtr<uint8_t>(),
-                       blk->data + target->getOffset(blkSize),
-                       target->getSize());
+    // copy writebacks to write buffer
+    while (!writebacks.empty()) {
+        PacketPtr wbPkt = writebacks.front();
+        allocateBuffer(wbPkt, time, false, true);
+        writebacks.pop_front();
+    }
+
+    bool needsResponse = pkt->needsResponse();
+
+    if (satisfied) {
+        assert(needsResponse);
+        pkt->makeTimingResponse();
+        cpuSidePort->respond(pkt, curTick+lat);
+    } else {
+        // miss
+        if (prefetchMiss)
+            prefetcher->handleMiss(pkt, time);
+
+        if (mshr) {
+            // MSHR hit
+            //@todo remove hw_pf here
+            mshr_hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
+            if (mshr->threadNum != 0/*pkt->req->getThreadNum()*/) {
+                mshr->threadNum = -1;
             }
+            mshr->allocateTarget(pkt, true);
+            if (mshr->getNumTargets() == numTarget) {
+                noTargetMSHR = mshr;
+                setBlocked(Blocked_NoTargets);
+                mshrQueue.moveToFront(mshr);
+            }
+        } else {
+            // no MSHR
+            mshr_misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
+            // always mark as cache fill for now... if we implement
+            // no-write-allocate or bypass accesses this will have to
+            // be changed.
+            allocateBuffer(pkt, time, true, true);
         }
     }
 
-    if (blk) {
-        // Need to write the data into the block
-        updateData(blk, writebacks, !adaptiveCompression || true);
+    if (!needsResponse) {
+        // Need to clean up the packet on a writeback miss, but leave
+        // the request for the next level.
+        delete pkt;
     }
-    return blk;
+
+    return true;
 }
 
+
 template<class TagStore, class Coherence>
-typename Cache<TagStore,Coherence>::BlkType*
-Cache<TagStore,Coherence>::handleFill(BlkType *blk, MSHR * mshr,
-                                      CacheBlk::State new_state,
-                                      PacketList & writebacks, PacketPtr pkt)
+Tick
+Cache<TagStore,Coherence>::atomicAccess(PacketPtr pkt)
 {
-/*
-#ifndef NDEBUG
-    BlkType *tmp_blk = findBlock(mshr->pkt->getAddr());
-    assert(tmp_blk == blk);
-#endif
-    PacketPtr pkt = mshr->pkt;*/
-    blk = doReplacement(blk, pkt, new_state, writebacks);
+    // should assert here that there are no outstanding MSHRs or
+    // writebacks... that would mean that someone used an atomic
+    // access in timing mode
 
-    if (pkt->isRead()) {
-        std::memcpy(blk->data, pkt->getPtr<uint8_t>(), blkSize);
+    if (pkt->req->isUncacheable()) {
+        // Uncacheables just go through
+        return memSidePort->sendAtomic(pkt);
     }
 
-    blk->whenReady = pkt->finishTime;
+    PacketList writebacks;
+    int lat = hitLatency;
+    BlkType *blk = tags->findBlock(pkt->getAddr(), lat);
+    bool satisfied = access(pkt, blk, lat);
 
+    if (!satisfied) {
+        // MISS
+        CacheBlk::State old_state = (blk) ? blk->status : 0;
+        MemCmd cmd = coherence->getBusCmd(pkt->cmd, old_state);
+        Packet busPkt = Packet(pkt->req, cmd, Packet::Broadcast, blkSize);
+        busPkt.allocate();
 
-    // respond to MSHR targets, if any
+        DPRINTF(Cache, "Sending a atomic %s for %x\n",
+                busPkt.cmdString(), busPkt.getAddr());
 
-    // First offset for critical word first calculations
-    int initial_offset = 0;
+        lat += memSidePort->sendAtomic(&busPkt);
 
-    if (mshr->hasTargets()) {
-        initial_offset = mshr->getTarget()->getOffset(blkSize);
+        DPRINTF(Cache, "Receive response: %s for addr %x in state %i\n",
+                busPkt.cmdString(), busPkt.getAddr(), old_state);
+
+        blk = handleFill(&busPkt, blk, writebacks);
+        bool status = satisfyCpuSideRequest(pkt, blk);
+        assert(status);
     }
 
-    while (mshr->hasTargets()) {
-        PacketPtr target = mshr->getTarget();
+    // We now have the block one way or another (hit or completed miss)
 
-        target->flags |= SATISFIED;
+    // Handle writebacks if needed
+    while (!writebacks.empty()){
+        PacketPtr wbPkt = writebacks.front();
+        memSidePort->sendAtomic(wbPkt);
+        writebacks.pop_front();
+        delete wbPkt;
+    }
 
-        // How many bytes pass the first request is this one
-        int transfer_offset = target->getOffset(blkSize) - initial_offset;
-        if (transfer_offset < 0) {
-            transfer_offset += blkSize;
-        }
+    if (pkt->needsResponse()) {
+        pkt->makeAtomicResponse();
+        pkt->result = Packet::Success;
+    }
 
-        // If critical word (no offset) return first word time
-        Tick completion_time = tags->getHitLatency() +
-            transfer_offset ? pkt->finishTime : pkt->firstWordTime;
+    return lat;
+}
 
-        if (target->cmd == MemCmd::InvalidateReq) {
-            //Mark the blk as invalid now, if it hasn't been already
-            if (blk) {
-                tags->invalidateBlk(blk);
-                blk = NULL;
-            }
 
-            //Also get rid of the invalidate
-            mshr->popTarget();
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::functionalAccess(PacketPtr pkt,
+                                            CachePort *otherSidePort)
+{
+    Addr blk_addr = pkt->getAddr() & ~(blkSize - 1);
+    BlkType *blk = tags->findBlock(pkt->getAddr());
 
-            DPRINTF(Cache, "Popping off a Invalidate for addr %x\n",
-                    pkt->getAddr());
+    if (blk && pkt->checkFunctional(blk_addr, blkSize, blk->data)) {
+        // request satisfied from block
+        return;
+    }
 
-            continue;
-        }
+    // Need to check for outstanding misses and writes
 
-        if (blk && ((target->isWrite() || target->isReadWrite()) ?
-            blk->isWritable() : blk->isValid())) {
-            assert(target->isWrite() || target->isRead() || target->isReadWrite() );
-            assert(target->getOffset(blkSize) + target->getSize() <= blkSize);
-            if (target->isWrite()) {
-                if (blk->checkWrite(pkt->req)) {
-                    blk->status |= BlkDirty;
-                    std::memcpy(blk->data + target->getOffset(blkSize),
-                           target->getPtr<uint8_t>(), target->getSize());
-                }
-            } else if (target->isReadWrite()) {
-                cmpAndSwap(blk, target);
-            } else {
-                if (target->req->isLocked()) {
-                    blk->trackLoadLocked(target->req);
-                }
-                std::memcpy(target->getPtr<uint8_t>(),
-                       blk->data + target->getOffset(blkSize),
-                       target->getSize());
-            }
-        } else {
-            // Invalid access, need to do another request
-            // can occur if block is invalidated, or not correct
-            // permissions
-//            mshr->pkt = pkt;
-            break;
-        }
-        if (!target->req->isUncacheable()) {
-            missLatency[target->cmdToIndex()][0/*pkt->req->getThreadNum()*/] +=
-                completion_time - target->time;
+    // There can only be one matching outstanding miss.
+    MSHR *mshr = mshrQueue.findMatch(blk_addr);
+    if (mshr) {
+        MSHR::TargetList *targets = mshr->getTargetList();
+        MSHR::TargetList::iterator i = targets->begin();
+        MSHR::TargetList::iterator end = targets->end();
+        for (; i != end; ++i) {
+            PacketPtr targetPkt = i->pkt;
+            if (pkt->checkFunctional(targetPkt))
+                return;
         }
-        respond(target, completion_time);
-        mshr->popTarget();
     }
 
-    if (blk) {
-        // Need to write the data into the block
-        updateData(blk, writebacks, !adaptiveCompression || true);
+    // There can be many matching outstanding writes.
+    std::vector<MSHR*> writes;
+    writeBuffer.findMatches(blk_addr, writes);
+    for (int i = 0; i < writes.size(); ++i) {
+        MSHR *mshr = writes[i];
+        if (pkt->checkFunctional(mshr->addr, mshr->size, mshr->writeData))
+            return;
     }
 
-    return blk;
+    otherSidePort->checkAndSendFunctional(pkt);
 }
 
 
+/////////////////////////////////////////////////////
+//
+// Response handling: responses from the memory side
+//
+/////////////////////////////////////////////////////
+
 template<class TagStore, class Coherence>
 void
-Cache<TagStore,Coherence>::handleSnoop(BlkType *blk,
-                                       CacheBlk::State new_state,
-                                       PacketPtr &pkt)
+Cache<TagStore,Coherence>::handleResponse(PacketPtr pkt, Tick time)
 {
-    //Must have the block to supply
-    assert(blk);
-    // Can only supply data, and if it hasn't already been supllied
-    assert(pkt->isRead());
-    assert(!(pkt->flags & SATISFIED));
-    pkt->flags |= SATISFIED;
-    Addr offset = pkt->getOffset(blkSize);
-    assert(offset < blkSize);
-    assert(pkt->getSize() <= blkSize);
-    assert(offset + pkt->getSize() <=blkSize);
-    std::memcpy(pkt->getPtr<uint8_t>(), blk->data + offset, pkt->getSize());
-
-    handleSnoop(blk, new_state);
+    MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
+#ifndef NDEBUG
+    int num_targets = mshr->getNumTargets();
+#endif
+
+    bool unblock = false;
+    bool unblock_target = false;
+    BlockedCause cause = NUM_BLOCKED_CAUSES;
+
+    if (mshr->isCacheFill) {
+#if 0
+        mshr_miss_latency[mshr->originalCmd.toInt()][0/*pkt->req->getThreadNum()*/] +=
+            curTick - pkt->time;
+#endif
+        // targets were handled in the cache tags
+        if (mshr == noTargetMSHR) {
+            // we always clear at least one target
+            unblock_target = true;
+            cause = Blocked_NoTargets;
+            noTargetMSHR = NULL;
+        }
+
+        if (mshr->hasTargets()) {
+            // Didn't satisfy all the targets, need to resend
+            mshrQueue.markPending(mshr);
+            mshr->order = order++;
+            requestMemSideBus(Request_MSHR, time);
+        }
+        else {
+            unblock = mshrQueue.isFull();
+            mshrQueue.deallocate(mshr);
+            if (unblock) {
+                unblock = !mshrQueue.isFull();
+                cause = Blocked_NoMSHRs;
+            }
+        }
+    } else {
+        if (pkt->req->isUncacheable()) {
+            mshr_uncacheable_lat[pkt->cmd.toInt()][0/*pkt->req->getThreadNum()*/] +=
+                curTick - pkt->time;
+        }
+        if (mshr->hasTargets() && pkt->req->isUncacheable()) {
+            // Should only have 1 target if we had any
+            assert(num_targets == 1);
+            MSHR::Target *target = mshr->getTarget();
+            assert(target->cpuSide);
+            mshr->popTarget();
+            if (pkt->isRead()) {
+                target->pkt->setData(pkt->getPtr<uint8_t>());
+            }
+            cpuSidePort->respond(target->pkt, time);
+            assert(!mshr->hasTargets());
+        }
+        else if (mshr->hasTargets()) {
+            //Must be a no_allocate with possibly more than one target
+            assert(!mshr->isCacheFill);
+            while (mshr->hasTargets()) {
+                MSHR::Target *target = mshr->getTarget();
+                assert(target->isCpuSide());
+                mshr->popTarget();
+                if (pkt->isRead()) {
+                    target->pkt->setData(pkt->getPtr<uint8_t>());
+                }
+                cpuSidePort->respond(target->pkt, time);
+            }
+        }
+
+        if (pkt->isWrite()) {
+            // If the write buffer is full, we might unblock now
+            unblock = writeBuffer.isFull();
+            writeBuffer.deallocate(mshr);
+            if (unblock) {
+                // Did we really unblock?
+                unblock = !writeBuffer.isFull();
+                cause = Blocked_NoWBBuffers;
+            }
+        } else {
+            unblock = mshrQueue.isFull();
+            mshrQueue.deallocate(mshr);
+            if (unblock) {
+                unblock = !mshrQueue.isFull();
+                cause = Blocked_NoMSHRs;
+            }
+        }
+    }
+    if (unblock || unblock_target) {
+        clearBlocked(cause);
+    }
 }
 
+
 template<class TagStore, class Coherence>
 void
-Cache<TagStore,Coherence>::handleSnoop(BlkType *blk,
-                                       CacheBlk::State new_state)
+Cache<TagStore,Coherence>::handleResponse(PacketPtr pkt)
 {
-    if (blk && blk->status != new_state) {
-        if ((new_state && BlkValid) == 0) {
-            tags->invalidateBlk(blk);
-        } else {
-            assert(new_state >= 0 && new_state < 128);
-            blk->status = new_state;
+    Tick time = curTick + hitLatency;
+    MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
+    assert(mshr);
+    if (pkt->result == Packet::Nacked) {
+        //pkt->reinitFromRequest();
+        warn("NACKs from devices not connected to the same bus "
+             "not implemented\n");
+        return;
+    }
+    assert(pkt->result != Packet::BadAddress);
+    assert(pkt->result == Packet::Success);
+    DPRINTF(Cache, "Handling reponse to %x\n", pkt->getAddr());
+
+    if (mshr->isCacheFill) {
+        DPRINTF(Cache, "Block for addr %x being updated in Cache\n",
+                pkt->getAddr());
+        BlkType *blk = tags->findBlock(pkt->getAddr());
+        PacketList writebacks;
+        blk = handleFill(pkt, blk, writebacks);
+        satisfyMSHR(mshr, pkt, blk);
+        // copy writebacks to write buffer
+        while (!writebacks.empty()) {
+            PacketPtr wbPkt = writebacks.front();
+            allocateBuffer(wbPkt, time, false, true);
+            writebacks.pop_front();
         }
     }
+    handleResponse(pkt, time);
 }
 
+
+
+
 template<class TagStore, class Coherence>
 PacketPtr
 Cache<TagStore,Coherence>::writebackBlk(BlkType *blk)
 {
-    assert(blk && blk->isValid() && blk->isModified());
-    int data_size = blkSize;
-    data_size = blk->size;
-    if (compressOnWriteback) {
-        // not already compressed
-        // need to compress to ship it
-        assert(data_size == blkSize);
-        uint8_t *tmp_data = new uint8_t[blkSize];
-        data_size = compressionAlg->compress(tmp_data,blk->data,
-                                      data_size);
-        delete [] tmp_data;
-    }
+    assert(blk && blk->isValid() && blk->isDirty());
 
-/*    PacketPtr writeback =
-        buildWritebackReq(tags->regenerateBlkAddr(blk->tag, blk->set),
-                          blk->asid, blkSize,
-                          blk->data, data_size);
-*/
+    writebacks[0/*pkt->req->getThreadNum()*/]++;
 
     Request *writebackReq =
         new Request(tags->regenerateBlkAddr(blk->tag, blk->set), blkSize, 0);
     PacketPtr writeback = new Packet(writebackReq, MemCmd::Writeback, -1);
     writeback->allocate();
-    std::memcpy(writeback->getPtr<uint8_t>(),blk->data,blkSize);
+    std::memcpy(writeback->getPtr<uint8_t>(), blk->data, blkSize);
 
     blk->status &= ~BlkDirty;
     return writeback;
 }
 
 
-template<class TagStore, class Coherence>
-bool
-Cache<TagStore,Coherence>::verifyData(BlkType *blk)
-{
-    bool retval;
-    // The data stored in the blk
-    uint8_t *blk_data = new uint8_t[blkSize];
-    tags->readData(blk, blk_data);
-    // Pointer for uncompressed data, assumed uncompressed
-    uint8_t *tmp_data = blk_data;
-    // The size of the data being stored, assumed uncompressed
-    int data_size = blkSize;
-
-    // If the block is compressed need to uncompress to access
-    if (blk->isCompressed()){
-        // Allocate new storage for the data
-        tmp_data = new uint8_t[blkSize];
-        data_size = compressionAlg->uncompress(tmp_data,blk_data, blk->size);
-        assert(data_size == blkSize);
-        // Don't need to keep blk_data around
-        delete [] blk_data;
-    } else {
-        assert(blkSize == blk->size);
-    }
-
-    retval = std::memcmp(tmp_data, blk->data, blkSize) == 0;
-    delete [] tmp_data;
-    return retval;
-}
-
-template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::updateData(BlkType *blk, PacketList &writebacks,
-                                        bool compress_block)
-{
-    if (storeCompressed && compress_block) {
-        uint8_t *comp_data = new uint8_t[blkSize];
-        int new_size = compressionAlg->compress(comp_data, blk->data, blkSize);
-        if (new_size > (blkSize - tags->getSubBlockSize())){
-            // no benefit to storing it compressed
-            blk->status &= ~BlkCompressed;
-            tags->writeData(blk, blk->data, blkSize,
-                          writebacks);
-        } else {
-            // Store the data compressed
-            blk->status |= BlkCompressed;
-            tags->writeData(blk, comp_data, new_size,
-                          writebacks);
-        }
-        delete [] comp_data;
-    } else {
-        blk->status &= ~BlkCompressed;
-        tags->writeData(blk, blk->data, blkSize, writebacks);
-    }
-}
-
+// Note that the reason we return a list of writebacks rather than
+// inserting them directly in the write buffer is that this function
+// is called by both atomic and timing-mode accesses, and in atomic
+// mode we don't mess with the write buffer (we just perform the
+// writebacks atomically once the original request is complete).
 template<class TagStore, class Coherence>
 typename Cache<TagStore,Coherence>::BlkType*
-Cache<TagStore,Coherence>::doReplacement(BlkType *blk, PacketPtr &pkt,
-                                         CacheBlk::State new_state,
-                                         PacketList &writebacks)
+Cache<TagStore,Coherence>::handleFill(PacketPtr pkt, BlkType *blk,
+                                      PacketList &writebacks)
 {
+    Addr addr = pkt->getAddr();
+
     if (blk == NULL) {
+
         // need to do a replacement
-        BlkList compress_list;
-        blk = tags->findReplacement(pkt, writebacks, compress_list);
-        while (adaptiveCompression && !compress_list.empty()) {
-            updateData(compress_list.front(), writebacks, true);
-            compress_list.pop_front();
-        }
+        blk = tags->findReplacement(addr, writebacks);
         if (blk->isValid()) {
             DPRINTF(Cache, "replacement: replacing %x with %x: %s\n",
-                    tags->regenerateBlkAddr(blk->tag,blk->set), pkt->getAddr(),
-                    (blk->isModified()) ? "writeback" : "clean");
+                    tags->regenerateBlkAddr(blk->tag, blk->set), addr,
+                    blk->isDirty() ? "writeback" : "clean");
 
-            if (blk->isModified()) {
-                // Need to write the data back
+            if (blk->isDirty()) {
+                // Save writeback packet for handling by caller
                 writebacks.push_back(writebackBlk(blk));
             }
         }
-        blk->tag = tags->extractTag(pkt->getAddr(), blk);
+
+        blk->tag = tags->extractTag(addr);
+        blk->status = coherence->getNewState(pkt);
+        assert(pkt->isRead());
     } else {
-        // must be a status change
-        // assert(blk->status != new_state);
-        if (blk->status == new_state) warn("Changing state to same value\n");
+        // existing block... probably an upgrade
+        assert(blk->tag == tags->extractTag(addr));
+        // either we're getting new data or the block should already be valid
+        assert(pkt->isRead() || blk->isValid());
+        CacheBlk::State old_state = blk->status;
+        blk->status = coherence->getNewState(pkt, old_state);
+        if (blk->status != old_state)
+            DPRINTF(Cache, "Block addr %x moving from state %i to %i\n",
+                    addr, old_state, blk->status);
+        else
+            warn("Changing state to same value\n");
     }
 
-    blk->status = new_state;
+    // if we got new data, copy it in
+    if (pkt->isRead()) {
+        std::memcpy(blk->data, pkt->getPtr<uint8_t>(), blkSize);
+    }
+
+    blk->whenReady = pkt->finishTime;
+
     return blk;
 }
 
 
 template<class TagStore, class Coherence>
 bool
-Cache<TagStore,Coherence>::access(PacketPtr &pkt)
+Cache<TagStore,Coherence>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk)
 {
-//@todo Add back in MemDebug Calls
-//    MemDebug::cacheAccess(pkt);
-    BlkType *blk = NULL;
-    PacketList writebacks;
-    int size = blkSize;
-    int lat = hitLatency;
-    if (prefetchAccess) {
-        //We are determining prefetches on access stream, call prefetcher
-        prefetcher->handleMiss(pkt, curTick);
-    }
-
-    Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
-
-    if (!pkt->req->isUncacheable()) {
-        if (!missQueue->findMSHR(blk_addr)) {
-            blk = handleAccess(pkt, lat, writebacks);
-        }
-    } else {
-        size = pkt->getSize();
-    }
-    // If this is a block size write/hint (WH64) allocate the block here
-    // if the coherence protocol allows it.
-    /** @todo make the fast write alloc (wh64) work with coherence. */
-    /** @todo Do we want to do fast writes for writebacks as well? */
-    if (!blk && pkt->getSize() >= blkSize && coherence->allowFastWrites() &&
-        (pkt->cmd == MemCmd::WriteReq
-         || pkt->cmd == MemCmd::WriteInvalidateReq) ) {
-        // not outstanding misses, can do this
-        MSHR* outstanding_miss = missQueue->findMSHR(pkt->getAddr());
-        if (pkt->cmd == MemCmd::WriteInvalidateReq || !outstanding_miss) {
-            if (outstanding_miss) {
-                warn("WriteInv doing a fastallocate"
-                     "with an outstanding miss to the same address\n");
+    if (blk && (pkt->needsExclusive() ? blk->isWritable() : blk->isValid())) {
+        assert(pkt->isWrite() || pkt->isReadWrite() || pkt->isRead());
+        assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize);
+
+        if (pkt->isWrite()) {
+            if (blk->checkWrite(pkt)) {
+                blk->status |= BlkDirty;
+                pkt->writeDataToBlock(blk->data, blkSize);
             }
-            blk = handleFill(NULL, pkt, BlkValid | BlkWritable,
-                                   writebacks);
-            ++fastWrites;
+        } else if (pkt->isReadWrite()) {
+            cmpAndSwap(blk, pkt);
+        } else {
+            if (pkt->isLocked()) {
+                blk->trackLoadLocked(pkt);
+            }
+            pkt->setDataFromBlock(blk->data, blkSize);
         }
+
+        return true;
+    } else {
+        return false;
     }
-    while (!writebacks.empty()) {
-        PacketPtr wbPkt = writebacks.front();
-        missQueue->doWriteback(wbPkt);
-        writebacks.pop_front();
-        delete wbPkt;
-    }
+}
+
+
+template<class TagStore, class Coherence>
+bool
+Cache<TagStore,Coherence>::satisfyTarget(MSHR::Target *target, BlkType *blk)
+{
+    assert(target != NULL);
+    assert(target->isCpuSide());
+    return satisfyCpuSideRequest(target->pkt, blk);
+}
+
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::satisfyMSHR(MSHR *mshr, PacketPtr pkt,
+                                       BlkType *blk)
+{
+    // respond to MSHR targets, if any
 
-    DPRINTF(Cache, "%s %x %s\n", pkt->cmdString(), pkt->getAddr(),
-            (blk) ? "hit" : "miss");
+    // First offset for critical word first calculations
+    int initial_offset = 0;
 
-    if (blk) {
-        // Hit
-        hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
-        // clear dirty bit if write through
-        respond(pkt, curTick+lat);
-        return true;
+    if (mshr->hasTargets()) {
+        initial_offset = mshr->getTarget()->pkt->getOffset(blkSize);
     }
 
-    // Miss
-    if (!pkt->req->isUncacheable()) {
-        misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
-        /** @todo Move miss count code into BaseCache */
-        if (missCount) {
-            --missCount;
-            if (missCount == 0)
-                exitSimLoop("A cache reached the maximum miss count");
-        }
-    }
+    while (mshr->hasTargets()) {
+        MSHR::Target *target = mshr->getTarget();
 
-    if (pkt->flags & SATISFIED) {
-        // happens when a store conditional fails because it missed
-        // the cache completely
-        respond(pkt, curTick+lat);
-    } else {
-        missQueue->handleMiss(pkt, size, curTick + hitLatency);
-    }
+        if (!satisfyTarget(target, blk)) {
+            // Invalid access, need to do another request
+            // can occur if block is invalidated, or not correct
+            // permissions
+            break;
+        }
 
-    if (!pkt->needsResponse()) {
-        //Need to clean up the packet on a writeback miss, but leave the request
-        //for the next level.
-        delete pkt;
-    }
 
-    return true;
-}
+        // How many bytes past the first request's offset this one is
+        int transfer_offset = target->pkt->getOffset(blkSize) - initial_offset;
+        if (transfer_offset < 0) {
+            transfer_offset += blkSize;
+        }
 
+        // Critical word (offset 0) uses firstWordTime, others finishTime
+        Tick completion_time = tags->getHitLatency() +
+            (transfer_offset ? pkt->finishTime : pkt->firstWordTime);
 
-template<class TagStore, class Coherence>
-PacketPtr
-Cache<TagStore,Coherence>::getPacket()
-{
-    assert(missQueue->havePending());
-    PacketPtr pkt = missQueue->getPacket();
-    if (pkt) {
-        if (!pkt->req->isUncacheable()) {
-            if (pkt->cmd == MemCmd::HardPFReq)
-                misses[MemCmd::HardPFReq][0/*pkt->req->getThreadNum()*/]++;
-            BlkType *blk = tags->findBlock(pkt->getAddr());
-            MemCmd cmd =
-                coherence->getBusCmd(pkt->cmd, (blk) ? blk->status : 0);
-            missQueue->setBusCmd(pkt, cmd);
+        if (!target->pkt->req->isUncacheable()) {
+            missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] +=
+                completion_time - target->time;
         }
+        target->pkt->makeTimingResponse();
+        cpuSidePort->respond(target->pkt, completion_time);
+        mshr->popTarget();
     }
-
-    assert(!isMemSideBusRequested() || missQueue->havePending());
-    assert(!pkt || pkt->time <= curTick);
-    SIGNAL_NACK_HACK = false;
-    return pkt;
 }
 
+
+/////////////////////////////////////////////////////
+//
+// Snoop path: requests coming in from the memory side
+//
+/////////////////////////////////////////////////////
+
 template<class TagStore, class Coherence>
 void
-Cache<TagStore,Coherence>::sendResult(PacketPtr &pkt, MSHR* mshr,
-                                                bool success)
+Cache<TagStore,Coherence>::doTimingSupplyResponse(PacketPtr req_pkt,
+                                                  uint8_t *blk_data)
 {
-    if (success && !(SIGNAL_NACK_HACK)) {
-        //Remember if it was an upgrade because writeback MSHR's are removed
-        //in Mark in Service
-        bool upgrade = (mshr->pkt && mshr->pkt->cmd == MemCmd::UpgradeReq);
-
-        missQueue->markInService(mshr->pkt, mshr);
-
-        //Temp Hack for UPGRADES
-        if (upgrade) {
-            assert(pkt);  //Upgrades need to be fixed
-            pkt->flags &= ~CACHE_LINE_FILL;
-            BlkType *blk = tags->findBlock(pkt->getAddr());
-            CacheBlk::State old_state = (blk) ? blk->status : 0;
-            CacheBlk::State new_state = coherence->getNewState(pkt,old_state);
-            if (old_state != new_state)
-                DPRINTF(Cache, "Block for blk addr %x moving from state "
-                        "%i to %i\n", pkt->getAddr(), old_state, new_state);
-            //Set the state on the upgrade
-            std::memcpy(pkt->getPtr<uint8_t>(), blk->data, blkSize);
-            PacketList writebacks;
-            handleFill(blk, mshr, new_state, writebacks, pkt);
-            assert(writebacks.empty());
-            missQueue->handleResponse(pkt, curTick + hitLatency);
-        }
-    } else if (pkt && !pkt->req->isUncacheable()) {
-        pkt->flags &= ~NACKED_LINE;
-        SIGNAL_NACK_HACK = false;
-        pkt->flags &= ~SATISFIED;
-
-//Rmove copy from mshr
-        delete mshr->pkt;
-        mshr->pkt = pkt;
-
-        missQueue->restoreOrigCmd(pkt);
-    }
+    // timing-mode snoop responses require a new packet
+    PacketPtr pkt = new Packet(req_pkt);
+    pkt->allocate();
+    pkt->makeTimingResponse();
+    pkt->setDataFromBlock(blk_data, blkSize);
+    memSidePort->respond(pkt, curTick + hitLatency);
 }
 
 template<class TagStore, class Coherence>
 void
-Cache<TagStore,Coherence>::handleResponse(PacketPtr &pkt)
+Cache<TagStore,Coherence>::handleSnoop(PacketPtr pkt, BlkType *blk,
+                                       bool is_timing)
 {
-    BlkType *blk = NULL;
-    if (pkt->senderState) {
-        //Delete temp copy in MSHR, restore it.
-        delete ((MSHR*)pkt->senderState)->pkt;
-        ((MSHR*)pkt->senderState)->pkt = pkt;
-        if (pkt->result == Packet::Nacked) {
-            //pkt->reinitFromRequest();
-            warn("NACKs from devices not connected to the same bus "
-                 "not implemented\n");
-            return;
-        }
-        if (pkt->result == Packet::BadAddress) {
-            //Make the response a Bad address and send it
+    if (!blk || !blk->isValid()) {
+        return;
+    }
+
+    // we may end up modifying both the block state and the packet (if
+    // we respond in atomic mode), so just figure out what to do now
+    // and then do it later
+    bool supply = blk->isDirty() && pkt->isRead();
+    bool invalidate = pkt->isInvalidate();
+
+    if (pkt->isRead() && !pkt->isInvalidate()) {
+        assert(!pkt->needsExclusive());
+        pkt->assertShared();
+        int bits_to_clear = BlkWritable;
+        const bool haveOwnershipState = true; // for now
+        if (!haveOwnershipState) {
+            // if we don't support pure ownership (dirty && !writable),
+            // have to clear dirty bit here, assume memory snarfs data
+            // on cache-to-cache xfer
+            bits_to_clear |= BlkDirty;
         }
-//	MemDebug::cacheResponse(pkt);
-        DPRINTF(Cache, "Handling reponse to %x\n", pkt->getAddr());
-
-        if (pkt->isCacheFill() && !pkt->isNoAllocate()) {
-            DPRINTF(Cache, "Block for addr %x being updated in Cache\n",
-                    pkt->getAddr());
-            blk = tags->findBlock(pkt->getAddr());
-            CacheBlk::State old_state = (blk) ? blk->status : 0;
-            PacketList writebacks;
-            CacheBlk::State new_state = coherence->getNewState(pkt,old_state);
-            if (old_state != new_state)
-                DPRINTF(Cache, "Block for blk addr %x moving from "
-                        "state %i to %i\n",
-                        pkt->getAddr(),
-                        old_state, new_state);
-            blk = handleFill(blk, (MSHR*)pkt->senderState,
-                                   new_state, writebacks, pkt);
-            while (!writebacks.empty()) {
-                PacketPtr wbPkt = writebacks.front();
-                missQueue->doWriteback(wbPkt);
-                writebacks.pop_front();
-                delete wbPkt;
-            }
+        blk->status &= ~bits_to_clear;
+    }
+
+    if (supply) {
+        pkt->assertMemInhibit();
+        if (is_timing) {
+            doTimingSupplyResponse(pkt, blk->data);
+        } else {
+            pkt->makeAtomicResponse();
+            pkt->setDataFromBlock(blk->data, blkSize);
         }
-        missQueue->handleResponse(pkt, curTick + hitLatency);
     }
+
+    // Do this last in case it deallocates block data or something
+    // like that
+    if (invalidate) {
+        tags->invalidateBlk(blk);
+    }
+
+    DPRINTF(Cache, "snooped a %s request for addr %x, %snew state is %i\n",
+            pkt->cmdString(), blockAlign(pkt->getAddr()),
+            supply ? "supplying data, " : "", blk->status);
 }
 
 
 template<class TagStore, class Coherence>
 void
-Cache<TagStore,Coherence>::snoop(PacketPtr &pkt)
+Cache<TagStore,Coherence>::snoopTiming(PacketPtr pkt)
 {
     if (pkt->req->isUncacheable()) {
         //Can't get a hit on an uncacheable address
@@ -794,351 +923,190 @@ Cache<TagStore,Coherence>::snoop(PacketPtr &pkt)
         return;
     }
 
-    ///// PROPAGATE SNOOP UPWARD HERE
+    BlkType *blk = tags->findBlock(pkt->getAddr());
 
     Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
-    BlkType *blk = tags->findBlock(pkt->getAddr());
-    MSHR *mshr = missQueue->findMSHR(blk_addr);
-    if (coherence->hasProtocol() || pkt->isInvalidate()) {
-        //@todo Move this into handle bus req
-        //If we find an mshr, and it is in service, we need to NACK or
-        //invalidate
-        if (mshr) {
-            if (mshr->inService) {
-                if ((mshr->pkt->isInvalidate() || !mshr->pkt->isCacheFill())
-                    && (pkt->cmd != MemCmd::InvalidateReq
-                        && pkt->cmd != MemCmd::WriteInvalidateReq)) {
-                    //If the outstanding request was an invalidate
-                    //(upgrade,readex,..)  Then we need to ACK the request
-                    //until we get the data Also NACK if the outstanding
-                    //request is not a cachefill (writeback)
-                    assert(!(pkt->flags & SATISFIED));
-                    pkt->flags |= SATISFIED;
-                    pkt->flags |= NACKED_LINE;
-                    SIGNAL_NACK_HACK = true;
-                    ///@todo NACK's from other levels
-                    //warn("NACKs from devices not connected to the same bus "
-                    //"not implemented\n");
-                    //respondToSnoop(pkt, curTick + hitLatency);
-                    return;
-                }
-                else {
-                    //The supplier will be someone else, because we are
-                    //waiting for the data.  This should cause this cache to
-                    //be forced to go to the shared state, not the exclusive
-                    //even though the shared line won't be asserted.  But for
-                    //now we will just invlidate ourselves and allow the other
-                    //cache to go into the exclusive state.  @todo Make it so
-                    //a read to a pending read doesn't invalidate.  @todo Make
-                    //it so that a read to a pending read can't be exclusive
-                    //now.
-
-                    //Set the address so find match works
-                    //panic("Don't have invalidates yet\n");
-                    invalidatePkt->addrOverride(pkt->getAddr());
-
-                    //Append the invalidate on
-                    missQueue->addTarget(mshr,invalidatePkt);
-                    DPRINTF(Cache, "Appending Invalidate to addr: %x\n",
-                            pkt->getAddr());
-                    return;
+    MSHR *mshr = mshrQueue.findMatch(blk_addr);
+    // better not be snooping a request that conflicts with something
+    // we have outstanding...
+    assert(!mshr || !mshr->inService);
+
+    //We also need to check the writeback buffers and handle those
+    std::vector<MSHR *> writebacks;
+    if (writeBuffer.findMatches(blk_addr, writebacks)) {
+        DPRINTF(Cache, "Snoop hit in writeback to addr: %x\n",
+                pkt->getAddr());
+
+        //Look through writebacks for any non-uncacheable writes, use that
+        for (int i=0; i<writebacks.size(); i++) {
+            mshr = writebacks[i];
+            assert(!mshr->isUncacheable());
+
+            if (pkt->isRead()) {
+                pkt->assertMemInhibit();
+                if (!pkt->needsExclusive()) {
+                    pkt->assertShared();
+                } else {
+                    // if we're not asserting the shared line, we need to
+                    // invalidate our copy.  we'll do that below as long as
+                    // the packet's invalidate flag is set...
+                    assert(pkt->isInvalidate());
                 }
+                doTimingSupplyResponse(pkt, mshr->writeData);
             }
-        }
-        //We also need to check the writeback buffers and handle those
-        std::vector<MSHR *> writebacks;
-        if (missQueue->findWrites(blk_addr, writebacks)) {
-            DPRINTF(Cache, "Snoop hit in writeback to addr: %x\n",
-                    pkt->getAddr());
-
-            //Look through writebacks for any non-uncachable writes, use that
-            for (int i=0; i<writebacks.size(); i++) {
-                mshr = writebacks[i];
-
-                if (!mshr->pkt->req->isUncacheable()) {
-                    if (pkt->isRead()) {
-                        //Only Upgrades don't get here
-                        //Supply the data
-                        assert(!(pkt->flags & SATISFIED));
-                        pkt->flags |= SATISFIED;
-
-                        //If we are in an exclusive protocol, make it ask again
-                        //to get write permissions (upgrade), signal shared
-                        pkt->flags |= SHARED_LINE;
-
-                        assert(pkt->isRead());
-                        Addr offset = pkt->getAddr() & (blkSize - 1);
-                        assert(offset < blkSize);
-                        assert(pkt->getSize() <= blkSize);
-                        assert(offset + pkt->getSize() <=blkSize);
-                        std::memcpy(pkt->getPtr<uint8_t>(), mshr->pkt->getPtr<uint8_t>() + offset, pkt->getSize());
-
-                        respondToSnoop(pkt, curTick + hitLatency);
-                    }
-
-                    if (pkt->isInvalidate()) {
-                        //This must be an upgrade or other cache will take
-                        //ownership
-                        missQueue->markInService(mshr->pkt, mshr);
-                    }
-                    return;
-                }
+
+            if (pkt->isInvalidate()) {
+                // Invalidation trumps our writeback... discard here
+                assert(0);
+                markInService(mshr);
             }
+            return;
         }
     }
-    CacheBlk::State new_state;
-    bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state);
-
-    if (blk && mshr && !mshr->inService && new_state == 0) {
-            //There was a outstanding write to a shared block, not need ReadEx
-            //not update, so change No Allocate param in MSHR
-            mshr->pkt->flags &= ~NO_ALLOCATE;
-    }
-
-    if (satisfy) {
-        DPRINTF(Cache, "Cache snooped a %s request for addr %x and "
-                "now supplying data, new state is %i\n",
-                pkt->cmdString(), blk_addr, new_state);
-
-        handleSnoop(blk, new_state, pkt);
-        respondToSnoop(pkt, curTick + hitLatency);
-        return;
-    }
-    if (blk)
-        DPRINTF(Cache, "Cache snooped a %s request for addr %x, "
-                "new state is %i\n", pkt->cmdString(), blk_addr, new_state);
 
-    handleSnoop(blk, new_state);
+    handleSnoop(pkt, blk, true);
 }
 
+
 template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::snoopResponse(PacketPtr &pkt)
+Tick
+Cache<TagStore,Coherence>::snoopAtomic(PacketPtr pkt)
 {
-    //Need to handle the response, if NACKED
-    if (pkt->flags & NACKED_LINE) {
-        //Need to mark it as not in service, and retry for bus
-        assert(0); //Yeah, we saw a NACK come through
-
-        //For now this should never get called, we return false when we see a
-        //NACK instead, by doing this we allow the bus_blocked mechanism to
-        //handle the retry For now it retrys in just 2 cycles, need to figure
-        //out how to change that Eventually we will want to also have success
-        //come in as a parameter Need to make sure that we handle the
-        //functionality that happens on successufl return of the sendAddr
-        //function
+    if (pkt->req->isUncacheable()) {
+        // Can't get a hit on an uncacheable address
+        // Revisit this for multi level coherence
+        return hitLatency;
     }
+
+    BlkType *blk = tags->findBlock(pkt->getAddr());
+    handleSnoop(pkt, blk, false);
+    return hitLatency;
 }
 
 
-/**
- * @todo Fix to not assume write allocate
- */
 template<class TagStore, class Coherence>
-Tick
-Cache<TagStore,Coherence>::probe(PacketPtr &pkt, bool update,
-                                           CachePort* otherSidePort)
+MSHR *
+Cache<TagStore,Coherence>::getNextMSHR()
 {
-//    MemDebug::cacheProbe(pkt);
-    if (!pkt->req->isUncacheable()) {
-        if (pkt->isInvalidate() && !pkt->isRead() && !pkt->isWrite()) {
-            //Upgrade or Invalidate, satisfy it, don't forward
-            DPRINTF(Cache, "%s %x ?\n", pkt->cmdString(), pkt->getAddr());
-            pkt->flags |= SATISFIED;
-            return 0;
-        }
-    }
+    // Check both MSHR queue and write buffer for potential requests
+    MSHR *miss_mshr  = mshrQueue.getNextMSHR();
+    MSHR *write_mshr = writeBuffer.getNextMSHR();
 
-    if (!update && (otherSidePort == cpuSidePort)) {
-        // Still need to change data in all locations.
-        otherSidePort->checkAndSendFunctional(pkt);
-        if (pkt->isRead() && pkt->result == Packet::Success)
-            return 0;
+    // Now figure out which one to send... some cases are easy
+    if (miss_mshr && !write_mshr) {
+        return miss_mshr;
+    }
+    if (write_mshr && !miss_mshr) {
+        return write_mshr;
     }
 
-    PacketList writebacks;
-    int lat;
-
-    BlkType *blk = handleAccess(pkt, lat, writebacks, update);
-
-    DPRINTF(Cache, "%s %x %s\n", pkt->cmdString(),
-            pkt->getAddr(), (blk) ? "hit" : "miss");
-
-
-    // Need to check for outstanding misses and writes
-    Addr blk_addr = pkt->getAddr() & ~(blkSize - 1);
-
-    // There can only be one matching outstanding miss.
-    MSHR* mshr = missQueue->findMSHR(blk_addr);
-
-    // There can be many matching outstanding writes.
-    std::vector<MSHR*> writes;
-    missQueue->findWrites(blk_addr, writes);
-
-    if (!update) {
-        bool notDone = !(pkt->flags & SATISFIED); //Hit in cache (was a block)
-        // Check for data in MSHR and writebuffer.
-        if (mshr) {
-            MSHR::TargetList *targets = mshr->getTargetList();
-            MSHR::TargetList::iterator i = targets->begin();
-            MSHR::TargetList::iterator end = targets->end();
-            for (; i != end && notDone; ++i) {
-                PacketPtr target = *i;
-                // If the target contains data, and it overlaps the
-                // probed request, need to update data
-                if (target->intersect(pkt)) {
-                    DPRINTF(Cache, "Functional %s access to blk_addr %x intersects a MSHR\n",
-                            pkt->cmdString(), blk_addr);
-                    notDone = fixPacket(pkt, target);
-                }
-            }
-        }
-        for (int i = 0; i < writes.size() && notDone; ++i) {
-            PacketPtr write = writes[i]->pkt;
-            if (write->intersect(pkt)) {
-                DPRINTF(Cache, "Functional %s access to blk_addr %x intersects a writeback\n",
-                        pkt->cmdString(), blk_addr);
-                notDone = fixPacket(pkt, write);
-            }
-        }
-        if (notDone && otherSidePort == memSidePort) {
-            otherSidePort->checkAndSendFunctional(pkt);
-            assert(pkt->result == Packet::Success);
-        }
-        return 0;
-    } else if (!blk && !(pkt->flags & SATISFIED)) {
-        // update the cache state and statistics
-        if (mshr || !writes.empty()){
-            // Can't handle it, return request unsatisfied.
-            panic("Atomic access ran into outstanding MSHR's or WB's!");
-        }
-        if (!pkt->req->isUncacheable() /*Uncacheables just go through*/
-            && (pkt->cmd != MemCmd::Writeback)/*Writebacks on miss fall through*/) {
-                // Fetch the cache block to fill
-            BlkType *blk = tags->findBlock(pkt->getAddr());
-            MemCmd temp_cmd =
-                coherence->getBusCmd(pkt->cmd, (blk) ? blk->status : 0);
-
-            PacketPtr busPkt = new Packet(pkt->req,temp_cmd, -1, blkSize);
-
-            busPkt->allocate();
-
-            busPkt->time = curTick;
-
-            DPRINTF(Cache, "Sending a atomic %s for %x\n",
-                    busPkt->cmdString(), busPkt->getAddr());
-
-            lat = memSidePort->sendAtomic(busPkt);
-
-            //Be sure to flip the response to a request for coherence
-            if (busPkt->needsResponse()) {
-                busPkt->makeAtomicResponse();
+    if (miss_mshr && write_mshr) {
+        // We have one of each... normally we favor the miss request
+        // unless the write buffer is full
+        if (writeBuffer.isFull() && writeBuffer.inServiceEntries == 0) {
+            // Write buffer is full, so we'd like to issue a write;
+            // need to search MSHR queue for conflicting earlier miss.
+            MSHR *conflict_mshr =
+                mshrQueue.findPending(write_mshr->addr, write_mshr->size);
+
+            if (conflict_mshr && conflict_mshr->order < write_mshr->order) {
+                // Service misses in order until conflict is cleared.
+                return conflict_mshr;
             }
 
-/*		if (!(busPkt->flags & SATISFIED)) {
-// blocked at a higher level, just return
-return 0;
-}
-
-*/		misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
-
-            CacheBlk::State old_state = (blk) ? blk->status : 0;
-            CacheBlk::State new_state =
-                coherence->getNewState(busPkt, old_state);
-            DPRINTF(Cache, "Receive response: %s for addr %x in state %i\n",
-                    busPkt->cmdString(), busPkt->getAddr(), old_state);
-            if (old_state != new_state)
-                DPRINTF(Cache, "Block for blk addr %x moving from state "
-                        "%i to %i\n", busPkt->getAddr(), old_state, new_state);
-
-            handleFill(blk, busPkt, new_state, writebacks, pkt);
-            //Free the packet
-            delete busPkt;
-
-            // Handle writebacks if needed
-            while (!writebacks.empty()){
-                PacketPtr wbPkt = writebacks.front();
-                memSidePort->sendAtomic(wbPkt);
-                writebacks.pop_front();
-                delete wbPkt;
-            }
-                return lat + hitLatency;
-        } else {
-            return memSidePort->sendAtomic(pkt);
+            // No conflicts; issue write
+            return write_mshr;
         }
-    } else {
-        if (blk) {
-            // There was a cache hit.
-            // Handle writebacks if needed
-            while (!writebacks.empty()){
-                PacketPtr wbPkt = writebacks.front();
-                memSidePort->sendAtomic(wbPkt);
-                writebacks.pop_front();
-                delete wbPkt;
-            }
 
-            hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
+        // Write buffer isn't full, but need to check it for
+        // conflicting earlier writeback
+        MSHR *conflict_mshr =
+            writeBuffer.findPending(miss_mshr->addr, miss_mshr->size);
+        if (conflict_mshr) {
+            // not sure why we don't check order here... it was in the
+            // original code but commented out.
+
+            // The only way this happens is if we are
+            // doing a write and we didn't have permissions
+            // then subsequently saw a writeback (owned got evicted)
+            // We need to make sure to perform the writeback first
+            // To preserve the dirty data, then we can issue the write
+
+            // should we return write_mshr here instead?  I.e. do we
+            // have to flush writes in order?  I don't think so... not
+            // for Alpha anyway.  Maybe for x86?
+            return conflict_mshr;
         }
 
-        return hitLatency;
+        // No conflicts; issue read
+        return miss_mshr;
+    }
+
+    // fall through... no pending requests.  Try a prefetch.
+    assert(!miss_mshr && !write_mshr);
+    if (!mshrQueue.isFull()) {
+        // If we have a miss queue slot, we can try a prefetch
+        PacketPtr pkt = prefetcher->getPacket();
+        if (pkt) {
+            // Update statistic on number of prefetches issued
+            // (hwpf_mshr_misses)
+            mshr_misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
+            // Don't request bus, since we already have it
+            return allocateBuffer(pkt, curTick, true, false);
+        }
     }
 
-    return 0;
+    return NULL;
 }
 
+
 template<class TagStore, class Coherence>
-Tick
-Cache<TagStore,Coherence>::snoopProbe(PacketPtr &pkt)
+PacketPtr
+Cache<TagStore,Coherence>::getPacket()
 {
-    ///// PROPAGATE SNOOP UPWARD HERE
+    MSHR *mshr = getNextMSHR();
 
-    Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
-    BlkType *blk = tags->findBlock(pkt->getAddr());
-    MSHR *mshr = missQueue->findMSHR(blk_addr);
-    CacheBlk::State new_state = 0;
-    bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state);
-    if (satisfy) {
-        DPRINTF(Cache, "Cache snooped a %s request for addr %x and "
-                "now supplying data, new state is %i\n",
-                pkt->cmdString(), blk_addr, new_state);
-
-            handleSnoop(blk, new_state, pkt);
-            return hitLatency;
+    if (mshr == NULL) {
+        return NULL;
     }
-    if (blk)
-        DPRINTF(Cache, "Cache snooped a %s request for addr %x, "
-                "new state is %i\n",
-                    pkt->cmdString(), blk_addr, new_state);
-    handleSnoop(blk, new_state);
-    return 0;
-}
 
-template<class TagStore, class Coherence>
-Port *
-Cache<TagStore,Coherence>::getPort(const std::string &if_name, int idx)
-{
-    if (if_name == "" || if_name == "cpu_side") {
-        return cpuSidePort;
-    } else if (if_name == "mem_side") {
-        return memSidePort;
-    } else if (if_name == "functional") {
-        return new CpuSidePort(name() + "-cpu_side_funcport", this);
+    BlkType *blk = tags->findBlock(mshr->addr);
+
+    // use request from 1st target
+    MSHR::Target *tgt1 = mshr->getTarget();
+    PacketPtr tgt1_pkt = tgt1->pkt;
+    PacketPtr pkt;
+
+    if (mshr->isCacheFill) {
+        MemCmd cmd;
+        if (blk && blk->isValid()) {
+            // only reason to be here is that blk is shared
+            // (read-only) and we need exclusive
+            assert(mshr->needsExclusive && !blk->isWritable());
+            cmd = MemCmd::UpgradeReq;
+        } else {
+            // block is invalid
+            cmd = mshr->needsExclusive ? MemCmd::ReadExReq : MemCmd::ReadReq;
+        }
+        pkt = new Packet(tgt1_pkt->req, cmd, Packet::Broadcast);
     } else {
-        panic("Port name %s unrecognized\n", if_name);
+        assert(blk == NULL);
+        assert(mshr->getNumTargets() == 1);
+        pkt = new Packet(tgt1_pkt->req, tgt1_pkt->cmd, Packet::Broadcast);
     }
-}
-
-template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::deletePortRefs(Port *p)
-{
-    if (cpuSidePort == p || memSidePort == p)
-        panic("Can only delete functional ports\n");
 
-    delete p;
+    pkt->senderState = mshr;
+    pkt->allocate();
+    return pkt;
 }
 
 
+///////////////
+//
+// CpuSidePort
+//
+///////////////
+
 template<class TagStore, class Coherence>
 void
 Cache<TagStore,Coherence>::CpuSidePort::
@@ -1155,131 +1123,57 @@ template<class TagStore, class Coherence>
 bool
 Cache<TagStore,Coherence>::CpuSidePort::recvTiming(PacketPtr pkt)
 {
-    assert(pkt->result != Packet::Nacked);
-
-    if (!pkt->req->isUncacheable()
-        && pkt->isInvalidate()
-        && !pkt->isRead() && !pkt->isWrite()) {
-        //Upgrade or Invalidate
-        //Look into what happens if two slave caches on bus
-        DPRINTF(Cache, "%s %x ?\n", pkt->cmdString(), pkt->getAddr());
-
-        assert(!(pkt->flags & SATISFIED));
-        pkt->flags |= SATISFIED;
-        //Invalidates/Upgrades need no response if they get the bus
-        return true;
-    }
-
-    if (pkt->isRequest() && blocked)
-    {
+    if (pkt->isRequest() && blocked) {
         DPRINTF(Cache,"Scheduling a retry while blocked\n");
         mustSendRetry = true;
         return false;
     }
 
-    if (pkt->isWrite() && (pkt->req->isLocked())) {
-        pkt->req->setExtraData(1);
-    }
-    myCache()->access(pkt);
+    myCache()->timingAccess(pkt);
     return true;
 }
 
 
 template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::CpuSidePort::recvRetry()
-{
-    recvRetryCommon();
-}
-
-
-template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::CpuSidePort::processRequestEvent()
+Tick
+Cache<TagStore,Coherence>::CpuSidePort::recvAtomic(PacketPtr pkt)
 {
-    if (waitingOnRetry)
-        return;
-    //We have some responses to drain first
-    if (!drainList.empty()) {
-        if (!drainResponse()) {
-            // more responses to drain... re-request bus
-            scheduleRequestEvent(curTick + 1);
-        }
-    }
+    return myCache()->atomicAccess(pkt);
 }
 
 
 template<class TagStore, class Coherence>
 void
-Cache<TagStore,Coherence>::CpuSidePort::processResponseEvent()
+Cache<TagStore,Coherence>::CpuSidePort::recvFunctional(PacketPtr pkt)
 {
-    assert(transmitList.size());
-    assert(transmitList.front().first <= curTick);
-    PacketPtr pkt = transmitList.front().second;
-    transmitList.pop_front();
-    if (!transmitList.empty()) {
-        Tick time = transmitList.front().first;
-        responseEvent->schedule(time <= curTick ? curTick+1 : time);
-    }
-
-    if (pkt->flags & NACKED_LINE)
-        pkt->result = Packet::Nacked;
-    else
-        pkt->result = Packet::Success;
-    pkt->makeTimingResponse();
-    DPRINTF(CachePort, "%s attempting to send a response\n", name());
-    if (!drainList.empty() || waitingOnRetry) {
-        //Already have a list, just append
-        drainList.push_back(pkt);
-        DPRINTF(CachePort, "%s appending response onto drain list\n", name());
-    }
-    else if (!sendTiming(pkt)) {
-        //It failed, save it to list of drain events
-        DPRINTF(CachePort, "%s now waiting for a retry\n", name());
-        drainList.push_back(pkt);
-        waitingOnRetry = true;
-    }
-
-    // Check if we're done draining once this list is empty
-    if (drainList.empty() && transmitList.empty())
-        myCache()->checkDrain();
+    checkFunctional(pkt);
+    if (pkt->result != Packet::Success)
+        myCache()->functionalAccess(pkt, cache->memSidePort);
 }
 
 
 template<class TagStore, class Coherence>
-Tick
-Cache<TagStore,Coherence>::CpuSidePort::recvAtomic(PacketPtr pkt)
-{
-    myCache()->probe(pkt, true, NULL);
-    //TEMP ALWAYS SUCCES FOR NOW
-    pkt->result = Packet::Success;
-    //Fix this timing info
-    return myCache()->hitLatency;
-}
-
-template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::CpuSidePort::recvFunctional(PacketPtr pkt)
+Cache<TagStore,Coherence>::
+CpuSidePort::CpuSidePort(const std::string &_name,
+                         Cache<TagStore,Coherence> *_cache)
+    : BaseCache::CachePort(_name, _cache)
 {
-    if (checkFunctional(pkt)) {
-        //TEMP USE CPU?THREAD 0 0
-        pkt->req->setThreadContext(0,0);
-
-        myCache()->probe(pkt, false, cache->memSidePort);
-        //TEMP ALWAYS SUCCESFUL FOR NOW
-        pkt->result = Packet::Success;
-    }
 }
 
+///////////////
+//
+// MemSidePort
+//
+///////////////
 
 template<class TagStore, class Coherence>
 void
 Cache<TagStore,Coherence>::MemSidePort::
 getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
 {
-    // Memory-side port always snoops.
-    bool dummy;
-    otherPort->getPeerAddressRanges(resp, dummy);
+    otherPort->getPeerAddressRanges(resp, snoop);
+    // Memory-side port always snoops, so unconditionally set flag for
+    // caller.
     snoop = true;
 }
 
@@ -1303,177 +1197,149 @@ Cache<TagStore,Coherence>::MemSidePort::recvTiming(PacketPtr pkt)
     if (pkt->isResponse()) {
         myCache()->handleResponse(pkt);
     } else {
-        myCache()->snoop(pkt);
+        myCache()->snoopTiming(pkt);
     }
     return true;
 }
 
+
+/**
+ * Memory-side atomic receive.  Only snoop requests are expected on
+ * this path; the packet is handed to snoopAtomic() and the Tick value
+ * it returns is passed straight back to the caller.
+ */
 template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::MemSidePort::recvRetry()
+Tick
+Cache<TagStore,Coherence>::MemSidePort::recvAtomic(PacketPtr pkt)
 {
-    if (recvRetryCommon()) {
-        return;
-    }
-
-    DPRINTF(CachePort, "%s attempting to send a retry for MSHR\n", name());
-    if (!cache->isMemSideBusRequested()) {
-        //This can happen if I am the owner of a block and see an upgrade
-        //while the block was in my WB Buffers.  I just remove the
-        //wb and de-assert the masterRequest
-        waitingOnRetry = false;
-        return;
-    }
-    PacketPtr pkt = myCache()->getPacket();
-    MSHR* mshr = (MSHR*) pkt->senderState;
-    //Copy the packet, it may be modified/destroyed elsewhere
-    PacketPtr copyPkt = new Packet(*pkt);
-    copyPkt->dataStatic<uint8_t>(pkt->getPtr<uint8_t>());
-    mshr->pkt = copyPkt;
-
-    bool success = sendTiming(pkt);
-    DPRINTF(Cache, "Address %x was %s in sending the timing request\n",
-            pkt->getAddr(), success ? "succesful" : "unsuccesful");
-
-    waitingOnRetry = !success;
-    if (waitingOnRetry) {
-        DPRINTF(CachePort, "%s now waiting on a retry\n", name());
-    }
+    // in atomic mode, responses go back to the sender via the
+    // function return from sendAtomic(), not via a separate
+    // sendAtomic() from the responder.  Thus we should never see a
+    // response packet in recvAtomic() (anywhere, not just here).
+    assert(!pkt->isResponse());
+    return myCache()->snoopAtomic(pkt);
+}
 
-    myCache()->sendResult(pkt, mshr, success);
 
-    if (success && cache->isMemSideBusRequested())
-    {
-        DPRINTF(CachePort, "%s has more requests\n", name());
-        //Still more to issue, rerequest in 1 cycle
-        new RequestEvent(this, curTick + 1);
-    }
+/**
+ * Memory-side functional receive.  checkFunctional() gets the first
+ * look at the packet; if its result is still not Packet::Success the
+ * access is passed to the cache along with the CPU-side port.
+ */
+template<class TagStore, class Coherence>
+void
+Cache<TagStore,Coherence>::MemSidePort::recvFunctional(PacketPtr pkt)
+{
+    checkFunctional(pkt);
+    if (pkt->result != Packet::Success)
+        myCache()->functionalAccess(pkt, cache->cpuSidePort);
 }
 
 
+
+/**
+ * Try to send one packet out of the memory-side port.  A deferred
+ * packet that is ready goes first; otherwise the cache is asked for
+ * its next packet via getPacket().  waitingOnRetry is updated to say
+ * whether the send failed.  When it did not, the send event is
+ * rescheduled if more work is pending, or a pending drainEvent is
+ * processed if there is nothing left to send.
+ */
 template<class TagStore, class Coherence>
 void
-Cache<TagStore,Coherence>::MemSidePort::processRequestEvent()
+Cache<TagStore,Coherence>::MemSidePort::sendPacket()
 {
-    if (waitingOnRetry)
-        return;
-    //We have some responses to drain first
-    if (!drainList.empty()) {
-        if (!drainResponse()) {
-            // more responses to drain... re-request bus
-            scheduleRequestEvent(curTick + 1);
-        }
-        return;
-    }
+    // if we have responses that are ready, they take precedence
+    if (deferredPacketReady()) {
+        bool success = sendTiming(transmitList.front().pkt);
 
-    DPRINTF(CachePort, "%s trying to send a MSHR request\n", name());
-    if (!isBusRequested()) {
-        //This can happen if I am the owner of a block and see an upgrade
-        //while the block was in my WB Buffers.  I just remove the
-        //wb and de-assert the masterRequest
-        return;
-    }
+        if (success) {
+            //send successful, remove packet
+            transmitList.pop_front();
+        }
 
-    PacketPtr pkt = myCache()->getPacket();
-    MSHR* mshr = (MSHR*) pkt->senderState;
-    //Copy the packet, it may be modified/destroyed elsewhere
-    PacketPtr copyPkt = new Packet(*pkt);
-    copyPkt->dataStatic<uint8_t>(pkt->getPtr<uint8_t>());
-    mshr->pkt = copyPkt;
+        waitingOnRetry = !success;
+    } else {
+        // check for non-response packets (requests & writebacks)
+        PacketPtr pkt = myCache()->getPacket();
+        if (pkt == NULL) {
+            // No request or writeback to send.  Presumably this can
+            // happen when the bus request behind this send was
+            // withdrawn before we got here -- TODO confirm against
+            // getPacket().  Nothing was sent, so no retry is pending.
+            waitingOnRetry = false;
+        } else {
+            MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
 
-    bool success = sendTiming(pkt);
-    DPRINTF(Cache, "Address %x was %s in sending the timing request\n",
-            pkt->getAddr(), success ? "succesful" : "unsuccesful");
+            bool success = sendTiming(pkt);
+            DPRINTF(Cache,
+                    "Address %x was %s in sending the timing request\n",
+                    pkt->getAddr(), success ? "successful" : "unsuccessful");
 
-    waitingOnRetry = !success;
-    if (waitingOnRetry) {
-        DPRINTF(CachePort, "%s now waiting on a retry\n", name());
+            waitingOnRetry = !success;
+            if (waitingOnRetry) {
+                DPRINTF(CachePort, "%s now waiting on a retry\n", name());
+            } else {
+                myCache()->markInService(mshr);
+            }
+        }
     }
 
-    myCache()->sendResult(pkt, mshr, success);
-    if (success && isBusRequested())
-    {
-        DPRINTF(CachePort, "%s still more MSHR requests to send\n", name());
-        //Still more to issue, rerequest in 1 cycle
-        scheduleRequestEvent(curTick+1);
+
+    // tried to send packet... if it was successful (no retry), see if
+    // we need to rerequest bus or not
+    if (!waitingOnRetry) {
+        if (isBusRequested()) {
+            // more requests/writebacks: rerequest ASAP
+            DPRINTF(CachePort, "%s still more MSHR requests to send\n",
+                    name());
+            sendEvent->schedule(curTick+1);
+        } else if (!transmitList.empty()) {
+            // deferred packets: rerequest bus, but possibly not until later
+            Tick time = transmitList.front().tick;
+            sendEvent->schedule(time <= curTick ? curTick+1 : time);
+        } else {
+            // no more to send right now: if we're draining, we may be done
+            if (drainEvent) {
+                drainEvent->process();
+                drainEvent = NULL;
+            }
+        }
     }
 }
 
-
+/**
+ * Retry callback: a retry must be outstanding; attempt the send again.
+ */
 template<class TagStore, class Coherence>
 void
-Cache<TagStore,Coherence>::MemSidePort::processResponseEvent()
+Cache<TagStore,Coherence>::MemSidePort::recvRetry()
 {
-    assert(transmitList.size());
-    assert(transmitList.front().first <= curTick);
-    PacketPtr pkt = transmitList.front().second;
-    transmitList.pop_front();
-    if (!transmitList.empty()) {
-        Tick time = transmitList.front().first;
-        responseEvent->schedule(time <= curTick ? curTick+1 : time);
-    }
-
-    if (pkt->flags & NACKED_LINE)
-        pkt->result = Packet::Nacked;
-    else
-        pkt->result = Packet::Success;
-    pkt->makeTimingResponse();
-    DPRINTF(CachePort, "%s attempting to send a response\n", name());
-    if (!drainList.empty() || waitingOnRetry) {
-        //Already have a list, just append
-        drainList.push_back(pkt);
-        DPRINTF(CachePort, "%s appending response onto drain list\n", name());
-    }
-    else if (!sendTiming(pkt)) {
-        //It failed, save it to list of drain events
-        DPRINTF(CachePort, "%s now waiting for a retry\n", name());
-        drainList.push_back(pkt);
-        waitingOnRetry = true;
-    }
-
-    // Check if we're done draining once this list is empty
-    if (drainList.empty() && transmitList.empty())
-        myCache()->checkDrain();
+    assert(waitingOnRetry);
+    sendPacket();
 }
 
 
-template<class TagStore, class Coherence>
-Tick
-Cache<TagStore,Coherence>::MemSidePort::recvAtomic(PacketPtr pkt)
-{
-    if (pkt->isResponse())
-        myCache()->handleResponse(pkt);
-    else
-        return myCache()->snoopProbe(pkt);
-    //Fix this timing info
-    return myCache()->hitLatency;
-}
-
+/**
+ * Send-event callback: must not run while a retry is outstanding;
+ * attempts the next send.
+ */
 template<class TagStore, class Coherence>
 void
-Cache<TagStore,Coherence>::MemSidePort::recvFunctional(PacketPtr pkt)
+Cache<TagStore,Coherence>::MemSidePort::processSendEvent()
 {
-    myCache()->probe(pkt, false, cache->cpuSidePort);
-    if (pkt->result != Packet::Success)
-        checkFunctional(pkt);
+    assert(!waitingOnRetry);
+    sendPacket();
 }
 
 
-template<class TagStore, class Coherence>
-Cache<TagStore,Coherence>::
-CpuSidePort::CpuSidePort(const std::string &_name,
-                         Cache<TagStore,Coherence> *_cache)
-    : BaseCache::CachePort(_name, _cache)
-{
-    responseEvent = new ResponseEvent(this);
-}
-
 template<class TagStore, class Coherence>
 Cache<TagStore,Coherence>::
 MemSidePort::MemSidePort(const std::string &_name,
                          Cache<TagStore,Coherence> *_cache)
     : BaseCache::CachePort(_name, _cache)
 {
-    responseEvent = new ResponseEvent(this);
+    // override default send event from SimpleTimingPort
+    delete sendEvent;
+    sendEvent = new SendEvent(this);
 }
-
diff --git a/src/mem/cache/coherence/coherence_protocol.cc b/src/mem/cache/coherence/coherence_protocol.cc
index bc8de0d26..3fd17c8c7 100644
--- a/src/mem/cache/coherence/coherence_protocol.cc
+++ b/src/mem/cache/coherence/coherence_protocol.cc
@@ -139,31 +139,6 @@ CoherenceProtocol::regStats()
         .desc("readEx snoops on exclusive blocks")
         ;
 
-    snoopCount[Shared][MemCmd::InvalidateReq]
-        .name(name() + ".snoop_inv_shared")
-        .desc("Invalidate snoops on shared blocks")
-        ;
-
-    snoopCount[Owned][MemCmd::InvalidateReq]
-        .name(name() + ".snoop_inv_owned")
-        .desc("Invalidate snoops on owned blocks")
-        ;
-
-    snoopCount[Exclusive][MemCmd::InvalidateReq]
-        .name(name() + ".snoop_inv_exclusive")
-        .desc("Invalidate snoops on exclusive blocks")
-        ;
-
-    snoopCount[Modified][MemCmd::InvalidateReq]
-        .name(name() + ".snoop_inv_modified")
-        .desc("Invalidate snoops on modified blocks")
-        ;
-
-    snoopCount[Invalid][MemCmd::InvalidateReq]
-        .name(name() + ".snoop_inv_invalid")
-        .desc("Invalidate snoops on invalid blocks")
-        ;
-
     snoopCount[Shared][MemCmd::WriteInvalidateReq]
         .name(name() + ".snoop_writeinv_shared")
         .desc("WriteInvalidate snoops on shared blocks")
@@ -219,7 +194,7 @@ CoherenceProtocol::supplyAndGotoSharedTrans(BaseCache *cache, PacketPtr &pkt,
                                             CacheBlk::State & new_state)
 {
     new_state = (blk->status & ~stateMask) | Shared;
-    pkt->flags |= SHARED_LINE;
+    pkt->assertShared();
     return supplyTrans(cache, pkt, blk, mshr, new_state);
 }
 
@@ -231,7 +206,7 @@ CoherenceProtocol::supplyAndGotoOwnedTrans(BaseCache *cache, PacketPtr &pkt,
                                            CacheBlk::State & new_state)
 {
     new_state = (blk->status & ~stateMask) | Owned;
-    pkt->flags |= SHARED_LINE;
+    pkt->assertShared();
     return supplyTrans(cache, pkt, blk, mshr, new_state);
 }
 
@@ -253,7 +228,7 @@ CoherenceProtocol::assertShared(BaseCache *cache, PacketPtr &pkt,
                                             CacheBlk::State & new_state)
 {
     new_state = (blk->status & ~stateMask) | Shared;
-    pkt->flags |= SHARED_LINE;
+    pkt->assertShared();
     return false;
 }
 
@@ -336,12 +311,10 @@ CoherenceProtocol::CoherenceProtocol(const string &name,
     //
     tt[Invalid][MC::ReadReq].onSnoop(nullTransition);
     tt[Invalid][MC::ReadExReq].onSnoop(nullTransition);
-    tt[Invalid][MC::InvalidateReq].onSnoop(invalidateTrans);
     tt[Invalid][MC::WriteInvalidateReq].onSnoop(invalidateTrans);
     tt[Shared][MC::ReadReq].onSnoop(hasExclusive
                                    ? assertShared : nullTransition);
     tt[Shared][MC::ReadExReq].onSnoop(invalidateTrans);
-    tt[Shared][MC::InvalidateReq].onSnoop(invalidateTrans);
     tt[Shared][MC::WriteInvalidateReq].onSnoop(invalidateTrans);
     if (doUpgrades) {
         tt[Invalid][MC::UpgradeReq].onSnoop(nullTransition);
@@ -351,13 +324,11 @@ CoherenceProtocol::CoherenceProtocol(const string &name,
     tt[Modified][MC::ReadReq].onSnoop(hasOwned
                                      ? supplyAndGotoOwnedTrans
                                      : supplyAndGotoSharedTrans);
-    tt[Modified][MC::InvalidateReq].onSnoop(invalidateTrans);
     tt[Modified][MC::WriteInvalidateReq].onSnoop(invalidateTrans);
 
     if (hasExclusive) {
         tt[Exclusive][MC::ReadReq].onSnoop(assertShared);
         tt[Exclusive][MC::ReadExReq].onSnoop(invalidateTrans);
-        tt[Exclusive][MC::InvalidateReq].onSnoop(invalidateTrans);
         tt[Exclusive][MC::WriteInvalidateReq].onSnoop(invalidateTrans);
     }
 
@@ -365,7 +336,6 @@ CoherenceProtocol::CoherenceProtocol(const string &name,
         tt[Owned][MC::ReadReq].onSnoop(supplyAndGotoOwnedTrans);
         tt[Owned][MC::ReadExReq].onSnoop(supplyAndInvalidateTrans);
         tt[Owned][MC::UpgradeReq].onSnoop(invalidateTrans);
-        tt[Owned][MC::InvalidateReq].onSnoop(invalidateTrans);
         tt[Owned][MC::WriteInvalidateReq].onSnoop(invalidateTrans);
     }
 
@@ -394,7 +364,7 @@ CoherenceProtocol::getBusCmd(MemCmd cmdIn, CacheBlk::State state,
 
 
 CacheBlk::State
-CoherenceProtocol::getNewState(PacketPtr &pkt, CacheBlk::State oldState)
+CoherenceProtocol::getNewState(PacketPtr pkt, CacheBlk::State oldState)
 {
     CacheBlk::State state = oldState & stateMask;
     int cmd_idx = pkt->cmdToIndex();
@@ -406,7 +376,7 @@ CoherenceProtocol::getNewState(PacketPtr &pkt, CacheBlk::State oldState)
 
     //Check if it's exclusive and the shared line was asserted,
     //then  goto shared instead
-    if (newState == Exclusive && (pkt->flags & SHARED_LINE)) {
+    if (newState == Exclusive && pkt->sharedAsserted()) {
         newState = Shared;
     }
 
diff --git a/src/mem/cache/coherence/coherence_protocol.hh b/src/mem/cache/coherence/coherence_protocol.hh
index 775bc807a..4b8024582 100644
--- a/src/mem/cache/coherence/coherence_protocol.hh
+++ b/src/mem/cache/coherence/coherence_protocol.hh
@@ -89,8 +89,8 @@ class CoherenceProtocol : public SimObject
      * @param oldState The current block state.
      * @return The new state.
      */
-    CacheBlk::State getNewState(PacketPtr &pkt,
-                                CacheBlk::State oldState);
+    CacheBlk::State getNewState(PacketPtr pkt,
+                                CacheBlk::State oldState = 0);
 
     /**
      * Handle snooped bus requests.
diff --git a/src/mem/cache/coherence/simple_coherence.hh b/src/mem/cache/coherence/simple_coherence.hh
index 095260ca4..214828ca7 100644
--- a/src/mem/cache/coherence/simple_coherence.hh
+++ b/src/mem/cache/coherence/simple_coherence.hh
@@ -94,25 +94,14 @@ class SimpleCoherence
         return NULL;
     }
 
-    /**
-     * Was the CSHR request was sent successfully?
-     * @param pkt The request.
-     * @param success True if the request was sent successfully.
-     */
-    void sendResult(PacketPtr &pkt, MSHR* cshr, bool success)
-    {
-        //Don't do coherence
-        return;
-    }
-
-
     /**
      * Return the proper state given the current state and the bus response.
      * @param pkt The bus response.
      * @param current The current block state.
      * @return The new state.
      */
-    CacheBlk::State getNewState(PacketPtr &pkt, CacheBlk::State current)
+    CacheBlk::State getNewState(PacketPtr pkt,
+                                CacheBlk::State current = 0)
     {
         return protocol->getNewState(pkt, current);
     }
diff --git a/src/mem/cache/miss/SConscript b/src/mem/cache/miss/SConscript
index 0f81a2570..376d670cd 100644
--- a/src/mem/cache/miss/SConscript
+++ b/src/mem/cache/miss/SConscript
@@ -30,8 +30,5 @@
 
 Import('*')
 
-Source('blocking_buffer.cc')
-Source('miss_buffer.cc')
-Source('miss_queue.cc')
 Source('mshr.cc')
 Source('mshr_queue.cc')
diff --git a/src/mem/cache/miss/blocking_buffer.cc b/src/mem/cache/miss/blocking_buffer.cc
deleted file mode 100644
index 281328c2e..000000000
--- a/src/mem/cache/miss/blocking_buffer.cc
+++ /dev/null
@@ -1,245 +0,0 @@
-/*
- * Copyright (c) 2003-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Erik Hallnor
- */
-
-/**
- * @file
- * Definitions of a simple buffer for a blocking cache.
- */
-#include <cstring>
-
-#include "mem/cache/base_cache.hh"
-#include "mem/cache/miss/blocking_buffer.hh"
-#include "mem/cache/prefetch/base_prefetcher.hh"
-#include "mem/request.hh"
-
-/**
- * @todo Move writebacks into shared BaseBuffer class.
- */
-void
-BlockingBuffer::regStats(const std::string &name)
-{
-    MissBuffer::regStats(name);
-}
-
-
-void
-BlockingBuffer::handleMiss(PacketPtr &pkt, int blk_size, Tick time)
-{
-    Addr blk_addr = pkt->getAddr() & ~(Addr)(blk_size - 1);
-    if (pkt->isWrite() && (pkt->req->isUncacheable() || !writeAllocate ||
-                               !pkt->needsResponse())) {
-        if (!pkt->needsResponse()) {
-            wb.allocateAsBuffer(pkt);
-        } else {
-            wb.allocate(pkt->cmd, blk_addr, blk_size, pkt);
-        }
-
-        std::memcpy(wb.pkt->getPtr<uint8_t>(), pkt->getPtr<uint8_t>(), blk_size);
-
-        cache->setBlocked(Blocked_NoWBBuffers);
-        cache->requestMemSideBus(Request_WB, time);
-        return;
-    }
-
-    if (!pkt->needsResponse()) {
-        miss.allocateAsBuffer(pkt);
-    } else {
-        miss.allocate(pkt->cmd, blk_addr, blk_size, pkt);
-    }
-    if (!pkt->req->isUncacheable()) {
-        miss.pkt->flags |= CACHE_LINE_FILL;
-    }
-    cache->setBlocked(Blocked_NoMSHRs);
-    cache->requestMemSideBus(Request_MSHR, time);
-}
-
-PacketPtr
-BlockingBuffer::getPacket()
-{
-    if (miss.pkt && !miss.inService) {
-        return miss.pkt;
-    }
-    return wb.pkt;
-}
-
-void
-BlockingBuffer::setBusCmd(PacketPtr &pkt, MemCmd cmd)
-{
-    MSHR *mshr = (MSHR*) pkt->senderState;
-    mshr->originalCmd = pkt->cmd;
-    if (pkt->isCacheFill())
-        pkt->cmdOverride(cmd);
-}
-
-void
-BlockingBuffer::restoreOrigCmd(PacketPtr &pkt)
-{
-    pkt->cmdOverride(((MSHR*)(pkt->senderState))->originalCmd);
-}
-
-void
-BlockingBuffer::markInService(PacketPtr &pkt, MSHR* mshr)
-{
-    if (!pkt->isCacheFill() && pkt->isWrite()) {
-        // Forwarding a write/ writeback, don't need to change
-        // the command
-        assert(mshr == &wb);
-        cache->deassertMemSideBusRequest(Request_WB);
-        if (!pkt->needsResponse()) {
-            assert(wb.getNumTargets() == 0);
-            wb.deallocate();
-            cache->clearBlocked(Blocked_NoWBBuffers);
-        } else {
-            wb.inService = true;
-        }
-    } else {
-        assert(mshr == &miss);
-        cache->deassertMemSideBusRequest(Request_MSHR);
-        if (!pkt->needsResponse()) {
-            assert(miss.getNumTargets() == 0);
-            miss.deallocate();
-            cache->clearBlocked(Blocked_NoMSHRs);
-        } else {
-            //mark in service
-            miss.inService = true;
-        }
-    }
-}
-
-void
-BlockingBuffer::handleResponse(PacketPtr &pkt, Tick time)
-{
-    if (pkt->isCacheFill()) {
-        // targets were handled in the cache tags
-        assert((MSHR*)pkt->senderState == &miss);
-        miss.deallocate();
-        cache->clearBlocked(Blocked_NoMSHRs);
-    } else {
-        if (((MSHR*)(pkt->senderState))->hasTargets()) {
-            // Should only have 1 target if we had any
-            assert(((MSHR*)(pkt->senderState))->getNumTargets() == 1);
-            PacketPtr target = ((MSHR*)(pkt->senderState))->getTarget();
-            ((MSHR*)(pkt->senderState))->popTarget();
-            if (pkt->isRead()) {
-                std::memcpy(target->getPtr<uint8_t>(), pkt->getPtr<uint8_t>(), target->getSize());
-            }
-            cache->respond(target, time);
-            assert(!((MSHR*)(pkt->senderState))->hasTargets());
-        }
-
-        if (pkt->isWrite()) {
-            assert(((MSHR*)(pkt->senderState)) == &wb);
-            wb.deallocate();
-            cache->clearBlocked(Blocked_NoWBBuffers);
-        } else {
-            miss.deallocate();
-            cache->clearBlocked(Blocked_NoMSHRs);
-        }
-    }
-}
-
-void
-BlockingBuffer::squash(int threadNum)
-{
-    if (miss.threadNum == threadNum) {
-        PacketPtr target = miss.getTarget();
-        miss.popTarget();
-        assert(0/*target->req->getThreadNum()*/ == threadNum);
-        target = NULL;
-        assert(!miss.hasTargets());
-        miss.ntargets=0;
-        if (!miss.inService) {
-            miss.deallocate();
-            cache->clearBlocked(Blocked_NoMSHRs);
-            cache->deassertMemSideBusRequest(Request_MSHR);
-        }
-    }
-}
-
-void
-BlockingBuffer::doWriteback(Addr addr,
-                            int size, uint8_t *data, bool compressed)
-{
-    // Generate request
-    Request * req = new Request(addr, size, 0);
-    PacketPtr pkt = new Packet(req, MemCmd::Writeback, -1);
-    pkt->allocate();
-    if (data) {
-        std::memcpy(pkt->getPtr<uint8_t>(), data, size);
-    }
-
-    if (compressed) {
-        pkt->flags |= COMPRESSED;
-    }
-
-    ///All writebacks charged to same thread @todo figure this out
-    writebacks[0/*pkt->req->getThreadNum()*/]++;
-
-    wb.allocateAsBuffer(pkt);
-    cache->requestMemSideBus(Request_WB, curTick);
-    cache->setBlocked(Blocked_NoWBBuffers);
-}
-
-
-
-void
-BlockingBuffer::doWriteback(PacketPtr &pkt)
-{
-    writebacks[0/*pkt->req->getThreadNum()*/]++;
-
-    wb.allocateAsBuffer(pkt);
-
-    // Since allocate as buffer copies the request,
-    // need to copy data here.
-    std::memcpy(wb.pkt->getPtr<uint8_t>(), pkt->getPtr<uint8_t>(), pkt->getSize());
-
-    cache->setBlocked(Blocked_NoWBBuffers);
-    cache->requestMemSideBus(Request_WB, curTick);
-}
-
-
-MSHR *
-BlockingBuffer::findMSHR(Addr addr)
-{
-    if (miss.addr == addr && miss.pkt)
-        return &miss;
-    return NULL;
-}
-
-
-bool
-BlockingBuffer::findWrites(Addr addr, std::vector<MSHR*>& writes)
-{
-    if (wb.addr == addr && wb.pkt) {
-        writes.push_back(&wb);
-        return true;
-    }
-    return false;
-}
diff --git a/src/mem/cache/miss/blocking_buffer.hh b/src/mem/cache/miss/blocking_buffer.hh
deleted file mode 100644
index 86b24d539..000000000
--- a/src/mem/cache/miss/blocking_buffer.hh
+++ /dev/null
@@ -1,209 +0,0 @@
-/*
- * Copyright (c) 2003-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Erik Hallnor
- */
-
-/**
- * @file
- * Declaration of a simple buffer for a blocking cache.
- */
-
-#ifndef __BLOCKING_BUFFER_HH__
-#define __BLOCKING_BUFFER_HH__
-
-#include <vector>
-
-#include "base/misc.hh" // for fatal()
-#include "mem/cache/miss/miss_buffer.hh"
-#include "mem/cache/miss/mshr.hh"
-
-/**
- * Miss and writeback storage for a blocking cache.
- */
-class BlockingBuffer : public MissBuffer
-{
-protected:
-    /** Miss storage. */
-    MSHR miss;
-    /** WB storage. */
-    MSHR wb;
-
-public:
-    /**
-     * Builds and initializes this buffer.
-     * @param write_allocate If true, treat write misses the same as reads.
-     */
-    BlockingBuffer(bool write_allocate)
-        : MissBuffer(write_allocate)
-    {
-    }
-
-    /**
-     * Register statistics for this object.
-     * @param name The name of the parent cache.
-     */
-    void regStats(const std::string &name);
-
-    /**
-     * Handle a cache miss properly. Requests the bus and marks the cache as
-     * blocked.
-     * @param pkt The request that missed in the cache.
-     * @param blk_size The block size of the cache.
-     * @param time The time the miss is detected.
-     */
-    void handleMiss(PacketPtr &pkt, int blk_size, Tick time);
-
-    /**
-     * Fetch the block for the given address and buffer the given target.
-     * @param addr The address to fetch.
-     * @param asid The address space of the address.
-     * @param blk_size The block size of the cache.
-     * @param time The time the miss is detected.
-     * @param target The target for the fetch.
-     */
-    MSHR* fetchBlock(Addr addr, int blk_size, Tick time,
-                     PacketPtr &target)
-    {
-        fatal("Unimplemented");
-        M5_DUMMY_RETURN
-    }
-
-    /**
-     * Selects a outstanding request to service.
-     * @return The request to service, NULL if none found.
-     */
-    PacketPtr getPacket();
-
-    /**
-     * Set the command to the given bus command.
-     * @param pkt The request to update.
-     * @param cmd The bus command to use.
-     */
-    void setBusCmd(PacketPtr &pkt, MemCmd cmd);
-
-    /**
-     * Restore the original command in case of a bus transmission error.
-     * @param pkt The request to reset.
-     */
-    void restoreOrigCmd(PacketPtr &pkt);
-
-    /**
-     * Marks a request as in service (sent on the bus). This can have side
-     * effect since storage for no response commands is deallocated once they
-     * are successfully sent.
-     * @param pkt The request that was sent on the bus.
-     */
-    void markInService(PacketPtr &pkt, MSHR* mshr);
-
-    /**
-     * Frees the resources of the request and unblock the cache.
-     * @param pkt The request that has been satisfied.
-     * @param time The time when the request is satisfied.
-     */
-    void handleResponse(PacketPtr &pkt, Tick time);
-
-    /**
-     * Removes all outstanding requests for a given thread number. If a request
-     * has been sent to the bus, this function removes all of its targets.
-     * @param threadNum The thread number of the requests to squash.
-     */
-    void squash(int threadNum);
-
-    /**
-     * Return the current number of outstanding misses.
-     * @return the number of outstanding misses.
-     */
-    int getMisses()
-    {
-        return miss.getNumTargets();
-    }
-
-    /**
-     * Searches for the supplied address in the miss "queue".
-     * @param addr The address to look for.
-     * @param asid The address space id.
-     * @return A pointer to miss if it matches.
-     */
-    MSHR* findMSHR(Addr addr);
-
-    /**
-     * Searches for the supplied address in the write buffer.
-     * @param addr The address to look for.
-     * @param asid The address space id.
-     * @param writes List of pointers to the matching writes.
-     * @return True if there is a matching write.
-     */
-    bool findWrites(Addr addr, std::vector<MSHR*>& writes);
-
-    /**
-     * Perform a writeback of dirty data to the given address.
-     * @param addr The address to write to.
-     * @param asid The address space id.
-     * @param size The number of bytes to write.
-     * @param data The data to write, can be NULL.
-     * @param compressed True if the data is compressed.
-     */
-    void doWriteback(Addr addr,
-                     int size, uint8_t *data, bool compressed);
-
-    /**
-     * Perform a writeback request.
-     * @param pkt The writeback request.
-     */
-    void doWriteback(PacketPtr &pkt);
-
-    /**
-     * Returns true if there are outstanding requests.
-     * @return True if there are outstanding requests.
-     */
-    bool havePending()
-    {
-        return !miss.inService || !wb.inService;
-    }
-
-    /**
-     * Add a target to the given MSHR. This assumes it is in the miss queue.
-     * @param mshr The mshr to add a target to.
-     * @param pkt The target to add.
-     */
-    void addTarget(MSHR *mshr, PacketPtr &pkt)
-    {
-        fatal("Shouldn't call this on a blocking buffer.");
-    }
-
-    /**
-     * Dummy implmentation.
-     */
-    MSHR* allocateTargetList(Addr addr)
-    {
-        fatal("Unimplemented");
-        M5_DUMMY_RETURN
-    }
-};
-
-#endif // __BLOCKING_BUFFER_HH__
diff --git a/src/mem/cache/miss/miss_buffer.cc b/src/mem/cache/miss/miss_buffer.cc
deleted file mode 100644
index 4d9cd0958..000000000
--- a/src/mem/cache/miss/miss_buffer.cc
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2003-2006 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Erik Hallnor
- */
-
-#include "cpu/smt.hh" //for maxThreadsPerCPU
-#include "mem/cache/base_cache.hh"
-#include "mem/cache/miss/miss_buffer.hh"
-#include "mem/cache/prefetch/base_prefetcher.hh"
-
-/**
- * @todo Move writebacks into shared BaseBuffer class.
- */
-void
-MissBuffer::regStats(const std::string &name)
-{
-    using namespace Stats;
-    writebacks
-        .init(maxThreadsPerCPU)
-        .name(name + ".writebacks")
-        .desc("number of writebacks")
-        .flags(total)
-        ;
-}
-
-void
-MissBuffer::setCache(BaseCache *_cache)
-{
-    cache = _cache;
-    blkSize = cache->getBlockSize();
-}
-
-void
-MissBuffer::setPrefetcher(BasePrefetcher *_prefetcher)
-{
-    prefetcher = _prefetcher;
-}
diff --git a/src/mem/cache/miss/miss_buffer.hh b/src/mem/cache/miss/miss_buffer.hh
deleted file mode 100644
index 9a86db304..000000000
--- a/src/mem/cache/miss/miss_buffer.hh
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
- * Copyright (c) 2003-2006 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Steve Reinhardt
- */
-
-/**
- * @file
- * MissBuffer declaration.
- */
-
-#ifndef __MISS_BUFFER_HH__
-#define __MISS_BUFFER_HH__
-
-class BaseCache;
-class BasePrefetcher;
-class MSHR;
-
-/**
- * Abstract base class for cache miss buffering.
- */
-class MissBuffer
-{
-  protected:
-    /** True if the cache should allocate on a write miss. */
-    const bool writeAllocate;
-
-    /** Pointer to the parent cache. */
-    BaseCache *cache;
-
-    /** The Prefetcher */
-    BasePrefetcher *prefetcher;
-
-    /** Block size of the parent cache. */
-    int blkSize;
-
-    // Statistics
-    /**
-     * @addtogroup CacheStatistics
-     * @{
-     */
-    /** Number of blocks written back per thread. */
-    Stats::Vector<> writebacks;
-
-    /**
-     * @}
-     */
-
-  public:
-    MissBuffer(bool write_allocate)
-        : writeAllocate(write_allocate)
-    {
-    }
-
-    virtual ~MissBuffer() {}
-
-    /**
-     * Called by the parent cache to set the back pointer.
-     * @param _cache A pointer to the parent cache.
-     */
-    void setCache(BaseCache *_cache);
-
-    void setPrefetcher(BasePrefetcher *_prefetcher);
-
-    /**
-     * Register statistics for this object.
-     * @param name The name of the parent cache.
-     */
-    virtual void regStats(const std::string &name);
-
-    /**
-     * Handle a cache miss properly. Either allocate an MSHR for the request,
-     * or forward it through the write buffer.
-     * @param pkt The request that missed in the cache.
-     * @param blk_size The block size of the cache.
-     * @param time The time the miss is detected.
-     */
-    virtual void handleMiss(PacketPtr &pkt, int blk_size, Tick time) = 0;
-
-    /**
-     * Fetch the block for the given address and buffer the given target.
-     * @param addr The address to fetch.
-     * @param asid The address space of the address.
-     * @param blk_size The block size of the cache.
-     * @param time The time the miss is detected.
-     * @param target The target for the fetch.
-     */
-    virtual MSHR *fetchBlock(Addr addr, int blk_size, Tick time,
-                             PacketPtr &target) = 0;
-
-    /**
-     * Selects a outstanding request to service.
-     * @return The request to service, NULL if none found.
-     */
-    virtual PacketPtr getPacket() = 0;
-
-    /**
-     * Set the command to the given bus command.
-     * @param pkt The request to update.
-     * @param cmd The bus command to use.
-     */
-    virtual void setBusCmd(PacketPtr &pkt, MemCmd cmd) = 0;
-
-    /**
-     * Restore the original command in case of a bus transmission error.
-     * @param pkt The request to reset.
-     */
-    virtual void restoreOrigCmd(PacketPtr &pkt) = 0;
-
-    /**
-     * Marks a request as in service (sent on the bus). This can have side
-     * effect since storage for no response commands is deallocated once they
-     * are successfully sent.
-     * @param pkt The request that was sent on the bus.
-     */
-    virtual void markInService(PacketPtr &pkt, MSHR* mshr) = 0;
-
-    /**
-     * Collect statistics and free resources of a satisfied request.
-     * @param pkt The request that has been satisfied.
-     * @param time The time when the request is satisfied.
-     */
-    virtual void handleResponse(PacketPtr &pkt, Tick time) = 0;
-
-    /**
-     * Removes all outstanding requests for a given thread number. If a request
-     * has been sent to the bus, this function removes all of its targets.
-     * @param threadNum The thread number of the requests to squash.
-     */
-    virtual void squash(int threadNum) = 0;
-
-    /**
-     * Return the current number of outstanding misses.
-     * @return the number of outstanding misses.
-     */
-    virtual int getMisses() = 0;
-
-    /**
-     * Searches for the supplied address in the miss queue.
-     * @param addr The address to look for.
-     * @param asid The address space id.
-     * @return The MSHR that contains the address, NULL if not found.
-     * @warning Currently only searches the miss queue. If non write allocate
-     * might need to search the write buffer for coherence.
-     */
-    virtual MSHR* findMSHR(Addr addr) = 0;
-
-    /**
-     * Searches for the supplied address in the write buffer.
-     * @param addr The address to look for.
-     * @param asid The address space id.
-     * @param writes The list of writes that match the address.
-     * @return True if any writes are found
-     */
-    virtual bool findWrites(Addr addr, std::vector<MSHR*>& writes) = 0;
-
-    /**
-     * Perform a writeback of dirty data to the given address.
-     * @param addr The address to write to.
-     * @param asid The address space id.
-     * @param xc The execution context of the address space.
-     * @param size The number of bytes to write.
-     * @param data The data to write, can be NULL.
-     * @param compressed True if the data is compressed.
-     */
-    virtual void doWriteback(Addr addr, int size, uint8_t *data,
-                             bool compressed) = 0;
-
-    /**
-     * Perform the given writeback request.
-     * @param pkt The writeback request.
-     */
-    virtual void doWriteback(PacketPtr &pkt) = 0;
-
-    /**
-     * Returns true if there are outstanding requests.
-     * @return True if there are outstanding requests.
-     */
-    virtual bool havePending() = 0;
-
-    /**
-     * Add a target to the given MSHR. This assumes it is in the miss queue.
-     * @param mshr The mshr to add a target to.
-     * @param pkt The target to add.
-     */
-    virtual void addTarget(MSHR *mshr, PacketPtr &pkt) = 0;
-
-    /**
-     * Allocate a MSHR to hold a list of targets to a block involved in a copy.
-     * If the block is marked done then the MSHR already holds the data to
-     * fill the block. Otherwise the block needs to be fetched.
-     * @param addr The address to buffer.
-     * @param asid The address space ID.
-     * @return A pointer to the allocated MSHR.
-     */
-    virtual MSHR* allocateTargetList(Addr addr) = 0;
-};
-
-#endif //__MISS_BUFFER_HH__
diff --git a/src/mem/cache/miss/miss_queue.cc b/src/mem/cache/miss/miss_queue.cc
deleted file mode 100644
index 67036ed02..000000000
--- a/src/mem/cache/miss/miss_queue.cc
+++ /dev/null
@@ -1,752 +0,0 @@
-/*
- * Copyright (c) 2003-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Erik Hallnor
- *          Ron Dreslinski
- */
-
-/**
- * @file
- * Miss and writeback queue definitions.
- */
-
-#include "cpu/smt.hh" //for maxThreadsPerCPU
-#include "mem/cache/base_cache.hh"
-#include "mem/cache/miss/miss_queue.hh"
-#include "mem/cache/prefetch/base_prefetcher.hh"
-
-using namespace std;
-
-// simple constructor
-/**
- * @todo Remove the +16 from the write buffer constructor once we handle
- * stalling on writebacks do to compression writes.
- */
-MissQueue::MissQueue(int numMSHRs, int numTargets, int write_buffers,
-                     bool write_allocate, bool prefetch_miss)
-    : MissBuffer(write_allocate),
-      mq(numMSHRs, 4), wb(write_buffers,numMSHRs+1000), numMSHR(numMSHRs),
-      numTarget(numTargets), writeBuffers(write_buffers),
-      order(0), prefetchMiss(prefetch_miss)
-{
-    noTargetMSHR = NULL;
-}
-
-
-MissQueue::~MissQueue()
-{
-}
-
-
-void
-MissQueue::regStats(const string &name)
-{
-    MissBuffer::regStats(name);
-
-    using namespace Stats;
-
-    // MSHR hit statistics
-    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
-        MemCmd cmd(access_idx);
-        const string &cstr = cmd.toString();
-
-        mshr_hits[access_idx]
-            .init(maxThreadsPerCPU)
-            .name(name + "." + cstr + "_mshr_hits")
-            .desc("number of " + cstr + " MSHR hits")
-            .flags(total | nozero | nonan)
-            ;
-    }
-
-    demandMshrHits
-        .name(name + ".demand_mshr_hits")
-        .desc("number of demand (read+write) MSHR hits")
-        .flags(total)
-        ;
-    demandMshrHits = mshr_hits[MemCmd::ReadReq] + mshr_hits[MemCmd::WriteReq];
-
-    overallMshrHits
-        .name(name + ".overall_mshr_hits")
-        .desc("number of overall MSHR hits")
-        .flags(total)
-        ;
-    overallMshrHits = demandMshrHits + mshr_hits[MemCmd::SoftPFReq] +
-        mshr_hits[MemCmd::HardPFReq];
-
-    // MSHR miss statistics
-    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
-        MemCmd cmd(access_idx);
-        const string &cstr = cmd.toString();
-
-        mshr_misses[access_idx]
-            .init(maxThreadsPerCPU)
-            .name(name + "." + cstr + "_mshr_misses")
-            .desc("number of " + cstr + " MSHR misses")
-            .flags(total | nozero | nonan)
-            ;
-    }
-
-    demandMshrMisses
-        .name(name + ".demand_mshr_misses")
-        .desc("number of demand (read+write) MSHR misses")
-        .flags(total)
-        ;
-    demandMshrMisses = mshr_misses[MemCmd::ReadReq] + mshr_misses[MemCmd::WriteReq];
-
-    overallMshrMisses
-        .name(name + ".overall_mshr_misses")
-        .desc("number of overall MSHR misses")
-        .flags(total)
-        ;
-    overallMshrMisses = demandMshrMisses + mshr_misses[MemCmd::SoftPFReq] +
-        mshr_misses[MemCmd::HardPFReq];
-
-    // MSHR miss latency statistics
-    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
-        MemCmd cmd(access_idx);
-        const string &cstr = cmd.toString();
-
-        mshr_miss_latency[access_idx]
-            .init(maxThreadsPerCPU)
-            .name(name + "." + cstr + "_mshr_miss_latency")
-            .desc("number of " + cstr + " MSHR miss cycles")
-            .flags(total | nozero | nonan)
-            ;
-    }
-
-    demandMshrMissLatency
-        .name(name + ".demand_mshr_miss_latency")
-        .desc("number of demand (read+write) MSHR miss cycles")
-        .flags(total)
-        ;
-    demandMshrMissLatency = mshr_miss_latency[MemCmd::ReadReq]
-        + mshr_miss_latency[MemCmd::WriteReq];
-
-    overallMshrMissLatency
-        .name(name + ".overall_mshr_miss_latency")
-        .desc("number of overall MSHR miss cycles")
-        .flags(total)
-        ;
-    overallMshrMissLatency = demandMshrMissLatency +
-        mshr_miss_latency[MemCmd::SoftPFReq] + mshr_miss_latency[MemCmd::HardPFReq];
-
-    // MSHR uncacheable statistics
-    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
-        MemCmd cmd(access_idx);
-        const string &cstr = cmd.toString();
-
-        mshr_uncacheable[access_idx]
-            .init(maxThreadsPerCPU)
-            .name(name + "." + cstr + "_mshr_uncacheable")
-            .desc("number of " + cstr + " MSHR uncacheable")
-            .flags(total | nozero | nonan)
-            ;
-    }
-
-    overallMshrUncacheable
-        .name(name + ".overall_mshr_uncacheable_misses")
-        .desc("number of overall MSHR uncacheable misses")
-        .flags(total)
-        ;
-    overallMshrUncacheable = mshr_uncacheable[MemCmd::ReadReq]
-        + mshr_uncacheable[MemCmd::WriteReq] + mshr_uncacheable[MemCmd::SoftPFReq]
-        + mshr_uncacheable[MemCmd::HardPFReq];
-
-    // MSHR miss latency statistics
-    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
-        MemCmd cmd(access_idx);
-        const string &cstr = cmd.toString();
-
-        mshr_uncacheable_lat[access_idx]
-            .init(maxThreadsPerCPU)
-            .name(name + "." + cstr + "_mshr_uncacheable_latency")
-            .desc("number of " + cstr + " MSHR uncacheable cycles")
-            .flags(total | nozero | nonan)
-            ;
-    }
-
-    overallMshrUncacheableLatency
-        .name(name + ".overall_mshr_uncacheable_latency")
-        .desc("number of overall MSHR uncacheable cycles")
-        .flags(total)
-        ;
-    overallMshrUncacheableLatency = mshr_uncacheable_lat[MemCmd::ReadReq]
-        + mshr_uncacheable_lat[MemCmd::WriteReq]
-        + mshr_uncacheable_lat[MemCmd::SoftPFReq]
-        + mshr_uncacheable_lat[MemCmd::HardPFReq];
-
-#if 0
-    // MSHR access formulas
-    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
-        MemCmd cmd(access_idx);
-        const string &cstr = cmd.toString();
-
-        mshrAccesses[access_idx]
-            .name(name + "." + cstr + "_mshr_accesses")
-            .desc("number of " + cstr + " mshr accesses(hits+misses)")
-            .flags(total | nozero | nonan)
-            ;
-        mshrAccesses[access_idx] =
-            mshr_hits[access_idx] + mshr_misses[access_idx]
-            + mshr_uncacheable[access_idx];
-    }
-
-    demandMshrAccesses
-        .name(name + ".demand_mshr_accesses")
-        .desc("number of demand (read+write) mshr accesses")
-        .flags(total | nozero | nonan)
-        ;
-    demandMshrAccesses = demandMshrHits + demandMshrMisses;
-
-    overallMshrAccesses
-        .name(name + ".overall_mshr_accesses")
-        .desc("number of overall (read+write) mshr accesses")
-        .flags(total | nozero | nonan)
-        ;
-    overallMshrAccesses = overallMshrHits + overallMshrMisses
-        + overallMshrUncacheable;
-#endif
-
-    // MSHR miss rate formulas
-    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
-        MemCmd cmd(access_idx);
-        const string &cstr = cmd.toString();
-
-        mshrMissRate[access_idx]
-            .name(name + "." + cstr + "_mshr_miss_rate")
-            .desc("mshr miss rate for " + cstr + " accesses")
-            .flags(total | nozero | nonan)
-            ;
-
-        mshrMissRate[access_idx] =
-            mshr_misses[access_idx] / cache->accesses[access_idx];
-    }
-
-    demandMshrMissRate
-        .name(name + ".demand_mshr_miss_rate")
-        .desc("mshr miss rate for demand accesses")
-        .flags(total)
-        ;
-    demandMshrMissRate = demandMshrMisses / cache->demandAccesses;
-
-    overallMshrMissRate
-        .name(name + ".overall_mshr_miss_rate")
-        .desc("mshr miss rate for overall accesses")
-        .flags(total)
-        ;
-    overallMshrMissRate = overallMshrMisses / cache->overallAccesses;
-
-    // mshrMiss latency formulas
-    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
-        MemCmd cmd(access_idx);
-        const string &cstr = cmd.toString();
-
-        avgMshrMissLatency[access_idx]
-            .name(name + "." + cstr + "_avg_mshr_miss_latency")
-            .desc("average " + cstr + " mshr miss latency")
-            .flags(total | nozero | nonan)
-            ;
-
-        avgMshrMissLatency[access_idx] =
-            mshr_miss_latency[access_idx] / mshr_misses[access_idx];
-    }
-
-    demandAvgMshrMissLatency
-        .name(name + ".demand_avg_mshr_miss_latency")
-        .desc("average overall mshr miss latency")
-        .flags(total)
-        ;
-    demandAvgMshrMissLatency = demandMshrMissLatency / demandMshrMisses;
-
-    overallAvgMshrMissLatency
-        .name(name + ".overall_avg_mshr_miss_latency")
-        .desc("average overall mshr miss latency")
-        .flags(total)
-        ;
-    overallAvgMshrMissLatency = overallMshrMissLatency / overallMshrMisses;
-
-    // mshrUncacheable latency formulas
-    for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
-        MemCmd cmd(access_idx);
-        const string &cstr = cmd.toString();
-
-        avgMshrUncacheableLatency[access_idx]
-            .name(name + "." + cstr + "_avg_mshr_uncacheable_latency")
-            .desc("average " + cstr + " mshr uncacheable latency")
-            .flags(total | nozero | nonan)
-            ;
-
-        avgMshrUncacheableLatency[access_idx] =
-            mshr_uncacheable_lat[access_idx] / mshr_uncacheable[access_idx];
-    }
-
-    overallAvgMshrUncacheableLatency
-        .name(name + ".overall_avg_mshr_uncacheable_latency")
-        .desc("average overall mshr uncacheable latency")
-        .flags(total)
-        ;
-    overallAvgMshrUncacheableLatency = overallMshrUncacheableLatency / overallMshrUncacheable;
-
-    mshr_cap_events
-        .init(maxThreadsPerCPU)
-        .name(name + ".mshr_cap_events")
-        .desc("number of times MSHR cap was activated")
-        .flags(total)
-        ;
-
-    //software prefetching stats
-    soft_prefetch_mshr_full
-        .init(maxThreadsPerCPU)
-        .name(name + ".soft_prefetch_mshr_full")
-        .desc("number of mshr full events for SW prefetching instrutions")
-        .flags(total)
-        ;
-
-    mshr_no_allocate_misses
-        .name(name +".no_allocate_misses")
-        .desc("Number of misses that were no-allocate")
-        ;
-
-}
-
-
-MSHR*
-MissQueue::allocateMiss(PacketPtr &pkt, int size, Tick time)
-{
-    MSHR* mshr = mq.allocate(pkt, size);
-    mshr->order = order++;
-    if (!pkt->req->isUncacheable() ){//&& !pkt->isNoAllocate()) {
-        // Mark this as a cache line fill
-        mshr->pkt->flags |= CACHE_LINE_FILL;
-    }
-    if (mq.isFull()) {
-        cache->setBlocked(Blocked_NoMSHRs);
-    }
-    if (pkt->cmd != MemCmd::HardPFReq) {
-        //If we need to request the bus (not on HW prefetch), do so
-        cache->requestMemSideBus(Request_MSHR, time);
-    }
-    return mshr;
-}
-
-
-MSHR*
-MissQueue::allocateWrite(PacketPtr &pkt, int size, Tick time)
-{
-    MSHR* mshr = wb.allocate(pkt,size);
-    mshr->order = order++;
-
-//REMOVING COMPRESSION FOR NOW
-#if 0
-    if (pkt->isCompressed()) {
-        mshr->pkt->deleteData();
-        mshr->pkt->actualSize = pkt->actualSize;
-        mshr->pkt->data = new uint8_t[pkt->actualSize];
-        memcpy(mshr->pkt->data, pkt->data, pkt->actualSize);
-    } else {
-#endif
-        memcpy(mshr->pkt->getPtr<uint8_t>(), pkt->getPtr<uint8_t>(), pkt->getSize());
-  //{
-
-    if (wb.isFull()) {
-        cache->setBlocked(Blocked_NoWBBuffers);
-    }
-
-    cache->requestMemSideBus(Request_WB, time);
-
-    return mshr;
-}
-
-
-/**
- * @todo Remove SW prefetches on mshr hits.
- */
-void
-MissQueue::handleMiss(PacketPtr &pkt, int blkSize, Tick time)
-{
-//    if (!cache->isTopLevel())
-    if (prefetchMiss) prefetcher->handleMiss(pkt, time);
-
-    int size = blkSize;
-    Addr blkAddr = pkt->getAddr() & ~(Addr)(blkSize-1);
-    MSHR* mshr = NULL;
-    if (!pkt->req->isUncacheable()) {
-        mshr = mq.findMatch(blkAddr);
-        if (mshr) {
-            //@todo remove hw_pf here
-            mshr_hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
-            if (mshr->threadNum != 0/*pkt->req->getThreadNum()*/) {
-                mshr->threadNum = -1;
-            }
-            mq.allocateTarget(mshr, pkt);
-            if (mshr->pkt->isNoAllocate() && !pkt->isNoAllocate()) {
-                //We are adding an allocate after a no-allocate
-                mshr->pkt->flags &= ~NO_ALLOCATE;
-            }
-            if (mshr->getNumTargets() == numTarget) {
-                noTargetMSHR = mshr;
-                cache->setBlocked(Blocked_NoTargets);
-                mq.moveToFront(mshr);
-            }
-            return;
-        }
-        if (pkt->isNoAllocate()) {
-            //Count no-allocate requests differently
-            mshr_no_allocate_misses++;
-        }
-        else {
-            mshr_misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
-        }
-    } else {
-        //Count uncacheable accesses
-        mshr_uncacheable[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
-        size = pkt->getSize();
-    }
-    if (pkt->isWrite() && (pkt->req->isUncacheable() || !writeAllocate ||
-                               !pkt->needsResponse())) {
-        /**
-         * @todo Add write merging here.
-         */
-        mshr = allocateWrite(pkt, pkt->getSize(), time);
-        return;
-    }
-
-    mshr = allocateMiss(pkt, blkSize, time);
-}
-
-MSHR*
-MissQueue::fetchBlock(Addr addr, int blk_size, Tick time,
-                      PacketPtr &target)
-{
-    Addr blkAddr = addr & ~(Addr)(blk_size - 1);
-    assert(mq.findMatch(addr) == NULL);
-    MSHR *mshr = mq.allocateFetch(blkAddr, blk_size, target);
-    mshr->order = order++;
-    mshr->pkt->flags |= CACHE_LINE_FILL;
-    if (mq.isFull()) {
-        cache->setBlocked(Blocked_NoMSHRs);
-    }
-    cache->requestMemSideBus(Request_MSHR, time);
-    return mshr;
-}
-
-PacketPtr
-MissQueue::getPacket()
-{
-    PacketPtr pkt = mq.getReq();
-    if (((wb.isFull() && wb.inServiceMSHRs == 0) || !pkt ||
-         pkt->time > curTick) && wb.havePending()) {
-        pkt = wb.getReq();
-        // Need to search for earlier miss.
-        MSHR *mshr = mq.findPending(pkt);
-        if (mshr && mshr->order < ((MSHR*)(pkt->senderState))->order) {
-            // Service misses in order until conflict is cleared.
-            return mq.getReq();
-        }
-    }
-    if (pkt) {
-        MSHR* mshr = wb.findPending(pkt);
-        if (mshr /*&& mshr->order < pkt->senderState->order*/) {
-            // The only way this happens is if we are
-            // doing a write and we didn't have permissions
-            // then subsequently saw a writeback(owned got evicted)
-            // We need to make sure to perform the writeback first
-            // To preserve the dirty data, then we can issue the write
-            return wb.getReq();
-        }
-    }
-    else if (!mq.isFull()){
-        //If we have a miss queue slot, we can try a prefetch
-        pkt = prefetcher->getPacket();
-        if (pkt) {
-            //Update statistic on number of prefetches issued (hwpf_mshr_misses)
-            mshr_misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
-            //It will request the bus for the future, but should clear that immedieatley
-            allocateMiss(pkt, pkt->getSize(), curTick);
-            pkt = mq.getReq();
-            assert(pkt); //We should get back a req b/c we just put one in
-        }
-    }
-    return pkt;
-}
-
-void
-MissQueue::setBusCmd(PacketPtr &pkt, MemCmd cmd)
-{
-    assert(pkt->senderState != 0);
-    MSHR * mshr = (MSHR*)pkt->senderState;
-    mshr->originalCmd = pkt->cmd;
-    if (cmd == MemCmd::UpgradeReq || cmd == MemCmd::InvalidateReq) {
-        pkt->flags |= NO_ALLOCATE;
-        pkt->flags &= ~CACHE_LINE_FILL;
-    }
-    else if (!pkt->req->isUncacheable() && !pkt->isNoAllocate() &&
-             cmd.needsResponse()) {
-        pkt->flags |= CACHE_LINE_FILL;
-    }
-    if (pkt->isCacheFill() || pkt->isNoAllocate())
-        pkt->cmd = cmd;
-}
-
-void
-MissQueue::restoreOrigCmd(PacketPtr &pkt)
-{
-    pkt->cmd = ((MSHR*)(pkt->senderState))->originalCmd;
-}
-
-void
-MissQueue::markInService(PacketPtr &pkt, MSHR* mshr)
-{
-    bool unblock = false;
-    BlockedCause cause = NUM_BLOCKED_CAUSES;
-
-    /**
-     * @todo Should include MSHRQueue pointer in MSHR to select the correct
-     * one.
-     */
-    if ((!pkt->isCacheFill() && pkt->isWrite())) {
-        // Forwarding a write/ writeback, don't need to change
-        // the command
-        unblock = wb.isFull();
-        wb.markInService(mshr);
-        if (!wb.havePending()){
-            cache->deassertMemSideBusRequest(Request_WB);
-        }
-        if (unblock) {
-            // Do we really unblock?
-            unblock = !wb.isFull();
-            cause = Blocked_NoWBBuffers;
-        }
-    } else {
-        unblock = mq.isFull();
-        mq.markInService(mshr);
-        if (!mq.havePending()){
-            cache->deassertMemSideBusRequest(Request_MSHR);
-        }
-        if (mshr->originalCmd == MemCmd::HardPFReq) {
-            DPRINTF(HWPrefetch, "%s:Marking a HW_PF in service\n",
-                    cache->name());
-            //Also clear pending if need be
-            if (!prefetcher->havePending())
-            {
-                cache->deassertMemSideBusRequest(Request_PF);
-            }
-        }
-        if (unblock) {
-            unblock = !mq.isFull();
-            cause = Blocked_NoMSHRs;
-        }
-    }
-    if (unblock) {
-        cache->clearBlocked(cause);
-    }
-}
-
-
-void
-MissQueue::handleResponse(PacketPtr &pkt, Tick time)
-{
-    MSHR* mshr = (MSHR*)pkt->senderState;
-    if (((MSHR*)(pkt->senderState))->originalCmd == MemCmd::HardPFReq) {
-        DPRINTF(HWPrefetch, "%s:Handling the response to a HW_PF\n",
-                cache->name());
-    }
-#ifndef NDEBUG
-    int num_targets = mshr->getNumTargets();
-#endif
-
-    bool unblock = false;
-    bool unblock_target = false;
-    BlockedCause cause = NUM_BLOCKED_CAUSES;
-
-    if (pkt->isCacheFill() && !pkt->isNoAllocate()) {
-        mshr_miss_latency[mshr->originalCmd.toInt()][0/*pkt->req->getThreadNum()*/] +=
-            curTick - pkt->time;
-        // targets were handled in the cache tags
-        if (mshr == noTargetMSHR) {
-            // we always clear at least one target
-            unblock_target = true;
-            cause = Blocked_NoTargets;
-            noTargetMSHR = NULL;
-        }
-
-        if (mshr->hasTargets()) {
-            // Didn't satisfy all the targets, need to resend
-            MemCmd cmd = mshr->getTarget()->cmd;
-            mshr->pkt->setDest(Packet::Broadcast);
-            mshr->pkt->result = Packet::Unknown;
-            mshr->pkt->req = mshr->getTarget()->req;
-            mq.markPending(mshr, cmd);
-            mshr->order = order++;
-            cache->requestMemSideBus(Request_MSHR, time);
-        }
-        else {
-            unblock = mq.isFull();
-            mq.deallocate(mshr);
-            if (unblock) {
-                unblock = !mq.isFull();
-                cause = Blocked_NoMSHRs;
-            }
-        }
-    } else {
-        if (pkt->req->isUncacheable()) {
-            mshr_uncacheable_lat[pkt->cmd.toInt()][0/*pkt->req->getThreadNum()*/] +=
-                curTick - pkt->time;
-        }
-        if (mshr->hasTargets() && pkt->req->isUncacheable()) {
-            // Should only have 1 target if we had any
-            assert(num_targets == 1);
-            PacketPtr target = mshr->getTarget();
-            mshr->popTarget();
-            if (pkt->isRead()) {
-                memcpy(target->getPtr<uint8_t>(), pkt->getPtr<uint8_t>(),
-                       target->getSize());
-            }
-            cache->respond(target, time);
-            assert(!mshr->hasTargets());
-        }
-        else if (mshr->hasTargets()) {
-            //Must be a no_allocate with possibly more than one target
-            assert(mshr->pkt->isNoAllocate());
-            while (mshr->hasTargets()) {
-                PacketPtr target = mshr->getTarget();
-                mshr->popTarget();
-                if (pkt->isRead()) {
-                    memcpy(target->getPtr<uint8_t>(), pkt->getPtr<uint8_t>(),
-                           target->getSize());
-                }
-                cache->respond(target, time);
-            }
-        }
-
-        if (pkt->isWrite()) {
-            // If the wrtie buffer is full, we might unblock now
-            unblock = wb.isFull();
-            wb.deallocate(mshr);
-            if (unblock) {
-                // Did we really unblock?
-                unblock = !wb.isFull();
-                cause = Blocked_NoWBBuffers;
-            }
-        } else {
-            unblock = mq.isFull();
-            mq.deallocate(mshr);
-            if (unblock) {
-                unblock = !mq.isFull();
-                cause = Blocked_NoMSHRs;
-            }
-        }
-    }
-    if (unblock || unblock_target) {
-        cache->clearBlocked(cause);
-    }
-}
-
-void
-MissQueue::squash(int threadNum)
-{
-    bool unblock = false;
-    BlockedCause cause = NUM_BLOCKED_CAUSES;
-
-    if (noTargetMSHR && noTargetMSHR->threadNum == threadNum) {
-        noTargetMSHR = NULL;
-        unblock = true;
-        cause = Blocked_NoTargets;
-    }
-    if (mq.isFull()) {
-        unblock = true;
-        cause = Blocked_NoMSHRs;
-    }
-    mq.squash(threadNum);
-    if (!mq.havePending()) {
-        cache->deassertMemSideBusRequest(Request_MSHR);
-    }
-    if (unblock && !mq.isFull()) {
-        cache->clearBlocked(cause);
-    }
-
-}
-
-MSHR*
-MissQueue::findMSHR(Addr addr)
-{
-    return mq.findMatch(addr);
-}
-
-bool
-MissQueue::findWrites(Addr addr, vector<MSHR*> &writes)
-{
-    return wb.findMatches(addr,writes);
-}
-
-void
-MissQueue::doWriteback(Addr addr,
-                       int size, uint8_t *data, bool compressed)
-{
-    // Generate request
-    Request * req = new Request(addr, size, 0);
-    PacketPtr pkt = new Packet(req, MemCmd::Writeback, -1);
-    pkt->allocate();
-    if (data) {
-        memcpy(pkt->getPtr<uint8_t>(), data, size);
-    }
-
-    if (compressed) {
-        pkt->flags |= COMPRESSED;
-    }
-
-    ///All writebacks charged to same thread @todo figure this out
-    writebacks[0/*pkt->req->getThreadNum()*/]++;
-
-    allocateWrite(pkt, 0, curTick);
-}
-
-
-void
-MissQueue::doWriteback(PacketPtr &pkt)
-{
-    writebacks[0/*pkt->req->getThreadNum()*/]++;
-    allocateWrite(pkt, 0, curTick);
-}
-
-
-MSHR*
-MissQueue::allocateTargetList(Addr addr)
-{
-   MSHR* mshr = mq.allocateTargetList(addr, blkSize);
-   mshr->pkt->flags |= CACHE_LINE_FILL;
-   if (mq.isFull()) {
-       cache->setBlocked(Blocked_NoMSHRs);
-   }
-   return mshr;
-}
-
-bool
-MissQueue::havePending()
-{
-    return mq.havePending() || wb.havePending() || prefetcher->havePending();
-}
diff --git a/src/mem/cache/miss/miss_queue.hh b/src/mem/cache/miss/miss_queue.hh
deleted file mode 100644
index d3560ff36..000000000
--- a/src/mem/cache/miss/miss_queue.hh
+++ /dev/null
@@ -1,327 +0,0 @@
-/*
- * Copyright (c) 2003-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Erik Hallnor
- */
-
-/**
- * @file
- * Miss and writeback queue declarations.
- */
-
-#ifndef __MISS_QUEUE_HH__
-#define __MISS_QUEUE_HH__
-
-#include <vector>
-
-#include "mem/cache/miss/miss_buffer.hh"
-#include "mem/cache/miss/mshr.hh"
-#include "mem/cache/miss/mshr_queue.hh"
-#include "base/statistics.hh"
-
-/**
- * Manages cache misses and writebacks. Contains MSHRs to store miss data
- * and the writebuffer for writes/writebacks.
- * @todo need to handle data on writes better (encapsulate).
- * @todo need to make replacements/writebacks happen in Cache::access
- */
-class MissQueue : public MissBuffer
-{
-  protected:
-    /** The MSHRs. */
-    MSHRQueue mq;
-    /** Write Buffer. */
-    MSHRQueue wb;
-
-    // PARAMTERS
-
-    /** The number of MSHRs in the miss queue. */
-    const int numMSHR;
-    /** The number of targets for each MSHR. */
-    const int numTarget;
-    /** The number of write buffers. */
-    const int writeBuffers;
-
-    /** Increasing order number assigned to each incoming request. */
-    uint64_t order;
-
-    bool prefetchMiss;
-
-    // Statistics
-    /**
-     * @addtogroup CacheStatistics
-     * @{
-     */
-    /** Number of misses that hit in the MSHRs per command and thread. */
-    Stats::Vector<> mshr_hits[MemCmd::NUM_MEM_CMDS];
-    /** Demand misses that hit in the MSHRs. */
-    Stats::Formula demandMshrHits;
-    /** Total number of misses that hit in the MSHRs. */
-    Stats::Formula overallMshrHits;
-
-    /** Number of misses that miss in the MSHRs, per command and thread. */
-    Stats::Vector<> mshr_misses[MemCmd::NUM_MEM_CMDS];
-    /** Demand misses that miss in the MSHRs. */
-    Stats::Formula demandMshrMisses;
-    /** Total number of misses that miss in the MSHRs. */
-    Stats::Formula overallMshrMisses;
-
-    /** Number of misses that miss in the MSHRs, per command and thread. */
-    Stats::Vector<> mshr_uncacheable[MemCmd::NUM_MEM_CMDS];
-    /** Total number of misses that miss in the MSHRs. */
-    Stats::Formula overallMshrUncacheable;
-
-    /** Total cycle latency of each MSHR miss, per command and thread. */
-    Stats::Vector<> mshr_miss_latency[MemCmd::NUM_MEM_CMDS];
-    /** Total cycle latency of demand MSHR misses. */
-    Stats::Formula demandMshrMissLatency;
-    /** Total cycle latency of overall MSHR misses. */
-    Stats::Formula overallMshrMissLatency;
-
-    /** Total cycle latency of each MSHR miss, per command and thread. */
-    Stats::Vector<> mshr_uncacheable_lat[MemCmd::NUM_MEM_CMDS];
-    /** Total cycle latency of overall MSHR misses. */
-    Stats::Formula overallMshrUncacheableLatency;
-
-    /** The total number of MSHR accesses per command and thread. */
-    Stats::Formula mshrAccesses[MemCmd::NUM_MEM_CMDS];
-    /** The total number of demand MSHR accesses. */
-    Stats::Formula demandMshrAccesses;
-    /** The total number of MSHR accesses. */
-    Stats::Formula overallMshrAccesses;
-
-    /** The miss rate in the MSHRs pre command and thread. */
-    Stats::Formula mshrMissRate[MemCmd::NUM_MEM_CMDS];
-    /** The demand miss rate in the MSHRs. */
-    Stats::Formula demandMshrMissRate;
-    /** The overall miss rate in the MSHRs. */
-    Stats::Formula overallMshrMissRate;
-
-    /** The average latency of an MSHR miss, per command and thread. */
-    Stats::Formula avgMshrMissLatency[MemCmd::NUM_MEM_CMDS];
-    /** The average latency of a demand MSHR miss. */
-    Stats::Formula demandAvgMshrMissLatency;
-    /** The average overall latency of an MSHR miss. */
-    Stats::Formula overallAvgMshrMissLatency;
-
-    /** The average latency of an MSHR miss, per command and thread. */
-    Stats::Formula avgMshrUncacheableLatency[MemCmd::NUM_MEM_CMDS];
-    /** The average overall latency of an MSHR miss. */
-    Stats::Formula overallAvgMshrUncacheableLatency;
-
-    /** The number of times a thread hit its MSHR cap. */
-    Stats::Vector<> mshr_cap_events;
-    /** The number of times software prefetches caused the MSHR to block. */
-    Stats::Vector<> soft_prefetch_mshr_full;
-
-    Stats::Scalar<> mshr_no_allocate_misses;
-
-    /**
-     * @}
-     */
-
-  private:
-    /** Pointer to the MSHR that has no targets. */
-    MSHR* noTargetMSHR;
-
-    /**
-     * Allocate a new MSHR to handle the provided miss.
-     * @param pkt The miss to buffer.
-     * @param size The number of bytes to fetch.
-     * @param time The time the miss occurs.
-     * @return A pointer to the new MSHR.
-     */
-    MSHR* allocateMiss(PacketPtr &pkt, int size, Tick time);
-
-    /**
-     * Allocate a new WriteBuffer to handle the provided write.
-     * @param pkt The write to handle.
-     * @param size The number of bytes to write.
-     * @param time The time the write occurs.
-     * @return A pointer to the new write buffer.
-     */
-    MSHR* allocateWrite(PacketPtr &pkt, int size, Tick time);
-
-  public:
-    /**
-     * Simple Constructor. Initializes all needed internal storage and sets
-     * parameters.
-     * @param numMSHRs The number of outstanding misses to handle.
-     * @param numTargets The number of outstanding targets to each miss.
-     * @param write_buffers The number of outstanding writes to handle.
-     * @param write_allocate If true, treat write misses the same as reads.
-     */
-    MissQueue(int numMSHRs, int numTargets, int write_buffers,
-              bool write_allocate, bool prefetch_miss);
-
-    /**
-     * Deletes all allocated internal storage.
-     */
-    ~MissQueue();
-
-    /**
-     * Register statistics for this object.
-     * @param name The name of the parent cache.
-     */
-    void regStats(const std::string &name);
-
-    /**
-     * Handle a cache miss properly. Either allocate an MSHR for the request,
-     * or forward it through the write buffer.
-     * @param pkt The request that missed in the cache.
-     * @param blk_size The block size of the cache.
-     * @param time The time the miss is detected.
-     */
-    void handleMiss(PacketPtr &pkt, int blk_size, Tick time);
-
-    /**
-     * Fetch the block for the given address and buffer the given target.
-     * @param addr The address to fetch.
-     * @param asid The address space of the address.
-     * @param blk_size The block size of the cache.
-     * @param time The time the miss is detected.
-     * @param target The target for the fetch.
-     */
-    MSHR* fetchBlock(Addr addr, int blk_size, Tick time,
-                     PacketPtr &target);
-
-    /**
-     * Selects a outstanding request to service.
-     * @return The request to service, NULL if none found.
-     */
-    PacketPtr getPacket();
-
-    /**
-     * Set the command to the given bus command.
-     * @param pkt The request to update.
-     * @param cmd The bus command to use.
-     */
-    void setBusCmd(PacketPtr &pkt, MemCmd cmd);
-
-    /**
-     * Restore the original command in case of a bus transmission error.
-     * @param pkt The request to reset.
-     */
-    void restoreOrigCmd(PacketPtr &pkt);
-
-    /**
-     * Marks a request as in service (sent on the bus). This can have side
-     * effect since storage for no response commands is deallocated once they
-     * are successfully sent.
-     * @param pkt The request that was sent on the bus.
-     */
-    void markInService(PacketPtr &pkt, MSHR* mshr);
-
-    /**
-     * Collect statistics and free resources of a satisfied request.
-     * @param pkt The request that has been satisfied.
-     * @param time The time when the request is satisfied.
-     */
-    void handleResponse(PacketPtr &pkt, Tick time);
-
-    /**
-     * Removes all outstanding requests for a given thread number. If a request
-     * has been sent to the bus, this function removes all of its targets.
-     * @param threadNum The thread number of the requests to squash.
-     */
-    void squash(int threadNum);
-
-    /**
-     * Return the current number of outstanding misses.
-     * @return the number of outstanding misses.
-     */
-    int getMisses()
-    {
-        return mq.getAllocatedTargets();
-    }
-
-    /**
-     * Searches for the supplied address in the miss queue.
-     * @param addr The address to look for.
-     * @param asid The address space id.
-     * @return The MSHR that contains the address, NULL if not found.
-     * @warning Currently only searches the miss queue. If non write allocate
-     * might need to search the write buffer for coherence.
-     */
-    MSHR* findMSHR(Addr addr);
-
-    /**
-     * Searches for the supplied address in the write buffer.
-     * @param addr The address to look for.
-     * @param asid The address space id.
-     * @param writes The list of writes that match the address.
-     * @return True if any writes are found
-     */
-    bool findWrites(Addr addr, std::vector<MSHR*>& writes);
-
-    /**
-     * Perform a writeback of dirty data to the given address.
-     * @param addr The address to write to.
-     * @param asid The address space id.
-     * @param xc The execution context of the address space.
-     * @param size The number of bytes to write.
-     * @param data The data to write, can be NULL.
-     * @param compressed True if the data is compressed.
-     */
-    void doWriteback(Addr addr,
-                     int size, uint8_t *data, bool compressed);
-
-    /**
-     * Perform the given writeback request.
-     * @param pkt The writeback request.
-     */
-    void doWriteback(PacketPtr &pkt);
-
-    /**
-     * Returns true if there are outstanding requests.
-     * @return True if there are outstanding requests.
-     */
-    bool havePending();
-
-    /**
-     * Add a target to the given MSHR. This assumes it is in the miss queue.
-     * @param mshr The mshr to add a target to.
-     * @param pkt The target to add.
-     */
-    void addTarget(MSHR *mshr, PacketPtr &pkt)
-    {
-        mq.allocateTarget(mshr, pkt);
-    }
-
-    /**
-     * Allocate a MSHR to hold a list of targets to a block involved in a copy.
-     * If the block is marked done then the MSHR already holds the data to
-     * fill the block. Otherwise the block needs to be fetched.
-     * @param addr The address to buffer.
-     * @param asid The address space ID.
-     * @return A pointer to the allocated MSHR.
-     */
-    MSHR* allocateTargetList(Addr addr);
-
-};
-
-#endif //__MISS_QUEUE_HH__
diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc
index 74dad658b..218d42339 100644
--- a/src/mem/cache/miss/mshr.cc
+++ b/src/mem/cache/miss/mshr.cc
@@ -54,45 +54,20 @@ MSHR::MSHR()
 }
 
 void
-MSHR::allocate(MemCmd cmd, Addr _addr, int size,
-               PacketPtr &target)
+MSHR::allocate(Addr _addr, int _size, PacketPtr target, bool cacheFill)
 {
     addr = _addr;
-    if (target)
-    {
-        //Have a request, just use it
-        pkt = new Packet(target->req, cmd, Packet::Broadcast, size);
-        pkt->time = curTick;
-        pkt->allocate();
-        pkt->senderState = (Packet::SenderState *)this;
-        allocateTarget(target);
-    }
-    else
-    {
-        //need a request first
-        Request * req = new Request();
-        req->setPhys(addr, size, 0);
-        //Thread context??
-        pkt = new Packet(req, cmd, Packet::Broadcast, size);
-        pkt->time = curTick;
-        pkt->allocate();
-        pkt->senderState = (Packet::SenderState *)this;
-    }
-}
-
-// Since we aren't sure if data is being used, don't copy here.
-/**
- * @todo When we have a "global" data flag, might want to copy data here.
- */
-void
-MSHR::allocateAsBuffer(PacketPtr &target)
-{
-    addr = target->getAddr();
-    threadNum = 0/*target->req->getThreadNum()*/;
-    pkt = new Packet(target->req, target->cmd, -1);
-    pkt->allocate();
-    pkt->senderState = (Packet::SenderState*)this;
-    pkt->time = curTick;
+    size = _size;
+    assert(target);
+    isCacheFill = cacheFill;
+    needsExclusive = target->needsExclusive();
+    _isUncacheable = target->req->isUncacheable();
+    inService = false;
+    threadNum = 0;
+    ntargets = 1;
+    // Don't know of a case where we would allocate a new MSHR for a
+    // snoop (mem-side request), so set cpuSide to true here.
+    targets.push_back(Target(target, true));
 }
 
 void
@@ -100,8 +75,6 @@ MSHR::deallocate()
 {
     assert(targets.empty());
     assert(ntargets == 0);
-    delete pkt;
-    pkt = NULL;
     inService = false;
     //allocIter = NULL;
     //readyIter = NULL;
@@ -111,16 +84,17 @@ MSHR::deallocate()
  * Adds a target to an MSHR
  */
 void
-MSHR::allocateTarget(PacketPtr &target)
+MSHR::allocateTarget(PacketPtr target, bool cpuSide)
 {
     //If we append an invalidate and we issued a read to the bus,
     //but now have some pending writes, we need to move
     //the invalidate to before the first non-read
-    if (inService && pkt->isRead() && target->isInvalidate()) {
-        std::list<PacketPtr> temp;
+    if (inService && !inServiceForExclusive && needsExclusive
+        && !cpuSide && target->isInvalidate()) {
+        std::list<Target> temp;
 
         while (!targets.empty()) {
-            if (!targets.front()->isRead()) break;
+            if (targets.front().pkt->needsExclusive()) break;
             //Place on top of temp stack
             temp.push_front(targets.front());
             //Remove from targets
@@ -129,7 +103,7 @@ MSHR::allocateTarget(PacketPtr &target)
 
         //Now that we have all the reads off until first non-read, we can
         //place the invalidate on
-        targets.push_front(target);
+        targets.push_front(Target(target, cpuSide));
 
         //Now we pop off the temp_stack and put them back
         while (!temp.empty()) {
@@ -138,22 +112,16 @@ MSHR::allocateTarget(PacketPtr &target)
         }
     }
     else {
-        targets.push_back(target);
+        targets.push_back(Target(target, cpuSide));
     }
 
     ++ntargets;
     assert(targets.size() == ntargets);
-    /**
-     * @todo really prioritize the target commands.
-     */
 
-    if (!inService && target->isWrite()) {
-        pkt->cmd = MemCmd::WriteReq;
-    }
+    needsExclusive = needsExclusive || target->needsExclusive();
 }
 
 
-
 void
 MSHR::dump()
 {
@@ -167,8 +135,8 @@ MSHR::dump()
     for (int i = 0; i < ntargets; i++) {
         assert(tar_it != targets.end());
 
-        ccprintf(cerr, "\t%d: Addr: %x cmd: %d\n",
-                 i, (*tar_it)->getAddr(), (*tar_it)->cmdToIndex());
+        ccprintf(cerr, "\t%d: Addr: %x cmd: %s\n",
+                 i, tar_it->pkt->getAddr(), tar_it->pkt->cmdString());
 
         tar_it++;
     }
@@ -177,6 +145,4 @@ MSHR::dump()
 
 MSHR::~MSHR()
 {
-    if (pkt)
-        pkt = NULL;
 }
diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh
index d0410acda..b38b69c52 100644
--- a/src/mem/cache/miss/mshr.hh
+++ b/src/mem/cache/miss/mshr.hh
@@ -36,22 +36,39 @@
 #ifndef __MSHR_HH__
 #define __MSHR_HH__
 
-#include "mem/packet.hh"
 #include <list>
-#include <deque>
 
-class MSHR;
+#include "mem/packet.hh"
+
+class CacheBlk;
+class MSHRQueue;
 
 /**
  * Miss Status and handling Register. This class keeps all the information
  * needed to handle a cache miss including a list of target requests.
  */
-class MSHR {
+class MSHR : public Packet::SenderState
+{
+
   public:
+
+    class Target {
+      public:
+        Tick time;      //!< Time when request was received (for stats)
+        PacketPtr pkt;  //!< Pending request packet.
+        bool cpuSide;   //!< Did request come from cpu side or mem side?
+
+        bool isCpuSide() { return cpuSide; }
+
+        Target(PacketPtr _pkt, bool _cpuSide, Tick _time = curTick)
+            : time(_time), pkt(_pkt), cpuSide(_cpuSide)
+        {}
+    };
+
     /** Defines the Data structure of the MSHR targetlist. */
-    typedef std::list<PacketPtr> TargetList;
+    typedef std::list<Target> TargetList;
     /** Target list iterator. */
-    typedef std::list<PacketPtr>::iterator TargetListIterator;
+    typedef std::list<Target>::iterator TargetListIterator;
     /** A list of MSHRs. */
     typedef std::list<MSHR *> List;
     /** MSHR list iterator. */
@@ -59,20 +76,35 @@ class MSHR {
     /** MSHR list const_iterator. */
     typedef List::const_iterator ConstIterator;
 
-    /** Address of the miss. */
+    /** Pointer to queue containing this MSHR. */
+    MSHRQueue *queue;
+
+    /** Address of the request. */
     Addr addr;
-    /** Adress space id of the miss. */
-    short asid;
+
+    /** Size of the request. */
+    int size;
+
+    /** Data associated with the request (if a write). */
+    uint8_t *writeData;
+
     /** True if the request has been sent to the bus. */
     bool inService;
+
+    /** True if we will be putting the returned block in the cache */
+    bool isCacheFill;
+    /** True if we need to get an exclusive copy of the block. */
+    bool needsExclusive;
+    /** True if the request is uncacheable */
+    bool _isUncacheable;
+
+    /** True if the request that has been sent to the bus is for an
+     * exclusive copy. */
+    bool inServiceForExclusive;
     /** Thread number of the miss. */
-    int threadNum;
-    /** The request that is forwarded to the next level of the hierarchy. */
-    PacketPtr pkt;
+    short threadNum;
     /** The number of currently allocated targets. */
     short ntargets;
-    /** The original requesting command. */
-    MemCmd originalCmd;
     /** Order number of assigned by the miss queue. */
     uint64_t order;
 
@@ -81,6 +113,7 @@ class MSHR {
      * @sa MissQueue, MSHRQueue::readyList
      */
     Iterator readyIter;
+
     /**
      * Pointer to this MSHR on the allocated list.
      * @sa MissQueue, MSHRQueue::allocatedList
@@ -92,6 +125,9 @@ private:
     TargetList targets;
 
 public:
+
+    bool isUncacheable() { return _isUncacheable; }
+
     /**
      * Allocate a miss to this MSHR.
      * @param cmd The requesting command.
@@ -100,14 +136,13 @@ public:
      * @param size The number of bytes to request.
      * @param pkt  The original miss.
      */
-    void allocate(MemCmd cmd, Addr addr, int size,
-                  PacketPtr &pkt);
+    void allocate(Addr addr, int size, PacketPtr pkt, bool isFill);
 
     /**
      * Allocate this MSHR as a buffer for the given request.
      * @param target The memory request to buffer.
      */
-    void allocateAsBuffer(PacketPtr &target);
+    void allocateAsBuffer(PacketPtr target);
 
     /**
      * Mark this MSHR as free.
@@ -118,7 +153,7 @@ public:
      * Add a request to the list of targets.
      * @param target The target.
      */
-    void allocateTarget(PacketPtr &target);
+    void allocateTarget(PacketPtr target, bool cpuSide);
 
     /** A simple constructor. */
     MSHR();
@@ -131,7 +166,7 @@ public:
      */
     int getNumTargets()
     {
-        return(ntargets);
+        return ntargets;
     }
 
     /**
@@ -147,9 +182,9 @@ public:
      * Returns a reference to the first target.
      * @return A pointer to the first target.
      */
-    PacketPtr getTarget()
+    Target *getTarget()
     {
-        return targets.front();
+        return &targets.front();
     }
 
     /**
diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc
index e9aa89bf8..d58594798 100644
--- a/src/mem/cache/miss/mshr_queue.cc
+++ b/src/mem/cache/miss/mshr_queue.cc
@@ -29,22 +29,21 @@
  */
 
 /** @file
- * Definition of the MSHRQueue.
+ * Definition of MSHRQueue class functions.
  */
 
 #include "mem/cache/miss/mshr_queue.hh"
-#include "sim/eventq.hh"
 
 using namespace std;
 
-MSHRQueue::MSHRQueue(int num_mshrs, int reserve)
-    : numMSHRs(num_mshrs + reserve - 1), numReserve(reserve)
+MSHRQueue::MSHRQueue(int num_entries, int reserve)
+    : numEntries(num_entries + reserve - 1), numReserve(reserve)
 {
     allocated = 0;
-    inServiceMSHRs = 0;
-    allocatedTargets = 0;
-    registers = new MSHR[numMSHRs];
-    for (int i = 0; i < numMSHRs; ++i) {
+    inServiceEntries = 0;
+    registers = new MSHR[numEntries];
+    for (int i = 0; i < numEntries; ++i) {
+        registers[i].queue = this;
         freeList.push_back(&registers[i]);
     }
 }
@@ -54,7 +53,7 @@ MSHRQueue::~MSHRQueue()
     delete [] registers;
 }
 
-MSHR*
+MSHR *
 MSHRQueue::findMatch(Addr addr) const
 {
     MSHR::ConstIterator i = allocatedList.begin();
@@ -87,19 +86,19 @@ MSHRQueue::findMatches(Addr addr, vector<MSHR*>& matches) const
 
 }
 
-MSHR*
-MSHRQueue::findPending(PacketPtr &pkt) const
+MSHR *
+MSHRQueue::findPending(Addr addr, int size) const
 {
     MSHR::ConstIterator i = pendingList.begin();
     MSHR::ConstIterator end = pendingList.end();
     for (; i != end; ++i) {
         MSHR *mshr = *i;
-        if (mshr->addr < pkt->getAddr()) {
-            if (mshr->addr + mshr->pkt->getSize() > pkt->getAddr()) {
+        if (mshr->addr < addr) {
+            if (mshr->addr + mshr->size > addr) {
                 return mshr;
             }
         } else {
-            if (pkt->getAddr() + pkt->getSize() > mshr->addr) {
+            if (addr + size > mshr->addr) {
                 return mshr;
             }
         }
@@ -107,21 +106,15 @@ MSHRQueue::findPending(PacketPtr &pkt) const
     return NULL;
 }
 
-MSHR*
-MSHRQueue::allocate(PacketPtr &pkt, int size)
+MSHR *
+MSHRQueue::allocate(Addr addr, int size, PacketPtr &pkt, bool isFill)
 {
-    Addr aligned_addr = pkt->getAddr() & ~((Addr)size - 1);
     assert(!freeList.empty());
     MSHR *mshr = freeList.front();
     assert(mshr->getNumTargets() == 0);
     freeList.pop_front();
 
-    if (!pkt->needsResponse()) {
-        mshr->allocateAsBuffer(pkt);
-    } else {
-        mshr->allocate(pkt->cmd, aligned_addr, size, pkt);
-        allocatedTargets += 1;
-    }
+    mshr->allocate(addr, size, pkt, isFill);
     mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr);
     mshr->readyIter = pendingList.insert(pendingList.end(), mshr);
 
@@ -129,51 +122,21 @@ MSHRQueue::allocate(PacketPtr &pkt, int size)
     return mshr;
 }
 
-MSHR*
-MSHRQueue::allocateFetch(Addr addr, int size, PacketPtr &target)
-{
-    MSHR *mshr = freeList.front();
-    assert(mshr->getNumTargets() == 0);
-    freeList.pop_front();
-    mshr->allocate(MemCmd::ReadReq, addr, size, target);
-    mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr);
-    mshr->readyIter = pendingList.insert(pendingList.end(), mshr);
-
-    allocated += 1;
-    return mshr;
-}
-
-MSHR*
-MSHRQueue::allocateTargetList(Addr addr, int size)
-{
-    MSHR *mshr = freeList.front();
-    assert(mshr->getNumTargets() == 0);
-    freeList.pop_front();
-    PacketPtr dummy;
-    mshr->allocate(MemCmd::ReadReq, addr, size, dummy);
-    mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr);
-    mshr->inService = true;
-    ++inServiceMSHRs;
-    ++allocated;
-    return mshr;
-}
-
 
 void
-MSHRQueue::deallocate(MSHR* mshr)
+MSHRQueue::deallocate(MSHR *mshr)
 {
     deallocateOne(mshr);
 }
 
 MSHR::Iterator
-MSHRQueue::deallocateOne(MSHR* mshr)
+MSHRQueue::deallocateOne(MSHR *mshr)
 {
     MSHR::Iterator retval = allocatedList.erase(mshr->allocIter);
     freeList.push_front(mshr);
     allocated--;
-    allocatedTargets -= mshr->getNumTargets();
     if (mshr->inService) {
-        inServiceMSHRs--;
+        inServiceEntries--;
     } else {
         pendingList.erase(mshr->readyIter);
     }
@@ -192,29 +155,29 @@ MSHRQueue::moveToFront(MSHR *mshr)
 }
 
 void
-MSHRQueue::markInService(MSHR* mshr)
+MSHRQueue::markInService(MSHR *mshr)
 {
     //assert(mshr == pendingList.front());
+#if 0
     if (!mshr->pkt->needsResponse() && !(mshr->pkt->cmd == MemCmd::UpgradeReq)) {
         assert(mshr->getNumTargets() == 0);
         deallocate(mshr);
         return;
     }
+#endif
     mshr->inService = true;
     pendingList.erase(mshr->readyIter);
     //mshr->readyIter = NULL;
-    inServiceMSHRs += 1;
+    inServiceEntries += 1;
     //pendingList.pop_front();
 }
 
 void
-MSHRQueue::markPending(MSHR* mshr, MemCmd cmd)
+MSHRQueue::markPending(MSHR *mshr)
 {
     //assert(mshr->readyIter == NULL);
-    mshr->pkt->cmd = cmd;
-    mshr->pkt->flags &= ~SATISFIED;
     mshr->inService = false;
-    --inServiceMSHRs;
+    --inServiceEntries;
     /**
      * @ todo might want to add rerequests to front of pending list for
      * performance.
@@ -231,11 +194,8 @@ MSHRQueue::squash(int threadNum)
         MSHR *mshr = *i;
         if (mshr->threadNum == threadNum) {
             while (mshr->hasTargets()) {
-                PacketPtr target = mshr->getTarget();
                 mshr->popTarget();
-
                 assert(0/*target->req->getThreadNum()*/ == threadNum);
-                target = NULL;
             }
             assert(!mshr->hasTargets());
             assert(mshr->ntargets==0);
diff --git a/src/mem/cache/miss/mshr_queue.hh b/src/mem/cache/miss/mshr_queue.hh
index 5069db661..182dfd5b2 100644
--- a/src/mem/cache/miss/mshr_queue.hh
+++ b/src/mem/cache/miss/mshr_queue.hh
@@ -32,71 +32,71 @@
  * Declaration of a structure to manage MSHRs.
  */
 
-#ifndef __MSHR_QUEUE_HH__
-#define __MSHR_QUEUE_HH__
+#ifndef __MEM__CACHE__MISS__MSHR_QUEUE_HH__
+#define __MEM__CACHE__MISS__MSHR_QUEUE_HH__
 
 #include <vector>
+
+#include "mem/packet.hh"
 #include "mem/cache/miss/mshr.hh"
 
 /**
  * A Class for maintaining a list of pending and allocated memory requests.
  */
-class MSHRQueue {
+class MSHRQueue
+{
   private:
     /**  MSHR storage. */
-    MSHR* registers;
-    /** Holds pointers to all allocated MSHRs. */
+    MSHR *registers;
+    /** Holds pointers to all allocated entries. */
     MSHR::List allocatedList;
-    /** Holds pointers to MSHRs that haven't been sent to the bus. */
+    /** Holds pointers to entries that haven't been sent to the bus. */
     MSHR::List pendingList;
-    /** Holds non allocated MSHRs. */
+    /** Holds non allocated entries. */
     MSHR::List freeList;
 
     // Parameters
     /**
-     * The total number of MSHRs in this queue. This number is set as the
-     * number of MSHRs requested plus (numReserve - 1). This allows for
-     * the same number of effective MSHRs while still maintaining the reserve.
+     * The total number of entries in this queue. This number is set as the
+     * number of entries requested plus (numReserve - 1). This allows for
+     * the same number of effective entries while still maintaining the reserve.
      */
-    const int numMSHRs;
+    const int numEntries;
 
     /**
-     * The number of MSHRs to hold in reserve. This is needed because copy
-     * operations can allocate upto 4 MSHRs at one time.
+     * The number of entries to hold in reserve. This is needed because copy
+     * operations can allocate up to 4 entries at one time.
      */
     const int numReserve;
 
   public:
-    /** The number of allocated MSHRs. */
+    /** The number of allocated entries. */
     int allocated;
-    /** The number of MSHRs that have been forwarded to the bus. */
-    int inServiceMSHRs;
-    /** The number of targets waiting for response. */
-    int allocatedTargets;
+    /** The number of entries that have been forwarded to the bus. */
+    int inServiceEntries;
 
     /**
-     * Create a queue with a given number of MSHRs.
-     * @param num_mshrs The number of MSHRs in this queue.
-     * @param reserve The minimum number of MSHRs needed to satisfy any access.
+     * Create a queue with a given number of entries.
+     * @param num_entries The number of entries in this queue.
+     * @param reserve The minimum number of entries needed to satisfy
+     * any access.
      */
-    MSHRQueue(int num_mshrs, int reserve = 1);
+    MSHRQueue(int num_entries, int reserve = 1);
 
     /** Destructor */
     ~MSHRQueue();
 
     /**
-     * Find the first MSHR that matches the provide address and asid.
+     * Find the first MSHR that matches the provided address.
      * @param addr The address to find.
-     * @param asid The address space id.
      * @return Pointer to the matching MSHR, null if not found.
      */
-    MSHR* findMatch(Addr addr) const;
+    MSHR *findMatch(Addr addr) const;
 
     /**
-     * Find and return all the matching MSHRs in the provided vector.
+     * Find and return all the matching entries in the provided vector.
      * @param addr The address to find.
-     * @param asid The address space ID.
-     * @param matches The vector to return pointers to the matching MSHRs.
+     * @param matches The vector to return pointers to the matching entries.
      * @return True if any matches are found, false otherwise.
      * @todo Typedef the vector??
      */
@@ -107,7 +107,7 @@ class MSHRQueue {
      * @param pkt The request to find.
      * @return A pointer to the earliest matching MSHR.
      */
-    MSHR* findPending(PacketPtr &pkt) const;
+    MSHR *findPending(Addr addr, int size) const;
 
     /**
      * Allocates a new MSHR for the request and size. This places the request
@@ -116,60 +116,29 @@ class MSHRQueue {
      * @param size The number in bytes to fetch from memory.
      * @return The a pointer to the MSHR allocated.
      *
-     * @pre There are free MSHRs.
+     * @pre There are free entries.
      */
-    MSHR* allocate(PacketPtr &pkt, int size = 0);
-
-    /**
-     * Allocate a read request for the given address, and places the given
-     * target on the target list.
-     * @param addr The address to fetch.
-     * @param asid The address space for the fetch.
-     * @param size The number of bytes to request.
-     * @param target The first target for the request.
-     * @return Pointer to the new MSHR.
-     */
-    MSHR* allocateFetch(Addr addr, int size, PacketPtr &target);
-
-    /**
-     * Allocate a target list for the given address.
-     * @param addr The address to fetch.
-     * @param asid The address space for the fetch.
-     * @param size The number of bytes to request.
-     * @return Pointer to the new MSHR.
-     */
-    MSHR* allocateTargetList(Addr addr, int size);
+    MSHR *allocate(Addr addr, int size, PacketPtr &pkt, bool isFill);
 
     /**
      * Removes the given MSHR from the queue. This places the MSHR on the
      * free list.
      * @param mshr
      */
-    void deallocate(MSHR* mshr);
-
-    /**
-     * Allocates a target to the given MSHR. Used to keep track of the number
-     * of outstanding targets.
-     * @param mshr The MSHR to allocate the target to.
-     * @param pkt The target request.
-     */
-    void allocateTarget(MSHR* mshr, PacketPtr &pkt)
-    {
-        mshr->allocateTarget(pkt);
-        allocatedTargets += 1;
-    }
+    void deallocate(MSHR *mshr);
 
     /**
-     * Remove a MSHR from the queue. Returns an iterator into the allocatedList
-     * for faster squash implementation.
+     * Remove a MSHR from the queue. Returns an iterator into the
+     * allocatedList for faster squash implementation.
      * @param mshr The MSHR to remove.
      * @return An iterator to the next entry in the allocatedList.
      */
-    MSHR::Iterator deallocateOne(MSHR* mshr);
+    MSHR::Iterator deallocateOne(MSHR *mshr);
 
     /**
-     * Moves the MSHR to the front of the pending list if it is not in service.
-     * @param mshr The mshr to move.
+     * Moves the MSHR to the front of the pending list if it is not
+     * in service.
+     * @param mshr The entry to move.
      */
     void moveToFront(MSHR *mshr);
 
@@ -178,14 +147,13 @@ class MSHRQueue {
      * pendingList. Deallocates the MSHR if it does not expect a response.
      * @param mshr The MSHR to mark in service.
      */
-    void markInService(MSHR* mshr);
+    void markInService(MSHR *mshr);
 
     /**
-     * Mark an in service mshr as pending, used to resend a request.
+     * Mark an in service entry as pending, used to resend a request.
      * @param mshr The MSHR to resend.
-     * @param cmd The command to resend.
      */
-    void markPending(MSHR* mshr, MemCmd cmd);
+    void markPending(MSHR *mshr);
 
     /**
      * Squash outstanding requests with the given thread number. If a request
@@ -204,36 +172,25 @@ class MSHRQueue {
     }
 
     /**
-     * Returns true if there are no free MSHRs.
+     * Returns true if there are no free entries.
      * @return True if this queue is full.
      */
     bool isFull() const
     {
-        return (allocated > numMSHRs - numReserve);
+        return (allocated > numEntries - numReserve);
     }
 
     /**
-     * Returns the request at the head of the pendingList.
+     * Returns the MSHR at the head of the pendingList.
      * @return The next request to service.
      */
-    PacketPtr getReq() const
+    MSHR *getNextMSHR() const
     {
         if (pendingList.empty()) {
             return NULL;
         }
-        MSHR* mshr = pendingList.front();
-        return mshr->pkt;
+        return pendingList.front();
     }
-
-    /**
-     * Returns the number of outstanding targets.
-     * @return the number of allocated targets.
-     */
-    int getAllocatedTargets() const
-    {
-        return allocatedTargets;
-    }
-
 };
 
-#endif //__MSHR_QUEUE_HH__
+#endif //__MEM__CACHE__MISS__MSHR_QUEUE_HH__
diff --git a/src/mem/cache/prefetch/base_prefetcher.cc b/src/mem/cache/prefetch/base_prefetcher.cc
index 966f7d005..d03cfe3ae 100644
--- a/src/mem/cache/prefetch/base_prefetcher.cc
+++ b/src/mem/cache/prefetch/base_prefetcher.cc
@@ -241,7 +241,6 @@ BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time)
             }
 
             pf.push_back(prefetch);
-            prefetch->flags |= CACHE_LINE_FILL;
 
             //Make sure to request the bus, with proper delay
             cache->requestMemSideBus(Request_PF, prefetch->time);
diff --git a/src/mem/cache/tags/fa_lru.cc b/src/mem/cache/tags/fa_lru.cc
index 42a1fe34f..607e89a75 100644
--- a/src/mem/cache/tags/fa_lru.cc
+++ b/src/mem/cache/tags/fa_lru.cc
@@ -215,14 +215,13 @@ FALRU::findBlock(Addr addr) const
 }
 
 FALRUBlk*
-FALRU::findReplacement(PacketPtr &pkt, PacketList &writebacks,
-                       BlkList &compress_blocks)
+FALRU::findReplacement(Addr addr, PacketList &writebacks)
 {
     FALRUBlk * blk = tail;
     assert(blk->inCache == 0);
     moveToHead(blk);
     tagHash.erase(blk->tag);
-    tagHash[blkAlign(pkt->getAddr())] = blk;
+    tagHash[blkAlign(addr)] = blk;
     if (blk->isValid()) {
         replacements[0]++;
     } else {
diff --git a/src/mem/cache/tags/fa_lru.hh b/src/mem/cache/tags/fa_lru.hh
index dabbda740..8cbc79813 100644
--- a/src/mem/cache/tags/fa_lru.hh
+++ b/src/mem/cache/tags/fa_lru.hh
@@ -201,11 +201,9 @@ public:
      * Find a replacement block for the address provided.
      * @param pkt The request to a find a replacement candidate for.
      * @param writebacks List for any writebacks to be performed.
-     * @param compress_blocks List of blocks to compress, for adaptive comp.
      * @return The block to place the replacement in.
      */
-    FALRUBlk* findReplacement(PacketPtr &pkt, PacketList & writebacks,
-                              BlkList &compress_blocks);
+    FALRUBlk* findReplacement(Addr addr, PacketList & writebacks);
 
     /**
      * Return the hit latency of this cache.
@@ -248,10 +246,9 @@ public:
      * Generate the tag from the addres. For fully associative this is just the
      * block address.
      * @param addr The address to get the tag from.
-     * @param blk ignored here
      * @return The tag.
      */
-    Addr extractTag(Addr addr, FALRUBlk *blk) const
+    Addr extractTag(Addr addr) const
     {
         return blkAlign(addr);
     }
diff --git a/src/mem/cache/tags/iic.cc b/src/mem/cache/tags/iic.cc
index 9c802d0dc..2f95cdb0f 100644
--- a/src/mem/cache/tags/iic.cc
+++ b/src/mem/cache/tags/iic.cc
@@ -303,11 +303,10 @@ IIC::findBlock(Addr addr) const
 
 
 IICTag*
-IIC::findReplacement(PacketPtr &pkt, PacketList &writebacks,
-                     BlkList &compress_blocks)
+IIC::findReplacement(Addr addr, PacketList &writebacks)
 {
-    DPRINTF(IIC, "Finding Replacement for %x\n", pkt->getAddr());
-    unsigned set = hash(pkt->getAddr());
+    DPRINTF(IIC, "Finding Replacement for %x\n", addr);
+    unsigned set = hash(addr);
     IICTag *tag_ptr;
     unsigned long *tmp_data = new unsigned long[numSub];
 
@@ -332,12 +331,14 @@ IIC::findReplacement(PacketPtr &pkt, PacketList &writebacks,
 
     list<unsigned long> tag_indexes;
     repl->doAdvance(tag_indexes);
+/*
     while (!tag_indexes.empty()) {
         if (!tagStore[tag_indexes.front()].isCompressed()) {
             compress_blocks.push_back(&tagStore[tag_indexes.front()]);
         }
         tag_indexes.pop_front();
     }
+*/
 
     tag_ptr->re = (void*)repl->add(tag_ptr-tagStore);
 
@@ -355,7 +356,7 @@ IIC::freeReplacementBlock(PacketList & writebacks)
 
     DPRINTF(Cache, "Replacing %x in IIC: %s\n",
             regenerateBlkAddr(tag_ptr->tag,0),
-            tag_ptr->isModified() ? "writeback" : "clean");
+            tag_ptr->isDirty() ? "writeback" : "clean");
     /* write back replaced block data */
     if (tag_ptr && (tag_ptr->isValid())) {
         replacements[0]++;
@@ -363,7 +364,7 @@ IIC::freeReplacementBlock(PacketList & writebacks)
         ++sampledRefs;
         tag_ptr->refCount = 0;
 
-        if (tag_ptr->isModified()) {
+        if (tag_ptr->isDirty()) {
 /*	    PacketPtr writeback =
                 buildWritebackReq(regenerateBlkAddr(tag_ptr->tag, 0),
                                   tag_ptr->req->asid, tag_ptr->xc, blkSize,
@@ -618,24 +619,6 @@ IIC::secondaryChain(Addr tag, unsigned long chain_ptr,
     return NULL;
 }
 
-void
-IIC::decompressBlock(unsigned long index)
-{
-    IICTag *tag_ptr = &tagStore[index];
-    if (tag_ptr->isCompressed()) {
-        // decompress the data here.
-    }
-}
-
-void
-IIC::compressBlock(unsigned long index)
-{
-    IICTag *tag_ptr = &tagStore[index];
-    if (!tag_ptr->isCompressed()) {
-        // Compress the data here.
-    }
-}
-
 void
 IIC::invalidateBlk(IIC::BlkType *tag_ptr)
 {
@@ -672,7 +655,6 @@ void
 IIC::writeData(IICTag *blk, uint8_t *write_data, int size,
                PacketList & writebacks)
 {
-    assert(size < blkSize || !blk->isCompressed());
     DPRINTF(IIC, "Writing %d bytes to %x\n", size,
             blk->tag<<tagShift);
     // Find the number of subblocks needed, (round up)
diff --git a/src/mem/cache/tags/iic.hh b/src/mem/cache/tags/iic.hh
index d0663d330..082b3d15e 100644
--- a/src/mem/cache/tags/iic.hh
+++ b/src/mem/cache/tags/iic.hh
@@ -345,17 +345,6 @@ class IIC : public BaseTags
         return hitLatency;
     }
 
-    /**
-     * Generate the tag from the address.
-     * @param addr The address to a get a tag for.
-     * @param blk Ignored here.
-     * @return the tag.
-     */
-    Addr extractTag(Addr addr, IICTag *blk) const
-    {
-        return (addr >> tagShift);
-    }
-
      /**
      * Generate the tag from the address.
      * @param addr The address to a get a tag for.
@@ -422,18 +411,6 @@ class IIC : public BaseTags
         return tmp;
     }
 
-    /**
-     * Decompress a block if it is compressed.
-     * @param index The tag store index for the block to uncompress.
-     */
-    void decompressBlock(unsigned long index);
-
-    /**
-     * Try and compress a block if it is not already compressed.
-     * @param index The tag store index for the block to compress.
-     */
-    void compressBlock(unsigned long index);
-
     /**
      * Invalidate a block.
      * @param blk The block to invalidate.
@@ -462,11 +439,9 @@ class IIC : public BaseTags
      * Find a replacement block for the address provided.
      * @param pkt The request to a find a replacement candidate for.
      * @param writebacks List for any writebacks to be performed.
-     * @param compress_blocks List of blocks to compress, for adaptive comp.
      * @return The block to place the replacement in.
      */
-    IICTag* findReplacement(PacketPtr &pkt, PacketList &writebacks,
-                            BlkList &compress_blocks);
+    IICTag* findReplacement(Addr addr, PacketList &writebacks);
 
     /**
      * Read the data from the internal storage of the given cache block.
diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc
index 8e8779774..334312aaf 100644
--- a/src/mem/cache/tags/lru.cc
+++ b/src/mem/cache/tags/lru.cc
@@ -194,10 +194,9 @@ LRU::findBlock(Addr addr) const
 }
 
 LRUBlk*
-LRU::findReplacement(PacketPtr &pkt, PacketList &writebacks,
-                     BlkList &compress_blocks)
+LRU::findReplacement(Addr addr, PacketList &writebacks)
 {
-    unsigned set = extractSet(pkt->getAddr());
+    unsigned set = extractSet(addr);
     // grab a replacement candidate
     LRUBlk *blk = sets[set].blks[assoc-1];
     sets[set].moveToHead(blk);
diff --git a/src/mem/cache/tags/lru.hh b/src/mem/cache/tags/lru.hh
index 75272544c..26038d709 100644
--- a/src/mem/cache/tags/lru.hh
+++ b/src/mem/cache/tags/lru.hh
@@ -189,11 +189,9 @@ public:
      * Find a replacement block for the address provided.
      * @param pkt The request to a find a replacement candidate for.
      * @param writebacks List for any writebacks to be performed.
-     * @param compress_blocks List of blocks to compress, for adaptive comp.
      * @return The block to place the replacement in.
      */
-    LRUBlk* findReplacement(PacketPtr &pkt, PacketList &writebacks,
-                            BlkList &compress_blocks);
+    LRUBlk* findReplacement(Addr addr, PacketList &writebacks);
 
     /**
      * Generate the tag from the given address.
@@ -205,17 +203,6 @@ public:
         return (addr >> tagShift);
     }
 
-   /**
-     * Generate the tag from the given address.
-     * @param addr The address to get the tag from.
-     * @param blk Ignored.
-     * @return The tag of the address.
-     */
-    Addr extractTag(Addr addr, LRUBlk *blk) const
-    {
-        return (addr >> tagShift);
-    }
-
     /**
      * Calculate the set index from the address.
      * @param addr The address to get the set from.
diff --git a/src/mem/cache/tags/split.cc b/src/mem/cache/tags/split.cc
index 5ac87eaba..e22ccbb96 100644
--- a/src/mem/cache/tags/split.cc
+++ b/src/mem/cache/tags/split.cc
@@ -298,27 +298,25 @@ Split::findBlock(Addr addr) const
 }
 
 SplitBlk*
-Split::findReplacement(PacketPtr &pkt, PacketList &writebacks,
-                     BlkList &compress_blocks)
+Split::findReplacement(Addr addr, PacketList &writebacks)
 {
     SplitBlk *blk;
 
+    assert(0);
+#if 0
     if (pkt->nic_pkt()) {
         DPRINTF(Split, "finding a replacement for nic_req\n");
         nic_repl++;
         if (lifo && lifo_net)
-            blk = lifo_net->findReplacement(pkt, writebacks,
-                                             compress_blocks);
+            blk = lifo_net->findReplacement(addr, writebacks);
         else if (lru_net)
-            blk = lru_net->findReplacement(pkt, writebacks,
-                                            compress_blocks);
+            blk = lru_net->findReplacement(addr, writebacks);
         // in this case, this is an LRU only cache, it's non partitioned
         else
-            blk = lru->findReplacement(pkt, writebacks, compress_blocks);
+            blk = lru->findReplacement(addr, writebacks);
     } else {
         DPRINTF(Split, "finding replacement for cpu_req\n");
-        blk = lru->findReplacement(pkt, writebacks,
-                                    compress_blocks);
+        blk = lru->findReplacement(addr, writebacks);
         cpu_repl++;
     }
 
@@ -346,6 +344,7 @@ Split::findReplacement(PacketPtr &pkt, PacketList &writebacks,
     // blk attributes for the new blk coming IN
     blk->ts = curTick;
     blk->isNIC = (pkt->nic_pkt()) ? true : false;
+#endif
 
     return blk;
 }
@@ -400,8 +399,13 @@ Split::regenerateBlkAddr(Addr tag, int set) const
 }
 
 Addr
-Split::extractTag(Addr addr, SplitBlk *blk) const
+Split::extractTag(Addr addr) const
 {
+    // need to fix this if we want to use it... old interface of
+    // passing in blk was too weird
+    assert(0);
+    return 0;
+/*
     if (blk->part == 2) {
         if (lifo_net)
             return lifo_net->extractTag(addr);
@@ -411,5 +415,6 @@ Split::extractTag(Addr addr, SplitBlk *blk) const
             panic("this shouldn't happen");
     } else
         return lru->extractTag(addr);
+*/
 }
 
diff --git a/src/mem/cache/tags/split.hh b/src/mem/cache/tags/split.hh
index 840b68940..ab48ce769 100644
--- a/src/mem/cache/tags/split.hh
+++ b/src/mem/cache/tags/split.hh
@@ -212,20 +212,17 @@ class Split : public BaseTags
      * Find a replacement block for the address provided.
      * @param pkt The request to a find a replacement candidate for.
      * @param writebacks List for any writebacks to be performed.
-     * @param compress_blocks List of blocks to compress, for adaptive comp.
      * @return The block to place the replacement in.
      */
-    SplitBlk* findReplacement(PacketPtr &pkt, PacketList &writebacks,
-                            BlkList &compress_blocks);
+    SplitBlk* findReplacement(Addr addr, PacketList &writebacks);
 
 
     /**
      * Generate the tag from the given address.
      * @param addr The address to get the tag from.
-     * @param blk The block to find the partition it's in
      * @return The tag of the address.
      */
-    Addr extractTag(Addr addr, SplitBlk *blk) const;
+    Addr extractTag(Addr addr) const;
 
     /**
      * Calculate the set index from the address.
diff --git a/src/mem/cache/tags/split_lifo.cc b/src/mem/cache/tags/split_lifo.cc
index d71d1a3ef..4ee2473a4 100644
--- a/src/mem/cache/tags/split_lifo.cc
+++ b/src/mem/cache/tags/split_lifo.cc
@@ -266,10 +266,9 @@ SplitLIFO::findBlock(Addr addr) const
 }
 
 SplitBlk*
-SplitLIFO::findReplacement(PacketPtr &pkt, PacketList &writebacks,
-                           BlkList &compress_blocks)
+SplitLIFO::findReplacement(Addr addr, PacketList &writebacks)
 {
-    unsigned set = extractSet(pkt->getAddr());
+    unsigned set = extractSet(addr);
 
     SplitBlk *firstIn = sets[set].firstIn;
     SplitBlk *lastIn = sets[set].lastIn;
@@ -289,7 +288,7 @@ SplitLIFO::findReplacement(PacketPtr &pkt, PacketList &writebacks,
     }
 
     DPRINTF(Split, "just assigned %#x addr into LIFO, replacing %#x status %#x\n",
-            pkt->getAddr(), regenerateBlkAddr(blk->tag, set), blk->status);
+            addr, regenerateBlkAddr(blk->tag, set), blk->status);
     if (blk->isValid()) {
         replacements[0]++;
         totalRefs += blk->refCount;
diff --git a/src/mem/cache/tags/split_lifo.hh b/src/mem/cache/tags/split_lifo.hh
index 0f8adf18d..13ccf7ef4 100644
--- a/src/mem/cache/tags/split_lifo.hh
+++ b/src/mem/cache/tags/split_lifo.hh
@@ -212,11 +212,9 @@ public:
      * Find a replacement block for the address provided.
      * @param pkt The request to a find a replacement candidate for.
      * @param writebacks List for any writebacks to be performed.
-     * @param compress_blocks List of blocks to compress, for adaptive comp.
      * @return The block to place the replacement in.
      */
-    SplitBlk* findReplacement(PacketPtr &pkt, PacketList &writebacks,
-                            BlkList &compress_blocks);
+    SplitBlk* findReplacement(Addr addr, PacketList &writebacks);
 
     /**
      * Generate the tag from the given address.
@@ -228,17 +226,6 @@ public:
         return (addr >> tagShift);
     }
 
-     /**
-     * Generate the tag from the given address.
-     * @param addr The address to get the tag from.
-     * @param blk Ignored
-     * @return The tag of the address.
-     */
-    Addr extractTag(Addr addr, SplitBlk *blk) const
-    {
-        return (addr >> tagShift);
-    }
-
    /**
      * Calculate the set index from the address.
      * @param addr The address to get the set from.
diff --git a/src/mem/cache/tags/split_lru.cc b/src/mem/cache/tags/split_lru.cc
index 7227fb5c1..4d271a92a 100644
--- a/src/mem/cache/tags/split_lru.cc
+++ b/src/mem/cache/tags/split_lru.cc
@@ -213,10 +213,9 @@ SplitLRU::findBlock(Addr addr) const
 }
 
 SplitBlk*
-SplitLRU::findReplacement(PacketPtr &pkt, PacketList &writebacks,
-                     BlkList &compress_blocks)
+SplitLRU::findReplacement(Addr addr, PacketList &writebacks)
 {
-    unsigned set = extractSet(pkt->getAddr());
+    unsigned set = extractSet(addr);
     // grab a replacement candidate
     SplitBlk *blk = sets[set].blks[assoc-1];
     sets[set].moveToHead(blk);
diff --git a/src/mem/cache/tags/split_lru.hh b/src/mem/cache/tags/split_lru.hh
index eb65445ea..a708ef740 100644
--- a/src/mem/cache/tags/split_lru.hh
+++ b/src/mem/cache/tags/split_lru.hh
@@ -195,11 +195,9 @@ public:
      * Find a replacement block for the address provided.
      * @param pkt The request to a find a replacement candidate for.
      * @param writebacks List for any writebacks to be performed.
-     * @param compress_blocks List of blocks to compress, for adaptive comp.
      * @return The block to place the replacement in.
      */
-    SplitBlk* findReplacement(PacketPtr &pkt, PacketList &writebacks,
-                            BlkList &compress_blocks);
+    SplitBlk* findReplacement(Addr addr, PacketList &writebacks);
 
     /**
      * Generate the tag from the given address.
@@ -211,17 +209,6 @@ public:
         return (addr >> tagShift);
     }
 
-    /**
-     * Generate the tag from the given address.
-     * @param addr The address to get the tag from.
-     * @param blk Ignored.
-     * @return The tag of the address.
-     */
-    Addr extractTag(Addr addr, SplitBlk *blk) const
-    {
-        return (addr >> tagShift);
-    }
-
     /**
      * Calculate the set index from the address.
      * @param addr The address to get the set from.
diff --git a/src/mem/packet.cc b/src/mem/packet.cc
index a257e16ab..57c6a6381 100644
--- a/src/mem/packet.cc
+++ b/src/mem/packet.cc
@@ -59,15 +59,15 @@ MemCmd::commandInfo[] =
     /* ReadResp */
     { SET3(IsRead, IsResponse, HasData), InvalidCmd, "ReadResp" },
     /* WriteReq */
-    { SET4(IsWrite, IsRequest, NeedsResponse, HasData),
+    { SET5(IsWrite, NeedsExclusive, IsRequest, NeedsResponse, HasData),
             WriteResp, "WriteReq" },
     /* WriteResp */
-    { SET2(IsWrite, IsResponse), InvalidCmd, "WriteResp" },
+    { SET3(IsWrite, NeedsExclusive, IsResponse), InvalidCmd, "WriteResp" },
     /* Writeback */
-    { SET4(IsWrite, IsRequest, HasData, NeedsResponse),
+    { SET5(IsWrite, NeedsExclusive, IsRequest, HasData, NeedsResponse),
             WritebackAck, "Writeback" },
     /* WritebackAck */
-    { SET2(IsWrite, IsResponse), InvalidCmd, "WritebackAck" },
+    { SET3(IsWrite, NeedsExclusive, IsResponse), InvalidCmd, "WritebackAck" },
     /* SoftPFReq */
     { SET4(IsRead, IsRequest, IsSWPrefetch, NeedsResponse),
             SoftPFResp, "SoftPFReq" },
@@ -80,27 +80,39 @@ MemCmd::commandInfo[] =
     /* HardPFResp */
     { SET4(IsRead, IsResponse, IsHWPrefetch, HasData),
             InvalidCmd, "HardPFResp" },
-    /* InvalidateReq */
-    { SET2(IsInvalidate, IsRequest), InvalidCmd, "InvalidateReq" },
     /* WriteInvalidateReq */
-    { SET5(IsWrite, IsInvalidate, IsRequest, HasData, NeedsResponse),
+    { SET6(IsWrite, NeedsExclusive, IsInvalidate,
+           IsRequest, HasData, NeedsResponse),
             WriteInvalidateResp, "WriteInvalidateReq" },
     /* WriteInvalidateResp */
-    { SET3(IsWrite, IsInvalidate, IsResponse),
+    { SET4(IsWrite, NeedsExclusive, IsInvalidate, IsResponse),
             InvalidCmd, "WriteInvalidateResp" },
     /* UpgradeReq */
     { SET3(IsInvalidate, IsRequest, IsUpgrade), InvalidCmd, "UpgradeReq" },
     /* ReadExReq */
-    { SET4(IsRead, IsInvalidate, IsRequest, NeedsResponse),
+    { SET5(IsRead, NeedsExclusive, IsInvalidate, IsRequest, NeedsResponse),
             ReadExResp, "ReadExReq" },
     /* ReadExResp */
-    { SET4(IsRead, IsInvalidate, IsResponse, HasData),
+    { SET5(IsRead, NeedsExclusive, IsInvalidate, IsResponse, HasData),
             InvalidCmd, "ReadExResp" },
+    /* LoadLockedReq */
+    { SET4(IsRead, IsLocked, IsRequest, NeedsResponse),
+            ReadResp, "LoadLockedReq" },
+    /* LoadLockedResp */
+    { SET4(IsRead, IsLocked, IsResponse, HasData),
+            InvalidCmd, "LoadLockedResp" },
+    /* StoreCondReq */
+    { SET6(IsWrite, NeedsExclusive, IsLocked,
+           IsRequest, NeedsResponse, HasData),
+            StoreCondResp, "StoreCondReq" },
+    /* StoreCondResp */
+    { SET4(IsWrite, NeedsExclusive, IsLocked, IsResponse),
+            InvalidCmd, "StoreCondResp" },
     /* SwapReq -- for Swap ldstub type operations */
-    { SET4(IsReadWrite, IsRequest, HasData, NeedsResponse),
+    { SET6(IsRead, IsWrite, NeedsExclusive, IsRequest, HasData, NeedsResponse),
         SwapResp, "SwapReq" },
     /* SwapResp -- for Swap ldstub type operations */
-    { SET3(IsReadWrite, IsResponse, HasData),
+    { SET5(IsRead, IsWrite, NeedsExclusive, IsResponse, HasData),
         InvalidCmd, "SwapResp" }
 };
 
@@ -171,27 +183,28 @@ fixDelayedResponsePacket(PacketPtr func, PacketPtr timing)
 }
 
 bool
-fixPacket(PacketPtr func, PacketPtr timing)
+Packet::checkFunctional(Addr addr, int size, uint8_t *data)
 {
-    Addr funcStart      = func->getAddr();
-    Addr funcEnd        = func->getAddr() + func->getSize() - 1;
-    Addr timingStart    = timing->getAddr();
-    Addr timingEnd      = timing->getAddr() + timing->getSize() - 1;
+    Addr func_start = getAddr();
+    Addr func_end   = getAddr() + getSize() - 1;
+    Addr val_start  = addr;
+    Addr val_end    = val_start + size - 1;
 
-    assert(!(funcStart > timingEnd || timingStart > funcEnd));
+    if (func_start > val_end || val_start > func_end) {
+        // no intersection
+        return false;
+    }
 
-    // this packet can't solve our problem, continue on
-    if (!timing->hasData())
-        return true;
+    // offset of functional request into supplied value (could be
+    // negative if partial overlap)
+    int offset = func_start - val_start;
 
-    if (func->isRead()) {
-        if (funcStart >= timingStart && funcEnd <= timingEnd) {
-            func->allocate();
-            std::memcpy(func->getPtr<uint8_t>(), timing->getPtr<uint8_t>() +
-                    funcStart - timingStart, func->getSize());
-            func->result = Packet::Success;
-            func->flags |= SATISFIED;
-            return false;
+    if (isRead()) {
+        if (func_start >= val_start && func_end <= val_end) {
+            allocate();
+            std::memcpy(getPtr<uint8_t>(), data + offset, getSize());
+            result = Packet::Success;
+            return true;
         } else {
             // In this case the timing packet only partially satisfies
             // the request, so we would need more information to make
@@ -199,25 +212,21 @@ fixPacket(PacketPtr func, PacketPtr timing)
             // something, so the request could continue and get this
             // bit of possibly newer data along with the older data
             // not written to yet.
-            panic("Timing packet only partially satisfies the functional"
-                    "request. Now what?");
+            panic("Memory value only partially satisfies the functional "
+                  "request. Now what?");
         }
-    } else if (func->isWrite()) {
-        if (funcStart >= timingStart) {
-            std::memcpy(timing->getPtr<uint8_t>() + (funcStart - timingStart),
-                   func->getPtr<uint8_t>(),
-                   (std::min(funcEnd, timingEnd) - funcStart) + 1);
-        } else { // timingStart > funcStart
-            std::memcpy(timing->getPtr<uint8_t>(),
-                   func->getPtr<uint8_t>() + (timingStart - funcStart),
-                   (std::min(funcEnd, timingEnd) - timingStart) + 1);
+    } else if (isWrite()) {
+        if (offset >= 0) {
+            std::memcpy(data + offset, getPtr<uint8_t>(),
+                        (std::min(func_end, val_end) - func_start) + 1);
+        } else { // val_start > func_start
+            std::memcpy(data, getPtr<uint8_t>() - offset,
+                        (std::min(func_end, val_end) - val_start) + 1);
         }
         // we always want to keep going with a write
-        return true;
+        return false;
     } else
-        panic("Don't know how to handle command type %#x\n",
-                func->cmdToIndex());
-
+        panic("Don't know how to handle command %s\n", cmdString());
 }
 
 
@@ -247,8 +256,6 @@ operator<<(std::ostream &o, const Packet &p)
         o << "Read ";
     if (p.isWrite())
         o << "Write ";
-    if (p.isReadWrite())
-        o << "Read/Write ";
     if (p.isInvalidate())
         o << "Invalidate ";
     if (p.isRequest())
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index e2349e42f..ca186d875 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -54,16 +54,6 @@ typedef Packet *PacketPtr;
 typedef uint8_t* PacketDataPtr;
 typedef std::list<PacketPtr> PacketList;
 
-//Coherence Flags
-#define NACKED_LINE     (1 << 0)
-#define SATISFIED       (1 << 1)
-#define SHARED_LINE     (1 << 2)
-#define CACHE_LINE_FILL (1 << 3)
-#define COMPRESSED      (1 << 4)
-#define NO_ALLOCATE     (1 << 5)
-
-#define EXPRESS_SNOOP   (1 << 7)
-
 class MemCmd
 {
   public:
@@ -82,12 +72,15 @@ class MemCmd
         HardPFReq,
         SoftPFResp,
         HardPFResp,
-        InvalidateReq,
         WriteInvalidateReq,
         WriteInvalidateResp,
         UpgradeReq,
         ReadExReq,
         ReadExResp,
+        LoadLockedReq,
+        LoadLockedResp,
+        StoreCondReq,
+        StoreCondResp,
         SwapReq,
         SwapResp,
         NUM_MEM_CMDS
@@ -97,18 +90,19 @@ class MemCmd
     /** List of command attributes. */
     enum Attribute
     {
-        IsRead,
-        IsWrite,
-        IsPrefetch,
+        IsRead,         //!< Data flows from responder to requester
+        IsWrite,        //!< Data flows from requester to responder
+        IsPrefetch,     //!< Not a demand access
         IsInvalidate,
-        IsRequest,
-        IsResponse,
-        NeedsResponse,
+        NeedsExclusive, //!< Requires exclusive copy to complete in-cache
+        IsRequest,      //!< Issued by requester
+        IsResponse,     //!< Issued by responder
+        NeedsResponse,  //!< Requester needs response from target
         IsSWPrefetch,
         IsHWPrefetch,
         IsUpgrade,
-        HasData,
-        IsReadWrite,
+        IsLocked,       //!< Alpha/MIPS LL or SC access
+        HasData,        //!< There is an associated payload
         NUM_COMMAND_ATTRIBUTES
     };
 
@@ -141,10 +135,12 @@ class MemCmd
     bool isWrite()  const       { return testCmdAttrib(IsWrite); }
     bool isRequest() const      { return testCmdAttrib(IsRequest); }
     bool isResponse() const     { return testCmdAttrib(IsResponse); }
+    bool needsExclusive() const  { return testCmdAttrib(NeedsExclusive); }
     bool needsResponse() const  { return testCmdAttrib(NeedsResponse); }
     bool isInvalidate() const   { return testCmdAttrib(IsInvalidate); }
     bool hasData() const        { return testCmdAttrib(HasData); }
-    bool isReadWrite() const    { return testCmdAttrib(IsReadWrite); }
+    bool isReadWrite() const    { return isRead() && isWrite(); }
+    bool isLocked() const       { return testCmdAttrib(IsLocked); }
 
     const Command responseCommand() const {
         return commandInfo[cmd].response;
@@ -188,9 +184,6 @@ class Packet
 
     typedef MemCmd::Command Command;
 
-    /** Temporary FLAGS field until cache gets working, this should be in coherence/sender state. */
-    uint64_t flags;
-
   private:
    /** A pointer to the data being transfered.  It can be differnt
     *    sizes at each level of the heirarchy so it belongs in the
@@ -235,6 +228,14 @@ class Packet
     /** Is the 'src' field valid? */
     bool srcValid;
 
+    enum SnoopFlag {
+        MemInhibit,
+        Shared,
+        NUM_SNOOP_FLAGS
+    };
+
+    /** Coherence flags asserted during snooping (see SnoopFlag) */
+    std::bitset<NUM_SNOOP_FLAGS> snoopFlags;
 
   public:
 
@@ -301,14 +302,17 @@ class Packet
     bool isWrite()  const       { return cmd.isWrite(); }
     bool isRequest() const      { return cmd.isRequest(); }
     bool isResponse() const     { return cmd.isResponse(); }
+    bool needsExclusive() const  { return cmd.needsExclusive(); }
     bool needsResponse() const  { return cmd.needsResponse(); }
     bool isInvalidate() const   { return cmd.isInvalidate(); }
     bool hasData() const        { return cmd.hasData(); }
     bool isReadWrite() const    { return cmd.isReadWrite(); }
+    bool isLocked() const       { return cmd.isLocked(); }
 
-    bool isCacheFill() const    { return (flags & CACHE_LINE_FILL) != 0; }
-    bool isNoAllocate() const   { return (flags & NO_ALLOCATE) != 0; }
-    bool isCompressed() const   { return (flags & COMPRESSED) != 0; }
+    void assertMemInhibit()     { snoopFlags[MemInhibit] = true; }
+    void assertShared()         { snoopFlags[Shared] = true; }
+    bool memInhibitAsserted()   { return snoopFlags[MemInhibit]; }
+    bool sharedAsserted()       { return snoopFlags[Shared]; }
 
     bool nic_pkt() { panic("Unimplemented"); M5_DUMMY_RETURN }
 
@@ -327,6 +331,8 @@ class Packet
     /** Accessor function that returns the source index of the packet. */
     short getSrc() const { assert(srcValid); return src; }
     void setSrc(short _src) { src = _src; srcValid = true; }
+    /** Reset source field, e.g. to retransmit packet on different bus. */
+    void clearSrc() { srcValid = false; }
 
     /** Accessor function that returns the destination index of
         the packet. */
@@ -347,13 +353,12 @@ class Packet
     Packet(Request *_req, MemCmd _cmd, short _dest)
         :  data(NULL), staticData(false), dynamicData(false), arrayData(false),
            addr(_req->paddr), size(_req->size), dest(_dest),
-           addrSizeValid(_req->validPaddr),
-           srcValid(false),
+           addrSizeValid(_req->validPaddr), srcValid(false),
+           snoopFlags(0),
+           time(curTick),
            req(_req), coherence(NULL), senderState(NULL), cmd(_cmd),
            result(Unknown)
     {
-        flags = 0;
-        time = curTick;
     }
 
     /** Alternate constructor if you are trying to create a packet with
@@ -361,14 +366,32 @@ class Packet
      *  this allows for overriding the size/addr of the req.*/
     Packet(Request *_req, MemCmd _cmd, short _dest, int _blkSize)
         :  data(NULL), staticData(false), dynamicData(false), arrayData(false),
-           addr(_req->paddr & ~(_blkSize - 1)), size(_blkSize),
-           dest(_dest),
+           addr(_req->paddr & ~(_blkSize - 1)), size(_blkSize), dest(_dest),
            addrSizeValid(_req->validPaddr), srcValid(false),
+           snoopFlags(0),
+           time(curTick),
            req(_req), coherence(NULL), senderState(NULL), cmd(_cmd),
            result(Unknown)
     {
-        flags = 0;
-        time = curTick;
+    }
+
+    /** Alternate constructor for copying a packet.  Copies the header
+     * fields of the original (time is reset to curTick).  The intent
+     * is for the copy to reference the original's data as static so
+     * it is never freed by the copy, in which case the copy must not
+     * outlive the original.  NOTE(review): as written, data is
+     * initialized to NULL and staticData to false -- confirm. */
+    Packet(Packet *origPkt)
+        :  data(NULL), staticData(false), dynamicData(false), arrayData(false),
+           addr(origPkt->addr), size(origPkt->size),
+           dest(origPkt->dest),
+           addrSizeValid(origPkt->addrSizeValid), srcValid(origPkt->srcValid),
+           snoopFlags(origPkt->snoopFlags),
+           time(curTick),
+           req(origPkt->req), coherence(origPkt->coherence),
+           senderState(origPkt->senderState), cmd(origPkt->cmd),
+           result(origPkt->result)
+    {
     }
 
     /** Destructor. */
@@ -382,7 +405,7 @@ class Packet
      *   multiple transactions. */
     void reinitFromRequest() {
         assert(req->validPaddr);
-        flags = 0;
+        snoopFlags = 0;
         addr = req->paddr;
         size = req->size;
         time = req->time;
@@ -395,29 +418,40 @@ class Packet
         }
     }
 
-    /** Take a request packet and modify it in place to be suitable
-     *   for returning as a response to that request.  Used for timing
-     *   accesses only.  For atomic and functional accesses, the
-     *   request packet is always implicitly passed back *without*
-     *   modifying the destination fields, so this function
-     *   should not be called. */
-    void makeTimingResponse() {
+    /**
+     * Take a request packet and modify it in place to be suitable for
+     * returning as a response to that request.  The source and
+     * destination fields are *not* modified, as is appropriate for
+     * atomic accesses.
+     */
+    void makeAtomicResponse()
+    {
         assert(needsResponse());
         assert(isRequest());
+        assert(result == Unknown);
         cmd = cmd.responseCommand();
+        result = Success;
+    }
+
+    /**
+     * Perform the additional work required for timing responses above
+     * and beyond atomic responses; i.e., change the destination to
+     * point back to the requester and clear the source field.
+     */
+    void convertAtomicToTimingResponse()
+    {
         dest = src;
         srcValid = false;
     }
 
     /**
      * Take a request packet and modify it in place to be suitable for
-     * returning as a response to that request.
+     * returning as a response to a timing request.
      */
-    void makeAtomicResponse()
+    void makeTimingResponse()
     {
-        assert(needsResponse());
-        assert(isRequest());
-        cmd = cmd.responseCommand();
+        makeAtomicResponse();
+        convertAtomicToTimingResponse();
     }
 
     /**
@@ -493,6 +527,40 @@ class Packet
     template <typename T>
     void set(T v);
 
+    /**
+     * Copy data into the packet from the provided pointer.
+     */
+    void setData(uint8_t *p)
+    {
+        std::memcpy(getPtr<uint8_t>(), p, getSize());
+    }
+
+    /**
+     * Copy data into the packet from the provided block pointer,
+     * which is aligned to the given block size.
+     */
+    void setDataFromBlock(uint8_t *blk_data, int blkSize)
+    {
+        setData(blk_data + getOffset(blkSize));
+    }
+
+    /**
+     * Copy data from the packet to the memory at the provided
+     * pointer; exactly getSize() bytes are written.
+     */
+    void writeData(uint8_t *p)
+    {
+        std::memcpy(p, getPtr<uint8_t>(), getSize());
+    }
+
+    /**
+     * Copy data from the packet to the block pointer (aligned to blkSize).
+     */
+    void writeDataToBlock(uint8_t *blk_data, int blkSize)
+    {
+        writeData(blk_data + getOffset(blkSize));
+    }
+
     /**
      * delete the data pointed to in the data pointer. Ok to call to
      * matter how data was allocted.
@@ -504,15 +572,35 @@ class Packet
 
     /** Do the packet modify the same addresses. */
     bool intersect(PacketPtr p);
+
+    /**
+     * Check a functional request against a memory value represented
+     * by a base/size pair and an associated data array.  If the
+     * functional request is a read, it may be satisfied by the memory
+     * value.  If the functional request is a write, it may update the
+     * memory value.
+     */
+    bool checkFunctional(Addr base, int size, uint8_t *data);
+
+    /**
+     * Check a functional request against a memory value stored in
+     * another packet (i.e. an in-transit request or response).
+     */
+    bool checkFunctional(PacketPtr otherPkt) {
+        return (otherPkt->hasData() &&
+                checkFunctional(otherPkt->getAddr(), otherPkt->getSize(),
+                                otherPkt->getPtr<uint8_t>()));
+    }
 };
 
-/** This function given a functional packet and a timing packet either
- * satisfies the timing packet, or updates the timing packet to
- * reflect the updated state in the timing packet. It returns if the
- * functional packet should continue to traverse the memory hierarchy
- * or not.
+
+
+/** Temporary for backwards compatibility.
  */
-bool fixPacket(PacketPtr func, PacketPtr timing);
+inline
+bool fixPacket(PacketPtr func, PacketPtr timing) {
+    return !func->checkFunctional(timing);
+}
 
 /** This function is a wrapper for the fixPacket field that toggles
  * the hasData bit it is used when a response is waiting in the
diff --git a/src/mem/physical.cc b/src/mem/physical.cc
index 9d840fe69..93cba96c4 100644
--- a/src/mem/physical.cc
+++ b/src/mem/physical.cc
@@ -58,8 +58,9 @@ PhysicalMemory::PhysicalMemory(Params *p)
         panic("Memory Size not divisible by page size\n");
 
     int map_flags = MAP_ANON | MAP_PRIVATE;
-    pmemAddr = (uint8_t *)mmap(NULL, params()->addrRange.size(), PROT_READ | PROT_WRITE,
-            map_flags, -1, 0);
+    pmemAddr =
+        (uint8_t *)mmap(NULL, params()->addrRange.size(),
+                        PROT_READ | PROT_WRITE, map_flags, -1, 0);
 
     if (pmemAddr == (void *)MAP_FAILED) {
         perror("mmap");
@@ -121,8 +122,9 @@ PhysicalMemory::calculateLatency(PacketPtr pkt)
 // Add load-locked to tracking list.  Should only be called if the
 // operation is a load and the LOCKED flag is set.
 void
-PhysicalMemory::trackLoadLocked(Request *req)
+PhysicalMemory::trackLoadLocked(PacketPtr pkt)
 {
+    Request *req = pkt->req;
     Addr paddr = LockedAddr::mask(req->getPaddr());
 
     // first we check if we already have a locked addr for this
@@ -151,10 +153,11 @@ PhysicalMemory::trackLoadLocked(Request *req)
 // conflict with locked addresses, and for success/failure of store
 // conditionals.
 bool
-PhysicalMemory::checkLockedAddrList(Request *req)
+PhysicalMemory::checkLockedAddrList(PacketPtr pkt)
 {
+    Request *req = pkt->req;
     Addr paddr = LockedAddr::mask(req->getPaddr());
-    bool isLocked = req->isLocked();
+    bool isLocked = pkt->isLocked();
 
     // Initialize return value.  Non-conditional stores always
     // succeed.  Assume conditional stores will fail until proven
@@ -198,74 +201,50 @@ PhysicalMemory::checkLockedAddrList(Request *req)
     return success;
 }
 
-void
-PhysicalMemory::doFunctionalAccess(PacketPtr pkt)
+
+#if TRACING_ON
+
+#define CASE(A, T)                                                      \
+  case sizeof(T):                                                       \
+    DPRINTF(MemoryAccess, A " of size %i on address 0x%x data 0x%x\n",  \
+            pkt->getSize(), pkt->getAddr(), pkt->get<T>());             \
+  break
+
+
+#define TRACE_PACKET(A)                                                 \
+    do {                                                                \
+        switch (pkt->getSize()) {                                       \
+          CASE(A, uint64_t);                                            \
+          CASE(A, uint32_t);                                            \
+          CASE(A, uint16_t);                                            \
+          CASE(A, uint8_t);                                             \
+          default:                                                      \
+            DPRINTF(MemoryAccess, A " of size %i on address 0x%x\n",    \
+                    pkt->getSize(), pkt->getAddr());                    \
+        }                                                               \
+    } while (0)
+
+#else
+
+#define TRACE_PACKET(A)
+
+#endif
+
+Tick
+PhysicalMemory::doAtomicAccess(PacketPtr pkt)
 {
     assert(pkt->getAddr() >= start() &&
            pkt->getAddr() + pkt->getSize() <= start() + size());
 
-    if (pkt->isRead()) {
-        if (pkt->req->isLocked()) {
-            trackLoadLocked(pkt->req);
-        }
-        memcpy(pkt->getPtr<uint8_t>(), pmemAddr + pkt->getAddr() - start(),
-               pkt->getSize());
-#if TRACING_ON
-        switch (pkt->getSize()) {
-          case sizeof(uint64_t):
-            DPRINTF(MemoryAccess, "Read of size %i on address 0x%x data 0x%x\n",
-                    pkt->getSize(), pkt->getAddr(),pkt->get<uint64_t>());
-            break;
-          case sizeof(uint32_t):
-            DPRINTF(MemoryAccess, "Read of size %i on address 0x%x data 0x%x\n",
-                    pkt->getSize(), pkt->getAddr(),pkt->get<uint32_t>());
-            break;
-          case sizeof(uint16_t):
-            DPRINTF(MemoryAccess, "Read of size %i on address 0x%x data 0x%x\n",
-                    pkt->getSize(), pkt->getAddr(),pkt->get<uint16_t>());
-            break;
-          case sizeof(uint8_t):
-            DPRINTF(MemoryAccess, "Read of size %i on address 0x%x data 0x%x\n",
-                    pkt->getSize(), pkt->getAddr(),pkt->get<uint8_t>());
-            break;
-          default:
-            DPRINTF(MemoryAccess, "Read of size %i on address 0x%x\n",
-                    pkt->getSize(), pkt->getAddr());
-        }
-#endif
+    if (pkt->memInhibitAsserted()) {
+        DPRINTF(MemoryAccess, "mem inhibited on 0x%x: not responding\n",
+                pkt->getAddr());
+        return 0;
     }
-    else if (pkt->isWrite()) {
-        if (writeOK(pkt->req)) {
-                memcpy(pmemAddr + pkt->getAddr() - start(), pkt->getPtr<uint8_t>(),
-                        pkt->getSize());
-#if TRACING_ON
-            switch (pkt->getSize()) {
-              case sizeof(uint64_t):
-                DPRINTF(MemoryAccess, "Write of size %i on address 0x%x data 0x%x\n",
-                        pkt->getSize(), pkt->getAddr(),pkt->get<uint64_t>());
-                break;
-              case sizeof(uint32_t):
-                DPRINTF(MemoryAccess, "Write of size %i on address 0x%x data 0x%x\n",
-                        pkt->getSize(), pkt->getAddr(),pkt->get<uint32_t>());
-                break;
-              case sizeof(uint16_t):
-                DPRINTF(MemoryAccess, "Write of size %i on address 0x%x data 0x%x\n",
-                        pkt->getSize(), pkt->getAddr(),pkt->get<uint16_t>());
-                break;
-              case sizeof(uint8_t):
-                DPRINTF(MemoryAccess, "Write of size %i on address 0x%x data 0x%x\n",
-                        pkt->getSize(), pkt->getAddr(),pkt->get<uint8_t>());
-                break;
-              default:
-                DPRINTF(MemoryAccess, "Write of size %i on address 0x%x\n",
-                        pkt->getSize(), pkt->getAddr());
-            }
-#endif
-        }
-    } else if (pkt->isInvalidate()) {
-        //upgrade or invalidate
-        pkt->flags |= SATISFIED;
-    } else if (pkt->isReadWrite()) {
+
+    uint8_t *hostAddr = pmemAddr + pkt->getAddr() - start();
+
+    if (pkt->cmd == MemCmd::SwapReq) {
         IntReg overwrite_val;
         bool overwrite_mem;
         uint64_t condition_val64;
@@ -277,66 +256,76 @@ PhysicalMemory::doFunctionalAccess(PacketPtr pkt)
         // keep a copy of our possible write value, and copy what is at the
         // memory address into the packet
         std::memcpy(&overwrite_val, pkt->getPtr<uint8_t>(), pkt->getSize());
-        std::memcpy(pkt->getPtr<uint8_t>(), pmemAddr + pkt->getAddr() - start(),
-               pkt->getSize());
+        std::memcpy(pkt->getPtr<uint8_t>(), hostAddr, pkt->getSize());
 
         if (pkt->req->isCondSwap()) {
             if (pkt->getSize() == sizeof(uint64_t)) {
                 condition_val64 = pkt->req->getExtraData();
-                overwrite_mem = !std::memcmp(&condition_val64, pmemAddr +
-                        pkt->getAddr() - start(), sizeof(uint64_t));
+                overwrite_mem = !std::memcmp(&condition_val64, hostAddr,
+                                             sizeof(uint64_t));
             } else if (pkt->getSize() == sizeof(uint32_t)) {
                 condition_val32 = (uint32_t)pkt->req->getExtraData();
-                overwrite_mem = !std::memcmp(&condition_val32, pmemAddr +
-                        pkt->getAddr() - start(), sizeof(uint32_t));
+                overwrite_mem = !std::memcmp(&condition_val32, hostAddr,
+                                             sizeof(uint32_t));
             } else
                 panic("Invalid size for conditional read/write\n");
         }
 
         if (overwrite_mem)
-            std::memcpy(pmemAddr + pkt->getAddr() - start(),
-               &overwrite_val, pkt->getSize());
+            std::memcpy(hostAddr, &overwrite_val, pkt->getSize());
 
-#if TRACING_ON
-        switch (pkt->getSize()) {
-          case sizeof(uint64_t):
-            DPRINTF(MemoryAccess, "Read/Write of size %i on address 0x%x old data 0x%x\n",
-                    pkt->getSize(), pkt->getAddr(),pkt->get<uint64_t>());
-            DPRINTF(MemoryAccess, "New Data 0x%x %s conditional (0x%x) and %s \n",
-                    overwrite_mem, pkt->req->isCondSwap() ? "was" : "wasn't",
-                    condition_val64, overwrite_mem ? "happened" : "didn't happen");
-            break;
-          case sizeof(uint32_t):
-            DPRINTF(MemoryAccess, "Read/Write of size %i on address 0x%x old data 0x%x\n",
-                    pkt->getSize(), pkt->getAddr(),pkt->get<uint32_t>());
-            DPRINTF(MemoryAccess, "New Data 0x%x %s conditional (0x%x) and %s \n",
-                    overwrite_mem, pkt->req->isCondSwap() ? "was" : "wasn't",
-                    condition_val32, overwrite_mem ? "happened" : "didn't happen");
-            break;
-          case sizeof(uint16_t):
-            DPRINTF(MemoryAccess, "Read/Write of size %i on address 0x%x old data 0x%x\n",
-                    pkt->getSize(), pkt->getAddr(),pkt->get<uint16_t>());
-            DPRINTF(MemoryAccess, "New Data 0x%x wasn't conditional and happned\n",
-                    overwrite_mem);
-            break;
-          case sizeof(uint8_t):
-            DPRINTF(MemoryAccess, "Read/Write of size %i on address 0x%x old data 0x%x\n",
-                    pkt->getSize(), pkt->getAddr(),pkt->get<uint8_t>());
-            DPRINTF(MemoryAccess, "New Data 0x%x wasn't conditional and happned\n",
-                    overwrite_mem);
-            break;
-          default:
-            DPRINTF(MemoryAccess, "Read/Write of size %i on address 0x%x\n",
-                    pkt->getSize(), pkt->getAddr());
+        TRACE_PACKET("Read/Write");
+    } else if (pkt->isRead()) {
+        assert(!pkt->isWrite());
+        if (pkt->isLocked()) {
+            trackLoadLocked(pkt);
+        }
+        memcpy(pkt->getPtr<uint8_t>(), hostAddr, pkt->getSize());
+        TRACE_PACKET("Read");
+    } else if (pkt->isWrite()) {
+        if (writeOK(pkt)) {
+            memcpy(hostAddr, pkt->getPtr<uint8_t>(), pkt->getSize());
+            TRACE_PACKET("Write");
+        }
+    } else if (pkt->isInvalidate()) {
+        //upgrade or invalidate
+        if (pkt->needsResponse()) {
+            pkt->makeAtomicResponse();
         }
-#endif
     } else {
         panic("unimplemented");
     }
 
+    if (pkt->needsResponse()) {
+        pkt->makeAtomicResponse();
+    }
+    return calculateLatency(pkt);
+}
+
+
+void
+PhysicalMemory::doFunctionalAccess(PacketPtr pkt)
+{
+    assert(pkt->getAddr() >= start() &&
+           pkt->getAddr() + pkt->getSize() <= start() + size());
+
+    uint8_t *hostAddr = pmemAddr + pkt->getAddr() - start();
+
+    if (pkt->cmd == MemCmd::ReadReq) {
+        memcpy(pkt->getPtr<uint8_t>(), hostAddr, pkt->getSize());
+        TRACE_PACKET("Read");
+    } else if (pkt->cmd == MemCmd::WriteReq) {
+        memcpy(hostAddr, pkt->getPtr<uint8_t>(), pkt->getSize());
+        TRACE_PACKET("Write");
+    } else {
+        panic("PhysicalMemory: unimplemented functional command %s",
+              pkt->cmdString());
+    }
+
     pkt->result = Packet::Success;
 }
 
+
 Port *
 PhysicalMemory::getPort(const std::string &if_name, int idx)
 {
@@ -407,8 +396,7 @@ PhysicalMemory::MemoryPort::deviceBlockSize()
 Tick
 PhysicalMemory::MemoryPort::recvAtomic(PacketPtr pkt)
 {
-    memory->doFunctionalAccess(pkt);
-    return memory->calculateLatency(pkt);
+    return memory->doAtomicAccess(pkt);
 }
 
 void
diff --git a/src/mem/physical.hh b/src/mem/physical.hh
index b9af5d334..8b13d32c1 100644
--- a/src/mem/physical.hh
+++ b/src/mem/physical.hh
@@ -112,12 +112,12 @@ class PhysicalMemory : public MemObject
     // inline a quick check for an empty locked addr list (hopefully
     // the common case), and do the full list search (if necessary) in
     // this out-of-line function
-    bool checkLockedAddrList(Request *req);
+    bool checkLockedAddrList(PacketPtr pkt);
 
     // Record the address of a load-locked operation so that we can
     // clear the execution context's lock flag if a matching store is
     // performed
-    void trackLoadLocked(Request *req);
+    void trackLoadLocked(PacketPtr pkt);
 
     // Compare a store address with any locked addresses so we can
     // clear the lock flag appropriately.  Return value set to 'false'
@@ -126,17 +126,18 @@ class PhysicalMemory : public MemObject
     // requesting execution context), 'true' otherwise.  Note that
     // this method must be called on *all* stores since even
     // non-conditional stores must clear any matching lock addresses.
-    bool writeOK(Request *req) {
+    bool writeOK(PacketPtr pkt) {
+        Request *req = pkt->req;
         if (lockedAddrList.empty()) {
             // no locked addrs: nothing to check, store_conditional fails
-            bool isLocked = req->isLocked();
+            bool isLocked = pkt->isLocked();
             if (isLocked) {
                 req->setExtraData(0);
             }
             return !isLocked; // only do write if not an sc
         } else {
             // iterate over list...
-            return checkLockedAddrList(req);
+            return checkLockedAddrList(pkt);
         }
     }
 
@@ -175,6 +176,7 @@ class PhysicalMemory : public MemObject
     unsigned int drain(Event *de);
 
   protected:
+    Tick doAtomicAccess(PacketPtr pkt);
     void doFunctionalAccess(PacketPtr pkt);
     virtual Tick calculateLatency(PacketPtr pkt);
     void recvStatusChange(Port::Status status);
diff --git a/src/mem/tport.cc b/src/mem/tport.cc
index ed4c0c172..2644a504c 100644
--- a/src/mem/tport.cc
+++ b/src/mem/tport.cc
@@ -67,14 +67,17 @@ SimpleTimingPort::recvTiming(PacketPtr pkt)
     // code to hanldle nacks here, but I'm pretty sure it didn't work
     // correctly with the drain code, so that would need to be fixed
     // if we ever added it back.
-    assert(pkt->result != Packet::Nacked);
+    assert(pkt->isRequest());
+    assert(pkt->result == Packet::Unknown);
+    bool needsResponse = pkt->needsResponse();
     Tick latency = recvAtomic(pkt);
     // turn packet around to go back to requester if response expected
-    if (pkt->needsResponse()) {
-        pkt->makeTimingResponse();
+    if (needsResponse) {
+        // recvAtomic() should already have turned packet into atomic response
+        assert(pkt->isResponse());
+        pkt->convertAtomicToTimingResponse();
         schedSendTiming(pkt, curTick + latency);
-    }
-    else if (pkt->cmd != MemCmd::UpgradeReq) {
+    } else {
         delete pkt->req;
         delete pkt;
     }
-- 
cgit v1.2.3


From 83af0fdcf57175adf8077c51e9ba872dd2c04b76 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Thu, 21 Jun 2007 11:59:17 -0700
Subject: Getting closer...

configs/example/memtest.py:
    Add progress interval option.
src/base/traceflags.py:
    Add MemTest flag.
src/cpu/memtest/memtest.cc:
    Clean up tracing.
src/cpu/memtest/memtest.hh:
    Get rid of unused code.

--HG--
extra : convert_revision : 92bd8241a6c90bfb6d908e5a5132cbdb500cbb87
---
 configs/example/memtest.py                    |   7 +-
 src/base/traceflags.py                        |   1 +
 src/cpu/memtest/memtest.cc                    | 140 ++-----
 src/cpu/memtest/memtest.hh                    |  10 -
 src/mem/cache/base_cache.cc                   |   6 +-
 src/mem/cache/base_cache.hh                   | 109 ++++--
 src/mem/cache/cache.hh                        |  18 +-
 src/mem/cache/cache_impl.hh                   | 544 ++++++++++++--------------
 src/mem/cache/coherence/coherence_protocol.cc |   3 +-
 src/mem/cache/miss/mshr.cc                    |   4 +-
 src/mem/cache/miss/mshr.hh                    |   2 +-
 src/mem/cache/miss/mshr_queue.cc              |   9 +-
 src/mem/cache/miss/mshr_queue.hh              |   7 +-
 src/mem/cache/prefetch/base_prefetcher.cc     |   8 +-
 src/mem/packet.cc                             |  12 +-
 src/mem/packet.hh                             |   3 +-
 16 files changed, 410 insertions(+), 473 deletions(-)

diff --git a/configs/example/memtest.py b/configs/example/memtest.py
index 9027a9866..0bc12e7bd 100644
--- a/configs/example/memtest.py
+++ b/configs/example/memtest.py
@@ -60,6 +60,11 @@ parser.add_option("-u", "--uncacheable", type="int", default=0,
                   help="Target percentage of uncacheable accesses "
                   "[default: %default]")
 
+parser.add_option("--progress", type="int", default=1000,
+                  metavar="NLOADS",
+                  help="Progress message interval "
+                  "[default: %default]")
+
 (options, args) = parser.parse_args()
 
 if args:
@@ -112,7 +117,7 @@ if options.numtesters > block_size:
 cpus = [ MemTest(atomic=options.atomic, max_loads=options.maxloads,
                  percent_functional=options.functional,
                  percent_uncacheable=options.uncacheable,
-                 progress_interval=1000)
+                 progress_interval=options.progress)
          for i in xrange(options.numtesters) ]
 
 # system simulated
diff --git a/src/base/traceflags.py b/src/base/traceflags.py
index 6b241c410..f4cf7dfd7 100644
--- a/src/base/traceflags.py
+++ b/src/base/traceflags.py
@@ -128,6 +128,7 @@ baseFlags = [
     'Mbox',
     'MemDepUnit',
     'MemoryAccess',
+    'MemTest',
     'O3CPU',
     'OzoneCPU',
     'OzoneLSQ',
diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc
index 5d89f1b82..6e8c5d0bf 100644
--- a/src/cpu/memtest/memtest.cc
+++ b/src/cpu/memtest/memtest.cc
@@ -191,29 +191,25 @@ MemTest::init()
     // memory should be 0; no need to initialize them.
 }
 
-static void
-printData(ostream &os, uint8_t *data, int nbytes)
-{
-    os << hex << setfill('0');
-    // assume little-endian: print bytes from highest address to lowest
-    for (uint8_t *dp = data + nbytes - 1; dp >= data; --dp) {
-        os << setw(2) << (unsigned)*dp;
-    }
-    os << dec;
-}
 
 void
 MemTest::completeRequest(PacketPtr pkt)
 {
+    Request *req = pkt->req;
+
+    DPRINTF(MemTest, "completing %s at address %x (blk %x)\n",
+            pkt->isWrite() ? "write" : "read",
+            req->getPaddr(), blockAddr(req->getPaddr()));
+
     MemTestSenderState *state =
         dynamic_cast<MemTestSenderState *>(pkt->senderState);
 
     uint8_t *data = state->data;
     uint8_t *pkt_data = pkt->getPtr<uint8_t>();
-    Request *req = pkt->req;
 
     //Remove the address from the list of outstanding
-    std::set<unsigned>::iterator removeAddr = outstandingAddrs.find(req->getPaddr());
+    std::set<unsigned>::iterator removeAddr =
+        outstandingAddrs.find(req->getPaddr());
     assert(removeAddr != outstandingAddrs.end());
     outstandingAddrs.erase(removeAddr);
 
@@ -237,39 +233,17 @@ MemTest::completeRequest(PacketPtr pkt)
         }
 
         if (numReads >= maxLoads)
-            exitSimLoop("Maximum number of loads reached!");
+            exitSimLoop("maximum number of loads reached");
         break;
 
       case MemCmd::WriteResp:
         numWritesStat++;
         break;
-/*
-      case Copy:
-        //Also remove dest from outstanding list
-        removeAddr = outstandingAddrs.find(req->dest);
-        assert(removeAddr != outstandingAddrs.end());
-        outstandingAddrs.erase(removeAddr);
-        numCopiesStat++;
-        break;
-*/
+
       default:
         panic("invalid command %s (%d)", pkt->cmdString(), pkt->cmd.toInt());
     }
 
-    if (blockAddr(req->getPaddr()) == traceBlockAddr) {
-        cerr << name() << ": completed "
-             << (pkt->isWrite() ? "write" : "read")
-             << " access of "
-             << dec << pkt->getSize() << " bytes at address 0x"
-             << hex << req->getPaddr()
-             << " (0x" << hex << blockAddr(req->getPaddr()) << ")"
-             << ", value = 0x";
-        printData(cerr, pkt_data, pkt->getSize());
-        cerr << " @ cycle " << dec << curTick;
-
-        cerr << endl;
-    }
-
     noResponseCycles = 0;
     delete state;
     delete [] data;
@@ -325,7 +299,7 @@ MemTest::tick()
     //mem tester
     //We can eliminate the lower bits of the offset, and then use the id
     //to offset within the blks
-    offset &= ~63; //Not the low order bits
+    offset = blockAddr(offset);
     offset += id;
     access_size = 0;
 
@@ -351,29 +325,23 @@ MemTest::tick()
     if (cmd < percentReads) {
         // read
 
-        //For now we only allow one outstanding request per addreess per tester
-        //This means we assume CPU does write forwarding to reads that alias something
-        //in the cpu store buffer.
+        // For now we only allow one outstanding request per address
+        // per tester.  This means we assume CPU does write forwarding
+        // to reads that alias something in the cpu store buffer.
         if (outstandingAddrs.find(paddr) != outstandingAddrs.end()) {
             delete [] result;
             delete req;
             return;
         }
-        else outstandingAddrs.insert(paddr);
+
+        outstandingAddrs.insert(paddr);
 
         // ***** NOTE FOR RON: I'm not sure how to access checkMem. - Kevin
         funcPort.readBlob(req->getPaddr(), result, req->getSize());
 
-        if (blockAddr(paddr) == traceBlockAddr) {
-            cerr << name()
-                 << ": initiating read "
-                 << ((probe) ? "probe of " : "access of ")
-                 << dec << req->getSize() << " bytes from addr 0x"
-                 << hex << paddr
-                 << " (0x" << hex << blockAddr(paddr) << ")"
-                 << " at cycle "
-                 << dec << curTick << endl;
-        }
+        DPRINTF(MemTest,
+                "initiating read at address %x (blk %x) expecting %x\n",
+                req->getPaddr(), blockAddr(req->getPaddr()), *result);
 
         PacketPtr pkt = new Packet(req, MemCmd::ReadReq, Packet::Broadcast);
         pkt->dataDynamicArray(new uint8_t[req->getSize()]);
@@ -385,36 +353,25 @@ MemTest::tick()
             pkt->makeAtomicResponse();
             completeRequest(pkt);
         } else {
-//	    req->completionEvent = new MemCompleteEvent(req, result, this);
             sendPkt(pkt);
         }
     } else {
         // write
 
-        //For now we only allow one outstanding request per addreess per tester
-        //This means we assume CPU does write forwarding to reads that alias something
-        //in the cpu store buffer.
+        // For now we only allow one outstanding request per address
+        // per tester.  This means we assume CPU does write forwarding
+        // to reads that alias something in the cpu store buffer.
         if (outstandingAddrs.find(paddr) != outstandingAddrs.end()) {
             delete [] result;
             delete req;
             return;
         }
 
-        else outstandingAddrs.insert(paddr);
+        outstandingAddrs.insert(paddr);
+
+        DPRINTF(MemTest, "initiating write at address %x (blk %x) value %x\n",
+                req->getPaddr(), blockAddr(req->getPaddr()), data & 0xff);
 
-/*
-        if (blockAddr(req->getPaddr()) == traceBlockAddr) {
-            cerr << name() << ": initiating write "
-                 << ((probe)?"probe of ":"access of ")
-                 << dec << req->getSize() << " bytes (value = 0x";
-            printData(cerr, data_pkt->getPtr(), req->getSize());
-            cerr << ") to addr 0x"
-                 << hex << req->getPaddr()
-                 << " (0x" << hex << blockAddr(req->getPaddr()) << ")"
-                 << " at cycle "
-                 << dec << curTick << endl;
-        }
-*/
         PacketPtr pkt = new Packet(req, MemCmd::WriteReq, Packet::Broadcast);
         uint8_t *pkt_data = new uint8_t[req->getSize()];
         pkt->dataDynamicArray(pkt_data);
@@ -429,54 +386,9 @@ MemTest::tick()
             pkt->makeAtomicResponse();
             completeRequest(pkt);
         } else {
-//	    req->completionEvent = new MemCompleteEvent(req, NULL, this);
             sendPkt(pkt);
         }
     }
-/*    else {
-        // copy
-        unsigned source_align = random() % 100;
-        unsigned dest_align = random() % 100;
-        unsigned offset2 = random() % size;
-
-        Addr source = ((base) ? baseAddr1 : baseAddr2) + offset;
-        Addr dest = ((base) ? baseAddr2 : baseAddr1) + offset2;
-        if (outstandingAddrs.find(source) != outstandingAddrs.end()) return;
-        else outstandingAddrs.insert(source);
-        if (outstandingAddrs.find(dest) != outstandingAddrs.end()) return;
-        else outstandingAddrs.insert(dest);
-
-        if (source_align >= percentSourceUnaligned) {
-            source = blockAddr(source);
-        }
-        if (dest_align >= percentDestUnaligned) {
-            dest = blockAddr(dest);
-        }
-        req->cmd = Copy;
-        req->flags &= ~UNCACHEABLE;
-        req->paddr = source;
-        req->dest = dest;
-        delete [] req->data;
-        req->data = new uint8_t[blockSize];
-        req->size = blockSize;
-        if (source == traceBlockAddr || dest == traceBlockAddr) {
-            cerr << name()
-                 << ": initiating copy of "
-                 << dec << req->size << " bytes from addr 0x"
-                 << hex << source
-                 << " (0x" << hex << blockAddr(source) << ")"
-                 << " to addr 0x"
-                 << hex << dest
-                 << " (0x" << hex << blockAddr(dest) << ")"
-                 << " at cycle "
-                 << dec << curTick << endl;
-        }*
-        cacheInterface->access(req);
-        uint8_t result[blockSize];
-        checkMem->access(Read, source, &result, blockSize);
-        checkMem->access(Write, dest, &result, blockSize);
-    }
-*/
 }
 
 void
diff --git a/src/cpu/memtest/memtest.hh b/src/cpu/memtest/memtest.hh
index 565fafb77..f4713709a 100644
--- a/src/cpu/memtest/memtest.hh
+++ b/src/cpu/memtest/memtest.hh
@@ -35,8 +35,6 @@
 #include <set>
 
 #include "base/statistics.hh"
-//#include "mem/functional/functional.hh"
-//#include "mem/mem_interface.hh"
 #include "sim/eventq.hh"
 #include "sim/sim_exit.hh"
 #include "sim/sim_object.hh"
@@ -50,9 +48,6 @@ class MemTest : public MemObject
   public:
 
     MemTest(const std::string &name,
-//	    MemInterface *_cache_interface,
-//	    PhysicalMemory *main_mem,
-//	    PhysicalMemory *check_mem,
             unsigned _memorySize,
             unsigned _percentReads,
             unsigned _percentFunctional,
@@ -136,12 +131,7 @@ class MemTest : public MemObject
         uint8_t *data;
     };
 
-//    Request *dataReq;
     PacketPtr retryPkt;
-//    MemInterface *cacheInterface;
-//    PhysicalMemory *mainMem;
-//    PhysicalMemory *checkMem;
-//    SimpleThread *thread;
 
     bool accessRetry;
 
diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc
index c7006550b..8b476e100 100644
--- a/src/mem/cache/base_cache.cc
+++ b/src/mem/cache/base_cache.cc
@@ -50,8 +50,9 @@ BaseCache::CachePort::CachePort(const std::string &_name, BaseCache *_cache)
 
 BaseCache::BaseCache(const std::string &name, Params &params)
     : MemObject(name),
-      mshrQueue(params.numMSHRs, 4),
-      writeBuffer(params.numWriteBuffers, params.numMSHRs+1000),
+      mshrQueue(params.numMSHRs, 4, MSHRQueue_MSHRs),
+      writeBuffer(params.numWriteBuffers, params.numMSHRs+1000,
+                  MSHRQueue_WriteBuffer),
       blkSize(params.blkSize),
       numTarget(params.numTargets),
       blocked(0),
@@ -128,6 +129,7 @@ BaseCache::init()
     cpuSidePort->sendStatusChange(Port::RangeChange);
 }
 
+
 void
 BaseCache::regStats()
 {
diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh
index 5969b4b3f..10fd3289c 100644
--- a/src/mem/cache/base_cache.hh
+++ b/src/mem/cache/base_cache.hh
@@ -54,41 +54,49 @@
 #include "sim/eventq.hh"
 #include "sim/sim_exit.hh"
 
-/**
- * Reasons for Caches to be Blocked.
- */
-enum BlockedCause{
-    Blocked_NoMSHRs,
-    Blocked_NoTargets,
-    Blocked_NoWBBuffers,
-    Blocked_Coherence,
-    NUM_BLOCKED_CAUSES
-};
-
-/**
- * Reasons for cache to request a bus.
- */
-enum RequestCause{
-    Request_MSHR,
-    Request_WB,
-    Request_Coherence,
-    Request_PF
-};
-
 class MSHR;
 /**
  * A basic cache interface. Implements some common functions for speed.
  */
 class BaseCache : public MemObject
 {
+    /**
+     * Indexes to enumerate the MSHR queues.
+     */
+    enum MSHRQueueIndex {
+        MSHRQueue_MSHRs,
+        MSHRQueue_WriteBuffer
+    };
+
+    /**
+     * Reasons for caches to be blocked.
+     */
+    enum BlockedCause {
+        Blocked_NoMSHRs = MSHRQueue_MSHRs,
+        Blocked_NoWBBuffers = MSHRQueue_WriteBuffer,
+        Blocked_NoTargets,
+        NUM_BLOCKED_CAUSES
+    };
+
+  public:
+    /**
+     * Reasons for cache to request a bus.
+     */
+    enum RequestCause {
+        Request_MSHR = MSHRQueue_MSHRs,
+        Request_WB = MSHRQueue_WriteBuffer,
+        Request_PF,
+        NUM_REQUEST_CAUSES
+    };
+
+  private:
+
     class CachePort : public SimpleTimingPort
     {
       public:
         BaseCache *cache;
 
       protected:
-        Event *responseEvent;
-
         CachePort(const std::string &_name, BaseCache *_cache);
 
         virtual void recvStatusChange(Status status);
@@ -154,6 +162,36 @@ class BaseCache : public MemObject
     /** Write/writeback buffer */
     MSHRQueue writeBuffer;
 
+    MSHR *allocateBufferInternal(MSHRQueue *mq, Addr addr, int size,
+                                 PacketPtr pkt, Tick time, bool requestBus)
+    {   // Take an entry from mq for pkt; optionally request the mem-side bus.
+        MSHR *mshr = mq->allocate(addr, size, pkt);
+        mshr->order = order++;  // stamp with the next value of 'order'
+        // mq->index is cast directly to the BlockedCause for this queue.
+        if (mq->isFull()) {
+            setBlocked((BlockedCause)mq->index);
+        }
+        // Likewise mq->index is cast to the RequestCause for the bus request.
+        if (requestBus) {
+            requestMemSideBus((RequestCause)mq->index, time);
+        }
+
+        return mshr;
+    }
+
+    void markInServiceInternal(MSHR *mshr)
+    {   // Mark mshr in service and update bus-request/blocked state.
+        MSHRQueue *mq = mshr->queue;
+        bool wasFull = mq->isFull();   // sampled before the queue is modified
+        mq->markInService(mshr);
+        if (!mq->havePending()) {      // nothing pending: drop the bus request
+            deassertMemSideBusRequest((RequestCause)mq->index);
+        }
+        if (wasFull && !mq->isFull()) { // full -> not full: lift the block
+            clearBlocked((BlockedCause)mq->index);
+        }
+    }
+
     /** Block size of this cache */
     const int blkSize;
 
@@ -382,6 +420,31 @@ class BaseCache : public MemObject
     Addr blockAlign(Addr addr) const { return (addr & ~(blkSize - 1)); }
 
 
+    MSHR *allocateMissBuffer(PacketPtr pkt, Tick time, bool requestBus)
+    {   // Uses mshrQueue with a block-aligned address and a size of blkSize.
+        return allocateBufferInternal(&mshrQueue,
+                                      blockAlign(pkt->getAddr()), blkSize,
+                                      pkt, time, requestBus);
+    }
+
+    MSHR *allocateBuffer(PacketPtr pkt, Tick time, bool requestBus)
+    {   // Choose a queue for pkt and allocate an entry in it.
+        MSHRQueue *mq = NULL;
+        // Write-only packets use writeBuffer; everything else uses mshrQueue.
+        if (pkt->isWrite() && !pkt->isRead()) {
+            /**
+             * @todo Add write merging here.
+             */
+            mq = &writeBuffer;
+        } else {
+            mq = &mshrQueue;
+        }
+        // The entry uses the packet's own address and size, as given.
+        return allocateBufferInternal(mq, pkt->getAddr(), pkt->getSize(),
+                                      pkt, time, requestBus);
+    }
+
+
     /**
      * Returns true if the cache is blocked for accesses.
      */
diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index 16d15cf86..06fce1a71 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -179,7 +179,7 @@ class Cache : public BaseCache
      * @return Pointer to the cache block touched by the request. NULL if it
      * was a miss.
      */
-    bool access(PacketPtr pkt, BlkType *blk, int & lat);
+    bool access(PacketPtr pkt, BlkType *&blk, int &lat);
 
     /**
      *Handle doing the Compare and Swap function for SPARC.
@@ -201,7 +201,7 @@ class Cache : public BaseCache
 
     bool satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk);
     bool satisfyTarget(MSHR::Target *target, BlkType *blk);
-    void satisfyMSHR(MSHR *mshr, PacketPtr pkt, BlkType *blk);
+    bool satisfyMSHR(MSHR *mshr, PacketPtr pkt, BlkType *blk);
 
     void doTimingSupplyResponse(PacketPtr req_pkt, uint8_t *blk_data);
 
@@ -310,15 +310,16 @@ class Cache : public BaseCache
      * @param isFill Whether to fetch & allocate a block
      *               or just forward the request.
      */
-    MSHR *allocateBuffer(PacketPtr pkt, Tick time, bool isFill,
-                         bool requestBus);
+    MSHR *allocateBuffer(PacketPtr pkt, Tick time, bool requestBus);
 
     /**
      * Selects a outstanding request to service.
      * @return The request to service, NULL if none found.
      */
+    PacketPtr getBusPacket(PacketPtr cpu_pkt, BlkType *blk,
+                           bool needsExclusive);
     MSHR *getNextMSHR();
-    PacketPtr getPacket();
+    PacketPtr getTimingPacket();
 
     /**
      * Marks a request as in service (sent on the bus). This can have side
@@ -328,13 +329,6 @@ class Cache : public BaseCache
      */
     void markInService(MSHR *mshr);
 
-    /**
-     * Collect statistics and free resources of a satisfied request.
-     * @param pkt The request that has been satisfied.
-     * @param time The time when the request is satisfied.
-     */
-    void handleResponse(PacketPtr pkt, Tick time);
-
     /**
      * Perform the given writeback request.
      * @param pkt The writeback request.
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 0f66e613c..81fcb4158 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -152,40 +152,21 @@ Cache<TagStore,Coherence>::cmpAndSwap(BlkType *blk, PacketPtr pkt)
 template<class TagStore, class Coherence>
 MSHR *
 Cache<TagStore,Coherence>::allocateBuffer(PacketPtr pkt, Tick time,
-                                          bool isFill, bool requestBus)
+                                          bool requestBus)
 {
-    int  size = isFill ? blkSize : pkt->getSize();
-    Addr addr = isFill ? tags->blkAlign(pkt->getAddr()) : pkt->getAddr();
+    MSHRQueue *mq = NULL;   // queue chosen below for this packet
 
-    MSHR *mshr = NULL;
-
-    if (pkt->isWrite()) {
+    if (pkt->isWrite() && !pkt->isRead()) {  // a write that is not also a read
         /**
          * @todo Add write merging here.
          */
-        mshr = writeBuffer.allocate(addr, size, pkt, isFill);
-        mshr->order = order++;
-
-        if (writeBuffer.isFull()) {
-            setBlocked(Blocked_NoWBBuffers);
-        }
-
-        if (requestBus) {
-            requestMemSideBus(Request_WB, time);
-        }
+        mq = &writeBuffer;
     } else {
-        mshr = mshrQueue.allocate(addr, size, pkt, isFill);
-        mshr->order = order++;
-        if (mshrQueue.isFull()) {
-            setBlocked(Blocked_NoMSHRs);
-        }
-        if (requestBus) {
-            requestMemSideBus(Request_MSHR, time);
-        }
+        mq = &mshrQueue;
     }
 
-    assert(mshr != NULL);
-    return mshr;
+    return allocateBufferInternal(mq, pkt->getAddr(), pkt->getSize(),
+                                  pkt, time, requestBus);
 }
 
 
@@ -193,33 +174,7 @@ template<class TagStore, class Coherence>
 void
 Cache<TagStore,Coherence>::markInService(MSHR *mshr)
 {
-    bool unblock = false;
-    BlockedCause cause = NUM_BLOCKED_CAUSES;
-
-    /**
-     * @todo Should include MSHRQueue pointer in MSHR to select the correct
-     * one.
-     */
-    if (mshr->queue == &writeBuffer) {
-        // Forwarding a write/ writeback, don't need to change
-        // the command
-        unblock = writeBuffer.isFull();
-        writeBuffer.markInService(mshr);
-        if (!writeBuffer.havePending()){
-            deassertMemSideBusRequest(Request_WB);
-        }
-        if (unblock) {
-            // Do we really unblock?
-            unblock = !writeBuffer.isFull();
-            cause = Blocked_NoWBBuffers;
-        }
-    } else {
-        assert(mshr->queue == &mshrQueue);
-        unblock = mshrQueue.isFull();
-        mshrQueue.markInService(mshr);
-        if (!mshrQueue.havePending()){
-            deassertMemSideBusRequest(Request_MSHR);
-        }
+    markInServiceInternal(mshr);
 #if 0
         if (mshr->originalCmd == MemCmd::HardPFReq) {
             DPRINTF(HWPrefetch, "%s:Marking a HW_PF in service\n",
@@ -231,14 +186,6 @@ Cache<TagStore,Coherence>::markInService(MSHR *mshr)
             }
         }
 #endif
-        if (unblock) {
-            unblock = !mshrQueue.isFull();
-            cause = Blocked_NoMSHRs;
-        }
-    }
-    if (unblock) {
-        clearBlocked(cause);
-    }
 }
 
 
@@ -275,9 +222,16 @@ Cache<TagStore,Coherence>::squash(int threadNum)
 
 template<class TagStore, class Coherence>
 bool
-Cache<TagStore,Coherence>::access(PacketPtr pkt, BlkType *blk, int &lat)
+Cache<TagStore,Coherence>::access(PacketPtr pkt, BlkType *&blk, int &lat)
 {
+    if (pkt->req->isUncacheable())  {
+        blk = NULL;
+        lat = hitLatency;
+        return false;
+    }
+
     bool satisfied = false;  // assume the worst
+    blk = tags->findBlock(pkt->getAddr(), lat);
 
     if (prefetchAccess) {
         //We are determining prefetches on access stream, call prefetcher
@@ -307,6 +261,8 @@ Cache<TagStore,Coherence>::access(PacketPtr pkt, BlkType *blk, int &lat)
             hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
             satisfied = true;
 
+            // Check RMW operations first since both isRead() and
+            // isWrite() will be true for them
             if (pkt->cmd == MemCmd::SwapReq) {
                 cmpAndSwap(blk, pkt);
             } else if (pkt->isWrite()) {
@@ -314,12 +270,16 @@ Cache<TagStore,Coherence>::access(PacketPtr pkt, BlkType *blk, int &lat)
                     blk->status |= BlkDirty;
                     pkt->writeDataToBlock(blk->data, blkSize);
                 }
-            } else {
-                assert(pkt->isRead());
+            } else if (pkt->isRead()) {
                 if (pkt->isLocked()) {
                     blk->trackLoadLocked(pkt);
                 }
                 pkt->setDataFromBlock(blk->data, blkSize);
+            } else {
+                // Not a read or write... must be an upgrade.  it's OK
+                // to just ack those as long as we have an exclusive
+                // copy at this level.
+                assert(pkt->cmd == MemCmd::UpgradeReq);
             }
         } else {
             // permission violation... nothing to do here, leave unsatisfied
@@ -351,19 +311,24 @@ Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
     // we charge hitLatency for doing just about anything here
     Tick time =  curTick + hitLatency;
 
+    if (pkt->memInhibitAsserted()) {
+        DPRINTF(Cache, "mem inhibited on 0x%x: not responding\n",
+                pkt->getAddr());
+        assert(!pkt->req->isUncacheable());
+        return true;
+    }
+
     if (pkt->req->isUncacheable()) {
-        allocateBuffer(pkt, time, false, true);
+        allocateBuffer(pkt, time, true);
         assert(pkt->needsResponse()); // else we should delete it here??
         return true;
     }
 
     PacketList writebacks;
     int lat = hitLatency;
-    BlkType *blk = tags->findBlock(pkt->getAddr(), lat);
     bool satisfied = false;
 
     Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
-
     MSHR *mshr = mshrQueue.findMatch(blk_addr);
 
     if (!mshr) {
@@ -373,6 +338,7 @@ Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
         // cache block... a more aggressive system could detect the
         // overlap (if any) and forward data out of the MSHRs, but we
         // don't do that yet)
+        BlkType *blk = NULL;
         satisfied = access(pkt, blk, lat);
     }
 
@@ -401,7 +367,7 @@ Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
     // copy writebacks to write buffer
     while (!writebacks.empty()) {
         PacketPtr wbPkt = writebacks.front();
-        allocateBuffer(wbPkt, time, false, true);
+        allocateBuffer(wbPkt, time, true);
         writebacks.pop_front();
     }
 
@@ -435,7 +401,7 @@ Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
             // always mark as cache fill for now... if we implement
             // no-write-allocate or bypass accesses this will have to
             // be changed.
-            allocateBuffer(pkt, time, true, true);
+            allocateMissBuffer(pkt, time, true);
         }
     }
 
@@ -449,54 +415,109 @@ Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
 }
 
 
+template<class TagStore, class Coherence>
+PacketPtr
+Cache<TagStore,Coherence>::getBusPacket(PacketPtr cpu_pkt, BlkType *blk,
+                                        bool needsExclusive)
+{   // Build the memory-side request for cpu_pkt; NULL if none is created.
+    bool blkValid = blk && blk->isValid();
+    // Uncacheable requests never get a separate bus packet.
+    if (cpu_pkt->req->isUncacheable()) {
+        assert(blk == NULL);
+        return NULL;
+    }
+
+    if (!blkValid &&
+        (cpu_pkt->cmd == MemCmd::Writeback ||
+         cpu_pkt->cmd == MemCmd::UpgradeReq)) {
+        // For now, writebacks from upper-level caches that
+        // completely miss in the cache just go through. If we had
+        // "fast write" support (where we could write the whole
+        // block w/o fetching new data) we might want to allocate
+        // on writeback misses instead.
+        return NULL;
+    }
+
+    MemCmd cmd;
+    const bool useUpgrades = true;
+    if (blkValid && useUpgrades) {
+        // only reason to be here is that blk is shared
+        // (read-only) and we need exclusive
+        assert(needsExclusive && !blk->isWritable());
+        cmd = MemCmd::UpgradeReq;
+    } else {
+        // block is invalid
+        cmd = needsExclusive ? MemCmd::ReadExReq : MemCmd::ReadReq;
+    }
+    PacketPtr pkt = new Packet(cpu_pkt->req, cmd, Packet::Broadcast, blkSize);
+    // NOTE(review): pkt is heap-allocated; confirm every caller releases it.
+    pkt->allocate();
+    return pkt;
+}
+
+
 template<class TagStore, class Coherence>
 Tick
 Cache<TagStore,Coherence>::atomicAccess(PacketPtr pkt)
 {
+    int lat = hitLatency;
+
+    if (pkt->memInhibitAsserted()) {
+        DPRINTF(Cache, "mem inhibited on 0x%x: not responding\n",
+                pkt->getAddr());
+        assert(!pkt->req->isUncacheable());
+        return lat;
+    }
+
     // should assert here that there are no outstanding MSHRs or
     // writebacks... that would mean that someone used an atomic
     // access in timing mode
 
-    if (pkt->req->isUncacheable()) {
-        // Uncacheables just go through
-        return memSidePort->sendAtomic(pkt);
-    }
-
-    PacketList writebacks;
-    int lat = hitLatency;
-    BlkType *blk = tags->findBlock(pkt->getAddr(), lat);
-    bool satisfied = access(pkt, blk, lat);
+    BlkType *blk = NULL;
 
-    if (!satisfied) {
+    if (!access(pkt, blk, lat)) {
         // MISS
-        CacheBlk::State old_state = (blk) ? blk->status : 0;
-        MemCmd cmd = coherence->getBusCmd(pkt->cmd, old_state);
-        Packet busPkt = Packet(pkt->req, cmd, Packet::Broadcast, blkSize);
-        busPkt.allocate();
+        PacketPtr busPkt = getBusPacket(pkt, blk, pkt->needsExclusive());
 
-        DPRINTF(Cache, "Sending a atomic %s for %x\n",
-                busPkt.cmdString(), busPkt.getAddr());
+        bool isCacheFill = (busPkt != NULL);
 
-        lat += memSidePort->sendAtomic(&busPkt);
+        if (busPkt == NULL) {
+            // just forwarding the same request to the next level
+            // no local cache operation involved
+            busPkt = pkt;
+        }
 
-        DPRINTF(Cache, "Receive response: %s for addr %x in state %i\n",
-                busPkt.cmdString(), busPkt.getAddr(), old_state);
+        DPRINTF(Cache, "Sending an atomic %s for %x\n",
+                busPkt->cmdString(), busPkt->getAddr());
 
-        blk = handleFill(&busPkt, blk, writebacks);
-        bool status = satisfyCpuSideRequest(pkt, blk);
-        assert(status);
-    }
+#if TRACING_ON
+        CacheBlk::State old_state = blk ? blk->status : 0;
+#endif
 
-    // We now have the block one way or another (hit or completed miss)
+        lat += memSidePort->sendAtomic(busPkt);
 
-    // Handle writebacks if needed
-    while (!writebacks.empty()){
-        PacketPtr wbPkt = writebacks.front();
-        memSidePort->sendAtomic(wbPkt);
-        writebacks.pop_front();
-        delete wbPkt;
+        DPRINTF(Cache, "Receive response: %s for addr %x in state %i\n",
+                busPkt->cmdString(), busPkt->getAddr(), old_state);
+
+        if (isCacheFill) {
+            PacketList writebacks;
+            blk = handleFill(busPkt, blk, writebacks);
+            bool status = satisfyCpuSideRequest(pkt, blk);
+            assert(status);
+            delete busPkt;
+
+            // Handle writebacks if needed
+            while (!writebacks.empty()){
+                PacketPtr wbPkt = writebacks.front();
+                memSidePort->sendAtomic(wbPkt);
+                writebacks.pop_front();
+                delete wbPkt;
+            }
+        }
     }
 
+    // We now have the block one way or another (hit or completed miss)
+
     if (pkt->needsResponse()) {
         pkt->makeAtomicResponse();
         pkt->result = Packet::Success;
@@ -553,98 +574,94 @@ Cache<TagStore,Coherence>::functionalAccess(PacketPtr pkt,
 //
 /////////////////////////////////////////////////////
 
+
 template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::handleResponse(PacketPtr pkt, Tick time)
+bool
+Cache<TagStore,Coherence>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk)
 {
-    MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
-#ifndef NDEBUG
-    int num_targets = mshr->getNumTargets();
-#endif
-
-    bool unblock = false;
-    bool unblock_target = false;
-    BlockedCause cause = NUM_BLOCKED_CAUSES;
-
-    if (mshr->isCacheFill) {
-#if 0
-        mshr_miss_latency[mshr->originalCmd.toInt()][0/*pkt->req->getThreadNum()*/] +=
-            curTick - pkt->time;
-#endif
-        // targets were handled in the cache tags
-        if (mshr == noTargetMSHR) {
-            // we always clear at least one target
-            unblock_target = true;
-            cause = Blocked_NoTargets;
-            noTargetMSHR = NULL;
-        }
+    // Perform pkt's access on blk; false if blk is NULL or lacks permission.
+    if (blk && (pkt->needsExclusive() ? blk->isWritable() : blk->isValid())) {
+        assert(pkt->isWrite() || pkt->isReadWrite() || pkt->isRead());
+        assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize);
 
-        if (mshr->hasTargets()) {
-            // Didn't satisfy all the targets, need to resend
-            mshrQueue.markPending(mshr);
-            mshr->order = order++;
-            requestMemSideBus(Request_MSHR, time);
-        }
-        else {
-            unblock = mshrQueue.isFull();
-            mshrQueue.deallocate(mshr);
-            if (unblock) {
-                unblock = !mshrQueue.isFull();
-                cause = Blocked_NoMSHRs;
+        if (pkt->isReadWrite()) {   // RMW first: isWrite() is also true
+            cmpAndSwap(blk, pkt);
+        } else if (pkt->isWrite()) {
+            if (blk->checkWrite(pkt)) {
+                blk->status |= BlkDirty;
+                pkt->writeDataToBlock(blk->data, blkSize);
             }
+        } else {
+            if (pkt->isLocked()) {
+                blk->trackLoadLocked(pkt);
+            }
+            pkt->setDataFromBlock(blk->data, blkSize);
         }
+        return true;
     } else {
-        if (pkt->req->isUncacheable()) {
-            mshr_uncacheable_lat[pkt->cmd.toInt()][0/*pkt->req->getThreadNum()*/] +=
-                curTick - pkt->time;
-        }
-        if (mshr->hasTargets() && pkt->req->isUncacheable()) {
-            // Should only have 1 target if we had any
-            assert(num_targets == 1);
-            MSHR::Target *target = mshr->getTarget();
-            assert(target->cpuSide);
-            mshr->popTarget();
-            if (pkt->isRead()) {
-                target->pkt->setData(pkt->getPtr<uint8_t>());
-            }
-            cpuSidePort->respond(target->pkt, time);
-            assert(!mshr->hasTargets());
+        return false;
+    }
+}
+
+
+template<class TagStore, class Coherence>
+bool
+Cache<TagStore,Coherence>::satisfyTarget(MSHR::Target *target, BlkType *blk)
+{   // Satisfy one MSHR target by forwarding its packet and blk.
+    assert(target != NULL);
+    assert(target->isCpuSide());    // only CPU-side targets are expected here
+    return satisfyCpuSideRequest(target->pkt, blk);
+}
+
+template<class TagStore, class Coherence>
+bool
+Cache<TagStore,Coherence>::satisfyMSHR(MSHR *mshr, PacketPtr pkt,
+                                       BlkType *blk)
+{
+    // Respond to the MSHR's targets in order.  Returns true when all were
+    // satisfied, false when the MSHR was re-queued for another request.
+    // First offset for critical word first calculations
+    int initial_offset = 0;
+
+    if (mshr->hasTargets()) {
+        initial_offset = mshr->getTarget()->pkt->getOffset(blkSize);
+    }
+
+    while (mshr->hasTargets()) {
+        MSHR::Target *target = mshr->getTarget();
+
+        if (!satisfyTarget(target, blk)) {
+            // Invalid access, need to do another request
+            // can occur if block is invalidated, or not correct
+            // permissions
+            MSHRQueue *mq = mshr->queue;
+            mq->markPending(mshr);
+            mshr->order = order++;
+            requestMemSideBus((RequestCause)mq->index, pkt->finishTime);
+            return false;
         }
-        else if (mshr->hasTargets()) {
-            //Must be a no_allocate with possibly more than one target
-            assert(!mshr->isCacheFill);
-            while (mshr->hasTargets()) {
-                MSHR::Target *target = mshr->getTarget();
-                assert(target->isCpuSide());
-                mshr->popTarget();
-                if (pkt->isRead()) {
-                    target->pkt->setData(pkt->getPtr<uint8_t>());
-                }
-                cpuSidePort->respond(target->pkt, time);
-            }
+
+
+        // How many bytes past the first request is this one
+        int transfer_offset = target->pkt->getOffset(blkSize) - initial_offset;
+        if (transfer_offset < 0) {
+            transfer_offset += blkSize;
         }
 
-        if (pkt->isWrite()) {
-            // If the wrtie buffer is full, we might unblock now
-            unblock = writeBuffer.isFull();
-            writeBuffer.deallocate(mshr);
-            if (unblock) {
-                // Did we really unblock?
-                unblock = !writeBuffer.isFull();
-                cause = Blocked_NoWBBuffers;
-            }
-        } else {
-            unblock = mshrQueue.isFull();
-            mshrQueue.deallocate(mshr);
-            if (unblock) {
-                unblock = !mshrQueue.isFull();
-                cause = Blocked_NoMSHRs;
-            }
+        // If critical word (no offset) return first word time.  Parenthesize
+        // the ?: so the hit latency is added rather than tested.
+        Tick completion_time = tags->getHitLatency() +
+            (transfer_offset ? pkt->finishTime : pkt->firstWordTime);
+        if (!target->pkt->req->isUncacheable()) {
+            missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] +=
+                completion_time - target->time;
         }
+        target->pkt->makeTimingResponse();
+        cpuSidePort->respond(target->pkt, completion_time);
+        mshr->popTarget();
     }
-    if (unblock || unblock_target) {
-        clearBlocked(cause);
-    }
+
+    return true;
+}
 
 
@@ -665,21 +682,60 @@ Cache<TagStore,Coherence>::handleResponse(PacketPtr pkt)
     assert(pkt->result == Packet::Success);
     DPRINTF(Cache, "Handling reponse to %x\n", pkt->getAddr());
 
+    MSHRQueue *mq = mshr->queue;
+    bool wasFull = mq->isFull();
+
+    if (mshr == noTargetMSHR) {
+        // we always clear at least one target
+        clearBlocked(Blocked_NoTargets);
+        noTargetMSHR = NULL;
+    }
+
+    // Can we deallocate MSHR when done?
+    bool deallocate = false;
+
     if (mshr->isCacheFill) {
+#if 0
+        mshr_miss_latency[mshr->originalCmd.toInt()][0/*pkt->req->getThreadNum()*/] +=
+            curTick - pkt->time;
+#endif
         DPRINTF(Cache, "Block for addr %x being updated in Cache\n",
                 pkt->getAddr());
         BlkType *blk = tags->findBlock(pkt->getAddr());
         PacketList writebacks;
         blk = handleFill(pkt, blk, writebacks);
-        satisfyMSHR(mshr, pkt, blk);
+        deallocate = satisfyMSHR(mshr, pkt, blk);
         // copy writebacks to write buffer
         while (!writebacks.empty()) {
             PacketPtr wbPkt = writebacks.front();
-            allocateBuffer(wbPkt, time, false, true);
+            allocateBuffer(wbPkt, time, true);
             writebacks.pop_front();
         }
+    } else {
+        if (pkt->req->isUncacheable()) {
+            mshr_uncacheable_lat[pkt->cmd.toInt()][0/*pkt->req->getThreadNum()*/] +=
+                curTick - pkt->time;
+        }
+
+        while (mshr->hasTargets()) {
+            MSHR::Target *target = mshr->getTarget();
+            assert(target->isCpuSide());
+            mshr->popTarget();
+            if (pkt->isRead()) {
+                target->pkt->setData(pkt->getPtr<uint8_t>());
+            }
+            cpuSidePort->respond(target->pkt, time);
+        }
+        assert(!mshr->hasTargets());
+        deallocate = true;
+    }
+
+    if (deallocate) {
+        mq->deallocate(mshr);
+        if (wasFull && !mq->isFull()) {
+            clearBlocked((BlockedCause)mq->index);
+        }
     }
-    handleResponse(pkt, time);
 }
 
 
@@ -717,6 +773,8 @@ Cache<TagStore,Coherence>::handleFill(PacketPtr pkt, BlkType *blk,
     Addr addr = pkt->getAddr();
 
     if (blk == NULL) {
+        // better have read new data
+        assert(pkt->isRead());
 
         // need to do a replacement
         blk = tags->findReplacement(addr, writebacks);
@@ -733,7 +791,6 @@ Cache<TagStore,Coherence>::handleFill(PacketPtr pkt, BlkType *blk,
 
         blk->tag = tags->extractTag(addr);
         blk->status = coherence->getNewState(pkt);
-        assert(pkt->isRead());
     } else {
         // existing block... probably an upgrade
         assert(blk->tag == tags->extractTag(addr));
@@ -759,90 +816,6 @@ Cache<TagStore,Coherence>::handleFill(PacketPtr pkt, BlkType *blk,
 }
 
 
-template<class TagStore, class Coherence>
-bool
-Cache<TagStore,Coherence>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk)
-{
-    if (blk && (pkt->needsExclusive() ? blk->isWritable() : blk->isValid())) {
-        assert(pkt->isWrite() || pkt->isReadWrite() || pkt->isRead());
-        assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize);
-
-        if (pkt->isWrite()) {
-            if (blk->checkWrite(pkt)) {
-                blk->status |= BlkDirty;
-                pkt->writeDataToBlock(blk->data, blkSize);
-            }
-        } else if (pkt->isReadWrite()) {
-            cmpAndSwap(blk, pkt);
-        } else {
-            if (pkt->isLocked()) {
-                blk->trackLoadLocked(pkt);
-            }
-            pkt->setDataFromBlock(blk->data, blkSize);
-        }
-
-        return true;
-    } else {
-        return false;
-    }
-}
-
-
-template<class TagStore, class Coherence>
-bool
-Cache<TagStore,Coherence>::satisfyTarget(MSHR::Target *target, BlkType *blk)
-{
-    assert(target != NULL);
-    assert(target->isCpuSide());
-    return satisfyCpuSideRequest(target->pkt, blk);
-}
-
-template<class TagStore, class Coherence>
-void
-Cache<TagStore,Coherence>::satisfyMSHR(MSHR *mshr, PacketPtr pkt,
-                                       BlkType *blk)
-{
-    // respond to MSHR targets, if any
-
-    // First offset for critical word first calculations
-    int initial_offset = 0;
-
-    if (mshr->hasTargets()) {
-        initial_offset = mshr->getTarget()->pkt->getOffset(blkSize);
-    }
-
-    while (mshr->hasTargets()) {
-        MSHR::Target *target = mshr->getTarget();
-
-        if (!satisfyTarget(target, blk)) {
-            // Invalid access, need to do another request
-            // can occur if block is invalidated, or not correct
-            // permissions
-            break;
-        }
-
-
-        // How many bytes pass the first request is this one
-        int transfer_offset = target->pkt->getOffset(blkSize) - initial_offset;
-        if (transfer_offset < 0) {
-            transfer_offset += blkSize;
-        }
-
-        // If critical word (no offset) return first word time
-        Tick completion_time = tags->getHitLatency() +
-            transfer_offset ? pkt->finishTime : pkt->firstWordTime;
-
-        if (!target->pkt->req->isUncacheable()) {
-            missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] +=
-                completion_time - target->time;
-        }
-        target->pkt->makeTimingResponse();
-        cpuSidePort->respond(target->pkt, completion_time);
-        mshr->popTarget();
-    }
-}
-
-
 /////////////////////////////////////////////////////
 //
 // Snoop path: requests coming in from the memory side
@@ -1052,7 +1025,7 @@ Cache<TagStore,Coherence>::getNextMSHR()
             // (hwpf_mshr_misses)
             mshr_misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
             // Don't request bus, since we already have it
-            return allocateBuffer(pkt, curTick, true, false);
+            return allocateMissBuffer(pkt, curTick, false);
         }
     }
 
@@ -1062,7 +1035,7 @@ Cache<TagStore,Coherence>::getNextMSHR()
 
 template<class TagStore, class Coherence>
 PacketPtr
-Cache<TagStore,Coherence>::getPacket()
+Cache<TagStore,Coherence>::getTimingPacket()
 {
     MSHR *mshr = getNextMSHR();
 
@@ -1073,30 +1046,21 @@ Cache<TagStore,Coherence>::getPacket()
     BlkType *blk = tags->findBlock(mshr->addr);
 
     // use request from 1st target
-    MSHR::Target *tgt1 = mshr->getTarget();
-    PacketPtr tgt1_pkt = tgt1->pkt;
-    PacketPtr pkt;
+    PacketPtr tgt_pkt = mshr->getTarget()->pkt;
+    PacketPtr pkt = getBusPacket(tgt_pkt, blk, mshr->needsExclusive);
 
-    if (mshr->isCacheFill) {
-        MemCmd cmd;
-        if (blk && blk->isValid()) {
-            // only reason to be here is that blk is shared
-            // (read-only) and we need exclusive
-            assert(mshr->needsExclusive && !blk->isWritable());
-            cmd = MemCmd::UpgradeReq;
-        } else {
-            // block is invalid
-            cmd = mshr->needsExclusive ? MemCmd::ReadExReq : MemCmd::ReadReq;
+    mshr->isCacheFill = (pkt != NULL);
+
+    if (pkt == NULL) {
+        // make copy of current packet to forward
+        pkt = new Packet(tgt_pkt);
+        pkt->allocate();
+        if (pkt->isWrite()) {
+            pkt->setData(tgt_pkt->getPtr<uint8_t>());
         }
-        pkt = new Packet(tgt1_pkt->req, cmd, Packet::Broadcast);
-    } else {
-        assert(blk == NULL);
-        assert(mshr->getNumTargets() == 1);
-        pkt = new Packet(tgt1_pkt->req, tgt1_pkt->cmd, Packet::Broadcast);
     }
 
     pkt->senderState = mshr;
-    pkt->allocate();
     return pkt;
 }
 
@@ -1243,7 +1207,7 @@ Cache<TagStore,Coherence>::MemSidePort::sendPacket()
         waitingOnRetry = !success;
     } else {
         // check for non-response packets (requests & writebacks)
-        PacketPtr pkt = myCache()->getPacket();
+        PacketPtr pkt = myCache()->getTimingPacket();
         MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
 
         bool success = sendTiming(pkt);
diff --git a/src/mem/cache/coherence/coherence_protocol.cc b/src/mem/cache/coherence/coherence_protocol.cc
index 3fd17c8c7..47d2b469f 100644
--- a/src/mem/cache/coherence/coherence_protocol.cc
+++ b/src/mem/cache/coherence/coherence_protocol.cc
@@ -259,7 +259,7 @@ CoherenceProtocol::CoherenceProtocol(const string &name,
     MC::Command writeToSharedCmd =
         doUpgrades ? MC::UpgradeReq : MC::ReadExReq;
     MC::Command writeToSharedResp =
-        doUpgrades ? MC::UpgradeReq : MC::ReadExResp;
+        doUpgrades ? MC::UpgradeResp : MC::ReadExResp;
 
     // Note that all transitions by default cause a panic.
     // Override the valid transitions with the appropriate actions here.
@@ -272,6 +272,7 @@ CoherenceProtocol::CoherenceProtocol(const string &name,
     tt[Invalid][MC::WriteReq].onRequest(MC::ReadExReq);
     tt[Invalid][MC::ReadExReq].onRequest(MC::ReadExReq);
     tt[Invalid][MC::SwapReq].onRequest(MC::ReadExReq);
+    tt[Invalid][MC::UpgradeReq].onRequest(MC::UpgradeReq);
     tt[Shared][MC::WriteReq].onRequest(writeToSharedCmd);
     tt[Shared][MC::ReadExReq].onRequest(MC::ReadExReq);
     tt[Shared][MC::SwapReq].onRequest(writeToSharedCmd);
diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc
index 218d42339..1f2c05a6e 100644
--- a/src/mem/cache/miss/mshr.cc
+++ b/src/mem/cache/miss/mshr.cc
@@ -54,12 +54,12 @@ MSHR::MSHR()
 }
 
 void
-MSHR::allocate(Addr _addr, int _size, PacketPtr target, bool cacheFill)
+MSHR::allocate(Addr _addr, int _size, PacketPtr target)
 {
     addr = _addr;
     size = _size;
     assert(target);
-    isCacheFill = cacheFill;
+    isCacheFill = false;
     needsExclusive = target->needsExclusive();
     _isUncacheable = target->req->isUncacheable();
     inService = false;
diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh
index b38b69c52..47f6a819b 100644
--- a/src/mem/cache/miss/mshr.hh
+++ b/src/mem/cache/miss/mshr.hh
@@ -136,7 +136,7 @@ public:
      * @param size The number of bytes to request.
      * @param pkt  The original miss.
      */
-    void allocate(Addr addr, int size, PacketPtr pkt, bool isFill);
+    void allocate(Addr addr, int size, PacketPtr pkt);
 
     /**
      * Allocate this MSHR as a buffer for the given request.
diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc
index d58594798..6b030a865 100644
--- a/src/mem/cache/miss/mshr_queue.cc
+++ b/src/mem/cache/miss/mshr_queue.cc
@@ -36,8 +36,9 @@
 
 using namespace std;
 
-MSHRQueue::MSHRQueue(int num_entries, int reserve)
-    : numEntries(num_entries + reserve - 1), numReserve(reserve)
+MSHRQueue::MSHRQueue(int num_entries, int reserve, int _index)
+    : numEntries(num_entries + reserve - 1), numReserve(reserve),
+      index(_index)
 {
     allocated = 0;
     inServiceEntries = 0;
@@ -107,14 +108,14 @@ MSHRQueue::findPending(Addr addr, int size) const
 }
 
 MSHR *
-MSHRQueue::allocate(Addr addr, int size, PacketPtr &pkt, bool isFill)
+MSHRQueue::allocate(Addr addr, int size, PacketPtr &pkt)
 {
     assert(!freeList.empty());
     MSHR *mshr = freeList.front();
     assert(mshr->getNumTargets() == 0);
     freeList.pop_front();
 
-    mshr->allocate(addr, size, pkt, isFill);
+    mshr->allocate(addr, size, pkt);
     mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr);
     mshr->readyIter = pendingList.insert(pendingList.end(), mshr);
 
diff --git a/src/mem/cache/miss/mshr_queue.hh b/src/mem/cache/miss/mshr_queue.hh
index 182dfd5b2..806aa9c64 100644
--- a/src/mem/cache/miss/mshr_queue.hh
+++ b/src/mem/cache/miss/mshr_queue.hh
@@ -74,6 +74,9 @@ class MSHRQueue
     int allocated;
     /** The number of entries that have been forwarded to the bus. */
     int inServiceEntries;
+    /** The index of this queue within the cache (MSHR queue vs. write
+     * buffer). */
+    const int index;
 
     /**
      * Create a queue with a given number of entries.
@@ -81,7 +84,7 @@ class MSHRQueue
      * @param reserve The minimum number of entries needed to satisfy
      * any access.
      */
-    MSHRQueue(int num_entries, int reserve = 1);
+    MSHRQueue(int num_entries, int reserve, int index);
 
     /** Destructor */
     ~MSHRQueue();
@@ -118,7 +121,7 @@ class MSHRQueue
      *
      * @pre There are free entries.
      */
-    MSHR *allocate(Addr addr, int size, PacketPtr &pkt, bool isFill);
+    MSHR *allocate(Addr addr, int size, PacketPtr &pkt);
 
     /**
      * Removes the given MSHR from the queue. This places the MSHR on the
diff --git a/src/mem/cache/prefetch/base_prefetcher.cc b/src/mem/cache/prefetch/base_prefetcher.cc
index d03cfe3ae..378363665 100644
--- a/src/mem/cache/prefetch/base_prefetcher.cc
+++ b/src/mem/cache/prefetch/base_prefetcher.cc
@@ -141,7 +141,7 @@ BasePrefetcher::getPacket()
             keepTrying = cache->inCache(pkt->getAddr());
         }
         if (pf.empty()) {
-            cache->deassertMemSideBusRequest(Request_PF);
+            cache->deassertMemSideBusRequest(BaseCache::Request_PF);
             if (keepTrying) return NULL; //None left, all were in cache
         }
     } while (keepTrying);
@@ -165,7 +165,7 @@ BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time)
             pfRemovedMSHR++;
             pf.erase(iter);
             if (pf.empty())
-                cache->deassertMemSideBusRequest(Request_PF);
+                cache->deassertMemSideBusRequest(BaseCache::Request_PF);
         }
 
         //Remove anything in queue with delay older than time
@@ -182,7 +182,7 @@ BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time)
                 iter--;
             }
             if (pf.empty())
-                cache->deassertMemSideBusRequest(Request_PF);
+                cache->deassertMemSideBusRequest(BaseCache::Request_PF);
         }
 
 
@@ -243,7 +243,7 @@ BasePrefetcher::handleMiss(PacketPtr &pkt, Tick time)
             pf.push_back(prefetch);
 
             //Make sure to request the bus, with proper delay
-            cache->requestMemSideBus(Request_PF, prefetch->time);
+            cache->requestMemSideBus(BaseCache::Request_PF, prefetch->time);
 
             //Increment through the list
             addr++;
diff --git a/src/mem/packet.cc b/src/mem/packet.cc
index 57c6a6381..cd0ed8a2e 100644
--- a/src/mem/packet.cc
+++ b/src/mem/packet.cc
@@ -64,10 +64,8 @@ MemCmd::commandInfo[] =
     /* WriteResp */
     { SET3(IsWrite, NeedsExclusive, IsResponse), InvalidCmd, "WriteResp" },
     /* Writeback */
-    { SET5(IsWrite, NeedsExclusive, IsRequest, HasData, NeedsResponse),
-            WritebackAck, "Writeback" },
-    /* WritebackAck */
-    { SET3(IsWrite, NeedsExclusive, IsResponse), InvalidCmd, "WritebackAck" },
+    { SET4(IsWrite, NeedsExclusive, IsRequest, HasData),
+            InvalidCmd, "Writeback" },
     /* SoftPFReq */
     { SET4(IsRead, IsRequest, IsSWPrefetch, NeedsResponse),
             SoftPFResp, "SoftPFReq" },
@@ -88,7 +86,11 @@ MemCmd::commandInfo[] =
     { SET4(IsWrite, NeedsExclusive, IsInvalidate, IsResponse),
             InvalidCmd, "WriteInvalidateResp" },
     /* UpgradeReq */
-    { SET3(IsInvalidate, IsRequest, IsUpgrade), InvalidCmd, "UpgradeReq" },
+    { SET4(IsInvalidate, NeedsExclusive, IsRequest, NeedsResponse),
+            UpgradeResp, "UpgradeReq" },
+    /* UpgradeResp */
+    { SET3(IsInvalidate, NeedsExclusive, IsResponse),
+            InvalidCmd, "UpgradeResp" },
     /* ReadExReq */
     { SET5(IsRead, NeedsExclusive, IsInvalidate, IsRequest, NeedsResponse),
             ReadExResp, "ReadExReq" },
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index ca186d875..6291b7c1d 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -67,7 +67,6 @@ class MemCmd
         WriteReq,
         WriteResp,
         Writeback,
-        WritebackAck,
         SoftPFReq,
         HardPFReq,
         SoftPFResp,
@@ -75,6 +74,7 @@ class MemCmd
         WriteInvalidateReq,
         WriteInvalidateResp,
         UpgradeReq,
+        UpgradeResp,
         ReadExReq,
         ReadExResp,
         LoadLockedReq,
@@ -100,7 +100,6 @@ class MemCmd
         NeedsResponse,  //!< Requester needs response from target
         IsSWPrefetch,
         IsHWPrefetch,
-        IsUpgrade,
         IsLocked,       //!< Alpha/MIPS LL or SC access
         HasData,        //!< There is an associated payload
         NUM_COMMAND_ATTRIBUTES
-- 
cgit v1.2.3


From bdd5fd20fb19eb52ef812cd284094e5513646e36 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Fri, 22 Jun 2007 09:24:07 -0700
Subject: Fixes to hitLatency, blocking, buffer allocation. Single-cpu timing
 mode seems to work now.

--HG--
extra : convert_revision : 720f6172df18a1c941e5bd0e8fdfbd686c13c7ad
---
 src/mem/cache/base_cache.cc      |  1 +
 src/mem/cache/base_cache.hh      | 31 ++++++++++------------
 src/mem/cache/cache.hh           | 26 -------------------
 src/mem/cache/cache_impl.hh      | 56 +++++++++++++++++-----------------------
 src/mem/cache/miss/mshr.hh       | 24 +++++++----------
 src/mem/cache/miss/mshr_queue.cc | 10 +++----
 6 files changed, 53 insertions(+), 95 deletions(-)

diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc
index 8b476e100..1f5182574 100644
--- a/src/mem/cache/base_cache.cc
+++ b/src/mem/cache/base_cache.cc
@@ -54,6 +54,7 @@ BaseCache::BaseCache(const std::string &name, Params &params)
       writeBuffer(params.numWriteBuffers, params.numMSHRs+1000,
                   MSHRQueue_WriteBuffer),
       blkSize(params.blkSize),
+      hitLatency(params.hitLatency),
       numTarget(params.numTargets),
       blocked(0),
       noTargetMSHR(NULL),
diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh
index 10fd3289c..27134b2ad 100644
--- a/src/mem/cache/base_cache.hh
+++ b/src/mem/cache/base_cache.hh
@@ -195,6 +195,11 @@ class BaseCache : public MemObject
     /** Block size of this cache */
     const int blkSize;
 
+    /**
+     * The latency of a hit in this device.
+     */
+    int hitLatency;
+
     /** The number of targets for each MSHR. */
     const int numTarget;
 
@@ -464,15 +469,10 @@ class BaseCache : public MemObject
         if (blocked == 0) {
             blocked_causes[cause]++;
             blockedCycle = curTick;
+            cpuSidePort->setBlocked();
         }
-        int old_state = blocked;
-        if (!(blocked & flag)) {
-            //Wasn't already blocked for this cause
-            blocked |= flag;
-            DPRINTF(Cache,"Blocking for cause %s\n", cause);
-            if (!old_state)
-                cpuSidePort->setBlocked();
-        }
+        blocked |= flag;
+        DPRINTF(Cache,"Blocking for cause %d, mask=%d\n", cause, blocked);
     }
 
     /**
@@ -485,16 +485,11 @@ class BaseCache : public MemObject
     void clearBlocked(BlockedCause cause)
     {
         uint8_t flag = 1 << cause;
-        DPRINTF(Cache,"Unblocking for cause %s, causes left=%i\n",
-                cause, blocked);
-        if (blocked & flag)
-        {
-            blocked &= ~flag;
-            if (!isBlocked()) {
-                blocked_cycles[cause] += curTick - blockedCycle;
-                DPRINTF(Cache,"Unblocking from all causes\n");
-                cpuSidePort->clearBlocked();
-            }
+        blocked &= ~flag;
+        DPRINTF(Cache,"Unblocking for cause %d, mask=%d\n", cause, blocked);
+        if (blocked == 0) {
+            blocked_cycles[cause] += curTick - blockedCycle;
+            cpuSidePort->clearBlocked();
         }
     }
 
diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index 06fce1a71..a93b761ec 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -136,23 +136,6 @@ class Cache : public BaseCache
     /** Prefetcher */
     BasePrefetcher *prefetcher;
 
-    /**
-     * The clock ratio of the outgoing bus.
-     * Used for calculating critical word first.
-     */
-    int busRatio;
-
-     /**
-      * The bus width in bytes of the outgoing bus.
-      * Used for calculating critical word first.
-      */
-    int busWidth;
-
-    /**
-     * The latency of a hit in this device.
-     */
-    int hitLatency;
-
     /**
      * Can this cache should allocate a block on a line-sized write miss.
      */
@@ -303,15 +286,6 @@ class Cache : public BaseCache
      */
     void squash(int threadNum);
 
-    /**
-     * Allocate a new MSHR or write buffer to handle a miss.
-     * @param pkt The access that missed.
-     * @param time The time to continue processing the miss.
-     * @param isFill Whether to fetch & allocate a block
-     *               or just forward the request.
-     */
-    MSHR *allocateBuffer(PacketPtr pkt, Tick time, bool requestBus);
-
     /**
      * Selects a outstanding request to service.
      * @return The request to service, NULL if none found.
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 81fcb4158..0649b5061 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -149,27 +149,6 @@ Cache<TagStore,Coherence>::cmpAndSwap(BlkType *blk, PacketPtr pkt)
 /////////////////////////////////////////////////////
 
 
-template<class TagStore, class Coherence>
-MSHR *
-Cache<TagStore,Coherence>::allocateBuffer(PacketPtr pkt, Tick time,
-                                          bool requestBus)
-{
-    MSHRQueue *mq = NULL;
-
-    if (pkt->isWrite() && !pkt->isRead()) {
-        /**
-         * @todo Add write merging here.
-         */
-        mq = &writeBuffer;
-    } else {
-        mq = &mshrQueue;
-    }
-
-    return allocateBufferInternal(mq, pkt->getAddr(), pkt->getSize(),
-                                  pkt, time, requestBus);
-}
-
-
 template<class TagStore, class Coherence>
 void
 Cache<TagStore,Coherence>::markInService(MSHR *mshr)
@@ -438,6 +417,8 @@ Cache<TagStore,Coherence>::getBusPacket(PacketPtr cpu_pkt, BlkType *blk,
         return NULL;
     }
 
+    assert(cpu_pkt->needsResponse());
+
     MemCmd cmd;
     const bool useUpgrades = true;
     if (blkValid && useUpgrades) {
@@ -1043,23 +1024,34 @@ Cache<TagStore,Coherence>::getTimingPacket()
         return NULL;
     }
 
-    BlkType *blk = tags->findBlock(mshr->addr);
-
     // use request from 1st target
     PacketPtr tgt_pkt = mshr->getTarget()->pkt;
-    PacketPtr pkt = getBusPacket(tgt_pkt, blk, mshr->needsExclusive);
+    PacketPtr pkt = NULL;
 
-    mshr->isCacheFill = (pkt != NULL);
-
-    if (pkt == NULL) {
-        // make copy of current packet to forward
-        pkt = new Packet(tgt_pkt);
-        pkt->allocate();
-        if (pkt->isWrite()) {
-            pkt->setData(tgt_pkt->getPtr<uint8_t>());
+    if (mshr->isSimpleForward()) {
+        // no response expected, just forward packet as it is
+        assert(tags->findBlock(mshr->addr) == NULL);
+        pkt = tgt_pkt;
+    } else {
+        BlkType *blk = tags->findBlock(mshr->addr);
+        pkt = getBusPacket(tgt_pkt, blk, mshr->needsExclusive);
+
+        mshr->isCacheFill = (pkt != NULL);
+
+        if (pkt == NULL) {
+            // not a cache block request, but a response is expected
+            assert(!mshr->isSimpleForward());
+            // make copy of current packet to forward, keep current
+            // copy for response handling
+            pkt = new Packet(tgt_pkt);
+            pkt->allocate();
+            if (pkt->isWrite()) {
+                pkt->setData(tgt_pkt->getPtr<uint8_t>());
+            }
         }
     }
 
+    assert(pkt != NULL);
     pkt->senderState = mshr;
     return pkt;
 }
diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh
index 47f6a819b..195438e46 100644
--- a/src/mem/cache/miss/mshr.hh
+++ b/src/mem/cache/miss/mshr.hh
@@ -164,28 +164,19 @@ public:
      * Returns the current number of allocated targets.
      * @return The current number of allocated targets.
      */
-    int getNumTargets()
-    {
-        return ntargets;
-    }
+    int getNumTargets() { return ntargets; }
 
     /**
      * Returns a pointer to the target list.
      * @return a pointer to the target list.
      */
-    TargetList* getTargetList()
-    {
-        return &targets;
-    }
+    TargetList* getTargetList() { return &targets; }
 
     /**
      * Returns a reference to the first target.
      * @return A pointer to the first target.
      */
-    Target *getTarget()
-    {
-        return &targets.front();
-    }
+    Target *getTarget() { return &targets.front(); }
 
     /**
      * Pop first target.
@@ -200,9 +191,14 @@ public:
      * Returns true if there are targets left.
      * @return true if there are targets
      */
-    bool hasTargets()
+    bool hasTargets() { return !targets.empty(); }
+
+    bool isSimpleForward()
     {
-        return !targets.empty();
+        if (getNumTargets() != 1)
+            return false;
+        Target *tgt = getTarget();
+        return tgt->isCpuSide() && !tgt->pkt->needsResponse();
     }
 
     /**
diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc
index 6b030a865..3407e2588 100644
--- a/src/mem/cache/miss/mshr_queue.cc
+++ b/src/mem/cache/miss/mshr_queue.cc
@@ -158,14 +158,14 @@ MSHRQueue::moveToFront(MSHR *mshr)
 void
 MSHRQueue::markInService(MSHR *mshr)
 {
-    //assert(mshr == pendingList.front());
-#if 0
-    if (!mshr->pkt->needsResponse() && !(mshr->pkt->cmd == MemCmd::UpgradeReq)) {
-        assert(mshr->getNumTargets() == 0);
+    if (mshr->isSimpleForward()) {
+        // we just forwarded the request packet & don't expect a
+        // response, so get rid of it
+        assert(mshr->getNumTargets() == 1);
+        mshr->popTarget();
         deallocate(mshr);
         return;
     }
-#endif
     mshr->inService = true;
     pendingList.erase(mshr->readyIter);
     //mshr->readyIter = NULL;
-- 
cgit v1.2.3


From 57ff2604e59647c6afe988767186f13c80c1aa16 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sat, 23 Jun 2007 13:24:33 -0700
Subject: Minor fix plus new assertion to catch similar bugs.

src/cpu/memtest/memtest.cc:
    Need to set packet source field so that response from cache
    doesn't run into assertion failure when copying source to dest.
src/mem/packet.hh:
    Copy source field when copying packets.
    Assert that source is valid before copying it to dest
    when turning packets around.

--HG--
extra : convert_revision : 09e3cfda424aa89fe170e21e955b295746832bf8
---
 src/cpu/memtest/memtest.cc | 2 ++
 src/mem/packet.hh          | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc
index 6e8c5d0bf..019b4328c 100644
--- a/src/cpu/memtest/memtest.cc
+++ b/src/cpu/memtest/memtest.cc
@@ -344,6 +344,7 @@ MemTest::tick()
                 req->getPaddr(), blockAddr(req->getPaddr()), *result);
 
         PacketPtr pkt = new Packet(req, MemCmd::ReadReq, Packet::Broadcast);
+        pkt->setSrc(0);
         pkt->dataDynamicArray(new uint8_t[req->getSize()]);
         MemTestSenderState *state = new MemTestSenderState(result);
         pkt->senderState = state;
@@ -373,6 +374,7 @@ MemTest::tick()
                 req->getPaddr(), blockAddr(req->getPaddr()), data & 0xff);
 
         PacketPtr pkt = new Packet(req, MemCmd::WriteReq, Packet::Broadcast);
+        pkt->setSrc(0);
         uint8_t *pkt_data = new uint8_t[req->getSize()];
         pkt->dataDynamicArray(pkt_data);
         memcpy(pkt_data, &data, req->getSize());
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index 80da045ef..fc1c283ed 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -384,7 +384,7 @@ class Packet : public FastAlloc
     Packet(Packet *origPkt)
         :  data(NULL), staticData(false), dynamicData(false), arrayData(false),
            addr(origPkt->addr), size(origPkt->size),
-           dest(origPkt->dest),
+           src(origPkt->src), dest(origPkt->dest),
            addrSizeValid(origPkt->addrSizeValid), srcValid(origPkt->srcValid),
            snoopFlags(origPkt->snoopFlags),
            time(curTick),
@@ -440,7 +440,7 @@ class Packet : public FastAlloc
      */
     void convertAtomicToTimingResponse()
     {
-        dest = src;
+        dest = getSrc();
         srcValid = false;
     }
 
-- 
cgit v1.2.3


From 47bce8ef7875420b2e26ebd834ed0d4146b65d5b Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sun, 24 Jun 2007 17:32:31 -0700
Subject: Better handling of deferred targets.

--HG--
extra : convert_revision : 0fbc28c32c1eeb3dd672df14c1d53bd516f81d0f
---
 src/mem/cache/base_cache.cc |   3 +-
 src/mem/cache/base_cache.hh |   2 -
 src/mem/cache/cache.hh      |   3 +-
 src/mem/cache/cache_impl.hh | 127 ++++++++++++++++++++++----------------------
 src/mem/cache/miss/mshr.cc  |  90 ++++++++++++++++++++++---------
 src/mem/cache/miss/mshr.hh  |  17 +++---
 6 files changed, 141 insertions(+), 101 deletions(-)

diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc
index 1f5182574..ac577f5a2 100644
--- a/src/mem/cache/base_cache.cc
+++ b/src/mem/cache/base_cache.cc
@@ -42,8 +42,7 @@ using namespace std;
 
 BaseCache::CachePort::CachePort(const std::string &_name, BaseCache *_cache)
     : SimpleTimingPort(_name, _cache), cache(_cache), otherPort(NULL),
-      blocked(false), waitingOnRetry(false), mustSendRetry(false),
-      requestCauses(0)
+      blocked(false), mustSendRetry(false), requestCauses(0)
 {
 }
 
diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh
index 27134b2ad..b35fc0811 100644
--- a/src/mem/cache/base_cache.hh
+++ b/src/mem/cache/base_cache.hh
@@ -118,8 +118,6 @@ class BaseCache : public MemObject
 
         bool blocked;
 
-        bool waitingOnRetry;
-
         bool mustSendRetry;
 
         /**
diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index a93b761ec..2a95dc53c 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -182,8 +182,7 @@ class Cache : public BaseCache
     BlkType *handleFill(PacketPtr pkt, BlkType *blk,
                         PacketList &writebacks);
 
-    bool satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk);
-    bool satisfyTarget(MSHR::Target *target, BlkType *blk);
+    void satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk);
     bool satisfyMSHR(MSHR *mshr, PacketPtr pkt, BlkType *blk);
 
     void doTimingSupplyResponse(PacketPtr req_pkt, uint8_t *blk_data);
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 0649b5061..b4d334249 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -368,7 +368,7 @@ Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
             if (mshr->threadNum != 0/*pkt->req->getThreadNum()*/) {
                 mshr->threadNum = -1;
             }
-            mshr->allocateTarget(pkt, true);
+            mshr->allocateTarget(pkt);
             if (mshr->getNumTargets() == numTarget) {
                 noTargetMSHR = mshr;
                 setBlocked(Blocked_NoTargets);
@@ -483,8 +483,7 @@ Cache<TagStore,Coherence>::atomicAccess(PacketPtr pkt)
         if (isCacheFill) {
             PacketList writebacks;
             blk = handleFill(busPkt, blk, writebacks);
-            bool status = satisfyCpuSideRequest(pkt, blk);
-            assert(status);
+            satisfyCpuSideRequest(pkt, blk);
             delete busPkt;
 
             // Handle writebacks if needed
@@ -538,12 +537,14 @@ Cache<TagStore,Coherence>::functionalAccess(PacketPtr pkt,
 
     // There can be many matching outstanding writes.
     std::vector<MSHR*> writes;
-    writeBuffer.findMatches(blk_addr, writes);
+    assert(!writeBuffer.findMatches(blk_addr, writes));
+/*  Need to change this to iterate through targets in mshr??
     for (int i = 0; i < writes.size(); ++i) {
         MSHR *mshr = writes[i];
         if (pkt->checkFunctional(mshr->addr, mshr->size, mshr->writeData))
             return;
     }
+*/
 
     otherSidePort->checkAndSendFunctional(pkt);
 }
@@ -557,43 +558,30 @@ Cache<TagStore,Coherence>::functionalAccess(PacketPtr pkt,
 
 
 template<class TagStore, class Coherence>
-bool
+void
 Cache<TagStore,Coherence>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk)
 {
-    if (blk && (pkt->needsExclusive() ? blk->isWritable() : blk->isValid())) {
-        assert(pkt->isWrite() || pkt->isReadWrite() || pkt->isRead());
-        assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize);
-
-        if (pkt->isWrite()) {
-            if (blk->checkWrite(pkt)) {
-                blk->status |= BlkDirty;
-                pkt->writeDataToBlock(blk->data, blkSize);
-            }
-        } else if (pkt->isReadWrite()) {
-            cmpAndSwap(blk, pkt);
-        } else {
-            if (pkt->isLocked()) {
-                blk->trackLoadLocked(pkt);
-            }
-            pkt->setDataFromBlock(blk->data, blkSize);
+    assert(blk);
+    assert(pkt->needsExclusive() ? blk->isWritable() : blk->isValid());
+    assert(pkt->isWrite() || pkt->isReadWrite() || pkt->isRead());
+    assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize);
+
+    if (pkt->isWrite()) {
+        if (blk->checkWrite(pkt)) {
+            blk->status |= BlkDirty;
+            pkt->writeDataToBlock(blk->data, blkSize);
         }
-
-        return true;
+    } else if (pkt->isReadWrite()) {
+        cmpAndSwap(blk, pkt);
     } else {
-        return false;
+        if (pkt->isLocked()) {
+            blk->trackLoadLocked(pkt);
+        }
+        pkt->setDataFromBlock(blk->data, blkSize);
     }
 }
 
 
-template<class TagStore, class Coherence>
-bool
-Cache<TagStore,Coherence>::satisfyTarget(MSHR::Target *target, BlkType *blk)
-{
-    assert(target != NULL);
-    assert(target->isCpuSide());
-    return satisfyCpuSideRequest(target->pkt, blk);
-}
-
 template<class TagStore, class Coherence>
 bool
 Cache<TagStore,Coherence>::satisfyMSHR(MSHR *mshr, PacketPtr pkt,
@@ -611,37 +599,42 @@ Cache<TagStore,Coherence>::satisfyMSHR(MSHR *mshr, PacketPtr pkt,
     while (mshr->hasTargets()) {
         MSHR::Target *target = mshr->getTarget();
 
-        if (!satisfyTarget(target, blk)) {
-            // Invalid access, need to do another request
-            // can occur if block is invalidated, or not correct
-            // permissions
-            MSHRQueue *mq = mshr->queue;
-            mq->markPending(mshr);
-            mshr->order = order++;
-            requestMemSideBus((RequestCause)mq->index, pkt->finishTime);
-            return false;
-        }
+        if (target->isCpuSide()) {
+            satisfyCpuSideRequest(target->pkt, blk);
+            // How many bytes pass the first request is this one
+            int transfer_offset =
+                target->pkt->getOffset(blkSize) - initial_offset;
+            if (transfer_offset < 0) {
+                transfer_offset += blkSize;
+            }
 
+            // If critical word (no offset) return first word time
+            Tick completion_time = tags->getHitLatency() +
+                transfer_offset ? pkt->finishTime : pkt->firstWordTime;
 
-        // How many bytes pass the first request is this one
-        int transfer_offset = target->pkt->getOffset(blkSize) - initial_offset;
-        if (transfer_offset < 0) {
-            transfer_offset += blkSize;
+            if (!target->pkt->req->isUncacheable()) {
+                missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] +=
+                    completion_time - target->time;
+            }
+            target->pkt->makeTimingResponse();
+            cpuSidePort->respond(target->pkt, completion_time);
+        } else {
+            // response to snoop request
+            DPRINTF(Cache, "processing deferred snoop...\n");
+            handleSnoop(target->pkt, blk, true);
         }
 
-        // If critical word (no offset) return first word time
-        Tick completion_time = tags->getHitLatency() +
-            transfer_offset ? pkt->finishTime : pkt->firstWordTime;
-
-        if (!target->pkt->req->isUncacheable()) {
-            missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] +=
-                completion_time - target->time;
-        }
-        target->pkt->makeTimingResponse();
-        cpuSidePort->respond(target->pkt, completion_time);
         mshr->popTarget();
     }
 
+    if (mshr->promoteDeferredTargets()) {
+        MSHRQueue *mq = mshr->queue;
+        mq->markPending(mshr);
+        mshr->order = order++;
+        requestMemSideBus((RequestCause)mq->index, pkt->finishTime);
+        return false;
+    }
+
     return true;
 }
 
@@ -653,6 +646,7 @@ Cache<TagStore,Coherence>::handleResponse(PacketPtr pkt)
     Tick time = curTick + hitLatency;
     MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
     assert(mshr);
+
     if (pkt->result == Packet::Nacked) {
         //pkt->reinitFromRequest();
         warn("NACKs from devices not connected to the same bus "
@@ -661,7 +655,7 @@ Cache<TagStore,Coherence>::handleResponse(PacketPtr pkt)
     }
     assert(pkt->result != Packet::BadAddress);
     assert(pkt->result == Packet::Success);
-    DPRINTF(Cache, "Handling reponse to %x\n", pkt->getAddr());
+    DPRINTF(Cache, "Handling response to %x\n", pkt->getAddr());
 
     MSHRQueue *mq = mshr->queue;
     bool wasFull = mq->isFull();
@@ -883,7 +877,12 @@ Cache<TagStore,Coherence>::snoopTiming(PacketPtr pkt)
     MSHR *mshr = mshrQueue.findMatch(blk_addr);
     // better not be snooping a request that conflicts with something
     // we have outstanding...
-    assert(!mshr || !mshr->inService);
+    if (mshr && mshr->inService) {
+        assert(mshr->getNumTargets() < numTarget); //handle later
+        mshr->allocateSnoopTarget(pkt);
+        assert(mshr->getNumTargets() < numTarget); //handle later
+        return;
+    }
 
     //We also need to check the writeback buffers and handle those
     std::vector<MSHR *> writebacks;
@@ -895,6 +894,9 @@ Cache<TagStore,Coherence>::snoopTiming(PacketPtr pkt)
         for (int i=0; i<writebacks.size(); i++) {
             mshr = writebacks[i];
             assert(!mshr->isUncacheable());
+            assert(mshr->getNumTargets() == 1);
+            PacketPtr wb_pkt = mshr->getTarget()->pkt;
+            assert(wb_pkt->cmd == MemCmd::Writeback);
 
             if (pkt->isRead()) {
                 pkt->assertMemInhibit();
@@ -906,7 +908,7 @@ Cache<TagStore,Coherence>::snoopTiming(PacketPtr pkt)
                     // the packet's invalidate flag is set...
                     assert(pkt->isInvalidate());
                 }
-                doTimingSupplyResponse(pkt, mshr->writeData);
+                doTimingSupplyResponse(pkt, wb_pkt->getPtr<uint8_t>());
             }
 
             if (pkt->isInvalidate()) {
@@ -1208,7 +1210,7 @@ Cache<TagStore,Coherence>::MemSidePort::sendPacket()
 
         waitingOnRetry = !success;
         if (waitingOnRetry) {
-            DPRINTF(CachePort, "%s now waiting on a retry\n", name());
+            DPRINTF(CachePort, "now waiting on a retry\n");
         } else {
             myCache()->markInService(mshr);
         }
@@ -1220,8 +1222,7 @@ Cache<TagStore,Coherence>::MemSidePort::sendPacket()
     if (!waitingOnRetry) {
         if (isBusRequested()) {
             // more requests/writebacks: rerequest ASAP
-            DPRINTF(CachePort, "%s still more MSHR requests to send\n",
-                    name());
+            DPRINTF(CachePort, "still more MSHR requests to send\n");
             sendEvent->schedule(curTick+1);
         } else if (!transmitList.empty()) {
             // deferred packets: rerequest bus, but possibly not until later
diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc
index 1f2c05a6e..24ff3b33c 100644
--- a/src/mem/cache/miss/mshr.cc
+++ b/src/mem/cache/miss/mshr.cc
@@ -68,12 +68,16 @@ MSHR::allocate(Addr _addr, int _size, PacketPtr target)
     // Don't know of a case where we would allocate a new MSHR for a
     // snoop (mem0-side request), so set cpuSide to true here.
     targets.push_back(Target(target, true));
+    assert(deferredTargets.empty());
+    deferredNeedsExclusive = false;
+    pendingInvalidate = false;
 }
 
 void
 MSHR::deallocate()
 {
     assert(targets.empty());
+    assert(deferredTargets.empty());
     assert(ntargets == 0);
     inService = false;
     //allocIter = NULL;
@@ -84,41 +88,77 @@ MSHR::deallocate()
  * Adds a target to an MSHR
  */
 void
-MSHR::allocateTarget(PacketPtr target, bool cpuSide)
+MSHR::allocateTarget(PacketPtr target)
 {
-    //If we append an invalidate and we issued a read to the bus,
-    //but now have some pending writes, we need to move
-    //the invalidate to before the first non-read
-    if (inService && !inServiceForExclusive && needsExclusive
-        && !cpuSide && target->isInvalidate()) {
-        std::list<Target> temp;
-
-        while (!targets.empty()) {
-            if (targets.front().pkt->needsExclusive()) break;
-            //Place on top of temp stack
-            temp.push_front(targets.front());
-            //Remove from targets
-            targets.pop_front();
+    if (inService) {
+        if (!deferredTargets.empty() || pendingInvalidate ||
+            (!needsExclusive && target->needsExclusive())) {
+            // need to put on deferred list
+            deferredTargets.push_back(Target(target, true));
+            if (target->needsExclusive()) {
+                deferredNeedsExclusive = true;
+            }
+        } else {
+            // still OK to append to outstanding request
+            targets.push_back(Target(target, true));
+        }
+    } else {
+        if (target->needsExclusive()) {
+            needsExclusive = true;
         }
 
-        //Now that we have all the reads off until first non-read, we can
-        //place the invalidate on
-        targets.push_front(Target(target, cpuSide));
+        targets.push_back(Target(target, true));
+    }
 
-        //Now we pop off the temp_stack and put them back
-        while (!temp.empty()) {
-            targets.push_front(temp.front());
-            temp.pop_front();
-        }
+    ++ntargets;
+}
+
+void
+MSHR::allocateSnoopTarget(PacketPtr target)
+{
+    assert(inService); // don't bother to call otherwise
+
+    if (pendingInvalidate) {
+        // a prior snoop has already appended an invalidation, so
+        // logically we don't have the block anymore...
+        return;
     }
-    else {
-        targets.push_back(Target(target, cpuSide));
+
+    if (needsExclusive) {
+        // We're awaiting an exclusive copy, so ownership is pending.
+        // It's up to us to respond once the data arrives.
+        target->assertMemInhibit();
+    } else if (target->needsExclusive()) {
+        // This transaction will take away our pending copy
+        pendingInvalidate = true;
+    } else {
+        // If we're not going to supply data or perform an
+        // invalidation, we don't need to save this.
+        return;
     }
 
+    targets.push_back(Target(target, false));
     ++ntargets;
+}
+
+
+bool
+MSHR::promoteDeferredTargets()
+{
+    if (deferredTargets.empty()) {
+        return false;
+    }
+
+    assert(targets.empty());
+    targets = deferredTargets;
+    deferredTargets.clear();
     assert(targets.size() == ntargets);
 
-    needsExclusive = needsExclusive || target->needsExclusive();
+    needsExclusive = deferredNeedsExclusive;
+    pendingInvalidate = false;
+    deferredNeedsExclusive = false;
+
+    return true;
 }
 
 
diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh
index 195438e46..f4e090a12 100644
--- a/src/mem/cache/miss/mshr.hh
+++ b/src/mem/cache/miss/mshr.hh
@@ -85,9 +85,6 @@ class MSHR : public Packet::SenderState
     /** Size of the request. */
     int size;
 
-    /** Data associated with the request (if a write). */
-    uint8_t *writeData;
-
     /** True if the request has been sent to the bus. */
     bool inService;
 
@@ -95,12 +92,13 @@ class MSHR : public Packet::SenderState
     bool isCacheFill;
     /** True if we need to get an exclusive copy of the block. */
     bool needsExclusive;
+
     /** True if the request is uncacheable */
     bool _isUncacheable;
 
-    /** True if the request that has been sent to the bus is for en
-     * exclusive copy. */
-    bool inServiceForExclusive;
+    bool deferredNeedsExclusive;
+    bool pendingInvalidate;
+
     /** Thread number of the miss. */
     short threadNum;
     /** The number of currently allocated targets. */
@@ -124,6 +122,8 @@ private:
     /** List of all requests that match the address */
     TargetList targets;
 
+    TargetList deferredTargets;
+
 public:
 
     bool isUncacheable() { return _isUncacheable; }
@@ -153,7 +153,8 @@ public:
      * Add a request to the list of targets.
      * @param target The target.
      */
-    void allocateTarget(PacketPtr target, bool cpuSide);
+    void allocateTarget(PacketPtr target);
+    void allocateSnoopTarget(PacketPtr target);
 
     /** A simple constructor. */
     MSHR();
@@ -201,6 +202,8 @@ public:
         return tgt->isCpuSide() && !tgt->pkt->needsResponse();
     }
 
+    bool promoteDeferredTargets();
+
     /**
      * Prints the contents of this MSHR to stderr.
      */
-- 
cgit v1.2.3


From 529f12a531c331e4bdcf595a3aaf65ee5ef6b72d Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Mon, 25 Jun 2007 06:47:05 -0700
Subject: Get rid of requestCauses.  Use timestamped queue to make sure we
 don't re-request bus prematurely.  Use callback to avoid calling sendRetry()
 recursively within recvTiming.

--HG--
extra : convert_revision : a907a2781b4b00aa8eb1ea7147afc81d6b424140
---
 src/mem/cache/base_cache.cc      |  6 ++++--
 src/mem/cache/base_cache.hh      | 42 ++++++++++++-------------------------
 src/mem/cache/cache_impl.hh      | 28 ++++++++++++-------------
 src/mem/cache/miss/mshr.cc       | 24 +++++++++++++--------
 src/mem/cache/miss/mshr.hh       | 26 +++++++++++------------
 src/mem/cache/miss/mshr_queue.cc | 45 +++++++++++++++++++++++++++++-----------
 src/mem/cache/miss/mshr_queue.hh | 23 +++++++++++++-------
 src/mem/tport.cc                 | 22 +++++++++++---------
 src/mem/tport.hh                 | 18 ++++++++++++++++
 9 files changed, 137 insertions(+), 97 deletions(-)

diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc
index ac577f5a2..5062d6e87 100644
--- a/src/mem/cache/base_cache.cc
+++ b/src/mem/cache/base_cache.cc
@@ -42,7 +42,7 @@ using namespace std;
 
 BaseCache::CachePort::CachePort(const std::string &_name, BaseCache *_cache)
     : SimpleTimingPort(_name, _cache), cache(_cache), otherPort(NULL),
-      blocked(false), mustSendRetry(false), requestCauses(0)
+      blocked(false), mustSendRetry(false)
 {
 }
 
@@ -116,7 +116,9 @@ BaseCache::CachePort::clearBlocked()
     {
         DPRINTF(Cache, "Cache Sending Retry\n");
         mustSendRetry = false;
-        sendRetry();
+        SendRetryEvent *ev = new SendRetryEvent(this, true);
+        // @TODO: need to find a better time (next bus cycle?)
+        ev->schedule(curTick + 1);
     }
 }
 
diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh
index b35fc0811..09484a14a 100644
--- a/src/mem/cache/base_cache.hh
+++ b/src/mem/cache/base_cache.hh
@@ -41,6 +41,7 @@
 #include <vector>
 #include <string>
 #include <list>
+#include <algorithm>
 #include <inttypes.h>
 
 #include "base/misc.hh"
@@ -105,6 +106,9 @@ class BaseCache : public MemObject
 
         bool recvRetryCommon();
 
+        typedef EventWrapper<Port, &Port::sendRetry>
+            SendRetryEvent;
+
       public:
         void setOtherPort(CachePort *_otherPort) { otherPort = _otherPort; }
 
@@ -120,27 +124,12 @@ class BaseCache : public MemObject
 
         bool mustSendRetry;
 
-        /**
-         * Bit vector for the outstanding requests for the master interface.
-         */
-        uint8_t requestCauses;
-
-        bool isBusRequested() { return requestCauses != 0; }
-
         void requestBus(RequestCause cause, Tick time)
         {
             DPRINTF(Cache, "Asserting bus request for cause %d\n", cause);
-            if (!isBusRequested() && !waitingOnRetry) {
-                assert(!sendEvent->scheduled());
-                sendEvent->schedule(time);
+            if (!waitingOnRetry) {
+                schedSendEvent(time);
             }
-            requestCauses |= (1 << cause);
-        }
-
-        void deassertBusRequest(RequestCause cause)
-        {
-            DPRINTF(Cache, "Deasserting bus request for cause %d\n", cause);
-            requestCauses &= ~(1 << cause);
         }
 
         void respond(PacketPtr pkt, Tick time) {
@@ -163,8 +152,7 @@ class BaseCache : public MemObject
     MSHR *allocateBufferInternal(MSHRQueue *mq, Addr addr, int size,
                                  PacketPtr pkt, Tick time, bool requestBus)
     {
-        MSHR *mshr = mq->allocate(addr, size, pkt);
-        mshr->order = order++;
+        MSHR *mshr = mq->allocate(addr, size, pkt, time, order++);
 
         if (mq->isFull()) {
             setBlocked((BlockedCause)mq->index);
@@ -182,9 +170,6 @@ class BaseCache : public MemObject
         MSHRQueue *mq = mshr->queue;
         bool wasFull = mq->isFull();
         mq->markInService(mshr);
-        if (!mq->havePending()) {
-            deassertMemSideBusRequest((RequestCause)mq->index);
-        }
         if (wasFull && !mq->isFull()) {
             clearBlocked((BlockedCause)mq->index);
         }
@@ -491,13 +476,10 @@ class BaseCache : public MemObject
         }
     }
 
-    /**
-     * True if the memory-side bus should be requested.
-     * @return True if there are outstanding requests for the master bus.
-     */
-    bool isMemSideBusRequested()
+    Tick nextMSHRReadyTick()
     {
-        return memSidePort->isBusRequested();
+        return std::min(mshrQueue.nextMSHRReadyTick(),
+                        writeBuffer.nextMSHRReadyTick());
     }
 
     /**
@@ -516,7 +498,9 @@ class BaseCache : public MemObject
      */
     void deassertMemSideBusRequest(RequestCause cause)
     {
-        memSidePort->deassertBusRequest(cause);
+        // obsolete!!
+        assert(false);
+        // memSidePort->deassertBusRequest(cause);
         // checkDrain();
     }
 
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index b4d334249..7610b5a41 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -185,9 +185,6 @@ Cache<TagStore,Coherence>::squash(int threadNum)
         cause = Blocked_NoMSHRs;
     }
     mshrQueue.squash(threadNum);
-    if (!mshrQueue.havePending()) {
-        deassertMemSideBusRequest(Request_MSHR);
-    }
     if (unblock && !mshrQueue.isFull()) {
         clearBlocked(cause);
     }
@@ -368,11 +365,14 @@ Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
             if (mshr->threadNum != 0/*pkt->req->getThreadNum()*/) {
                 mshr->threadNum = -1;
             }
-            mshr->allocateTarget(pkt);
+            mshr->allocateTarget(pkt, time, order++);
             if (mshr->getNumTargets() == numTarget) {
                 noTargetMSHR = mshr;
                 setBlocked(Blocked_NoTargets);
-                mshrQueue.moveToFront(mshr);
+                // need to be careful with this... if this mshr isn't
+                // ready yet (i.e. time > curTick), we don't want to
+                // move it ahead of mshrs that are ready
+                // mshrQueue.moveToFront(mshr);
             }
         } else {
             // no MSHR
@@ -630,7 +630,6 @@ Cache<TagStore,Coherence>::satisfyMSHR(MSHR *mshr, PacketPtr pkt,
     if (mshr->promoteDeferredTargets()) {
         MSHRQueue *mq = mshr->queue;
         mq->markPending(mshr);
-        mshr->order = order++;
         requestMemSideBus((RequestCause)mq->index, pkt->finishTime);
         return false;
     }
@@ -879,7 +878,7 @@ Cache<TagStore,Coherence>::snoopTiming(PacketPtr pkt)
     // we have outstanding...
     if (mshr && mshr->inService) {
         assert(mshr->getNumTargets() < numTarget); //handle later
-        mshr->allocateSnoopTarget(pkt);
+        mshr->allocateSnoopTarget(pkt, curTick, order++);
         assert(mshr->getNumTargets() < numTarget); //handle later
         return;
     }
@@ -1202,6 +1201,7 @@ Cache<TagStore,Coherence>::MemSidePort::sendPacket()
     } else {
         // check for non-response packets (requests & writebacks)
         PacketPtr pkt = myCache()->getTimingPacket();
+        assert(pkt != NULL);
         MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
 
         bool success = sendTiming(pkt);
@@ -1220,14 +1220,12 @@ Cache<TagStore,Coherence>::MemSidePort::sendPacket()
     // tried to send packet... if it was successful (no retry), see if
     // we need to rerequest bus or not
     if (!waitingOnRetry) {
-        if (isBusRequested()) {
-            // more requests/writebacks: rerequest ASAP
-            DPRINTF(CachePort, "still more MSHR requests to send\n");
-            sendEvent->schedule(curTick+1);
-        } else if (!transmitList.empty()) {
-            // deferred packets: rerequest bus, but possibly not until later
-            Tick time = transmitList.front().tick;
-            sendEvent->schedule(time <= curTick ? curTick+1 : time);
+        Tick nextReady = std::min(deferredPacketReadyTick(),
+                                  myCache()->nextMSHRReadyTick());
+        // @TODO: need to factor in prefetch requests here somehow
+        if (nextReady != MaxTick) {
+            DPRINTF(CachePort, "more packets to send @ %d\n", nextReady);
+            sendEvent->schedule(std::max(nextReady, curTick + 1));
         } else {
             // no more to send right now: if we're draining, we may be done
             if (drainEvent) {
diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc
index 24ff3b33c..8fa11ab2e 100644
--- a/src/mem/cache/miss/mshr.cc
+++ b/src/mem/cache/miss/mshr.cc
@@ -37,6 +37,7 @@
 #include <assert.h>
 #include <string>
 #include <vector>
+#include <algorithm>
 
 #include "mem/cache/miss/mshr.hh"
 #include "sim/core.hh" // for curTick
@@ -54,10 +55,13 @@ MSHR::MSHR()
 }
 
 void
-MSHR::allocate(Addr _addr, int _size, PacketPtr target)
+MSHR::allocate(Addr _addr, int _size, PacketPtr target,
+               Tick when, Counter _order)
 {
     addr = _addr;
     size = _size;
+    readyTick = when;
+    order = _order;
     assert(target);
     isCacheFill = false;
     needsExclusive = target->needsExclusive();
@@ -66,8 +70,8 @@ MSHR::allocate(Addr _addr, int _size, PacketPtr target)
     threadNum = 0;
     ntargets = 1;
     // Don't know of a case where we would allocate a new MSHR for a
-    // snoop (mem0-side request), so set cpuSide to true here.
-    targets.push_back(Target(target, true));
+    // snoop (mem-side request), so set cpuSide to true here.
+    targets.push_back(Target(target, when, _order, true));
     assert(deferredTargets.empty());
     deferredNeedsExclusive = false;
     pendingInvalidate = false;
@@ -88,33 +92,33 @@ MSHR::deallocate()
  * Adds a target to an MSHR
  */
 void
-MSHR::allocateTarget(PacketPtr target)
+MSHR::allocateTarget(PacketPtr target, Tick when, Counter _order)
 {
     if (inService) {
         if (!deferredTargets.empty() || pendingInvalidate ||
             (!needsExclusive && target->needsExclusive())) {
             // need to put on deferred list
-            deferredTargets.push_back(Target(target, true));
+            deferredTargets.push_back(Target(target, when, _order, true));
             if (target->needsExclusive()) {
                 deferredNeedsExclusive = true;
             }
         } else {
             // still OK to append to outstanding request
-            targets.push_back(Target(target, true));
+            targets.push_back(Target(target, when, _order, true));
         }
     } else {
         if (target->needsExclusive()) {
             needsExclusive = true;
         }
 
-        targets.push_back(Target(target, true));
+        targets.push_back(Target(target, when, _order, true));
     }
 
     ++ntargets;
 }
 
 void
-MSHR::allocateSnoopTarget(PacketPtr target)
+MSHR::allocateSnoopTarget(PacketPtr target, Tick when, Counter _order)
 {
     assert(inService); // don't bother to call otherwise
 
@@ -137,7 +141,7 @@ MSHR::allocateSnoopTarget(PacketPtr target)
         return;
     }
 
-    targets.push_back(Target(target, false));
+    targets.push_back(Target(target, when, _order, false));
     ++ntargets;
 }
 
@@ -157,6 +161,8 @@ MSHR::promoteDeferredTargets()
     needsExclusive = deferredNeedsExclusive;
     pendingInvalidate = false;
     deferredNeedsExclusive = false;
+    order = targets.front().order;
+    readyTick = std::max(curTick, targets.front().time);
 
     return true;
 }
diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh
index f4e090a12..92288cf52 100644
--- a/src/mem/cache/miss/mshr.hh
+++ b/src/mem/cache/miss/mshr.hh
@@ -55,13 +55,14 @@ class MSHR : public Packet::SenderState
     class Target {
       public:
         Tick time;      //!< Time when request was received (for stats)
+        Counter order;  //!< Global order (for memory consistency mgmt)
         PacketPtr pkt;  //!< Pending request packet.
         bool cpuSide;   //!< Did request come from cpu side or mem side?
 
         bool isCpuSide() { return cpuSide; }
 
-        Target(PacketPtr _pkt, bool _cpuSide, Tick _time = curTick)
-            : time(_time), pkt(_pkt), cpuSide(_cpuSide)
+        Target(PacketPtr _pkt, Tick _time, Counter _order, bool _cpuSide)
+            : time(_time), order(_order), pkt(_pkt), cpuSide(_cpuSide)
         {}
     };
 
@@ -79,6 +80,12 @@ class MSHR : public Packet::SenderState
     /** Pointer to queue containing this MSHR. */
     MSHRQueue *queue;
 
+    /** Cycle when ready to issue */
+    Tick readyTick;
+
+    /** Order number assigned by the miss queue. */
+    Counter order;
+
     /** Address of the request. */
     Addr addr;
 
@@ -103,8 +110,6 @@ class MSHR : public Packet::SenderState
     short threadNum;
     /** The number of currently allocated targets. */
     short ntargets;
-    /** Order number of assigned by the miss queue. */
-    uint64_t order;
 
     /**
      * Pointer to this MSHR on the ready list.
@@ -136,13 +141,8 @@ public:
      * @param size The number of bytes to request.
      * @param pkt  The original miss.
      */
-    void allocate(Addr addr, int size, PacketPtr pkt);
-
-    /**
-     * Allocate this MSHR as a buffer for the given request.
-     * @param target The memory request to buffer.
-     */
-    void allocateAsBuffer(PacketPtr target);
+    void allocate(Addr addr, int size, PacketPtr pkt,
+                  Tick when, Counter _order);
 
     /**
      * Mark this MSHR as free.
@@ -153,8 +153,8 @@ public:
      * Add a request to the list of targets.
      * @param target The target.
      */
-    void allocateTarget(PacketPtr target);
-    void allocateSnoopTarget(PacketPtr target);
+    void allocateTarget(PacketPtr target, Tick when, Counter order);
+    void allocateSnoopTarget(PacketPtr target, Tick when, Counter order);
 
     /** A simple constructor. */
     MSHR();
diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc
index 3407e2588..18184bd20 100644
--- a/src/mem/cache/miss/mshr_queue.cc
+++ b/src/mem/cache/miss/mshr_queue.cc
@@ -90,8 +90,8 @@ MSHRQueue::findMatches(Addr addr, vector<MSHR*>& matches) const
 MSHR *
 MSHRQueue::findPending(Addr addr, int size) const
 {
-    MSHR::ConstIterator i = pendingList.begin();
-    MSHR::ConstIterator end = pendingList.end();
+    MSHR::ConstIterator i = readyList.begin();
+    MSHR::ConstIterator end = readyList.end();
     for (; i != end; ++i) {
         MSHR *mshr = *i;
         if (mshr->addr < addr) {
@@ -107,17 +107,37 @@ MSHRQueue::findPending(Addr addr, int size) const
     return NULL;
 }
 
+
+MSHR::Iterator
+MSHRQueue::addToReadyList(MSHR *mshr)
+{
+    if (readyList.empty() || readyList.back()->readyTick <= mshr->readyTick) {
+        return readyList.insert(readyList.end(), mshr);
+    }
+
+    MSHR::Iterator i = readyList.begin();
+    MSHR::Iterator end = readyList.end();
+    for (; i != end; ++i) {
+        if ((*i)->readyTick > mshr->readyTick) {
+            return readyList.insert(i, mshr);
+        }
+    }
+    assert(false);
+}
+
+
 MSHR *
-MSHRQueue::allocate(Addr addr, int size, PacketPtr &pkt)
+MSHRQueue::allocate(Addr addr, int size, PacketPtr &pkt,
+                    Tick when, Counter order)
 {
     assert(!freeList.empty());
     MSHR *mshr = freeList.front();
     assert(mshr->getNumTargets() == 0);
     freeList.pop_front();
 
-    mshr->allocate(addr, size, pkt);
+    mshr->allocate(addr, size, pkt, when, order);
     mshr->allocIter = allocatedList.insert(allocatedList.end(), mshr);
-    mshr->readyIter = pendingList.insert(pendingList.end(), mshr);
+    mshr->readyIter = addToReadyList(mshr);
 
     allocated += 1;
     return mshr;
@@ -139,7 +159,7 @@ MSHRQueue::deallocateOne(MSHR *mshr)
     if (mshr->inService) {
         inServiceEntries--;
     } else {
-        pendingList.erase(mshr->readyIter);
+        readyList.erase(mshr->readyIter);
     }
     mshr->deallocate();
     return retval;
@@ -150,14 +170,15 @@ MSHRQueue::moveToFront(MSHR *mshr)
 {
     if (!mshr->inService) {
         assert(mshr == *(mshr->readyIter));
-        pendingList.erase(mshr->readyIter);
-        mshr->readyIter = pendingList.insert(pendingList.begin(), mshr);
+        readyList.erase(mshr->readyIter);
+        mshr->readyIter = readyList.insert(readyList.begin(), mshr);
     }
 }
 
 void
 MSHRQueue::markInService(MSHR *mshr)
 {
+    assert(!mshr->inService);
     if (mshr->isSimpleForward()) {
         // we just forwarded the request packet & don't expect a
         // response, so get rid of it
@@ -167,23 +188,23 @@ MSHRQueue::markInService(MSHR *mshr)
         return;
     }
     mshr->inService = true;
-    pendingList.erase(mshr->readyIter);
+    readyList.erase(mshr->readyIter);
     //mshr->readyIter = NULL;
     inServiceEntries += 1;
-    //pendingList.pop_front();
+    //readyList.pop_front();
 }
 
 void
 MSHRQueue::markPending(MSHR *mshr)
 {
-    //assert(mshr->readyIter == NULL);
+    assert(mshr->inService);
     mshr->inService = false;
     --inServiceEntries;
     /**
      * @ todo might want to add rerequests to front of pending list for
      * performance.
      */
-    mshr->readyIter = pendingList.insert(pendingList.end(), mshr);
+    mshr->readyIter = addToReadyList(mshr);
 }
 
 void
diff --git a/src/mem/cache/miss/mshr_queue.hh b/src/mem/cache/miss/mshr_queue.hh
index 806aa9c64..fd61dec8b 100644
--- a/src/mem/cache/miss/mshr_queue.hh
+++ b/src/mem/cache/miss/mshr_queue.hh
@@ -51,7 +51,7 @@ class MSHRQueue
     /** Holds pointers to all allocated entries. */
     MSHR::List allocatedList;
     /** Holds pointers to entries that haven't been sent to the bus. */
-    MSHR::List pendingList;
+    MSHR::List readyList;
     /** Holds non allocated entries. */
     MSHR::List freeList;
 
@@ -69,6 +69,9 @@ class MSHRQueue
      */
     const int numReserve;
 
+    MSHR::Iterator addToReadyList(MSHR *mshr);
+
+
   public:
     /** The number of allocated entries. */
     int allocated;
@@ -121,7 +124,8 @@ class MSHRQueue
      *
      * @pre There are free entries.
      */
-    MSHR *allocate(Addr addr, int size, PacketPtr &pkt);
+    MSHR *allocate(Addr addr, int size, PacketPtr &pkt,
+                   Tick when, Counter order);
 
     /**
      * Removes the given MSHR from the queue. This places the MSHR on the
@@ -147,7 +151,7 @@ class MSHRQueue
 
     /**
      * Mark the given MSHR as in service. This removes the MSHR from the
-     * pendingList. Deallocates the MSHR if it does not expect a response.
+     * readyList. Deallocates the MSHR if it does not expect a response.
      * @param mshr The MSHR to mark in service.
      */
     void markInService(MSHR *mshr);
@@ -171,7 +175,7 @@ class MSHRQueue
      */
     bool havePending() const
     {
-        return !pendingList.empty();
+        return !readyList.empty();
     }
 
     /**
@@ -184,15 +188,20 @@ class MSHRQueue
     }
 
     /**
-     * Returns the MSHR at the head of the pendingList.
+     * Returns the MSHR at the head of the readyList.
      * @return The next request to service.
      */
     MSHR *getNextMSHR() const
     {
-        if (pendingList.empty()) {
+        if (readyList.empty() || readyList.front()->readyTick > curTick) {
             return NULL;
         }
-        return pendingList.front();
+        return readyList.front();
+    }
+
+    Tick nextMSHRReadyTick() const
+    {
+        return readyList.empty() ? MaxTick : readyList.front()->readyTick;
     }
 };
 
diff --git a/src/mem/tport.cc b/src/mem/tport.cc
index 2644a504c..0a2127490 100644
--- a/src/mem/tport.cc
+++ b/src/mem/tport.cc
@@ -91,28 +91,30 @@ SimpleTimingPort::schedSendTiming(PacketPtr pkt, Tick when)
     assert(when > curTick);
 
     // Nothing is on the list: add it and schedule an event
-    if (transmitList.empty()) {
-        assert(!sendEvent->scheduled());
-        sendEvent->schedule(when);
-        transmitList.push_back(DeferredPacket(when, pkt));
+    if (transmitList.empty() || when < transmitList.front().tick) {
+        transmitList.push_front(DeferredPacket(when, pkt));
+        schedSendEvent(when);
         return;
     }
 
-    // something is on the list and this belongs at the end
+    // list is non-empty and this is not the head, so event should
+    // already be scheduled
+    assert(waitingOnRetry ||
+           (sendEvent->scheduled() && sendEvent->when() <= when));
+
+    // list is non-empty & this belongs at the end
     if (when >= transmitList.back().tick) {
         transmitList.push_back(DeferredPacket(when, pkt));
         return;
     }
-    // Something is on the list and this belongs somewhere else
+
+    // this belongs in the middle somewhere
     DeferredPacketIterator i = transmitList.begin();
+    i++; // already checked for insertion at front
     DeferredPacketIterator end = transmitList.end();
 
     for (; i != end; ++i) {
         if (when < i->tick) {
-            if (i == transmitList.begin()) {
-                //Inserting at begining, reschedule
-                sendEvent->reschedule(when);
-            }
             transmitList.insert(i, DeferredPacket(when, pkt));
             return;
         }
diff --git a/src/mem/tport.hh b/src/mem/tport.hh
index ea0f05ed1..bfed29f34 100644
--- a/src/mem/tport.hh
+++ b/src/mem/tport.hh
@@ -105,6 +105,24 @@ class SimpleTimingPort : public Port
     bool deferredPacketReady()
     { return !transmitList.empty() && transmitList.front().tick <= curTick; }
 
+    Tick deferredPacketReadyTick()
+    { return transmitList.empty() ? MaxTick : transmitList.front().tick; }
+
+    void schedSendEvent(Tick when)
+    {
+        if (waitingOnRetry) {
+            assert(!sendEvent->scheduled());
+            return;
+        }
+
+        if (!sendEvent->scheduled()) {
+            sendEvent->schedule(when);
+        } else if (sendEvent->when() > when) {
+            sendEvent->reschedule(when);
+        }
+    }
+
+
     /** Schedule a sendTiming() event to be called in the future.
      * @param pkt packet to send
      * @param absolute time (in ticks) to send packet
-- 
cgit v1.2.3


From f697e959a17646500bca7c12e6bb7b30e047f1bb Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Mon, 25 Jun 2007 22:23:29 -0700
Subject: Couple minor bug fixes...

src/mem/cache/cache_impl.hh:
    Handle grants with no packet.
src/mem/cache/miss/mshr.cc:
    Fix MSHR snoop hit handling.

--HG--
extra : convert_revision : f365283afddaa07cb9e050b2981ad6a898c14451
---
 src/mem/cache/cache_impl.hh | 27 ++++++++++++++++-----------
 src/mem/cache/miss/mshr.cc  | 10 ++++++----
 2 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 7610b5a41..48efc5ca3 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -912,7 +912,6 @@ Cache<TagStore,Coherence>::snoopTiming(PacketPtr pkt)
 
             if (pkt->isInvalidate()) {
                 // Invalidation trumps our writeback... discard here
-                assert(0);
                 markInService(mshr);
             }
             return;
@@ -1201,18 +1200,24 @@ Cache<TagStore,Coherence>::MemSidePort::sendPacket()
     } else {
         // check for non-response packets (requests & writebacks)
         PacketPtr pkt = myCache()->getTimingPacket();
-        assert(pkt != NULL);
-        MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
+        if (pkt == NULL) {
+            // can happen if e.g. we attempt a writeback and fail, but
+            // before the retry, the writeback is eliminated because
+            // we snoop another cache's ReadEx.
+            waitingOnRetry = false;
+        } else {
+            MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
 
-        bool success = sendTiming(pkt);
-        DPRINTF(Cache, "Address %x was %s in sending the timing request\n",
-                pkt->getAddr(), success ? "successful" : "unsuccessful");
+            bool success = sendTiming(pkt);
+            DPRINTF(Cache, "Address %x was %s in sending the timing request\n",
+                    pkt->getAddr(), success ? "successful" : "unsuccessful");
 
-        waitingOnRetry = !success;
-        if (waitingOnRetry) {
-            DPRINTF(CachePort, "now waiting on a retry\n");
-        } else {
-            myCache()->markInService(mshr);
+            waitingOnRetry = !success;
+            if (waitingOnRetry) {
+                DPRINTF(CachePort, "now waiting on a retry\n");
+            } else {
+                myCache()->markInService(mshr);
+            }
         }
     }
 
diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc
index 8fa11ab2e..fc8d2175e 100644
--- a/src/mem/cache/miss/mshr.cc
+++ b/src/mem/cache/miss/mshr.cc
@@ -132,13 +132,15 @@ MSHR::allocateSnoopTarget(PacketPtr target, Tick when, Counter _order)
         // We're awaiting an exclusive copy, so ownership is pending.
         // It's up to us to respond once the data arrives.
         target->assertMemInhibit();
-    } else if (target->needsExclusive()) {
+    }
+
+    if (target->needsExclusive()) {
         // This transaction will take away our pending copy
         pendingInvalidate = true;
     } else {
-        // If we're not going to supply data or perform an
-        // invalidation, we don't need to save this.
-        return;
+        // We'll keep our pending copy, but we can't let the other guy
+        // think he's getting it exclusive
+        target->assertShared();
     }
 
     targets.push_back(Target(target, when, _order, false));
-- 
cgit v1.2.3


From 7dacbcf49262605a75e461149ec7bd7a00fca7b7 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Tue, 26 Jun 2007 14:53:15 -0700
Subject: Handle replacement of block with pending upgrade.

src/mem/cache/tags/lru.cc:
    Add some replacement DPRINTFs

--HG--
extra : convert_revision : 7993ec24d6af7e7774d04ce36f20e3f43f887fd9
---
 src/mem/cache/cache_impl.hh | 27 ++++++++++++++++++-----
 src/mem/cache/miss/mshr.cc  | 53 +++++++++++++++++++++++++++++++++++++++++++++
 src/mem/cache/miss/mshr.hh  | 11 ++++++++++
 src/mem/cache/tags/lru.cc   |  4 ++++
 4 files changed, 89 insertions(+), 6 deletions(-)

diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 48efc5ca3..d01adde78 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -300,7 +300,6 @@ Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
         return true;
     }
 
-    PacketList writebacks;
     int lat = hitLatency;
     bool satisfied = false;
 
@@ -319,6 +318,8 @@ Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
     }
 
 #if 0
+    PacketList writebacks;
+
     // If this is a block size write/hint (WH64) allocate the block here
     // if the coherence protocol allows it.
     /** @todo make the fast write alloc (wh64) work with coherence. */
@@ -338,7 +339,6 @@ Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
             ++fastWrites;
         }
     }
-#endif
 
     // copy writebacks to write buffer
     while (!writebacks.empty()) {
@@ -346,6 +346,7 @@ Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
         allocateBuffer(wbPkt, time, true);
         writebacks.pop_front();
     }
+#endif
 
     bool needsResponse = pkt->needsResponse();
 
@@ -676,6 +677,15 @@ Cache<TagStore,Coherence>::handleResponse(PacketPtr pkt)
         DPRINTF(Cache, "Block for addr %x being updated in Cache\n",
                 pkt->getAddr());
         BlkType *blk = tags->findBlock(pkt->getAddr());
+
+        if (blk == NULL && pkt->cmd == MemCmd::UpgradeResp) {
+            if (!mshr->handleReplacedPendingUpgrade(pkt)) {
+                mq->markPending(mshr);
+                requestMemSideBus((RequestCause)mq->index, pkt->finishTime);
+                return;
+            }
+        }
+
         PacketList writebacks;
         blk = handleFill(pkt, blk, writebacks);
         deallocate = satisfyMSHR(mshr, pkt, blk);
@@ -747,15 +757,20 @@ Cache<TagStore,Coherence>::handleFill(PacketPtr pkt, BlkType *blk,
     Addr addr = pkt->getAddr();
 
     if (blk == NULL) {
-        // better have read new data
+        // better have read new data...
         assert(pkt->isRead());
 
         // need to do a replacement
         blk = tags->findReplacement(addr, writebacks);
         if (blk->isValid()) {
+            Addr repl_addr = tags->regenerateBlkAddr(blk->tag, blk->set);
+            MSHR *repl_mshr = mshrQueue.findMatch(repl_addr);
+            if (repl_mshr) {
+                repl_mshr->handleReplacement(blk, blkSize);
+            }
+
             DPRINTF(Cache, "replacement: replacing %x with %x: %s\n",
-                    tags->regenerateBlkAddr(blk->tag, blk->set), addr,
-                    blk->isDirty() ? "writeback" : "clean");
+                    repl_addr, addr, blk->isDirty() ? "writeback" : "clean");
 
             if (blk->isDirty()) {
                 // Save writeback packet for handling by caller
@@ -992,7 +1007,7 @@ Cache<TagStore,Coherence>::getNextMSHR()
             return conflict_mshr;
         }
 
-        // No conclifts; issue read
+        // No conflicts; issue read
         return miss_mshr;
     }
 
diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc
index fc8d2175e..ca5e38601 100644
--- a/src/mem/cache/miss/mshr.cc
+++ b/src/mem/cache/miss/mshr.cc
@@ -75,6 +75,8 @@ MSHR::allocate(Addr _addr, int _size, PacketPtr target,
     assert(deferredTargets.empty());
     deferredNeedsExclusive = false;
     pendingInvalidate = false;
+    replacedPendingUpgrade = false;
+    data = NULL;
 }
 
 void
@@ -170,6 +172,57 @@ MSHR::promoteDeferredTargets()
 }
 
 
+void
+MSHR::handleReplacement(CacheBlk *blk, int blkSize)
+{
+    // must be an outstanding upgrade request on block we're about to
+    // replace...
+    assert(!blk->isWritable());
+    assert(needsExclusive);
+    replacedPendingUpgrade = true;
+
+    // if it's dirty, just remember what happened and allow the
+    // writeback to continue.  we'll reissue a ReadEx later whether
+    // the upgrade succeeds or not
+    if (blk->isDirty()) {
+        replacedPendingUpgradeDirty = true;
+        return;
+    }
+
+    // if not dirty, we need to save it off as it will be only valid
+    // copy in system if upgrade is successful (and may need to be
+    // written back then, as the current owner if any will be
+    // invalidating its block)
+    replacedPendingUpgradeDirty = false;
+    data = new uint8_t[blkSize];
+    std::memcpy(data, blk->data, blkSize);
+}
+
+
+bool
+MSHR::handleReplacedPendingUpgrade(Packet *pkt)
+{
+    // @TODO: if upgrade is nacked and replacedPendingUpgradeDirty is true, then we need to writeback the data (or rel
+    assert(pkt->cmd == MemCmd::UpgradeResp);
+    assert(replacedPendingUpgrade);
+    replacedPendingUpgrade = false; // reset
+    if (replacedPendingUpgradeDirty) {
+        // we wrote back the previous copy; just reissue as a ReadEx
+        return false;
+    }
+
+    // previous copy was not dirty, but we are now owner...  fake out
+    // cache by taking saved data and converting UpgradeResp to
+    // ReadExResp
+    assert(data);
+    pkt->cmd = MemCmd::ReadExResp;
+    pkt->setData(data);
+    delete [] data;
+    data = NULL;
+    return true;
+}
+
+
 void
 MSHR::dump()
 {
diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh
index 92288cf52..a9380d99a 100644
--- a/src/mem/cache/miss/mshr.hh
+++ b/src/mem/cache/miss/mshr.hh
@@ -105,12 +105,20 @@ class MSHR : public Packet::SenderState
 
     bool deferredNeedsExclusive;
     bool pendingInvalidate;
+    /** Is there a pending upgrade that got replaced? */
+    bool replacedPendingUpgrade;
+    bool replacedPendingUpgradeDirty;
 
     /** Thread number of the miss. */
     short threadNum;
     /** The number of currently allocated targets. */
     short ntargets;
 
+
+    /** Data buffer (if needed).  Currently used only for pending
+     * upgrade handling. */
+    uint8_t *data;
+
     /**
      * Pointer to this MSHR on the ready list.
      * @sa MissQueue, MSHRQueue::readyList
@@ -204,6 +212,9 @@ public:
 
     bool promoteDeferredTargets();
 
+    void handleReplacement(CacheBlk *blk, int blkSize);
+    bool handleReplacedPendingUpgrade(Packet *pkt);
+
     /**
      * Prints the contents of this MSHR to stderr.
      */
diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc
index 334312aaf..fa46aff7b 100644
--- a/src/mem/cache/tags/lru.cc
+++ b/src/mem/cache/tags/lru.cc
@@ -173,6 +173,8 @@ LRU::findBlock(Addr addr, int &lat)
     if (blk != NULL) {
         // move this block to head of the MRU list
         sets[set].moveToHead(blk);
+        DPRINTF(Cache, "set %x: moving blk %x to MRU\n",
+                set, regenerateBlkAddr(tag, set));
         if (blk->whenReady > curTick
             && blk->whenReady - curTick > hitLatency) {
             lat = blk->whenReady - curTick;
@@ -214,6 +216,8 @@ LRU::findReplacement(Addr addr, PacketList &writebacks)
         }
     }
 
+    DPRINTF(Cache, "set %x: selecting blk %x for replacement\n",
+            set, regenerateBlkAddr(blk->tag, set));
     return blk;
 }
 
-- 
cgit v1.2.3


From 69ff6d9163c431272fc084b8e051996b44590a53 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Tue, 26 Jun 2007 18:01:22 -0400
Subject: cache_impl.hh: Change target overflow from assertion to warning.

src/mem/cache/cache_impl.hh:
    Change target overflow from assertion to warning.

--HG--
extra : convert_revision : ceca990ed916bbf96dedd4836c40df522803f173
---
 src/mem/cache/cache_impl.hh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index d01adde78..a73612f24 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -892,9 +892,9 @@ Cache<TagStore,Coherence>::snoopTiming(PacketPtr pkt)
     // better not be snooping a request that conflicts with something
     // we have outstanding...
     if (mshr && mshr->inService) {
-        assert(mshr->getNumTargets() < numTarget); //handle later
         mshr->allocateSnoopTarget(pkt, curTick, order++);
-        assert(mshr->getNumTargets() < numTarget); //handle later
+        if (mshr->getNumTargets() > numTarget)
+           warn("allocating bonus target for snoop"); //handle later
         return;
     }
 
-- 
cgit v1.2.3


From 1b20df5607e86d3b384716792274fe01fa4f3f80 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Tue, 26 Jun 2007 22:23:10 -0700
Subject: Handle deferred snoops better.

--HG--
extra : convert_revision : 703da6128832eb0d5cfed7724e5105f4b3fe4f90
---
 src/mem/cache/cache.hh      |  6 ++--
 src/mem/cache/cache_impl.hh | 34 ++++++++++---------
 src/mem/cache/miss/mshr.cc  | 82 ++++++++++++++++++++++++++++-----------------
 src/mem/cache/miss/mshr.hh  |  3 +-
 src/mem/cache/tags/lru.cc   |  5 +--
 src/mem/tport.cc            | 13 ++++++-
 6 files changed, 91 insertions(+), 52 deletions(-)

diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index 2a95dc53c..161fb801d 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -185,14 +185,16 @@ class Cache : public BaseCache
     void satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk);
     bool satisfyMSHR(MSHR *mshr, PacketPtr pkt, BlkType *blk);
 
-    void doTimingSupplyResponse(PacketPtr req_pkt, uint8_t *blk_data);
+    void doTimingSupplyResponse(PacketPtr req_pkt, uint8_t *blk_data,
+                                bool already_copied);
 
     /**
      * Sets the blk to the new state.
      * @param blk The cache block being snooped.
      * @param new_state The new coherence state for the block.
      */
-    void handleSnoop(PacketPtr ptk, BlkType *blk, bool is_timing);
+    void handleSnoop(PacketPtr ptk, BlkType *blk,
+                     bool is_timing, bool is_deferred);
 
     /**
      * Create a writeback request for the given block.
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index a73612f24..599eecc82 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -622,7 +622,7 @@ Cache<TagStore,Coherence>::satisfyMSHR(MSHR *mshr, PacketPtr pkt,
         } else {
             // response to snoop request
             DPRINTF(Cache, "processing deferred snoop...\n");
-            handleSnoop(target->pkt, blk, true);
+            handleSnoop(target->pkt, blk, true, true);
         }
 
         mshr->popTarget();
@@ -678,12 +678,10 @@ Cache<TagStore,Coherence>::handleResponse(PacketPtr pkt)
                 pkt->getAddr());
         BlkType *blk = tags->findBlock(pkt->getAddr());
 
-        if (blk == NULL && pkt->cmd == MemCmd::UpgradeResp) {
-            if (!mshr->handleReplacedPendingUpgrade(pkt)) {
-                mq->markPending(mshr);
-                requestMemSideBus((RequestCause)mq->index, pkt->finishTime);
-                return;
-            }
+        if (!mshr->handleFill(pkt, blk)) {
+            mq->markPending(mshr);
+            requestMemSideBus((RequestCause)mq->index, pkt->finishTime);
+            return;
         }
 
         PacketList writebacks;
@@ -814,10 +812,12 @@ Cache<TagStore,Coherence>::handleFill(PacketPtr pkt, BlkType *blk,
 template<class TagStore, class Coherence>
 void
 Cache<TagStore,Coherence>::doTimingSupplyResponse(PacketPtr req_pkt,
-                                                  uint8_t *blk_data)
+                                                  uint8_t *blk_data,
+                                                  bool already_copied)
 {
-    // timing-mode snoop responses require a new packet
-    PacketPtr pkt = new Packet(req_pkt);
+    // timing-mode snoop responses require a new packet, unless we
+    // already made a copy...
+    PacketPtr pkt = already_copied ? req_pkt : new Packet(req_pkt);
     pkt->allocate();
     pkt->makeTimingResponse();
     pkt->setDataFromBlock(blk_data, blkSize);
@@ -827,7 +827,7 @@ Cache<TagStore,Coherence>::doTimingSupplyResponse(PacketPtr req_pkt,
 template<class TagStore, class Coherence>
 void
 Cache<TagStore,Coherence>::handleSnoop(PacketPtr pkt, BlkType *blk,
-                                       bool is_timing)
+                                       bool is_timing, bool is_deferred)
 {
     if (!blk || !blk->isValid()) {
         return;
@@ -854,9 +854,10 @@ Cache<TagStore,Coherence>::handleSnoop(PacketPtr pkt, BlkType *blk,
     }
 
     if (supply) {
+        assert(!pkt->memInhibitAsserted());
         pkt->assertMemInhibit();
         if (is_timing) {
-            doTimingSupplyResponse(pkt, blk->data);
+            doTimingSupplyResponse(pkt, blk->data, is_deferred);
         } else {
             pkt->makeAtomicResponse();
             pkt->setDataFromBlock(blk->data, blkSize);
@@ -892,6 +893,8 @@ Cache<TagStore,Coherence>::snoopTiming(PacketPtr pkt)
     // better not be snooping a request that conflicts with something
     // we have outstanding...
     if (mshr && mshr->inService) {
+        DPRINTF(Cache, "Deferring snoop on in-service MSHR to blk %x\n",
+                blk_addr);
         mshr->allocateSnoopTarget(pkt, curTick, order++);
         if (mshr->getNumTargets() > numTarget)
            warn("allocating bonus target for snoop"); //handle later
@@ -913,6 +916,7 @@ Cache<TagStore,Coherence>::snoopTiming(PacketPtr pkt)
             assert(wb_pkt->cmd == MemCmd::Writeback);
 
             if (pkt->isRead()) {
+                assert(!pkt->memInhibitAsserted());
                 pkt->assertMemInhibit();
                 if (!pkt->needsExclusive()) {
                     pkt->assertShared();
@@ -922,7 +926,7 @@ Cache<TagStore,Coherence>::snoopTiming(PacketPtr pkt)
                     // the packet's invalidate flag is set...
                     assert(pkt->isInvalidate());
                 }
-                doTimingSupplyResponse(pkt, wb_pkt->getPtr<uint8_t>());
+                doTimingSupplyResponse(pkt, wb_pkt->getPtr<uint8_t>(), false);
             }
 
             if (pkt->isInvalidate()) {
@@ -933,7 +937,7 @@ Cache<TagStore,Coherence>::snoopTiming(PacketPtr pkt)
         }
     }
 
-    handleSnoop(pkt, blk, true);
+    handleSnoop(pkt, blk, true, false);
 }
 
 
@@ -948,7 +952,7 @@ Cache<TagStore,Coherence>::snoopAtomic(PacketPtr pkt)
     }
 
     BlkType *blk = tags->findBlock(pkt->getAddr());
-    handleSnoop(pkt, blk, false);
+    handleSnoop(pkt, blk, false, false);
     return hitLatency;
 }
 
diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc
index ca5e38601..23645cb27 100644
--- a/src/mem/cache/miss/mshr.cc
+++ b/src/mem/cache/miss/mshr.cc
@@ -75,6 +75,7 @@ MSHR::allocate(Addr _addr, int _size, PacketPtr target,
     assert(deferredTargets.empty());
     deferredNeedsExclusive = false;
     pendingInvalidate = false;
+    pendingShared = false;
     replacedPendingUpgrade = false;
     data = NULL;
 }
@@ -120,7 +121,7 @@ MSHR::allocateTarget(PacketPtr target, Tick when, Counter _order)
 }
 
 void
-MSHR::allocateSnoopTarget(PacketPtr target, Tick when, Counter _order)
+MSHR::allocateSnoopTarget(PacketPtr pkt, Tick when, Counter _order)
 {
     assert(inService); // don't bother to call otherwise
 
@@ -130,23 +131,33 @@ MSHR::allocateSnoopTarget(PacketPtr target, Tick when, Counter _order)
         return;
     }
 
-    if (needsExclusive) {
-        // We're awaiting an exclusive copy, so ownership is pending.
-        // It's up to us to respond once the data arrives.
-        target->assertMemInhibit();
-    }
+    DPRINTF(Cache, "deferred snoop on %x: %s %s\n", addr,
+            needsExclusive ? "needsExclusive" : "",
+            pkt->needsExclusive() ? "pkt->needsExclusive()" : "");
+
+    if (needsExclusive || pkt->needsExclusive()) {
+        // actual target device (typ. PhysicalMemory) will delete the
+        // packet on reception, so we need to save a copy here
+        targets.push_back(Target(new Packet(pkt), when, _order, false));
+        ++ntargets;
+
+        if (needsExclusive) {
+            // We're awaiting an exclusive copy, so ownership is pending.
+            // It's up to us to respond once the data arrives.
+            pkt->assertMemInhibit();
+        }
 
-    if (target->needsExclusive()) {
-        // This transaction will take away our pending copy
-        pendingInvalidate = true;
+        if (pkt->needsExclusive()) {
+            // This transaction will take away our pending copy
+            pendingInvalidate = true;
+        }
     } else {
-        // We'll keep our pending copy, but we can't let the other guy
-        // think he's getting it exclusive
-        target->assertShared();
+        // Read to a read: no conflict, so no need to record as
+        // target, but make sure neither reader thinks he's getting an
+        // exclusive copy
+        pendingShared = true;
+        pkt->assertShared();
     }
-
-    targets.push_back(Target(target, when, _order, false));
-    ++ntargets;
 }
 
 
@@ -164,6 +175,7 @@ MSHR::promoteDeferredTargets()
 
     needsExclusive = deferredNeedsExclusive;
     pendingInvalidate = false;
+    pendingShared = false;
     deferredNeedsExclusive = false;
     order = targets.front().order;
     readyTick = std::max(curTick, targets.front().time);
@@ -200,25 +212,33 @@ MSHR::handleReplacement(CacheBlk *blk, int blkSize)
 
 
 bool
-MSHR::handleReplacedPendingUpgrade(Packet *pkt)
+MSHR::handleFill(Packet *pkt, CacheBlk *blk)
 {
-    // @TODO: if upgrade is nacked and replacedPendingUpgradeDirty is true, then we need to writeback the data (or rel
-    assert(pkt->cmd == MemCmd::UpgradeResp);
-    assert(replacedPendingUpgrade);
-    replacedPendingUpgrade = false; // reset
-    if (replacedPendingUpgradeDirty) {
-        // we wrote back the previous copy; just reissue as a ReadEx
-        return false;
+    if (replacedPendingUpgrade) {
+        // block was replaced while upgrade request was in service
+        assert(pkt->cmd == MemCmd::UpgradeResp);
+        assert(blk == NULL);
+        assert(replacedPendingUpgrade);
+        replacedPendingUpgrade = false; // reset
+        if (replacedPendingUpgradeDirty) {
+            // we wrote back the previous copy; just reissue as a ReadEx
+            return false;
+        }
+
+        // previous copy was not dirty, but we are now owner...  fake out
+        // cache by taking saved data and converting UpgradeResp to
+        // ReadExResp
+        assert(data);
+        pkt->cmd = MemCmd::ReadExResp;
+        pkt->setData(data);
+        delete [] data;
+        data = NULL;
+    } else if (pendingShared) {
+        // we snooped another read while this read was in
+        // service... assert shared line on its behalf
+        pkt->assertShared();
     }
 
-    // previous copy was not dirty, but we are now owner...  fake out
-    // cache by taking saved data and converting UpgradeResp to
-    // ReadExResp
-    assert(data);
-    pkt->cmd = MemCmd::ReadExResp;
-    pkt->setData(data);
-    delete [] data;
-    data = NULL;
     return true;
 }
 
diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh
index a9380d99a..07fe5c96c 100644
--- a/src/mem/cache/miss/mshr.hh
+++ b/src/mem/cache/miss/mshr.hh
@@ -105,6 +105,7 @@ class MSHR : public Packet::SenderState
 
     bool deferredNeedsExclusive;
     bool pendingInvalidate;
+    bool pendingShared;
     /** Is there a pending upgrade that got replaced? */
     bool replacedPendingUpgrade;
     bool replacedPendingUpgradeDirty;
@@ -213,7 +214,7 @@ public:
     bool promoteDeferredTargets();
 
     void handleReplacement(CacheBlk *blk, int blkSize);
-    bool handleReplacedPendingUpgrade(Packet *pkt);
+    bool handleFill(Packet *pkt, CacheBlk *blk);
 
     /**
      * Prints the contents of this MSHR to stderr.
diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc
index fa46aff7b..3269aa4db 100644
--- a/src/mem/cache/tags/lru.cc
+++ b/src/mem/cache/tags/lru.cc
@@ -207,6 +207,9 @@ LRU::findReplacement(Addr addr, PacketList &writebacks)
         totalRefs += blk->refCount;
         ++sampledRefs;
         blk->refCount = 0;
+
+        DPRINTF(Cache, "set %x: selecting blk %x for replacement\n",
+                set, regenerateBlkAddr(blk->tag, set));
     } else if (!blk->isTouched) {
         tagsInUse++;
         blk->isTouched = true;
@@ -216,8 +219,6 @@ LRU::findReplacement(Addr addr, PacketList &writebacks)
         }
     }
 
-    DPRINTF(Cache, "set %x: selecting blk %x for replacement\n",
-            set, regenerateBlkAddr(blk->tag, set));
     return blk;
 }
 
diff --git a/src/mem/tport.cc b/src/mem/tport.cc
index 0a2127490..6c8c12ce2 100644
--- a/src/mem/tport.cc
+++ b/src/mem/tport.cc
@@ -69,11 +69,21 @@ SimpleTimingPort::recvTiming(PacketPtr pkt)
     // if we ever added it back.
     assert(pkt->isRequest());
     assert(pkt->result == Packet::Unknown);
+
+    if (pkt->memInhibitAsserted()) {
+        // snooper will supply based on copy of packet
+        // still target's responsibility to delete packet
+        delete pkt->req;
+        delete pkt;
+        return true;
+    }
+
     bool needsResponse = pkt->needsResponse();
     Tick latency = recvAtomic(pkt);
     // turn packet around to go back to requester if response expected
     if (needsResponse) {
-        // recvAtomic() should already have turned packet into atomic response
+        // recvAtomic() should already have turned packet into
+        // atomic response
         assert(pkt->isResponse());
         pkt->convertAtomicToTimingResponse();
         schedSendTiming(pkt, curTick + latency);
@@ -81,6 +91,7 @@ SimpleTimingPort::recvTiming(PacketPtr pkt)
         delete pkt->req;
         delete pkt;
     }
+
     return true;
 }
 
-- 
cgit v1.2.3


From c4903e088247ad187356864459d2e4be77d97154 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Tue, 26 Jun 2007 23:30:30 -0700
Subject: Revamp replacement-of-upgrade handling.

--HG--
extra : convert_revision : 9bc09d8ae6d50e6dfbb4ab21514612f9aa102a2e
---
 src/mem/cache/cache.hh      |  3 +++
 src/mem/cache/cache_impl.hh | 44 ++++++++++++++++++++++++++------------
 src/mem/cache/miss/mshr.cc  | 51 +--------------------------------------------
 src/mem/cache/miss/mshr.hh  |  6 +-----
 4 files changed, 36 insertions(+), 68 deletions(-)

diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index 161fb801d..9e8c35066 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -136,6 +136,9 @@ class Cache : public BaseCache
     /** Prefetcher */
     BasePrefetcher *prefetcher;
 
+    /** Temporary cache block for occasional transitory use */
+    BlkType *tempBlock;
+
     /**
      * Can this cache should allocate a block on a line-sized write miss.
      */
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 599eecc82..3685bc8cb 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -58,6 +58,9 @@ Cache<TagStore,Coherence>::Cache(const std::string &_name,
       doFastWrites(params.doFastWrites),
       prefetchMiss(params.prefetchMiss)
 {
+    tempBlock = new BlkType();
+    tempBlock->data = new uint8_t[blkSize];
+
     cpuSidePort = new CpuSidePort(_name + "-cpu_side_port", this);
     memSidePort = new MemSidePort(_name + "-mem_side_port", this);
     cpuSidePort->setOtherPort(memSidePort);
@@ -678,11 +681,8 @@ Cache<TagStore,Coherence>::handleResponse(PacketPtr pkt)
                 pkt->getAddr());
         BlkType *blk = tags->findBlock(pkt->getAddr());
 
-        if (!mshr->handleFill(pkt, blk)) {
-            mq->markPending(mshr);
-            requestMemSideBus((RequestCause)mq->index, pkt->finishTime);
-            return;
-        }
+        // give mshr a chance to do some dirty work
+        mshr->handleFill(pkt, blk);
 
         PacketList writebacks;
         blk = handleFill(pkt, blk, writebacks);
@@ -693,6 +693,13 @@ Cache<TagStore,Coherence>::handleResponse(PacketPtr pkt)
             allocateBuffer(wbPkt, time, true);
             writebacks.pop_front();
         }
+        // if we used temp block, clear it out
+        if (blk == tempBlock) {
+            if (blk->isDirty()) {
+                allocateBuffer(writebackBlk(blk), time, true);
+            }
+            tags->invalidateBlk(blk);
+        }
     } else {
         if (pkt->req->isUncacheable()) {
             mshr_uncacheable_lat[pkt->cmd.toInt()][0/*pkt->req->getThreadNum()*/] +=
@@ -764,15 +771,26 @@ Cache<TagStore,Coherence>::handleFill(PacketPtr pkt, BlkType *blk,
             Addr repl_addr = tags->regenerateBlkAddr(blk->tag, blk->set);
             MSHR *repl_mshr = mshrQueue.findMatch(repl_addr);
             if (repl_mshr) {
-                repl_mshr->handleReplacement(blk, blkSize);
-            }
-
-            DPRINTF(Cache, "replacement: replacing %x with %x: %s\n",
-                    repl_addr, addr, blk->isDirty() ? "writeback" : "clean");
+                // must be an outstanding upgrade request on block
+                // we're about to replace...
+                assert(!blk->isWritable());
+                assert(repl_mshr->needsExclusive);
+                // too hard to replace block with transient state;
+                // just use temporary storage to complete the current
+                // request and then get rid of it
+                assert(!tempBlock->isValid());
+                blk = tempBlock;
+                tempBlock->set = tags->extractSet(addr);
+                DPRINTF(Cache, "using temp block for %x\n", addr);
+            } else {
+                DPRINTF(Cache, "replacement: replacing %x with %x: %s\n",
+                        repl_addr, addr,
+                        blk->isDirty() ? "writeback" : "clean");
 
-            if (blk->isDirty()) {
-                // Save writeback packet for handling by caller
-                writebacks.push_back(writebackBlk(blk));
+                if (blk->isDirty()) {
+                    // Save writeback packet for handling by caller
+                    writebacks.push_back(writebackBlk(blk));
+                }
             }
         }
 
diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc
index 23645cb27..63b3cacc2 100644
--- a/src/mem/cache/miss/mshr.cc
+++ b/src/mem/cache/miss/mshr.cc
@@ -76,7 +76,6 @@ MSHR::allocate(Addr _addr, int _size, PacketPtr target,
     deferredNeedsExclusive = false;
     pendingInvalidate = false;
     pendingShared = false;
-    replacedPendingUpgrade = false;
     data = NULL;
 }
 
@@ -185,61 +184,13 @@ MSHR::promoteDeferredTargets()
 
 
 void
-MSHR::handleReplacement(CacheBlk *blk, int blkSize)
-{
-    // must be an outstanding upgrade request on block we're about to
-    // replace...
-    assert(!blk->isWritable());
-    assert(needsExclusive);
-    replacedPendingUpgrade = true;
-
-    // if it's dirty, just remember what happened and allow the
-    // writeback to continue.  we'll reissue a ReadEx later whether
-    // the upgrade succeeds or not
-    if (blk->isDirty()) {
-        replacedPendingUpgradeDirty = true;
-        return;
-    }
-
-    // if not dirty, we need to save it off as it will be only valid
-    // copy in system if upgrade is successful (and may need to be
-    // written back then, as the current owner if any will be
-    // invalidating its block)
-    replacedPendingUpgradeDirty = false;
-    data = new uint8_t[blkSize];
-    std::memcpy(data, blk->data, blkSize);
-}
-
-
-bool
 MSHR::handleFill(Packet *pkt, CacheBlk *blk)
 {
-    if (replacedPendingUpgrade) {
-        // block was replaced while upgrade request was in service
-        assert(pkt->cmd == MemCmd::UpgradeResp);
-        assert(blk == NULL);
-        assert(replacedPendingUpgrade);
-        replacedPendingUpgrade = false; // reset
-        if (replacedPendingUpgradeDirty) {
-            // we wrote back the previous copy; just reissue as a ReadEx
-            return false;
-        }
-
-        // previous copy was not dirty, but we are now owner...  fake out
-        // cache by taking saved data and converting UpgradeResp to
-        // ReadExResp
-        assert(data);
-        pkt->cmd = MemCmd::ReadExResp;
-        pkt->setData(data);
-        delete [] data;
-        data = NULL;
-    } else if (pendingShared) {
+    if (pendingShared) {
         // we snooped another read while this read was in
         // service... assert shared line on its behalf
         pkt->assertShared();
     }
-
-    return true;
 }
 
 
diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh
index 07fe5c96c..4db7b1cfe 100644
--- a/src/mem/cache/miss/mshr.hh
+++ b/src/mem/cache/miss/mshr.hh
@@ -106,9 +106,6 @@ class MSHR : public Packet::SenderState
     bool deferredNeedsExclusive;
     bool pendingInvalidate;
     bool pendingShared;
-    /** Is there a pending upgrade that got replaced? */
-    bool replacedPendingUpgrade;
-    bool replacedPendingUpgradeDirty;
 
     /** Thread number of the miss. */
     short threadNum;
@@ -213,8 +210,7 @@ public:
 
     bool promoteDeferredTargets();
 
-    void handleReplacement(CacheBlk *blk, int blkSize);
-    bool handleFill(Packet *pkt, CacheBlk *blk);
+    void handleFill(Packet *pkt, CacheBlk *blk);
 
     /**
      * Prints the contents of this MSHR to stderr.
-- 
cgit v1.2.3


From 9117c94f9c74f0674d75731385a106d17a1dee09 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Wed, 27 Jun 2007 20:54:13 -0700
Subject: Get rid of coherence protocol object.

--HG--
extra : convert_revision : 4ff144342dca23af9a12a2169ca318a002654b42
---
 configs/example/memtest.py                    |   4 -
 src/mem/cache/cache.cc                        |  12 +-
 src/mem/cache/cache.hh                        |  28 +-
 src/mem/cache/cache_builder.cc                |  94 ++----
 src/mem/cache/cache_impl.hh                   | 191 +++++------
 src/mem/cache/coherence/CoherenceProtocol.py  |   8 -
 src/mem/cache/coherence/SConscript            |  36 --
 src/mem/cache/coherence/coherence_protocol.cc | 469 --------------------------
 src/mem/cache/coherence/coherence_protocol.hh | 257 --------------
 src/mem/cache/coherence/simple_coherence.hh   | 163 ---------
 10 files changed, 140 insertions(+), 1122 deletions(-)
 delete mode 100644 src/mem/cache/coherence/CoherenceProtocol.py
 delete mode 100644 src/mem/cache/coherence/SConscript
 delete mode 100644 src/mem/cache/coherence/coherence_protocol.cc
 delete mode 100644 src/mem/cache/coherence/coherence_protocol.hh
 delete mode 100644 src/mem/cache/coherence/simple_coherence.hh

diff --git a/configs/example/memtest.py b/configs/example/memtest.py
index 0bc12e7bd..0e6260b5d 100644
--- a/configs/example/memtest.py
+++ b/configs/example/memtest.py
@@ -48,8 +48,6 @@ parser.add_option("-m", "--maxtick", type="int", default=m5.MaxTick,
 parser.add_option("-n", "--numtesters", type="int", default=8,
                   metavar="N",
                   help="Number of tester pseudo-CPUs [default: %default]")
-parser.add_option("-p", "--protocol", default="moesi",
-                  help="Coherence protocol [default: %default]")
 
 parser.add_option("-f", "--functional", type="int", default=0,
                   metavar="PCT",
@@ -95,7 +93,6 @@ class L1(BaseCache):
     block_size = block_size
     mshrs = num_l1_mshrs
     tgts_per_mshr = 8
-    protocol = CoherenceProtocol(protocol=options.protocol)
 
 # ----------------------
 # Base L2 Cache
@@ -107,7 +104,6 @@ class L2(BaseCache):
     mshrs = num_l2_mshrs
     tgts_per_mshr = 16
     write_buffers = 8
-    protocol = CoherenceProtocol(protocol=options.protocol)
 
 if options.numtesters > block_size:
      print "Error: Number of testers limited to %s because of false sharing" \
diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc
index 96f9a2e11..c640d4a60 100644
--- a/src/mem/cache/cache.cc
+++ b/src/mem/cache/cache.cc
@@ -58,8 +58,6 @@
 #include "mem/cache/tags/split_lifo.hh"
 #endif
 
-#include "mem/cache/coherence/simple_coherence.hh"
-
 #include "mem/cache/cache_impl.hh"
 
 // Template Instantiations
@@ -67,23 +65,23 @@
 
 
 #if defined(USE_CACHE_FALRU)
-template class Cache<FALRU, SimpleCoherence>;
+template class Cache<FALRU>;
 #endif
 
 #if defined(USE_CACHE_IIC)
-template class Cache<IIC, SimpleCoherence>;
+template class Cache<IIC>;
 #endif
 
 #if defined(USE_CACHE_LRU)
-template class Cache<LRU, SimpleCoherence>;
+template class Cache<LRU>;
 #endif
 
 #if defined(USE_CACHE_SPLIT)
-template class Cache<Split, SimpleCoherence>;
+template class Cache<Split>;
 #endif
 
 #if defined(USE_CACHE_SPLIT_LIFO)
-template class Cache<SplitLIFO, SimpleCoherence>;
+template class Cache<SplitLIFO>;
 #endif
 
 #endif //DOXYGEN_SHOULD_SKIP_THIS
diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index 9e8c35066..57028a05e 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -39,9 +39,7 @@
 #ifndef __CACHE_HH__
 #define __CACHE_HH__
 
-#include "base/compression/base.hh"
 #include "base/misc.hh" // fatal, panic, and warn
-#include "cpu/smt.hh" // SMT_MAX_THREADS
 
 #include "mem/cache/base_cache.hh"
 #include "mem/cache/cache_blk.hh"
@@ -55,11 +53,9 @@ class BasePrefetcher;
 /**
  * A template-policy based cache. The behavior of the cache can be altered by
  * supplying different template policies. TagStore handles all tag and data
- * storage @sa TagStore. Buffering handles all misses and writes/writebacks
- * @sa MissQueue. Coherence handles all coherence policy details @sa
- * UniCoherence, SimpleMultiCoherence.
+ * storage @sa TagStore.
  */
-template <class TagStore, class Coherence>
+template <class TagStore>
 class Cache : public BaseCache
 {
   public:
@@ -76,13 +72,13 @@ class Cache : public BaseCache
     {
       public:
         CpuSidePort(const std::string &_name,
-                    Cache<TagStore,Coherence> *_cache);
+                    Cache<TagStore> *_cache);
 
         // BaseCache::CachePort just has a BaseCache *; this function
         // lets us get back the type info we lost when we stored the
         // cache pointer there.
-        Cache<TagStore,Coherence> *myCache() {
-            return static_cast<Cache<TagStore,Coherence> *>(cache);
+        Cache<TagStore> *myCache() {
+            return static_cast<Cache<TagStore> *>(cache);
         }
 
         virtual void getDeviceAddressRanges(AddrRangeList &resp,
@@ -99,13 +95,13 @@ class Cache : public BaseCache
     {
       public:
         MemSidePort(const std::string &_name,
-                    Cache<TagStore,Coherence> *_cache);
+                    Cache<TagStore> *_cache);
 
         // BaseCache::CachePort just has a BaseCache *; this function
         // lets us get back the type info we lost when we stored the
         // cache pointer there.
-        Cache<TagStore,Coherence> *myCache() {
-            return static_cast<Cache<TagStore,Coherence> *>(cache);
+        Cache<TagStore> *myCache() {
+            return static_cast<Cache<TagStore> *>(cache);
         }
 
         void sendPacket();
@@ -130,9 +126,6 @@ class Cache : public BaseCache
     /** Tag and data Storage */
     TagStore *tags;
 
-    /** Coherence protocol. */
-    Coherence *coherence;
-
     /** Prefetcher */
     BasePrefetcher *prefetcher;
 
@@ -212,20 +205,19 @@ class Cache : public BaseCache
     {
       public:
         TagStore *tags;
-        Coherence *coherence;
         BaseCache::Params baseParams;
         BasePrefetcher*prefetcher;
         bool prefetchAccess;
         const bool doFastWrites;
         const bool prefetchMiss;
 
-        Params(TagStore *_tags, Coherence *coh,
+        Params(TagStore *_tags,
                BaseCache::Params params,
                BasePrefetcher *_prefetcher,
                bool prefetch_access, int hit_latency,
                bool do_fast_writes,
                bool prefetch_miss)
-            : tags(_tags), coherence(coh),
+            : tags(_tags),
               baseParams(params),
               prefetcher(_prefetcher), prefetchAccess(prefetch_access),
               doFastWrites(do_fast_writes),
diff --git a/src/mem/cache/cache_builder.cc b/src/mem/cache/cache_builder.cc
index 307c851a2..65418b68d 100644
--- a/src/mem/cache/cache_builder.cc
+++ b/src/mem/cache/cache_builder.cc
@@ -42,7 +42,6 @@
 #include "mem/cache/base_cache.hh"
 #include "mem/cache/cache.hh"
 #include "mem/bus.hh"
-#include "mem/cache/coherence/coherence_protocol.hh"
 #include "sim/builder.hh"
 
 // Tag Templates
@@ -66,13 +65,6 @@
 #include "mem/cache/tags/split_lifo.hh"
 #endif
 
-// Compression Templates
-#include "base/compression/null_compression.hh"
-#include "base/compression/lzss_compression.hh"
-
-// Coherence Templates
-#include "mem/cache/coherence/simple_coherence.hh"
-
 //Prefetcher Headers
 #if defined(USE_GHB)
 #include "mem/cache/prefetch/ghb_prefetcher.hh"
@@ -100,16 +92,11 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(BaseCache)
     Param<int> tgts_per_mshr;
     Param<int> write_buffers;
     Param<bool> prioritizeRequests;
-    SimObjectParam<CoherenceProtocol *> protocol;
     Param<Addr> trace_addr;
     Param<int> hash_delay;
 #if defined(USE_CACHE_IIC)
     SimObjectParam<Repl *> repl;
 #endif
-    Param<bool> compressed_bus;
-    Param<bool> store_compressed;
-    Param<bool> adaptive_compression;
-    Param<int> compression_latency;
     Param<int> subblock_size;
     Param<Counter> max_miss_count;
     VectorParam<Range<Addr> > addr_range;
@@ -144,23 +131,12 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(BaseCache)
     INIT_PARAM_DFLT(write_buffers, "number of write buffers", 8),
     INIT_PARAM_DFLT(prioritizeRequests, "always service demand misses first",
                     false),
-    INIT_PARAM_DFLT(protocol, "coherence protocol to use in the cache", NULL),
     INIT_PARAM_DFLT(trace_addr, "address to trace", 0),
 
     INIT_PARAM_DFLT(hash_delay, "time in cycles of hash access",1),
 #if defined(USE_CACHE_IIC)
     INIT_PARAM_DFLT(repl, "replacement policy",NULL),
 #endif
-    INIT_PARAM_DFLT(compressed_bus,
-                    "This cache connects to a compressed memory",
-                    false),
-    INIT_PARAM_DFLT(store_compressed, "Store compressed data in the cache",
-                    false),
-    INIT_PARAM_DFLT(adaptive_compression, "Use an adaptive compression scheme",
-                    false),
-    INIT_PARAM_DFLT(compression_latency,
-                    "Latency in cycles of compression algorithm",
-                    0),
     INIT_PARAM_DFLT(subblock_size,
                     "Size of subblock in IIC used for compression",
                     0),
@@ -188,7 +164,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(BaseCache)
 END_INIT_SIM_OBJECT_PARAMS(BaseCache)
 
 
-#define BUILD_CACHE(TAGS, tags, c)                                      \
+#define BUILD_CACHE(TAGS, tags)                                      \
     do {                                                                \
         BasePrefetcher *pf;                                           \
         if (pf_policy == "tagged") {                                    \
@@ -203,12 +179,12 @@ END_INIT_SIM_OBJECT_PARAMS(BaseCache)
         else {                                                          \
             BUILD_NULL_PREFETCHER(TAGS);                                \
         }                                                               \
-        Cache<TAGS, c>::Params params(tags, coh, base_params,       \
-                                      pf, prefetch_access, latency, \
-                                      true,                             \
-                                      prefetch_miss);                   \
-        Cache<TAGS, c> *retval =                                        \
-            new Cache<TAGS, c>(getInstanceName(), params);              \
+        Cache<TAGS>::Params params(tags, base_params,       \
+                                   pf, prefetch_access, latency,        \
+                                   true,                                \
+                                   prefetch_miss);                      \
+        Cache<TAGS> *retval =                                        \
+            new Cache<TAGS>(getInstanceName(), params);              \
         return retval;                                                  \
     } while (0)
 
@@ -216,79 +192,68 @@ END_INIT_SIM_OBJECT_PARAMS(BaseCache)
         panic("%s not compiled into M5", x);		\
     } while (0)
 
-#define BUILD_COMPRESSED_CACHE(TAGS, tags, c)           \
-    do {                                                \
-        CompressionAlgorithm *compAlg;                  \
-        if (compressed_bus || store_compressed) {       \
-            compAlg = new LZSSCompression();            \
-        } else {                                        \
-            compAlg = new NullCompression();            \
-        }                                               \
-        BUILD_CACHE(TAGS, tags, c);                     \
-    } while (0)
-
 #if defined(USE_CACHE_FALRU)
-#define BUILD_FALRU_CACHE(c) do {			    \
+#define BUILD_FALRU_CACHE do {			    \
         FALRU *tags = new FALRU(block_size, size, latency); \
-        BUILD_COMPRESSED_CACHE(FALRU, tags, c);		\
+        BUILD_CACHE(FALRU, tags);		\
     } while (0)
 #else
-#define BUILD_FALRU_CACHE(c) BUILD_CACHE_PANIC("falru cache")
+#define BUILD_FALRU_CACHE BUILD_CACHE_PANIC("falru cache")
 #endif
 
 #if defined(USE_CACHE_LRU)
-#define BUILD_LRU_CACHE(c) do {				\
+#define BUILD_LRU_CACHE do {				\
         LRU *tags = new LRU(numSets, block_size, assoc, latency);	\
-        BUILD_COMPRESSED_CACHE(LRU, tags, c);			\
+        BUILD_CACHE(LRU, tags);			\
     } while (0)
 #else
-#define BUILD_LRU_CACHE(c) BUILD_CACHE_PANIC("lru cache")
+#define BUILD_LRU_CACHE BUILD_CACHE_PANIC("lru cache")
 #endif
 
 #if defined(USE_CACHE_SPLIT)
-#define BUILD_SPLIT_CACHE(c) do {					\
+#define BUILD_SPLIT_CACHE do {					\
         Split *tags = new Split(numSets, block_size, assoc, split_size, lifo, \
                                 two_queue, latency);		\
-        BUILD_COMPRESSED_CACHE(Split, tags, c);			\
+        BUILD_CACHE(Split, tags);			\
     } while (0)
 #else
-#define BUILD_SPLIT_CACHE(c) BUILD_CACHE_PANIC("split cache")
+#define BUILD_SPLIT_CACHE BUILD_CACHE_PANIC("split cache")
 #endif
 
 #if defined(USE_CACHE_SPLIT_LIFO)
-#define BUILD_SPLIT_LIFO_CACHE(c) do {				\
+#define BUILD_SPLIT_LIFO_CACHE do {				\
         SplitLIFO *tags = new SplitLIFO(block_size, size, assoc,        \
                                         latency, two_queue, -1);	\
-        BUILD_COMPRESSED_CACHE(SplitLIFO, tags, c);			\
+        BUILD_CACHE(SplitLIFO, tags);			\
     } while (0)
 #else
-#define BUILD_SPLIT_LIFO_CACHE(c) BUILD_CACHE_PANIC("lifo cache")
+#define BUILD_SPLIT_LIFO_CACHE BUILD_CACHE_PANIC("lifo cache")
 #endif
 
 #if defined(USE_CACHE_IIC)
-#define BUILD_IIC_CACHE(c) do {			\
+#define BUILD_IIC_CACHE do {			\
         IIC *tags = new IIC(iic_params);		\
-        BUILD_COMPRESSED_CACHE(IIC, tags, c);	\
+        BUILD_CACHE(IIC, tags);	\
     } while (0)
 #else
-#define BUILD_IIC_CACHE(c) BUILD_CACHE_PANIC("iic")
+#define BUILD_IIC_CACHE BUILD_CACHE_PANIC("iic")
 #endif
 
-#define BUILD_CACHES(c) do {				\
+#define BUILD_CACHES do {				\
         if (repl == NULL) {				\
             if (numSets == 1) {				\
-                BUILD_FALRU_CACHE(c);		\
+                BUILD_FALRU_CACHE;		\
             } else {					\
                 if (split == true) {			\
-                    BUILD_SPLIT_CACHE(c);		\
+                    BUILD_SPLIT_CACHE;		\
                 } else if (lifo == true) {		\
-                    BUILD_SPLIT_LIFO_CACHE(c);	\
+                    BUILD_SPLIT_LIFO_CACHE;	\
                 } else {				\
-                    BUILD_LRU_CACHE(c);		\
+                    BUILD_LRU_CACHE;		\
                 }					\
             }						\
         } else {					\
-            BUILD_IIC_CACHE(c);			\
+            BUILD_IIC_CACHE;			\
         }						\
     } while (0)
 
@@ -399,8 +364,7 @@ CREATE_SIM_OBJECT(BaseCache)
     const void *repl = NULL;
 #endif
 
-    SimpleCoherence *coh = new SimpleCoherence(protocol);
-    BUILD_CACHES(SimpleCoherence);
+    BUILD_CACHES;
     return NULL;
 }
 
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 3685bc8cb..b76d7e392 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -48,13 +48,13 @@
 #include "sim/sim_exit.hh" // for SimExitEvent
 
 
-template<class TagStore, class Coherence>
-Cache<TagStore,Coherence>::Cache(const std::string &_name,
-                                 Cache<TagStore,Coherence>::Params &params)
+template<class TagStore>
+Cache<TagStore>::Cache(const std::string &_name,
+                       Cache<TagStore>::Params &params)
     : BaseCache(_name, params.baseParams),
       prefetchAccess(params.prefetchAccess),
       tags(params.tags),
-      coherence(params.coherence), prefetcher(params.prefetcher),
+      prefetcher(params.prefetcher),
       doFastWrites(params.doFastWrites),
       prefetchMiss(params.prefetchMiss)
 {
@@ -67,23 +67,21 @@ Cache<TagStore,Coherence>::Cache(const std::string &_name,
     memSidePort->setOtherPort(cpuSidePort);
 
     tags->setCache(this);
-    coherence->setCache(this);
     prefetcher->setCache(this);
 }
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::regStats()
+Cache<TagStore>::regStats()
 {
     BaseCache::regStats();
     tags->regStats(name());
-    coherence->regStats(name());
     prefetcher->regStats(name());
 }
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 Port *
-Cache<TagStore,Coherence>::getPort(const std::string &if_name, int idx)
+Cache<TagStore>::getPort(const std::string &if_name, int idx)
 {
     if (if_name == "" || if_name == "cpu_side") {
         return cpuSidePort;
@@ -96,9 +94,9 @@ Cache<TagStore,Coherence>::getPort(const std::string &if_name, int idx)
     }
 }
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::deletePortRefs(Port *p)
+Cache<TagStore>::deletePortRefs(Port *p)
 {
     if (cpuSidePort == p || memSidePort == p)
         panic("Can only delete functional ports\n");
@@ -107,9 +105,9 @@ Cache<TagStore,Coherence>::deletePortRefs(Port *p)
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::cmpAndSwap(BlkType *blk, PacketPtr pkt)
+Cache<TagStore>::cmpAndSwap(BlkType *blk, PacketPtr pkt)
 {
     uint64_t overwrite_val;
     bool overwrite_mem;
@@ -152,9 +150,9 @@ Cache<TagStore,Coherence>::cmpAndSwap(BlkType *blk, PacketPtr pkt)
 /////////////////////////////////////////////////////
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::markInService(MSHR *mshr)
+Cache<TagStore>::markInService(MSHR *mshr)
 {
     markInServiceInternal(mshr);
 #if 0
@@ -171,9 +169,9 @@ Cache<TagStore,Coherence>::markInService(MSHR *mshr)
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::squash(int threadNum)
+Cache<TagStore>::squash(int threadNum)
 {
     bool unblock = false;
     BlockedCause cause = NUM_BLOCKED_CAUSES;
@@ -199,9 +197,9 @@ Cache<TagStore,Coherence>::squash(int threadNum)
 //
 /////////////////////////////////////////////////////
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 bool
-Cache<TagStore,Coherence>::access(PacketPtr pkt, BlkType *&blk, int &lat)
+Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk, int &lat)
 {
     if (pkt->req->isUncacheable())  {
         blk = NULL;
@@ -280,9 +278,9 @@ Cache<TagStore,Coherence>::access(PacketPtr pkt, BlkType *&blk, int &lat)
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 bool
-Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
+Cache<TagStore>::timingAccess(PacketPtr pkt)
 {
 //@todo Add back in MemDebug Calls
 //    MemDebug::cacheAccess(pkt);
@@ -398,10 +396,10 @@ Cache<TagStore,Coherence>::timingAccess(PacketPtr pkt)
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 PacketPtr
-Cache<TagStore,Coherence>::getBusPacket(PacketPtr cpu_pkt, BlkType *blk,
-                                        bool needsExclusive)
+Cache<TagStore>::getBusPacket(PacketPtr cpu_pkt, BlkType *blk,
+                              bool needsExclusive)
 {
     bool blkValid = blk && blk->isValid();
 
@@ -441,9 +439,9 @@ Cache<TagStore,Coherence>::getBusPacket(PacketPtr cpu_pkt, BlkType *blk,
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 Tick
-Cache<TagStore,Coherence>::atomicAccess(PacketPtr pkt)
+Cache<TagStore>::atomicAccess(PacketPtr pkt)
 {
     int lat = hitLatency;
 
@@ -511,10 +509,10 @@ Cache<TagStore,Coherence>::atomicAccess(PacketPtr pkt)
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::functionalAccess(PacketPtr pkt,
-                                            CachePort *otherSidePort)
+Cache<TagStore>::functionalAccess(PacketPtr pkt,
+                                  CachePort *otherSidePort)
 {
     Addr blk_addr = pkt->getAddr() & ~(blkSize - 1);
     BlkType *blk = tags->findBlock(pkt->getAddr());
@@ -561,9 +559,9 @@ Cache<TagStore,Coherence>::functionalAccess(PacketPtr pkt,
 /////////////////////////////////////////////////////
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk)
+Cache<TagStore>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk)
 {
     assert(blk);
     assert(pkt->needsExclusive() ? blk->isWritable() : blk->isValid());
@@ -586,10 +584,10 @@ Cache<TagStore,Coherence>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk)
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 bool
-Cache<TagStore,Coherence>::satisfyMSHR(MSHR *mshr, PacketPtr pkt,
-                                       BlkType *blk)
+Cache<TagStore>::satisfyMSHR(MSHR *mshr, PacketPtr pkt,
+                             BlkType *blk)
 {
     // respond to MSHR targets, if any
 
@@ -642,9 +640,9 @@ Cache<TagStore,Coherence>::satisfyMSHR(MSHR *mshr, PacketPtr pkt,
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::handleResponse(PacketPtr pkt)
+Cache<TagStore>::handleResponse(PacketPtr pkt)
 {
     Tick time = curTick + hitLatency;
     MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
@@ -730,9 +728,9 @@ Cache<TagStore,Coherence>::handleResponse(PacketPtr pkt)
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 PacketPtr
-Cache<TagStore,Coherence>::writebackBlk(BlkType *blk)
+Cache<TagStore>::writebackBlk(BlkType *blk)
 {
     assert(blk && blk->isValid() && blk->isDirty());
 
@@ -754,12 +752,13 @@ Cache<TagStore,Coherence>::writebackBlk(BlkType *blk)
 // is called by both atomic and timing-mode accesses, and in atomic
 // mode we don't mess with the write buffer (we just perform the
 // writebacks atomically once the original request is complete).
-template<class TagStore, class Coherence>
-typename Cache<TagStore,Coherence>::BlkType*
-Cache<TagStore,Coherence>::handleFill(PacketPtr pkt, BlkType *blk,
-                                      PacketList &writebacks)
+template<class TagStore>
+typename Cache<TagStore>::BlkType*
+Cache<TagStore>::handleFill(PacketPtr pkt, BlkType *blk,
+                            PacketList &writebacks)
 {
     Addr addr = pkt->getAddr();
+    CacheBlk::State old_state = blk ? blk->status : 0;
 
     if (blk == NULL) {
         // better have read new data...
@@ -795,21 +794,24 @@ Cache<TagStore,Coherence>::handleFill(PacketPtr pkt, BlkType *blk,
         }
 
         blk->tag = tags->extractTag(addr);
-        blk->status = coherence->getNewState(pkt);
     } else {
         // existing block... probably an upgrade
         assert(blk->tag == tags->extractTag(addr));
         // either we're getting new data or the block should already be valid
         assert(pkt->isRead() || blk->isValid());
-        CacheBlk::State old_state = blk->status;
-        blk->status = coherence->getNewState(pkt, old_state);
-        if (blk->status != old_state)
-            DPRINTF(Cache, "Block addr %x moving from state %i to %i\n",
-                    addr, old_state, blk->status);
-        else
-            warn("Changing state to same value\n");
     }
 
+    if (pkt->needsExclusive()) {
+        blk->status = BlkValid | BlkWritable | BlkDirty;
+    } else if (!pkt->sharedAsserted()) {
+        blk->status = BlkValid | BlkWritable;
+    } else {
+        blk->status = BlkValid;
+    }
+
+    DPRINTF(Cache, "Block addr %x moving from state %i to %i\n",
+            addr, old_state, blk->status);
+
     // if we got new data, copy it in
     if (pkt->isRead()) {
         std::memcpy(blk->data, pkt->getPtr<uint8_t>(), blkSize);
@@ -827,11 +829,11 @@ Cache<TagStore,Coherence>::handleFill(PacketPtr pkt, BlkType *blk,
 //
 /////////////////////////////////////////////////////
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::doTimingSupplyResponse(PacketPtr req_pkt,
-                                                  uint8_t *blk_data,
-                                                  bool already_copied)
+Cache<TagStore>::doTimingSupplyResponse(PacketPtr req_pkt,
+                                        uint8_t *blk_data,
+                                        bool already_copied)
 {
     // timing-mode snoop responses require a new packet, unless we
     // already made a copy...
@@ -842,10 +844,10 @@ Cache<TagStore,Coherence>::doTimingSupplyResponse(PacketPtr req_pkt,
     memSidePort->respond(pkt, curTick + hitLatency);
 }
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::handleSnoop(PacketPtr pkt, BlkType *blk,
-                                       bool is_timing, bool is_deferred)
+Cache<TagStore>::handleSnoop(PacketPtr pkt, BlkType *blk,
+                             bool is_timing, bool is_deferred)
 {
     if (!blk || !blk->isValid()) {
         return;
@@ -894,9 +896,9 @@ Cache<TagStore,Coherence>::handleSnoop(PacketPtr pkt, BlkType *blk,
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::snoopTiming(PacketPtr pkt)
+Cache<TagStore>::snoopTiming(PacketPtr pkt)
 {
     if (pkt->req->isUncacheable()) {
         //Can't get a hit on an uncacheable address
@@ -959,9 +961,9 @@ Cache<TagStore,Coherence>::snoopTiming(PacketPtr pkt)
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 Tick
-Cache<TagStore,Coherence>::snoopAtomic(PacketPtr pkt)
+Cache<TagStore>::snoopAtomic(PacketPtr pkt)
 {
     if (pkt->req->isUncacheable()) {
         // Can't get a hit on an uncacheable address
@@ -975,9 +977,9 @@ Cache<TagStore,Coherence>::snoopAtomic(PacketPtr pkt)
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 MSHR *
-Cache<TagStore,Coherence>::getNextMSHR()
+Cache<TagStore>::getNextMSHR()
 {
     // Check both MSHR queue and write buffer for potential requests
     MSHR *miss_mshr  = mshrQueue.getNextMSHR();
@@ -1051,9 +1053,9 @@ Cache<TagStore,Coherence>::getNextMSHR()
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 PacketPtr
-Cache<TagStore,Coherence>::getTimingPacket()
+Cache<TagStore>::getTimingPacket()
 {
     MSHR *mshr = getNextMSHR();
 
@@ -1100,9 +1102,9 @@ Cache<TagStore,Coherence>::getTimingPacket()
 //
 ///////////////
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::CpuSidePort::
+Cache<TagStore>::CpuSidePort::
 getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
 {
     // CPU side port doesn't snoop; it's a target only.
@@ -1112,9 +1114,9 @@ getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 bool
-Cache<TagStore,Coherence>::CpuSidePort::recvTiming(PacketPtr pkt)
+Cache<TagStore>::CpuSidePort::recvTiming(PacketPtr pkt)
 {
     if (pkt->isRequest() && blocked) {
         DPRINTF(Cache,"Scheduling a retry while blocked\n");
@@ -1127,17 +1129,17 @@ Cache<TagStore,Coherence>::CpuSidePort::recvTiming(PacketPtr pkt)
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 Tick
-Cache<TagStore,Coherence>::CpuSidePort::recvAtomic(PacketPtr pkt)
+Cache<TagStore>::CpuSidePort::recvAtomic(PacketPtr pkt)
 {
     return myCache()->atomicAccess(pkt);
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::CpuSidePort::recvFunctional(PacketPtr pkt)
+Cache<TagStore>::CpuSidePort::recvFunctional(PacketPtr pkt)
 {
     checkFunctional(pkt);
     if (pkt->result != Packet::Success)
@@ -1145,10 +1147,10 @@ Cache<TagStore,Coherence>::CpuSidePort::recvFunctional(PacketPtr pkt)
 }
 
 
-template<class TagStore, class Coherence>
-Cache<TagStore,Coherence>::
+template<class TagStore>
+Cache<TagStore>::
 CpuSidePort::CpuSidePort(const std::string &_name,
-                         Cache<TagStore,Coherence> *_cache)
+                         Cache<TagStore> *_cache)
     : BaseCache::CachePort(_name, _cache)
 {
 }
@@ -1159,9 +1161,9 @@ CpuSidePort::CpuSidePort(const std::string &_name,
 //
 ///////////////
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::MemSidePort::
+Cache<TagStore>::MemSidePort::
 getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
 {
     otherPort->getPeerAddressRanges(resp, snoop);
@@ -1171,9 +1173,9 @@ getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 bool
-Cache<TagStore,Coherence>::MemSidePort::recvTiming(PacketPtr pkt)
+Cache<TagStore>::MemSidePort::recvTiming(PacketPtr pkt)
 {
     // this needs to be fixed so that the cache updates the mshr and sends the
     // packet back out on the link, but it probably won't happen so until this
@@ -1196,9 +1198,9 @@ Cache<TagStore,Coherence>::MemSidePort::recvTiming(PacketPtr pkt)
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 Tick
-Cache<TagStore,Coherence>::MemSidePort::recvAtomic(PacketPtr pkt)
+Cache<TagStore>::MemSidePort::recvAtomic(PacketPtr pkt)
 {
     // in atomic mode, responses go back to the sender via the
     // function return from sendAtomic(), not via a separate
@@ -1209,9 +1211,9 @@ Cache<TagStore,Coherence>::MemSidePort::recvAtomic(PacketPtr pkt)
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::MemSidePort::recvFunctional(PacketPtr pkt)
+Cache<TagStore>::MemSidePort::recvFunctional(PacketPtr pkt)
 {
     checkFunctional(pkt);
     if (pkt->result != Packet::Success)
@@ -1220,9 +1222,9 @@ Cache<TagStore,Coherence>::MemSidePort::recvFunctional(PacketPtr pkt)
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::MemSidePort::sendPacket()
+Cache<TagStore>::MemSidePort::sendPacket()
 {
     // if we have responses that are ready, they take precedence
     if (deferredPacketReady()) {
@@ -1278,28 +1280,27 @@ Cache<TagStore,Coherence>::MemSidePort::sendPacket()
     }
 }
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::MemSidePort::recvRetry()
+Cache<TagStore>::MemSidePort::recvRetry()
 {
     assert(waitingOnRetry);
     sendPacket();
 }
 
 
-template<class TagStore, class Coherence>
+template<class TagStore>
 void
-Cache<TagStore,Coherence>::MemSidePort::processSendEvent()
+Cache<TagStore>::MemSidePort::processSendEvent()
 {
     assert(!waitingOnRetry);
     sendPacket();
 }
 
 
-template<class TagStore, class Coherence>
-Cache<TagStore,Coherence>::
-MemSidePort::MemSidePort(const std::string &_name,
-                         Cache<TagStore,Coherence> *_cache)
+template<class TagStore>
+Cache<TagStore>::
+MemSidePort::MemSidePort(const std::string &_name, Cache<TagStore> *_cache)
     : BaseCache::CachePort(_name, _cache)
 {
     // override default send event from SimpleTimingPort
diff --git a/src/mem/cache/coherence/CoherenceProtocol.py b/src/mem/cache/coherence/CoherenceProtocol.py
deleted file mode 100644
index 82adb6862..000000000
--- a/src/mem/cache/coherence/CoherenceProtocol.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from m5.SimObject import SimObject
-from m5.params import *
-class Coherence(Enum): vals = ['uni', 'msi', 'mesi', 'mosi', 'moesi']
-
-class CoherenceProtocol(SimObject):
-    type = 'CoherenceProtocol'
-    do_upgrades = Param.Bool(True, "use upgrade transactions?")
-    protocol = Param.Coherence("name of coherence protocol")
diff --git a/src/mem/cache/coherence/SConscript b/src/mem/cache/coherence/SConscript
deleted file mode 100644
index 91720b20e..000000000
--- a/src/mem/cache/coherence/SConscript
+++ /dev/null
@@ -1,36 +0,0 @@
-# -*- mode:python -*-
-
-# Copyright (c) 2006 The Regents of The University of Michigan
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met: redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer;
-# redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution;
-# neither the name of the copyright holders nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-# Authors: Nathan Binkert
-
-Import('*')
-
-SimObject('CoherenceProtocol.py')
-
-Source('coherence_protocol.cc')
-
diff --git a/src/mem/cache/coherence/coherence_protocol.cc b/src/mem/cache/coherence/coherence_protocol.cc
deleted file mode 100644
index 47d2b469f..000000000
--- a/src/mem/cache/coherence/coherence_protocol.cc
+++ /dev/null
@@ -1,469 +0,0 @@
-/*
- * Copyright (c) 2002-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Erik Hallnor
- *          Steve Reinhardt
- *          Ron Dreslinski
- */
-
-/**
- * @file
- * Definitions of CoherenceProtocol.
- */
-
-#include <string>
-
-#include "base/misc.hh"
-#include "mem/cache/miss/mshr.hh"
-#include "mem/cache/cache.hh"
-#include "mem/cache/coherence/coherence_protocol.hh"
-#include "sim/builder.hh"
-
-using namespace std;
-
-
-CoherenceProtocol::StateTransition::StateTransition()
-    : busCmd(MemCmd::InvalidCmd), newState(-1), snoopFunc(invalidTransition)
-{
-}
-
-
-void
-CoherenceProtocol::regStats()
-{
-    // Even though we count all the possible transitions in the
-    // requestCount and snoopCount arrays, most of these are invalid,
-    // so we just select the interesting ones to print here.
-
-    requestCount[Invalid][MemCmd::ReadReq]
-        .name(name() + ".read_invalid")
-        .desc("read misses to invalid blocks")
-        ;
-
-    requestCount[Invalid][MemCmd::WriteReq]
-        .name(name() +".write_invalid")
-        .desc("write misses to invalid blocks")
-        ;
-
-    requestCount[Invalid][MemCmd::SoftPFReq]
-        .name(name() +".swpf_invalid")
-        .desc("soft prefetch misses to invalid blocks")
-        ;
-
-    requestCount[Invalid][MemCmd::HardPFReq]
-        .name(name() +".hwpf_invalid")
-        .desc("hard prefetch misses to invalid blocks")
-        ;
-
-    requestCount[Shared][MemCmd::WriteReq]
-        .name(name() + ".write_shared")
-        .desc("write misses to shared blocks")
-        ;
-
-    requestCount[Owned][MemCmd::WriteReq]
-        .name(name() + ".write_owned")
-        .desc("write misses to owned blocks")
-        ;
-
-    snoopCount[Shared][MemCmd::ReadReq]
-        .name(name() + ".snoop_read_shared")
-        .desc("read snoops on shared blocks")
-        ;
-
-    snoopCount[Shared][MemCmd::ReadExReq]
-        .name(name() + ".snoop_readex_shared")
-        .desc("readEx snoops on shared blocks")
-        ;
-
-    snoopCount[Shared][MemCmd::UpgradeReq]
-        .name(name() + ".snoop_upgrade_shared")
-        .desc("upgradee snoops on shared blocks")
-        ;
-
-    snoopCount[Modified][MemCmd::ReadReq]
-        .name(name() + ".snoop_read_modified")
-        .desc("read snoops on modified blocks")
-        ;
-
-    snoopCount[Modified][MemCmd::ReadExReq]
-        .name(name() + ".snoop_readex_modified")
-        .desc("readEx snoops on modified blocks")
-        ;
-
-    snoopCount[Owned][MemCmd::ReadReq]
-        .name(name() + ".snoop_read_owned")
-        .desc("read snoops on owned blocks")
-        ;
-
-    snoopCount[Owned][MemCmd::ReadExReq]
-        .name(name() + ".snoop_readex_owned")
-        .desc("readEx snoops on owned blocks")
-        ;
-
-    snoopCount[Owned][MemCmd::UpgradeReq]
-        .name(name() + ".snoop_upgrade_owned")
-        .desc("upgrade snoops on owned blocks")
-        ;
-
-    snoopCount[Exclusive][MemCmd::ReadReq]
-        .name(name() + ".snoop_read_exclusive")
-        .desc("read snoops on exclusive blocks")
-        ;
-
-    snoopCount[Exclusive][MemCmd::ReadExReq]
-        .name(name() + ".snoop_readex_exclusive")
-        .desc("readEx snoops on exclusive blocks")
-        ;
-
-    snoopCount[Shared][MemCmd::WriteInvalidateReq]
-        .name(name() + ".snoop_writeinv_shared")
-        .desc("WriteInvalidate snoops on shared blocks")
-        ;
-
-    snoopCount[Owned][MemCmd::WriteInvalidateReq]
-        .name(name() + ".snoop_writeinv_owned")
-        .desc("WriteInvalidate snoops on owned blocks")
-        ;
-
-    snoopCount[Exclusive][MemCmd::WriteInvalidateReq]
-        .name(name() + ".snoop_writeinv_exclusive")
-        .desc("WriteInvalidate snoops on exclusive blocks")
-        ;
-
-    snoopCount[Modified][MemCmd::WriteInvalidateReq]
-        .name(name() + ".snoop_writeinv_modified")
-        .desc("WriteInvalidate snoops on modified blocks")
-        ;
-
-    snoopCount[Invalid][MemCmd::WriteInvalidateReq]
-        .name(name() + ".snoop_writeinv_invalid")
-        .desc("WriteInvalidate snoops on invalid blocks")
-        ;
-}
-
-
-bool
-CoherenceProtocol::invalidateTrans(BaseCache *cache, PacketPtr &pkt,
-                                   CacheBlk *blk, MSHR *mshr,
-                                   CacheBlk::State & new_state)
-{
-    // invalidate the block
-    new_state = (blk->status & ~stateMask) | Invalid;
-    return false;
-}
-
-
-bool
-CoherenceProtocol::supplyTrans(BaseCache *cache, PacketPtr &pkt,
-                               CacheBlk *blk,
-                               MSHR *mshr,
-                               CacheBlk::State & new_state)
-{
-    return true;
-}
-
-
-bool
-CoherenceProtocol::supplyAndGotoSharedTrans(BaseCache *cache, PacketPtr &pkt,
-                                            CacheBlk *blk,
-                                            MSHR *mshr,
-                                            CacheBlk::State & new_state)
-{
-    new_state = (blk->status & ~stateMask) | Shared;
-    pkt->assertShared();
-    return supplyTrans(cache, pkt, blk, mshr, new_state);
-}
-
-
-bool
-CoherenceProtocol::supplyAndGotoOwnedTrans(BaseCache *cache, PacketPtr &pkt,
-                                           CacheBlk *blk,
-                                           MSHR *mshr,
-                                           CacheBlk::State & new_state)
-{
-    new_state = (blk->status & ~stateMask) | Owned;
-    pkt->assertShared();
-    return supplyTrans(cache, pkt, blk, mshr, new_state);
-}
-
-
-bool
-CoherenceProtocol::supplyAndInvalidateTrans(BaseCache *cache, PacketPtr &pkt,
-                                            CacheBlk *blk,
-                                            MSHR *mshr,
-                                            CacheBlk::State & new_state)
-{
-    new_state = (blk->status & ~stateMask) | Invalid;
-    return supplyTrans(cache, pkt, blk, mshr, new_state);
-}
-
-bool
-CoherenceProtocol::assertShared(BaseCache *cache, PacketPtr &pkt,
-                                            CacheBlk *blk,
-                                            MSHR *mshr,
-                                            CacheBlk::State & new_state)
-{
-    new_state = (blk->status & ~stateMask) | Shared;
-    pkt->assertShared();
-    return false;
-}
-
-CoherenceProtocol::CoherenceProtocol(const string &name,
-                                     const string &protocol,
-                                     const bool doUpgrades)
-    : SimObject(name)
-{
-    // Python should catch this, but in case it doesn't...
-    if (!(protocol == "msi"  || protocol == "mesi" ||
-          protocol == "mosi" || protocol == "moesi")) {
-        fatal("CoherenceProtocol: unrecognized protocol %s\n",  protocol);
-    }
-
-    bool hasOwned = (protocol == "mosi" || protocol == "moesi");
-    bool hasExclusive = (protocol == "mesi" || protocol == "moesi");
-
-    if (hasOwned && !doUpgrades) {
-        fatal("CoherenceProtocol: ownership protocols require upgrade "
-              "transactions\n(write miss on owned block generates ReadExcl, "
-              "which will clobber dirty block)\n");
-    }
-
-    // set up a few shortcuts to save typing & visual clutter
-    typedef MemCmd MC;
-    StateTransition (&tt)[stateMax+1][MC::NUM_MEM_CMDS] = transitionTable;
-
-    MC::Command writeToSharedCmd =
-        doUpgrades ? MC::UpgradeReq : MC::ReadExReq;
-    MC::Command writeToSharedResp =
-        doUpgrades ? MC::UpgradeResp : MC::ReadExResp;
-
-    // Note that all transitions by default cause a panic.
-    // Override the valid transitions with the appropriate actions here.
-
-    //
-    // ----- incoming requests: specify outgoing bus request -----
-    //
-    tt[Invalid][MC::ReadReq].onRequest(MC::ReadReq);
-    // we only support write allocate right now
-    tt[Invalid][MC::WriteReq].onRequest(MC::ReadExReq);
-    tt[Invalid][MC::ReadExReq].onRequest(MC::ReadExReq);
-    tt[Invalid][MC::SwapReq].onRequest(MC::ReadExReq);
-    tt[Invalid][MC::UpgradeReq].onRequest(MC::UpgradeReq);
-    tt[Shared][MC::WriteReq].onRequest(writeToSharedCmd);
-    tt[Shared][MC::ReadExReq].onRequest(MC::ReadExReq);
-    tt[Shared][MC::SwapReq].onRequest(writeToSharedCmd);
-    if (hasOwned) {
-        tt[Owned][MC::WriteReq].onRequest(writeToSharedCmd);
-        tt[Owned][MC::ReadExReq].onRequest(MC::ReadExReq);
-        tt[Owned][MC::SwapReq].onRequest(writeToSharedCmd);
-    }
-
-    // Prefetching causes a read
-    tt[Invalid][MC::SoftPFReq].onRequest(MC::ReadReq);
-    tt[Invalid][MC::HardPFReq].onRequest(MC::ReadReq);
-
-    //
-    // ----- on response to given request: specify new state -----
-    //
-    tt[Invalid][MC::ReadExResp].onResponse(Modified);
-    tt[Shared][writeToSharedResp].onResponse(Modified);
-    // Go to Exclusive state on read response if we have one (will
-    // move into shared if the shared line is asserted in the
-    // getNewState function)
-    //
-    // originally had this as:
-    // tt[Invalid][MC::ReadResp].onResponse(hasExclusive ? Exclusive: Shared);
-    // ...but for some reason that caused a link error...
-    if (hasExclusive) {
-        tt[Invalid][MC::ReadResp].onResponse(Exclusive);
-    } else {
-        tt[Invalid][MC::ReadResp].onResponse(Shared);
-    }
-    if (hasOwned) {
-        tt[Owned][writeToSharedResp].onResponse(Modified);
-    }
-
-    //
-    // ----- bus snoop transition functions -----
-    //
-    tt[Invalid][MC::ReadReq].onSnoop(nullTransition);
-    tt[Invalid][MC::ReadExReq].onSnoop(nullTransition);
-    tt[Invalid][MC::WriteInvalidateReq].onSnoop(invalidateTrans);
-    tt[Shared][MC::ReadReq].onSnoop(hasExclusive
-                                   ? assertShared : nullTransition);
-    tt[Shared][MC::ReadExReq].onSnoop(invalidateTrans);
-    tt[Shared][MC::WriteInvalidateReq].onSnoop(invalidateTrans);
-    if (doUpgrades) {
-        tt[Invalid][MC::UpgradeReq].onSnoop(nullTransition);
-        tt[Shared][MC::UpgradeReq].onSnoop(invalidateTrans);
-    }
-    tt[Modified][MC::ReadExReq].onSnoop(supplyAndInvalidateTrans);
-    tt[Modified][MC::ReadReq].onSnoop(hasOwned
-                                     ? supplyAndGotoOwnedTrans
-                                     : supplyAndGotoSharedTrans);
-    tt[Modified][MC::WriteInvalidateReq].onSnoop(invalidateTrans);
-
-    if (hasExclusive) {
-        tt[Exclusive][MC::ReadReq].onSnoop(assertShared);
-        tt[Exclusive][MC::ReadExReq].onSnoop(invalidateTrans);
-        tt[Exclusive][MC::WriteInvalidateReq].onSnoop(invalidateTrans);
-    }
-
-    if (hasOwned) {
-        tt[Owned][MC::ReadReq].onSnoop(supplyAndGotoOwnedTrans);
-        tt[Owned][MC::ReadExReq].onSnoop(supplyAndInvalidateTrans);
-        tt[Owned][MC::UpgradeReq].onSnoop(invalidateTrans);
-        tt[Owned][MC::WriteInvalidateReq].onSnoop(invalidateTrans);
-    }
-
-    // @todo add in hardware prefetch to this list
-}
-
-
-MemCmd
-CoherenceProtocol::getBusCmd(MemCmd cmdIn, CacheBlk::State state,
-                             MSHR *mshr)
-{
-    state &= stateMask;
-    int cmd_idx = cmdIn.toInt();
-
-    assert(0 <= state && state <= stateMax);
-    assert(0 <= cmd_idx && cmd_idx < MemCmd::NUM_MEM_CMDS);
-
-    MemCmd::Command cmdOut = transitionTable[state][cmd_idx].busCmd;
-
-    assert(cmdOut != MemCmd::InvalidCmd);
-
-    ++requestCount[state][cmd_idx];
-
-    return cmdOut;
-}
-
-
-CacheBlk::State
-CoherenceProtocol::getNewState(PacketPtr pkt, CacheBlk::State oldState)
-{
-    CacheBlk::State state = oldState & stateMask;
-    int cmd_idx = pkt->cmdToIndex();
-
-    assert(0 <= state && state <= stateMax);
-    assert(0 <= cmd_idx && cmd_idx < MemCmd::NUM_MEM_CMDS);
-
-    CacheBlk::State newState = transitionTable[state][cmd_idx].newState;
-
-    //Check if it's exclusive and the shared line was asserted,
-    //then  goto shared instead
-    if (newState == Exclusive && pkt->sharedAsserted()) {
-        newState = Shared;
-    }
-
-    assert(newState != -1);
-
-    //Make sure not to loose any other state information
-    newState = (oldState & ~stateMask) | newState;
-    return newState;
-}
-
-
-bool
-CoherenceProtocol::handleBusRequest(BaseCache *cache, PacketPtr &pkt,
-                                    CacheBlk *blk,
-                                    MSHR *mshr,
-                                    CacheBlk::State & new_state)
-{
-    if (blk == NULL) {
-        // nothing to do if we don't have a block
-        return false;
-    }
-
-    CacheBlk::State state = blk->status & stateMask;
-    int cmd_idx = pkt->cmdToIndex();
-
-    assert(0 <= state && state <= stateMax);
-    assert(0 <= cmd_idx && cmd_idx < MemCmd::NUM_MEM_CMDS);
-
-//    assert(mshr == NULL); // can't currently handle outstanding requests
-    //Check first if MSHR, and also insure, if there is one, that it is not in service
-    assert(!mshr || mshr->inService == 0);
-    ++snoopCount[state][cmd_idx];
-
-    bool ret = transitionTable[state][cmd_idx].snoopFunc(cache, pkt, blk, mshr,
-                                                     new_state);
-
-
-
-    return ret;
-}
-
-bool
-CoherenceProtocol::nullTransition(BaseCache *cache, PacketPtr &pkt,
-                                  CacheBlk *blk, MSHR *mshr,
-                                  CacheBlk::State & new_state)
-{
-    // do nothing
-    if (blk)
-        new_state = blk->status;
-    return false;
-}
-
-
-bool
-CoherenceProtocol::invalidTransition(BaseCache *cache, PacketPtr &pkt,
-                                     CacheBlk *blk, MSHR *mshr,
-                                     CacheBlk::State & new_state)
-{
-    panic("Invalid transition");
-    return false;
-}
-
-#ifndef DOXYGEN_SHOULD_SKIP_THIS
-
-BEGIN_DECLARE_SIM_OBJECT_PARAMS(CoherenceProtocol)
-
-    Param<string> protocol;
-    Param<bool> do_upgrades;
-
-END_DECLARE_SIM_OBJECT_PARAMS(CoherenceProtocol)
-
-
-BEGIN_INIT_SIM_OBJECT_PARAMS(CoherenceProtocol)
-
-    INIT_PARAM(protocol, "name of coherence protocol"),
-    INIT_PARAM_DFLT(do_upgrades, "use upgrade transactions?", true)
-
-END_INIT_SIM_OBJECT_PARAMS(CoherenceProtocol)
-
-
-CREATE_SIM_OBJECT(CoherenceProtocol)
-{
-    return new CoherenceProtocol(getInstanceName(), protocol,
-                                 do_upgrades);
-}
-
-REGISTER_SIM_OBJECT("CoherenceProtocol", CoherenceProtocol)
-
-#endif // DOXYGEN_SHOULD_SKIP_THIS
diff --git a/src/mem/cache/coherence/coherence_protocol.hh b/src/mem/cache/coherence/coherence_protocol.hh
deleted file mode 100644
index 4b8024582..000000000
--- a/src/mem/cache/coherence/coherence_protocol.hh
+++ /dev/null
@@ -1,257 +0,0 @@
-/*
- * Copyright (c) 2002-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Erik Hallnor
- *          Ron Dreslinski
- *          Steve Reinhardt
- */
-
-/**
- * @file
- * Declaration of CoherenceProcotol a basic coherence policy.
- */
-#ifndef __COHERENCE_PROTOCOL_HH__
-#define __COHERENCE_PROTOCOL_HH__
-
-#include <string>
-
-#include "sim/sim_object.hh"
-#include "mem/packet.hh"
-#include "mem/cache/cache_blk.hh"
-#include "base/statistics.hh"
-
-class BaseCache;
-class MSHR;
-
-/**
- * A simple coherence policy for the memory hierarchy. Currently implements
- * MSI, MESI, and MOESI protocols.
- */
-class CoherenceProtocol : public SimObject
-{
-  public:
-    /**
-     * Contruct and initialize this policy.
-     * @param name The name of this policy.
-     * @param protocol The string representation of the protocol to use.
-     * @param doUpgrades True if bus upgrades should be used.
-     */
-    CoherenceProtocol(const std::string &name, const std::string &protocol,
-                      const bool doUpgrades);
-
-    /**
-     * Destructor.
-     */
-    virtual ~CoherenceProtocol() {};
-
-    /**
-     * Register statistics
-     */
-    virtual void regStats();
-
-    /**
-     * Get the proper bus command for the given command and status.
-     * @param cmd The request's command.
-     * @param status The current state of the cache block.
-     * @param mshr The MSHR matching the request.
-     * @return The proper bus command, as determined by the protocol.
-     */
-    MemCmd getBusCmd(MemCmd cmd, CacheBlk::State status,
-                         MSHR *mshr = NULL);
-
-    /**
-     * Return the proper state given the current state and the bus response.
-     * @param pkt The bus response.
-     * @param oldState The current block state.
-     * @return The new state.
-     */
-    CacheBlk::State getNewState(PacketPtr pkt,
-                                CacheBlk::State oldState = 0);
-
-    /**
-     * Handle snooped bus requests.
-     * @param cache The cache that snooped the request.
-     * @param pkt The snooped bus request.
-     * @param blk The cache block corresponding to the request, if any.
-     * @param mshr The MSHR corresponding to the request, if any.
-     * @param new_state The new coherence state of the block.
-     * @return True if the request should be satisfied locally.
-     */
-    bool handleBusRequest(BaseCache *cache, PacketPtr &pkt, CacheBlk *blk,
-                          MSHR *mshr, CacheBlk::State &new_state);
-
-  protected:
-    /** Snoop function type. */
-    typedef bool (*SnoopFuncType)(BaseCache *, PacketPtr &, CacheBlk *,
-                                  MSHR *, CacheBlk::State&);
-
-    //
-    // Standard snoop transition functions
-    //
-
-    /**
-     * Do nothing transition.
-     */
-    static bool nullTransition(BaseCache *, PacketPtr &, CacheBlk *,
-                               MSHR *, CacheBlk::State&);
-
-    /**
-     * Invalid transition, basically panic.
-     */
-    static bool invalidTransition(BaseCache *, PacketPtr &, CacheBlk *,
-                                  MSHR *, CacheBlk::State&);
-
-    /**
-     * Invalidate block, move to Invalid state.
-     */
-    static bool invalidateTrans(BaseCache *, PacketPtr &, CacheBlk *,
-                                MSHR *, CacheBlk::State&);
-
-    /**
-     * Supply data, no state transition.
-     */
-    static bool supplyTrans(BaseCache *, PacketPtr &, CacheBlk *,
-                            MSHR *, CacheBlk::State&);
-
-    /**
-     * Supply data and go to Shared state.
-     */
-    static bool supplyAndGotoSharedTrans(BaseCache *, PacketPtr &, CacheBlk *,
-                                         MSHR *, CacheBlk::State&);
-
-    /**
-     * Supply data and go to Owned state.
-     */
-    static bool supplyAndGotoOwnedTrans(BaseCache *, PacketPtr &, CacheBlk *,
-                                        MSHR *, CacheBlk::State&);
-
-    /**
-     * Invalidate block, supply data, and go to Invalid state.
-     */
-    static bool supplyAndInvalidateTrans(BaseCache *, PacketPtr &, CacheBlk *,
-                                         MSHR *, CacheBlk::State&);
-
-    /**
-     * Assert the shared line for a block that is shared/exclusive.
-     */
-    static bool assertShared(BaseCache *, PacketPtr &, CacheBlk *,
-                                         MSHR *, CacheBlk::State&);
-
-    /**
-     * Definition of protocol state transitions.
-     */
-    class StateTransition
-    {
-        friend class CoherenceProtocol;
-
-        /** The bus command of this transition. */
-        Packet::Command busCmd;
-        /** The state to transition to. */
-        int newState;
-        /** The snoop function for this transition. */
-        SnoopFuncType snoopFunc;
-
-        /**
-         * Constructor, defaults to invalid transition.
-         */
-        StateTransition();
-
-        /**
-         * Initialize bus command.
-         * @param cmd The bus command to use.
-         */
-        void onRequest(Packet::Command cmd)
-        {
-            busCmd = cmd;
-        }
-
-        /**
-         * Set the transition state.
-         * @param s The new state.
-         */
-        void onResponse(CacheBlk::State s)
-        {
-            newState = s;
-        }
-
-        /**
-         * Initialize the snoop function.
-         * @param f The new snoop function.
-         */
-        void onSnoop(SnoopFuncType f)
-        {
-            snoopFunc = f;
-        }
-    };
-
-    friend class CoherenceProtocol::StateTransition;
-
-    /** Mask to select status bits relevant to coherence protocol. */
-    static const int stateMask = BlkValid | BlkWritable | BlkDirty;
-
-    /** The Modified (M) state. */
-    static const int Modified = BlkValid | BlkWritable | BlkDirty;
-    /** The Owned (O) state. */
-    static const int Owned = BlkValid | BlkDirty;
-    /** The Exclusive (E) state. */
-    static const int Exclusive = BlkValid | BlkWritable;
-    /** The Shared (S) state. */
-    static const int Shared = BlkValid;
-    /** The Invalid (I) state. */
-    static const int Invalid = 0;
-
-    /**
-     * Maximum state encoding value (used to size transition lookup
-     * table).  Could be more than number of states, depends on
-     * encoding of status bits.
-     */
-    static const int stateMax = stateMask;
-
-    /**
-     * The table of all possible transitions, organized by starting state and
-     * request command.
-     */
-    StateTransition transitionTable[stateMax+1][MemCmd::NUM_MEM_CMDS];
-
-    /**
-     * @addtogroup CoherenceStatistics
-     * @{
-     */
-    /**
-     * State accesses from parent cache.
-     */
-    Stats::Scalar<> requestCount[stateMax+1][MemCmd::NUM_MEM_CMDS];
-    /**
-     * State accesses from snooped requests.
-     */
-    Stats::Scalar<> snoopCount[stateMax+1][MemCmd::NUM_MEM_CMDS];
-    /**
-     * @}
-     */
-};
-
-#endif // __COHERENCE_PROTOCOL_HH__
diff --git a/src/mem/cache/coherence/simple_coherence.hh b/src/mem/cache/coherence/simple_coherence.hh
deleted file mode 100644
index 214828ca7..000000000
--- a/src/mem/cache/coherence/simple_coherence.hh
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Copyright (c) 2003-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Erik Hallnor
- *          Ron Dreslinski
- */
-
-/**
- * @file
- * Declaration of a simple coherence policy.
- */
-
-#ifndef __SIMPLE_COHERENCE_HH__
-#define __SIMPLE_COHERENCE_HH__
-
-#include <string>
-
-#include "mem/packet.hh"
-#include "mem/cache/cache_blk.hh"
-#include "mem/cache/miss/mshr_queue.hh"
-#include "mem/cache/coherence/coherence_protocol.hh"
-
-class BaseCache;
-
-/**
- * A simple MP coherence policy. This policy assumes an atomic bus and only one
- * level of cache.
- */
-class SimpleCoherence
-{
-  protected:
-    /** Pointer to the parent cache. */
-    BaseCache *cache;
-    /** Pointer to the coherence protocol. */
-    CoherenceProtocol *protocol;
-
-  public:
-    /**
-     * Construct and initialize this coherence policy.
-     * @param _protocol The coherence protocol to use.
-     */
-    SimpleCoherence(CoherenceProtocol *_protocol)
-        : protocol(_protocol)
-    {
-    }
-
-    /**
-     * Set the pointer to the parent cache.
-     * @param _cache The parent cache.
-     */
-    void setCache(BaseCache *_cache)
-    {
-        cache = _cache;
-    }
-
-    /**
-     * Register statistics.
-     * @param name The name to prepend to stat descriptions.
-     */
-    void regStats(const std::string &name)
-    {
-    }
-
-    /**
-     * This policy does not forward invalidates, return NULL.
-     * @return NULL.
-     */
-    PacketPtr getPacket()
-    {
-        return NULL;
-    }
-
-    /**
-     * Return the proper state given the current state and the bus response.
-     * @param pkt The bus response.
-     * @param current The current block state.
-     * @return The new state.
-     */
-    CacheBlk::State getNewState(PacketPtr pkt,
-                                CacheBlk::State current = 0)
-    {
-        return protocol->getNewState(pkt, current);
-    }
-
-    /**
-     * Handle snooped bus requests.
-     * @param pkt The snooped bus request.
-     * @param blk The cache block corresponding to the request, if any.
-     * @param mshr The MSHR corresponding to the request, if any.
-     * @param new_state Return the new state for the block.
-     */
-    bool handleBusRequest(PacketPtr &pkt, CacheBlk *blk, MSHR *mshr,
-                          CacheBlk::State &new_state)
-    {
-//	assert(mshr == NULL);
-//Got rid of, there could be an MSHR, but it can't be in service
-        if (blk != NULL)
-        {
-            if (pkt->cmd != MemCmd::Writeback) {
-                return protocol->handleBusRequest(cache, pkt, blk, mshr,
-                                              new_state);
-            }
-            else { //It is a writeback, must be ownership protocol, just keep state
-                new_state = blk->status;
-            }
-        }
-        return false;
-    }
-
-    /**
-     * Get the proper bus command for the given command and status.
-     * @param cmd The request's command.
-     * @param state The current state of the cache block.
-     * @return The proper bus command, as determined by the protocol.
-     */
-    MemCmd getBusCmd(MemCmd cmd,
-                                  CacheBlk::State state)
-    {
-        if (cmd == MemCmd::Writeback) return MemCmd::Writeback;
-        return protocol->getBusCmd(cmd, state);
-    }
-
-    /**
-     * Return true if this coherence policy can handle fast cache writes.
-     */
-    bool allowFastWrites() { return false; }
-
-    bool hasProtocol() { return true; }
-};
-
-#endif //__SIMPLE_COHERENCE_HH__
-
-
-
-
-
-
-
-
-- 
cgit v1.2.3


From 6ab53415efe3e06c06589a8a6ef38185ff6f94b7 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sat, 30 Jun 2007 10:16:18 -0700
Subject: Get rid of Packet result field.  Error responses are now encoded in
 cmd field.

--HG--
extra : convert_revision : d67819b7e3ee4b9a5bf08541104de0a89485e90b
---
 src/arch/sparc/tlb.cc          |   4 +-
 src/cpu/o3/fetch_impl.hh       |   6 --
 src/cpu/o3/lsq_unit.hh         |   6 --
 src/cpu/o3/lsq_unit_impl.hh    |  26 -------
 src/cpu/ozone/lw_lsq.hh        |  10 ---
 src/cpu/ozone/lw_lsq_impl.hh   |  18 -----
 src/cpu/simple/atomic.cc       |  76 ++++++------------
 src/cpu/simple/atomic.hh       |  10 +--
 src/cpu/simple/base.hh         |   3 -
 src/cpu/simple/timing.cc       |   8 +-
 src/dev/alpha/console.cc       |  11 +--
 src/dev/alpha/tsunami_cchip.cc |   5 +-
 src/dev/alpha/tsunami_io.cc    |   6 +-
 src/dev/alpha/tsunami_pchip.cc |   6 +-
 src/dev/i8254xGBe.cc           |   4 +-
 src/dev/ide_ctrl.cc            |  14 ++--
 src/dev/io_device.cc           |   4 +-
 src/dev/isa_fake.cc            |   9 +--
 src/dev/ns_gige.cc             |   8 +-
 src/dev/pciconfigall.cc        |   4 +-
 src/dev/pcidev.cc              |   5 +-
 src/dev/sparc/dtod.cc          |   3 +-
 src/dev/sparc/iob.cc           |   5 +-
 src/dev/sparc/mm_disk.cc       |   6 +-
 src/dev/uart8250.cc            |   6 +-
 src/mem/bridge.cc              |  17 ++---
 src/mem/bridge.hh              |   2 +-
 src/mem/bus.cc                 |  30 ++++----
 src/mem/cache/base_cache.cc    |   2 +-
 src/mem/cache/cache_impl.hh    |  12 ++-
 src/mem/packet.cc              |  19 ++---
 src/mem/packet.hh              | 169 ++++++++++++++++++-----------------------
 src/mem/physical.cc            |   2 +-
 src/mem/port.cc                |   3 +-
 src/mem/tport.cc               |  21 +++--
 35 files changed, 199 insertions(+), 341 deletions(-)

diff --git a/src/arch/sparc/tlb.cc b/src/arch/sparc/tlb.cc
index 09266fd6e..68df19618 100644
--- a/src/arch/sparc/tlb.cc
+++ b/src/arch/sparc/tlb.cc
@@ -1023,7 +1023,7 @@ doMmuReadError:
         panic("need to impl DTB::doMmuRegRead() got asi=%#x, va=%#x\n",
             (uint32_t)asi, va);
     }
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return tc->getCpuPtr()->cycles(1);
 }
 
@@ -1268,7 +1268,7 @@ doMmuWriteError:
         panic("need to impl DTB::doMmuRegWrite() got asi=%#x, va=%#x d=%#x\n",
             (uint32_t)pkt->req->getAsi(), pkt->getAddr(), data);
     }
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return tc->getCpuPtr()->cycles(1);
 }
 
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
index 01e9b5b31..aa0c69ac4 100644
--- a/src/cpu/o3/fetch_impl.hh
+++ b/src/cpu/o3/fetch_impl.hh
@@ -628,12 +628,6 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
         // Now do the timing access to see whether or not the instruction
         // exists within the cache.
         if (!icachePort->sendTiming(data_pkt)) {
-            if (data_pkt->result == Packet::BadAddress) {
-                fault = TheISA::genMachineCheckFault();
-                delete mem_req;
-                memReq[tid] = NULL;
-                warn("Bad address!\n");
-            }
             assert(retryPkt == NULL);
             assert(retryTid == -1);
             DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index cc33e025d..d964b9f9f 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -653,8 +653,6 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
         data_pkt->senderState = state;
 
         if (!dcachePort->sendTiming(data_pkt)) {
-            Packet::Result result = data_pkt->result;
-
             // Delete state and data packet because a load retry
             // initiates a pipeline restart; it does not retry.
             delete state;
@@ -663,10 +661,6 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
 
             req = NULL;
 
-            if (result == Packet::BadAddress) {
-                return TheISA::genMachineCheckFault();
-            }
-
             // If the access didn't succeed, tell the LSQ by setting
             // the retry thread id.
             lsq->setRetryTid(lsqID);
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index bde4f8079..91e616589 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -690,9 +690,6 @@ LSQUnit<Impl>::writebackStores()
         }
 
         if (!dcachePort->sendTiming(data_pkt)) {
-            if (data_pkt->result == Packet::BadAddress) {
-                panic("LSQ sent out a bad address for a completed store!");
-            }
             // Need to handle becoming blocked on a store.
             DPRINTF(IEW, "D-Cache became blocked when writing [sn:%lli], will"
                     "retry later\n",
@@ -844,26 +841,6 @@ LSQUnit<Impl>::storePostSend(PacketPtr pkt)
 #endif
     }
 
-    if (pkt->result != Packet::Success) {
-        DPRINTF(LSQUnit,"D-Cache Write Miss on idx:%i!\n",
-                storeWBIdx);
-
-        DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n",
-                storeQueue[storeWBIdx].inst->seqNum);
-
-        //mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum);
-
-        //DPRINTF(LSQUnit, "Added MSHR. count = %i\n",mshrSeqNums.size());
-
-        // @todo: Increment stat here.
-    } else {
-        DPRINTF(LSQUnit,"D-Cache: Write Hit on idx:%i !\n",
-                storeWBIdx);
-
-        DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n",
-                storeQueue[storeWBIdx].inst->seqNum);
-    }
-
     incrStIdx(storeWBIdx);
 }
 
@@ -952,9 +929,6 @@ LSQUnit<Impl>::recvRetry()
         assert(retryPkt != NULL);
 
         if (dcachePort->sendTiming(retryPkt)) {
-            if (retryPkt->result == Packet::BadAddress) {
-                panic("LSQ sent out a bad address for a completed store!");
-            }
             storePostSend(retryPkt);
             retryPkt = NULL;
             isStoreBlocked = false;
diff --git a/src/cpu/ozone/lw_lsq.hh b/src/cpu/ozone/lw_lsq.hh
index 2048ad6bb..d9e0d04ac 100644
--- a/src/cpu/ozone/lw_lsq.hh
+++ b/src/cpu/ozone/lw_lsq.hh
@@ -661,16 +661,6 @@ OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx)
         cpu->lockFlag = true;
     }
 
-    if (data_pkt->result != Packet::Success) {
-        DPRINTF(OzoneLSQ, "OzoneLSQ: D-cache miss!\n");
-        DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n",
-                inst->seqNum);
-    } else {
-        DPRINTF(OzoneLSQ, "OzoneLSQ: D-cache hit!\n");
-        DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n",
-                inst->seqNum);
-    }
-
     return NoFault;
 }
 
diff --git a/src/cpu/ozone/lw_lsq_impl.hh b/src/cpu/ozone/lw_lsq_impl.hh
index f26b06453..eefc0df83 100644
--- a/src/cpu/ozone/lw_lsq_impl.hh
+++ b/src/cpu/ozone/lw_lsq_impl.hh
@@ -853,24 +853,6 @@ OzoneLWLSQ<Impl>::storePostSend(PacketPtr pkt, DynInstPtr &inst)
         }
 #endif
     }
-
-    if (pkt->result != Packet::Success) {
-        DPRINTF(OzoneLSQ,"D-Cache Write Miss!\n");
-
-        DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n",
-                inst->seqNum);
-
-        //mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum);
-
-        //DPRINTF(OzoneLWLSQ, "Added MSHR. count = %i\n",mshrSeqNums.size());
-
-        // @todo: Increment stat here.
-    } else {
-        DPRINTF(OzoneLSQ,"D-Cache: Write Hit!\n");
-
-        DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n",
-                inst->seqNum);
-    }
 }
 
 template <class Impl>
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index 03ff1282b..bcd6662c8 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -148,23 +148,9 @@ AtomicSimpleCPU::AtomicSimpleCPU(Params *p)
     icachePort.snoopRangeSent = false;
     dcachePort.snoopRangeSent = false;
 
-    ifetch_req = new Request();
-    ifetch_req->setThreadContext(p->cpu_id, 0); // Add thread ID if we add MT
-    ifetch_pkt = new Packet(ifetch_req, MemCmd::ReadReq, Packet::Broadcast);
-    ifetch_pkt->dataStatic(&inst);
-
-    data_read_req = new Request();
-    data_read_req->setThreadContext(p->cpu_id, 0); // Add thread ID here too
-    data_read_pkt = new Packet(data_read_req, MemCmd::ReadReq,
-                               Packet::Broadcast);
-    data_read_pkt->dataStatic(&dataReg);
-
-    data_write_req = new Request();
-    data_write_req->setThreadContext(p->cpu_id, 0); // Add thread ID here too
-    data_write_pkt = new Packet(data_write_req, MemCmd::WriteReq,
-                                Packet::Broadcast);
-    data_swap_pkt = new Packet(data_write_req, MemCmd::SwapReq,
-                                Packet::Broadcast);
+    ifetch_req.setThreadContext(p->cpu_id, 0); // Add thread ID if we add MT
+    data_read_req.setThreadContext(p->cpu_id, 0); // Add thread ID here too
+    data_write_req.setThreadContext(p->cpu_id, 0); // Add thread ID here too
 }
 
 
@@ -282,9 +268,7 @@ Fault
 AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
 {
     // use the CPU's statically allocated read request and packet objects
-    Request *req = data_read_req;
-    PacketPtr pkt = data_read_pkt;
-
+    Request *req = &data_read_req;
     req->setVirt(0, addr, sizeof(T), flags, thread->readPC());
 
     if (traceData) {
@@ -296,19 +280,15 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
 
     // Now do the access.
     if (fault == NoFault) {
-        pkt->reinitFromRequest();
+        Packet pkt = Packet(req, MemCmd::ReadReq, Packet::Broadcast);
+        pkt.dataStatic(&data);
 
         if (req->isMmapedIpr())
-            dcache_latency = TheISA::handleIprRead(thread->getTC(),pkt);
+            dcache_latency = TheISA::handleIprRead(thread->getTC(), &pkt);
         else
-            dcache_latency = dcachePort.sendAtomic(pkt);
+            dcache_latency = dcachePort.sendAtomic(&pkt);
         dcache_access = true;
-#if !defined(NDEBUG)
-        if (pkt->result != Packet::Success)
-            panic("Unable to find responder for address pa = %#X va = %#X\n",
-                    pkt->req->getPaddr(), pkt->req->getVaddr());
-#endif
-        data = pkt->get<T>();
+        assert(!pkt.isError());
 
         if (req->isLocked()) {
             TheISA::handleLockedRead(thread, req);
@@ -378,16 +358,9 @@ Fault
 AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
 {
     // use the CPU's statically allocated write request and packet objects
-    Request *req = data_write_req;
-    PacketPtr pkt;
-
+    Request *req = &data_write_req;
     req->setVirt(0, addr, sizeof(T), flags, thread->readPC());
 
-    if (req->isSwap())
-        pkt = data_swap_pkt;
-    else
-        pkt = data_write_pkt;
-
     if (traceData) {
         traceData->setAddr(addr);
     }
@@ -397,6 +370,11 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
 
     // Now do the access.
     if (fault == NoFault) {
+        Packet pkt =
+            Packet(req, req->isSwap() ? MemCmd::SwapReq : MemCmd::WriteReq,
+                   Packet::Broadcast);
+        pkt.dataStatic(&data);
+
         bool do_access = true;  // flag to suppress cache access
 
         if (req->isLocked()) {
@@ -409,27 +387,19 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
 
 
         if (do_access) {
-            pkt->reinitFromRequest();
-            pkt->dataStatic(&data);
-
             if (req->isMmapedIpr()) {
-                dcache_latency = TheISA::handleIprWrite(thread->getTC(), pkt);
+                dcache_latency = TheISA::handleIprWrite(thread->getTC(), &pkt);
             } else {
                 data = htog(data);
-                dcache_latency = dcachePort.sendAtomic(pkt);
+                dcache_latency = dcachePort.sendAtomic(&pkt);
             }
             dcache_access = true;
-
-#if !defined(NDEBUG)
-            if (pkt->result != Packet::Success)
-                panic("Unable to find responder for address pa = %#X va = %#X\n",
-                        pkt->req->getPaddr(), pkt->req->getVaddr());
-#endif
+            assert(!pkt.isError());
         }
 
         if (req->isSwap()) {
             assert(res);
-            *res = pkt->get<T>();
+            *res = pkt.get<T>();
         } else if (res) {
             *res = req->getExtraData();
         }
@@ -513,7 +483,7 @@ AtomicSimpleCPU::tick()
         if (!curStaticInst || !curStaticInst->isDelayedCommit())
             checkForInterrupts();
 
-        Fault fault = setupFetchRequest(ifetch_req);
+        Fault fault = setupFetchRequest(&ifetch_req);
 
         if (fault == NoFault) {
             Tick icache_latency = 0;
@@ -524,9 +494,11 @@ AtomicSimpleCPU::tick()
             //if(predecoder.needMoreBytes())
             //{
                 icache_access = true;
-                ifetch_pkt->reinitFromRequest();
+                Packet ifetch_pkt = Packet(&ifetch_req, MemCmd::ReadReq,
+                                           Packet::Broadcast);
+                ifetch_pkt.dataStatic(&inst);
 
-                icache_latency = icachePort.sendAtomic(ifetch_pkt);
+                icache_latency = icachePort.sendAtomic(&ifetch_pkt);
                 // ifetch_req is initialized to read the instruction directly
                 // into the CPU object's inst field.
             //}
diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh
index b127e3791..28e883b24 100644
--- a/src/cpu/simple/atomic.hh
+++ b/src/cpu/simple/atomic.hh
@@ -121,13 +121,9 @@ class AtomicSimpleCPU : public BaseSimpleCPU
     };
     DcachePort dcachePort;
 
-    Request  *ifetch_req;
-    PacketPtr ifetch_pkt;
-    Request  *data_read_req;
-    PacketPtr data_read_pkt;
-    Request  *data_write_req;
-    PacketPtr data_write_pkt;
-    PacketPtr data_swap_pkt;
+    Request ifetch_req;
+    Request data_read_req;
+    Request data_write_req;
 
     bool dcache_access;
     Tick dcache_latency;
diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh
index 243167db0..0550aa036 100644
--- a/src/cpu/simple/base.hh
+++ b/src/cpu/simple/base.hh
@@ -131,9 +131,6 @@ class BaseSimpleCPU : public BaseCPU
     // The predecoder
     TheISA::Predecoder predecoder;
 
-    // Static data storage
-    TheISA::LargestRead dataReg;
-
     StaticInstPtr curStaticInst;
     StaticInstPtr curMacroStaticInst;
 
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index 7698a588d..b4e4a4433 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -501,7 +501,7 @@ TimingSimpleCPU::completeIfetch(PacketPtr pkt)
 {
     // received a response from the icache: execute the received
     // instruction
-    assert(pkt->result == Packet::Success);
+    assert(!pkt->isError());
     assert(_status == IcacheWaitResponse);
 
     _status = Running;
@@ -569,7 +569,7 @@ TimingSimpleCPU::IcachePort::recvTiming(PacketPtr pkt)
 
         return true;
     }
-    else if (pkt->result == Packet::Nacked) {
+    else if (pkt->wasNacked()) {
         assert(cpu->_status == IcacheWaitResponse);
         pkt->reinitNacked();
         if (!sendTiming(pkt)) {
@@ -600,7 +600,7 @@ TimingSimpleCPU::completeDataAccess(PacketPtr pkt)
 {
     // received a response from the dcache: complete the load or store
     // instruction
-    assert(pkt->result == Packet::Success);
+    assert(!pkt->isError());
     assert(_status == DcacheWaitResponse);
     _status = Running;
 
@@ -663,7 +663,7 @@ TimingSimpleCPU::DcachePort::recvTiming(PacketPtr pkt)
 
         return true;
     }
-    else if (pkt->result == Packet::Nacked) {
+    else if (pkt->wasNacked()) {
         assert(cpu->_status == DcacheWaitResponse);
         pkt->reinitNacked();
         if (!sendTiming(pkt)) {
diff --git a/src/dev/alpha/console.cc b/src/dev/alpha/console.cc
index 443f376a5..55549a154 100644
--- a/src/dev/alpha/console.cc
+++ b/src/dev/alpha/console.cc
@@ -102,7 +102,6 @@ AlphaConsole::read(PacketPtr pkt)
      * machine dependent address swizzle is required?
      */
 
-    assert(pkt->result == Packet::Unknown);
     assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
 
     Addr daddr = pkt->getAddr() - pioAddr;
@@ -130,7 +129,7 @@ AlphaConsole::read(PacketPtr pkt)
                     /* Old console code read in everyting as a 32bit int
                      * we now break that for better error checking.
                      */
-                  pkt->result = Packet::BadAddress;
+                  pkt->setBadAddress();
             }
             DPRINTF(AlphaConsole, "read: offset=%#x val=%#x\n", daddr,
                     pkt->get<uint32_t>());
@@ -187,17 +186,15 @@ AlphaConsole::read(PacketPtr pkt)
                     pkt->get<uint64_t>());
             break;
         default:
-          pkt->result = Packet::BadAddress;
+          pkt->setBadAddress();
     }
-    if (pkt->result == Packet::Unknown)
-        pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
 Tick
 AlphaConsole::write(PacketPtr pkt)
 {
-    assert(pkt->result == Packet::Unknown);
     assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
     Addr daddr = pkt->getAddr() - pioAddr;
 
@@ -245,7 +242,7 @@ AlphaConsole::write(PacketPtr pkt)
             panic("Unknown 64bit access, %#x\n", daddr);
     }
 
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
 
     return pioDelay;
 }
diff --git a/src/dev/alpha/tsunami_cchip.cc b/src/dev/alpha/tsunami_cchip.cc
index 118160adf..a7175d90c 100644
--- a/src/dev/alpha/tsunami_cchip.cc
+++ b/src/dev/alpha/tsunami_cchip.cc
@@ -78,7 +78,6 @@ TsunamiCChip::read(PacketPtr pkt)
 {
     DPRINTF(Tsunami, "read  va=%#x size=%d\n", pkt->getAddr(), pkt->getSize());
 
-    assert(pkt->result == Packet::Unknown);
     assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
 
     Addr regnum = (pkt->getAddr() - pioAddr) >> 6;
@@ -181,7 +180,7 @@ TsunamiCChip::read(PacketPtr pkt)
     DPRINTF(Tsunami, "Tsunami CChip: read  regnum=%#x size=%d data=%lld\n",
             regnum, pkt->getSize(), pkt->get<uint64_t>());
 
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
@@ -365,7 +364,7 @@ TsunamiCChip::write(PacketPtr pkt)
               panic("default in cchip read reached, accessing 0x%x\n");
         }  // swtich(regnum)
     } // not BIG_TSUNAMI write
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
diff --git a/src/dev/alpha/tsunami_io.cc b/src/dev/alpha/tsunami_io.cc
index 58933428c..f59a06fba 100644
--- a/src/dev/alpha/tsunami_io.cc
+++ b/src/dev/alpha/tsunami_io.cc
@@ -461,7 +461,6 @@ TsunamiIO::frequency() const
 Tick
 TsunamiIO::read(PacketPtr pkt)
 {
-    assert(pkt->result == Packet::Unknown);
     assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
 
     Addr daddr = pkt->getAddr() - pioAddr;
@@ -520,14 +519,13 @@ TsunamiIO::read(PacketPtr pkt)
     } else {
        panic("I/O Read - invalid size - va %#x size %d\n", pkt->getAddr(), pkt->getSize());
     }
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
 Tick
 TsunamiIO::write(PacketPtr pkt)
 {
-    assert(pkt->result == Packet::Unknown);
     assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
     Addr daddr = pkt->getAddr() - pioAddr;
 
@@ -600,7 +598,7 @@ TsunamiIO::write(PacketPtr pkt)
         panic("I/O Write - va%#x size %d data %#x\n", pkt->getAddr(), pkt->getSize(), pkt->get<uint8_t>());
     }
 
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
diff --git a/src/dev/alpha/tsunami_pchip.cc b/src/dev/alpha/tsunami_pchip.cc
index f30199337..be164e5b9 100644
--- a/src/dev/alpha/tsunami_pchip.cc
+++ b/src/dev/alpha/tsunami_pchip.cc
@@ -71,7 +71,6 @@ TsunamiPChip::TsunamiPChip(Params *p)
 Tick
 TsunamiPChip::read(PacketPtr pkt)
 {
-    assert(pkt->result == Packet::Unknown);
     assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
 
     pkt->allocate();
@@ -145,7 +144,7 @@ TsunamiPChip::read(PacketPtr pkt)
       default:
           panic("Default in PChip Read reached reading 0x%x\n", daddr);
     }
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 
 }
@@ -153,7 +152,6 @@ TsunamiPChip::read(PacketPtr pkt)
 Tick
 TsunamiPChip::write(PacketPtr pkt)
 {
-    assert(pkt->result == Packet::Unknown);
     assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
     Addr daddr = (pkt->getAddr() - pioAddr) >> 6;
 
@@ -224,7 +222,7 @@ TsunamiPChip::write(PacketPtr pkt)
 
     } // uint64_t
 
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
diff --git a/src/dev/i8254xGBe.cc b/src/dev/i8254xGBe.cc
index baf13c49a..7ea4c704b 100644
--- a/src/dev/i8254xGBe.cc
+++ b/src/dev/i8254xGBe.cc
@@ -271,7 +271,7 @@ IGbE::read(PacketPtr pkt)
             pkt->set<uint32_t>(0);
     };
 
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
@@ -543,7 +543,7 @@ IGbE::write(PacketPtr pkt)
            panic("Write request to unknown register number: %#x\n", daddr);
     };
 
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
diff --git a/src/dev/ide_ctrl.cc b/src/dev/ide_ctrl.cc
index 921ba1cd0..01243ae73 100644
--- a/src/dev/ide_ctrl.cc
+++ b/src/dev/ide_ctrl.cc
@@ -295,7 +295,7 @@ IdeController::readConfig(PacketPtr pkt)
       default:
         panic("invalid access size(?) for PCI configspace!\n");
     }
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return configDelay;
 
 }
@@ -403,7 +403,7 @@ IdeController::writeConfig(PacketPtr pkt)
             bm_enabled = false;
         break;
     }
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return configDelay;
 }
 
@@ -423,7 +423,7 @@ IdeController::read(PacketPtr pkt)
     parseAddr(pkt->getAddr(), offset, channel, reg_type);
 
     if (!io_enabled) {
-        pkt->result = Packet::Success;
+        pkt->makeAtomicResponse();
         return pioDelay;
     }
 
@@ -490,7 +490,7 @@ IdeController::read(PacketPtr pkt)
     DPRINTF(IdeCtrl, "read from offset: %#x size: %#x data: %#x\n",
             offset, pkt->getSize(), pkt->get<uint32_t>());
 
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
@@ -506,7 +506,7 @@ IdeController::write(PacketPtr pkt)
     parseAddr(pkt->getAddr(), offset, channel, reg_type);
 
     if (!io_enabled) {
-        pkt->result = Packet::Success;
+        pkt->makeAtomicResponse();
         DPRINTF(IdeCtrl, "io not enabled\n");
         return pioDelay;
     }
@@ -514,7 +514,7 @@ IdeController::write(PacketPtr pkt)
     switch (reg_type) {
       case BMI_BLOCK:
         if (!bm_enabled) {
-            pkt->result = Packet::Success;
+            pkt->makeAtomicResponse();
             return pioDelay;
         }
 
@@ -673,7 +673,7 @@ IdeController::write(PacketPtr pkt)
             offset, pkt->getSize(), pkt->get<uint32_t>());
 
 
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
diff --git a/src/dev/io_device.cc b/src/dev/io_device.cc
index ecbb391ef..806d13d07 100644
--- a/src/dev/io_device.cc
+++ b/src/dev/io_device.cc
@@ -100,9 +100,7 @@ DmaPort::DmaPort(DmaDevice *dev, System *s)
 bool
 DmaPort::recvTiming(PacketPtr pkt)
 {
-
-
-    if (pkt->result == Packet::Nacked) {
+    if (pkt->wasNacked()) {
         DPRINTF(DMA, "Received nacked Pkt %#x with State: %#x Addr: %#x\n",
                pkt, pkt->senderState, pkt->getAddr());
 
diff --git a/src/dev/isa_fake.cc b/src/dev/isa_fake.cc
index c36ddeb83..5cd0afb36 100644
--- a/src/dev/isa_fake.cc
+++ b/src/dev/isa_fake.cc
@@ -56,7 +56,6 @@ IsaFake::IsaFake(Params *p)
 Tick
 IsaFake::read(PacketPtr pkt)
 {
-    assert(pkt->result == Packet::Unknown);
 
     if (params()->warnAccess != "")
         warn("Device %s accessed by read to address %#x size=%d\n",
@@ -64,7 +63,7 @@ IsaFake::read(PacketPtr pkt)
     if (params()->retBadAddr) {
         DPRINTF(Tsunami, "read to bad address va=%#x size=%d\n",
                 pkt->getAddr(), pkt->getSize());
-        pkt->result = Packet::BadAddress;
+        pkt->setBadAddress();
     } else {
         assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
         DPRINTF(Tsunami, "read  va=%#x size=%d\n",
@@ -85,7 +84,7 @@ IsaFake::read(PacketPtr pkt)
           default:
             panic("invalid access size!\n");
         }
-        pkt->result = Packet::Success;
+        pkt->makeAtomicResponse();
     }
     return pioDelay;
 }
@@ -117,7 +116,7 @@ IsaFake::write(PacketPtr pkt)
     if (params()->retBadAddr) {
         DPRINTF(Tsunami, "write to bad address va=%#x size=%d \n",
                 pkt->getAddr(), pkt->getSize());
-        pkt->result = Packet::BadAddress;
+        pkt->setBadAddress();
     } else {
         DPRINTF(Tsunami, "write - va=%#x size=%d \n",
                 pkt->getAddr(), pkt->getSize());
@@ -140,7 +139,7 @@ IsaFake::write(PacketPtr pkt)
                 panic("invalid access size!\n");
             }
         }
-        pkt->result = Packet::Success;
+        pkt->makeAtomicResponse();
     }
     return pioDelay;
 }
diff --git a/src/dev/ns_gige.cc b/src/dev/ns_gige.cc
index e9d9c419d..86f664238 100644
--- a/src/dev/ns_gige.cc
+++ b/src/dev/ns_gige.cc
@@ -487,7 +487,7 @@ NSGigE::writeConfig(PacketPtr pkt)
             ioEnable = false;
         break;
     }
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return configDelay;
 }
 
@@ -519,7 +519,7 @@ NSGigE::read(PacketPtr pkt)
         // doesn't actually DEPEND upon their values
         // MIB are just hardware stats keepers
         pkt->set<uint32_t>(0);
-        pkt->result = Packet::Success;
+        pkt->makeAtomicResponse();
         return pioDelay;
     } else if (daddr > 0x3FC)
         panic("Something is messed up!\n");
@@ -715,7 +715,7 @@ NSGigE::read(PacketPtr pkt)
         DPRINTF(EthernetPIO, "read from %#x: data=%d data=%#x\n",
                 daddr, reg, reg);
 
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
@@ -1122,7 +1122,7 @@ NSGigE::write(PacketPtr pkt)
     } else {
         panic("Invalid Request Size");
     }
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
diff --git a/src/dev/pciconfigall.cc b/src/dev/pciconfigall.cc
index bd1855847..b07ee1a49 100644
--- a/src/dev/pciconfigall.cc
+++ b/src/dev/pciconfigall.cc
@@ -54,7 +54,6 @@ PciConfigAll::PciConfigAll(Params *p)
 Tick
 PciConfigAll::read(PacketPtr pkt)
 {
-    assert(pkt->result == Packet::Unknown);
 
     pkt->allocate();
 
@@ -74,14 +73,13 @@ PciConfigAll::read(PacketPtr pkt)
       default:
         panic("invalid access size(?) for PCI configspace!\n");
     }
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return params()->pio_delay;
 }
 
 Tick
 PciConfigAll::write(PacketPtr pkt)
 {
-    assert(pkt->result == Packet::Unknown);
     panic("Attempting to write to config space on non-existant device\n");
     M5_DUMMY_RETURN
 }
diff --git a/src/dev/pcidev.cc b/src/dev/pcidev.cc
index c2a2bc02d..85337c841 100644
--- a/src/dev/pcidev.cc
+++ b/src/dev/pcidev.cc
@@ -68,7 +68,6 @@ PciDev::PciConfigPort::PciConfigPort(PciDev *dev, int busid, int devid,
 Tick
 PciDev::PciConfigPort::recvAtomic(PacketPtr pkt)
 {
-    assert(pkt->result == Packet::Unknown);
     assert(pkt->getAddr() >= configAddr &&
            pkt->getAddr() < configAddr + PCI_CONFIG_SIZE);
     return pkt->isRead() ? device->readConfig(pkt) : device->writeConfig(pkt);
@@ -156,7 +155,7 @@ PciDev::readConfig(PacketPtr pkt)
       default:
         panic("invalid access size(?) for PCI configspace!\n");
     }
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return configDelay;
 
 }
@@ -283,7 +282,7 @@ PciDev::writeConfig(PacketPtr pkt)
       default:
         panic("invalid access size(?) for PCI configspace!\n");
     }
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return configDelay;
 
 }
diff --git a/src/dev/sparc/dtod.cc b/src/dev/sparc/dtod.cc
index 42275c60a..22df873b6 100644
--- a/src/dev/sparc/dtod.cc
+++ b/src/dev/sparc/dtod.cc
@@ -74,7 +74,6 @@ DumbTOD::DumbTOD(Params *p)
 Tick
 DumbTOD::read(PacketPtr pkt)
 {
-    assert(pkt->result == Packet::Unknown);
     assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
     assert(pkt->getSize() == 8);
 
@@ -82,7 +81,7 @@ DumbTOD::read(PacketPtr pkt)
     pkt->set(todTime);
     todTime += 1000;
 
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
diff --git a/src/dev/sparc/iob.cc b/src/dev/sparc/iob.cc
index e686e51f7..b27f45eba 100644
--- a/src/dev/sparc/iob.cc
+++ b/src/dev/sparc/iob.cc
@@ -72,7 +72,6 @@ Iob::Iob(Params *p)
 Tick
 Iob::read(PacketPtr pkt)
 {
-    assert(pkt->result == Packet::Unknown);
 
     if (pkt->getAddr() >= iobManAddr && pkt->getAddr() < iobManAddr + iobManSize)
         readIob(pkt);
@@ -81,7 +80,7 @@ Iob::read(PacketPtr pkt)
     else
         panic("Invalid address reached Iob\n");
 
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
@@ -176,7 +175,7 @@ Iob::write(PacketPtr pkt)
         panic("Invalid address reached Iob\n");
 
 
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
diff --git a/src/dev/sparc/mm_disk.cc b/src/dev/sparc/mm_disk.cc
index 81c5c589a..bbb773c48 100644
--- a/src/dev/sparc/mm_disk.cc
+++ b/src/dev/sparc/mm_disk.cc
@@ -61,7 +61,6 @@ MmDisk::read(PacketPtr pkt)
     uint32_t d32;
     uint64_t d64;
 
-    assert(pkt->result == Packet::Unknown);
     assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
     accessAddr = pkt->getAddr() - pioAddr;
 
@@ -101,7 +100,7 @@ MmDisk::read(PacketPtr pkt)
         panic("Invalid access size\n");
     }
 
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
@@ -115,7 +114,6 @@ MmDisk::write(PacketPtr pkt)
     uint32_t d32;
     uint64_t d64;
 
-    assert(pkt->result == Packet::Unknown);
     assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
     accessAddr = pkt->getAddr() - pioAddr;
 
@@ -157,7 +155,7 @@ MmDisk::write(PacketPtr pkt)
         panic("Invalid access size\n");
     }
 
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
diff --git a/src/dev/uart8250.cc b/src/dev/uart8250.cc
index 50307aad4..0ad80e077 100644
--- a/src/dev/uart8250.cc
+++ b/src/dev/uart8250.cc
@@ -111,7 +111,6 @@ Uart8250::Uart8250(Params *p)
 Tick
 Uart8250::read(PacketPtr pkt)
 {
-    assert(pkt->result == Packet::Unknown);
     assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
     assert(pkt->getSize() == 1);
 
@@ -186,7 +185,7 @@ Uart8250::read(PacketPtr pkt)
 /*    uint32_t d32 = *data;
     DPRINTF(Uart, "Register read to register %#x returned %#x\n", daddr, d32);
 */
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
@@ -194,7 +193,6 @@ Tick
 Uart8250::write(PacketPtr pkt)
 {
 
-    assert(pkt->result == Packet::Unknown);
     assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
     assert(pkt->getSize() == 1);
 
@@ -272,7 +270,7 @@ Uart8250::write(PacketPtr pkt)
             panic("Tried to access a UART port that doesn't exist\n");
             break;
     }
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
     return pioDelay;
 }
 
diff --git a/src/mem/bridge.cc b/src/mem/bridge.cc
index fb4574844..77178d518 100644
--- a/src/mem/bridge.cc
+++ b/src/mem/bridge.cc
@@ -121,14 +121,13 @@ Bridge::BridgePort::recvTiming(PacketPtr pkt)
                     otherPort->sendQueue.size(), otherPort->queuedRequests,
                     otherPort->outstandingResponses);
 
-    if (pkt->isRequest() && otherPort->reqQueueFull() && pkt->result !=
-            Packet::Nacked) {
+    if (pkt->isRequest() && otherPort->reqQueueFull() && !pkt->wasNacked()) {
         DPRINTF(BusBridge, "Remote queue full, nacking\n");
         nackRequest(pkt);
         return true;
     }
 
-    if (pkt->needsResponse() && pkt->result != Packet::Nacked)
+    if (pkt->needsResponse() && !pkt->wasNacked())
         if (respQueueFull()) {
             DPRINTF(BusBridge, "Local queue full, no space for response, nacking\n");
             DPRINTF(BusBridge, "queue size: %d outreq: %d outstanding resp: %d\n",
@@ -149,7 +148,7 @@ void
 Bridge::BridgePort::nackRequest(PacketPtr pkt)
 {
     // Nack the packet
-    pkt->result = Packet::Nacked;
+    pkt->setNacked();
     pkt->setDest(pkt->getSrc());
 
     //put it on the list to send
@@ -194,7 +193,7 @@ Bridge::BridgePort::nackRequest(PacketPtr pkt)
 void
 Bridge::BridgePort::queueForSendTiming(PacketPtr pkt)
 {
-    if (pkt->isResponse() || pkt->result == Packet::Nacked) {
+    if (pkt->isResponse() || pkt->wasNacked()) {
         // This is a response for a request we forwarded earlier.  The
         // corresponding PacketBuffer should be stored in the packet's
         // senderState field.
@@ -206,7 +205,7 @@ Bridge::BridgePort::queueForSendTiming(PacketPtr pkt)
 
         // Check if this packet was expecting a response and it's a nacked
         // packet, in which case we will never being seeing it
-        if (buf->expectResponse && pkt->result == Packet::Nacked)
+        if (buf->expectResponse && pkt->wasNacked())
             --outstandingResponses;
 
 
@@ -217,7 +216,7 @@ Bridge::BridgePort::queueForSendTiming(PacketPtr pkt)
     }
 
 
-    if (pkt->isRequest() && pkt->result != Packet::Nacked) {
+    if (pkt->isRequest() && !pkt->wasNacked()) {
         ++queuedRequests;
     }
 
@@ -251,7 +250,7 @@ Bridge::BridgePort::trySend()
 
     // Ugly! @todo When multilevel coherence works this will be removed
     if (pkt->cmd == MemCmd::WriteInvalidateReq && fixPartialWrite &&
-            pkt->result != Packet::Nacked) {
+            !pkt->wasNacked()) {
         PacketPtr funcPkt = new Packet(pkt->req, MemCmd::WriteReq,
                             Packet::Broadcast);
         funcPkt->dataStatic(pkt->getPtr<uint8_t>());
@@ -264,7 +263,7 @@ Bridge::BridgePort::trySend()
             buf->origSrc, pkt->getDest(), pkt->getAddr());
 
     bool wasReq = pkt->isRequest();
-    bool wasNacked = pkt->result == Packet::Nacked;
+    bool wasNacked = pkt->wasNacked();
 
     if (sendTiming(pkt)) {
         // send successful
diff --git a/src/mem/bridge.hh b/src/mem/bridge.hh
index 89d626611..7af764437 100644
--- a/src/mem/bridge.hh
+++ b/src/mem/bridge.hh
@@ -86,7 +86,7 @@ class Bridge : public MemObject
                   expectResponse(_pkt->needsResponse() && !nack)
 
             {
-                if (!pkt->isResponse() && !nack && pkt->result != Packet::Nacked)
+                if (!pkt->isResponse() && !nack && !pkt->wasNacked())
                     pkt->senderState = this;
             }
 
diff --git a/src/mem/bus.cc b/src/mem/bus.cc
index ffd5e25a7..83ce0f87d 100644
--- a/src/mem/bus.cc
+++ b/src/mem/bus.cc
@@ -173,9 +173,8 @@ bool
 Bus::recvTiming(PacketPtr pkt)
 {
     Port *port;
-    DPRINTF(Bus, "recvTiming: packet src %d dest %d addr 0x%x cmd %s result %d\n",
-            pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString(),
-            pkt->result);
+    DPRINTF(Bus, "recvTiming: packet src %d dest %d addr 0x%x cmd %s\n",
+            pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString());
 
     BusPort *pktPort;
     if (pkt->getSrc() == defaultId)
@@ -329,6 +328,8 @@ Bus::functionalSnoop(PacketPtr pkt, Port *responder)
     // id after each
     int src_id = pkt->getSrc();
 
+    assert(pkt->isRequest()); // hasn't already been satisfied
+
     for (SnoopIter s_iter = snoopPorts.begin();
          s_iter != snoopPorts.end();
          s_iter++) {
@@ -336,7 +337,7 @@ Bus::functionalSnoop(PacketPtr pkt, Port *responder)
         if (p != responder && p->getId() != src_id) {
             p->sendFunctional(pkt);
         }
-        if (pkt->result == Packet::Success) {
+        if (pkt->isResponse()) {
             break;
         }
         pkt->setSrc(src_id);
@@ -369,14 +370,15 @@ Bus::recvAtomic(PacketPtr pkt)
     DPRINTF(Bus, "recvAtomic: packet src %d dest %d addr 0x%x cmd %s\n",
             pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString());
     assert(pkt->getDest() == Packet::Broadcast);
+    assert(pkt->isRequest());
 
     // Variables for recording original command and snoop response (if
     // any)... if a snooper respondes, we will need to restore
     // original command so that additional snoops can take place
     // properly
     MemCmd orig_cmd = pkt->cmd;
-    Packet::Result response_result = Packet::Unknown;
     MemCmd response_cmd = MemCmd::InvalidCmd;
+    int orig_src = pkt->getSrc();
 
     Port *target_port = findPort(pkt->getAddr(), pkt->getSrc());
 
@@ -387,20 +389,18 @@ Bus::recvAtomic(PacketPtr pkt)
         assert(p != target_port);
         if (p->getId() != pkt->getSrc()) {
             p->sendAtomic(pkt);
-            if (pkt->result != Packet::Unknown) {
+            if (pkt->isResponse()) {
                 // response from snoop agent
                 assert(pkt->cmd != orig_cmd);
                 assert(pkt->memInhibitAsserted());
-                assert(pkt->isResponse());
                 // should only happen once
-                assert(response_result == Packet::Unknown);
                 assert(response_cmd == MemCmd::InvalidCmd);
                 // save response state
-                response_result = pkt->result;
                 response_cmd = pkt->cmd;
                 // restore original packet state for remaining snoopers
                 pkt->cmd = orig_cmd;
-                pkt->result = Packet::Unknown;
+                pkt->setSrc(orig_src);
+                pkt->setDest(Packet::Broadcast);
             }
         }
     }
@@ -408,13 +408,11 @@ Bus::recvAtomic(PacketPtr pkt)
     Tick response_time = target_port->sendAtomic(pkt);
 
     // if we got a response from a snooper, restore it here
-    if (response_result != Packet::Unknown) {
-        assert(response_cmd != MemCmd::InvalidCmd);
+    if (response_cmd != MemCmd::InvalidCmd) {
         // no one else should have responded
-        assert(pkt->result == Packet::Unknown);
+        assert(!pkt->isResponse());
         assert(pkt->cmd == orig_cmd);
         pkt->cmd = response_cmd;
-        pkt->result = response_result;
     }
 
     // why do we have this packet field and the return value both???
@@ -434,8 +432,8 @@ Bus::recvFunctional(PacketPtr pkt)
     Port* port = findPort(pkt->getAddr(), pkt->getSrc());
     functionalSnoop(pkt, port ? port : interfaces[pkt->getSrc()]);
 
-    // If the snooping found what we were looking for, we're done.
-    if (pkt->result != Packet::Success && port) {
+    // If the snooping hasn't found what we were looking for, keep going.
+    if (!pkt->isResponse() && port) {
         port->sendFunctional(pkt);
     }
 }
diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc
index 5062d6e87..870658675 100644
--- a/src/mem/cache/base_cache.cc
+++ b/src/mem/cache/base_cache.cc
@@ -82,7 +82,7 @@ void
 BaseCache::CachePort::checkAndSendFunctional(PacketPtr pkt)
 {
     checkFunctional(pkt);
-    if (pkt->result != Packet::Success)
+    if (!pkt->isResponse())
         sendFunctional(pkt);
 }
 
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index b76d7e392..1823ea6b9 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -502,7 +502,6 @@ Cache<TagStore>::atomicAccess(PacketPtr pkt)
 
     if (pkt->needsResponse()) {
         pkt->makeAtomicResponse();
-        pkt->result = Packet::Success;
     }
 
     return lat;
@@ -648,14 +647,13 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
     MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
     assert(mshr);
 
-    if (pkt->result == Packet::Nacked) {
+    if (pkt->wasNacked()) {
         //pkt->reinitFromRequest();
         warn("NACKs from devices not connected to the same bus "
              "not implemented\n");
         return;
     }
-    assert(pkt->result != Packet::BadAddress);
-    assert(pkt->result == Packet::Success);
+    assert(!pkt->isError());
     DPRINTF(Cache, "Handling response to %x\n", pkt->getAddr());
 
     MSHRQueue *mq = mshr->queue;
@@ -1142,7 +1140,7 @@ void
 Cache<TagStore>::CpuSidePort::recvFunctional(PacketPtr pkt)
 {
     checkFunctional(pkt);
-    if (pkt->result != Packet::Success)
+    if (!pkt->isResponse())
         myCache()->functionalAccess(pkt, cache->memSidePort);
 }
 
@@ -1180,7 +1178,7 @@ Cache<TagStore>::MemSidePort::recvTiming(PacketPtr pkt)
     // this needs to be fixed so that the cache updates the mshr and sends the
     // packet back out on the link, but it probably won't happen so until this
     // gets fixed, just panic when it does
-    if (pkt->result == Packet::Nacked)
+    if (pkt->wasNacked())
         panic("Need to implement cache resending nacked packets!\n");
 
     if (pkt->isRequest() && blocked) {
@@ -1216,7 +1214,7 @@ void
 Cache<TagStore>::MemSidePort::recvFunctional(PacketPtr pkt)
 {
     checkFunctional(pkt);
-    if (pkt->result != Packet::Success)
+    if (!pkt->isResponse())
         myCache()->functionalAccess(pkt, cache->cpuSidePort);
 }
 
diff --git a/src/mem/packet.cc b/src/mem/packet.cc
index cd0ed8a2e..55fe13f3c 100644
--- a/src/mem/packet.cc
+++ b/src/mem/packet.cc
@@ -115,7 +115,13 @@ MemCmd::commandInfo[] =
         SwapResp, "SwapReq" },
     /* SwapResp -- for Swap ldstub type operations */
     { SET5(IsRead, IsWrite, NeedsExclusive, IsResponse, HasData),
-        InvalidCmd, "SwapResp" }
+            InvalidCmd, "SwapResp" },
+    /* NetworkNackError  -- nacked at network layer (not by protocol) */
+    { SET2(IsRequest, IsError), InvalidCmd, "NetworkNackError" },
+    /* InvalidDestError  -- packet dest field invalid */
+    { SET2(IsRequest, IsError), InvalidCmd, "InvalidDestError" },
+    /* BadAddressError   -- memory address invalid */
+    { SET2(IsRequest, IsError), InvalidCmd, "BadAddressError" }
 };
 
 
@@ -205,7 +211,7 @@ Packet::checkFunctional(Addr addr, int size, uint8_t *data)
         if (func_start >= val_start && func_end <= val_end) {
             allocate();
             std::memcpy(getPtr<uint8_t>(), data + offset, getSize());
-            result = Packet::Success;
+            makeResponse();
             return true;
         } else {
             // In this case the timing packet only partially satisfies
@@ -245,15 +251,6 @@ operator<<(std::ostream &o, const Packet &p)
     o <<  p.getAddr() + p.getSize() - 1 << "] ";
     o.unsetf(std::ios_base::hex| std::ios_base::showbase);
 
-    if (p.result == Packet::Success)
-        o << "Successful ";
-    if (p.result == Packet::BadAddress)
-        o << "BadAddress ";
-    if (p.result == Packet::Nacked)
-        o << "Nacked ";
-    if (p.result == Packet::Unknown)
-        o << "Inflight ";
-
     if (p.isRead())
         o << "Read ";
     if (p.isWrite())
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index fc1c283ed..10b9f490c 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -84,6 +84,13 @@ class MemCmd
         StoreCondResp,
         SwapReq,
         SwapResp,
+        // Error responses
+        // @TODO these should be classified as responses rather than
+        // requests; coding them as requests initially for backwards
+        // compatibility
+        NetworkNackError,  // nacked at network layer (not by protocol)
+        InvalidDestError,  // packet dest field invalid
+        BadAddressError,   // memory address invalid
         NUM_MEM_CMDS
     };
 
@@ -103,6 +110,7 @@ class MemCmd
         IsHWPrefetch,
         IsLocked,       //!< Alpha/MIPS LL or SC access
         HasData,        //!< There is an associated payload
+        IsError,        //!< Error response
         NUM_COMMAND_ATTRIBUTES
     };
 
@@ -135,12 +143,13 @@ class MemCmd
     bool isWrite()  const       { return testCmdAttrib(IsWrite); }
     bool isRequest() const      { return testCmdAttrib(IsRequest); }
     bool isResponse() const     { return testCmdAttrib(IsResponse); }
-    bool needsExclusive() const  { return testCmdAttrib(NeedsExclusive); }
+    bool needsExclusive() const { return testCmdAttrib(NeedsExclusive); }
     bool needsResponse() const  { return testCmdAttrib(NeedsResponse); }
     bool isInvalidate() const   { return testCmdAttrib(IsInvalidate); }
     bool hasData() const        { return testCmdAttrib(HasData); }
     bool isReadWrite() const    { return isRead() && isWrite(); }
     bool isLocked() const       { return testCmdAttrib(IsLocked); }
+    bool isError() const        { return testCmdAttrib(IsError); }
 
     const Command responseCommand() const {
         return commandInfo[cmd].response;
@@ -184,6 +193,12 @@ class Packet : public FastAlloc
 
     typedef MemCmd::Command Command;
 
+    /** The command field of the packet. */
+    MemCmd cmd;
+
+    /** A pointer to the original request. */
+    RequestPtr req;
+
   private:
    /** A pointer to the data being transfered.  It can be differnt
     *    sizes at each level of the heirarchy so it belongs in the
@@ -223,19 +238,28 @@ class Packet : public FastAlloc
      *   (unlike * addr, size, and src). */
     short dest;
 
+    /** The original value of the command field.  Only valid when the
+     * current command field is an error condition; in that case, the
+     * previous contents of the command field are copied here.  This
+     * field is *not* set on non-error responses.
+     */
+    MemCmd origCmd;
+
     /** Are the 'addr' and 'size' fields valid? */
     bool addrSizeValid;
     /** Is the 'src' field valid? */
     bool srcValid;
+    bool destValid;
 
-    enum SnoopFlag {
+    enum Flag {
+        // Snoop flags
         MemInhibit,
         Shared,
-        NUM_SNOOP_FLAGS
+        NUM_PACKET_FLAGS
     };
 
-    /** Coherence snoopFlags for snooping */
-    std::bitset<NUM_SNOOP_FLAGS> snoopFlags;
+    /** Status flags */
+    std::bitset<NUM_PACKET_FLAGS> flags;
 
   public:
 
@@ -252,22 +276,6 @@ class Packet : public FastAlloc
      *   should be routed based on its address. */
     static const short Broadcast = -1;
 
-    /** A pointer to the original request. */
-    RequestPtr req;
-
-    /** A virtual base opaque structure used to hold coherence-related
-     *    state.  A specific subclass would be derived from this to
-     *    carry state specific to a particular coherence protocol.  */
-    class CoherenceState : public FastAlloc {
-      public:
-        virtual ~CoherenceState() {}
-    };
-
-    /** This packet's coherence state.  Caches should use
-     *   dynamic_cast<> to cast to the state appropriate for the
-     *   system's coherence protocol.  */
-    CoherenceState *coherence;
-
     /** A virtual base opaque structure used to hold state associated
      *    with the packet but specific to the sending device (e.g., an
      *    MSHR).  A pointer to this state is returned in the packet's
@@ -284,11 +292,6 @@ class Packet : public FastAlloc
      *   to cast to the state appropriate to the sender. */
     SenderState *senderState;
 
-  public:
-
-    /** The command field of the packet. */
-    MemCmd cmd;
-
     /** Return the string name of the cmd field (for debugging and
      *   tracing). */
     const std::string &cmdString() const { return cmd.toString(); }
@@ -296,68 +299,59 @@ class Packet : public FastAlloc
     /** Return the index of this command. */
     inline int cmdToIndex() const { return cmd.toInt(); }
 
-  public:
-
     bool isRead() const         { return cmd.isRead(); }
     bool isWrite()  const       { return cmd.isWrite(); }
     bool isRequest() const      { return cmd.isRequest(); }
     bool isResponse() const     { return cmd.isResponse(); }
-    bool needsExclusive() const  { return cmd.needsExclusive(); }
+    bool needsExclusive() const { return cmd.needsExclusive(); }
     bool needsResponse() const  { return cmd.needsResponse(); }
     bool isInvalidate() const   { return cmd.isInvalidate(); }
     bool hasData() const        { return cmd.hasData(); }
     bool isReadWrite() const    { return cmd.isReadWrite(); }
     bool isLocked() const       { return cmd.isLocked(); }
-
-    void assertMemInhibit()     { snoopFlags[MemInhibit] = true; }
-    void assertShared()         { snoopFlags[Shared] = true; }
-    bool memInhibitAsserted()   { return snoopFlags[MemInhibit]; }
-    bool sharedAsserted()       { return snoopFlags[Shared]; }
+    bool isError() const        { return cmd.isError(); }
+
+    // Snoop flags
+    void assertMemInhibit()     { flags[MemInhibit] = true; }
+    void assertShared()         { flags[Shared] = true; }
+    bool memInhibitAsserted()   { return flags[MemInhibit]; }
+    bool sharedAsserted()       { return flags[Shared]; }
+
+    // Network error conditions... encapsulate them as methods since
+    // their encoding keeps changing (from result field to command
+    // field, etc.)
+    void setNacked()     { origCmd = cmd; cmd = MemCmd::NetworkNackError; }
+    void setBadAddress() { origCmd = cmd; cmd = MemCmd::BadAddressError; }
+    bool wasNacked()     { return cmd == MemCmd::NetworkNackError; }
+    bool hadBadAddress() { return cmd == MemCmd::BadAddressError; }
 
     bool nic_pkt() { panic("Unimplemented"); M5_DUMMY_RETURN }
 
-    /** Possible results of a packet's request. */
-    enum Result
-    {
-        Success,
-        BadAddress,
-        Nacked,
-        Unknown
-    };
-
-    /** The result of this packet's request. */
-    Result result;
-
     /** Accessor function that returns the source index of the packet. */
-    short getSrc() const { assert(srcValid); return src; }
+    short getSrc() const    { assert(srcValid); return src; }
     void setSrc(short _src) { src = _src; srcValid = true; }
     /** Reset source field, e.g. to retransmit packet on different bus. */
     void clearSrc() { srcValid = false; }
 
     /** Accessor function that returns the destination index of
         the packet. */
-    short getDest() const { return dest; }
-    void setDest(short _dest) { dest = _dest; }
+    short getDest() const     { assert(destValid); return dest; }
+    void setDest(short _dest) { dest = _dest; destValid = true; }
 
     Addr getAddr() const { assert(addrSizeValid); return addr; }
-    int getSize() const { assert(addrSizeValid); return size; }
+    int getSize() const  { assert(addrSizeValid); return size; }
     Addr getOffset(int blkSize) const { return addr & (Addr)(blkSize - 1); }
 
-    void addrOverride(Addr newAddr) { assert(addrSizeValid); addr = newAddr; }
-    void cmdOverride(MemCmd newCmd) { cmd = newCmd; }
-
     /** Constructor.  Note that a Request object must be constructed
      *   first, but the Requests's physical address and size fields
      *   need not be valid. The command and destination addresses
      *   must be supplied.  */
     Packet(Request *_req, MemCmd _cmd, short _dest)
-        :  data(NULL), staticData(false), dynamicData(false), arrayData(false),
+        :  cmd(_cmd), req(_req),
+           data(NULL), staticData(false), dynamicData(false), arrayData(false),
            addr(_req->paddr), size(_req->size), dest(_dest),
-           addrSizeValid(_req->validPaddr), srcValid(false),
-           snoopFlags(0),
-           time(curTick),
-           req(_req), coherence(NULL), senderState(NULL), cmd(_cmd),
-           result(Unknown)
+           addrSizeValid(_req->validPaddr), srcValid(false), destValid(true),
+           flags(0), time(curTick), senderState(NULL)
     {
     }
 
@@ -365,13 +359,11 @@ class Packet : public FastAlloc
      *  a request that is for a whole block, not the address from the req.
      *  this allows for overriding the size/addr of the req.*/
     Packet(Request *_req, MemCmd _cmd, short _dest, int _blkSize)
-        :  data(NULL), staticData(false), dynamicData(false), arrayData(false),
+        :  cmd(_cmd), req(_req),
+           data(NULL), staticData(false), dynamicData(false), arrayData(false),
            addr(_req->paddr & ~(_blkSize - 1)), size(_blkSize), dest(_dest),
-           addrSizeValid(_req->validPaddr), srcValid(false),
-           snoopFlags(0),
-           time(curTick),
-           req(_req), coherence(NULL), senderState(NULL), cmd(_cmd),
-           result(Unknown)
+           addrSizeValid(_req->validPaddr), srcValid(false), destValid(true),
+           flags(0), time(curTick), senderState(NULL)
     {
     }
 
@@ -382,15 +374,14 @@ class Packet : public FastAlloc
      * dynamic data, user must guarantee that the new packet's
      * lifetime is less than that of the original packet. */
     Packet(Packet *origPkt)
-        :  data(NULL), staticData(false), dynamicData(false), arrayData(false),
+        :  cmd(origPkt->cmd), req(origPkt->req),
+           data(NULL), staticData(false), dynamicData(false), arrayData(false),
            addr(origPkt->addr), size(origPkt->size),
            src(origPkt->src), dest(origPkt->dest),
-           addrSizeValid(origPkt->addrSizeValid), srcValid(origPkt->srcValid),
-           snoopFlags(origPkt->snoopFlags),
-           time(curTick),
-           req(origPkt->req), coherence(origPkt->coherence),
-           senderState(origPkt->senderState), cmd(origPkt->cmd),
-           result(origPkt->result)
+           addrSizeValid(origPkt->addrSizeValid),
+           srcValid(origPkt->srcValid), destValid(origPkt->destValid),
+           flags(origPkt->flags),
+           time(curTick), senderState(origPkt->senderState)
     {
     }
 
@@ -405,12 +396,11 @@ class Packet : public FastAlloc
      *   multiple transactions. */
     void reinitFromRequest() {
         assert(req->validPaddr);
-        snoopFlags = 0;
+        flags = 0;
         addr = req->paddr;
         size = req->size;
         time = req->time;
         addrSizeValid = true;
-        result = Unknown;
         if (dynamicData) {
             deleteData();
             dynamicData = false;
@@ -424,34 +414,24 @@ class Packet : public FastAlloc
      * destination fields are *not* modified, as is appropriate for
      * atomic accesses.
      */
-    void makeAtomicResponse()
+    void makeResponse()
     {
         assert(needsResponse());
         assert(isRequest());
-        assert(result == Unknown);
         cmd = cmd.responseCommand();
-        result = Success;
+        dest = src;
+        destValid = srcValid;
+        srcValid = false;
     }
 
-    /**
-     * Perform the additional work required for timing responses above
-     * and beyond atomic responses; i.e., change the destination to
-     * point back to the requester and clear the source field.
-     */
-    void convertAtomicToTimingResponse()
+    void makeAtomicResponse()
     {
-        dest = getSrc();
-        srcValid = false;
+        makeResponse();
     }
 
-    /**
-     * Take a request packet and modify it in place to be suitable for
-     * returning as a response to a timing request.
-     */
     void makeTimingResponse()
     {
-        makeAtomicResponse();
-        convertAtomicToTimingResponse();
+        makeResponse();
     }
 
     /**
@@ -462,9 +442,10 @@ class Packet : public FastAlloc
     void
     reinitNacked()
     {
-        assert(needsResponse() && result == Nacked);
-        dest =  Broadcast;
-        result = Unknown;
+        assert(wasNacked());
+        cmd = origCmd;
+        assert(needsResponse());
+        setDest(Broadcast);
     }
 
 
diff --git a/src/mem/physical.cc b/src/mem/physical.cc
index 93cba96c4..2742eca51 100644
--- a/src/mem/physical.cc
+++ b/src/mem/physical.cc
@@ -322,7 +322,7 @@ PhysicalMemory::doFunctionalAccess(PacketPtr pkt)
               pkt->cmdString());
     }
 
-    pkt->result = Packet::Success;
+    pkt->makeAtomicResponse();
 }
 
 
diff --git a/src/mem/port.cc b/src/mem/port.cc
index e6ea773f2..ba4f23668 100644
--- a/src/mem/port.cc
+++ b/src/mem/port.cc
@@ -58,12 +58,11 @@ void
 Port::blobHelper(Addr addr, uint8_t *p, int size, MemCmd cmd)
 {
     Request req;
-    Packet pkt(&req, cmd, Packet::Broadcast);
 
     for (ChunkGenerator gen(addr, size, peerBlockSize());
          !gen.done(); gen.next()) {
         req.setPhys(gen.addr(), gen.size(), 0);
-        pkt.reinitFromRequest();
+        Packet pkt(&req, cmd, Packet::Broadcast);
         pkt.dataStatic(p);
         sendFunctional(&pkt);
         p += gen.size();
diff --git a/src/mem/tport.cc b/src/mem/tport.cc
index 6c8c12ce2..d6ff64608 100644
--- a/src/mem/tport.cc
+++ b/src/mem/tport.cc
@@ -55,7 +55,7 @@ SimpleTimingPort::recvFunctional(PacketPtr pkt)
     checkFunctional(pkt);
 
     // Just do an atomic access and throw away the returned latency
-    if (pkt->result != Packet::Success)
+    if (!pkt->isResponse())
         recvAtomic(pkt);
 }
 
@@ -68,7 +68,6 @@ SimpleTimingPort::recvTiming(PacketPtr pkt)
     // correctly with the drain code, so that would need to be fixed
     // if we ever added it back.
     assert(pkt->isRequest());
-    assert(pkt->result == Packet::Unknown);
 
     if (pkt->memInhibitAsserted()) {
         // snooper will supply based on copy of packet
@@ -85,7 +84,6 @@ SimpleTimingPort::recvTiming(PacketPtr pkt)
         // recvAtomic() should already have turned packet into
         // atomic response
         assert(pkt->isResponse());
-        pkt->convertAtomicToTimingResponse();
         schedSendTiming(pkt, curTick + latency);
     } else {
         delete pkt->req;
@@ -138,12 +136,15 @@ void
 SimpleTimingPort::sendDeferredPacket()
 {
     assert(deferredPacketReady());
-    bool success = sendTiming(transmitList.front().pkt);
+    // take packet off list here; if recvTiming() on the other side
+    // calls sendTiming() back on us (like SimpleTimingCpu does), then
+    // we get confused by having a non-active packet on transmitList
+    DeferredPacket dp = transmitList.front();
+    transmitList.pop_front();
+    bool success = sendTiming(dp.pkt);
 
     if (success) {
-        //send successful, remove packet
-        transmitList.pop_front();
-        if (!transmitList.empty()) {
+        if (!transmitList.empty() && !sendEvent->scheduled()) {
             Tick time = transmitList.front().tick;
             sendEvent->schedule(time <= curTick ? curTick+1 : time);
         }
@@ -152,6 +153,12 @@ SimpleTimingPort::sendDeferredPacket()
             drainEvent->process();
             drainEvent = NULL;
         }
+    } else {
+        // Unsuccessful, need to put back on transmitList.  Callee
+        // should not have messed with it (since it didn't accept that
+        // packet), so we can just push it back on the front.
+        assert(!sendEvent->scheduled());
+        transmitList.push_front(dp);
     }
 
     waitingOnRetry = !success;
-- 
cgit v1.2.3


From 6babda7123be5e69db137e77589d88c768c19345 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sat, 30 Jun 2007 13:34:16 -0700
Subject: Fix up a few statistics problems. Stats pretty much line up with old
 code, except: - bug in old code included L1 latency in L2 miss time, making
 it too high - UniCoherence did cache-to-cache transfers even from non-owner
 caches, so occasionally the icache would get a block from the dcache not the
 L2 - L2 can now receive ReadExReq from L1 since L1s have coherence

--HG--
extra : convert_revision : 5052c1a1767b5a662f30a88f16012165a73b791c
---
 src/mem/cache/base_cache.cc      | 54 +++++++++++++++++++++-------------------
 src/mem/cache/base_cache.hh      |  6 ++---
 src/mem/cache/cache_impl.hh      | 21 +++++++++-------
 src/mem/cache/miss/mshr.cc       | 20 +++++++--------
 src/mem/cache/miss/mshr.hh       | 10 +++++---
 src/mem/cache/miss/mshr_queue.cc |  4 +--
 src/mem/cache/miss/mshr_queue.hh |  6 ++---
 src/mem/tport.hh                 |  2 +-
 8 files changed, 65 insertions(+), 58 deletions(-)

diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc
index 870658675..ec9e1cf9b 100644
--- a/src/mem/cache/base_cache.cc
+++ b/src/mem/cache/base_cache.cc
@@ -150,20 +150,29 @@ BaseCache::regStats()
             ;
     }
 
+// Helper macros for summing the right subset of a per-command stat
+// array, so the set of commands treated as "demand" vs "non-demand"
+// is defined in one place.  The argument is parenthesized for safety.
+#define SUM_DEMAND(s) \
+    ((s)[MemCmd::ReadReq] + (s)[MemCmd::WriteReq] + (s)[MemCmd::ReadExReq])
+
+// TODO: include writebacks? prior code did so only for overall hits/misses
+#define SUM_NON_DEMAND(s) \
+    ((s)[MemCmd::SoftPFReq] + (s)[MemCmd::HardPFReq])
+
     demandHits
         .name(name() + ".demand_hits")
         .desc("number of demand (read+write) hits")
         .flags(total)
         ;
-    demandHits = hits[MemCmd::ReadReq] + hits[MemCmd::WriteReq];
+    demandHits = SUM_DEMAND(hits);
 
     overallHits
         .name(name() + ".overall_hits")
         .desc("number of overall hits")
         .flags(total)
         ;
-    overallHits = demandHits + hits[MemCmd::SoftPFReq] + hits[MemCmd::HardPFReq]
-        + hits[MemCmd::Writeback];
+    overallHits = demandHits + SUM_NON_DEMAND(hits);
 
     // Miss statistics
     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
@@ -183,15 +192,14 @@ BaseCache::regStats()
         .desc("number of demand (read+write) misses")
         .flags(total)
         ;
-    demandMisses = misses[MemCmd::ReadReq] + misses[MemCmd::WriteReq];
+    demandMisses = SUM_DEMAND(misses);
 
     overallMisses
         .name(name() + ".overall_misses")
         .desc("number of overall misses")
         .flags(total)
         ;
-    overallMisses = demandMisses + misses[MemCmd::SoftPFReq] +
-        misses[MemCmd::HardPFReq] + misses[MemCmd::Writeback];
+    overallMisses = demandMisses + SUM_NON_DEMAND(misses);
 
     // Miss latency statistics
     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
@@ -211,15 +219,14 @@ BaseCache::regStats()
         .desc("number of demand (read+write) miss cycles")
         .flags(total)
         ;
-    demandMissLatency = missLatency[MemCmd::ReadReq] + missLatency[MemCmd::WriteReq];
+    demandMissLatency = SUM_DEMAND(missLatency);
 
     overallMissLatency
         .name(name() + ".overall_miss_latency")
         .desc("number of overall miss cycles")
         .flags(total)
         ;
-    overallMissLatency = demandMissLatency + missLatency[MemCmd::SoftPFReq] +
-        missLatency[MemCmd::HardPFReq];
+    overallMissLatency = demandMissLatency + SUM_NON_DEMAND(missLatency);
 
     // access formulas
     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
@@ -368,15 +375,14 @@ BaseCache::regStats()
         .desc("number of demand (read+write) MSHR hits")
         .flags(total)
         ;
-    demandMshrHits = mshr_hits[MemCmd::ReadReq] + mshr_hits[MemCmd::WriteReq];
+    demandMshrHits = SUM_DEMAND(mshr_hits);
 
     overallMshrHits
         .name(name() + ".overall_mshr_hits")
         .desc("number of overall MSHR hits")
         .flags(total)
         ;
-    overallMshrHits = demandMshrHits + mshr_hits[MemCmd::SoftPFReq] +
-        mshr_hits[MemCmd::HardPFReq];
+    overallMshrHits = demandMshrHits + SUM_NON_DEMAND(mshr_hits);
 
     // MSHR miss statistics
     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
@@ -396,15 +402,14 @@ BaseCache::regStats()
         .desc("number of demand (read+write) MSHR misses")
         .flags(total)
         ;
-    demandMshrMisses = mshr_misses[MemCmd::ReadReq] + mshr_misses[MemCmd::WriteReq];
+    demandMshrMisses = SUM_DEMAND(mshr_misses);
 
     overallMshrMisses
         .name(name() + ".overall_mshr_misses")
         .desc("number of overall MSHR misses")
         .flags(total)
         ;
-    overallMshrMisses = demandMshrMisses + mshr_misses[MemCmd::SoftPFReq] +
-        mshr_misses[MemCmd::HardPFReq];
+    overallMshrMisses = demandMshrMisses + SUM_NON_DEMAND(mshr_misses);
 
     // MSHR miss latency statistics
     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
@@ -424,16 +429,15 @@ BaseCache::regStats()
         .desc("number of demand (read+write) MSHR miss cycles")
         .flags(total)
         ;
-    demandMshrMissLatency = mshr_miss_latency[MemCmd::ReadReq]
-        + mshr_miss_latency[MemCmd::WriteReq];
+    demandMshrMissLatency = SUM_DEMAND(mshr_miss_latency);
 
     overallMshrMissLatency
         .name(name() + ".overall_mshr_miss_latency")
         .desc("number of overall MSHR miss cycles")
         .flags(total)
         ;
-    overallMshrMissLatency = demandMshrMissLatency +
-        mshr_miss_latency[MemCmd::SoftPFReq] + mshr_miss_latency[MemCmd::HardPFReq];
+    overallMshrMissLatency =
+        demandMshrMissLatency + SUM_NON_DEMAND(mshr_miss_latency);
 
     // MSHR uncacheable statistics
     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
@@ -453,9 +457,8 @@ BaseCache::regStats()
         .desc("number of overall MSHR uncacheable misses")
         .flags(total)
         ;
-    overallMshrUncacheable = mshr_uncacheable[MemCmd::ReadReq]
-        + mshr_uncacheable[MemCmd::WriteReq] + mshr_uncacheable[MemCmd::SoftPFReq]
-        + mshr_uncacheable[MemCmd::HardPFReq];
+    overallMshrUncacheable =
+        SUM_DEMAND(mshr_uncacheable) + SUM_NON_DEMAND(mshr_uncacheable);
 
     // MSHR miss latency statistics
     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
@@ -475,10 +478,9 @@ BaseCache::regStats()
         .desc("number of overall MSHR uncacheable cycles")
         .flags(total)
         ;
-    overallMshrUncacheableLatency = mshr_uncacheable_lat[MemCmd::ReadReq]
-        + mshr_uncacheable_lat[MemCmd::WriteReq]
-        + mshr_uncacheable_lat[MemCmd::SoftPFReq]
-        + mshr_uncacheable_lat[MemCmd::HardPFReq];
+    overallMshrUncacheableLatency =
+        SUM_DEMAND(mshr_uncacheable_lat) +
+        SUM_NON_DEMAND(mshr_uncacheable_lat);
 
 #if 0
     // MSHR access formulas
diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh
index 09484a14a..fcc040bd9 100644
--- a/src/mem/cache/base_cache.hh
+++ b/src/mem/cache/base_cache.hh
@@ -476,10 +476,10 @@ class BaseCache : public MemObject
         }
     }
 
-    Tick nextMSHRReadyTick()
+    Tick nextMSHRReadyTime()
     {
-        return std::min(mshrQueue.nextMSHRReadyTick(),
-                        writeBuffer.nextMSHRReadyTick());
+        return std::min(mshrQueue.nextMSHRReadyTime(),
+                        writeBuffer.nextMSHRReadyTime());
     }
 
     /**
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 1823ea6b9..568e7ff63 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -615,7 +615,7 @@ Cache<TagStore>::satisfyMSHR(MSHR *mshr, PacketPtr pkt,
 
             if (!target->pkt->req->isUncacheable()) {
                 missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] +=
-                    completion_time - target->time;
+                    completion_time - target->recvTime;
             }
             target->pkt->makeTimingResponse();
             cpuSidePort->respond(target->pkt, completion_time);
@@ -668,11 +668,14 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
     // Can we deallocate MSHR when done?
     bool deallocate = false;
 
+    // Initial target is used just for stats
+    MSHR::Target *initial_tgt = mshr->getTarget();
+    int stats_cmd_idx = initial_tgt->pkt->cmdToIndex();
+    Tick miss_latency = curTick - initial_tgt->recvTime;
+
     if (mshr->isCacheFill) {
-#if 0
-        mshr_miss_latency[mshr->originalCmd.toInt()][0/*pkt->req->getThreadNum()*/] +=
-            curTick - pkt->time;
-#endif
+        mshr_miss_latency[stats_cmd_idx][0/*pkt->req->getThreadNum()*/] +=
+            miss_latency;
         DPRINTF(Cache, "Block for addr %x being updated in Cache\n",
                 pkt->getAddr());
         BlkType *blk = tags->findBlock(pkt->getAddr());
@@ -698,8 +701,8 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
         }
     } else {
         if (pkt->req->isUncacheable()) {
-            mshr_uncacheable_lat[pkt->cmd.toInt()][0/*pkt->req->getThreadNum()*/] +=
-                curTick - pkt->time;
+            mshr_uncacheable_lat[stats_cmd_idx][0/*pkt->req->getThreadNum()*/] +=
+                miss_latency;
         }
 
         while (mshr->hasTargets()) {
@@ -1262,8 +1265,8 @@ Cache<TagStore>::MemSidePort::sendPacket()
     // tried to send packet... if it was successful (no retry), see if
     // we need to rerequest bus or not
     if (!waitingOnRetry) {
-        Tick nextReady = std::min(deferredPacketReadyTick(),
-                                  myCache()->nextMSHRReadyTick());
+        Tick nextReady = std::min(deferredPacketReadyTime(),
+                                  myCache()->nextMSHRReadyTime());
         // @TODO: need to facotr in prefetch requests here somehow
         if (nextReady != MaxTick) {
             DPRINTF(CachePort, "more packets to send @ %d\n", nextReady);
diff --git a/src/mem/cache/miss/mshr.cc b/src/mem/cache/miss/mshr.cc
index 63b3cacc2..5d5e63f90 100644
--- a/src/mem/cache/miss/mshr.cc
+++ b/src/mem/cache/miss/mshr.cc
@@ -56,11 +56,11 @@ MSHR::MSHR()
 
 void
 MSHR::allocate(Addr _addr, int _size, PacketPtr target,
-               Tick when, Counter _order)
+               Tick whenReady, Counter _order)
 {
     addr = _addr;
     size = _size;
-    readyTick = when;
+    readyTime = whenReady;
     order = _order;
     assert(target);
     isCacheFill = false;
@@ -71,7 +71,7 @@ MSHR::allocate(Addr _addr, int _size, PacketPtr target,
     ntargets = 1;
     // Don't know of a case where we would allocate a new MSHR for a
     // snoop (mem-side request), so set cpuSide to true here.
-    targets.push_back(Target(target, when, _order, true));
+    targets.push_back(Target(target, whenReady, _order, true));
     assert(deferredTargets.empty());
     deferredNeedsExclusive = false;
     pendingInvalidate = false;
@@ -94,33 +94,33 @@ MSHR::deallocate()
  * Adds a target to an MSHR
  */
 void
-MSHR::allocateTarget(PacketPtr target, Tick when, Counter _order)
+MSHR::allocateTarget(PacketPtr target, Tick whenReady, Counter _order)
 {
     if (inService) {
         if (!deferredTargets.empty() || pendingInvalidate ||
             (!needsExclusive && target->needsExclusive())) {
             // need to put on deferred list
-            deferredTargets.push_back(Target(target, when, _order, true));
+            deferredTargets.push_back(Target(target, whenReady, _order, true));
             if (target->needsExclusive()) {
                 deferredNeedsExclusive = true;
             }
         } else {
             // still OK to append to outstanding request
-            targets.push_back(Target(target, when, _order, true));
+            targets.push_back(Target(target, whenReady, _order, true));
         }
     } else {
         if (target->needsExclusive()) {
             needsExclusive = true;
         }
 
-        targets.push_back(Target(target, when, _order, true));
+        targets.push_back(Target(target, whenReady, _order, true));
     }
 
     ++ntargets;
 }
 
 void
-MSHR::allocateSnoopTarget(PacketPtr pkt, Tick when, Counter _order)
+MSHR::allocateSnoopTarget(PacketPtr pkt, Tick whenReady, Counter _order)
 {
     assert(inService); // don't bother to call otherwise
 
@@ -137,7 +137,7 @@ MSHR::allocateSnoopTarget(PacketPtr pkt, Tick when, Counter _order)
     if (needsExclusive || pkt->needsExclusive()) {
         // actual target device (typ. PhysicalMemory) will delete the
         // packet on reception, so we need to save a copy here
-        targets.push_back(Target(new Packet(pkt), when, _order, false));
+        targets.push_back(Target(new Packet(pkt), whenReady, _order, false));
         ++ntargets;
 
         if (needsExclusive) {
@@ -177,7 +177,7 @@ MSHR::promoteDeferredTargets()
     pendingShared = false;
     deferredNeedsExclusive = false;
     order = targets.front().order;
-    readyTick = std::max(curTick, targets.front().time);
+    readyTime = std::max(curTick, targets.front().readyTime);
 
     return true;
 }
diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh
index 4db7b1cfe..293f290b8 100644
--- a/src/mem/cache/miss/mshr.hh
+++ b/src/mem/cache/miss/mshr.hh
@@ -54,15 +54,17 @@ class MSHR : public Packet::SenderState
 
     class Target {
       public:
-        Tick time;      //!< Time when request was received (for stats)
+        Tick recvTime;  //!< Time when request was received (for stats)
+        Tick readyTime; //!< Time when request is ready to be serviced
         Counter order;  //!< Global order (for memory consistency mgmt)
         PacketPtr pkt;  //!< Pending request packet.
         bool cpuSide;   //!< Did request come from cpu side or mem side?
 
         bool isCpuSide() { return cpuSide; }
 
-        Target(PacketPtr _pkt, Tick _time, Counter _order, bool _cpuSide)
-            : time(_time), order(_order), pkt(_pkt), cpuSide(_cpuSide)
+        Target(PacketPtr _pkt, Tick _readyTime, Counter _order, bool _cpuSide)
+            : recvTime(curTick), readyTime(_readyTime), order(_order),
+              pkt(_pkt), cpuSide(_cpuSide)
         {}
     };
 
@@ -81,7 +83,7 @@ class MSHR : public Packet::SenderState
     MSHRQueue *queue;
 
     /** Cycle when ready to issue */
-    Tick readyTick;
+    Tick readyTime;
 
     /** Order number assigned by the miss queue. */
     Counter order;
diff --git a/src/mem/cache/miss/mshr_queue.cc b/src/mem/cache/miss/mshr_queue.cc
index 18184bd20..56ec62a7d 100644
--- a/src/mem/cache/miss/mshr_queue.cc
+++ b/src/mem/cache/miss/mshr_queue.cc
@@ -111,14 +111,14 @@ MSHRQueue::findPending(Addr addr, int size) const
 MSHR::Iterator
 MSHRQueue::addToReadyList(MSHR *mshr)
 {
-    if (readyList.empty() || readyList.back()->readyTick <= mshr->readyTick) {
+    if (readyList.empty() || readyList.back()->readyTime <= mshr->readyTime) {
         return readyList.insert(readyList.end(), mshr);
     }
 
     MSHR::Iterator i = readyList.begin();
     MSHR::Iterator end = readyList.end();
     for (; i != end; ++i) {
-        if ((*i)->readyTick > mshr->readyTick) {
+        if ((*i)->readyTime > mshr->readyTime) {
             return readyList.insert(i, mshr);
         }
     }
diff --git a/src/mem/cache/miss/mshr_queue.hh b/src/mem/cache/miss/mshr_queue.hh
index fd61dec8b..1f1d59e98 100644
--- a/src/mem/cache/miss/mshr_queue.hh
+++ b/src/mem/cache/miss/mshr_queue.hh
@@ -193,15 +193,15 @@ class MSHRQueue
      */
     MSHR *getNextMSHR() const
     {
-        if (readyList.empty() || readyList.front()->readyTick > curTick) {
+        if (readyList.empty() || readyList.front()->readyTime > curTick) {
             return NULL;
         }
         return readyList.front();
     }
 
-    Tick nextMSHRReadyTick() const
+    Tick nextMSHRReadyTime() const
     {
-        return readyList.empty() ? MaxTick : readyList.front()->readyTick;
+        return readyList.empty() ? MaxTick : readyList.front()->readyTime;
     }
 };
 
diff --git a/src/mem/tport.hh b/src/mem/tport.hh
index bfed29f34..bc9da6c44 100644
--- a/src/mem/tport.hh
+++ b/src/mem/tport.hh
@@ -105,7 +105,7 @@ class SimpleTimingPort : public Port
     bool deferredPacketReady()
     { return !transmitList.empty() && transmitList.front().tick <= curTick; }
 
-    Tick deferredPacketReadyTick()
+    Tick deferredPacketReadyTime()
     { return transmitList.empty() ? MaxTick : transmitList.front().tick; }
 
     void schedSendEvent(Tick when)
-- 
cgit v1.2.3


From f0c4dd79200bb76f472aa09d6aff02b67a1db8c5 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sat, 30 Jun 2007 13:56:25 -0700
Subject: Factor out a little more common code.

--HG--
extra : convert_revision : 626255a91679d534030c91bcdb4fc1bed36ceb9b
---
 src/mem/cache/cache_impl.hh | 78 +++++++++++++++++++--------------------------
 1 file changed, 32 insertions(+), 46 deletions(-)

diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 568e7ff63..b4c3c6359 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -143,6 +143,37 @@ Cache<TagStore>::cmpAndSwap(BlkType *blk, PacketPtr pkt)
 }
 
 
+template<class TagStore>
+void
+Cache<TagStore>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk)
+{
+    assert(blk);
+    assert(pkt->needsExclusive() ? blk->isWritable() : blk->isValid());
+    assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize);
+
+    // Check RMW operations first since both isRead() and
+    // isWrite() will be true for them
+    if (pkt->cmd == MemCmd::SwapReq) {
+        cmpAndSwap(blk, pkt);
+    } else if (pkt->isWrite()) {
+        if (blk->checkWrite(pkt)) {
+            blk->status |= BlkDirty;
+            pkt->writeDataToBlock(blk->data, blkSize);
+        }
+    } else if (pkt->isRead()) {
+        if (pkt->isLocked()) {
+            blk->trackLoadLocked(pkt);
+        }
+        pkt->setDataFromBlock(blk->data, blkSize);
+    } else {
+        // Not a read or write, so it must be an upgrade.  It's OK
+        // to just ack those as long as we have an exclusive
+        // copy at this level.
+        assert(pkt->cmd == MemCmd::UpgradeReq);
+    }
+}
+
+
 /////////////////////////////////////////////////////
 //
 // MSHR helper functions
@@ -237,27 +268,7 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk, int &lat)
             // OK to satisfy access
             hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
             satisfied = true;
-
-            // Check RMW operations first since both isRead() and
-            // isWrite() will be true for them
-            if (pkt->cmd == MemCmd::SwapReq) {
-                cmpAndSwap(blk, pkt);
-            } else if (pkt->isWrite()) {
-                if (blk->checkWrite(pkt)) {
-                    blk->status |= BlkDirty;
-                    pkt->writeDataToBlock(blk->data, blkSize);
-                }
-            } else if (pkt->isRead()) {
-                if (pkt->isLocked()) {
-                    blk->trackLoadLocked(pkt);
-                }
-                pkt->setDataFromBlock(blk->data, blkSize);
-            } else {
-                // Not a read or write... must be an upgrade.  it's OK
-                // to just ack those as long as we have an exclusive
-                // copy at this level.
-                assert(pkt->cmd == MemCmd::UpgradeReq);
-            }
+            satisfyCpuSideRequest(pkt, blk);
         } else {
             // permission violation... nothing to do here, leave unsatisfied
             // for statistics purposes this counts like a complete miss
@@ -558,31 +569,6 @@ Cache<TagStore>::functionalAccess(PacketPtr pkt,
 /////////////////////////////////////////////////////
 
 
-template<class TagStore>
-void
-Cache<TagStore>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk)
-{
-    assert(blk);
-    assert(pkt->needsExclusive() ? blk->isWritable() : blk->isValid());
-    assert(pkt->isWrite() || pkt->isReadWrite() || pkt->isRead());
-    assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize);
-
-    if (pkt->isWrite()) {
-        if (blk->checkWrite(pkt)) {
-            blk->status |= BlkDirty;
-            pkt->writeDataToBlock(blk->data, blkSize);
-        }
-    } else if (pkt->isReadWrite()) {
-        cmpAndSwap(blk, pkt);
-    } else {
-        if (pkt->isLocked()) {
-            blk->trackLoadLocked(pkt);
-        }
-        pkt->setDataFromBlock(blk->data, blkSize);
-    }
-}
-
-
 template<class TagStore>
 bool
 Cache<TagStore>::satisfyMSHR(MSHR *mshr, PacketPtr pkt,
-- 
cgit v1.2.3


From ee54ad318a63e868ab10bbc1b714bbb8209a11da Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sat, 30 Jun 2007 17:45:58 -0700
Subject: Event descriptions should not end in "event" (they function as
 adjectives not nouns)

--HG--
extra : convert_revision : 6506474ff3356ae8c80ed276c3608d8a4680bfdb
---
 src/arch/mips/regfile/misc_regfile.cc  | 2 +-
 src/cpu/base.cc                        | 2 +-
 src/cpu/o3/commit_impl.hh              | 2 +-
 src/cpu/o3/cpu.cc                      | 6 +++---
 src/cpu/o3/inst_queue_impl.hh          | 2 +-
 src/cpu/o3/lsq_unit_impl.hh            | 2 +-
 src/cpu/ozone/back_end_impl.hh         | 4 ++--
 src/cpu/ozone/cpu_impl.hh              | 2 +-
 src/cpu/ozone/inorder_back_end_impl.hh | 2 +-
 src/cpu/ozone/inst_queue_impl.hh       | 2 +-
 src/cpu/ozone/lsq_unit_impl.hh         | 2 +-
 src/cpu/ozone/lw_back_end_impl.hh      | 2 +-
 src/cpu/ozone/lw_lsq_impl.hh           | 2 +-
 src/cpu/quiesce_event.cc               | 2 +-
 src/cpu/simple/atomic.cc               | 2 +-
 src/cpu/simple/timing.hh               | 6 +++---
 src/cpu/trace/opt_cpu.cc               | 2 +-
 src/cpu/trace/trace_cpu.cc             | 2 +-
 src/dev/ethertap.hh                    | 2 +-
 src/dev/uart8250.cc                    | 2 +-
 src/mem/bridge.hh                      | 2 +-
 21 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/src/arch/mips/regfile/misc_regfile.cc b/src/arch/mips/regfile/misc_regfile.cc
index c97d93cf9..71be3adf9 100755
--- a/src/arch/mips/regfile/misc_regfile.cc
+++ b/src/arch/mips/regfile/misc_regfile.cc
@@ -357,7 +357,7 @@ MiscRegFile::CP0Event::process()
 const char *
 MiscRegFile::CP0Event::description()
 {
-    return "Coprocessor-0 event";
+    return "Coprocessor-0";
 }
 
 void
diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index f86313da0..cf007a06b 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -91,7 +91,7 @@ CPUProgressEvent::process()
 const char *
 CPUProgressEvent::description()
 {
-    return "CPU Progress event";
+    return "CPU Progress";
 }
 
 #if FULL_SYSTEM
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index 9411c6c62..f263383ae 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -67,7 +67,7 @@ template <class Impl>
 const char *
 DefaultCommit<Impl>::TrapEvent::description()
 {
-    return "Trap event";
+    return "Trap";
 }
 
 template <class Impl>
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 6a3eb9c43..2bf8f9832 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -83,7 +83,7 @@ template <class Impl>
 const char *
 FullO3CPU<Impl>::TickEvent::description()
 {
-    return "FullO3CPU tick event";
+    return "FullO3CPU tick";
 }
 
 template <class Impl>
@@ -112,7 +112,7 @@ template <class Impl>
 const char *
 FullO3CPU<Impl>::ActivateThreadEvent::description()
 {
-    return "FullO3CPU \"Activate Thread\" event";
+    return "FullO3CPU \"Activate Thread\"";
 }
 
 template <class Impl>
@@ -144,7 +144,7 @@ template <class Impl>
 const char *
 FullO3CPU<Impl>::DeallocateContextEvent::description()
 {
-    return "FullO3CPU \"Deallocate Context\" event";
+    return "FullO3CPU \"Deallocate Context\"";
 }
 
 template <class Impl>
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index bdf5f07aa..99bffe1a6 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -60,7 +60,7 @@ template <class Impl>
 const char *
 InstructionQueue<Impl>::FUCompletion::description()
 {
-    return "Functional unit completion event";
+    return "Functional unit completion";
 }
 
 template <class Impl>
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 91e616589..810a6d29f 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -69,7 +69,7 @@ template<class Impl>
 const char *
 LSQUnit<Impl>::WritebackEvent::description()
 {
-    return "Store writeback event";
+    return "Store writeback";
 }
 
 template<class Impl>
diff --git a/src/cpu/ozone/back_end_impl.hh b/src/cpu/ozone/back_end_impl.hh
index 4078699fe..27146ecf0 100644
--- a/src/cpu/ozone/back_end_impl.hh
+++ b/src/cpu/ozone/back_end_impl.hh
@@ -583,7 +583,7 @@ template<class Impl>
 const char *
 BackEnd<Impl>::LdWritebackEvent::description()
 {
-    return "Load writeback event";
+    return "Load writeback";
 }
 
 
@@ -603,7 +603,7 @@ template <class Impl>
 const char *
 BackEnd<Impl>::DCacheCompletionEvent::description()
 {
-    return "Cache completion event";
+    return "Cache completion";
 }
 
 template <class Impl>
diff --git a/src/cpu/ozone/cpu_impl.hh b/src/cpu/ozone/cpu_impl.hh
index d1214223b..d73e5768a 100644
--- a/src/cpu/ozone/cpu_impl.hh
+++ b/src/cpu/ozone/cpu_impl.hh
@@ -84,7 +84,7 @@ template <class Impl>
 const char *
 OzoneCPU<Impl>::TickEvent::description()
 {
-    return "OzoneCPU tick event";
+    return "OzoneCPU tick";
 }
 
 template <class Impl>
diff --git a/src/cpu/ozone/inorder_back_end_impl.hh b/src/cpu/ozone/inorder_back_end_impl.hh
index 8d7ebb60e..c57fa0200 100644
--- a/src/cpu/ozone/inorder_back_end_impl.hh
+++ b/src/cpu/ozone/inorder_back_end_impl.hh
@@ -540,5 +540,5 @@ template <class Impl>
 const char *
 InorderBackEnd<Impl>::DCacheCompletionEvent::description()
 {
-    return "DCache completion event";
+    return "DCache completion";
 }
diff --git a/src/cpu/ozone/inst_queue_impl.hh b/src/cpu/ozone/inst_queue_impl.hh
index ea9d03c0d..461c7eb0f 100644
--- a/src/cpu/ozone/inst_queue_impl.hh
+++ b/src/cpu/ozone/inst_queue_impl.hh
@@ -64,7 +64,7 @@ template <class Impl>
 const char *
 InstQueue<Impl>::FUCompletion::description()
 {
-    return "Functional unit completion event";
+    return "Functional unit completion";
 }
 #endif
 template <class Impl>
diff --git a/src/cpu/ozone/lsq_unit_impl.hh b/src/cpu/ozone/lsq_unit_impl.hh
index c46eb90be..e08e54835 100644
--- a/src/cpu/ozone/lsq_unit_impl.hh
+++ b/src/cpu/ozone/lsq_unit_impl.hh
@@ -62,7 +62,7 @@ template <class Impl>
 const char *
 OzoneLSQ<Impl>::StoreCompletionEvent::description()
 {
-    return "LSQ store completion event";
+    return "LSQ store completion";
 }
 
 template <class Impl>
diff --git a/src/cpu/ozone/lw_back_end_impl.hh b/src/cpu/ozone/lw_back_end_impl.hh
index c0a9cad24..f84bda348 100644
--- a/src/cpu/ozone/lw_back_end_impl.hh
+++ b/src/cpu/ozone/lw_back_end_impl.hh
@@ -121,7 +121,7 @@ template <class Impl>
 const char *
 LWBackEnd<Impl>::TrapEvent::description()
 {
-    return "Trap event";
+    return "Trap";
 }
 
 template <class Impl>
diff --git a/src/cpu/ozone/lw_lsq_impl.hh b/src/cpu/ozone/lw_lsq_impl.hh
index eefc0df83..e3000288c 100644
--- a/src/cpu/ozone/lw_lsq_impl.hh
+++ b/src/cpu/ozone/lw_lsq_impl.hh
@@ -57,7 +57,7 @@ template<class Impl>
 const char *
 OzoneLWLSQ<Impl>::WritebackEvent::description()
 {
-    return "Store writeback event";
+    return "Store writeback";
 }
 
 template <class Impl>
diff --git a/src/cpu/quiesce_event.cc b/src/cpu/quiesce_event.cc
index fa79e6d1e..3495a0e52 100644
--- a/src/cpu/quiesce_event.cc
+++ b/src/cpu/quiesce_event.cc
@@ -47,5 +47,5 @@ EndQuiesceEvent::process()
 const char*
 EndQuiesceEvent::description()
 {
-    return "End Quiesce Event.";
+    return "End Quiesce";
 }
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index bcd6662c8..8e8da2fa2 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -57,7 +57,7 @@ AtomicSimpleCPU::TickEvent::process()
 const char *
 AtomicSimpleCPU::TickEvent::description()
 {
-    return "AtomicSimpleCPU tick event";
+    return "AtomicSimpleCPU tick";
 }
 
 Port *
diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh
index 39958bfb6..ba194b3fa 100644
--- a/src/cpu/simple/timing.hh
+++ b/src/cpu/simple/timing.hh
@@ -101,7 +101,7 @@ class TimingSimpleCPU : public BaseSimpleCPU
 
             TickEvent(TimingSimpleCPU *_cpu)
                 :Event(&mainEventQueue), cpu(_cpu) {}
-            const char *description() { return "Timing CPU clock event"; }
+            const char *description() { return "Timing CPU tick"; }
             void schedule(PacketPtr _pkt, Tick t);
         };
 
@@ -127,7 +127,7 @@ class TimingSimpleCPU : public BaseSimpleCPU
             ITickEvent(TimingSimpleCPU *_cpu)
                 : TickEvent(_cpu) {}
             void process();
-            const char *description() { return "Timing CPU clock event"; }
+            const char *description() { return "Timing CPU icache tick"; }
         };
 
         ITickEvent tickEvent;
@@ -155,7 +155,7 @@ class TimingSimpleCPU : public BaseSimpleCPU
             DTickEvent(TimingSimpleCPU *_cpu)
                 : TickEvent(_cpu) {}
             void process();
-            const char *description() { return "Timing CPU clock event"; }
+            const char *description() { return "Timing CPU dcache tick"; }
         };
 
         DTickEvent tickEvent;
diff --git a/src/cpu/trace/opt_cpu.cc b/src/cpu/trace/opt_cpu.cc
index 996e89f01..0f2944f07 100644
--- a/src/cpu/trace/opt_cpu.cc
+++ b/src/cpu/trace/opt_cpu.cc
@@ -207,7 +207,7 @@ OptCPU::TickEvent::process()
 const char *
 OptCPU::TickEvent::description()
 {
-    return "OptCPU tick event";
+    return "OptCPU tick";
 }
 
 
diff --git a/src/cpu/trace/trace_cpu.cc b/src/cpu/trace/trace_cpu.cc
index 3c9da4849..32ed6c7d7 100644
--- a/src/cpu/trace/trace_cpu.cc
+++ b/src/cpu/trace/trace_cpu.cc
@@ -148,7 +148,7 @@ TraceCPU::TickEvent::process()
 const char *
 TraceCPU::TickEvent::description()
 {
-    return "TraceCPU tick event";
+    return "TraceCPU tick";
 }
 
 
diff --git a/src/dev/ethertap.hh b/src/dev/ethertap.hh
index f64ed7187..3d2838817 100644
--- a/src/dev/ethertap.hh
+++ b/src/dev/ethertap.hh
@@ -89,7 +89,7 @@ class EtherTap : public EtherInt
         TxEvent(EtherTap *_tap)
             : Event(&mainEventQueue), tap(_tap) {}
         void process() { tap->retransmit(); }
-        virtual const char *description() { return "retransmit event"; }
+        virtual const char *description() { return "EtherTap retransmit"; }
     };
 
     friend class TxEvent;
diff --git a/src/dev/uart8250.cc b/src/dev/uart8250.cc
index 0ad80e077..358dda0d8 100644
--- a/src/dev/uart8250.cc
+++ b/src/dev/uart8250.cc
@@ -58,7 +58,7 @@ Uart8250::IntrEvent::IntrEvent(Uart8250 *u, int bit)
 const char *
 Uart8250::IntrEvent::description()
 {
-    return "uart interrupt delay event";
+    return "uart interrupt delay";
 }
 
 void
diff --git a/src/mem/bridge.hh b/src/mem/bridge.hh
index 7af764437..acae2f126 100644
--- a/src/mem/bridge.hh
+++ b/src/mem/bridge.hh
@@ -146,7 +146,7 @@ class Bridge : public MemObject
 
             virtual void process() { port->trySend(); }
 
-            virtual const char *description() { return "bridge send event"; }
+            virtual const char *description() { return "bridge send"; }
         };
 
         SendEvent sendEvent;
-- 
cgit v1.2.3


From d10a843723009ddee79cdbf94a46704df1e5cee6 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sat, 30 Jun 2007 17:51:29 -0700
Subject: Get rid of obsolete fixPacket() functions. Handled by
 Packet::checkFunctional() now.

--HG--
extra : convert_revision : 63642254e2789c80a369ac269f317ec054ffe3c0
---
 src/mem/packet.cc | 25 -------------------------
 src/mem/packet.hh | 16 ----------------
 src/mem/tport.cc  |  7 ++-----
 3 files changed, 2 insertions(+), 46 deletions(-)

diff --git a/src/mem/packet.cc b/src/mem/packet.cc
index 55fe13f3c..8de02f533 100644
--- a/src/mem/packet.cc
+++ b/src/mem/packet.cc
@@ -164,31 +164,6 @@ Packet::intersect(PacketPtr p)
     return !(s1 > e2 || e1 < s2);
 }
 
-bool
-fixDelayedResponsePacket(PacketPtr func, PacketPtr timing)
-{
-    bool result;
-
-    if (timing->isRead() || timing->isWrite()) {
-        // Ugly hack to deal with the fact that we queue the requests
-        // and don't convert them to responses until we issue them on
-        // the bus.  I tried to avoid this by converting packets to
-        // responses right away, but this breaks during snoops where a
-        // responder may do the conversion before other caches have
-        // done the snoop.  Would work if we copied the packet instead
-        // of just hanging on to a pointer.
-        MemCmd oldCmd = timing->cmd;
-        timing->cmd = timing->cmd.responseCommand();
-        result = fixPacket(func, timing);
-        timing->cmd = oldCmd;
-    }
-    else {
-        //Don't toggle if it isn't a read/write response
-        result = fixPacket(func, timing);
-    }
-
-    return result;
-}
 
 bool
 Packet::checkFunctional(Addr addr, int size, uint8_t *data)
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index 10b9f490c..16bc6f458 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -574,22 +574,6 @@ class Packet : public FastAlloc
     }
 };
 
-
-
-/** Temporary for backwards compatibility.
- */
-inline
-bool fixPacket(PacketPtr func, PacketPtr timing) {
-    return !func->checkFunctional(timing);
-}
-
-/** This function is a wrapper for the fixPacket field that toggles
- * the hasData bit it is used when a response is waiting in the
- * caches, but hasn't been marked as a response yet (so the fixPacket
- * needs to get the correct value for the hasData)
- */
-bool fixDelayedResponsePacket(PacketPtr func, PacketPtr timing);
-
 std::ostream & operator<<(std::ostream &o, const Packet &p);
 
 #endif //__MEM_PACKET_HH
diff --git a/src/mem/tport.cc b/src/mem/tport.cc
index d6ff64608..a4f791048 100644
--- a/src/mem/tport.cc
+++ b/src/mem/tport.cc
@@ -40,11 +40,8 @@ SimpleTimingPort::checkFunctional(PacketPtr pkt)
         PacketPtr target = i->pkt;
         // If the target contains data, and it overlaps the
         // probed request, need to update data
-        if (target->intersect(pkt)) {
-            if (!fixPacket(pkt, target)) {
-                // fixPacket returns true for continue, false for done
-                return;
-            }
+        if (pkt->checkFunctional(target)) {
+            return;
         }
     }
 }
-- 
cgit v1.2.3


From 2447abe5ce6c40e61eb09430c95a592aa2445349 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sat, 30 Jun 2007 17:56:30 -0700
Subject: Can only call makeAtomicResponse() once...

--HG--
extra : convert_revision : c49aade46aa64f979da35eb653b544ee5bd82f01
---
 src/dev/ide_ctrl.cc | 9 +++++----
 src/dev/ns_gige.cc  | 2 +-
 src/dev/pcidev.cc   | 1 -
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/dev/ide_ctrl.cc b/src/dev/ide_ctrl.cc
index 01243ae73..07764aaba 100644
--- a/src/dev/ide_ctrl.cc
+++ b/src/dev/ide_ctrl.cc
@@ -232,8 +232,10 @@ Tick
 IdeController::readConfig(PacketPtr pkt)
 {
     int offset = pkt->getAddr() & PCI_CONFIG_SIZE;
-    if (offset < PCI_DEVICE_SPECIFIC)
-        return  PciDev::readConfig(pkt);
+    if (offset < PCI_DEVICE_SPECIFIC) {
+        return PciDev::readConfig(pkt);
+    }
+
     assert(offset >= IDE_CTRL_CONF_START && (offset + 1) <= IDE_CTRL_CONF_END);
 
     pkt->allocate();
@@ -297,7 +299,6 @@ IdeController::readConfig(PacketPtr pkt)
     }
     pkt->makeAtomicResponse();
     return configDelay;
-
 }
 
 
@@ -361,6 +362,7 @@ IdeController::writeConfig(PacketPtr pkt)
           default:
             panic("invalid access size(?) for PCI configspace!\n");
         }
+        pkt->makeAtomicResponse();
     }
 
     /* Trap command register writes and enable IO/BM as appropriate as well as
@@ -403,7 +405,6 @@ IdeController::writeConfig(PacketPtr pkt)
             bm_enabled = false;
         break;
     }
-    pkt->makeAtomicResponse();
     return configDelay;
 }
 
diff --git a/src/dev/ns_gige.cc b/src/dev/ns_gige.cc
index 86f664238..17f7b433b 100644
--- a/src/dev/ns_gige.cc
+++ b/src/dev/ns_gige.cc
@@ -487,7 +487,7 @@ NSGigE::writeConfig(PacketPtr pkt)
             ioEnable = false;
         break;
     }
-    pkt->makeAtomicResponse();
+
     return configDelay;
 }
 
diff --git a/src/dev/pcidev.cc b/src/dev/pcidev.cc
index 85337c841..06806f841 100644
--- a/src/dev/pcidev.cc
+++ b/src/dev/pcidev.cc
@@ -284,7 +284,6 @@ PciDev::writeConfig(PacketPtr pkt)
     }
     pkt->makeAtomicResponse();
     return configDelay;
-
 }
 
 void
-- 
cgit v1.2.3


From 07f091d6ed63d9b54c0415eacc070c3ea67566fc Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sat, 30 Jun 2007 17:59:45 -0700
Subject: Get rid of remaining traces of obsolete CoherenceProtocol object.

--HG--
extra : convert_revision : c5555b00bef1b304a84886188ad2c0dcb4d7c5b9
---
 configs/common/Caches.py                    | 1 -
 configs/splash2/cluster.py                  | 6 +-----
 configs/splash2/run.py                      | 4 ----
 src/mem/cache/BaseCache.py                  | 1 -
 tests/configs/memtest.py                    | 1 -
 tests/configs/o3-timing-mp.py               | 1 -
 tests/configs/simple-atomic-mp.py           | 1 -
 tests/configs/simple-timing-mp.py           | 1 -
 tests/configs/tsunami-simple-atomic-dual.py | 1 -
 tests/configs/tsunami-simple-atomic.py      | 1 -
 tests/configs/tsunami-simple-timing-dual.py | 1 -
 tests/configs/tsunami-simple-timing.py      | 1 -
 12 files changed, 1 insertion(+), 19 deletions(-)

diff --git a/configs/common/Caches.py b/configs/common/Caches.py
index 4bff2c8a4..43a1c6378 100644
--- a/configs/common/Caches.py
+++ b/configs/common/Caches.py
@@ -35,7 +35,6 @@ class L1Cache(BaseCache):
     latency = '1ns'
     mshrs = 10
     tgts_per_mshr = 5
-    protocol = CoherenceProtocol(protocol='moesi')
 
 class L2Cache(BaseCache):
     assoc = 8
diff --git a/configs/splash2/cluster.py b/configs/splash2/cluster.py
index 799b85e6c..769bdcf5a 100644
--- a/configs/splash2/cluster.py
+++ b/configs/splash2/cluster.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2006 The Regents of The University of Michigan
+# Copyright (c) 2006-2007 The Regents of The University of Michigan
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -51,9 +51,6 @@ parser.add_option("-n", "--numcpus",
 parser.add_option("-f", "--frequency",
                   default = "1GHz",
                   help="Frequency of each CPU")
-parser.add_option("-p", "--protocol",
-                  default="moesi",
-                  help="The coherence protocol to use for the L1'a (i.e. MOESI, MOSI)")
 parser.add_option("--l1size",
                   default = "32kB")
 parser.add_option("--l1latency",
@@ -141,7 +138,6 @@ class L1(BaseCache):
     block_size = 64
     mshrs = 12
     tgts_per_mshr = 8
-    protocol = CoherenceProtocol(protocol=options.protocol)
 
 # ----------------------
 # Base L2 Cache Definition
diff --git a/configs/splash2/run.py b/configs/splash2/run.py
index d051f1f1b..ff0a9448c 100644
--- a/configs/splash2/run.py
+++ b/configs/splash2/run.py
@@ -48,9 +48,6 @@ parser.add_option("-n", "--numcpus",
 parser.add_option("-f", "--frequency",
                   default = "1GHz",
                   help="Frequency of each CPU")
-parser.add_option("-p", "--protocol",
-                  default="moesi",
-                  help="The coherence protocol to use for the L1'a (i.e. MOESI, MOSI)")
 parser.add_option("--l1size",
                   default = "32kB")
 parser.add_option("--l1latency",
@@ -162,7 +159,6 @@ class L1(BaseCache):
     block_size = 64
     mshrs = 12
     tgts_per_mshr = 8
-    protocol = CoherenceProtocol(protocol=options.protocol)
 
 # ----------------------
 # Base L2 Cache Definition
diff --git a/src/mem/cache/BaseCache.py b/src/mem/cache/BaseCache.py
index 55b68f81f..86148f821 100644
--- a/src/mem/cache/BaseCache.py
+++ b/src/mem/cache/BaseCache.py
@@ -51,7 +51,6 @@ class BaseCache(MemObject):
     mshrs = Param.Int("number of MSHRs (max outstanding requests)")
     prioritizeRequests = Param.Bool(False,
         "always service demand misses first")
-    protocol = Param.CoherenceProtocol(NULL, "coherence protocol to use")
     repl = Param.Repl(NULL, "replacement policy")
     size = Param.MemorySize("capacity in bytes")
     split = Param.Bool(False, "whether or not this cache is split")
diff --git a/tests/configs/memtest.py b/tests/configs/memtest.py
index 6fe244acf..93ea4cc0e 100644
--- a/tests/configs/memtest.py
+++ b/tests/configs/memtest.py
@@ -38,7 +38,6 @@ class L1(BaseCache):
     block_size = 64
     mshrs = 12
     tgts_per_mshr = 8
-    protocol = CoherenceProtocol(protocol='moesi')
 
 # ----------------------
 # Base L2 Cache
diff --git a/tests/configs/o3-timing-mp.py b/tests/configs/o3-timing-mp.py
index 1ac9bd2e4..fc6a72a82 100644
--- a/tests/configs/o3-timing-mp.py
+++ b/tests/configs/o3-timing-mp.py
@@ -39,7 +39,6 @@ class L1(BaseCache):
     block_size = 64
     mshrs = 4
     tgts_per_mshr = 8
-    protocol = CoherenceProtocol(protocol='moesi')
 
 # ----------------------
 # Base L2 Cache
diff --git a/tests/configs/simple-atomic-mp.py b/tests/configs/simple-atomic-mp.py
index de0793d1c..bc0ced250 100644
--- a/tests/configs/simple-atomic-mp.py
+++ b/tests/configs/simple-atomic-mp.py
@@ -38,7 +38,6 @@ class L1(BaseCache):
     block_size = 64
     mshrs = 4
     tgts_per_mshr = 8
-    protocol = CoherenceProtocol(protocol='moesi')
 
 # ----------------------
 # Base L2 Cache
diff --git a/tests/configs/simple-timing-mp.py b/tests/configs/simple-timing-mp.py
index 1fd0e8c3c..0b400e6b7 100644
--- a/tests/configs/simple-timing-mp.py
+++ b/tests/configs/simple-timing-mp.py
@@ -38,7 +38,6 @@ class L1(BaseCache):
     block_size = 64
     mshrs = 4
     tgts_per_mshr = 8
-    protocol = CoherenceProtocol(protocol='moesi')
 
 # ----------------------
 # Base L2 Cache
diff --git a/tests/configs/tsunami-simple-atomic-dual.py b/tests/configs/tsunami-simple-atomic-dual.py
index 131095055..de8fe2474 100644
--- a/tests/configs/tsunami-simple-atomic-dual.py
+++ b/tests/configs/tsunami-simple-atomic-dual.py
@@ -40,7 +40,6 @@ class L1(BaseCache):
     block_size = 64
     mshrs = 4
     tgts_per_mshr = 8
-    protocol = CoherenceProtocol(protocol='moesi')
 
 # ----------------------
 # Base L2 Cache
diff --git a/tests/configs/tsunami-simple-atomic.py b/tests/configs/tsunami-simple-atomic.py
index 595b1aeda..2ba50273a 100644
--- a/tests/configs/tsunami-simple-atomic.py
+++ b/tests/configs/tsunami-simple-atomic.py
@@ -40,7 +40,6 @@ class L1(BaseCache):
     block_size = 64
     mshrs = 4
     tgts_per_mshr = 8
-    protocol = CoherenceProtocol(protocol='moesi')
 
 # ----------------------
 # Base L2 Cache
diff --git a/tests/configs/tsunami-simple-timing-dual.py b/tests/configs/tsunami-simple-timing-dual.py
index 47fba30ff..3b1a4f5cf 100644
--- a/tests/configs/tsunami-simple-timing-dual.py
+++ b/tests/configs/tsunami-simple-timing-dual.py
@@ -40,7 +40,6 @@ class L1(BaseCache):
     block_size = 64
     mshrs = 4
     tgts_per_mshr = 8
-    protocol = CoherenceProtocol(protocol='moesi')
 
 # ----------------------
 # Base L2 Cache
diff --git a/tests/configs/tsunami-simple-timing.py b/tests/configs/tsunami-simple-timing.py
index 999bde087..3f18c6848 100644
--- a/tests/configs/tsunami-simple-timing.py
+++ b/tests/configs/tsunami-simple-timing.py
@@ -41,7 +41,6 @@ class L1(BaseCache):
     block_size = 64
     mshrs = 4
     tgts_per_mshr = 8
-    protocol = CoherenceProtocol(protocol='moesi')
 
 # ----------------------
 # Base L2 Cache
-- 
cgit v1.2.3


From 5e59739416bf195173f4b37ba9afb1cb8ae16566 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sat, 30 Jun 2007 18:03:17 -0700
Subject: Don't propagate snoops across bridges.  Wouldn't work anyway.

--HG--
extra : convert_revision : af29fc7d0c134f5e89dd2e814c819151350fcb38
---
 src/mem/bridge.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/mem/bridge.cc b/src/mem/bridge.cc
index 77178d518..92beb3d7e 100644
--- a/src/mem/bridge.cc
+++ b/src/mem/bridge.cc
@@ -360,6 +360,8 @@ Bridge::BridgePort::getDeviceAddressRanges(AddrRangeList &resp,
                                            bool &snoop)
 {
     otherPort->getPeerAddressRanges(resp, snoop);
+    // we don't allow snooping across bridges
+    snoop = false;
 }
 
 BEGIN_DECLARE_SIM_OBJECT_PARAMS(Bridge)
-- 
cgit v1.2.3


From 3ad761bc8e89ff034fbf5ec6d8e9661e1025dcd7 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sat, 30 Jun 2007 20:35:42 -0700
Subject: Make CPU models use new LoadLockedReq/StoreCondReq commands.

--HG--
extra : convert_revision : ab78d9d1d88c3698edfd653d71c8882e1272b781
---
 src/cpu/o3/lsq_unit.hh       |  5 ++++-
 src/cpu/o3/lsq_unit_impl.hh  |  4 +++-
 src/cpu/ozone/lw_lsq.hh      |  6 +++++-
 src/cpu/ozone/lw_lsq_impl.hh |  5 ++++-
 src/cpu/simple/atomic.cc     | 36 +++++++++++++++++++++---------------
 src/cpu/simple/timing.cc     | 33 +++++++++++++++++++--------------
 6 files changed, 56 insertions(+), 33 deletions(-)

diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index d964b9f9f..be9224099 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -643,7 +643,10 @@ LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
     // if we the cache is not blocked, do cache access
     if (!lsq->cacheBlocked()) {
         PacketPtr data_pkt =
-            new Packet(req, MemCmd::ReadReq, Packet::Broadcast);
+            new Packet(req,
+                       (req->isLocked() ?
+                        MemCmd::LoadLockedReq : MemCmd::ReadReq),
+                       Packet::Broadcast);
         data_pkt->dataStatic(load_inst->memData);
 
         LSQSenderState *state = new LSQSenderState;
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index 810a6d29f..5ae1cc0e4 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -647,7 +647,9 @@ LSQUnit<Impl>::writebackStores()
 
         memcpy(inst->memData, storeQueue[storeWBIdx].data, req->getSize());
 
-        MemCmd command = req->isSwap() ? MemCmd::SwapReq : MemCmd::WriteReq;
+        MemCmd command =
+            req->isSwap() ? MemCmd::SwapReq :
+            (req->isLocked() ? MemCmd::WriteReq : MemCmd::StoreCondReq);
         PacketPtr data_pkt = new Packet(req, command,
                                         Packet::Broadcast);
         data_pkt->dataStatic(inst->memData);
diff --git a/src/cpu/ozone/lw_lsq.hh b/src/cpu/ozone/lw_lsq.hh
index d9e0d04ac..ba40e9ce1 100644
--- a/src/cpu/ozone/lw_lsq.hh
+++ b/src/cpu/ozone/lw_lsq.hh
@@ -632,7 +632,11 @@ OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx)
     DPRINTF(OzoneLSQ, "Doing timing access for inst PC %#x\n",
             inst->readPC());
 
-    PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast);
+    PacketPtr data_pkt =
+        new Packet(req,
+                   (req->isLocked() ?
+                    MemCmd::LoadLockedReq : Packet::ReadReq),
+                   Packet::Broadcast);
     data_pkt->dataStatic(inst->memData);
 
     LSQSenderState *state = new LSQSenderState;
diff --git a/src/cpu/ozone/lw_lsq_impl.hh b/src/cpu/ozone/lw_lsq_impl.hh
index e3000288c..82191312a 100644
--- a/src/cpu/ozone/lw_lsq_impl.hh
+++ b/src/cpu/ozone/lw_lsq_impl.hh
@@ -587,7 +587,10 @@ OzoneLWLSQ<Impl>::writebackStores()
         memcpy(inst->memData, (uint8_t *)&(*sq_it).data,
                req->getSize());
 
-        PacketPtr data_pkt = new Packet(req, Packet::WriteReq, Packet::Broadcast);
+        MemCmd command =
+            req->isSwap() ? MemCmd::SwapReq :
+            (req->isLocked() ? MemCmd::WriteReq : MemCmd::StoreCondReq);
+        PacketPtr data_pkt = new Packet(req, command, Packet::Broadcast);
         data_pkt->dataStatic(inst->memData);
 
         LSQSenderState *state = new LSQSenderState;
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index 8e8da2fa2..01eb4873e 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -280,7 +280,10 @@ AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
 
     // Now do the access.
     if (fault == NoFault) {
-        Packet pkt = Packet(req, MemCmd::ReadReq, Packet::Broadcast);
+        Packet pkt =
+            Packet(req,
+                   req->isLocked() ? MemCmd::LoadLockedReq : MemCmd::ReadReq,
+                   Packet::Broadcast);
         pkt.dataStatic(&data);
 
         if (req->isMmapedIpr())
@@ -370,23 +373,24 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
 
     // Now do the access.
     if (fault == NoFault) {
-        Packet pkt =
-            Packet(req, req->isSwap() ? MemCmd::SwapReq : MemCmd::WriteReq,
-                   Packet::Broadcast);
-        pkt.dataStatic(&data);
-
+        MemCmd cmd = MemCmd::WriteReq; // default
         bool do_access = true;  // flag to suppress cache access
 
         if (req->isLocked()) {
+            cmd = MemCmd::StoreCondReq;
             do_access = TheISA::handleLockedWrite(thread, req);
+        } else if (req->isSwap()) {
+            cmd = MemCmd::SwapReq;
+            if (req->isCondSwap()) {
+                assert(res);
+                req->setExtraData(*res);
+            }
         }
-        if (req->isCondSwap()) {
-             assert(res);
-             req->setExtraData(*res);
-        }
-
 
         if (do_access) {
+            Packet pkt = Packet(req, cmd, Packet::Broadcast);
+            pkt.dataStatic(&data);
+
             if (req->isMmapedIpr()) {
                 dcache_latency = TheISA::handleIprWrite(thread->getTC(), &pkt);
             } else {
@@ -395,12 +399,14 @@ AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
             }
             dcache_access = true;
             assert(!pkt.isError());
+
+            if (req->isSwap()) {
+                assert(res);
+                *res = pkt.get<T>();
+            }
         }
 
-        if (req->isSwap()) {
-            assert(res);
-            *res = pkt.get<T>();
-        } else if (res) {
+        if (res && !req->isSwap()) {
             *res = req->getExtraData();
         }
     }
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index b4e4a4433..77df2c05d 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -260,7 +260,10 @@ TimingSimpleCPU::read(Addr addr, T &data, unsigned flags)
     // Now do the access.
     if (fault == NoFault) {
         PacketPtr pkt =
-            new Packet(req, MemCmd::ReadReq, Packet::Broadcast);
+            new Packet(req,
+                       (req->isLocked() ?
+                        MemCmd::LoadLockedReq : MemCmd::ReadReq),
+                       Packet::Broadcast);
         pkt->dataDynamic<T>(new T);
 
         if (!dcachePort.sendTiming(pkt)) {
@@ -350,25 +353,27 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
 
     // Now do the access.
     if (fault == NoFault) {
-        assert(dcache_pkt == NULL);
-        if (req->isSwap())
-            dcache_pkt = new Packet(req, MemCmd::SwapReq, Packet::Broadcast);
-        else
-            dcache_pkt = new Packet(req, MemCmd::WriteReq, Packet::Broadcast);
-        dcache_pkt->allocate();
-        dcache_pkt->set(data);
-
+        MemCmd cmd = MemCmd::WriteReq; // default
         bool do_access = true;  // flag to suppress cache access
 
+        assert(dcache_pkt == NULL);
+
         if (req->isLocked()) {
+            cmd = MemCmd::StoreCondReq;
             do_access = TheISA::handleLockedWrite(thread, req);
-        }
-        if (req->isCondSwap()) {
-             assert(res);
-             req->setExtraData(*res);
+        } else if (req->isSwap()) {
+            cmd = MemCmd::SwapReq;
+            if (req->isCondSwap()) {
+                assert(res);
+                req->setExtraData(*res);
+            }
         }
 
         if (do_access) {
+            dcache_pkt = new Packet(req, MemCmd::WriteReq, Packet::Broadcast);
+            dcache_pkt->allocate();
+            dcache_pkt->set(data);
+
             if (!dcachePort.sendTiming(dcache_pkt)) {
                 _status = DcacheRetry;
             } else {
@@ -609,7 +614,7 @@ TimingSimpleCPU::completeDataAccess(PacketPtr pkt)
 
     Fault fault = curStaticInst->completeAcc(pkt, this, traceData);
 
-    if (pkt->isRead() && pkt->req->isLocked()) {
+    if (pkt->isRead() && pkt->isLocked()) {
         TheISA::handleLockedRead(thread, pkt->req);
     }
 
-- 
cgit v1.2.3


From ffd697e14933b3012aaaa0fb93168b2fda59ea4a Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Mon, 2 Jul 2007 01:02:35 -0700
Subject: bus.cc: Fix atomic timing issue.

src/mem/bus.cc:
    Fix atomic timing issue.

--HG--
extra : convert_revision : a22ff80cd75f83c785b0604c2a4fde2e2e9f71ef
---
 src/mem/bus.cc | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/src/mem/bus.cc b/src/mem/bus.cc
index 83ce0f87d..34f7f4fd0 100644
--- a/src/mem/bus.cc
+++ b/src/mem/bus.cc
@@ -377,7 +377,8 @@ Bus::recvAtomic(PacketPtr pkt)
     // original command so that additional snoops can take place
     // properly
     MemCmd orig_cmd = pkt->cmd;
-    MemCmd response_cmd = MemCmd::InvalidCmd;
+    MemCmd snoop_response_cmd = MemCmd::InvalidCmd;
+    Tick snoop_response_latency = 0;
     int orig_src = pkt->getSrc();
 
     Port *target_port = findPort(pkt->getAddr(), pkt->getSrc());
@@ -388,15 +389,16 @@ Bus::recvAtomic(PacketPtr pkt)
         // same port should not have both target addresses and snooping
         assert(p != target_port);
         if (p->getId() != pkt->getSrc()) {
-            p->sendAtomic(pkt);
+            Tick latency = p->sendAtomic(pkt);
             if (pkt->isResponse()) {
                 // response from snoop agent
                 assert(pkt->cmd != orig_cmd);
                 assert(pkt->memInhibitAsserted());
                 // should only happen once
-                assert(response_cmd == MemCmd::InvalidCmd);
+                assert(snoop_response_cmd == MemCmd::InvalidCmd);
                 // save response state
-                response_cmd = pkt->cmd;
+                snoop_response_cmd = pkt->cmd;
+                snoop_response_latency = latency;
                 // restore original packet state for remaining snoopers
                 pkt->cmd = orig_cmd;
                 pkt->setSrc(orig_src);
@@ -405,19 +407,20 @@ Bus::recvAtomic(PacketPtr pkt)
         }
     }
 
-    Tick response_time = target_port->sendAtomic(pkt);
+    Tick response_latency = target_port->sendAtomic(pkt);
 
     // if we got a response from a snooper, restore it here
-    if (response_cmd != MemCmd::InvalidCmd) {
+    if (snoop_response_cmd != MemCmd::InvalidCmd) {
         // no one else should have responded
         assert(!pkt->isResponse());
         assert(pkt->cmd == orig_cmd);
-        pkt->cmd = response_cmd;
+        pkt->cmd = snoop_response_cmd;
+        response_latency = snoop_response_latency;
     }
 
     // why do we have this packet field and the return value both???
-    pkt->finishTime = std::max(response_time, curTick + clock);
-    return pkt->finishTime;
+    pkt->finishTime = curTick + response_latency;
+    return response_latency;
 }
 
 /** Function called by the port when the bus is receiving a Functional
-- 
cgit v1.2.3


From e9c04dad60f7a382fe94ca587fa505926dbd925c Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Mon, 2 Jul 2007 09:26:36 -0700
Subject: Fix a couple of LL/SC bugs that only affected timing mode.

src/cpu/simple/timing.cc:
    Fix swap/stq_c command bug.
src/mem/packet.cc:
    Fix incorrect LoadLockedReq command response field.

--HG--
extra : convert_revision : 7a4523be900bc2c9b1bdf2d372ce55f89ae58ae5
---
 src/cpu/simple/timing.cc | 2 +-
 src/mem/packet.cc        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index 77df2c05d..492a669b8 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -370,7 +370,7 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
         }
 
         if (do_access) {
-            dcache_pkt = new Packet(req, MemCmd::WriteReq, Packet::Broadcast);
+            dcache_pkt = new Packet(req, cmd, Packet::Broadcast);
             dcache_pkt->allocate();
             dcache_pkt->set(data);
 
diff --git a/src/mem/packet.cc b/src/mem/packet.cc
index 8de02f533..8cd356768 100644
--- a/src/mem/packet.cc
+++ b/src/mem/packet.cc
@@ -99,7 +99,7 @@ MemCmd::commandInfo[] =
             InvalidCmd, "ReadExResp" },
     /* LoadLockedReq */
     { SET4(IsRead, IsLocked, IsRequest, NeedsResponse),
-            ReadResp, "LoadLockedReq" },
+            LoadLockedResp, "LoadLockedReq" },
     /* LoadLockedResp */
     { SET4(IsRead, IsLocked, IsResponse, HasData),
             InvalidCmd, "LoadLockedResp" },
-- 
cgit v1.2.3


From 4b68652c87f61fe0a2fd4040b79130de0846df85 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Mon, 2 Jul 2007 13:57:45 -0700
Subject: A couple more minor bug fixes for FS timing mode.

src/cpu/simple/timing.cc:
    Fix another SC problem.
src/mem/cache/cache_impl.hh:
    Forgot to call makeTimingResponse() on uncached timing responses.

--HG--
extra : convert_revision : 5a5a58ca2053e4e8de2133205bfd37de15eb4209
---
 src/cpu/simple/timing.cc    | 13 +++++++------
 src/mem/cache/cache_impl.hh |  1 +
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index 492a669b8..0c03815b5 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -356,8 +356,6 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
         MemCmd cmd = MemCmd::WriteReq; // default
         bool do_access = true;  // flag to suppress cache access
 
-        assert(dcache_pkt == NULL);
-
         if (req->isLocked()) {
             cmd = MemCmd::StoreCondReq;
             do_access = TheISA::handleLockedWrite(thread, req);
@@ -369,11 +367,14 @@ TimingSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
             }
         }
 
-        if (do_access) {
-            dcache_pkt = new Packet(req, cmd, Packet::Broadcast);
-            dcache_pkt->allocate();
-            dcache_pkt->set(data);
+        // Note: need to allocate dcache_pkt even if do_access is
+        // false, as it's used unconditionally to call completeAcc().
+        assert(dcache_pkt == NULL);
+        dcache_pkt = new Packet(req, cmd, Packet::Broadcast);
+        dcache_pkt->allocate();
+        dcache_pkt->set(data);
 
+        if (do_access) {
             if (!dcachePort.sendTiming(dcache_pkt)) {
                 _status = DcacheRetry;
             } else {
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index b4c3c6359..0d76b6bec 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -698,6 +698,7 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
             if (pkt->isRead()) {
                 target->pkt->setData(pkt->getPtr<uint8_t>());
             }
+            target->pkt->makeTimingResponse();
             cpuSidePort->respond(target->pkt, time);
         }
         assert(!mshr->hasTargets());
-- 
cgit v1.2.3


From 4738649e32d06d92e6792b7ce80fcbd05627fc06 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Tue, 3 Jul 2007 00:40:31 -0400
Subject: Delete packets when we're done with them.

--HG--
extra : convert_revision : b8894d26e1ca7a6c9b736500accdaa53bfb09558
---
 src/mem/cache/cache_impl.hh | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 0d76b6bec..320e0be81 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -705,6 +705,8 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
         deallocate = true;
     }
 
+    delete pkt;
+
     if (deallocate) {
         mq->deallocate(mshr);
         if (wasFull && !mq->isFull()) {
@@ -1242,6 +1244,9 @@ Cache<TagStore>::MemSidePort::sendPacket()
             waitingOnRetry = !success;
             if (waitingOnRetry) {
                 DPRINTF(CachePort, "now waiting on a retry\n");
+                if (!mshr->isSimpleForward()) {
+                    delete pkt;
+                }
             } else {
                 myCache()->markInService(mshr);
             }
-- 
cgit v1.2.3


From 3b4ff759398371ac14b7d694de1c87af245f7d42 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sat, 14 Jul 2007 13:14:53 -0700
Subject: Fix bug in copying packet with static data pointer.

--HG--
extra : convert_revision : 2fcf99f050d73e007433c1db2475f2893c5961a0
---
 src/mem/packet.hh | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index 16bc6f458..c90842dee 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -368,14 +368,15 @@ class Packet : public FastAlloc
     }
 
     /** Alternate constructor for copying a packet.  Copy all fields
-     * *except* set data allocation as static... even if the original
-     * packet's data was dynamic, we don't want to free it when the
-     * new packet is deallocated.  Note that if original packet used
-     * dynamic data, user must guarantee that the new packet's
-     * lifetime is less than that of the original packet. */
+     * *except* if the original packet's data was dynamic, don't copy
+     * that, as we can't guarantee that the new packet's lifetime is
+     * less than that of the original packet.  In this case the new
+     * packet should allocate its own data. */
     Packet(Packet *origPkt)
         :  cmd(origPkt->cmd), req(origPkt->req),
-           data(NULL), staticData(false), dynamicData(false), arrayData(false),
+           data(origPkt->staticData ? origPkt->data : NULL),
+           staticData(origPkt->staticData),
+           dynamicData(false), arrayData(false),
            addr(origPkt->addr), size(origPkt->size),
            src(origPkt->src), dest(origPkt->dest),
            addrSizeValid(origPkt->addrSizeValid),
-- 
cgit v1.2.3


From abd194df5c2dfd0ebf608c5d59196a08ca0ef630 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sat, 14 Jul 2007 13:16:58 -0700
Subject: Move a couple of DPRINTFs from Cache to CachePort.

--HG--
extra : convert_revision : 55a0d26660aeb8f63b41897d53e6b2d1f0a163be
---
 src/mem/cache/base_cache.hh | 2 +-
 src/mem/cache/cache_impl.hh | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/mem/cache/base_cache.hh b/src/mem/cache/base_cache.hh
index fcc040bd9..46414974b 100644
--- a/src/mem/cache/base_cache.hh
+++ b/src/mem/cache/base_cache.hh
@@ -126,7 +126,7 @@ class BaseCache : public MemObject
 
         void requestBus(RequestCause cause, Tick time)
         {
-            DPRINTF(Cache, "Asserting bus request for cause %d\n", cause);
+            DPRINTF(CachePort, "Asserting bus request for cause %d\n", cause);
             if (!waitingOnRetry) {
                 schedSendEvent(time);
             }
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 320e0be81..b159df84a 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -1238,7 +1238,8 @@ Cache<TagStore>::MemSidePort::sendPacket()
             MSHR *mshr = dynamic_cast<MSHR*>(pkt->senderState);
 
             bool success = sendTiming(pkt);
-            DPRINTF(Cache, "Address %x was %s in sending the timing request\n",
+            DPRINTF(CachePort,
+                    "Address %x was %s in sending the timing request\n",
                     pkt->getAddr(), success ? "successful" : "unsuccessful");
 
             waitingOnRetry = !success;
-- 
cgit v1.2.3


From 15a51d0cae01defc116c9a937bfa8c4577f72826 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sat, 14 Jul 2007 13:28:52 -0700
Subject: Add CacheRepl trace flag and move a couple of DPRINTFs to it.

--HG--
extra : convert_revision : 31724d19ebdf2cdc2a2bafff83d17845b3a0b183
---
 src/base/traceflags.py    | 1 +
 src/mem/cache/tags/lru.cc | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/base/traceflags.py b/src/base/traceflags.py
index 70fadb210..8573eb9bf 100644
--- a/src/base/traceflags.py
+++ b/src/base/traceflags.py
@@ -47,6 +47,7 @@ baseFlags = [
     'BusBridge',
     'Cache',
     'CachePort',
+    'CacheRepl',
     'Chains',
     'Checker',
     'Clock',
diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc
index 3269aa4db..0a8587c20 100644
--- a/src/mem/cache/tags/lru.cc
+++ b/src/mem/cache/tags/lru.cc
@@ -173,7 +173,7 @@ LRU::findBlock(Addr addr, int &lat)
     if (blk != NULL) {
         // move this block to head of the MRU list
         sets[set].moveToHead(blk);
-        DPRINTF(Cache, "set %x: moving blk %x to MRU\n",
+        DPRINTF(CacheRepl, "set %x: moving blk %x to MRU\n",
                 set, regenerateBlkAddr(tag, set));
         if (blk->whenReady > curTick
             && blk->whenReady - curTick > hitLatency) {
@@ -208,7 +208,7 @@ LRU::findReplacement(Addr addr, PacketList &writebacks)
         ++sampledRefs;
         blk->refCount = 0;
 
-        DPRINTF(Cache, "set %x: selecting blk %x for replacement\n",
+        DPRINTF(CacheRepl, "set %x: selecting blk %x for replacement\n",
                 set, regenerateBlkAddr(blk->tag, set));
     } else if (!blk->isTouched) {
         tagsInUse++;
-- 
cgit v1.2.3


From 4bcfa916f1f12e8cda253ca7154e75fa1f71ca44 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sat, 14 Jul 2007 23:49:24 -0700
Subject: New tree-based algorithm for creating more complex cache hierarchies.

--HG--
extra : convert_revision : de8dd4ef5dae0f3e084461e8ef7c549653e61d3f
---
 configs/example/memtest.py | 143 +++++++++++++++++++++++----------------------
 1 file changed, 73 insertions(+), 70 deletions(-)

diff --git a/configs/example/memtest.py b/configs/example/memtest.py
index 0e6260b5d..47853ffab 100644
--- a/configs/example/memtest.py
+++ b/configs/example/memtest.py
@@ -49,6 +49,10 @@ parser.add_option("-n", "--numtesters", type="int", default=8,
                   metavar="N",
                   help="Number of tester pseudo-CPUs [default: %default]")
 
+parser.add_option("-t", "--treespec", type="string",
+                  help="Colon-separated multilevel tree specification")
+
+
 parser.add_option("-f", "--functional", type="int", default=0,
                   metavar="PCT",
                   help="Target percentage of functional accesses "
@@ -69,84 +73,83 @@ if args:
      print "Error: script doesn't take any positional arguments"
      sys.exit(1)
 
-# Should generalize this someday... would be cool to have a loop that
-# just iterates, adding a level of caching each time.
-#if options.cache_levels != 2 and options.cache_levels != 0:
-#     print "Error: number of cache levels must be 0 or 2"
-#     sys.exit(1)
-
-if options.blocking:
-     num_l1_mshrs = 1
-     num_l2_mshrs = 1
-else:
-     num_l1_mshrs = 12
-     num_l2_mshrs = 92
-
 block_size = 64
 
-# --------------------
-# Base L1 Cache
-# ====================
-
-class L1(BaseCache):
-    latency = '1ns'
-    block_size = block_size
-    mshrs = num_l1_mshrs
-    tgts_per_mshr = 8
-
-# ----------------------
-# Base L2 Cache
-# ----------------------
-
-class L2(BaseCache):
-    block_size = block_size
-    latency = '10ns'
-    mshrs = num_l2_mshrs
-    tgts_per_mshr = 16
-    write_buffers = 8
-
-if options.numtesters > block_size:
+if not options.treespec:
+     # convert simple cache_levels option to treespec
+     treespec = [options.numtesters, 1]
+     numtesters = options.numtesters
+else:
+     try:
+          treespec = [int(x) for x in options.treespec.split(':')]
+          numtesters = reduce(lambda x,y: x*y, treespec)
+     except:
+          print "Error parsing treespec option"
+          sys.exit(1)
+
+if numtesters > block_size:
      print "Error: Number of testers limited to %s because of false sharing" \
            % (block_size)
      sys.exit(1)
 
-cpus = [ MemTest(atomic=options.atomic, max_loads=options.maxloads,
-                 percent_functional=options.functional,
-                 percent_uncacheable=options.uncacheable,
-                 progress_interval=options.progress)
-         for i in xrange(options.numtesters) ]
+if len(treespec) < 1:
+     print "Error parsing treespec"
+     sys.exit(1)
 
-# system simulated
-system = System(cpu = cpus, funcmem = PhysicalMemory(),
-                physmem = PhysicalMemory(latency = "100ns"),
-                membus = Bus(clock="500MHz", width=16))
-
-# l2cache & bus
-if options.cache_levels == 2:
-    system.toL2Bus = Bus(clock="500MHz", width=16)
-    system.l2c = L2(size='64kB', assoc=8)
-    system.l2c.cpu_side = system.toL2Bus.port
-
-    # connect l2c to membus
-    system.l2c.mem_side = system.membus.port
-
-# add L1 caches
-for cpu in cpus:
-    if options.cache_levels == 2:
-         cpu.l1c = L1(size = '32kB', assoc = 4)
-         cpu.test = cpu.l1c.cpu_side
-         cpu.l1c.mem_side = system.toL2Bus.port
-    elif options.cache_levels == 1:
-         cpu.l1c = L1(size = '32kB', assoc = 4)
-         cpu.test = cpu.l1c.cpu_side
-         cpu.l1c.mem_side = system.membus.port
-    else:
-         cpu.test = system.membus.port
-    system.funcmem.port = cpu.functional
-
-# connect memory to membus
-system.physmem.port = system.membus.port
+# define prototype L1 cache
+proto_l1 = BaseCache(size = '32kB', assoc = 4, block_size = block_size,
+                     latency = '1ns', tgts_per_mshr = 8)
 
+if options.blocking:
+     proto_l1.mshrs = 1
+else:
+     proto_l1.mshrs = 8
+
+# build a list of prototypes, one for each cache level (L1 is at end,
+# followed by the tester pseudo-cpu objects)
+prototypes = [ proto_l1,
+               MemTest(atomic=options.atomic, max_loads=options.maxloads,
+                       percent_functional=options.functional,
+                       percent_uncacheable=options.uncacheable,
+                       progress_interval=options.progress) ]
+
+while len(prototypes) < len(treespec):
+     # clone previous level and update params
+     prev = prototypes[0]
+     next = prev()
+     next.size = prev.size * 4
+     next.latency = prev.latency * 10
+     next.assoc = prev.assoc * 2
+     prototypes.insert(0, next)
+
+# system simulated
+system = System(funcmem = PhysicalMemory(),
+                physmem = PhysicalMemory(latency = "100ns"))
+
+def make_level(spec, prototypes, attach_obj, attach_port):
+     fanout = spec[0]
+     parent = attach_obj # use attach obj as config parent too
+     if fanout > 1:
+          new_bus = Bus(clock="500MHz", width=16)
+          new_bus.port = getattr(attach_obj, attach_port)
+          parent.cpu_side_bus = new_bus
+          attach_obj = new_bus
+          attach_port = "port"
+     objs = [prototypes[0]() for i in xrange(fanout)]
+     if len(spec) > 1:
+          # we just built caches, more levels to go
+          parent.cache = objs
+          for cache in objs:
+               cache.mem_side = getattr(attach_obj, attach_port)
+               make_level(spec[1:], prototypes[1:], cache, "cpu_side")
+     else:
+          # we just built the MemTest objects
+          parent.cpu = objs
+          for t in objs:
+               t.test = getattr(attach_obj, attach_port)
+               t.functional = system.funcmem.port
+
+make_level(treespec, prototypes, system.physmem, "port")
 
 # -----------------------
 # run simulation
-- 
cgit v1.2.3


From ad560a6642fbb752e608c02048fc2103e60093b3 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sun, 15 Jul 2007 13:22:49 -0700
Subject: Add --force-bus option to memtest.py.

--HG--
extra : convert_revision : 101735cca426903704ff2edaff051fa7c5bfc46c
---
 configs/example/memtest.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/configs/example/memtest.py b/configs/example/memtest.py
index 47853ffab..c9149865a 100644
--- a/configs/example/memtest.py
+++ b/configs/example/memtest.py
@@ -52,6 +52,8 @@ parser.add_option("-n", "--numtesters", type="int", default=8,
 parser.add_option("-t", "--treespec", type="string",
                   help="Colon-separated multilevel tree specification")
 
+parser.add_option("--force-bus", action="store_true",
+                  help="Use bus between levels even with single cache")
 
 parser.add_option("-f", "--functional", type="int", default=0,
                   metavar="PCT",
@@ -129,7 +131,7 @@ system = System(funcmem = PhysicalMemory(),
 def make_level(spec, prototypes, attach_obj, attach_port):
      fanout = spec[0]
      parent = attach_obj # use attach obj as config parent too
-     if fanout > 1:
+     if fanout > 1 or options.force_bus:
           new_bus = Bus(clock="500MHz", width=16)
           new_bus.port = getattr(attach_obj, attach_port)
           parent.cpu_side_bus = new_bus
-- 
cgit v1.2.3


From b1bdc3b3d9de40387a209777aa972f96792c8d6a Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sun, 15 Jul 2007 14:07:31 -0700
Subject: Punt on old -n/-c memtest args. Also added comments to document
 treespec format.

--HG--
extra : convert_revision : fa9e8f66b68b96a4efca8a7fe6e7c37367382d9d
---
 configs/example/memtest.py | 51 +++++++++++++++++++++++++++-------------------
 1 file changed, 30 insertions(+), 21 deletions(-)

diff --git a/configs/example/memtest.py b/configs/example/memtest.py
index c9149865a..e7f39d8bd 100644
--- a/configs/example/memtest.py
+++ b/configs/example/memtest.py
@@ -33,24 +33,38 @@ m5.AddToPath('../common')
 
 parser = optparse.OptionParser()
 
-parser.add_option("-c", "--cache-levels", type="int", default=2,
-                  metavar="LEVELS",
-                  help="Number of cache levels [default: %default]")
 parser.add_option("-a", "--atomic", action="store_true",
                   help="Use atomic (non-timing) mode")
 parser.add_option("-b", "--blocking", action="store_true",
                   help="Use blocking caches")
-parser.add_option("-l", "--maxloads", default="1G", metavar="N",
-                  help="Stop after N loads [default: %default]")
+parser.add_option("-l", "--maxloads", metavar="N",
+                  help="Stop after N loads")
 parser.add_option("-m", "--maxtick", type="int", default=m5.MaxTick,
                   metavar="T",
                   help="Stop after T ticks")
-parser.add_option("-n", "--numtesters", type="int", default=8,
-                  metavar="N",
-                  help="Number of tester pseudo-CPUs [default: %default]")
 
-parser.add_option("-t", "--treespec", type="string",
-                  help="Colon-separated multilevel tree specification")
+#
+# The "tree" specification is a colon-separated list of one or more
+# integers.  The first integer is the number of caches/testers
+# connected directly to main memory.  The last integer in the list is
+# the number of testers associated with the uppermost level of memory
+# (L1 cache, if there are caches, or main memory if no caches).  Thus
+# if there is only one integer, there are no caches, and the integer
+# specifies the number of testers connected directly to main memory.
+# The other integers (if any) give the number of caches attached to
+# each cache at the next level closer to memory (the per-node fanout).
+#
+# Examples:
+#
+#  "2:1"    Two caches connected to memory with a single tester behind each
+#           (single-level hierarchy, two testers total)
+#
+#  "2:2:1"  Two-level hierarchy, 2 L1s behind each of 2 L2s, 4 testers total
+#
+parser.add_option("-t", "--treespec", type="string", default="8:1",
+                  help="Colon-separated multilevel tree specification, "
+                  "see script comments for details "
+                  "[default: %default]")
 
 parser.add_option("--force-bus", action="store_true",
                   help="Use bus between levels even with single cache")
@@ -77,17 +91,12 @@ if args:
 
 block_size = 64
 
-if not options.treespec:
-     # convert simple cache_levels option to treespec
-     treespec = [options.numtesters, 1]
-     numtesters = options.numtesters
-else:
-     try:
-          treespec = [int(x) for x in options.treespec.split(':')]
-          numtesters = reduce(lambda x,y: x*y, treespec)
-     except:
-          print "Error parsing treespec option"
-          sys.exit(1)
+try:
+     treespec = [int(x) for x in options.treespec.split(':')]
+     numtesters = reduce(lambda x,y: x*y, treespec)
+except:
+     print "Error parsing treespec option"
+     sys.exit(1)
 
 if numtesters > block_size:
      print "Error: Number of testers limited to %s because of false sharing" \
-- 
cgit v1.2.3


From 9172876dd7ba4877c586ced30904548539451f37 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sun, 15 Jul 2007 14:32:55 -0700
Subject: Fix problem with unset max_loads in memtest. Also make default 0, and
 make that mean run forever.

--HG--
extra : convert_revision : 3e60a52b1c5e334a9ef3d744cf7ee1d851ba4aa9
---
 configs/example/memtest.py | 14 +++++++++-----
 src/cpu/memtest/MemTest.py |  2 +-
 src/cpu/memtest/memtest.cc |  2 +-
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/configs/example/memtest.py b/configs/example/memtest.py
index e7f39d8bd..af100c9a9 100644
--- a/configs/example/memtest.py
+++ b/configs/example/memtest.py
@@ -37,7 +37,7 @@ parser.add_option("-a", "--atomic", action="store_true",
                   help="Use atomic (non-timing) mode")
 parser.add_option("-b", "--blocking", action="store_true",
                   help="Use blocking caches")
-parser.add_option("-l", "--maxloads", metavar="N",
+parser.add_option("-l", "--maxloads", metavar="N", default=0,
                   help="Stop after N loads")
 parser.add_option("-m", "--maxtick", type="int", default=m5.MaxTick,
                   metavar="T",
@@ -116,14 +116,18 @@ if options.blocking:
 else:
      proto_l1.mshrs = 8
 
-# build a list of prototypes, one for each cache level (L1 is at end,
-# followed by the tester pseudo-cpu objects)
-prototypes = [ proto_l1,
-               MemTest(atomic=options.atomic, max_loads=options.maxloads,
+# build a list of prototypes, one for each level of treespec, starting
+# at the end (last entry is tester objects)
+prototypes = [ MemTest(atomic=options.atomic, max_loads=options.maxloads,
                        percent_functional=options.functional,
                        percent_uncacheable=options.uncacheable,
                        progress_interval=options.progress) ]
 
+# next comes L1 cache, if any
+if len(treespec) > 1:
+     prototypes.insert(0, proto_l1)
+
+# now add additional cache levels (if any) by scaling L1 params
 while len(prototypes) < len(treespec):
      # clone previous level and update params
      prev = prototypes[0]
diff --git a/src/cpu/memtest/MemTest.py b/src/cpu/memtest/MemTest.py
index 381519972..a328f4734 100644
--- a/src/cpu/memtest/MemTest.py
+++ b/src/cpu/memtest/MemTest.py
@@ -33,7 +33,7 @@ from m5 import build_env
 
 class MemTest(SimObject):
     type = 'MemTest'
-    max_loads = Param.Counter("number of loads to execute")
+    max_loads = Param.Counter(0, "number of loads to execute")
     atomic = Param.Bool(False, "Execute tester in atomic mode? (or timing)\n")
     memory_size = Param.Int(65536, "memory size")
     percent_dest_unaligned = Param.Percent(50,
diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc
index 019b4328c..db3ca282a 100644
--- a/src/cpu/memtest/memtest.cc
+++ b/src/cpu/memtest/memtest.cc
@@ -232,7 +232,7 @@ MemTest::completeRequest(PacketPtr pkt)
             nextProgressMessage += progressInterval;
         }
 
-        if (numReads >= maxLoads)
+        if (maxLoads != 0 && numReads >= maxLoads)
             exitSimLoop("maximum number of loads reached");
         break;
 
-- 
cgit v1.2.3


From f790f34fe30aaca22b829104a8cf3f547624132a Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sun, 15 Jul 2007 20:09:03 -0700
Subject: Make Bus::findPort() a little more useful. Move check for loops
 outside, since half the call sites end up working around it anyway.  Return
 integer port ID instead of port object pointer.

--HG--
extra : convert_revision : 4c31fe9930f4d1aa4919e764efb7c50d43792ea3
---
 src/mem/bus.cc | 47 ++++++++++++++++++++++++-----------------------
 src/mem/bus.hh |  6 ++----
 2 files changed, 26 insertions(+), 27 deletions(-)

diff --git a/src/mem/bus.cc b/src/mem/bus.cc
index 0cb1240f3..24a0c6f02 100644
--- a/src/mem/bus.cc
+++ b/src/mem/bus.cc
@@ -172,7 +172,7 @@ void Bus::occupyBus(PacketPtr pkt)
 bool
 Bus::recvTiming(PacketPtr pkt)
 {
-    Port *port;
+    int port_id;
     DPRINTF(Bus, "recvTiming: packet src %d dest %d addr 0x%x cmd %s\n",
             pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString());
 
@@ -196,8 +196,8 @@ Bus::recvTiming(PacketPtr pkt)
     // Make sure to clear the snoop commit flag so it doesn't think an
     // access has been handled twice.
     if (dest == Packet::Broadcast) {
-        port = findPort(pkt->getAddr(), pkt->getSrc());
-        timingSnoop(pkt, port ? port : interfaces[pkt->getSrc()]);
+        port_id = findPort(pkt->getAddr());
+        timingSnoop(pkt, interfaces[port_id]);
 
         if (pkt->memInhibitAsserted()) {
             //Cache-Cache transfer occuring
@@ -213,13 +213,13 @@ Bus::recvTiming(PacketPtr pkt)
     } else {
         assert(dest >= 0 && dest < maxId);
         assert(dest != pkt->getSrc()); // catch infinite loops
-        port = interfaces[dest];
+        port_id = dest;
     }
 
     occupyBus(pkt);
 
-    if (port) {
-        if (port->sendTiming(pkt))  {
+    if (port_id != pkt->getSrc()) {
+        if (interfaces[port_id]->sendTiming(pkt))  {
             // Packet was successfully sent. Return true.
             // Also take care of retries
             if (inRetry) {
@@ -279,8 +279,8 @@ Bus::recvRetry(int id)
     }
 }
 
-Port *
-Bus::findPort(Addr addr, int id)
+int
+Bus::findPort(Addr addr)
 {
     /* An interval tree would be a better way to do this. --ali. */
     int dest_id = -1;
@@ -295,7 +295,7 @@ Bus::findPort(Addr addr, int id)
              iter != defaultRange.end(); iter++) {
             if (*iter == addr) {
                 DPRINTF(Bus, "  found addr %#llx on default\n", addr);
-                return defaultPort;
+                return defaultId;
             }
         }
 
@@ -306,18 +306,11 @@ Bus::findPort(Addr addr, int id)
             DPRINTF(Bus, "Unable to find destination for addr: %#llx, will use "
                     "default port", addr);
 
-            return defaultPort;
+            return defaultId;
         }
     }
 
-
-    // we shouldn't be sending this back to where it came from
-    // do the snoop access and then we should terminate
-    // the cyclical call.
-    if (dest_id == id)
-        return 0;
-
-    return interfaces[dest_id];
+    return dest_id;
 }
 
 void
@@ -380,7 +373,8 @@ Bus::recvAtomic(PacketPtr pkt)
     Tick snoop_response_latency = 0;
     int orig_src = pkt->getSrc();
 
-    Port *target_port = findPort(pkt->getAddr(), pkt->getSrc());
+    int target_port_id = findPort(pkt->getAddr());
+    Port *target_port = interfaces[target_port_id];
 
     SnoopIter s_end = snoopPorts.end();
     for (SnoopIter s_iter = snoopPorts.begin(); s_iter != s_end; s_iter++) {
@@ -406,7 +400,13 @@ Bus::recvAtomic(PacketPtr pkt)
         }
     }
 
-    Tick response_latency = target_port->sendAtomic(pkt);
+    Tick response_latency = 0;
+
+    // we can get requests sent up from the memory side of the bus for
+    // snooping... don't send them back down!
+    if (target_port_id != pkt->getSrc()) {
+        response_latency = target_port->sendAtomic(pkt);
+    }
 
     // if we got a response from a snooper, restore it here
     if (snoop_response_cmd != MemCmd::InvalidCmd) {
@@ -431,11 +431,12 @@ Bus::recvFunctional(PacketPtr pkt)
             pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString());
     assert(pkt->getDest() == Packet::Broadcast);
 
-    Port* port = findPort(pkt->getAddr(), pkt->getSrc());
-    functionalSnoop(pkt, port ? port : interfaces[pkt->getSrc()]);
+    int port_id = findPort(pkt->getAddr());
+    Port *port = interfaces[port_id];
+    functionalSnoop(pkt, port);
 
     // If the snooping hasn't found what we were looking for, keep going.
-    if (!pkt->isResponse() && port) {
+    if (!pkt->isResponse() && port_id != pkt->getSrc()) {
         port->sendFunctional(pkt);
     }
 }
diff --git a/src/mem/bus.hh b/src/mem/bus.hh
index bd51337ed..a19420244 100644
--- a/src/mem/bus.hh
+++ b/src/mem/bus.hh
@@ -176,11 +176,9 @@ class Bus : public MemObject
     /** Find which port connected to this bus (if any) should be given a packet
      * with this address.
      * @param addr Address to find port for.
-     * @param id Id of the port this packet was received from (to prevent
-     *             loops)
-     * @return pointer to port that the packet should be sent out of.
+     * @return id of port that the packet should be sent out of.
      */
-    Port *findPort(Addr addr, int id);
+    int findPort(Addr addr);
 
     /** Snoop all relevant ports functionally. */
     void functionalSnoop(PacketPtr pkt, Port *responder);
-- 
cgit v1.2.3


From 884807a68ad7e4f390660b3becfe4ee094334e95 Mon Sep 17 00:00:00 2001
From: Steve Reinhardt <stever@eecs.umich.edu>
Date: Sun, 15 Jul 2007 20:11:06 -0700
Subject: Fix up a bunch of multilevel coherence issues. Atomic mode seems to
 work.  Timing is closer but not there yet.

--HG--
extra : convert_revision : 0dea5c3d4b973d009e9d4a4c21b9cad15961d56f
---
 configs/example/memtest.py  |  2 +-
 src/cpu/memtest/memtest.cc  |  4 +-
 src/cpu/o3/lsq_impl.hh      |  7 ++--
 src/mem/bus.cc              |  5 ++-
 src/mem/cache/cache_impl.hh | 94 ++++++++++++++++++++++++++++++++++++++++-----
 src/mem/packet.hh           | 12 ++++--
 6 files changed, 105 insertions(+), 19 deletions(-)

diff --git a/configs/example/memtest.py b/configs/example/memtest.py
index af100c9a9..5bb874e85 100644
--- a/configs/example/memtest.py
+++ b/configs/example/memtest.py
@@ -144,7 +144,7 @@ system = System(funcmem = PhysicalMemory(),
 def make_level(spec, prototypes, attach_obj, attach_port):
      fanout = spec[0]
      parent = attach_obj # use attach obj as config parent too
-     if fanout > 1 or options.force_bus:
+     if len(spec) > 1 and (fanout > 1 or options.force_bus):
           new_bus = Bus(clock="500MHz", width=16)
           new_bus.port = getattr(attach_obj, attach_port)
           parent.cpu_side_bus = new_bus
diff --git a/src/cpu/memtest/memtest.cc b/src/cpu/memtest/memtest.cc
index db3ca282a..f5c8bb93b 100644
--- a/src/cpu/memtest/memtest.cc
+++ b/src/cpu/memtest/memtest.cc
@@ -64,7 +64,9 @@ MemTest::CpuPort::recvTiming(PacketPtr pkt)
 Tick
 MemTest::CpuPort::recvAtomic(PacketPtr pkt)
 {
-    panic("MemTest doesn't expect recvAtomic callback!");
+    // must be snoop upcall
+    assert(pkt->isRequest());
+    assert(pkt->getDest() == Packet::Broadcast);
     return curTick;
 }
 
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
index b4a6a02da..10c0afd38 100644
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -84,9 +84,10 @@ LSQ<Impl>::DcachePort::recvTiming(PacketPtr pkt)
         lsq->thread[pkt->req->getThreadNum()].completeDataAccess(pkt);
     }
     else {
-    //else it is a coherence request, maybe you need to do something
-        warn("Recieved a coherence request (Invalidate?), 03CPU doesn't"
-             "update LSQ for these\n");
+        // must be a snoop
+
+        // @TODO someday may need to process invalidations in LSQ here
+        // to provide stronger consistency model
     }
     return true;
 }
diff --git a/src/mem/bus.cc b/src/mem/bus.cc
index 24a0c6f02..e70558bd6 100644
--- a/src/mem/bus.cc
+++ b/src/mem/bus.cc
@@ -183,8 +183,9 @@ Bus::recvTiming(PacketPtr pkt)
 
     // If the bus is busy, or other devices are in line ahead of the current
     // one, put this device on the retry list.
-    if (tickNextIdle > curTick ||
-        (retryList.size() && (!inRetry || pktPort != retryList.front())))
+    if (!pkt->isExpressSnoop() &&
+        (tickNextIdle > curTick ||
+         (retryList.size() && (!inRetry || pktPort != retryList.front()))))
     {
         addToRetryList(pktPort);
         DPRINTF(Bus, "recvTiming: Bus is busy, returning false\n");
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index b159df84a..59571dd6f 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -165,11 +165,25 @@ Cache<TagStore>::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk)
             blk->trackLoadLocked(pkt);
         }
         pkt->setDataFromBlock(blk->data, blkSize);
+        if (pkt->getSize() == blkSize) {
+            // special handling for coherent block requests from
+            // upper-level caches
+            if (pkt->needsExclusive()) {
+                // on ReadExReq we give up our copy
+                tags->invalidateBlk(blk);
+            } else {
+                // on ReadReq we create shareable copies here and in
+                // the requester
+                pkt->assertShared();
+                blk->status &= ~BlkWritable;
+            }
+        }
     } else {
         // Not a read or write... must be an upgrade.  it's OK
         // to just ack those as long as we have an exclusive
         // copy at this level.
         assert(pkt->cmd == MemCmd::UpgradeReq);
+        tags->invalidateBlk(blk);
     }
 }
 
@@ -269,6 +283,18 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk, int &lat)
             hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++;
             satisfied = true;
             satisfyCpuSideRequest(pkt, blk);
+        } else if (pkt->cmd == MemCmd::Writeback) {
+            // special case: writeback to read-only block (e.g., from
+            // L1 into L2).  since we're really just passing ownership
+            // from one cache to another, we can update this cache to
+            // be the owner without making the block writeable
+            assert(!blk->isWritable() /* && !blk->isDirty() */);
+            assert(blkSize == pkt->getSize());
+            std::memcpy(blk->data, pkt->getPtr<uint8_t>(), blkSize);
+            blk->status |= BlkDirty;
+            satisfied = true;
+            // nothing else to do; writeback doesn't expect response
+            assert(!pkt->needsResponse());
         } else {
             // permission violation... nothing to do here, leave unsatisfied
             // for statistics purposes this counts like a complete miss
@@ -363,9 +389,10 @@ Cache<TagStore>::timingAccess(PacketPtr pkt)
     bool needsResponse = pkt->needsResponse();
 
     if (satisfied) {
-        assert(needsResponse);
-        pkt->makeTimingResponse();
-        cpuSidePort->respond(pkt, curTick+lat);
+        if (needsResponse) {
+            pkt->makeTimingResponse();
+            cpuSidePort->respond(pkt, curTick+lat);
+        }
     } else {
         // miss
         if (prefetchMiss)
@@ -456,10 +483,30 @@ Cache<TagStore>::atomicAccess(PacketPtr pkt)
 {
     int lat = hitLatency;
 
+    // @TODO: make this a parameter
+    bool last_level_cache = false;
+
     if (pkt->memInhibitAsserted()) {
-        DPRINTF(Cache, "mem inhibited on 0x%x: not responding\n",
-                pkt->getAddr());
         assert(!pkt->req->isUncacheable());
+        // have to invalidate ourselves and any lower caches even if
+        // upper cache will be responding
+        if (pkt->isInvalidate()) {
+            BlkType *blk = tags->findBlock(pkt->getAddr());
+            if (blk && blk->isValid()) {
+                tags->invalidateBlk(blk);
+                DPRINTF(Cache, "rcvd mem-inhibited %s on 0x%x: invalidating\n",
+                        pkt->cmdString(), pkt->getAddr());
+            }
+            if (!last_level_cache) {
+                DPRINTF(Cache, "forwarding mem-inhibited %s on 0x%x\n",
+                        pkt->cmdString(), pkt->getAddr());
+                lat += memSidePort->sendAtomic(pkt);
+            }
+        } else {
+            DPRINTF(Cache, "rcvd mem-inhibited %s on 0x%x: not responding\n",
+                    pkt->cmdString(), pkt->getAddr());
+        }
+
         return lat;
     }
 
@@ -791,9 +838,7 @@ Cache<TagStore>::handleFill(PacketPtr pkt, BlkType *blk,
         assert(pkt->isRead() || blk->isValid());
     }
 
-    if (pkt->needsExclusive()) {
-        blk->status = BlkValid | BlkWritable | BlkDirty;
-    } else if (!pkt->sharedAsserted()) {
+    if (pkt->needsExclusive() || !pkt->sharedAsserted()) {
         blk->status = BlkValid | BlkWritable;
     } else {
         blk->status = BlkValid;
@@ -839,6 +884,37 @@ void
 Cache<TagStore>::handleSnoop(PacketPtr pkt, BlkType *blk,
                              bool is_timing, bool is_deferred)
 {
+    assert(pkt->isRequest());
+
+    // first propagate snoop upward to see if anyone above us wants to
+    // handle it.  save & restore packet src since it will get
+    // rewritten to be relative to cpu-side bus (if any)
+    bool alreadySupplied = pkt->memInhibitAsserted();
+    bool upperSupply = false;
+    if (is_timing) {
+        Packet *snoopPkt = new Packet(pkt, true);  // clear flags
+        snoopPkt->setExpressSnoop();
+        cpuSidePort->sendTiming(snoopPkt);
+        if (snoopPkt->memInhibitAsserted()) {
+            // cache-to-cache response from some upper cache
+            assert(!alreadySupplied);
+            pkt->assertMemInhibit();
+        }
+        if (snoopPkt->sharedAsserted()) {
+            pkt->assertShared();
+        }
+        delete snoopPkt;
+    } else {
+        int origSrc = pkt->getSrc();
+        cpuSidePort->sendAtomic(pkt);
+        if (!alreadySupplied && pkt->memInhibitAsserted()) {
+            // cache-to-cache response from some upper cache:
+            // forward response to original requester
+            assert(pkt->isResponse());
+        }
+        pkt->setSrc(origSrc);
+    }
+
     if (!blk || !blk->isValid()) {
         return;
     }
@@ -846,7 +922,7 @@ Cache<TagStore>::handleSnoop(PacketPtr pkt, BlkType *blk,
     // we may end up modifying both the block state and the packet (if
     // we respond in atomic mode), so just figure out what to do now
     // and then do it later
-    bool supply = blk->isDirty() && pkt->isRead();
+    bool supply = blk->isDirty() && pkt->isRead() && !upperSupply;
     bool invalidate = pkt->isInvalidate();
 
     if (pkt->isRead() && !pkt->isInvalidate()) {
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index c90842dee..036bd3fd7 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -252,9 +252,11 @@ class Packet : public FastAlloc
     bool destValid;
 
     enum Flag {
-        // Snoop flags
+        // Snoop response flags
         MemInhibit,
         Shared,
+        // Special control flags
+        ExpressSnoop,
         NUM_PACKET_FLAGS
     };
 
@@ -317,6 +319,10 @@ class Packet : public FastAlloc
     bool memInhibitAsserted()   { return flags[MemInhibit]; }
     bool sharedAsserted()       { return flags[Shared]; }
 
+    // Special control flags
+    void setExpressSnoop()      { flags[ExpressSnoop] = true; }
+    bool isExpressSnoop()       { return flags[ExpressSnoop]; }
+
     // Network error conditions... encapsulate them as methods since
     // their encoding keeps changing (from result field to command
     // field, etc.)
@@ -372,7 +378,7 @@ class Packet : public FastAlloc
      * that, as we can't guarantee that the new packet's lifetime is
      * less than that of the original packet.  In this case the new
      * packet should allocate its own data. */
-    Packet(Packet *origPkt)
+    Packet(Packet *origPkt, bool clearFlags = false)
         :  cmd(origPkt->cmd), req(origPkt->req),
            data(origPkt->staticData ? origPkt->data : NULL),
            staticData(origPkt->staticData),
@@ -381,7 +387,7 @@ class Packet : public FastAlloc
            src(origPkt->src), dest(origPkt->dest),
            addrSizeValid(origPkt->addrSizeValid),
            srcValid(origPkt->srcValid), destValid(origPkt->destValid),
-           flags(origPkt->flags),
+           flags(clearFlags ? 0 : origPkt->flags),
            time(curTick), senderState(origPkt->senderState)
     {
     }
-- 
cgit v1.2.3